feat: "Not a dupe" ignore with SQLite persistence
- New `ignore_db` module with SQLite-backed dismissal storage
- Groups flagged as not-a-dupe are persisted to `~/.config/deduper/ignores.db`
- Fingerprint is based on sorted SHA-256 hashes (content-stable)
- Ignored groups are filtered out on subsequent runs
- Review UI: green "Not a dupe" button per group
- Dismissed groups fade out immediately in the browser
- `DEDUPER_DB_DIR` env var overrides the DB location
- 4 new unit tests for `ignore_db`
- 29 tests passing
This commit is contained in:
125
src/ignore_db.rs
Normal file
125
src/ignore_db.rs
Normal file
@@ -0,0 +1,125 @@
|
||||
use anyhow::Result;
|
||||
use rusqlite::Connection;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn db_path() -> PathBuf {
|
||||
let dir = dirs_or_default();
|
||||
std::fs::create_dir_all(&dir).ok();
|
||||
dir.join("ignores.db")
|
||||
}
|
||||
|
||||
/// Resolves the directory that holds the ignore database.
///
/// Resolution order:
/// 1. `DEDUPER_DB_DIR`, when set to a non-empty value;
/// 2. `$HOME/.config/deduper`, when `HOME` is set to a non-empty value;
/// 3. `/tmp/.config/deduper` otherwise.
///
/// Variables are read with `var_os`, so values that are not valid Unicode are
/// honoured instead of being treated as unset. Empty values are treated as
/// unset so the database never ends up relative to the current directory.
fn dirs_or_default() -> PathBuf {
    // Reads an environment variable, discarding it when it is empty.
    let non_empty = |key: &str| std::env::var_os(key).filter(|value| !value.is_empty());

    if let Some(explicit_dir) = non_empty("DEDUPER_DB_DIR") {
        return PathBuf::from(explicit_dir);
    }

    let home = non_empty("HOME").map_or_else(|| PathBuf::from("/tmp"), PathBuf::from);
    home.join(".config").join("deduper")
}
|
||||
|
||||
pub fn open_db() -> Result<Connection> {
|
||||
let path = db_path();
|
||||
let conn = Connection::open(&path)?;
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS ignored_groups (
|
||||
fingerprint TEXT PRIMARY KEY,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
note TEXT DEFAULT ''
|
||||
);",
|
||||
)?;
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
/// Builds the fingerprint of a group from the SHA-256 hashes of its members.
///
/// The fingerprint is the set of distinct hashes, in ascending order, joined
/// by `|`. Input order and repeated hashes therefore do not affect the result;
/// an empty input yields an empty string.
pub fn group_fingerprint(sha256s: &[&str]) -> String {
    // An ordered set both sorts the hashes and drops repeats in one pass.
    let distinct: std::collections::BTreeSet<&str> = sha256s.iter().copied().collect();
    distinct.into_iter().collect::<Vec<_>>().join("|")
}
|
||||
|
||||
/// Records `fingerprint` in the `ignored_groups` table.
///
/// The statement is `INSERT OR IGNORE`, so recording a fingerprint that is
/// already present is not an error.
///
/// # Errors
///
/// Returns an error when the insert statement fails.
pub fn ignore_group(conn: &Connection, fingerprint: &str) -> Result<()> {
    let sql = "INSERT OR IGNORE INTO ignored_groups (fingerprint) VALUES (?1)";
    // The affected-row count is not needed by callers.
    let _affected = conn.execute(sql, [fingerprint])?;
    Ok(())
}
|
||||
|
||||
pub fn is_group_ignored(conn: &Connection, fingerprint: &str) -> bool {
|
||||
conn.query_row(
|
||||
"SELECT 1 FROM ignored_groups WHERE fingerprint = ?1",
|
||||
[fingerprint],
|
||||
|_| Ok(true),
|
||||
)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Deletes `fingerprint` from the `ignored_groups` table.
///
/// # Errors
///
/// Returns an error when the delete statement fails.
pub fn remove_ignore(conn: &Connection, fingerprint: &str) -> Result<()> {
    let sql = "DELETE FROM ignored_groups WHERE fingerprint = ?1";
    // The affected-row count is not needed by callers.
    let _affected = conn.execute(sql, [fingerprint])?;
    Ok(())
}
|
||||
|
||||
/// Lists every ignored group as a `(fingerprint, created_at)` pair, ordered by
/// `created_at` descending.
///
/// # Errors
///
/// Returns an error when the statement cannot be prepared, the query fails,
/// or any row cannot be decoded into two strings. Undecodable rows are
/// reported rather than silently left out of the result.
pub fn list_ignored(conn: &Connection) -> Result<Vec<(String, String)>> {
    let mut stmt = conn.prepare(
        "SELECT fingerprint, created_at FROM ignored_groups ORDER BY created_at DESC",
    )?;
    let rows = stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
    })?;

    // Collecting into a `Result` stops at the first row that fails to decode.
    let entries = rows.collect::<std::result::Result<Vec<_>, _>>()?;
    Ok(entries)
}
|
||||
|
||||
pub fn open_db_in_memory() -> Result<Connection> {
|
||||
let conn = Connection::open_in_memory()?;
|
||||
conn.execute_batch(
|
||||
"CREATE TABLE IF NOT EXISTS ignored_groups (
|
||||
fingerprint TEXT PRIMARY KEY,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
note TEXT DEFAULT ''
|
||||
);",
|
||||
)?;
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A fingerprint is reported as ignored only after it has been recorded.
    #[test]
    fn ignore_and_check_group() {
        let db = open_db_in_memory().unwrap();
        let fingerprint = group_fingerprint(&["sha_b", "sha_a"]);

        assert!(!is_group_ignored(&db, &fingerprint));
        ignore_group(&db, &fingerprint).unwrap();
        assert!(is_group_ignored(&db, &fingerprint));
    }

    /// Input order does not influence the fingerprint.
    #[test]
    fn fingerprint_is_sorted_and_stable() {
        let reversed = group_fingerprint(&["bbb", "aaa"]);
        let ordered = group_fingerprint(&["aaa", "bbb"]);

        assert_eq!(reversed, ordered);
        assert_eq!(reversed, "aaa|bbb");
    }

    /// Removing a recorded fingerprint makes the group visible again.
    #[test]
    fn remove_ignore_works() {
        let db = open_db_in_memory().unwrap();
        let fingerprint = group_fingerprint(&["x", "y"]);

        ignore_group(&db, &fingerprint).unwrap();
        assert!(is_group_ignored(&db, &fingerprint));

        remove_ignore(&db, &fingerprint).unwrap();
        assert!(!is_group_ignored(&db, &fingerprint));
    }

    /// Every recorded fingerprint shows up in the listing.
    #[test]
    fn list_ignored_returns_entries() {
        let db = open_db_in_memory().unwrap();
        for fingerprint in ["fp1", "fp2"] {
            ignore_group(&db, fingerprint).unwrap();
        }

        let listed = list_ignored(&db).unwrap();
        assert_eq!(listed.len(), 2);
    }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod ignore_db;
|
||||
use anyhow::Result;
|
||||
use image::imageops::FilterType;
|
||||
use image::ImageReader;
|
||||
|
||||
19
src/main.rs
19
src/main.rs
@@ -1,4 +1,5 @@
|
||||
use deduper::{find_duplicate_groups, scan_images, DuplicateKind};
|
||||
use deduper::ignore_db;
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
|
||||
@@ -56,14 +57,28 @@ fn main() {
|
||||
}
|
||||
};
|
||||
|
||||
let groups = find_duplicate_groups(&entries, config.threshold);
|
||||
let mut groups = find_duplicate_groups(&entries, config.threshold);
|
||||
|
||||
// Filter out ignored groups
|
||||
let db = ignore_db::open_db().ok();
|
||||
if let Some(ref conn) = db {
|
||||
groups.retain(|g| {
|
||||
let sha_list: Vec<&str> = entries.iter()
|
||||
.filter(|e| g.paths.contains(&e.path))
|
||||
.map(|e| e.sha256.as_str())
|
||||
.collect();
|
||||
let fp = ignore_db::group_fingerprint(&sha_list);
|
||||
!ignore_db::is_group_ignored(conn, &fp)
|
||||
});
|
||||
}
|
||||
|
||||
if groups.is_empty() {
|
||||
println!("no image duplicates found");
|
||||
return;
|
||||
}
|
||||
|
||||
if config.review {
|
||||
review::launch_review(&groups);
|
||||
review::launch_review(&groups, &entries);
|
||||
} else {
|
||||
for (idx, group) in groups.iter().enumerate() {
|
||||
let kind = match group.kind {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use base64::Engine;
|
||||
use deduper::{DuplicateGroup, DuplicateKind};
|
||||
use deduper::{DuplicateGroup, DuplicateKind, ImageEntry, ignore_db};
|
||||
use image::imageops::FilterType;
|
||||
use std::fs;
|
||||
use tiny_http::{Header, Method, Response, Server};
|
||||
|
||||
const THUMB_MAX: u32 = 300;
|
||||
|
||||
pub fn launch_review(groups: &[DuplicateGroup]) {
|
||||
pub fn launch_review(groups: &[DuplicateGroup], entries: &[ImageEntry]) {
|
||||
let port = find_open_port();
|
||||
let addr = format!("127.0.0.1:{port}");
|
||||
let server = Server::http(&addr).expect("failed to start review server");
|
||||
@@ -15,6 +15,8 @@ pub fn launch_review(groups: &[DuplicateGroup]) {
|
||||
let html = build_review_html(groups);
|
||||
eprintln!("review server running at http://{addr}");
|
||||
|
||||
let db = ignore_db::open_db().ok();
|
||||
|
||||
let _ = open::that(format!("http://{addr}"));
|
||||
|
||||
loop {
|
||||
@@ -34,6 +36,9 @@ pub fn launch_review(groups: &[DuplicateGroup]) {
|
||||
(Method::Post, "/delete") => {
|
||||
handle_delete(req);
|
||||
}
|
||||
(Method::Post, "/ignore") => {
|
||||
handle_ignore(req, groups, entries, &db);
|
||||
}
|
||||
(Method::Post, "/shutdown") => {
|
||||
let header = Header::from_bytes("Content-Type", "text/plain").unwrap();
|
||||
let _ = req.respond(Response::from_string("bye").with_header(header));
|
||||
@@ -81,6 +86,49 @@ fn handle_delete(mut req: tiny_http::Request) -> usize {
|
||||
deleted
|
||||
}
|
||||
|
||||
fn handle_ignore(
|
||||
mut req: tiny_http::Request,
|
||||
groups: &[DuplicateGroup],
|
||||
entries: &[ImageEntry],
|
||||
db: &Option<rusqlite::Connection>,
|
||||
) {
|
||||
let mut body = Vec::new();
|
||||
let _ = std::io::Read::read_to_end(req.as_reader(), &mut body);
|
||||
let body_str = String::from_utf8_lossy(&body);
|
||||
|
||||
// Body = group index as JSON number
|
||||
let group_idx: usize = serde_json::from_str(&body_str).unwrap_or(usize::MAX);
|
||||
|
||||
let (ok, msg) = if let Some(conn) = db {
|
||||
if let Some(group) = groups.get(group_idx) {
|
||||
let sha_list: Vec<&str> = entries
|
||||
.iter()
|
||||
.filter(|e| group.paths.contains(&e.path))
|
||||
.map(|e| e.sha256.as_str())
|
||||
.collect();
|
||||
let fp = ignore_db::group_fingerprint(&sha_list);
|
||||
match ignore_db::ignore_group(conn, &fp) {
|
||||
Ok(_) => {
|
||||
eprintln!("ignored group {group_idx} (fingerprint: {fp})");
|
||||
(true, "group ignored")
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("failed to ignore group: {e}");
|
||||
(false, "db error")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(false, "invalid group index")
|
||||
}
|
||||
} else {
|
||||
(false, "no database")
|
||||
};
|
||||
|
||||
let json = format!("{{\"ok\":{ok},\"message\":\"{msg}\"}}");
|
||||
let header = Header::from_bytes("Content-Type", "application/json").unwrap();
|
||||
let _ = req.respond(Response::from_string(json).with_header(header));
|
||||
}
|
||||
|
||||
fn make_thumbnail_data_uri(path: &std::path::Path) -> String {
|
||||
let data = match fs::read(path) {
|
||||
Ok(d) => d,
|
||||
@@ -117,7 +165,7 @@ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; b
|
||||
h1 { text-align: center; margin-bottom: 20px; color: #e94560; }
|
||||
.summary { text-align: center; margin-bottom: 20px; color: #aaa; }
|
||||
.group { background: #16213e; border-radius: 12px; padding: 20px; margin-bottom: 24px; border: 1px solid #0f3460; }
|
||||
.group-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; }
|
||||
.group-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; flex-wrap: wrap; gap: 8px; }
|
||||
.group-title { font-size: 1.2em; font-weight: bold; }
|
||||
.badge { padding: 4px 12px; border-radius: 20px; font-size: 0.85em; font-weight: bold; }
|
||||
.badge-exact { background: #e94560; color: white; }
|
||||
@@ -135,10 +183,13 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; }
|
||||
.btn-delete:hover { background: #c73650; }
|
||||
.btn-done { background: #533483; color: white; }
|
||||
.btn-done:hover { background: #3d2660; }
|
||||
.btn-all { background: #0f3460; color: #eee; border: 1px solid #533483; }
|
||||
.btn-all { background: #0f3460; color: #eee; border: 1px solid #533483; font-size: 0.85em; padding: 6px 14px; }
|
||||
.btn-all:hover { background: #16213e; }
|
||||
.btn-ignore { background: #2d6a4f; color: white; font-size: 0.85em; padding: 6px 14px; }
|
||||
.btn-ignore:hover { background: #1b4332; }
|
||||
.status { text-align: center; margin-top: 16px; font-size: 1.1em; color: #e94560; }
|
||||
.deleted { opacity: 0.3; pointer-events: none; }
|
||||
.ignored { opacity: 0.2; pointer-events: none; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -147,7 +198,7 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; }
|
||||
|
||||
let total_files: usize = groups.iter().map(|g| g.paths.len()).sum();
|
||||
html.push_str(&format!(
|
||||
"<p class=\"summary\">{} groups, {} files — select files to delete</p>\n",
|
||||
"<p class=\"summary\">{} groups, {} files — select files to delete or dismiss false positives</p>\n",
|
||||
groups.len(),
|
||||
total_files
|
||||
));
|
||||
@@ -168,6 +219,7 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; }
|
||||
<span class="group-title">Group {}</span>
|
||||
<span class="badge {badge_class}">{kind_str}</span>
|
||||
<button class="btn btn-all" onclick="selectAllBut('{idx}')">Keep first, select rest</button>
|
||||
<button class="btn btn-ignore" onclick="ignoreGroup({idx})">👁️ Not a dupe</button>
|
||||
</div>
|
||||
<div class="images">
|
||||
"#,
|
||||
@@ -215,6 +267,26 @@ function selectAllBut(groupId) {
|
||||
updateCount();
|
||||
}
|
||||
|
||||
// Asks the server (POST /ignore) to remember group `groupIdx` as not-a-dupe.
// On success the group element is faded out via the `ignored` class and the
// status line is updated; on failure the status line shows the error.
async function ignoreGroup(groupIdx) {
  try {
    const res = await fetch('/ignore', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(groupIdx)
    });
    const data = await res.json();
    if (!data.ok) {
      document.getElementById('status').textContent = 'Error: ' + data.message;
      return;
    }

    const el = document.getElementById('group-' + groupIdx);
    if (el) {
      // A dismissed group becomes unclickable, so clear any boxes that were
      // already ticked inside it; otherwise a later delete of the selection
      // could still remove files from a group just marked as not-a-dupe.
      // NOTE(review): assumes the group's checkboxes live inside this element — confirm.
      el.querySelectorAll('.del-check').forEach(function (box) { box.checked = false; });
      el.classList.add('ignored');
      updateCount();
    }
    document.getElementById('status').textContent = 'Group ' + (groupIdx+1) + ' dismissed — won\'t appear next run';
  } catch(e) {
    document.getElementById('status').textContent = 'Error: ' + e.message;
  }
}
|
||||
|
||||
async function deleteSelected() {
|
||||
const checks = document.querySelectorAll('.del-check:checked');
|
||||
if (checks.length === 0) { alert('No files selected'); return; }
|
||||
|
||||
Reference in New Issue
Block a user