diff --git a/Cargo.lock b/Cargo.lock index f102f73..77ee2a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,18 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aligned" version = "0.4.3" @@ -276,6 +288,7 @@ dependencies = [ "base64", "image", "open", + "rusqlite", "serde", "serde_json", "sha2", @@ -334,6 +347,18 @@ dependencies = [ "zune-inflate", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fax" version = "0.2.7" @@ -408,6 +433,24 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown", +] + [[package]] name = "httpdate" version = "1.0.3" @@ -531,6 +574,17 @@ dependencies = [ "cc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "log" version = "0.4.29" @@ -697,6 +751,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "png" version = "0.18.1" @@ -888,6 +948,20 @@ version = "0.8.53" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4" +[[package]] +name = "rusqlite" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -1070,6 +1144,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index f755f0d..e91dd26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,4 @@ open = "5" base64 = "0.22" serde = { version = "1", features = ["derive"] } serde_json = "1" +rusqlite = { version = "0.31", features = ["bundled"] } diff --git a/src/ignore_db.rs b/src/ignore_db.rs new file mode 100644 index 0000000..5d4b90d --- /dev/null +++ b/src/ignore_db.rs @@ -0,0 +1,125 @@ +use anyhow::Result; +use rusqlite::Connection; +use std::path::PathBuf; + +fn db_path() -> PathBuf { + let dir = dirs_or_default(); + std::fs::create_dir_all(&dir).ok(); + dir.join("ignores.db") +} + +fn dirs_or_default() -> PathBuf { + std::env::var("DEDUPER_DB_DIR") + .map(PathBuf::from) + .unwrap_or_else(|_| { + let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); + PathBuf::from(home).join(".config").join("deduper") + }) +} + +pub fn open_db() -> Result { + let path = db_path(); + let conn = Connection::open(&path)?; + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS ignored_groups ( + fingerprint TEXT PRIMARY KEY, + created_at TEXT DEFAULT (datetime('now')), + note TEXT DEFAULT '' + );", + )?; + Ok(conn) +} + +/// Fingerprint = sorted sha256 hashes joined by `|` +pub fn group_fingerprint(sha256s: &[&str]) -> String { + let mut sorted: Vec<&str> = sha256s.to_vec(); + sorted.sort(); + sorted.dedup(); + sorted.join("|") +} + +pub fn ignore_group(conn: &Connection, fingerprint: &str) -> Result<()> { + conn.execute( + "INSERT OR IGNORE INTO ignored_groups (fingerprint) VALUES (?1)", + [fingerprint], + )?; + Ok(()) +} + +pub fn is_group_ignored(conn: &Connection, fingerprint: &str) -> bool { + conn.query_row( + "SELECT 1 FROM ignored_groups WHERE fingerprint = ?1", + [fingerprint], + |_| Ok(true), + ) + .unwrap_or(false) +} + +pub fn remove_ignore(conn: &Connection, fingerprint: &str) -> Result<()> { + conn.execute( + "DELETE FROM ignored_groups WHERE fingerprint = ?1", + [fingerprint], + )?; + Ok(()) +} + +pub fn list_ignored(conn: &Connection) -> Result> { + let mut stmt = conn.prepare("SELECT fingerprint, created_at FROM ignored_groups ORDER BY created_at DESC")?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + Ok(rows.filter_map(|r| r.ok()).collect()) +} + +pub fn open_db_in_memory() -> Result { + let conn = Connection::open_in_memory()?; + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS ignored_groups ( + fingerprint TEXT PRIMARY KEY, + created_at TEXT DEFAULT (datetime('now')), + note TEXT DEFAULT '' + );", + )?; + Ok(conn) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ignore_and_check_group() { + let conn = open_db_in_memory().unwrap(); + let fp = group_fingerprint(&["sha_b", "sha_a"]); + assert!(!is_group_ignored(&conn, &fp)); + ignore_group(&conn, &fp).unwrap(); + assert!(is_group_ignored(&conn, &fp)); + } + + #[test] + fn fingerprint_is_sorted_and_stable() { + let fp1 = group_fingerprint(&["bbb", "aaa"]); + let fp2 = group_fingerprint(&["aaa", "bbb"]); + assert_eq!(fp1, fp2); + assert_eq!(fp1, "aaa|bbb"); + } + + #[test] + fn remove_ignore_works() { + let conn = open_db_in_memory().unwrap(); + let fp = group_fingerprint(&["x", "y"]); + ignore_group(&conn, &fp).unwrap(); + assert!(is_group_ignored(&conn, &fp)); + remove_ignore(&conn, &fp).unwrap(); + assert!(!is_group_ignored(&conn, &fp)); + } + + #[test] + fn list_ignored_returns_entries() { + let conn = open_db_in_memory().unwrap(); + ignore_group(&conn, "fp1").unwrap(); + ignore_group(&conn, "fp2").unwrap(); + let list = list_ignored(&conn).unwrap(); + assert_eq!(list.len(), 2); + } +} diff --git a/src/lib.rs b/src/lib.rs index fb59c63..f21902d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod ignore_db; use anyhow::Result; use image::imageops::FilterType; use image::ImageReader; diff --git a/src/main.rs b/src/main.rs index ebe3673..c64cc96 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use deduper::{find_duplicate_groups, scan_images, DuplicateKind}; +use deduper::ignore_db; use std::env; use std::path::Path; @@ -56,14 +57,28 @@ fn main() { } }; - let groups = find_duplicate_groups(&entries, config.threshold); + let mut groups = find_duplicate_groups(&entries, config.threshold); + + // Filter out ignored groups + let db = ignore_db::open_db().ok(); + if let Some(ref conn) = db { + groups.retain(|g| { + let sha_list: Vec<&str> = entries.iter() + .filter(|e| g.paths.contains(&e.path)) + .map(|e| e.sha256.as_str()) + .collect(); + let fp = ignore_db::group_fingerprint(&sha_list); + !ignore_db::is_group_ignored(conn, &fp) + }); + } + if groups.is_empty() { println!("no image duplicates found"); return; } if config.review { - review::launch_review(&groups); + review::launch_review(&groups, &entries); } else { for (idx, group) in groups.iter().enumerate() { let kind = match group.kind { diff --git a/src/review.rs b/src/review.rs index c5990a8..ac3f6f2 100644 --- a/src/review.rs +++ b/src/review.rs @@ -1,12 +1,12 @@ use base64::Engine; -use deduper::{DuplicateGroup, DuplicateKind}; +use deduper::{DuplicateGroup, DuplicateKind, ImageEntry, ignore_db}; use image::imageops::FilterType; use std::fs; use tiny_http::{Header, Method, Response, Server}; const THUMB_MAX: u32 = 300; -pub fn launch_review(groups: &[DuplicateGroup]) { +pub fn launch_review(groups: &[DuplicateGroup], entries: &[ImageEntry]) { let port = find_open_port(); let addr = format!("127.0.0.1:{port}"); let server = Server::http(&addr).expect("failed to start review server"); @@ -15,6 +15,8 @@ pub fn launch_review(groups: &[DuplicateGroup]) { let html = build_review_html(groups); eprintln!("review server running at http://{addr}"); + let db = ignore_db::open_db().ok(); + let _ = open::that(format!("http://{addr}")); loop { @@ -34,6 +36,9 @@ pub fn launch_review(groups: &[DuplicateGroup]) { (Method::Post, "/delete") => { handle_delete(req); } + (Method::Post, "/ignore") => { + handle_ignore(req, groups, entries, &db); + } (Method::Post, "/shutdown") => { let header = Header::from_bytes("Content-Type", "text/plain").unwrap(); let _ = req.respond(Response::from_string("bye").with_header(header)); @@ -81,6 +86,49 @@ fn handle_delete(mut req: tiny_http::Request) -> usize { deleted } +fn handle_ignore( + mut req: tiny_http::Request, + groups: &[DuplicateGroup], + entries: &[ImageEntry], + db: &Option, +) { + let mut body = Vec::new(); + let _ = std::io::Read::read_to_end(req.as_reader(), &mut body); + let body_str = String::from_utf8_lossy(&body); + + // Body = group index as JSON number + let group_idx: usize = serde_json::from_str(&body_str).unwrap_or(usize::MAX); + + let (ok, msg) = if let Some(conn) = db { + if let Some(group) = groups.get(group_idx) { + let sha_list: Vec<&str> = entries + .iter() + .filter(|e| group.paths.contains(&e.path)) + .map(|e| e.sha256.as_str()) + .collect(); + let fp = ignore_db::group_fingerprint(&sha_list); + match ignore_db::ignore_group(conn, &fp) { + Ok(_) => { + eprintln!("ignored group {group_idx} (fingerprint: {fp})"); + (true, "group ignored") + } + Err(e) => { + eprintln!("failed to ignore group: {e}"); + (false, "db error") + } + } + } else { + (false, "invalid group index") + } + } else { + (false, "no database") + }; + + let json = format!("{{\"ok\":{ok},\"message\":\"{msg}\"}}"); + let header = Header::from_bytes("Content-Type", "application/json").unwrap(); + let _ = req.respond(Response::from_string(json).with_header(header)); +} + fn make_thumbnail_data_uri(path: &std::path::Path) -> String { let data = match fs::read(path) { Ok(d) => d, @@ -117,7 +165,7 @@ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; b h1 { text-align: center; margin-bottom: 20px; color: #e94560; } .summary { text-align: center; margin-bottom: 20px; color: #aaa; } .group { background: #16213e; border-radius: 12px; padding: 20px; margin-bottom: 24px; border: 1px solid #0f3460; } -.group-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; } +.group-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; flex-wrap: wrap; gap: 8px; } .group-title { font-size: 1.2em; font-weight: bold; } .badge { padding: 4px 12px; border-radius: 20px; font-size: 0.85em; font-weight: bold; } .badge-exact { background: #e94560; color: white; } @@ -135,10 +183,13 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; } .btn-delete:hover { background: #c73650; } .btn-done { background: #533483; color: white; } .btn-done:hover { background: #3d2660; } -.btn-all { background: #0f3460; color: #eee; border: 1px solid #533483; } +.btn-all { background: #0f3460; color: #eee; border: 1px solid #533483; font-size: 0.85em; padding: 6px 14px; } .btn-all:hover { background: #16213e; } +.btn-ignore { background: #2d6a4f; color: white; font-size: 0.85em; padding: 6px 14px; } +.btn-ignore:hover { background: #1b4332; } .status { text-align: center; margin-top: 16px; font-size: 1.1em; color: #e94560; } .deleted { opacity: 0.3; pointer-events: none; } +.ignored { opacity: 0.2; pointer-events: none; } @@ -147,7 +198,7 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; } let total_files: usize = groups.iter().map(|g| g.paths.len()).sum(); html.push_str(&format!( - "

{} groups, {} files — select files to delete

\n", + "

{} groups, {} files — select files to delete or dismiss false positives

\n", groups.len(), total_files )); @@ -168,6 +219,7 @@ h1 { text-align: center; margin-bottom: 20px; color: #e94560; } Group {} {kind_str} +
"#, @@ -215,6 +267,26 @@ function selectAllBut(groupId) { updateCount(); } +async function ignoreGroup(groupIdx) { + try { + const res = await fetch('/ignore', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(groupIdx) + }); + const data = await res.json(); + if (data.ok) { + const el = document.getElementById('group-' + groupIdx); + if (el) el.classList.add('ignored'); + document.getElementById('status').textContent = 'Group ' + (groupIdx+1) + ' dismissed — won\'t appear next run'; + } else { + document.getElementById('status').textContent = 'Error: ' + data.message; + } + } catch(e) { + document.getElementById('status').textContent = 'Error: ' + e.message; + } +} + async function deleteSelected() { const checks = document.querySelectorAll('.del-check:checked'); if (checks.length === 0) { alert('No files selected'); return; }