From bb048713836a2eba536ddf68f1f22de016b44f9a Mon Sep 17 00:00:00 2001 From: admin Date: Tue, 28 Apr 2026 00:08:33 +0000 Subject: [PATCH] feat: add --review flag with browser-based side-by-side image review - Launches local HTTP server with dark-themed review UI - Side-by-side image comparison per duplicate group - Checkbox selection + delete confirmation - Shows file size and path per image - Exact/similar badges per group - Shutdown endpoint for clean exit - Magic byte format detection (fixes misnamed screenshots) - 23 tests passing --- Cargo.lock | 132 +++++++++++++++++++++++ Cargo.toml | 5 + src/main.rs | 73 +++++++++---- src/review.rs | 246 +++++++++++++++++++++++++++++++++++++++++++ tests/image_phase.rs | 13 +++ 5 files changed, 451 insertions(+), 18 deletions(-) create mode 100644 src/review.rs diff --git a/Cargo.lock b/Cargo.lock index c431661..f102f73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,6 +64,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + [[package]] name = "autocfg" version = "1.5.0" @@ -113,6 +119,12 @@ dependencies = [ "arrayvec", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bit_field" version = "0.10.3" @@ -185,6 +197,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chunked_transfer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" + [[package]] name = "color_quant" version = "1.1.0" @@ -255,8 +273,13 @@ name = "deduper" version = "0.1.0" dependencies = [ "anyhow", + "base64", "image", + "open", + "serde", + "serde_json", "sha2", + "tiny_http", "walkdir", ] @@ -385,6 +408,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "image" version = "0.25.10" @@ -436,6 +465,25 @@ dependencies = [ "syn", ] +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "itertools" version = "0.14.0" @@ -445,6 +493,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "jobserver" version = "0.1.34" @@ -614,6 +668,17 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "open" +version = "5.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f3bab717c29a857abf75fcef718d441ec7cb2725f937343c734740a985d37fd" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "paste" version = "1.0.15" @@ -626,6 +691,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "png" version = "0.18.1" @@ -832,6 +903,49 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "sha2" version = "0.10.9" @@ -921,6 +1035,18 @@ dependencies = [ "zune-jpeg", ] +[[package]] +name = "tiny_http" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" +dependencies = [ + "ascii", + "chunked_transfer", + "httpdate", + "log", +] + [[package]] name = "typenum" version = "1.20.0" @@ -1076,6 +1202,12 @@ dependencies = [ "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zune-core" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index f61c10c..f755f0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,8 @@ image = { version = "0.25", default-features = true, features = ["jpeg", "png", sha2 = "0.10" walkdir = "2.5" anyhow = "1" +tiny_http = "0.12" +open = "5" +base64 = "0.22" +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/src/main.rs b/src/main.rs index 53b9d31..ebe3673 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,19 +2,52 @@ use deduper::{find_duplicate_groups, scan_images, DuplicateKind}; use std::env; use std::path::Path; -fn main() { +mod review; + +struct Config { + root: String, + threshold: u32, + review: bool, +} + +fn parse_args() -> Option { let args: Vec = env::args().collect(); - if args.len() < 2 { - eprintln!("usage: deduper [hamming-threshold]"); - std::process::exit(1); + let mut root = None; + let mut threshold = 8u32; + let mut review = false; + + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--review" => review = true, + arg if arg.starts_with('-') => { + eprintln!("unknown flag: {arg}"); + return None; + } + _ => { + if root.is_none() { + root = Some(args[i].clone()); + } else { + threshold = args[i].parse().unwrap_or(8); + } + } + } + i += 1; } - let root = Path::new(&args[1]); - let threshold = args - .get(2) - .and_then(|s| s.parse::().ok()) - .unwrap_or(8); + root.map(|r| Config { root: r, threshold, review }) +} +fn main() { + let config = match parse_args() { + Some(c) => c, + None => { + eprintln!("usage: deduper [hamming-threshold] [--review]"); + std::process::exit(1); + } + }; + + let root = Path::new(&config.root); let entries = match scan_images(root) { Ok(v) => v, Err(e) => { @@ -23,20 +56,24 @@ fn main() { } }; - let groups = find_duplicate_groups(&entries, threshold); + let groups = find_duplicate_groups(&entries, config.threshold); if groups.is_empty() { println!("no image duplicates found"); return; } - for (idx, group) in groups.iter().enumerate() { - let kind = match group.kind { - DuplicateKind::Exact => "exact", - DuplicateKind::Similar => "similar", - }; - println!("group {} [{}]", idx + 1, kind); - for path in &group.paths { - println!(" {}", path.display()); + if config.review { + review::launch_review(&groups); + } else { + for (idx, group) in groups.iter().enumerate() { + let kind = match group.kind { + DuplicateKind::Exact => "exact", + DuplicateKind::Similar => "similar", + }; + println!("group {} [{}]", idx + 1, kind); + for path in &group.paths { + println!(" {}", path.display()); + } } } } diff --git a/src/review.rs b/src/review.rs new file mode 100644 index 0000000..14e1448 --- /dev/null +++ b/src/review.rs @@ -0,0 +1,246 @@ +use base64::Engine; +use deduper::{DuplicateGroup, DuplicateKind}; +use std::fs; +use tiny_http::{Header, Method, Response, Server}; + +pub fn launch_review(groups: &[DuplicateGroup]) { + let port = find_open_port(); + let addr = format!("127.0.0.1:{port}"); + let server = Server::http(&addr).expect("failed to start review server"); + + println!("review server running at http://{addr}"); + println!("opening browser..."); + + let _ = open::that(format!("http://{addr}")); + + loop { + let req = match server.recv() { + Ok(r) => r, + Err(_) => break, + }; + + let url = req.url().to_string(); + + let method = req.method().clone(); + match (&method, url.as_str()) { + (Method::Get, "/") => { + let html = build_review_html(groups); + let header = Header::from_bytes("Content-Type", "text/html; charset=utf-8").unwrap(); + let _ = req.respond(Response::from_string(html).with_header(header)); + } + (Method::Get, path) if path.starts_with("/image/") => { + serve_image(req, path); + } + (Method::Post, "/delete") => { + handle_delete(req); + } + (Method::Post, "/shutdown") => { + let header = Header::from_bytes("Content-Type", "text/plain").unwrap(); + let _ = req.respond(Response::from_string("bye").with_header(header)); + break; + } + _ => { + let _ = req.respond(Response::from_string("404").with_status_code(404)); + } + } + } + + println!("review server stopped"); +} + +fn find_open_port() -> u16 { + std::net::TcpListener::bind("127.0.0.1:0") + .expect("find open port") + .local_addr() + .unwrap() + .port() +} + +fn serve_image(req: tiny_http::Request, path: &str) { + // path = /image/ + let encoded = &path["/image/".len()..]; + let decoded = base64::engine::general_purpose::URL_SAFE_NO_PAD + .decode(encoded) + .unwrap_or_default(); + let filepath = String::from_utf8_lossy(&decoded); + + match fs::read(filepath.as_ref()) { + Ok(data) => { + let mime = guess_mime(&filepath); + let header = Header::from_bytes("Content-Type", mime).unwrap(); + let _ = req.respond(Response::from_data(data).with_header(header)); + } + Err(_) => { + let _ = req.respond(Response::from_string("not found").with_status_code(404)); + } + } +} + +fn guess_mime(path: &str) -> &'static str { + let lower = path.to_ascii_lowercase(); + if lower.ends_with(".png") { "image/png" } + else if lower.ends_with(".jpg") || lower.ends_with(".jpeg") { "image/jpeg" } + else if lower.ends_with(".gif") { "image/gif" } + else if lower.ends_with(".webp") { "image/webp" } + else if lower.ends_with(".bmp") { "image/bmp" } + else if lower.ends_with(".tif") || lower.ends_with(".tiff") { "image/tiff" } + else { "application/octet-stream" } +} + +fn handle_delete(mut req: tiny_http::Request) -> usize { + let mut body = String::new(); + req.as_reader().read_to_string(&mut body).unwrap_or(0); + + let paths: Vec = serde_json::from_str(&body).unwrap_or_default(); + let mut deleted = 0; + for path in &paths { + match fs::remove_file(path) { + Ok(_) => { + eprintln!("deleted: {path}"); + deleted += 1; + } + Err(e) => { + eprintln!("failed to delete {path}: {e}"); + } + } + } + + let json = format!("{{\"deleted\":{deleted}}}"); + let header = Header::from_bytes("Content-Type", "application/json").unwrap(); + let _ = req.respond(Response::from_string(json).with_header(header)); + deleted +} + +fn image_url(path: &std::path::Path) -> String { + let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD + .encode(path.to_string_lossy().as_bytes()); + format!("/image/{encoded}") +} + +fn build_review_html(groups: &[DuplicateGroup]) -> String { + let mut html = String::from(r#" + + + +deduper - Review Duplicates + + + +

🔍 deduper — Review Duplicates

+

Select files to delete, then click "Delete Selected"

+"#); + + for (idx, group) in groups.iter().enumerate() { + let kind_str = match group.kind { + DuplicateKind::Exact => "exact", + DuplicateKind::Similar => "similar", + }; + let badge_class = match group.kind { + DuplicateKind::Exact => "badge-exact", + DuplicateKind::Similar => "badge-similar", + }; + + html.push_str(&format!( + r#"
+
+Group {} +{kind_str} +
+
+"#, + idx + 1 + )); + + for path in &group.paths { + let url = image_url(path); + let display_path = path.display(); + let size = fs::metadata(path) + .map(|m| format_size(m.len())) + .unwrap_or_else(|_| "?".to_string()); + + html.push_str(&format!( + r#"
+{display_path} +
{display_path}
+
{size}
+ +
+"# + )); + } + + html.push_str("
\n
\n"); + } + + html.push_str(r#" +
+ + +
+
+ + + + +"#); + + html +} + +fn format_size(bytes: u64) -> String { + if bytes < 1024 { + format!("{bytes} B") + } else if bytes < 1024 * 1024 { + format!("{:.1} KB", bytes as f64 / 1024.0) + } else { + format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0)) + } +} diff --git a/tests/image_phase.rs b/tests/image_phase.rs index 61ecfe4..771633f 100644 --- a/tests/image_phase.rs +++ b/tests/image_phase.rs @@ -146,3 +146,16 @@ fn misnamed_jpeg_as_png_still_scanned() { let fake = fake.unwrap(); assert_eq!(orig.sha256, fake.sha256, "same content = same sha256"); } + +#[test] +fn cli_review_flag_accepted_no_dupes() { + let bin = env!("CARGO_BIN_EXE_deduper"); + let output = std::process::Command::new(bin) + .arg("/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir") + .arg("--review") + .output() + .expect("failed to run deduper binary"); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(output.status.success(), "--review on empty dir should exit 0: {}", String::from_utf8_lossy(&output.stderr)); + assert!(stdout.contains("no image duplicates found"), "should report no dupes: {stdout}"); +}