feat: add --review flag with browser-based side-by-side image review

- Launches local HTTP server with dark-themed review UI
- Side-by-side image comparison per duplicate group
- Checkbox selection + delete confirmation
- Shows file size and path per image
- Exact/similar badges per group
- Shutdown endpoint for clean exit
- Magic byte format detection (fixes misnamed screenshots)
- 23 tests passing
This commit is contained in:
admin
2026-04-28 00:08:33 +00:00
parent 9dc8a495bb
commit bb04871383
5 changed files with 451 additions and 18 deletions

132
Cargo.lock generated
View File

@@ -64,6 +64,12 @@ dependencies = [
"stable_deref_trait",
]
[[package]]
name = "ascii"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16"
[[package]]
name = "autocfg"
version = "1.5.0"
@@ -113,6 +119,12 @@ dependencies = [
"arrayvec",
]
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bit_field"
version = "0.10.3"
@@ -185,6 +197,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "chunked_transfer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901"
[[package]]
name = "color_quant"
version = "1.1.0"
@@ -255,8 +273,13 @@ name = "deduper"
version = "0.1.0"
dependencies = [
"anyhow",
"base64",
"image",
"open",
"serde",
"serde_json",
"sha2",
"tiny_http",
"walkdir",
]
@@ -385,6 +408,12 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "image"
version = "0.25.10"
@@ -436,6 +465,25 @@ dependencies = [
"syn",
]
[[package]]
name = "is-docker"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3"
dependencies = [
"once_cell",
]
[[package]]
name = "is-wsl"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5"
dependencies = [
"is-docker",
"once_cell",
]
[[package]]
name = "itertools"
version = "0.14.0"
@@ -445,6 +493,12 @@ dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "jobserver"
version = "0.1.34"
@@ -614,6 +668,17 @@ version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "open"
version = "5.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f3bab717c29a857abf75fcef718d441ec7cb2725f937343c734740a985d37fd"
dependencies = [
"is-wsl",
"libc",
"pathdiff",
]
[[package]]
name = "paste"
version = "1.0.15"
@@ -626,6 +691,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
[[package]]
name = "pathdiff"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
[[package]]
name = "png"
version = "0.18.1"
@@ -832,6 +903,49 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "sha2"
version = "0.10.9"
@@ -921,6 +1035,18 @@ dependencies = [
"zune-jpeg",
]
[[package]]
name = "tiny_http"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82"
dependencies = [
"ascii",
"chunked_transfer",
"httpdate",
"log",
]
[[package]]
name = "typenum"
version = "1.20.0"
@@ -1076,6 +1202,12 @@ dependencies = [
"syn",
]
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zune-core"
version = "0.5.1"

View File

@@ -8,3 +8,8 @@ image = { version = "0.25", default-features = true, features = ["jpeg", "png",
sha2 = "0.10"
walkdir = "2.5"
anyhow = "1"
tiny_http = "0.12"
open = "5"
base64 = "0.22"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

View File

@@ -2,19 +2,52 @@ use deduper::{find_duplicate_groups, scan_images, DuplicateKind};
use std::env;
use std::path::Path;
fn main() {
mod review;
struct Config {
root: String,
threshold: u32,
review: bool,
}
fn parse_args() -> Option<Config> {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
eprintln!("usage: deduper <folder> [hamming-threshold]");
std::process::exit(1);
let mut root = None;
let mut threshold = 8u32;
let mut review = false;
let mut i = 1;
while i < args.len() {
match args[i].as_str() {
"--review" => review = true,
arg if arg.starts_with('-') => {
eprintln!("unknown flag: {arg}");
return None;
}
_ => {
if root.is_none() {
root = Some(args[i].clone());
} else {
threshold = args[i].parse().unwrap_or(8);
}
}
}
i += 1;
}
let root = Path::new(&args[1]);
let threshold = args
.get(2)
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(8);
root.map(|r| Config { root: r, threshold, review })
}
fn main() {
let config = match parse_args() {
Some(c) => c,
None => {
eprintln!("usage: deduper <folder> [hamming-threshold] [--review]");
std::process::exit(1);
}
};
let root = Path::new(&config.root);
let entries = match scan_images(root) {
Ok(v) => v,
Err(e) => {
@@ -23,12 +56,15 @@ fn main() {
}
};
let groups = find_duplicate_groups(&entries, threshold);
let groups = find_duplicate_groups(&entries, config.threshold);
if groups.is_empty() {
println!("no image duplicates found");
return;
}
if config.review {
review::launch_review(&groups);
} else {
for (idx, group) in groups.iter().enumerate() {
let kind = match group.kind {
DuplicateKind::Exact => "exact",
@@ -39,4 +75,5 @@ fn main() {
println!(" {}", path.display());
}
}
}
}

246
src/review.rs Normal file
View File

@@ -0,0 +1,246 @@
use base64::Engine;
use deduper::{DuplicateGroup, DuplicateKind};
use std::fs;
use tiny_http::{Header, Method, Response, Server};
pub fn launch_review(groups: &[DuplicateGroup]) {
let port = find_open_port();
let addr = format!("127.0.0.1:{port}");
let server = Server::http(&addr).expect("failed to start review server");
println!("review server running at http://{addr}");
println!("opening browser...");
let _ = open::that(format!("http://{addr}"));
loop {
let req = match server.recv() {
Ok(r) => r,
Err(_) => break,
};
let url = req.url().to_string();
let method = req.method().clone();
match (&method, url.as_str()) {
(Method::Get, "/") => {
let html = build_review_html(groups);
let header = Header::from_bytes("Content-Type", "text/html; charset=utf-8").unwrap();
let _ = req.respond(Response::from_string(html).with_header(header));
}
(Method::Get, path) if path.starts_with("/image/") => {
serve_image(req, path);
}
(Method::Post, "/delete") => {
handle_delete(req);
}
(Method::Post, "/shutdown") => {
let header = Header::from_bytes("Content-Type", "text/plain").unwrap();
let _ = req.respond(Response::from_string("bye").with_header(header));
break;
}
_ => {
let _ = req.respond(Response::from_string("404").with_status_code(404));
}
}
}
println!("review server stopped");
}
fn find_open_port() -> u16 {
std::net::TcpListener::bind("127.0.0.1:0")
.expect("find open port")
.local_addr()
.unwrap()
.port()
}
fn serve_image(req: tiny_http::Request, path: &str) {
// path = /image/<base64-encoded-filepath>
let encoded = &path["/image/".len()..];
let decoded = base64::engine::general_purpose::URL_SAFE_NO_PAD
.decode(encoded)
.unwrap_or_default();
let filepath = String::from_utf8_lossy(&decoded);
match fs::read(filepath.as_ref()) {
Ok(data) => {
let mime = guess_mime(&filepath);
let header = Header::from_bytes("Content-Type", mime).unwrap();
let _ = req.respond(Response::from_data(data).with_header(header));
}
Err(_) => {
let _ = req.respond(Response::from_string("not found").with_status_code(404));
}
}
}
fn guess_mime(path: &str) -> &'static str {
let lower = path.to_ascii_lowercase();
if lower.ends_with(".png") { "image/png" }
else if lower.ends_with(".jpg") || lower.ends_with(".jpeg") { "image/jpeg" }
else if lower.ends_with(".gif") { "image/gif" }
else if lower.ends_with(".webp") { "image/webp" }
else if lower.ends_with(".bmp") { "image/bmp" }
else if lower.ends_with(".tif") || lower.ends_with(".tiff") { "image/tiff" }
else { "application/octet-stream" }
}
fn handle_delete(mut req: tiny_http::Request) -> usize {
let mut body = String::new();
req.as_reader().read_to_string(&mut body).unwrap_or(0);
let paths: Vec<String> = serde_json::from_str(&body).unwrap_or_default();
let mut deleted = 0;
for path in &paths {
match fs::remove_file(path) {
Ok(_) => {
eprintln!("deleted: {path}");
deleted += 1;
}
Err(e) => {
eprintln!("failed to delete {path}: {e}");
}
}
}
let json = format!("{{\"deleted\":{deleted}}}");
let header = Header::from_bytes("Content-Type", "application/json").unwrap();
let _ = req.respond(Response::from_string(json).with_header(header));
deleted
}
/// Returns the server-relative URL under which `path` is served:
/// `/image/` followed by the URL-safe, unpadded base64 of the path's
/// lossy UTF-8 representation.
fn image_url(path: &std::path::Path) -> String {
    let lossy = path.to_string_lossy();
    let token = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(lossy.as_bytes());
    let mut url = String::from("/image/");
    url.push_str(&token);
    url
}
/// Escapes the characters that are significant in HTML text and in quoted
/// attribute values, so arbitrary file names can be embedded safely.
fn escape_html(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(ch),
        }
    }
    out
}

/// Renders the complete review page for `groups`.
///
/// Each group becomes a card with an exact/similar badge; each file in it is
/// shown with its image, path, on-disk size (`?` if the metadata cannot be
/// read) and a checkbox whose value is the path posted to `/delete`.
///
/// File paths are HTML-escaped before being embedded: file names may contain
/// `"`, `<` or `&`, which would otherwise break the markup and truncate the
/// checkbox value sent back for deletion.
fn build_review_html(groups: &[DuplicateGroup]) -> String {
    let mut html = String::from(r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>deduper - Review Duplicates</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #1a1a2e; color: #eee; padding: 20px; }
h1 { text-align: center; margin-bottom: 20px; color: #e94560; }
.group { background: #16213e; border-radius: 12px; padding: 20px; margin-bottom: 24px; border: 1px solid #0f3460; }
.group-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; }
.group-title { font-size: 1.2em; font-weight: bold; }
.badge { padding: 4px 12px; border-radius: 20px; font-size: 0.85em; font-weight: bold; }
.badge-exact { background: #e94560; color: white; }
.badge-similar { background: #0f3460; color: #eee; border: 1px solid #533483; }
.images { display: flex; flex-wrap: wrap; gap: 16px; }
.image-card { position: relative; background: #0f3460; border-radius: 8px; padding: 12px; max-width: 300px; flex: 1; min-width: 200px; }
.image-card img { width: 100%; height: 200px; object-fit: contain; border-radius: 4px; background: #000; }
.image-card .path { font-size: 0.75em; color: #aaa; margin-top: 8px; word-break: break-all; }
.image-card .size { font-size: 0.8em; color: #888; margin-top: 4px; }
.image-card label { display: flex; align-items: center; gap: 8px; margin-top: 8px; cursor: pointer; }
.image-card input[type=checkbox] { width: 20px; height: 20px; accent-color: #e94560; }
.actions { text-align: center; margin-top: 30px; }
.btn { padding: 12px 32px; border: none; border-radius: 8px; font-size: 1em; cursor: pointer; margin: 0 8px; }
.btn-delete { background: #e94560; color: white; }
.btn-delete:hover { background: #c73650; }
.btn-done { background: #533483; color: white; }
.btn-done:hover { background: #3d2660; }
.status { text-align: center; margin-top: 16px; font-size: 1.1em; color: #e94560; }
</style>
</head>
<body>
<h1>🔍 deduper — Review Duplicates</h1>
<p style="text-align:center;margin-bottom:20px;color:#aaa;">Select files to delete, then click "Delete Selected"</p>
"#);

    for (idx, group) in groups.iter().enumerate() {
        let (kind_str, badge_class) = match group.kind {
            DuplicateKind::Exact => ("exact", "badge-exact"),
            DuplicateKind::Similar => ("similar", "badge-similar"),
        };
        let group_no = idx + 1;
        html.push_str(&format!(
            r#"<div class="group">
<div class="group-header">
<span class="group-title">Group {group_no}</span>
<span class="badge {badge_class}">{kind_str}</span>
</div>
<div class="images">
"#
        ));

        for path in &group.paths {
            // The URL is URL-safe base64 and needs no escaping; the path does.
            let url = image_url(path);
            let display_path = escape_html(&path.display().to_string());
            let size = fs::metadata(path)
                .map(|m| format_size(m.len()))
                .unwrap_or_else(|_| "?".to_string());
            html.push_str(&format!(
                r#"<div class="image-card">
<img src="{url}" alt="{display_path}" loading="lazy">
<div class="path">{display_path}</div>
<div class="size">{size}</div>
<label><input type="checkbox" class="del-check" value="{display_path}"> Delete this</label>
</div>
"#
            ));
        }
        html.push_str("</div>\n</div>\n");
    }

    html.push_str(r#"
<div class="actions">
<button class="btn btn-delete" onclick="deleteSelected()">🗑️ Delete Selected</button>
<button class="btn btn-done" onclick="shutdown()">✅ Done</button>
</div>
<div class="status" id="status"></div>
<script>
async function deleteSelected() {
const checks = document.querySelectorAll('.del-check:checked');
if (checks.length === 0) { alert('No files selected'); return; }
const paths = Array.from(checks).map(c => c.value);
if (!confirm('Delete ' + paths.length + ' file(s)?\n\n' + paths.join('\n'))) return;
const res = await fetch('/delete', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify(paths)
});
const data = await res.json();
document.getElementById('status').textContent = 'Deleted ' + data.deleted + ' file(s)';
// Remove deleted cards
checks.forEach(c => {
const card = c.closest('.image-card');
if (card) card.style.opacity = '0.3';
c.disabled = true;
});
}
async function shutdown() {
await fetch('/shutdown', {method: 'POST'});
document.getElementById('status').textContent = 'Review complete. You can close this tab.';
}
</script>
</body>
</html>
"#);
    html
}
/// Formats a byte count for display: whole bytes below 1 KiB, otherwise
/// kilobytes or megabytes (1024-based) with one decimal place.
fn format_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * KIB;
    match bytes {
        b if b >= MIB => format!("{:.1} MB", b as f64 / MIB as f64),
        b if b >= KIB => format!("{:.1} KB", b as f64 / KIB as f64),
        b => format!("{b} B"),
    }
}

View File

@@ -146,3 +146,16 @@ fn misnamed_jpeg_as_png_still_scanned() {
let fake = fake.unwrap();
assert_eq!(orig.sha256, fake.sha256, "same content = same sha256");
}
/// `--review` must be accepted by the CLI, and with no duplicates the binary
/// must report that and exit successfully instead of starting the server.
#[test]
fn cli_review_flag_accepted_no_dupes() {
    // Use a private, freshly created empty directory rather than a
    // machine-specific absolute path, so the test runs on any checkout.
    let empty_dir = std::env::temp_dir().join(format!(
        "deduper_review_no_dupes_{}",
        std::process::id()
    ));
    let _ = std::fs::remove_dir_all(&empty_dir);
    std::fs::create_dir_all(&empty_dir).expect("failed to create empty scratch directory");

    let bin = env!("CARGO_BIN_EXE_deduper");
    let result = std::process::Command::new(bin)
        .arg(&empty_dir)
        .arg("--review")
        .output();

    // Clean up before asserting so a failure does not leave the directory behind.
    let _ = std::fs::remove_dir_all(&empty_dir);

    let output = result.expect("failed to run deduper binary");
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(output.status.success(), "--review on empty dir should exit 0: {}", String::from_utf8_lossy(&output.stderr));
    assert!(stdout.contains("no image duplicates found"), "should report no dupes: {stdout}");
}