Files
deduper/tests/image_phase.rs
admin bb04871383 feat: add --review flag with browser-based side-by-side image review
- Launches local HTTP server with dark-themed review UI
- Side-by-side image comparison per duplicate group
- Checkbox selection + delete confirmation
- Shows file size and path per image
- Exact/similar badges per group
- Shutdown endpoint for clean exit
- Magic byte format detection (fixes misnamed screenshots)
- 23 tests passing
2026-04-28 00:08:33 +00:00

162 lines
7.2 KiB
Rust

use deduper::{find_duplicate_groups, hamming, scan_images, DuplicateKind};
use std::path::Path;
/// Returns the absolute path, as a `String`, of the image fixture called `name`.
///
/// `name` is appended to the fixture image directory after a single `/`.
fn fixture(name: &str) -> String {
    const IMAGE_DIR: &str = "/a0/usr/projects/deduper/.a0proj/test_media/images";
    [IMAGE_DIR, name].join("/")
}
/// End-to-end check of the image phase against the real fixture files.
///
/// Scans the fixture directory and then verifies that:
/// - `orig.jpg` and `orig_copy.jpg` have equal `sha256` fields,
/// - `hamming` over the `dhash` fields of `orig.jpg` and `orig_resized.jpg` is at most 8,
/// - `hamming` over the `dhash` fields of `orig.jpg` and `solid_blue.jpg` exceeds 8,
/// - `find_duplicate_groups` (called with 8) yields an `Exact` group holding
///   `orig.jpg` + `orig_copy.jpg`, a `Similar` group holding `orig.jpg` +
///   `orig_resized.jpg`, and no group pairing `solid_blue.jpg` with `orig.jpg`.
#[test]
fn image_phase_real_files_red_green() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan images");
    assert!(entries.len() >= 5, "need fixtures");

    // Look a fixture up by its full path. The expected path is built once per
    // lookup (not once per scanned entry), and a missing fixture names itself
    // in the panic instead of surfacing as an anonymous `unwrap` failure.
    let entry_for = |name: &str| {
        let wanted = fixture(name);
        let found = entries.iter().find(|e| e.path == Path::new(&wanted));
        found.unwrap_or_else(|| panic!("fixture {name} missing from scan results"))
    };

    let orig = entry_for("orig.jpg");
    let copy = entry_for("orig_copy.jpg");
    let resized = entry_for("orig_resized.jpg");
    let blue = entry_for("solid_blue.jpg");

    assert_eq!(orig.sha256, copy.sha256);
    assert!(hamming(orig.dhash, resized.dhash) <= 8, "resized should be similar");
    assert!(hamming(orig.dhash, blue.dhash) > 8, "blue should be unrelated");

    let groups = find_duplicate_groups(&entries, 8);

    // The byte-identical copy must share an exact group with the original.
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Exact
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig_copy.jpg"))
        }),
        "missing exact group"
    );

    // The resized variant must share a similar group with the original.
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Similar
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig_resized.jpg"))
        }),
        "missing similar group"
    );

    // The unrelated image must never be grouped with the original.
    assert!(
        !groups.iter().any(|g| {
            g.paths.iter().any(|p| p.ends_with("solid_blue.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
        }),
        "false positive with unrelated image"
    );
}
/// Scanning the `empty_dir` fixture must succeed and return zero entries.
#[test]
fn scan_empty_dir_returns_no_entries() {
    let scanned = scan_images(Path::new(
        "/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir",
    ))
    .expect("scan empty dir");

    assert!(scanned.is_empty(), "empty dir should yield no entries");
}
/// `orig_cropped.jpg` must have a different `sha256` than `orig.jpg` while the
/// `hamming` distance between their `dhash` fields stays at or below 12.
#[test]
fn cropped_image_similar_to_original() {
    let root = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let scanned = scan_images(root).expect("scan");

    let orig = scanned
        .iter()
        .find(|entry| entry.path.ends_with("orig.jpg"))
        .expect("orig.jpg");
    let cropped = scanned
        .iter()
        .find(|entry| entry.path.ends_with("orig_cropped.jpg"))
        .expect("orig_cropped.jpg");

    // Different bytes on disk ...
    assert_ne!(orig.sha256, cropped.sha256, "cropped should differ in bytes");

    // ... but still close in perceptual-hash space.
    let distance = hamming(orig.dhash, cropped.dhash);
    assert!(
        distance <= 12,
        "cropped should be perceptually similar (hamming <= 12)"
    );
}
/// Verifies that `scan_images` returns only image files from the fixture directory.
///
/// `readme.txt` and `data.csv` sit alongside the images and must be absent from
/// the results; every returned entry must carry one of the accepted image
/// extensions (compared after ASCII lower-casing).
#[test]
fn non_image_files_excluded_from_scan() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // readme.txt and data.csv exist in dir but must not appear in results
    assert!(
        !entries.iter().any(|e| e.path.ends_with("readme.txt")),
        "txt file should be excluded"
    );
    assert!(
        !entries.iter().any(|e| e.path.ends_with("data.csv")),
        "csv file should be excluded"
    );

    // all entries should have image extensions
    for e in &entries {
        // An entry without a (UTF-8) extension fails the test with its path in
        // the message rather than panicking on a bare `unwrap`.
        let ext = e
            .path
            .extension()
            .and_then(|raw| raw.to_str())
            .unwrap_or_else(|| panic!("no usable extension on {}", e.path.display()))
            .to_ascii_lowercase();
        assert!(
            matches!(
                ext.as_str(),
                "jpg" | "jpeg" | "png" | "webp" | "bmp" | "gif" | "tif" | "tiff"
            ),
            "unexpected ext: {ext} in {}",
            e.path.display()
        );
    }
}
/// Verifies that `scan_images` descends into nested directories.
///
/// `subdir/gradient_sub.jpg` must be found, must share its `sha256` with the
/// `gradient.jpg` that is not under `subdir`, and `find_duplicate_groups`
/// (called with 8) must report the two together in an `Exact` group.
#[test]
fn scan_recurses_into_subdirectories() {
    /// True when any component of `p` is exactly `subdir`.
    ///
    /// Component-wise comparison avoids a UTF-8 conversion that could panic and
    /// does not misfire on directory names that merely contain "subdir".
    fn under_subdir(p: &Path) -> bool {
        p.components().any(|c| c.as_os_str() == "subdir")
    }

    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // gradient_sub.jpg lives in subdir/ - must be found
    let sub = entries
        .iter()
        .find(|e| e.path.ends_with("subdir/gradient_sub.jpg"))
        .expect("should find image in subdirectory");

    // it's an exact copy of gradient.jpg
    let grad = entries
        .iter()
        .find(|e| e.path.ends_with("gradient.jpg") && !under_subdir(&e.path))
        .expect("gradient.jpg outside subdir should be scanned");
    assert_eq!(grad.sha256, sub.sha256, "exact copy should have same sha256");

    let groups = find_duplicate_groups(&entries, 8);
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Exact
                && g.paths
                    .iter()
                    .any(|p| p.ends_with("gradient.jpg") && !under_subdir(p))
                && g.paths.iter().any(|p| p.ends_with("gradient_sub.jpg"))
        }),
        "should group gradient.jpg and subdir/gradient_sub.jpg as exact"
    );
}
/// The `single` fixture directory yields exactly one entry, and
/// `find_duplicate_groups` (called with 8) reports no groups for it.
#[test]
fn single_image_no_duplicates() {
    let scanned = scan_images(Path::new(
        "/a0/usr/projects/deduper/.a0proj/test_media/images/single",
    ))
    .expect("scan");
    assert_eq!(scanned.len(), 1, "should find exactly one image");

    let dup_groups = find_duplicate_groups(&scanned, 8);
    assert!(
        dup_groups.is_empty(),
        "single image should produce no duplicate groups"
    );
}
/// A corrupt image in the fixture directory must be skipped without failing
/// the scan, and valid images must still be returned.
#[test]
fn scan_skips_corrupt_image_files() {
    // corrupt.png exists in fixtures dir with invalid PNG data
    let root = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let scanned = scan_images(root).expect("scan should not fail on corrupt files");

    // corrupt.png should be skipped, not cause an error
    let corrupt_absent = scanned
        .iter()
        .all(|entry| !entry.path.ends_with("corrupt.png"));
    assert!(corrupt_absent, "corrupt file should be skipped");

    // valid images still found
    let orig_present = scanned
        .iter()
        .any(|entry| entry.path.ends_with("orig.jpg"));
    assert!(orig_present, "valid images should still be found");
}
/// Runs the `deduper` binary on the fixture directory with `8` as the second
/// argument and checks that it exits successfully and that stdout contains
/// both an `[exact]` and a `[similar]` marker.
#[test]
fn cli_binary_reports_duplicates() {
    let bin = env!("CARGO_BIN_EXE_deduper");
    let output = std::process::Command::new(bin)
        .arg("/a0/usr/projects/deduper/.a0proj/test_media/images")
        .arg("8")
        .output()
        .expect("failed to run deduper binary");
    let stdout = String::from_utf8_lossy(&output.stdout);

    // Include stderr so a non-zero exit explains itself, matching the
    // `--review` CLI test in this file.
    assert!(
        output.status.success(),
        "binary should exit 0: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(stdout.contains("[exact]"), "output should contain exact groups: {stdout}");
    assert!(stdout.contains("[similar]"), "output should contain similar groups: {stdout}");
}
/// A file whose extension disagrees with its content must still be scanned,
/// and must have the same `sha256` as the file it was copied from.
#[test]
fn misnamed_jpeg_as_png_still_scanned() {
    // fake_png.png is actually JPEG data with .png extension
    let root = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let scanned = scan_images(root).expect("scan");

    let fake = scanned
        .iter()
        .find(|entry| entry.path.ends_with("fake_png.png"))
        .expect("misnamed JPEG-as-PNG should be scanned via magic bytes");

    // should have same hash as orig.jpg since it's a copy
    let orig = scanned
        .iter()
        .find(|entry| entry.path.ends_with("orig.jpg"))
        .unwrap();
    assert_eq!(orig.sha256, fake.sha256, "same content = same sha256");
}
/// Runs the `deduper` binary with `--review` on the empty fixture directory
/// and checks that it exits successfully and prints the "no duplicates" line.
#[test]
fn cli_review_flag_accepted_no_dupes() {
    let mut command = std::process::Command::new(env!("CARGO_BIN_EXE_deduper"));
    command.args([
        "/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir",
        "--review",
    ]);
    let run = command.output().expect("failed to run deduper binary");

    let printed = String::from_utf8_lossy(&run.stdout);
    assert!(
        run.status.success(),
        "--review on empty dir should exit 0: {}",
        String::from_utf8_lossy(&run.stderr)
    );
    assert!(
        printed.contains("no image duplicates found"),
        "should report no dupes: {printed}"
    );
}