Files
deduper/tests/image_phase.rs
admin e1f8201b5c feat: complete image phase - SHA-256 exact + dHash perceptual duplicate detection
- lib.rs: scan_images, compute_dhash, hamming, find_duplicate_groups
- main.rs: CLI with folder arg and optional hamming threshold
- 13 unit tests: hamming, is_image_path, dhash, find_duplicate_groups
- 7 integration tests: real files, empty dir, cropped, non-image exclusion,
  subdirectory recursion, single file, CLI binary output
- All 20 tests passing
2026-04-27 23:33:27 +00:00

125 lines
5.4 KiB
Rust

use deduper::{find_duplicate_groups, hamming, scan_images, DuplicateKind};
use std::path::Path;
/// Returns the absolute path (as a `String`) of the file `name` inside the
/// image test-media directory used by these integration tests.
fn fixture(name: &str) -> String {
    const IMAGES_DIR: &str = "/a0/usr/projects/deduper/.a0proj/test_media/images";
    [IMAGES_DIR, name].join("/")
}
/// End-to-end check of the image phase against the real fixture files.
///
/// Scans the fixture directory and verifies that:
/// * `orig.jpg` and `orig_copy.jpg` have equal `sha256` values,
/// * `orig_resized.jpg` is within a `dhash` hamming distance of 8 of `orig.jpg`,
/// * `solid_blue.jpg` is further than 8 away from `orig.jpg`,
/// * `find_duplicate_groups` (called with 8, presumably the hamming threshold)
///   reports an exact group and a similar group for those files, and never
///   puts `solid_blue.jpg` in the same group as `orig.jpg`.
#[test]
fn image_phase_real_files_red_green() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan images");
    assert!(entries.len() >= 5, "need fixtures");

    // Build each expected path once rather than re-formatting it for every
    // entry the `find` closure inspects.
    let orig_path = fixture("orig.jpg");
    let copy_path = fixture("orig_copy.jpg");
    let resized_path = fixture("orig_resized.jpg");
    let blue_path = fixture("solid_blue.jpg");

    // `expect` names the missing fixture so a failure is immediately actionable.
    let orig = entries
        .iter()
        .find(|e| e.path == Path::new(&orig_path))
        .expect("orig.jpg missing from scan results");
    let copy = entries
        .iter()
        .find(|e| e.path == Path::new(&copy_path))
        .expect("orig_copy.jpg missing from scan results");
    let resized = entries
        .iter()
        .find(|e| e.path == Path::new(&resized_path))
        .expect("orig_resized.jpg missing from scan results");
    let blue = entries
        .iter()
        .find(|e| e.path == Path::new(&blue_path))
        .expect("solid_blue.jpg missing from scan results");

    assert_eq!(orig.sha256, copy.sha256);
    assert!(
        hamming(orig.dhash, resized.dhash) <= 8,
        "resized should be similar"
    );
    assert!(
        hamming(orig.dhash, blue.dhash) > 8,
        "blue should be unrelated"
    );

    let groups = find_duplicate_groups(&entries, 8);

    let has_exact_group = groups.iter().any(|g| {
        g.kind == DuplicateKind::Exact
            && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
            && g.paths.iter().any(|p| p.ends_with("orig_copy.jpg"))
    });
    assert!(has_exact_group, "missing exact group");

    let has_similar_group = groups.iter().any(|g| {
        g.kind == DuplicateKind::Similar
            && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
            && g.paths.iter().any(|p| p.ends_with("orig_resized.jpg"))
    });
    assert!(has_similar_group, "missing similar group");

    // No group of any kind may contain both the blue image and the original.
    let blue_grouped_with_orig = groups.iter().any(|g| {
        g.paths.iter().any(|p| p.ends_with("solid_blue.jpg"))
            && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
    });
    assert!(
        !blue_grouped_with_orig,
        "false positive with unrelated image"
    );
}
/// Scanning the `empty_dir` fixture must succeed and return no entries.
#[test]
fn scan_empty_dir_returns_no_entries() {
    let empty_dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir");

    let found = scan_images(empty_dir).expect("scan empty dir");

    assert!(found.is_empty(), "empty dir should yield no entries");
}
/// `orig_cropped.jpg` must differ from `orig.jpg` in `sha256` while staying
/// within a `dhash` hamming distance of 12 of it.
#[test]
fn cropped_image_similar_to_original() {
    let images = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let scanned = scan_images(images).expect("scan");

    let original = scanned
        .iter()
        .find(|entry| entry.path.ends_with("orig.jpg"))
        .expect("orig.jpg");
    let cropped = scanned
        .iter()
        .find(|entry| entry.path.ends_with("orig_cropped.jpg"))
        .expect("orig_cropped.jpg");

    assert_ne!(
        original.sha256, cropped.sha256,
        "cropped should differ in bytes"
    );

    let distance = hamming(original.dhash, cropped.dhash);
    assert!(
        distance <= 12,
        "cropped should be perceptually similar (hamming <= 12)"
    );
}
/// Non-image files in the fixture directory must not show up in scan results,
/// and every returned entry must carry one of the accepted image extensions
/// (compared case-insensitively).
#[test]
fn non_image_files_excluded_from_scan() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // readme.txt and data.csv exist in dir but must not appear in results
    assert!(
        !entries.iter().any(|e| e.path.ends_with("readme.txt")),
        "txt file should be excluded"
    );
    assert!(
        !entries.iter().any(|e| e.path.ends_with("data.csv")),
        "csv file should be excluded"
    );

    // all entries should have image extensions
    for e in &entries {
        // A missing or non-UTF-8 extension is folded into "" so that it fails
        // the assertion below (which names the offending path) instead of
        // panicking inside a bare `unwrap`.
        let ext = e
            .path
            .extension()
            .and_then(|os| os.to_str())
            .map(|s| s.to_ascii_lowercase())
            .unwrap_or_default();
        assert!(
            matches!(
                ext.as_str(),
                "jpg" | "jpeg" | "png" | "webp" | "bmp" | "gif" | "tif" | "tiff"
            ),
            "unexpected ext: {ext} in {}",
            e.path.display()
        );
    }
}
/// The scan must descend into subdirectories: `subdir/gradient_sub.jpg` has to
/// be found, share its `sha256` with the `gradient.jpg` that is not under
/// `subdir`, and be reported together with it in an exact duplicate group.
#[test]
fn scan_recurses_into_subdirectories() {
    /// True when any component of `path` is exactly `subdir`.
    ///
    /// Works on path components rather than the path's string form, so it
    /// neither panics on non-UTF-8 paths nor matches `subdir` as a mere
    /// substring of some other name.
    fn in_subdir(path: &Path) -> bool {
        path.components().any(|c| c.as_os_str() == "subdir")
    }

    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // gradient_sub.jpg lives in subdir/ - must be found
    let sub = entries
        .iter()
        .find(|e| e.path.ends_with("subdir/gradient_sub.jpg"))
        .expect("should find image in subdirectory");

    // it's an exact copy of gradient.jpg
    let grad = entries
        .iter()
        .find(|e| e.path.ends_with("gradient.jpg") && !in_subdir(&e.path))
        .expect("gradient.jpg outside subdir/ missing from scan results");
    assert_eq!(grad.sha256, sub.sha256, "exact copy should have same sha256");

    let groups = find_duplicate_groups(&entries, 8);
    let grouped_as_exact = groups.iter().any(|g| {
        g.kind == DuplicateKind::Exact
            && g
                .paths
                .iter()
                .any(|p| p.ends_with("gradient.jpg") && !in_subdir(p))
            && g.paths.iter().any(|p| p.ends_with("gradient_sub.jpg"))
    });
    assert!(
        grouped_as_exact,
        "should group gradient.jpg and subdir/gradient_sub.jpg as exact"
    );
}
/// The `single` fixture directory must scan to exactly one entry, and that
/// lone entry must not give rise to any duplicate group.
#[test]
fn single_image_no_duplicates() {
    let single_dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/single");
    let scanned = scan_images(single_dir).expect("scan");
    assert_eq!(scanned.len(), 1, "should find exactly one image");

    let duplicate_groups = find_duplicate_groups(&scanned, 8);
    assert!(
        duplicate_groups.is_empty(),
        "single image should produce no duplicate groups"
    );
}
/// Runs the compiled `deduper` binary on the fixture directory with `8` as the
/// second argument and checks that it exits successfully and that its stdout
/// mentions both `[exact]` and `[similar]`.
#[test]
fn cli_binary_reports_duplicates() {
    // Cargo sets this variable to the built binary's path for integration tests.
    let bin = env!("CARGO_BIN_EXE_deduper");
    let output = std::process::Command::new(bin)
        .arg("/a0/usr/projects/deduper/.a0proj/test_media/images")
        .arg("8")
        .output()
        .expect("failed to run deduper binary");

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);

    // Surface the exit status and stderr: without them a failing run gives no
    // clue about what the binary actually complained about.
    assert!(
        output.status.success(),
        "binary should exit 0 (status: {}, stderr: {stderr})",
        output.status
    );
    assert!(
        stdout.contains("[exact]"),
        "output should contain exact groups: {stdout}"
    );
    assert!(
        stdout.contains("[similar]"),
        "output should contain similar groups: {stdout}"
    );
}