- lib.rs: scan_images, compute_dhash, hamming, find_duplicate_groups
- main.rs: CLI with folder arg and optional hamming threshold
- 13 unit tests: hamming, is_image_path, dhash, find_duplicate_groups
- 7 integration tests: real files, empty dir, cropped, non-image exclusion, subdirectory recursion, single file, CLI binary output
- All 20 tests passing
125 lines
5.4 KiB
Rust
125 lines
5.4 KiB
Rust
use deduper::{find_duplicate_groups, hamming, scan_images, DuplicateKind};
|
|
use std::path::Path;
|
|
|
|
/// Returns the absolute path of the image fixture `name`, formed by joining
/// the hard-coded test-media images directory and `name` with a `/`.
fn fixture(name: &str) -> String {
    const IMAGES_DIR: &str = "/a0/usr/projects/deduper/.a0proj/test_media/images";
    [IMAGES_DIR, name].join("/")
}
|
|
|
|
/// End-to-end check of hashing and grouping against the real image fixtures.
///
/// Asserts that `orig.jpg` and `orig_copy.jpg` have equal `sha256` values,
/// that `orig_resized.jpg` is within a `dhash` Hamming distance of 8 of the
/// original while `solid_blue.jpg` is farther than 8, and that
/// `find_duplicate_groups` (threshold 8) reports an exact group, a similar
/// group, and no group pairing the blue image with the original.
#[test]
fn image_phase_real_files_red_green() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan images");
    assert!(entries.len() >= 5, "need fixtures");

    // Look a scanned entry up by its full fixture path. A missing fixture
    // panics with the file name rather than an anonymous `unwrap` failure.
    let entry = |name: &str| {
        let wanted = fixture(name);
        let found = entries.iter().find(|e| e.path == Path::new(&wanted));
        found.unwrap_or_else(|| panic!("fixture {name} not found in scan results"))
    };
    let orig = entry("orig.jpg");
    let copy = entry("orig_copy.jpg");
    let resized = entry("orig_resized.jpg");
    let blue = entry("solid_blue.jpg");

    assert_eq!(orig.sha256, copy.sha256);
    assert!(hamming(orig.dhash, resized.dhash) <= 8, "resized should be similar");
    assert!(hamming(orig.dhash, blue.dhash) > 8, "blue should be unrelated");

    let groups = find_duplicate_groups(&entries, 8);
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Exact
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig_copy.jpg"))
        }),
        "missing exact group"
    );
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Similar
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig_resized.jpg"))
        }),
        "missing similar group"
    );
    // The unrelated image must never share a group with the original.
    assert!(
        !groups.iter().any(|g| {
            g.paths.iter().any(|p| p.ends_with("solid_blue.jpg"))
                && g.paths.iter().any(|p| p.ends_with("orig.jpg"))
        }),
        "false positive with unrelated image"
    );
}
|
|
|
|
/// Scanning the `empty_dir` fixture directory must succeed and return no entries.
#[test]
fn scan_empty_dir_returns_no_entries() {
    let empty_dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir");
    let scanned = scan_images(empty_dir).expect("scan empty dir");
    assert!(scanned.is_empty(), "empty dir should yield no entries");
}
|
|
|
|
/// Checks that `orig_cropped.jpg` has a different `sha256` from `orig.jpg`
/// while their `dhash` values stay within a Hamming distance of 12.
#[test]
fn cropped_image_similar_to_original() {
    let images_dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let scanned = scan_images(images_dir).expect("scan");

    let original = scanned
        .iter()
        .find(|img| img.path.ends_with("orig.jpg"))
        .expect("orig.jpg");
    let crop = scanned
        .iter()
        .find(|img| img.path.ends_with("orig_cropped.jpg"))
        .expect("orig_cropped.jpg");

    assert_ne!(original.sha256, crop.sha256, "cropped should differ in bytes");

    let distance = hamming(original.dhash, crop.dhash);
    assert!(
        distance <= 12,
        "cropped should be perceptually similar (hamming <= 12)"
    );
}
|
|
|
|
/// Checks that `scan_images` returns only files whose extension is one of the
/// recognised image extensions, and in particular skips the text/CSV fixtures.
#[test]
fn non_image_files_excluded_from_scan() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // readme.txt and data.csv exist in dir but must not appear in results
    assert!(
        !entries.iter().any(|e| e.path.ends_with("readme.txt")),
        "txt file should be excluded"
    );
    assert!(
        !entries.iter().any(|e| e.path.ends_with("data.csv")),
        "csv file should be excluded"
    );

    // All entries should have image extensions. A missing or non-UTF-8
    // extension is mapped to "" so it fails the assertion below, which names
    // the offending path, instead of panicking inside an `unwrap`.
    for e in &entries {
        let ext = e
            .path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or_default()
            .to_ascii_lowercase();
        assert!(
            matches!(
                ext.as_str(),
                "jpg" | "jpeg" | "png" | "webp" | "bmp" | "gif" | "tif" | "tiff"
            ),
            "unexpected ext: {ext} in {}",
            e.path.display()
        );
    }
}
|
|
|
|
/// Checks that `scan_images` descends into subdirectories and that the copy
/// found there is grouped with its top-level original as an exact duplicate.
#[test]
fn scan_recurses_into_subdirectories() {
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // gradient_sub.jpg lives in subdir/ - must be found
    let sub = entries
        .iter()
        .find(|e| e.path.ends_with("subdir/gradient_sub.jpg"))
        .expect("should find image in subdirectory");

    // it's an exact copy of gradient.jpg
    // "Outside subdir" is decided per path component, so it neither panics on
    // non-UTF-8 paths nor matches "subdir" as a substring of another name.
    let grad = entries
        .iter()
        .find(|e| {
            e.path.ends_with("gradient.jpg")
                && !e.path.components().any(|c| c.as_os_str() == "subdir")
        })
        .expect("should find gradient.jpg outside subdir");
    assert_eq!(grad.sha256, sub.sha256, "exact copy should have same sha256");

    let groups = find_duplicate_groups(&entries, 8);
    assert!(
        groups.iter().any(|g| {
            g.kind == DuplicateKind::Exact
                && g.paths.iter().any(|p| {
                    p.ends_with("gradient.jpg")
                        && !p.components().any(|c| c.as_os_str() == "subdir")
                })
                && g.paths.iter().any(|p| p.ends_with("gradient_sub.jpg"))
        }),
        "should group gradient.jpg and subdir/gradient_sub.jpg as exact"
    );
}
|
|
|
|
/// A directory holding exactly one image must scan to one entry and yield
/// no duplicate groups at threshold 8.
#[test]
fn single_image_no_duplicates() {
    let single_dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/single");
    let scanned = scan_images(single_dir).expect("scan");
    assert_eq!(scanned.len(), 1, "should find exactly one image");

    let duplicate_groups = find_duplicate_groups(&scanned, 8);
    assert!(
        duplicate_groups.is_empty(),
        "single image should produce no duplicate groups"
    );
}
|
|
|
|
/// Runs the compiled `deduper` binary on the image fixtures with threshold
/// argument `8` and checks that it exits successfully and that its stdout
/// contains both an `[exact]` and a `[similar]` marker.
#[test]
fn cli_binary_reports_duplicates() {
    // Cargo sets this variable to the built binary's path for integration tests.
    let bin = env!("CARGO_BIN_EXE_deduper");
    let output = std::process::Command::new(bin)
        .arg("/a0/usr/projects/deduper/.a0proj/test_media/images")
        .arg("8")
        .output()
        .expect("failed to run deduper binary");

    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);

    // Surface the child's status and stderr so a failing run can be diagnosed.
    assert!(
        output.status.success(),
        "binary should exit 0, got {}; stderr: {stderr}",
        output.status
    );
    assert!(
        stdout.contains("[exact]"),
        "output should contain exact groups: {stdout}"
    );
    assert!(
        stdout.contains("[similar]"),
        "output should contain similar groups: {stdout}"
    );
}
|