use deduper::{find_duplicate_groups, hamming, scan_images, DuplicateKind}; use std::path::Path; fn fixture(name: &str) -> String { format!("/a0/usr/projects/deduper/.a0proj/test_media/images/{name}") } #[test] fn image_phase_real_files_red_green() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"); let entries = scan_images(dir).expect("scan images"); assert!(entries.len() >= 5, "need fixtures"); let orig = entries .iter() .find(|e| e.path == Path::new(&fixture("orig.jpg"))) .unwrap(); let copy = entries .iter() .find(|e| e.path == Path::new(&fixture("orig_copy.jpg"))) .unwrap(); let resized = entries .iter() .find(|e| e.path == Path::new(&fixture("orig_resized.jpg"))) .unwrap(); let blue = entries .iter() .find(|e| e.path == Path::new(&fixture("solid_blue.jpg"))) .unwrap(); assert_eq!(orig.sha256, copy.sha256); assert!(hamming(orig.dhash, resized.dhash) <= 8, "resized should be similar"); assert!(hamming(orig.dhash, blue.dhash) > 8, "blue should be unrelated"); let groups = find_duplicate_groups(&entries, 8); assert!(groups.iter().any(|g| { g.kind == DuplicateKind::Exact && g.paths.iter().any(|p| p.ends_with("orig.jpg")) && g.paths.iter().any(|p| p.ends_with("orig_copy.jpg")) }), "missing exact group"); assert!(groups.iter().any(|g| { g.kind == DuplicateKind::Similar && g.paths.iter().any(|p| p.ends_with("orig.jpg")) && g.paths.iter().any(|p| p.ends_with("orig_resized.jpg")) }), "missing similar group"); assert!(!groups.iter().any(|g| { g.paths.iter().any(|p| p.ends_with("solid_blue.jpg")) && g.paths.iter().any(|p| p.ends_with("orig.jpg")) }), "false positive with unrelated image"); } #[test] fn scan_empty_dir_returns_no_entries() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir"); let entries = scan_images(dir).expect("scan empty dir"); assert!(entries.is_empty(), "empty dir should yield no entries"); } #[test] fn cropped_image_similar_to_original() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"); let entries = scan_images(dir).expect("scan"); let orig = entries.iter().find(|e| e.path.ends_with("orig.jpg")).expect("orig.jpg"); let cropped = entries.iter().find(|e| e.path.ends_with("orig_cropped.jpg")).expect("orig_cropped.jpg"); assert_ne!(orig.sha256, cropped.sha256, "cropped should differ in bytes"); assert!(hamming(orig.dhash, cropped.dhash) <= 12, "cropped should be perceptually similar (hamming <= 12)"); } #[test] fn non_image_files_excluded_from_scan() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"); let entries = scan_images(dir).expect("scan"); // readme.txt and data.csv exist in dir but must not appear in results assert!(!entries.iter().any(|e| e.path.ends_with("readme.txt")), "txt file should be excluded"); assert!(!entries.iter().any(|e| e.path.ends_with("data.csv")), "csv file should be excluded"); // all entries should have image extensions for e in &entries { let ext = e.path.extension().unwrap().to_str().unwrap().to_ascii_lowercase(); assert!(matches!(ext.as_str(), "jpg"|"jpeg"|"png"|"webp"|"bmp"|"gif"|"tif"|"tiff"), "unexpected ext: {ext} in {}", e.path.display()); } } #[test] fn scan_recurses_into_subdirectories() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"); let entries = scan_images(dir).expect("scan"); // gradient_sub.jpg lives in subdir/ - must be found let sub = entries.iter().find(|e| e.path.ends_with("subdir/gradient_sub.jpg")); assert!(sub.is_some(), "should find image in subdirectory"); // it's an exact copy of gradient.jpg let grad = entries.iter().find(|e| e.path.ends_with("gradient.jpg") && !e.path.to_str().unwrap().contains("subdir")).unwrap(); let sub = sub.unwrap(); assert_eq!(grad.sha256, sub.sha256, "exact copy should have same sha256"); let groups = find_duplicate_groups(&entries, 8); assert!(groups.iter().any(|g| { g.kind == DuplicateKind::Exact && g.paths.iter().any(|p| p.ends_with("gradient.jpg") && !p.to_str().unwrap().contains("subdir")) && g.paths.iter().any(|p| p.ends_with("gradient_sub.jpg")) }), "should group gradient.jpg and subdir/gradient_sub.jpg as exact"); } #[test] fn single_image_no_duplicates() { let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/single"); let entries = scan_images(dir).expect("scan"); assert_eq!(entries.len(), 1, "should find exactly one image"); let groups = find_duplicate_groups(&entries, 8); assert!(groups.is_empty(), "single image should produce no duplicate groups"); } #[test] fn cli_binary_reports_duplicates() { let bin = env!("CARGO_BIN_EXE_deduper"); let output = std::process::Command::new(bin) .arg("/a0/usr/projects/deduper/.a0proj/test_media/images") .arg("8") .output() .expect("failed to run deduper binary"); let stdout = String::from_utf8_lossy(&output.stdout); assert!(output.status.success(), "binary should exit 0"); assert!(stdout.contains("[exact]"), "output should contain exact groups: {stdout}"); assert!(stdout.contains("[similar]"), "output should contain similar groups: {stdout}"); }