feat: complete image phase - SHA-256 exact + dHash perceptual duplicate detection
- lib.rs: scan_images, compute_dhash, hamming, find_duplicate_groups - main.rs: CLI with folder arg and optional hamming threshold - 13 unit tests: hamming, is_image_path, dhash, find_duplicate_groups - 7 integration tests: real files, empty dir, cropped, non-image exclusion, subdirectory recursion, single file, CLI binary output - All 20 tests passing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
/target
|
/target
|
||||||
|
.ssh/
|
||||||
|
|||||||
1101
Cargo.lock
generated
Normal file
1101
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,3 +4,7 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
image = { version = "0.25", default-features = true, features = ["jpeg", "png", "gif", "webp", "bmp", "tiff"] }
|
||||||
|
sha2 = "0.10"
|
||||||
|
walkdir = "2.5"
|
||||||
|
anyhow = "1"
|
||||||
|
|||||||
286
src/lib.rs
Normal file
286
src/lib.rs
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use image::imageops::FilterType;
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
|
/// One scanned image file together with the fingerprints used to compare it
/// against other images.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageEntry {
    /// Location of the image file.
    pub path: PathBuf,
    /// SHA-256 digest of the file contents as a hexadecimal string; equal
    /// digests identify byte-identical files.
    pub sha256: String,
    /// 64-bit difference hash of the image; compared by Hamming distance to
    /// detect visually similar images.
    pub dhash: u64,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct DuplicateGroup {
|
||||||
|
pub kind: DuplicateKind,
|
||||||
|
pub paths: Vec<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How the members of a [`DuplicateGroup`] relate to each other.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DuplicateKind {
    /// Files share the same SHA-256 digest.
    Exact,
    /// Files differ in content but their difference hashes are close.
    Similar,
}
|
||||||
|
|
||||||
|
/// Reports whether `path` carries one of the image file extensions handled by
/// the scanner.
///
/// The comparison ignores ASCII case. Paths without an extension, or whose
/// extension is not valid UTF-8, are rejected.
fn is_image_path(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 8] =
        ["jpg", "jpeg", "png", "webp", "bmp", "gif", "tif", "tiff"];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => IMAGE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
|
||||||
|
|
||||||
|
pub fn scan_images(root: &Path) -> Result<Vec<ImageEntry>> {
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for entry in WalkDir::new(root).follow_links(true) {
|
||||||
|
let entry = entry?;
|
||||||
|
let path = entry.path();
|
||||||
|
if !entry.file_type().is_file() || !is_image_path(path) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let bytes = fs::read(path)?;
|
||||||
|
let sha256 = format!("{:x}", Sha256::digest(&bytes));
|
||||||
|
let img = image::open(path)?;
|
||||||
|
let dhash = compute_dhash(&img);
|
||||||
|
out.push(ImageEntry {
|
||||||
|
path: path.to_path_buf(),
|
||||||
|
sha256,
|
||||||
|
dhash,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compute_dhash(img: &image::DynamicImage) -> u64 {
|
||||||
|
let gray = img
|
||||||
|
.grayscale()
|
||||||
|
.resize_exact(9, 8, FilterType::Triangle)
|
||||||
|
.to_luma8();
|
||||||
|
let mut hash = 0u64;
|
||||||
|
let mut bit = 0;
|
||||||
|
for y in 0..8 {
|
||||||
|
for x in 0..8 {
|
||||||
|
let left = gray.get_pixel(x, y)[0];
|
||||||
|
let right = gray.get_pixel(x + 1, y)[0];
|
||||||
|
if left > right {
|
||||||
|
hash |= 1 << bit;
|
||||||
|
}
|
||||||
|
bit += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hash
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the Hamming distance between `a` and `b`: the number of bit
/// positions in which the two values differ (0 to 64).
pub fn hamming(a: u64, b: u64) -> u32 {
    let differing_bits = a ^ b;
    differing_bits.count_ones()
}
|
||||||
|
|
||||||
|
pub fn find_duplicate_groups(entries: &[ImageEntry], hamming_threshold: u32) -> Vec<DuplicateGroup> {
|
||||||
|
let mut groups = Vec::new();
|
||||||
|
|
||||||
|
let mut exact: HashMap<&str, Vec<PathBuf>> = HashMap::new();
|
||||||
|
for e in entries {
|
||||||
|
exact.entry(&e.sha256).or_default().push(e.path.clone());
|
||||||
|
}
|
||||||
|
for paths in exact.into_values() {
|
||||||
|
if paths.len() > 1 {
|
||||||
|
groups.push(DuplicateGroup {
|
||||||
|
kind: DuplicateKind::Exact,
|
||||||
|
paths,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let n = entries.len();
|
||||||
|
let mut parent: Vec<usize> = (0..n).collect();
|
||||||
|
|
||||||
|
fn find(parent: &mut [usize], x: usize) -> usize {
|
||||||
|
if parent[x] != x {
|
||||||
|
let p = parent[x];
|
||||||
|
parent[x] = find(parent, p);
|
||||||
|
}
|
||||||
|
parent[x]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn union(parent: &mut [usize], a: usize, b: usize) {
|
||||||
|
let ra = find(parent, a);
|
||||||
|
let rb = find(parent, b);
|
||||||
|
if ra != rb {
|
||||||
|
parent[rb] = ra;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 0..n {
|
||||||
|
for j in (i + 1)..n {
|
||||||
|
if entries[i].sha256 == entries[j].sha256 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if hamming(entries[i].dhash, entries[j].dhash) <= hamming_threshold {
|
||||||
|
union(&mut parent, i, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut similar: HashMap<usize, Vec<PathBuf>> = HashMap::new();
|
||||||
|
for (idx, e) in entries.iter().enumerate() {
|
||||||
|
let root = find(&mut parent, idx);
|
||||||
|
similar.entry(root).or_default().push(e.path.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let exact_paths: HashSet<PathBuf> = groups
|
||||||
|
.iter()
|
||||||
|
.flat_map(|g| g.paths.iter().cloned())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for paths in similar.into_values() {
|
||||||
|
if paths.len() > 1 {
|
||||||
|
let non_exact = paths.iter().filter(|p| !exact_paths.contains(*p)).count();
|
||||||
|
if non_exact >= 2 {
|
||||||
|
groups.push(DuplicateGroup {
|
||||||
|
kind: DuplicateKind::Similar,
|
||||||
|
paths,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
groups
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unit tests for the pure helpers of this module: bit distance, extension
/// filtering, difference hashing and duplicate grouping.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn hamming_identical() {
        assert_eq!(hamming(0, 0), 0);
        assert_eq!(hamming(u64::MAX, u64::MAX), 0);
    }

    #[test]
    fn hamming_opposite() {
        assert_eq!(hamming(0, u64::MAX), 64);
    }

    #[test]
    fn hamming_single_bit() {
        assert_eq!(hamming(0b0000, 0b0001), 1);
        assert_eq!(hamming(0b1010, 0b1000), 1);
    }

    #[test]
    fn is_image_path_accepts_valid_extensions() {
        for ext in &["jpg", "jpeg", "png", "webp", "bmp", "gif", "tif", "tiff"] {
            let p = PathBuf::from(format!("photo.{ext}"));
            assert!(is_image_path(&p), "should accept .{ext}");
        }
    }

    #[test]
    fn is_image_path_case_insensitive() {
        assert!(is_image_path(Path::new("photo.JPG")));
        assert!(is_image_path(Path::new("photo.Png")));
    }

    #[test]
    fn is_image_path_rejects_non_image() {
        assert!(!is_image_path(Path::new("file.txt")));
        assert!(!is_image_path(Path::new("file.mp3")));
        assert!(!is_image_path(Path::new("file.pdf")));
        assert!(!is_image_path(Path::new("noext")));
    }

    #[test]
    fn dhash_deterministic() {
        let img = image::DynamicImage::new_rgb8(100, 100);
        let h1 = compute_dhash(&img);
        let h2 = compute_dhash(&img);
        assert_eq!(h1, h2);
    }

    #[test]
    fn dhash_solid_images_differ_from_gradient() {
        // Solid black: all pixels 0 -> dhash = 0 (no left > right)
        let black = image::DynamicImage::new_rgb8(64, 64);
        // Gradient: right-to-left (bright left, dark right) -> left > right = true
        let mut grad = image::RgbImage::new(64, 64);
        for y in 0..64 {
            for x in 0..64 {
                let v = (255 - x * 255 / 63) as u8;
                grad.put_pixel(x, y, image::Rgb([v, v, v]));
            }
        }
        let grad = image::DynamicImage::ImageRgb8(grad);
        let h_black = compute_dhash(&black);
        let h_grad = compute_dhash(&grad);
        assert_ne!(h_black, h_grad, "solid vs gradient hashes must differ");
        assert!(hamming(h_black, h_grad) > 8, "solid vs gradient should differ significantly");
    }

    /// Builds a synthetic entry so grouping can be tested without image files.
    fn make_entry(path: &str, sha: &str, dhash: u64) -> ImageEntry {
        ImageEntry {
            path: PathBuf::from(path),
            sha256: sha.to_string(),
            dhash,
        }
    }

    #[test]
    fn find_groups_empty_input() {
        let groups = find_duplicate_groups(&[], 8);
        assert!(groups.is_empty());
    }

    #[test]
    fn find_groups_no_duplicates() {
        let entries = vec![
            make_entry("a.jpg", "aaa", 0),
            make_entry("b.jpg", "bbb", u64::MAX),
        ];
        let groups = find_duplicate_groups(&entries, 8);
        assert!(groups.is_empty(), "different hash+dhash = no groups");
    }

    #[test]
    fn find_groups_exact_only() {
        let entries = vec![
            make_entry("a.jpg", "same", 100),
            make_entry("b.jpg", "same", 100),
            // 61 bits away from 100, so it cannot be mistaken for a near-duplicate.
            // (The previous value 999 was only 5 bits away, i.e. within the threshold.)
            make_entry("c.jpg", "diff", u64::MAX),
        ];
        let groups = find_duplicate_groups(&entries, 8);
        assert_eq!(groups.iter().filter(|g| g.kind == DuplicateKind::Exact).count(), 1);
        let exact = groups.iter().find(|g| g.kind == DuplicateKind::Exact).unwrap();
        assert_eq!(exact.paths.len(), 2);
        assert!(
            groups.iter().all(|g| g.kind == DuplicateKind::Exact),
            "no similar group expected"
        );
    }

    #[test]
    fn find_groups_similar_only() {
        let entries = vec![
            make_entry("a.jpg", "aaa", 0b0000_0000),
            make_entry("b.jpg", "bbb", 0b0000_0011), // hamming=2
            make_entry("c.jpg", "ccc", u64::MAX),    // far away
        ];
        let groups = find_duplicate_groups(&entries, 8);
        assert!(groups.iter().any(|g| g.kind == DuplicateKind::Similar), "should find similar pair");
        assert!(
            !groups.iter().any(|g| g.paths.iter().any(|p| p == Path::new("c.jpg"))
                && g.paths.iter().any(|p| p == Path::new("a.jpg"))),
            "c.jpg should not group with a.jpg"
        );
    }

    #[test]
    fn find_groups_threshold_boundary() {
        let entries = vec![
            make_entry("a.jpg", "aaa", 0),
            make_entry("b.jpg", "bbb", 0b1111_1111), // hamming=8
        ];
        // threshold=8: should match
        let groups8 = find_duplicate_groups(&entries, 8);
        assert!(groups8.iter().any(|g| g.kind == DuplicateKind::Similar));
        // threshold=7: should NOT match
        let groups7 = find_duplicate_groups(&entries, 7);
        assert!(!groups7.iter().any(|g| g.kind == DuplicateKind::Similar));
    }
}
|
||||||
41
src/main.rs
41
src/main.rs
@@ -1,3 +1,42 @@
|
|||||||
|
use deduper::{find_duplicate_groups, scan_images, DuplicateKind};
|
||||||
|
use std::env;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
println!("Hello, world!");
|
let args: Vec<String> = env::args().collect();
|
||||||
|
if args.len() < 2 {
|
||||||
|
eprintln!("usage: deduper <folder> [hamming-threshold]");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let root = Path::new(&args[1]);
|
||||||
|
let threshold = args
|
||||||
|
.get(2)
|
||||||
|
.and_then(|s| s.parse::<u32>().ok())
|
||||||
|
.unwrap_or(8);
|
||||||
|
|
||||||
|
let entries = match scan_images(root) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("scan error: {e}");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let groups = find_duplicate_groups(&entries, threshold);
|
||||||
|
if groups.is_empty() {
|
||||||
|
println!("no image duplicates found");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (idx, group) in groups.iter().enumerate() {
|
||||||
|
let kind = match group.kind {
|
||||||
|
DuplicateKind::Exact => "exact",
|
||||||
|
DuplicateKind::Similar => "similar",
|
||||||
|
};
|
||||||
|
println!("group {} [{}]", idx + 1, kind);
|
||||||
|
for path in &group.paths {
|
||||||
|
println!(" {}", path.display());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
124
tests/image_phase.rs
Normal file
124
tests/image_phase.rs
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
use deduper::{find_duplicate_groups, hamming, scan_images, DuplicateKind};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
/// Returns the absolute path of the test image called `name` inside the
/// fixture directory used by these integration tests.
fn fixture(name: &str) -> String {
    let mut full_path = String::from("/a0/usr/projects/deduper/.a0proj/test_media/images/");
    full_path.push_str(name);
    full_path
}
|
||||||
|
|
||||||
|
/// End-to-end check on the real fixture images: an exact copy shares its
/// digest with the original, a resized copy stays within the similarity
/// threshold, and an unrelated image is never grouped with the original.
#[test]
fn image_phase_real_files_red_green() {
    let entries = scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"))
        .expect("scan images");
    assert!(entries.len() >= 5, "need fixtures");

    // Looks a fixture up by its full path; panics if it was not scanned.
    let lookup = |name: &str| {
        let wanted = fixture(name);
        entries
            .iter()
            .find(|e| e.path == Path::new(&wanted))
            .unwrap()
    };
    let orig = lookup("orig.jpg");
    let copy = lookup("orig_copy.jpg");
    let resized = lookup("orig_resized.jpg");
    let blue = lookup("solid_blue.jpg");

    assert_eq!(orig.sha256, copy.sha256);
    assert!(hamming(orig.dhash, resized.dhash) <= 8, "resized should be similar");
    assert!(hamming(orig.dhash, blue.dhash) > 8, "blue should be unrelated");

    let groups = find_duplicate_groups(&entries, 8);

    // True when some group (of the given kind, if any) contains both file names.
    let grouped_together = |kind: Option<DuplicateKind>, first: &str, second: &str| {
        groups.iter().any(|g| {
            kind.map_or(true, |wanted| g.kind == wanted)
                && g.paths.iter().any(|p| p.ends_with(first))
                && g.paths.iter().any(|p| p.ends_with(second))
        })
    };

    assert!(
        grouped_together(Some(DuplicateKind::Exact), "orig.jpg", "orig_copy.jpg"),
        "missing exact group"
    );
    assert!(
        grouped_together(Some(DuplicateKind::Similar), "orig.jpg", "orig_resized.jpg"),
        "missing similar group"
    );
    assert!(
        !grouped_together(None, "solid_blue.jpg", "orig.jpg"),
        "false positive with unrelated image"
    );
}
|
||||||
|
|
||||||
|
/// Scanning a directory that contains no files succeeds with an empty result.
#[test]
fn scan_empty_dir_returns_no_entries() {
    let scanned =
        scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/empty_dir"))
            .expect("scan empty dir");
    assert!(scanned.is_empty(), "empty dir should yield no entries");
}
|
||||||
|
|
||||||
|
/// A cropped copy differs byte-wise from the original, but its difference hash
/// stays within 12 bits of the original's.
#[test]
fn cropped_image_similar_to_original() {
    let entries = scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"))
        .expect("scan");

    let by_name = |file_name: &str| entries.iter().find(|e| e.path.ends_with(file_name));
    let orig = by_name("orig.jpg").expect("orig.jpg");
    let cropped = by_name("orig_cropped.jpg").expect("orig_cropped.jpg");

    assert_ne!(orig.sha256, cropped.sha256, "cropped should differ in bytes");
    let distance = hamming(orig.dhash, cropped.dhash);
    assert!(distance <= 12, "cropped should be perceptually similar (hamming <= 12)");
}
|
||||||
|
|
||||||
|
/// Files without an image extension must never show up in the scan result.
#[test]
fn non_image_files_excluded_from_scan() {
    const IMAGE_EXTENSIONS: [&str; 8] =
        ["jpg", "jpeg", "png", "webp", "bmp", "gif", "tif", "tiff"];

    let entries = scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"))
        .expect("scan");

    // readme.txt and data.csv exist in dir but must not appear in results
    let was_scanned = |file_name: &str| entries.iter().any(|e| e.path.ends_with(file_name));
    assert!(!was_scanned("readme.txt"), "txt file should be excluded");
    assert!(!was_scanned("data.csv"), "csv file should be excluded");

    // all entries should have image extensions
    for e in entries.iter() {
        let ext = e
            .path
            .extension()
            .unwrap()
            .to_str()
            .unwrap()
            .to_ascii_lowercase();
        assert!(
            IMAGE_EXTENSIONS.contains(&ext.as_str()),
            "unexpected ext: {ext} in {}",
            e.path.display()
        );
    }
}
|
||||||
|
|
||||||
|
/// Images in nested directories are scanned too, and an exact copy that lives
/// in a subdirectory is grouped with its top-level original.
#[test]
fn scan_recurses_into_subdirectories() {
    let entries = scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"))
        .expect("scan");

    // Matches gradient.jpg itself, excluding anything whose path mentions "subdir".
    let is_top_level_gradient =
        |p: &Path| p.ends_with("gradient.jpg") && !p.to_str().unwrap().contains("subdir");

    // gradient_sub.jpg lives in subdir/ - must be found
    let nested = entries
        .iter()
        .find(|e| e.path.ends_with("subdir/gradient_sub.jpg"));
    assert!(nested.is_some(), "should find image in subdirectory");

    // it's an exact copy of gradient.jpg
    let grad = entries
        .iter()
        .find(|e| is_top_level_gradient(e.path.as_path()))
        .unwrap();
    let nested = nested.unwrap();
    assert_eq!(grad.sha256, nested.sha256, "exact copy should have same sha256");

    let groups = find_duplicate_groups(&entries, 8);
    let grouped_as_exact = groups.iter().any(|g| {
        g.kind == DuplicateKind::Exact
            && g.paths.iter().any(|p| is_top_level_gradient(p.as_path()))
            && g.paths.iter().any(|p| p.ends_with("gradient_sub.jpg"))
    });
    assert!(
        grouped_as_exact,
        "should group gradient.jpg and subdir/gradient_sub.jpg as exact"
    );
}
|
||||||
|
|
||||||
|
/// A directory holding exactly one image yields one entry and no groups.
#[test]
fn single_image_no_duplicates() {
    let scanned =
        scan_images(Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images/single"))
            .expect("scan");
    assert_eq!(scanned.len(), 1, "should find exactly one image");

    let duplicate_groups = find_duplicate_groups(&scanned, 8);
    assert!(
        duplicate_groups.is_empty(),
        "single image should produce no duplicate groups"
    );
}
|
||||||
|
|
||||||
|
/// Runs the compiled `deduper` binary on the fixture directory and checks that
/// it exits successfully and prints both exact and similar groups.
#[test]
fn cli_binary_reports_duplicates() {
    let output = std::process::Command::new(env!("CARGO_BIN_EXE_deduper"))
        .args(["/a0/usr/projects/deduper/.a0proj/test_media/images", "8"])
        .output()
        .expect("failed to run deduper binary");
    let stdout = String::from_utf8_lossy(&output.stdout);

    assert!(output.status.success(), "binary should exit 0");
    for (marker, group_label) in [("[exact]", "exact"), ("[similar]", "similar")] {
        assert!(
            stdout.contains(marker),
            "output should contain {group_label} groups: {stdout}"
        );
    }
}
|
||||||
Reference in New Issue
Block a user