From f13b712e99c3d924df07c0e21f0d451e3dffb45b Mon Sep 17 00:00:00 2001
From: admin
Date: Tue, 28 Apr 2026 00:34:11 +0000
Subject: [PATCH] feat: skip similar pairs where path contains thumbnail/thumbnails

- Filters out thumbnail-vs-original false positives
- Only applies to pairs (group size 2)
- Case-insensitive path check
- New test: similar_pair_with_thumbnail_path_ignored
- 25 tests passing
---
 src/lib.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index 051bf63..fb59c63 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -174,6 +174,13 @@ pub fn find_duplicate_groups_with_size_ratio(
         if paths.len() > 1 {
             let non_exact = paths.iter().filter(|p| !exact_paths.contains(*p)).count();
             if non_exact >= 2 {
+                // A lone pair touching a "thumbnail" path (this also matches "thumbnails") is treated as thumbnail-vs-original noise.
+                if paths.len() == 2 && paths.iter().any(|p| {
+                    let s = p.to_string_lossy().to_ascii_lowercase();
+                    s.contains("thumbnail")
+                }) {
+                    continue;
+                }
                 groups.push(DuplicateGroup {
                     kind: DuplicateKind::Similar,
                     paths,
@@ -351,4 +358,15 @@ mod tests {
         assert!(groups2.iter().any(|g| g.kind == DuplicateKind::Similar),
             "files with similar sizes should be grouped");
     }
+
+    #[test]
+    fn similar_pair_with_thumbnail_path_ignored() {
+        let entries = vec![
+            make_entry_sized("/photos/cat.jpg", "aaa", 0b0000_0000, 100_000),
+            make_entry_sized("/photos/.thumbnails/cat.jpg", "bbb", 0b0000_0001, 95_000),
+        ];
+        let groups = find_duplicate_groups(&entries, 8);
+        assert!(!groups.iter().any(|g| g.kind == DuplicateKind::Similar),
+            "similar pair where one path contains 'thumbnails' should be ignored");
+    }
 }