feat: skip similar pairs where path contains thumbnail/thumbnails
- Filters out thumbnail-vs-original false positives
- Only applies to pairs (group size 2); larger similar groups are left untouched
- Case-insensitive (ASCII) substring check for "thumbnail" anywhere in the path
- New test: similar_pair_with_thumbnail_path_ignored
- 25 tests passing
This commit is contained in:
18
src/lib.rs
18
src/lib.rs
@@ -174,6 +174,13 @@ pub fn find_duplicate_groups_with_size_ratio(
|
||||
if paths.len() > 1 {
|
||||
let non_exact = paths.iter().filter(|p| !exact_paths.contains(*p)).count();
|
||||
if non_exact >= 2 {
|
||||
// Skip pairs where a thumbnail path is involved
|
||||
if paths.len() == 2 && paths.iter().any(|p| {
|
||||
let s = p.to_string_lossy().to_ascii_lowercase();
|
||||
s.contains("thumbnail") || s.contains("thumbnails")
|
||||
}) {
|
||||
continue;
|
||||
}
|
||||
groups.push(DuplicateGroup {
|
||||
kind: DuplicateKind::Similar,
|
||||
paths,
|
||||
@@ -351,4 +358,15 @@ mod tests {
|
||||
assert!(groups2.iter().any(|g| g.kind == DuplicateKind::Similar),
|
||||
"files with similar sizes should be grouped");
|
||||
}
|
||||
|
||||
// Regression test for the thumbnail filter: a two-entry candidate pair in which
// one path contains "thumbnails" must not be reported as a `Similar` group.
// NOTE(review): this only proves the filter works if the same two entries would
// otherwise form a `Similar` group; consider a control assertion using a
// non-thumbnail path to confirm that.
#[test]
|
||||
fn similar_pair_with_thumbnail_path_ignored() {
|
||||
// Two entries that differ in every argument; the second lives under a
// `.thumbnails` directory. Presumably the arguments are (path, content hash,
// perceptual hash, size in bytes) — confirm against `make_entry_sized`.
let entries = vec![
|
||||
make_entry_sized("/photos/cat.jpg", "aaa", 0b0000_0000, 100_000),
|
||||
make_entry_sized("/photos/.thumbnails/cat.jpg", "bbb", 0b0000_0001, 95_000),
|
||||
];
|
||||
// The second argument (8) is presumably the similarity threshold — confirm
// against `find_duplicate_groups`.
let groups = find_duplicate_groups(&entries, 8);
|
||||
// No returned group may be of kind `Similar`.
assert!(!groups.iter().any(|g| g.kind == DuplicateKind::Similar),
|
||||
"similar pair where one path contains 'thumbnails' should be ignored");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user