feat: skip similar pairs where path contains thumbnail/thumbnails
- Filters out thumbnail-vs-original false positives
- Only applies to pairs (group size 2)
- Case-insensitive path check
- New test: similar_pair_with_thumbnail_path_ignored
- 25 tests passing
This commit is contained in:
18
src/lib.rs
18
src/lib.rs
@@ -174,6 +174,13 @@ pub fn find_duplicate_groups_with_size_ratio(
|
|||||||
if paths.len() > 1 {
|
if paths.len() > 1 {
|
||||||
let non_exact = paths.iter().filter(|p| !exact_paths.contains(*p)).count();
|
let non_exact = paths.iter().filter(|p| !exact_paths.contains(*p)).count();
|
||||||
if non_exact >= 2 {
|
if non_exact >= 2 {
|
||||||
|
// Skip pairs where a thumbnail path is involved
|
||||||
|
if paths.len() == 2 && paths.iter().any(|p| {
|
||||||
|
let s = p.to_string_lossy().to_ascii_lowercase();
|
||||||
|
s.contains("thumbnail") || s.contains("thumbnails")
|
||||||
|
}) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
groups.push(DuplicateGroup {
|
groups.push(DuplicateGroup {
|
||||||
kind: DuplicateKind::Similar,
|
kind: DuplicateKind::Similar,
|
||||||
paths,
|
paths,
|
||||||
@@ -351,4 +358,15 @@ mod tests {
|
|||||||
assert!(groups2.iter().any(|g| g.kind == DuplicateKind::Similar),
|
assert!(groups2.iter().any(|g| g.kind == DuplicateKind::Similar),
|
||||||
"files with similar sizes should be grouped");
|
"files with similar sizes should be grouped");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for the thumbnail filter: a two-entry input in which one
// path lies under a `.thumbnails` directory must not produce any
// `DuplicateKind::Similar` group.
#[test]
|
||||||
|
fn similar_pair_with_thumbnail_path_ignored() {
|
||||||
|
// Two entries with the same file name; the second sits under `.thumbnails/`.
// NOTE(review): the remaining `make_entry_sized` arguments look like a content
// hash, a perceptual-hash bit pattern (the two differ in one bit) and a size in
// bytes — confirm against the helper's definition.
let entries = vec![
|
||||||
|
make_entry_sized("/photos/cat.jpg", "aaa", 0b0000_0000, 100_000),
|
||||||
|
make_entry_sized("/photos/.thumbnails/cat.jpg", "bbb", 0b0000_0001, 95_000),
|
||||||
|
];
|
||||||
|
// NOTE(review): `8` is presumably the similarity threshold — confirm against
// `find_duplicate_groups`.
let groups = find_duplicate_groups(&entries, 8);
|
||||||
|
// No group of kind `Similar` may be reported for this pair.
assert!(!groups.iter().any(|g| g.kind == DuplicateKind::Similar),
|
||||||
|
"similar pair where one path contains 'thumbnails' should be ignored");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user