feat: detect image format from magic bytes, not file extension
- Fixes misnamed files (e.g. JPEG saved as .png) being skipped
- Uses image::ImageReader with guessed format from content
- Fixes Android screenshots with wrong extension being skipped
- New test: misnamed_jpeg_as_png_still_scanned
- 22 tests passing
This commit is contained in:
12
src/lib.rs
12
src/lib.rs
@@ -1,8 +1,10 @@
|
||||
use anyhow::Result;
|
||||
use image::imageops::FilterType;
|
||||
use image::ImageReader;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::io::Cursor;
|
||||
use std::path::{Path, PathBuf};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
@@ -48,8 +50,14 @@ pub fn scan_images(root: &Path) -> Result<Vec<ImageEntry>> {
|
||||
}
|
||||
};
|
||||
let sha256 = format!("{:x}", Sha256::digest(&bytes));
|
||||
let img = match image::open(path) {
|
||||
Ok(i) => i,
|
||||
let img = match ImageReader::new(Cursor::new(&bytes)).with_guessed_format() {
|
||||
Ok(reader) => match reader.decode() {
|
||||
Ok(i) => i,
|
||||
Err(e) => {
|
||||
eprintln!("warning: skipping {}: {e}", path.display());
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("warning: skipping {}: {e}", path.display());
|
||||
continue;
|
||||
|
||||
@@ -133,3 +133,16 @@ fn cli_binary_reports_duplicates() {
|
||||
assert!(stdout.contains("[exact]"), "output should contain exact groups: {stdout}");
|
||||
assert!(stdout.contains("[similar]"), "output should contain similar groups: {stdout}");
|
||||
}
|
||||
|
||||
/// Checks that an image whose extension disagrees with its content is still scanned.
///
/// Per the fixture description, `fake_png.png` holds JPEG data under a `.png` name and
/// is a copy of `orig.jpg`. The test passes when `scan_images` returns an entry for the
/// misnamed file and that entry's `sha256` equals the one reported for `orig.jpg`.
#[test]
fn misnamed_jpeg_as_png_still_scanned() {
    // NOTE(review): this is an absolute, machine-specific fixture path, so the test can
    // only pass on a host with this exact layout. Consider resolving it relative to the
    // crate root once the fixture location inside the repository is confirmed.
    let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images");
    let entries = scan_images(dir).expect("scan");

    // fake_png.png is actually JPEG data with a .png extension.
    let fake = entries
        .iter()
        .find(|e| e.path.ends_with("fake_png.png"))
        .expect("misnamed JPEG-as-PNG should be scanned via magic bytes");

    // Fail with a readable message (not a bare unwrap panic) if the reference file is missing.
    let orig = entries
        .iter()
        .find(|e| e.path.ends_with("orig.jpg"))
        .expect("orig.jpg should be present in the scan results");

    // The misnamed file is a copy of orig.jpg, so the content hashes must match.
    assert_eq!(orig.sha256, fake.sha256, "same content = same sha256");
}
|
||||
|
||||
Reference in New Issue
Block a user