feat: detect image format from magic bytes, not file extension

- Fixes misnamed files (e.g. JPEG saved as .png) being skipped
- Uses image::ImageReader::with_guessed_format() on the already-read bytes to detect the format from content
- Fixes Android screenshots with wrong extension being skipped
- New test: misnamed_jpeg_as_png_still_scanned
- 22 tests passing
This commit is contained in:
admin
2026-04-27 23:57:20 +00:00
parent deb5321a8a
commit 9dc8a495bb
2 changed files with 23 additions and 2 deletions

View File

@@ -1,8 +1,10 @@
use anyhow::Result;
use image::imageops::FilterType;
use image::ImageReader;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::Cursor;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
@@ -48,8 +50,14 @@ pub fn scan_images(root: &Path) -> Result<Vec<ImageEntry>> {
}
};
let sha256 = format!("{:x}", Sha256::digest(&bytes));
let img = match image::open(path) {
Ok(i) => i,
let img = match ImageReader::new(Cursor::new(&bytes)).with_guessed_format() {
Ok(reader) => match reader.decode() {
Ok(i) => i,
Err(e) => {
eprintln!("warning: skipping {}: {e}", path.display());
continue;
}
},
Err(e) => {
eprintln!("warning: skipping {}: {e}", path.display());
continue;