From 9dc8a495bbb57dfaad4163e0423b93f7e5ddc068 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 27 Apr 2026 23:57:20 +0000 Subject: [PATCH] feat: detect image format from magic bytes, not file extension - Fixes misnamed files (e.g. JPEG saved as .png) being skipped - Uses image::ImageReader with guessed format from content - Fixes Android screenshots with wrong extension being skipped - New test: misnamed_jpeg_as_png_still_scanned - 22 tests passing --- src/lib.rs | 12 ++++++++++-- tests/image_phase.rs | 13 +++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 15f9819..a0147c8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,10 @@ use anyhow::Result; use image::imageops::FilterType; +use image::ImageReader; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; use std::fs; +use std::io::Cursor; use std::path::{Path, PathBuf}; use walkdir::WalkDir; @@ -48,8 +50,14 @@ pub fn scan_images(root: &Path) -> Result> { } }; let sha256 = format!("{:x}", Sha256::digest(&bytes)); - let img = match image::open(path) { - Ok(i) => i, + let img = match ImageReader::new(Cursor::new(&bytes)).with_guessed_format() { + Ok(reader) => match reader.decode() { + Ok(i) => i, + Err(e) => { + eprintln!("warning: skipping {}: {e}", path.display()); + continue; + } + }, Err(e) => { eprintln!("warning: skipping {}: {e}", path.display()); continue; diff --git a/tests/image_phase.rs b/tests/image_phase.rs index 28ba43b..61ecfe4 100644 --- a/tests/image_phase.rs +++ b/tests/image_phase.rs @@ -133,3 +133,16 @@ fn cli_binary_reports_duplicates() { assert!(stdout.contains("[exact]"), "output should contain exact groups: {stdout}"); assert!(stdout.contains("[similar]"), "output should contain similar groups: {stdout}"); } + +#[test] +fn misnamed_jpeg_as_png_still_scanned() { + // Fixture fake_png.png holds JPEG bytes under a .png name, so scan_images must pick the decoder from the file content rather than the extension. NOTE(review): the absolute fixture path below ties this test to one machine layout; confirm that is intended. + let dir = Path::new("/a0/usr/projects/deduper/.a0proj/test_media/images"); + let entries =
scan_images(dir).expect("scan"); + let fake = entries.iter().find(|e| e.path.ends_with("fake_png.png")); + assert!(fake.is_some(), "misnamed JPEG-as-PNG should be scanned via magic bytes"); + // fake_png.png is described as a copy of orig.jpg, so both entries must report the same SHA-256 of their file bytes + let orig = entries.iter().find(|e| e.path.ends_with("orig.jpg")).unwrap(); + let fake = fake.unwrap(); + assert_eq!(orig.sha256, fake.sha256, "same content = same sha256"); +}