diff --git a/Cargo.lock b/Cargo.lock index 77ee2a6..4f0934a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,6 +221,19 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -287,6 +300,7 @@ dependencies = [ "anyhow", "base64", "image", + "indicatif", "open", "rusqlite", "serde", @@ -312,6 +326,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "equator" version = "0.4.2" @@ -497,6 +517,19 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8" +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "interpolate_name" version = "0.2.4" @@ -552,6 +585,16 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lebe" version = "0.5.3" @@ -716,6 +759,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.21.4" @@ -770,6 +819,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1133,6 +1188,12 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "v_frame" version = "0.3.9" @@ -1220,6 +1281,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "weezl" version = "0.1.12" @@ -1232,7 +1303,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1241,6 +1312,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -1250,6 +1330,70 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "wit-bindgen" version = "0.57.1" diff --git a/Cargo.toml b/Cargo.toml index e91dd26..f169821 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ base64 = "0.22" serde = { version = "1", features = ["derive"] } serde_json = "1" rusqlite = { version = "0.31", features = ["bundled"] } +indicatif = "0.17" diff --git a/src/lib.rs b/src/lib.rs index ce6c990..6883f28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,48 @@ pub fn scan_images(root: &Path) -> Result> { Ok(out) } +pub fn collect_image_paths(root: &Path) -> Result> { + let mut paths = Vec::new(); + for entry in WalkDir::new(root).follow_links(true) { + let entry = entry?; + if entry.file_type().is_file() && is_image_path(entry.path()) { + paths.push(entry.path().to_path_buf()); + } + } + Ok(paths) +} + +pub fn process_image(path: &Path) -> Option { + let bytes = match fs::read(path) { + Ok(b) => b, + Err(e) => { + eprintln!("warning: skipping {}: {e}", path.display()); + return None; + } + }; + let sha256 = format!("{:x}", Sha256::digest(&bytes)); + let img = match ImageReader::new(Cursor::new(&bytes)).with_guessed_format() { + Ok(reader) => match reader.decode() { + Ok(i) => i, + Err(e) => { + eprintln!("warning: skipping {}: {e}", path.display()); + return None; + } + }, + Err(e) => { + eprintln!("warning: skipping {}: {e}", path.display()); + return None; + } + }; + let dhash = compute_dhash(&img); + Some(ImageEntry { + path: path.to_path_buf(), + sha256, + dhash, + file_size: bytes.len() as u64, + }) +} + pub fn compute_dhash(img: &image::DynamicImage) -> u64 { let gray = img .grayscale() diff --git a/src/main.rs b/src/main.rs index c64cc96..89ff67c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ -use deduper::{find_duplicate_groups, scan_images, DuplicateKind}; +use deduper::{find_duplicate_groups, collect_image_paths, process_image, DuplicateKind}; use deduper::ignore_db; +use indicatif::{ProgressBar, ProgressStyle}; use std::env; use std::path::Path; @@ -49,7 +50,8 @@ fn main() { }; let root = Path::new(&config.root); - let entries = match scan_images(root) { + + let paths = match collect_image_paths(root) { Ok(v) => v, Err(e) => { eprintln!("scan error: {e}"); @@ -57,6 +59,21 @@ fn main() { } }; + let pb = ProgressBar::new(paths.len() as u64); + pb.set_style(ProgressStyle::default_bar() + .template("{spinner:.red} [{bar:40.cyan/blue}] {pos}/{len} images ({eta})") + .unwrap() + .progress_chars("=>-")); + + let mut entries = Vec::with_capacity(paths.len()); + for path in &paths { + if let Some(entry) = process_image(path) { + entries.push(entry); + } + pb.inc(1); + } + pb.finish_with_message(format!("{} images processed", entries.len())); + let mut groups = find_duplicate_groups(&entries, config.threshold); // Filter out ignored groups