From 6c7470e7a6fa268d96fa9a2947771eb9a286588d Mon Sep 17 00:00:00 2001
From: dschlueter
Date: Tue, 10 Feb 2026 10:13:47 +0100
Subject: [PATCH] feat: Bessere Erkennung von Doppel-Extensions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- .tar.gz, .tar.bz2, .tar.xz, .tar.zst, .tar.lz, .tar.Z werden jetzt korrekt als Einheit behandelt
- "my archive.tar.gz" → "my_archive.tar.gz" (nicht mehr "my_archive.gz")
- Neue Hilfsfunktion split_filename()
- Tests für Doppel-Extensions hinzugefügt

Co-Authored-By: Claude Sonnet 4.5
---
 src/sanitizer.rs | 82 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 8 deletions(-)

diff --git a/src/sanitizer.rs b/src/sanitizer.rs
index bd7de80..2c14606 100644
--- a/src/sanitizer.rs
+++ b/src/sanitizer.rs
@@ -13,6 +13,38 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
 static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
 static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
 
+// Bekannte Doppel-Extensions (z.B. .tar.gz)
+const DOUBLE_EXTENSIONS: &[&str] = &[
+    ".tar.gz",
+    ".tar.bz2",
+    ".tar.xz",
+    ".tar.zst",
+    ".tar.lz",
+    ".tar.Z",
+];
+
+/// Trennt Dateiname in Basis und Extension, berücksichtigt Doppel-Extensions
+fn split_filename(filename: &str) -> (String, String) {
+    // Prüfe auf bekannte Doppel-Extensions
+    for double_ext in DOUBLE_EXTENSIONS {
+        if filename.ends_with(double_ext) {
+            let base_len = filename.len() - double_ext.len();
+            if base_len > 0 {
+                return (
+                    filename[..base_len].to_string(),
+                    double_ext.to_string(),
+                );
+            }
+        }
+    }
+
+    // Standard-Fall: nur letzte Extension
+    match filename.rsplit_once('.') {
+        Some((b, e)) if !b.is_empty() => (b.to_string(), format!(".{e}")),
+        _ => (filename.to_string(), String::new()),
+    }
+}
+
 /// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option { let original = name.to_string_lossy(); @@ -24,10 +56,7 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option (b.to_string(), format!(".{e}")), - _ => (rest.to_string(), String::new()), - }; + let (mut base, mut ext) = split_filename(rest); // Platzhalter (C++, c++, C#, c#) anlegen base = preserve_special_identifiers(&base); @@ -290,19 +319,56 @@ mod tests { Some("test_file.txt".to_string()) ); - // Double extension (currently only keeps last) + // Double extension with spaces in base name assert_eq!( - clean_filename(OsStr::new("archive.tar.gz"), &config, false), - None // No special chars to clean + clean_filename(OsStr::new("my archive.tar.gz"), &config, false), + Some("my_archive.tar.gz".to_string()) ); - // Multiple dots + // Other double extensions + assert_eq!( + clean_filename(OsStr::new("backup file.tar.bz2"), &config, false), + Some("backup_file.tar.bz2".to_string()) + ); + + assert_eq!( + clean_filename(OsStr::new("data set.tar.xz"), &config, false), + Some("data_set.tar.xz".to_string()) + ); + + // Multiple dots (not a double extension) assert_eq!( clean_filename(OsStr::new("foo..bar.txt"), &config, false), Some("foo.bar.txt".to_string()) ); } + #[test] + fn test_split_filename() { + // Double extensions + assert_eq!( + split_filename("archive.tar.gz"), + ("archive".to_string(), ".tar.gz".to_string()) + ); + + assert_eq!( + split_filename("backup.tar.bz2"), + ("backup".to_string(), ".tar.bz2".to_string()) + ); + + // Single extension + assert_eq!( + split_filename("file.txt"), + ("file".to_string(), ".txt".to_string()) + ); + + // No extension + assert_eq!( + split_filename("README"), + ("README".to_string(), String::new()) + ); + } + #[test] fn test_clean_filename_special_identifiers() { let config = Config::default();