feat: Bessere Erkennung von Doppel-Extensions

- .tar.gz, .tar.bz2, .tar.xz, .tar.zst, .tar.lz, .tar.Z
  werden jetzt korrekt als Einheit behandelt
- "my archive.tar.gz" → "my_archive.tar.gz" (nicht mehr "my_archive.gz")
- Neue Hilfsfunktion split_filename()
- Tests für Doppel-Extensions hinzugefügt

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-10 10:13:47 +01:00
commit 6c7470e7a6

View file

@ -13,6 +13,38 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
// Known double extensions (e.g. .tar.gz) that `split_filename` keeps together
// as a single extension instead of splitting at the last dot.
// Entries include the leading dot and are compared as exact, case-sensitive
// suffixes of the file name.
const DOUBLE_EXTENSIONS: &[&str] = &[
".tar.gz",
".tar.bz2",
".tar.xz",
".tar.zst",
".tar.lz",
".tar.Z",
];
/// Splits a file name into its stem and its extension.
///
/// A suffix listed in `DOUBLE_EXTENSIONS` (e.g. `.tar.gz`) is kept together as
/// one extension, provided a non-empty stem remains in front of it. Otherwise
/// the name is split at its last `.`.
///
/// Returns `(stem, extension)`. The extension includes its leading dot. It is
/// empty when the name contains no dot, or when nothing precedes the last dot;
/// in both cases the stem is the whole input.
fn split_filename(filename: &str) -> (String, String) {
    // Known compound extensions take precedence over the generic split. A
    // candidate that would leave an empty stem is rejected and the search
    // continues with the next table entry.
    let compound = DOUBLE_EXTENSIONS.iter().find_map(|&suffix| {
        filename
            .strip_suffix(suffix)
            .filter(|stem| !stem.is_empty())
            .map(|stem| (stem, suffix))
    });
    if let Some((stem, suffix)) = compound {
        return (stem.to_string(), suffix.to_string());
    }

    // Generic case: everything after the last dot is the extension.
    if let Some((stem, tail)) = filename.rsplit_once('.') {
        if !stem.is_empty() {
            return (stem.to_string(), format!(".{tail}"));
        }
    }

    // No usable dot: the whole name is the stem.
    (filename.to_string(), String::new())
}
/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
let original = name.to_string_lossy();
@ -24,10 +56,7 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<St
};
// Stamm und Extension trennen (nur im Rest, nicht im hidden_prefix)
let (mut base, mut ext) = match rest.rsplit_once('.') {
Some((b, e)) if !b.is_empty() => (b.to_string(), format!(".{e}")),
_ => (rest.to_string(), String::new()),
};
let (mut base, mut ext) = split_filename(rest);
// Platzhalter (C++, c++, C#, c#) anlegen
base = preserve_special_identifiers(&base);
@ -290,19 +319,56 @@ mod tests {
Some("test_file.txt".to_string())
);
// Double extension (currently only keeps last)
// Double extension with spaces in base name
assert_eq!(
clean_filename(OsStr::new("archive.tar.gz"), &config, false),
None // No special chars to clean
clean_filename(OsStr::new("my archive.tar.gz"), &config, false),
Some("my_archive.tar.gz".to_string())
);
// Multiple dots
// Other double extensions
assert_eq!(
clean_filename(OsStr::new("backup file.tar.bz2"), &config, false),
Some("backup_file.tar.bz2".to_string())
);
assert_eq!(
clean_filename(OsStr::new("data set.tar.xz"), &config, false),
Some("data_set.tar.xz".to_string())
);
// Multiple dots (not a double extension)
assert_eq!(
clean_filename(OsStr::new("foo..bar.txt"), &config, false),
Some("foo.bar.txt".to_string())
);
}
#[test]
fn test_split_filename() {
    // Each row: (input name, expected stem, expected extension).
    let cases = [
        // Double extensions stay together.
        ("archive.tar.gz", "archive", ".tar.gz"),
        ("backup.tar.bz2", "backup", ".tar.bz2"),
        // Single extension.
        ("file.txt", "file", ".txt"),
        // No extension at all.
        ("README", "README", ""),
    ];

    for (input, stem, ext) in cases {
        assert_eq!(
            split_filename(input),
            (stem.to_string(), ext.to_string()),
            "unexpected split for {input:?}"
        );
    }
}
#[test]
fn test_clean_filename_special_identifiers() {
let config = Config::default();