feat: Bessere Erkennung von Doppel-Extensions
- .tar.gz, .tar.bz2, .tar.xz, .tar.zst, .tar.lz, .tar.Z werden jetzt korrekt als Einheit behandelt
- "my archive.tar.gz" → "my_archive.tar.gz" (nicht mehr "my_archive.gz")
- Neue Hilfsfunktion split_filename()
- Tests für Doppel-Extensions hinzugefügt

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
608acaffc6
commit
d3c6ae2503
1 changed files with 74 additions and 8 deletions
|
|
@ -13,6 +13,38 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
|
||||||
static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
|
static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
|
||||||
static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
|
static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
|
||||||
|
|
||||||
|
/// Known compound ("double") extensions that must be treated as a single
/// unit, e.g. `.tar.gz`.
///
/// Entries are matched ASCII case-insensitively by [`split_filename`], so
/// `.TAR.GZ` and `.tar.z` are recognised as well.
const DOUBLE_EXTENSIONS: &[&str] = &[
    ".tar.gz",
    ".tar.bz2",
    ".tar.xz",
    ".tar.zst",
    ".tar.lz",
    ".tar.Z",
];

/// Splits `filename` into `(base, extension)`, keeping known double
/// extensions together.
///
/// * If `filename` ends (ASCII case-insensitively) with one of
///   [`DOUBLE_EXTENSIONS`] and something precedes it, the whole compound
///   suffix is returned as the extension, spelled exactly as it appears in
///   `filename`.
/// * Otherwise the name is split at the last `.`; the returned extension
///   includes that leading dot.
/// * If there is no `.`, or the only `.` is the first character (empty base),
///   the whole name is returned as the base with an empty extension.
fn split_filename(filename: &str) -> (String, String) {
    let compound = DOUBLE_EXTENSIONS.iter().find_map(|&known| {
        // Names shorter than the candidate suffix cannot match.
        let split = filename.len().checked_sub(known.len())?;
        // `get` returns `None` instead of panicking when `split` falls inside
        // a multi-byte character, which also means "no match".
        let base = filename.get(..split)?;
        let tail = filename.get(split..)?;
        // An empty base means the name consists only of the suffix; such
        // names are handled by the single-extension fallback below.
        if !base.is_empty() && tail.eq_ignore_ascii_case(known) {
            Some((base, tail))
        } else {
            None
        }
    });
    if let Some((base, ext)) = compound {
        return (base.to_string(), ext.to_string());
    }

    // Default case: only the last extension is split off.
    match filename.rsplit_once('.') {
        Some((base, ext)) if !base.is_empty() => (base.to_string(), format!(".{ext}")),
        _ => (filename.to_string(), String::new()),
    }
}
|
||||||
|
|
||||||
/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
|
/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
|
||||||
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
|
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
|
||||||
let original = name.to_string_lossy();
|
let original = name.to_string_lossy();
|
||||||
|
|
@ -24,10 +56,7 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<St
|
||||||
};
|
};
|
||||||
|
|
||||||
// Stamm und Extension trennen (nur im Rest, nicht im hidden_prefix)
|
// Stamm und Extension trennen (nur im Rest, nicht im hidden_prefix)
|
||||||
let (mut base, mut ext) = match rest.rsplit_once('.') {
|
let (mut base, mut ext) = split_filename(rest);
|
||||||
Some((b, e)) if !b.is_empty() => (b.to_string(), format!(".{e}")),
|
|
||||||
_ => (rest.to_string(), String::new()),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Platzhalter (C++, c++, C#, c#) anlegen
|
// Platzhalter (C++, c++, C#, c#) anlegen
|
||||||
base = preserve_special_identifiers(&base);
|
base = preserve_special_identifiers(&base);
|
||||||
|
|
@ -290,19 +319,56 @@ mod tests {
|
||||||
Some("test_file.txt".to_string())
|
Some("test_file.txt".to_string())
|
||||||
);
|
);
|
||||||
|
|
||||||
// Double extension (currently only keeps last)
|
// Double extension with spaces in base name
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
clean_filename(OsStr::new("archive.tar.gz"), &config, false),
|
clean_filename(OsStr::new("my archive.tar.gz"), &config, false),
|
||||||
None // No special chars to clean
|
Some("my_archive.tar.gz".to_string())
|
||||||
);
|
);
|
||||||
|
|
||||||
// Multiple dots
|
// Other double extensions
|
||||||
|
assert_eq!(
|
||||||
|
clean_filename(OsStr::new("backup file.tar.bz2"), &config, false),
|
||||||
|
Some("backup_file.tar.bz2".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
clean_filename(OsStr::new("data set.tar.xz"), &config, false),
|
||||||
|
Some("data_set.tar.xz".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
// Multiple dots (not a double extension)
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
clean_filename(OsStr::new("foo..bar.txt"), &config, false),
|
clean_filename(OsStr::new("foo..bar.txt"), &config, false),
|
||||||
Some("foo.bar.txt".to_string())
|
Some("foo.bar.txt".to_string())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_split_filename() {
    // Each row is (input, expected base, expected extension).
    let cases = [
        // Double extensions are kept together.
        ("archive.tar.gz", "archive", ".tar.gz"),
        ("backup.tar.bz2", "backup", ".tar.bz2"),
        // Single extension.
        ("file.txt", "file", ".txt"),
        // Multiple dots that do not form a known double extension.
        ("foo.bar.txt", "foo.bar", ".txt"),
        // No extension.
        ("README", "README", ""),
        // A name consisting only of a double extension has no base to keep,
        // so it falls back to splitting at the last dot.
        (".tar.gz", ".tar", ".gz"),
        // A leading dot with nothing before it is not an extension separator.
        (".bashrc", ".bashrc", ""),
    ];

    for &(input, base, ext) in &cases {
        assert_eq!(
            split_filename(input),
            (base.to_string(), ext.to_string()),
            "unexpected split for {input:?}"
        );
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_clean_filename_special_identifiers() {
|
fn test_clean_filename_special_identifiers() {
|
||||||
let config = Config::default();
|
let config = Config::default();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue