Implement sequences feature v1.1.0
- Add -s/--sequence option to select transformation sequences - Add -L flag to list all available sequences - Implement 5 hardcoded sequences: default, lower, upper, minimal, utf-8 - Refactor clean_filename() to support sequence-based transformations - Update all tests to pass sequence parameter (25 tests passing) - Add 8 new integration tests for sequence functionality - Update documentation (README, CHANGELOG, manpage) - Update shell completions (bash, zsh, fish) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
d78e318d8a
commit
2ec4d12d6c
12 changed files with 501 additions and 52 deletions
|
|
@ -21,6 +21,14 @@ pub struct Cli {
|
|||
#[clap(short = 'r', long)]
|
||||
pub recursive: bool,
|
||||
|
||||
/// Wählt eine Transformations-Sequenz aus (default, lower, upper, minimal, utf-8)
|
||||
#[clap(short = 's', long, value_name = "NAME")]
|
||||
pub sequence: Option<String>,
|
||||
|
||||
/// Listet alle verfügbaren Sequences auf
|
||||
#[clap(short = 'L')]
|
||||
pub list_sequences: bool,
|
||||
|
||||
/// Ausgaben unterdrücken (keine Umbenennungsinfos auf stdout)
|
||||
#[clap(short, long)]
|
||||
pub quiet: bool,
|
||||
|
|
|
|||
66
src/main.rs
66
src/main.rs
|
|
@ -12,7 +12,7 @@ use glob::Pattern;
|
|||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use log::{debug, error, info};
|
||||
use rayon::prelude::*;
|
||||
use sanitizer::{clean_filename, is_excluded, is_safe_rename};
|
||||
use sanitizer::{clean_filename, is_excluded, is_safe_rename, Sequence};
|
||||
use std::fs;
|
||||
use std::io::IsTerminal;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -60,6 +60,28 @@ fn main() -> Result<()> {
|
|||
colored::control::set_override(false);
|
||||
}
|
||||
|
||||
// -L Option: Liste Sequences und beende
|
||||
if args.list_sequences {
|
||||
list_sequences(&args);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Sequence auswählen
|
||||
let sequence = if let Some(seq_name) = &args.sequence {
|
||||
Sequence::find(seq_name).ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"Unbekannte Sequence: '{}'. Nutze -L um verfügbare Sequences anzuzeigen.",
|
||||
seq_name
|
||||
)
|
||||
})?
|
||||
} else {
|
||||
Sequence::default()
|
||||
};
|
||||
|
||||
if args.verbose {
|
||||
info!("Verwende Sequence: {}", sequence.name);
|
||||
}
|
||||
|
||||
// Config-Datei laden: entweder --conf oder Standard-Hierarchie
|
||||
let config = if let Some(config_path) = &args.config_file {
|
||||
Config::from_file(config_path, args.verbose)
|
||||
|
|
@ -150,7 +172,7 @@ fn main() -> Result<()> {
|
|||
|
||||
// Dateiname ermitteln und bereinigen
|
||||
let filename = old_path.file_name()?;
|
||||
let new_name = clean_filename(filename, &config, false)?;
|
||||
let new_name = clean_filename(filename, &config, &sequence, false)?;
|
||||
let new_path = old_path.with_file_name(&new_name);
|
||||
|
||||
Some(RenameOperation {
|
||||
|
|
@ -176,7 +198,7 @@ fn main() -> Result<()> {
|
|||
}
|
||||
|
||||
let filename = old_path.file_name()?;
|
||||
let new_name = clean_filename(filename, &config, false)?;
|
||||
let new_name = clean_filename(filename, &config, &sequence, false)?;
|
||||
let new_path = old_path.with_file_name(&new_name);
|
||||
|
||||
Some(RenameOperation {
|
||||
|
|
@ -264,3 +286,41 @@ fn main() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Listet alle verfügbaren Sequences auf
|
||||
fn list_sequences(args: &Cli) {
|
||||
println!("Verfügbare Sequences:");
|
||||
println!();
|
||||
|
||||
for seq in Sequence::all() {
|
||||
println!(" {}", seq.name.bold());
|
||||
|
||||
if args.verbose {
|
||||
println!(" Description: {}", seq.description);
|
||||
println!(
|
||||
" Umlauts → ASCII: {}",
|
||||
if seq.apply_umlauts { "yes" } else { "no" }
|
||||
);
|
||||
println!(" Case transform: {:?}", seq.apply_case);
|
||||
println!(
|
||||
" Emoji handling: {}",
|
||||
if seq.apply_emojis {
|
||||
"replace"
|
||||
} else {
|
||||
"keep"
|
||||
}
|
||||
);
|
||||
println!(
|
||||
" Mode: {}",
|
||||
if seq.minimal_mode { "minimal" } else { "full" }
|
||||
);
|
||||
} else {
|
||||
println!(" {}", seq.description);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
if !args.verbose {
|
||||
println!("Nutze -L -v für detaillierte Informationen über jede Sequence.");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
215
src/sanitizer.rs
215
src/sanitizer.rs
|
|
@ -13,6 +13,82 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
|
|||
static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
|
||||
static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
|
||||
|
||||
/// A named, built-in transformation sequence.
///
/// A sequence is a bundle of switches that `clean_filename` consults to
/// decide which cleaning steps run for a file name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Sequence {
    /// Identifier used to select the sequence; [`Sequence::find`] matches it exactly.
    pub name: &'static str,
    /// Human-readable summary shown when sequences are listed.
    pub description: &'static str,
    /// Whether German umlauts are transliterated to ASCII.
    pub apply_umlauts: bool,
    /// Case transformation applied to base name and extension.
    pub apply_case: CaseTransform,
    /// Whether emojis and superscript characters are replaced.
    pub apply_emojis: bool,
    /// When set, only spaces and a few dangerous characters are replaced
    /// instead of every character outside the allowed set.
    pub minimal_mode: bool,
}

/// Case transformation a [`Sequence`] applies to a file name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CaseTransform {
    /// Leave the case untouched.
    None,
    /// Convert to lowercase.
    Lower,
    /// Convert to uppercase.
    Upper,
}

impl Sequence {
    /// The sequence used when none is selected explicitly.
    const DEFAULT: Sequence = Sequence {
        name: "default",
        description: "Standard transformation: spaces→underscores, umlauts→ASCII, remove special chars",
        apply_umlauts: true,
        apply_case: CaseTransform::None,
        apply_emojis: true,
        minimal_mode: false,
    };

    /// Table of all built-in sequences, in listing order.
    ///
    /// Kept as a constant so lookups do not have to rebuild the table.
    const BUILTIN: [Sequence; 5] = [
        Self::DEFAULT,
        Sequence {
            name: "lower",
            description: "Like default, but convert everything to lowercase",
            apply_umlauts: true,
            apply_case: CaseTransform::Lower,
            apply_emojis: true,
            minimal_mode: false,
        },
        Sequence {
            name: "upper",
            description: "Like default, but convert everything to UPPERCASE",
            apply_umlauts: true,
            apply_case: CaseTransform::Upper,
            apply_emojis: true,
            minimal_mode: false,
        },
        Sequence {
            name: "minimal",
            description: "Minimal changes: only replace spaces, keep umlauts and UTF-8",
            apply_umlauts: false,
            apply_case: CaseTransform::None,
            apply_emojis: false,
            minimal_mode: true,
        },
        Sequence {
            name: "utf-8",
            description: "UTF-8 friendly: spaces→underscores, keep umlauts, remove special chars",
            apply_umlauts: false,
            apply_case: CaseTransform::None,
            apply_emojis: true,
            minimal_mode: false,
        },
    ];

    /// Returns all available sequences, in listing order.
    pub fn all() -> Vec<Sequence> {
        Self::BUILTIN.to_vec()
    }

    /// Looks up a sequence by its exact (case-sensitive) name.
    ///
    /// Returns `None` when no built-in sequence carries that name.
    pub fn find(name: &str) -> Option<Sequence> {
        Self::BUILTIN.iter().find(|seq| seq.name == name).cloned()
    }
}

/// `Sequence::default()` yields the built-in "default" sequence.
///
/// Implemented as the standard trait (instead of an inherent method) so the
/// type also works with `Default`-based APIs; existing `Sequence::default()`
/// calls resolve to this implementation unchanged.
impl Default for Sequence {
    fn default() -> Self {
        Sequence::DEFAULT
    }
}
|
||||
|
||||
// Bekannte Doppel-Extensions (z.B. .tar.gz)
|
||||
const DOUBLE_EXTENSIONS: &[&str] = &[
|
||||
".tar.gz",
|
||||
|
|
@ -45,8 +121,13 @@ fn split_filename(filename: &str) -> (String, String) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
|
||||
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
|
||||
/// Bereinigt den übergebenen Dateinamen mit gegebener Sequence.
|
||||
pub fn clean_filename(
|
||||
name: &OsStr,
|
||||
config: &Config,
|
||||
sequence: &Sequence,
|
||||
verbose: bool,
|
||||
) -> Option<String> {
|
||||
let original = name.to_string_lossy();
|
||||
|
||||
// Versteckte Dateien (mit führendem Punkt) korrekt behandeln
|
||||
|
|
@ -62,35 +143,64 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<St
|
|||
base = preserve_special_identifiers(&base);
|
||||
ext = preserve_special_identifiers(&ext);
|
||||
|
||||
// 1) Konfig-Replacements anwenden (zuerst)
|
||||
// 1) Config-Replacements anwenden (immer zuerst)
|
||||
for (k, v) in &config.replacements {
|
||||
base = base.replace(k, v);
|
||||
}
|
||||
|
||||
// 2) Danach hart-codierte Ersetzungen anwenden
|
||||
// 2) Sequence-basierte Umlaut-Ersetzung
|
||||
if sequence.apply_umlauts {
|
||||
base = apply_umlaut_replacements(&base);
|
||||
}
|
||||
|
||||
// 3) Hardcoded replacements (Apostroph etc.)
|
||||
base = apply_hardcoded_replacements(&base);
|
||||
|
||||
// 3) Emojis und hochgestellte Zeichen ersetzen
|
||||
base = replace_emojis_and_superscript(&base);
|
||||
// 4) Case-Transformation (auf base UND extension anwenden)
|
||||
match sequence.apply_case {
|
||||
CaseTransform::Lower => {
|
||||
base = base.to_lowercase();
|
||||
ext = ext.to_lowercase();
|
||||
}
|
||||
CaseTransform::Upper => {
|
||||
base = base.to_uppercase();
|
||||
ext = ext.to_uppercase();
|
||||
}
|
||||
CaseTransform::None => {}
|
||||
}
|
||||
|
||||
// 4) Entfernen/Ersetzen aller übrigen ungültigen Zeichen
|
||||
base = RE_INVALID.replace_all(&base, "_").to_string();
|
||||
// 5) Emojis ersetzen (wenn aktiviert)
|
||||
if sequence.apply_emojis {
|
||||
base = replace_emojis_and_superscript(&base);
|
||||
}
|
||||
|
||||
// 6) Ungültige Zeichen behandeln
|
||||
if sequence.minimal_mode {
|
||||
// Minimal: Nur Leerzeichen und gefährliche Zeichen
|
||||
base = base.replace(' ', "_");
|
||||
// Entferne nur absolut gefährliche Zeichen
|
||||
base = base
|
||||
.replace('/', "_")
|
||||
.replace('\\', "_")
|
||||
.replace('\0', "_")
|
||||
.replace('\n', "_");
|
||||
} else {
|
||||
// Standard: Alle ungültigen Zeichen → Unterstrich
|
||||
base = RE_INVALID.replace_all(&base, "_").to_string();
|
||||
}
|
||||
|
||||
// Ungültige Kombinationen aus Punkt und Unterstrich
|
||||
base = RE_ADJACENT.replace_all(&base, ".").to_string();
|
||||
|
||||
// Mehrfache Punkte/Unterstriche auf einen reduzieren
|
||||
base = RE_MULTI
|
||||
.replace_all(
|
||||
&base,
|
||||
|caps: &Captures| {
|
||||
if caps[0].contains('.') {
|
||||
"."
|
||||
} else {
|
||||
"_"
|
||||
}
|
||||
},
|
||||
)
|
||||
.replace_all(&base, |caps: &Captures| {
|
||||
if caps[0].contains('.') {
|
||||
"."
|
||||
} else {
|
||||
"_"
|
||||
}
|
||||
})
|
||||
.to_string();
|
||||
|
||||
// Führender Punkt soll bleiben, führende Unterstriche sollen verschwinden
|
||||
|
|
@ -149,6 +259,18 @@ fn apply_hardcoded_replacements(input: &str) -> String {
|
|||
.replace("ˆ", "_")
|
||||
}
|
||||
|
||||
/// Transliterates German umlauts and sharp s to their ASCII equivalents.
///
/// `ä/ö/ü` become `ae/oe/ue`, `Ä/Ö/Ü` become `Ae/Oe/Ue`, `ß` becomes `ss`
/// and the capital sharp s `ẞ` becomes `SS`. Umlauts written in decomposed
/// form (base letter followed by U+0308 COMBINING DIAERESIS, as commonly
/// produced by macOS file systems) are transliterated the same way. All
/// other characters are copied unchanged.
fn apply_umlaut_replacements(input: &str) -> String {
    const COMBINING_DIAERESIS: char = '\u{0308}';

    // No replacement is longer (in bytes) than the text it replaces, so a
    // single allocation of the input length always suffices.
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        // Decomposed umlaut: base vowel + combining diaeresis.
        if chars.peek() == Some(&COMBINING_DIAERESIS) {
            let decomposed = match ch {
                'a' => Some("ae"),
                'o' => Some("oe"),
                'u' => Some("ue"),
                'A' => Some("Ae"),
                'O' => Some("Oe"),
                'U' => Some("Ue"),
                _ => None,
            };
            if let Some(replacement) = decomposed {
                out.push_str(replacement);
                chars.next(); // consume the combining mark
                continue;
            }
        }

        // Precomposed characters.
        match ch {
            'ä' => out.push_str("ae"),
            'ö' => out.push_str("oe"),
            'ü' => out.push_str("ue"),
            'Ä' => out.push_str("Ae"),
            'Ö' => out.push_str("Oe"),
            'Ü' => out.push_str("Ue"),
            'ß' => out.push_str("ss"),
            'ẞ' => out.push_str("SS"),
            other => out.push(other),
        }
    }

    out
}
|
||||
|
||||
/// Entfernt am Anfang nur Unterstriche, einen führenden Punkt (.) bewahrt es.
|
||||
fn trim_leading_underscores_preserve_leading_dot(s: &str) -> String {
|
||||
let mut chars = s.chars().peekable();
|
||||
|
|
@ -290,33 +412,29 @@ mod tests {
|
|||
use std::ffi::OsStr;
|
||||
|
||||
fn make_test_config() -> Config {
|
||||
let mut replacements = std::collections::HashMap::new();
|
||||
replacements.insert("ä".to_string(), "ae".to_string());
|
||||
replacements.insert("ö".to_string(), "oe".to_string());
|
||||
replacements.insert("ü".to_string(), "ue".to_string());
|
||||
replacements.insert("ß".to_string(), "ss".to_string());
|
||||
Config { replacements }
|
||||
Config::default()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clean_filename_basic() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// Spaces should become underscores
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("test file.txt"), &config, false),
|
||||
clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
|
||||
Some("test_file.txt".to_string())
|
||||
);
|
||||
|
||||
// Parentheses should become underscores
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("file (1).txt"), &config, false),
|
||||
clean_filename(OsStr::new("file (1).txt"), &config, &sequence, false),
|
||||
Some("file_1.txt".to_string())
|
||||
);
|
||||
|
||||
// Multiple underscores should be collapsed
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("test__file.txt"), &config, false),
|
||||
clean_filename(OsStr::new("test__file.txt"), &config, &sequence, false),
|
||||
Some("test_file.txt".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -324,28 +442,29 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_hidden_files() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// Hidden files should keep their leading dot
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new(".gitignore"), &config, false),
|
||||
clean_filename(OsStr::new(".gitignore"), &config, &sequence, false),
|
||||
None // No change needed
|
||||
);
|
||||
|
||||
// Hidden files with spaces
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new(".my config"), &config, false),
|
||||
clean_filename(OsStr::new(".my config"), &config, &sequence, false),
|
||||
Some(".my_config".to_string())
|
||||
);
|
||||
|
||||
// Hidden files with extension
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new(".test file.txt"), &config, false),
|
||||
clean_filename(OsStr::new(".test file.txt"), &config, &sequence, false),
|
||||
Some(".test_file.txt".to_string())
|
||||
);
|
||||
|
||||
// Multiple leading dots
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("...strange"), &config, false),
|
||||
clean_filename(OsStr::new("...strange"), &config, &sequence, false),
|
||||
Some(".unnamed.strange".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -353,20 +472,21 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_umlauts() {
|
||||
let config = make_test_config();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// German umlauts
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("Müller.pdf"), &config, false),
|
||||
clean_filename(OsStr::new("Müller.pdf"), &config, &sequence, false),
|
||||
Some("Mueller.pdf".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("schön.txt"), &config, false),
|
||||
clean_filename(OsStr::new("schön.txt"), &config, &sequence, false),
|
||||
Some("schoen.txt".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("Größe.doc"), &config, false),
|
||||
clean_filename(OsStr::new("Größe.doc"), &config, &sequence, false),
|
||||
Some("Groesse.doc".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -374,33 +494,34 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_extensions() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// Single extension
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("test file.txt"), &config, false),
|
||||
clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
|
||||
Some("test_file.txt".to_string())
|
||||
);
|
||||
|
||||
// Double extension with spaces in base name
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("my archive.tar.gz"), &config, false),
|
||||
clean_filename(OsStr::new("my archive.tar.gz"), &config, &sequence, false),
|
||||
Some("my_archive.tar.gz".to_string())
|
||||
);
|
||||
|
||||
// Other double extensions
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("backup file.tar.bz2"), &config, false),
|
||||
clean_filename(OsStr::new("backup file.tar.bz2"), &config, &sequence, false),
|
||||
Some("backup_file.tar.bz2".to_string())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("data set.tar.xz"), &config, false),
|
||||
clean_filename(OsStr::new("data set.tar.xz"), &config, &sequence, false),
|
||||
Some("data_set.tar.xz".to_string())
|
||||
);
|
||||
|
||||
// Multiple dots (not a double extension)
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("foo..bar.txt"), &config, false),
|
||||
clean_filename(OsStr::new("foo..bar.txt"), &config, &sequence, false),
|
||||
Some("foo.bar.txt".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -434,16 +555,17 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_special_identifiers() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// C++ should be preserved
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("test C++.txt"), &config, false),
|
||||
clean_filename(OsStr::new("test C++.txt"), &config, &sequence, false),
|
||||
Some("test_C++.txt".to_string())
|
||||
);
|
||||
|
||||
// C# should be preserved
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("guide C#.pdf"), &config, false),
|
||||
clean_filename(OsStr::new("guide C#.pdf"), &config, &sequence, false),
|
||||
Some("guide_C#.pdf".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -451,15 +573,16 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_no_change_needed() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// Already clean filenames should return None
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("clean_file.txt"), &config, false),
|
||||
clean_filename(OsStr::new("clean_file.txt"), &config, &sequence, false),
|
||||
None
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("another-file.pdf"), &config, false),
|
||||
clean_filename(OsStr::new("another-file.pdf"), &config, &sequence, false),
|
||||
None
|
||||
);
|
||||
}
|
||||
|
|
@ -467,10 +590,11 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_empty_after_cleaning() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// File with only special chars should become "unnamed"
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("###.txt"), &config, false),
|
||||
clean_filename(OsStr::new("###.txt"), &config, &sequence, false),
|
||||
Some("unnamed.txt".to_string())
|
||||
);
|
||||
}
|
||||
|
|
@ -478,10 +602,11 @@ mod tests {
|
|||
#[test]
|
||||
fn test_clean_filename_apostrophe() {
|
||||
let config = Config::default();
|
||||
let sequence = Sequence::default();
|
||||
|
||||
// Apostrophes should be removed (not replaced with underscore)
|
||||
assert_eq!(
|
||||
clean_filename(OsStr::new("O'Reilly.pdf"), &config, false),
|
||||
clean_filename(OsStr::new("O'Reilly.pdf"), &config, &sequence, false),
|
||||
Some("OReilly.pdf".to_string())
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue