Implement sequences feature v1.1.0

- Add -s/--sequence option to select transformation sequences
- Add -L flag to list all available sequences
- Implement 5 hardcoded sequences: default, lower, upper, minimal, utf-8
- Refactor clean_filename() to support sequence-based transformations
- Update all tests to pass sequence parameter (25 tests passing)
- Add 8 new integration tests for sequence functionality
- Update documentation (README, CHANGELOG, manpage)
- Update shell completions (bash, zsh, fish)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-10 18:38:23 +01:00 · 2026-02-10 18:38:23 +01:00 · 2ec4d12d6c
commit 2ec4d12d6c
parent d78e318d8a
12 changed files with 501 additions and 52 deletions
--- a/src/cli.rs
+++ b/src/cli.rs
@ -21,6 +21,14 @@ pub struct Cli {
    #[clap(short = 'r', long)]
    pub recursive: bool,

+    /// Wählt eine Transformations-Sequenz aus (default, lower, upper, minimal, utf-8)
+    #[clap(short = 's', long, value_name = "NAME")]
+    pub sequence: Option<String>,
+
+    /// Listet alle verfügbaren Sequences auf
+    #[clap(short = 'L')]
+    pub list_sequences: bool,
+
    /// Ausgaben unterdrücken (keine Umbenennungsinfos auf stdout)
    #[clap(short, long)]
    pub quiet: bool,
--- a/src/main.rs
+++ b/src/main.rs
@ -12,7 +12,7 @@ use glob::Pattern;
 use indicatif::{ProgressBar, ProgressStyle};
 use log::{debug, error, info};
 use rayon::prelude::*;
-use sanitizer::{clean_filename, is_excluded, is_safe_rename};
+use sanitizer::{clean_filename, is_excluded, is_safe_rename, Sequence};
 use std::fs;
 use std::io::IsTerminal;
 use std::path::PathBuf;
@ -60,6 +60,28 @@ fn main() -> Result<()> {
        colored::control::set_override(false);
    }

+    // -L Option: Liste Sequences und beende
+    if args.list_sequences {
+        list_sequences(&args);
+        return Ok(());
+    }
+
+    // Sequence auswählen
+    let sequence = if let Some(seq_name) = &args.sequence {
+        Sequence::find(seq_name).ok_or_else(|| {
+            anyhow::anyhow!(
+                "Unbekannte Sequence: '{}'. Nutze -L um verfügbare Sequences anzuzeigen.",
+                seq_name
+            )
+        })?
+    } else {
+        Sequence::default()
+    };
+
+    if args.verbose {
+        info!("Verwende Sequence: {}", sequence.name);
+    }
+
    // Config-Datei laden: entweder --conf oder Standard-Hierarchie
    let config = if let Some(config_path) = &args.config_file {
        Config::from_file(config_path, args.verbose)
@ -150,7 +172,7 @@ fn main() -> Result<()> {

                    // Dateiname ermitteln und bereinigen
                    let filename = old_path.file_name()?;
-                    let new_name = clean_filename(filename, &config, false)?;
+                    let new_name = clean_filename(filename, &config, &sequence, false)?;
                    let new_path = old_path.with_file_name(&new_name);

                    Some(RenameOperation {
@ -176,7 +198,7 @@ fn main() -> Result<()> {
                    }

                    let filename = old_path.file_name()?;
-                    let new_name = clean_filename(filename, &config, false)?;
+                    let new_name = clean_filename(filename, &config, &sequence, false)?;
                    let new_path = old_path.with_file_name(&new_name);

                    Some(RenameOperation {
@ -264,3 +286,41 @@ fn main() -> Result<()> {

    Ok(())
 }
+
+/// Prints every sequence returned by `Sequence::all()` to stdout. Without `args.verbose` each entry shows its name and description; with it, each entry additionally lists the umlaut, case, emoji and mode settings. Returns nothing and does not touch any files.
+fn list_sequences(args: &Cli) {
+    println!("Verfügbare Sequences:");
+    println!();
+
+    for seq in Sequence::all() {
+        println!("  {}", seq.name.bold()); // `.bold()` is presumably the `colored` crate's styling method — confirm against the file's imports
+
+        if args.verbose { // verbose: description plus one line per `Sequence` field
+            println!("    Description: {}", seq.description);
+            println!(
+                "    Umlauts → ASCII: {}",
+                if seq.apply_umlauts { "yes" } else { "no" }
+            );
+            println!("    Case transform: {:?}", seq.apply_case); // printed via the `Debug` impl of `CaseTransform`
+            println!(
+                "    Emoji handling: {}",
+                if seq.apply_emojis {
+                    "replace"
+                } else {
+                    "keep"
+                }
+            );
+            println!(
+                "    Mode: {}",
+                if seq.minimal_mode { "minimal" } else { "full" }
+            );
+        } else { // compact: only the description under the name
+            println!("    {}", seq.description);
+        }
+        println!();
+    }
+
+    if !args.verbose { // point the user at the detailed listing
+        println!("Nutze -L -v für detaillierte Informationen über jede Sequence.");
+    }
+}
--- a/src/sanitizer.rs
+++ b/src/sanitizer.rs
@ -13,6 +13,82 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
 static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
 static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());

+/// Represents a transformation sequence: a named set of switches that `clean_filename` consults to decide which cleaning steps to run.
+#[derive(Debug, Clone)]
+pub struct Sequence {
+    pub name: &'static str, // identifier compared exactly (`==`) by `Sequence::find`, e.g. "default" or "utf-8"
+    pub description: &'static str, // human-readable summary printed by the `-L` listing
+    pub apply_umlauts: bool, // when true, `clean_filename` runs `apply_umlaut_replacements` on the base name
+    pub apply_case: CaseTransform, // case conversion `clean_filename` applies to both base name and extension
+    pub apply_emojis: bool, // when true, `clean_filename` runs `replace_emojis_and_superscript` on the base name
+    pub minimal_mode: bool, // when true, `clean_filename` only replaces space, '/', '\\', NUL and newline instead of applying `RE_INVALID`
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum CaseTransform { // case conversion selected by a `Sequence`
+    None, // leave case unchanged
+    Lower, // `to_lowercase()` on base name and extension
+    Upper, // `to_uppercase()` on base name and extension
+}
+
+impl Sequence {
+    /// Returns all available sequences as a freshly built `Vec` on every call, in the fixed order: default, lower, upper, minimal, utf-8.
+    pub fn all() -> Vec<Sequence> {
+        vec![
+            Sequence {
+                name: "default",
+                description: "Standard transformation: spaces→underscores, umlauts→ASCII, remove special chars",
+                apply_umlauts: true,
+                apply_case: CaseTransform::None,
+                apply_emojis: true,
+                minimal_mode: false,
+            },
+            Sequence {
+                name: "lower",
+                description: "Like default, but convert everything to lowercase",
+                apply_umlauts: true,
+                apply_case: CaseTransform::Lower,
+                apply_emojis: true,
+                minimal_mode: false,
+            },
+            Sequence {
+                name: "upper",
+                description: "Like default, but convert everything to UPPERCASE",
+                apply_umlauts: true,
+                apply_case: CaseTransform::Upper,
+                apply_emojis: true,
+                minimal_mode: false,
+            },
+            Sequence {
+                name: "minimal",
+                description: "Minimal changes: only replace spaces, keep umlauts and UTF-8",
+                apply_umlauts: false,
+                apply_case: CaseTransform::None,
+                apply_emojis: false,
+                minimal_mode: true,
+            },
+            Sequence {
+                name: "utf-8",
+                description: "UTF-8 friendly: spaces→underscores, keep umlauts, remove special chars",
+                apply_umlauts: false,
+                apply_case: CaseTransform::None,
+                apply_emojis: true,
+                minimal_mode: false,
+            },
+        ]
+    }
+
+    /// Looks up a sequence by name using an exact, case-sensitive comparison; returns `None` when no entry of `all()` matches. Rebuilds the full list via `all()` on each call.
+    pub fn find(name: &str) -> Option<Sequence> {
+        Self::all().into_iter().find(|s| s.name == name)
+    }
+
+    /// Returns the sequence named "default". Panics (via `unwrap`) only if `all()` no longer contains an entry with that name. NOTE(review): this inherent method shares its name with `Default::default`; consider implementing the `Default` trait instead.
+    pub fn default() -> Sequence {
+        Self::find("default").unwrap()
+    }
+}
+
 // Bekannte Doppel-Extensions (z.B. .tar.gz)
 const DOUBLE_EXTENSIONS: &[&str] = &[
    ".tar.gz",
@ -45,8 +121,13 @@ fn split_filename(filename: &str) -> (String, String) {
    }
 }

-/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
-pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
+/// Bereinigt den übergebenen Dateinamen mit gegebener Sequence.
+pub fn clean_filename(
+    name: &OsStr,
+    config: &Config,
+    sequence: &Sequence,
+    verbose: bool,
+) -> Option<String> {
    let original = name.to_string_lossy();

    // Versteckte Dateien (mit führendem Punkt) korrekt behandeln
@ -62,35 +143,64 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<St
    base = preserve_special_identifiers(&base);
    ext = preserve_special_identifiers(&ext);

-    // 1) Konfig-Replacements anwenden (zuerst)
+    // 1) Config-Replacements anwenden (immer zuerst)
    for (k, v) in &config.replacements {
        base = base.replace(k, v);
    }

-    // 2) Danach hart-codierte Ersetzungen anwenden
+    // 2) Sequence-basierte Umlaut-Ersetzung
+    if sequence.apply_umlauts {
+        base = apply_umlaut_replacements(&base);
+    }
+
+    // 3) Hardcoded replacements (Apostroph etc.)
    base = apply_hardcoded_replacements(&base);

-    // 3) Emojis und hochgestellte Zeichen ersetzen
-    base = replace_emojis_and_superscript(&base);
+    // 4) Case-Transformation (auf base UND extension anwenden)
+    match sequence.apply_case {
+        CaseTransform::Lower => {
+            base = base.to_lowercase();
+            ext = ext.to_lowercase();
+        }
+        CaseTransform::Upper => {
+            base = base.to_uppercase();
+            ext = ext.to_uppercase();
+        }
+        CaseTransform::None => {}
+    }

-    // 4) Entfernen/Ersetzen aller übrigen ungültigen Zeichen
-    base = RE_INVALID.replace_all(&base, "_").to_string();
+    // 5) Emojis ersetzen (wenn aktiviert)
+    if sequence.apply_emojis {
+        base = replace_emojis_and_superscript(&base);
+    }
+
+    // 6) Ungültige Zeichen behandeln
+    if sequence.minimal_mode {
+        // Minimal: Nur Leerzeichen und gefährliche Zeichen
+        base = base.replace(' ', "_");
+        // Entferne nur absolut gefährliche Zeichen
+        base = base
+            .replace('/', "_")
+            .replace('\\', "_")
+            .replace('\0', "_")
+            .replace('\n', "_");
+    } else {
+        // Standard: Alle ungültigen Zeichen → Unterstrich
+        base = RE_INVALID.replace_all(&base, "_").to_string();
+    }

    // Ungültige Kombinationen aus Punkt und Unterstrich
    base = RE_ADJACENT.replace_all(&base, ".").to_string();

    // Mehrfache Punkte/Unterstriche auf einen reduzieren
    base = RE_MULTI
-        .replace_all(
-            &base,
-            |caps: &Captures| {
-                if caps[0].contains('.') {
-                    "."
-                } else {
-                    "_"
-                }
-            },
-        )
+        .replace_all(&base, |caps: &Captures| {
+            if caps[0].contains('.') {
+                "."
+            } else {
+                "_"
+            }
+        })
        .to_string();

    // Führender Punkt soll bleiben, führende Unterstriche sollen verschwinden
@ -149,6 +259,18 @@ fn apply_hardcoded_replacements(input: &str) -> String {
        .replace("ˆ", "_")
 }

+/// Replaces German umlauts and sharp s with ASCII digraphs (ä→ae, ö→oe, ü→ue, Ä→Ae, Ö→Oe, Ü→Ue, ß→ss) and returns the result as a new `String`; all other characters are left untouched. Uppercase umlauts map to title-case digraphs, so "ÄRGER" yields "AeRGER". The capital sharp s (ẞ) is not in the list and passes through unchanged.
+fn apply_umlaut_replacements(input: &str) -> String {
+    input
+        .replace("ä", "ae")
+        .replace("ö", "oe")
+        .replace("ü", "ue")
+        .replace("Ä", "Ae")
+        .replace("Ö", "Oe")
+        .replace("Ü", "Ue")
+        .replace("ß", "ss")
+}
+
 /// Entfernt am Anfang nur Unterstriche, einen führenden Punkt (.) bewahrt es.
 fn trim_leading_underscores_preserve_leading_dot(s: &str) -> String {
    let mut chars = s.chars().peekable();
@ -290,33 +412,29 @@ mod tests {
    use std::ffi::OsStr;

    fn make_test_config() -> Config {
-        let mut replacements = std::collections::HashMap::new();
-        replacements.insert("ä".to_string(), "ae".to_string());
-        replacements.insert("ö".to_string(), "oe".to_string());
-        replacements.insert("ü".to_string(), "ue".to_string());
-        replacements.insert("ß".to_string(), "ss".to_string());
-        Config { replacements }
+        Config::default()
    }

    #[test]
    fn test_clean_filename_basic() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // Spaces should become underscores
        assert_eq!(
-            clean_filename(OsStr::new("test file.txt"), &config, false),
+            clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
            Some("test_file.txt".to_string())
        );

        // Parentheses should become underscores
        assert_eq!(
-            clean_filename(OsStr::new("file (1).txt"), &config, false),
+            clean_filename(OsStr::new("file (1).txt"), &config, &sequence, false),
            Some("file_1.txt".to_string())
        );

        // Multiple underscores should be collapsed
        assert_eq!(
-            clean_filename(OsStr::new("test__file.txt"), &config, false),
+            clean_filename(OsStr::new("test__file.txt"), &config, &sequence, false),
            Some("test_file.txt".to_string())
        );
    }
@ -324,28 +442,29 @@ mod tests {
    #[test]
    fn test_clean_filename_hidden_files() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // Hidden files should keep their leading dot
        assert_eq!(
-            clean_filename(OsStr::new(".gitignore"), &config, false),
+            clean_filename(OsStr::new(".gitignore"), &config, &sequence, false),
            None // No change needed
        );

        // Hidden files with spaces
        assert_eq!(
-            clean_filename(OsStr::new(".my config"), &config, false),
+            clean_filename(OsStr::new(".my config"), &config, &sequence, false),
            Some(".my_config".to_string())
        );

        // Hidden files with extension
        assert_eq!(
-            clean_filename(OsStr::new(".test file.txt"), &config, false),
+            clean_filename(OsStr::new(".test file.txt"), &config, &sequence, false),
            Some(".test_file.txt".to_string())
        );

        // Multiple leading dots
        assert_eq!(
-            clean_filename(OsStr::new("...strange"), &config, false),
+            clean_filename(OsStr::new("...strange"), &config, &sequence, false),
            Some(".unnamed.strange".to_string())
        );
    }
@ -353,20 +472,21 @@ mod tests {
    #[test]
    fn test_clean_filename_umlauts() {
        let config = make_test_config();
+        let sequence = Sequence::default();

        // German umlauts
        assert_eq!(
-            clean_filename(OsStr::new("Müller.pdf"), &config, false),
+            clean_filename(OsStr::new("Müller.pdf"), &config, &sequence, false),
            Some("Mueller.pdf".to_string())
        );

        assert_eq!(
-            clean_filename(OsStr::new("schön.txt"), &config, false),
+            clean_filename(OsStr::new("schön.txt"), &config, &sequence, false),
            Some("schoen.txt".to_string())
        );

        assert_eq!(
-            clean_filename(OsStr::new("Größe.doc"), &config, false),
+            clean_filename(OsStr::new("Größe.doc"), &config, &sequence, false),
            Some("Groesse.doc".to_string())
        );
    }
@ -374,33 +494,34 @@ mod tests {
    #[test]
    fn test_clean_filename_extensions() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // Single extension
        assert_eq!(
-            clean_filename(OsStr::new("test file.txt"), &config, false),
+            clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
            Some("test_file.txt".to_string())
        );

        // Double extension with spaces in base name
        assert_eq!(
-            clean_filename(OsStr::new("my archive.tar.gz"), &config, false),
+            clean_filename(OsStr::new("my archive.tar.gz"), &config, &sequence, false),
            Some("my_archive.tar.gz".to_string())
        );

        // Other double extensions
        assert_eq!(
-            clean_filename(OsStr::new("backup file.tar.bz2"), &config, false),
+            clean_filename(OsStr::new("backup file.tar.bz2"), &config, &sequence, false),
            Some("backup_file.tar.bz2".to_string())
        );

        assert_eq!(
-            clean_filename(OsStr::new("data set.tar.xz"), &config, false),
+            clean_filename(OsStr::new("data set.tar.xz"), &config, &sequence, false),
            Some("data_set.tar.xz".to_string())
        );

        // Multiple dots (not a double extension)
        assert_eq!(
-            clean_filename(OsStr::new("foo..bar.txt"), &config, false),
+            clean_filename(OsStr::new("foo..bar.txt"), &config, &sequence, false),
            Some("foo.bar.txt".to_string())
        );
    }
@ -434,16 +555,17 @@ mod tests {
    #[test]
    fn test_clean_filename_special_identifiers() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // C++ should be preserved
        assert_eq!(
-            clean_filename(OsStr::new("test C++.txt"), &config, false),
+            clean_filename(OsStr::new("test C++.txt"), &config, &sequence, false),
            Some("test_C++.txt".to_string())
        );

        // C# should be preserved
        assert_eq!(
-            clean_filename(OsStr::new("guide C#.pdf"), &config, false),
+            clean_filename(OsStr::new("guide C#.pdf"), &config, &sequence, false),
            Some("guide_C#.pdf".to_string())
        );
    }
@ -451,15 +573,16 @@ mod tests {
    #[test]
    fn test_clean_filename_no_change_needed() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // Already clean filenames should return None
        assert_eq!(
-            clean_filename(OsStr::new("clean_file.txt"), &config, false),
+            clean_filename(OsStr::new("clean_file.txt"), &config, &sequence, false),
            None
        );

        assert_eq!(
-            clean_filename(OsStr::new("another-file.pdf"), &config, false),
+            clean_filename(OsStr::new("another-file.pdf"), &config, &sequence, false),
            None
        );
    }
@ -467,10 +590,11 @@ mod tests {
    #[test]
    fn test_clean_filename_empty_after_cleaning() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // File with only special chars should become "unnamed"
        assert_eq!(
-            clean_filename(OsStr::new("###.txt"), &config, false),
+            clean_filename(OsStr::new("###.txt"), &config, &sequence, false),
            Some("unnamed.txt".to_string())
        );
    }
@ -478,10 +602,11 @@ mod tests {
    #[test]
    fn test_clean_filename_apostrophe() {
        let config = Config::default();
+        let sequence = Sequence::default();

        // Apostrophes should be removed (not replaced with underscore)
        assert_eq!(
-            clean_filename(OsStr::new("O'Reilly.pdf"), &config, false),
+            clean_filename(OsStr::new("O'Reilly.pdf"), &config, &sequence, false),
            Some("OReilly.pdf".to_string())
        );
    }