From 2ec4d12d6c1b0d1544ad6fd6651983f8a1ef7653 Mon Sep 17 00:00:00 2001 From: dschlueter Date: Tue, 10 Feb 2026 18:38:23 +0100 Subject: [PATCH] Implement sequences feature v1.1.0 - Add -s/--sequence option to select transformation sequences - Add -L flag to list all available sequences - Implement 5 hardcoded sequences: default, lower, upper, minimal, utf-8 - Refactor clean_filename() to support sequence-based transformations - Update all tests to pass sequence parameter (25 tests passing) - Add 8 new integration tests for sequence functionality - Update documentation (README, CHANGELOG, manpage) - Update shell completions (bash, zsh, fish) Co-Authored-By: Claude Sonnet 4.5 --- CHANGELOG.md | 24 +++++ Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 49 +++++++++ completions/_ntu | 2 + completions/ntu.bash | 7 +- completions/ntu.fish | 2 + man/ntu.1 | 44 +++++++- src/cli.rs | 8 ++ src/main.rs | 66 +++++++++++- src/sanitizer.rs | 215 +++++++++++++++++++++++++++++-------- tests/integration_tests.rs | 132 +++++++++++++++++++++++ 12 files changed, 501 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f440fc..d6c3b9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [1.1.0] - 2025-02-10 + +### Added +- **`-s/--sequence <NAME>` option**: Select transformation sequence (default, lower, upper, minimal, utf-8) +- **`-L` option**: List all available sequences (use with `-v` for details) +- **5 hardcoded sequences**: + - `default`: Current behavior (umlauts→ASCII, spaces→underscores) + - `lower`: Like default + convert to lowercase + - `upper`: Like default + convert to UPPERCASE + - `minimal`: Only replace spaces, keep UTF-8 characters + - `utf-8`: UTF-8 friendly (keep umlauts, remove special chars) + +### Changed +- Refactored `clean_filename()` to support sequence-based transformations +- Umlaut replacements moved from hardcoded to sequence-specific logic +- Case transformations now also apply to file extensions + +### Technical +- Added `Sequence` struct and `CaseTransform` enum in `sanitizer.rs` +- Extended CLI with `-s` and `-L` options +- Added `list_sequences()` function in `main.rs` +- Updated all unit tests to pass `Sequence` parameter +- Added 8 new integration tests for sequence functionality + ## [1.0.0] - 2025-02-10 ### ⚠️ BREAKING CHANGES diff --git a/Cargo.lock b/Cargo.lock index 8818daf..ae9009e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "NameToUnix" -version = "1.0.0" +version = "1.1.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 4e5d9b0..116c56f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "NameToUnix" -version = "1.0.0" +version = "1.1.0" edition = "2021" authors = ["Dieter Schlüter "] description = "Ein Tool zum Anpassen von Verzeichnis- und Dateinamen an Linux-Konventionen" diff --git a/README.md b/README.md index c186904..a8a14df 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,27 @@ specified paths and their immediate children by default. Use the `-r` or # New (v1.x): ntu -r /path/to/files ``` +## Sequences + +Starting with v1.1.0, `ntu` supports transformation sequences similar to detox. 
Sequences control how filenames are transformed: + +- **default**: Standard transformation (umlauts→ASCII, spaces→underscores, remove special chars) +- **lower**: Like default, but convert to lowercase +- **upper**: Like default, but convert to UPPERCASE +- **minimal**: Only replace spaces, keep UTF-8 characters +- **utf-8**: UTF-8 friendly (keep umlauts, replace spaces, remove special chars) + +```bash +# Use specific sequence +ntu -s lower /path/to/files + +# List all available sequences +ntu -L + +# List sequences with details +ntu -L -v +``` + ## Functions / Funktionen - Replaces spaces and special characters in file and directory names with underscores @@ -146,6 +167,20 @@ ntu -r --modify-root /path/to/files # Combine options ntu --dry-run -r -v --special /path/to/files +# Use lowercase sequence +ntu -r -s lower /path/to/files + +# Minimal mode (only spaces, keep UTF-8) +ntu -s minimal /path/to/files + +# UTF-8 friendly mode +ntu -s utf-8 /path/to/files + +# List available sequences +ntu -L + +# List sequences with details +ntu -L -v ``` **Note:** The following directories/files are automatically excluded: @@ -185,6 +220,20 @@ ntu -r --modify-root /pfad/zu/dateien # Optionen kombinieren ntu --dry-run -r -v --special /pfad/zu/dateien +# Kleinbuchstaben-Sequenz verwenden +ntu -r -s lower /pfad/zu/dateien + +# Minimal-Modus (nur Leerzeichen, UTF-8 behalten) +ntu -s minimal /pfad/zu/dateien + +# UTF-8 freundlicher Modus +ntu -s utf-8 /pfad/zu/dateien + +# Verfügbare Sequenzen auflisten +ntu -L + +# Sequenzen mit Details auflisten +ntu -L -v ``` **Hinweis:** Die folgenden Verzeichnisse/Dateien werden automatisch ausgeschlossen: diff --git a/completions/_ntu b/completions/_ntu index 3722c77..c87eb2e 100644 --- a/completions/_ntu +++ b/completions/_ntu @@ -8,6 +8,8 @@ _ntu() { _arguments -C \ '(-r --recursive)'{-r,--recursive}'[Process directories recursively]' \ + '(-s --sequence)'{-s,--sequence}'[Use transformation sequence]:sequence:(default lower upper minimal 
utf-8)' \ + '-L[List available sequences]' \ '--conf[Use specific configuration file]:config file:_files' \ '(-n --dry-run --no-changes)'{-n,--dry-run,--no-changes}'[Only preview changes without renaming]' \ '(-q --quiet)'{-q,--quiet}'[Suppress output]' \ diff --git a/completions/ntu.bash b/completions/ntu.bash index 7cc0926..1c04986 100644 --- a/completions/ntu.bash +++ b/completions/ntu.bash @@ -7,10 +7,15 @@ _ntu_completion() { prev="${COMP_WORDS[COMP_CWORD-1]}" # All available options - opts="--recursive --conf --dry-run --no-changes --quiet --force --exclude --verbose --modify-root --special --no-color --help --version -r -n -q -f -e -v -h -V" + opts="--recursive --sequence --conf --dry-run --no-changes --quiet --force --exclude --verbose --modify-root --special --no-color --help --version -r -s -L -n -q -f -e -v -h -V" # Handle options that require arguments case "${prev}" in + -s|--sequence) + # Suggest available sequences + COMPREPLY=( $(compgen -W "default lower upper minimal utf-8" -- ${cur}) ) + return 0 + ;; -e|--exclude) # Suggest glob patterns COMPREPLY=( $(compgen -W '"*.tmp" "*.log" "*.bak" "*.swp" "*~"' -- ${cur}) ) diff --git a/completions/ntu.fish b/completions/ntu.fish index d8f08fd..1d18384 100644 --- a/completions/ntu.fish +++ b/completions/ntu.fish @@ -5,6 +5,8 @@ complete -c ntu -f -d 'Sanitize file and directory names to Unix conventions' # Options complete -c ntu -s r -l recursive -d 'Process directories recursively' +complete -c ntu -s s -l sequence -d 'Use transformation sequence' -xa 'default lower upper minimal utf-8' +complete -c ntu -s L -d 'List available sequences' complete -c ntu -l conf -d 'Use specific configuration file' -r -F complete -c ntu -s q -l quiet -d 'Suppress output (no rename information)' complete -c ntu -s n -l dry-run -d 'Show what would be renamed without making changes' diff --git a/man/ntu.1 b/man/ntu.1 index 4bd38d2..7837d4a 100644 --- a/man/ntu.1 +++ b/man/ntu.1 @@ -1,4 +1,4 @@ -.TH NTU 1 "2025-02-10" 
"NameToUnix 1.0.0" "User Commands" +.TH NTU 1 "2025-02-10" "NameToUnix 1.1.0" "User Commands" .SH NAME ntu \- sanitize file and directory names to Unix conventions .SH SYNOPSIS @@ -21,6 +21,12 @@ like .tar.gz, and handles hidden files correctly. .BR \-r ", " \-\-recursive Process directories recursively (default: only immediate children) .TP +.BR \-s ", " \-\-sequence " \fINAME\fR" +Use a specific transformation sequence. Available sequences: default, lower, upper, minimal, utf\-8. Use \fB\-L\fR to list all sequences. +.TP +.BR \-L +List all available transformation sequences. Use with \fB\-v\fR for detailed information. +.TP .BR \-\-conf " \fIFILE\fR" Use a specific configuration file instead of the default hierarchy .TP @@ -78,6 +84,27 @@ Replaced with underscores. .TP .B Multiple Underscores Consecutive underscores are collapsed to a single underscore. +.SH SEQUENCES +.B ntu +supports different transformation sequences that can be selected with the \fB\-s\fR option: +.TP +.B default +Standard transformation: spaces become underscores, German umlauts are converted +to ASCII equivalents, special characters are removed or replaced. +.TP +.B lower +Like default, but converts all text to lowercase. +.TP +.B upper +Like default, but converts all text to UPPERCASE. +.TP +.B minimal +Minimal changes: only replaces spaces with underscores, keeps umlauts and +other UTF-8 characters. +.TP +.B utf\-8 +UTF-8 friendly: keeps umlauts and UTF-8 characters, replaces spaces, +removes special characters. 
.SH EXCLUDED PATTERNS By default, the following directories are automatically excluded: .PP @@ -111,6 +138,21 @@ Process multiple directories: .TP Verbose output with no colors: .B ntu \-v \-\-no\-color /path/to/directory +.TP +Use lowercase sequence: +.B ntu \-r \-s lower /path/to/files +.TP +Minimal mode (only spaces, keep UTF-8): +.B ntu \-s minimal /path/to/files +.TP +UTF-8 friendly mode: +.B ntu \-s utf\-8 /path/to/files +.TP +List all available sequences: +.B ntu \-L +.TP +List sequences with details: +.B ntu \-L \-v .SH CONFIGURATION .B ntu looks for configuration files in the following locations (in order): diff --git a/src/cli.rs b/src/cli.rs index 9bba884..afe4b30 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -21,6 +21,14 @@ pub struct Cli { #[clap(short = 'r', long)] pub recursive: bool, + /// Wählt eine Transformations-Sequenz aus (default, lower, upper, minimal, utf-8) + #[clap(short = 's', long, value_name = "NAME")] + pub sequence: Option, + + /// Listet alle verfügbaren Sequences auf + #[clap(short = 'L')] + pub list_sequences: bool, + /// Ausgaben unterdrücken (keine Umbenennungsinfos auf stdout) #[clap(short, long)] pub quiet: bool, diff --git a/src/main.rs b/src/main.rs index f1a68a5..2cd0a47 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,7 @@ use glob::Pattern; use indicatif::{ProgressBar, ProgressStyle}; use log::{debug, error, info}; use rayon::prelude::*; -use sanitizer::{clean_filename, is_excluded, is_safe_rename}; +use sanitizer::{clean_filename, is_excluded, is_safe_rename, Sequence}; use std::fs; use std::io::IsTerminal; use std::path::PathBuf; @@ -60,6 +60,28 @@ fn main() -> Result<()> { colored::control::set_override(false); } + // -L Option: Liste Sequences und beende + if args.list_sequences { + list_sequences(&args); + return Ok(()); + } + + // Sequence auswählen + let sequence = if let Some(seq_name) = &args.sequence { + Sequence::find(seq_name).ok_or_else(|| { + anyhow::anyhow!( + "Unbekannte Sequence: '{}'. 
Nutze -L um verfügbare Sequences anzuzeigen.", + seq_name + ) + })? + } else { + Sequence::default() + }; + + if args.verbose { + info!("Verwende Sequence: {}", sequence.name); + } + // Config-Datei laden: entweder --conf oder Standard-Hierarchie let config = if let Some(config_path) = &args.config_file { Config::from_file(config_path, args.verbose) @@ -150,7 +172,7 @@ fn main() -> Result<()> { // Dateiname ermitteln und bereinigen let filename = old_path.file_name()?; - let new_name = clean_filename(filename, &config, false)?; + let new_name = clean_filename(filename, &config, &sequence, false)?; let new_path = old_path.with_file_name(&new_name); Some(RenameOperation { @@ -176,7 +198,7 @@ fn main() -> Result<()> { } let filename = old_path.file_name()?; - let new_name = clean_filename(filename, &config, false)?; + let new_name = clean_filename(filename, &config, &sequence, false)?; let new_path = old_path.with_file_name(&new_name); Some(RenameOperation { @@ -264,3 +286,41 @@ fn main() -> Result<()> { Ok(()) } + +/// Listet alle verfügbaren Sequences auf +fn list_sequences(args: &Cli) { + println!("Verfügbare Sequences:"); + println!(); + + for seq in Sequence::all() { + println!(" {}", seq.name.bold()); + + if args.verbose { + println!(" Description: {}", seq.description); + println!( + " Umlauts → ASCII: {}", + if seq.apply_umlauts { "yes" } else { "no" } + ); + println!(" Case transform: {:?}", seq.apply_case); + println!( + " Emoji handling: {}", + if seq.apply_emojis { + "replace" + } else { + "keep" + } + ); + println!( + " Mode: {}", + if seq.minimal_mode { "minimal" } else { "full" } + ); + } else { + println!(" {}", seq.description); + } + println!(); + } + + if !args.verbose { + println!("Nutze -L -v für detaillierte Informationen über jede Sequence."); + } +} diff --git a/src/sanitizer.rs b/src/sanitizer.rs index 34734e6..16b2d85 100644 --- a/src/sanitizer.rs +++ b/src/sanitizer.rs @@ -13,6 +13,82 @@ static RE_INVALID: Lazy = Lazy::new(|| 
Regex::new(r"[^\w.\-]").unwrap()); static RE_ADJACENT: Lazy = Lazy::new(|| Regex::new(r"_\.|\._").unwrap()); static RE_MULTI: Lazy = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap()); +/// Repräsentiert eine Transformations-Sequenz +#[derive(Debug, Clone)] +pub struct Sequence { + pub name: &'static str, + pub description: &'static str, + pub apply_umlauts: bool, + pub apply_case: CaseTransform, + pub apply_emojis: bool, + pub minimal_mode: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum CaseTransform { + None, + Lower, + Upper, +} + +impl Sequence { + /// Gibt alle verfügbaren Sequences zurück + pub fn all() -> Vec { + vec![ + Sequence { + name: "default", + description: "Standard transformation: spaces→underscores, umlauts→ASCII, remove special chars", + apply_umlauts: true, + apply_case: CaseTransform::None, + apply_emojis: true, + minimal_mode: false, + }, + Sequence { + name: "lower", + description: "Like default, but convert everything to lowercase", + apply_umlauts: true, + apply_case: CaseTransform::Lower, + apply_emojis: true, + minimal_mode: false, + }, + Sequence { + name: "upper", + description: "Like default, but convert everything to UPPERCASE", + apply_umlauts: true, + apply_case: CaseTransform::Upper, + apply_emojis: true, + minimal_mode: false, + }, + Sequence { + name: "minimal", + description: "Minimal changes: only replace spaces, keep umlauts and UTF-8", + apply_umlauts: false, + apply_case: CaseTransform::None, + apply_emojis: false, + minimal_mode: true, + }, + Sequence { + name: "utf-8", + description: "UTF-8 friendly: spaces→underscores, keep umlauts, remove special chars", + apply_umlauts: false, + apply_case: CaseTransform::None, + apply_emojis: true, + minimal_mode: false, + }, + ] + } + + /// Findet eine Sequence nach Namen + pub fn find(name: &str) -> Option { + Self::all().into_iter().find(|s| s.name == name) + } + + /// Gibt die Default-Sequence zurück + pub fn default() -> Sequence { + Self::find("default").unwrap() + } 
+} + // Bekannte Doppel-Extensions (z.B. .tar.gz) const DOUBLE_EXTENSIONS: &[&str] = &[ ".tar.gz", @@ -45,8 +121,13 @@ fn split_filename(filename: &str) -> (String, String) { } } -/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen. -pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option { +/// Bereinigt den übergebenen Dateinamen mit gegebener Sequence. +pub fn clean_filename( + name: &OsStr, + config: &Config, + sequence: &Sequence, + verbose: bool, +) -> Option { let original = name.to_string_lossy(); // Versteckte Dateien (mit führendem Punkt) korrekt behandeln @@ -62,35 +143,64 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option { + base = base.to_lowercase(); + ext = ext.to_lowercase(); + } + CaseTransform::Upper => { + base = base.to_uppercase(); + ext = ext.to_uppercase(); + } + CaseTransform::None => {} + } - // 4) Entfernen/Ersetzen aller übrigen ungültigen Zeichen - base = RE_INVALID.replace_all(&base, "_").to_string(); + // 5) Emojis ersetzen (wenn aktiviert) + if sequence.apply_emojis { + base = replace_emojis_and_superscript(&base); + } + + // 6) Ungültige Zeichen behandeln + if sequence.minimal_mode { + // Minimal: Nur Leerzeichen und gefährliche Zeichen + base = base.replace(' ', "_"); + // Entferne nur absolut gefährliche Zeichen + base = base + .replace('/', "_") + .replace('\\', "_") + .replace('\0', "_") + .replace('\n', "_"); + } else { + // Standard: Alle ungültigen Zeichen → Unterstrich + base = RE_INVALID.replace_all(&base, "_").to_string(); + } // Ungültige Kombinationen aus Punkt und Unterstrich base = RE_ADJACENT.replace_all(&base, ".").to_string(); // Mehrfache Punkte/Unterstriche auf einen reduzieren base = RE_MULTI - .replace_all( - &base, - |caps: &Captures| { - if caps[0].contains('.') { - "." - } else { - "_" - } - }, - ) + .replace_all(&base, |caps: &Captures| { + if caps[0].contains('.') { + "." 
+ } else { + "_" + } + }) .to_string(); // Führender Punkt soll bleiben, führende Unterstriche sollen verschwinden @@ -149,6 +259,18 @@ fn apply_hardcoded_replacements(input: &str) -> String { .replace("ˆ", "_") } +/// Ersetzt deutsche Umlaute durch ASCII-Äquivalente +fn apply_umlaut_replacements(input: &str) -> String { + input + .replace("ä", "ae") + .replace("ö", "oe") + .replace("ü", "ue") + .replace("Ä", "Ae") + .replace("Ö", "Oe") + .replace("Ü", "Ue") + .replace("ß", "ss") +} + /// Entfernt am Anfang nur Unterstriche, einen führenden Punkt (.) bewahrt es. fn trim_leading_underscores_preserve_leading_dot(s: &str) -> String { let mut chars = s.chars().peekable(); @@ -290,33 +412,29 @@ mod tests { use std::ffi::OsStr; fn make_test_config() -> Config { - let mut replacements = std::collections::HashMap::new(); - replacements.insert("ä".to_string(), "ae".to_string()); - replacements.insert("ö".to_string(), "oe".to_string()); - replacements.insert("ü".to_string(), "ue".to_string()); - replacements.insert("ß".to_string(), "ss".to_string()); - Config { replacements } + Config::default() } #[test] fn test_clean_filename_basic() { let config = Config::default(); + let sequence = Sequence::default(); // Spaces should become underscores assert_eq!( - clean_filename(OsStr::new("test file.txt"), &config, false), + clean_filename(OsStr::new("test file.txt"), &config, &sequence, false), Some("test_file.txt".to_string()) ); // Parentheses should become underscores assert_eq!( - clean_filename(OsStr::new("file (1).txt"), &config, false), + clean_filename(OsStr::new("file (1).txt"), &config, &sequence, false), Some("file_1.txt".to_string()) ); // Multiple underscores should be collapsed assert_eq!( - clean_filename(OsStr::new("test__file.txt"), &config, false), + clean_filename(OsStr::new("test__file.txt"), &config, &sequence, false), Some("test_file.txt".to_string()) ); } @@ -324,28 +442,29 @@ mod tests { #[test] fn test_clean_filename_hidden_files() { let config = 
Config::default(); + let sequence = Sequence::default(); // Hidden files should keep their leading dot assert_eq!( - clean_filename(OsStr::new(".gitignore"), &config, false), + clean_filename(OsStr::new(".gitignore"), &config, &sequence, false), None // No change needed ); // Hidden files with spaces assert_eq!( - clean_filename(OsStr::new(".my config"), &config, false), + clean_filename(OsStr::new(".my config"), &config, &sequence, false), Some(".my_config".to_string()) ); // Hidden files with extension assert_eq!( - clean_filename(OsStr::new(".test file.txt"), &config, false), + clean_filename(OsStr::new(".test file.txt"), &config, &sequence, false), Some(".test_file.txt".to_string()) ); // Multiple leading dots assert_eq!( - clean_filename(OsStr::new("...strange"), &config, false), + clean_filename(OsStr::new("...strange"), &config, &sequence, false), Some(".unnamed.strange".to_string()) ); } @@ -353,20 +472,21 @@ mod tests { #[test] fn test_clean_filename_umlauts() { let config = make_test_config(); + let sequence = Sequence::default(); // German umlauts assert_eq!( - clean_filename(OsStr::new("Müller.pdf"), &config, false), + clean_filename(OsStr::new("Müller.pdf"), &config, &sequence, false), Some("Mueller.pdf".to_string()) ); assert_eq!( - clean_filename(OsStr::new("schön.txt"), &config, false), + clean_filename(OsStr::new("schön.txt"), &config, &sequence, false), Some("schoen.txt".to_string()) ); assert_eq!( - clean_filename(OsStr::new("Größe.doc"), &config, false), + clean_filename(OsStr::new("Größe.doc"), &config, &sequence, false), Some("Groesse.doc".to_string()) ); } @@ -374,33 +494,34 @@ mod tests { #[test] fn test_clean_filename_extensions() { let config = Config::default(); + let sequence = Sequence::default(); // Single extension assert_eq!( - clean_filename(OsStr::new("test file.txt"), &config, false), + clean_filename(OsStr::new("test file.txt"), &config, &sequence, false), Some("test_file.txt".to_string()) ); // Double extension with spaces in 
base name assert_eq!( - clean_filename(OsStr::new("my archive.tar.gz"), &config, false), + clean_filename(OsStr::new("my archive.tar.gz"), &config, &sequence, false), Some("my_archive.tar.gz".to_string()) ); // Other double extensions assert_eq!( - clean_filename(OsStr::new("backup file.tar.bz2"), &config, false), + clean_filename(OsStr::new("backup file.tar.bz2"), &config, &sequence, false), Some("backup_file.tar.bz2".to_string()) ); assert_eq!( - clean_filename(OsStr::new("data set.tar.xz"), &config, false), + clean_filename(OsStr::new("data set.tar.xz"), &config, &sequence, false), Some("data_set.tar.xz".to_string()) ); // Multiple dots (not a double extension) assert_eq!( - clean_filename(OsStr::new("foo..bar.txt"), &config, false), + clean_filename(OsStr::new("foo..bar.txt"), &config, &sequence, false), Some("foo.bar.txt".to_string()) ); } @@ -434,16 +555,17 @@ mod tests { #[test] fn test_clean_filename_special_identifiers() { let config = Config::default(); + let sequence = Sequence::default(); // C++ should be preserved assert_eq!( - clean_filename(OsStr::new("test C++.txt"), &config, false), + clean_filename(OsStr::new("test C++.txt"), &config, &sequence, false), Some("test_C++.txt".to_string()) ); // C# should be preserved assert_eq!( - clean_filename(OsStr::new("guide C#.pdf"), &config, false), + clean_filename(OsStr::new("guide C#.pdf"), &config, &sequence, false), Some("guide_C#.pdf".to_string()) ); } @@ -451,15 +573,16 @@ mod tests { #[test] fn test_clean_filename_no_change_needed() { let config = Config::default(); + let sequence = Sequence::default(); // Already clean filenames should return None assert_eq!( - clean_filename(OsStr::new("clean_file.txt"), &config, false), + clean_filename(OsStr::new("clean_file.txt"), &config, &sequence, false), None ); assert_eq!( - clean_filename(OsStr::new("another-file.pdf"), &config, false), + clean_filename(OsStr::new("another-file.pdf"), &config, &sequence, false), None ); } @@ -467,10 +590,11 @@ mod tests { 
#[test] fn test_clean_filename_empty_after_cleaning() { let config = Config::default(); + let sequence = Sequence::default(); // File with only special chars should become "unnamed" assert_eq!( - clean_filename(OsStr::new("###.txt"), &config, false), + clean_filename(OsStr::new("###.txt"), &config, &sequence, false), Some("unnamed.txt".to_string()) ); } @@ -478,10 +602,11 @@ mod tests { #[test] fn test_clean_filename_apostrophe() { let config = Config::default(); + let sequence = Sequence::default(); // Apostrophes should be removed (not replaced with underscore) assert_eq!( - clean_filename(OsStr::new("O'Reilly.pdf"), &config, false), + clean_filename(OsStr::new("O'Reilly.pdf"), &config, &sequence, false), Some("OReilly.pdf".to_string()) ); } diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 0262e4b..2143f89 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -257,3 +257,135 @@ fn test_conf_option_missing_file() { .failure() .stderr(predicate::str::contains("nicht gefunden")); } + +#[test] +fn test_sequence_lower() { + let temp_dir = TempDir::new().unwrap(); + let config_file = temp_dir.path().join("empty.toml"); + fs::write(&config_file, "[replacements]\n").unwrap(); + let file_path = temp_dir.path().join("Test File.txt"); + fs::write(&file_path, "content").unwrap(); + + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("--conf") + .arg(&config_file) + .arg("-s") + .arg("lower") + .arg(temp_dir.path()); + cmd.assert().success(); + + assert!(temp_dir.path().join("test_file.txt").exists()); +} + +#[test] +fn test_sequence_upper() { + let temp_dir = TempDir::new().unwrap(); + let config_file = temp_dir.path().join("empty.toml"); + fs::write(&config_file, "[replacements]\n").unwrap(); + let file_path = temp_dir.path().join("test file.txt"); + fs::write(&file_path, "content").unwrap(); + + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("--conf") + .arg(&config_file) + .arg("-s") + .arg("upper") + 
.arg(temp_dir.path()); + cmd.assert().success(); + + assert!(temp_dir.path().join("TEST_FILE.TXT").exists()); +} + +#[test] +fn test_sequence_minimal() { + let temp_dir = TempDir::new().unwrap(); + let config_file = temp_dir.path().join("empty.toml"); + fs::write(&config_file, "[replacements]\n").unwrap(); + let file_path = temp_dir.path().join("Müller Datei.txt"); + fs::write(&file_path, "content").unwrap(); + + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("--conf") + .arg(&config_file) + .arg("-s") + .arg("minimal") + .arg(temp_dir.path()); + cmd.assert().success(); + + // Umlaute bleiben erhalten, nur Leerzeichen ersetzt + assert!(temp_dir.path().join("Müller_Datei.txt").exists()); +} + +#[test] +fn test_sequence_utf8() { + let temp_dir = TempDir::new().unwrap(); + let config_file = temp_dir.path().join("empty.toml"); + fs::write(&config_file, "[replacements]\n").unwrap(); + let file_path = temp_dir.path().join("schön (1).txt"); + fs::write(&file_path, "content").unwrap(); + + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("--conf") + .arg(&config_file) + .arg("-s") + .arg("utf-8") + .arg(temp_dir.path()); + cmd.assert().success(); + + // Umlaut bleibt, Klammern entfernt + assert!(temp_dir.path().join("schön_1.txt").exists()); +} + +#[test] +fn test_list_sequences() { + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("-L"); + cmd.assert() + .success() + .stdout(predicate::str::contains("default")) + .stdout(predicate::str::contains("lower")) + .stdout(predicate::str::contains("upper")) + .stdout(predicate::str::contains("minimal")) + .stdout(predicate::str::contains("utf-8")); +} + +#[test] +fn test_list_sequences_verbose() { + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("-L").arg("-v"); + cmd.assert() + .success() + .stdout(predicate::str::contains("Umlauts → ASCII")) + .stdout(predicate::str::contains("Case transform")); +} + +#[test] +fn test_invalid_sequence() { + let temp_dir = TempDir::new().unwrap(); + + let mut 
cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("-s").arg("invalid_seq").arg(temp_dir.path()); + cmd.assert() + .failure() + .stderr(predicate::str::contains("Unbekannte Sequence")); +} + +#[test] +fn test_sequence_default_explicit() { + let temp_dir = TempDir::new().unwrap(); + let config_file = temp_dir.path().join("empty.toml"); + fs::write(&config_file, "[replacements]\n").unwrap(); + let file_path = temp_dir.path().join("Müller File.txt"); + fs::write(&file_path, "content").unwrap(); + + let mut cmd = Command::new(cargo_bin!("ntu")); + cmd.arg("--conf") + .arg(&config_file) + .arg("-s") + .arg("default") + .arg(temp_dir.path()); + cmd.assert().success(); + + // Default: Umlaut → ASCII + assert!(temp_dir.path().join("Mueller_File.txt").exists()); +}