Implement sequences feature v1.1.0

- Add -s/--sequence option to select transformation sequences
- Add -L flag to list all available sequences
- Implement 5 hardcoded sequences: default, lower, upper, minimal, utf-8
- Refactor clean_filename() to support sequence-based transformations
- Update all tests to pass sequence parameter (25 tests passing)
- Add 8 new integration tests for sequence functionality
- Update documentation (README, CHANGELOG, manpage)
- Update shell completions (bash, zsh, fish)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-10 18:38:23 +01:00
commit 2ec4d12d6c
12 changed files with 501 additions and 52 deletions

View file

@ -21,6 +21,14 @@ pub struct Cli {
#[clap(short = 'r', long)]
pub recursive: bool,
/// Wählt eine Transformations-Sequenz aus (default, lower, upper, minimal, utf-8)
#[clap(short = 's', long, value_name = "NAME")]
pub sequence: Option<String>,
/// Listet alle verfügbaren Sequences auf
#[clap(short = 'L')]
pub list_sequences: bool,
/// Ausgaben unterdrücken (keine Umbenennungsinfos auf stdout)
#[clap(short, long)]
pub quiet: bool,

View file

@ -12,7 +12,7 @@ use glob::Pattern;
use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, error, info};
use rayon::prelude::*;
use sanitizer::{clean_filename, is_excluded, is_safe_rename};
use sanitizer::{clean_filename, is_excluded, is_safe_rename, Sequence};
use std::fs;
use std::io::IsTerminal;
use std::path::PathBuf;
@ -60,6 +60,28 @@ fn main() -> Result<()> {
colored::control::set_override(false);
}
// -L Option: Liste Sequences und beende
if args.list_sequences {
list_sequences(&args);
return Ok(());
}
// Sequence auswählen
let sequence = if let Some(seq_name) = &args.sequence {
Sequence::find(seq_name).ok_or_else(|| {
anyhow::anyhow!(
"Unbekannte Sequence: '{}'. Nutze -L um verfügbare Sequences anzuzeigen.",
seq_name
)
})?
} else {
Sequence::default()
};
if args.verbose {
info!("Verwende Sequence: {}", sequence.name);
}
// Config-Datei laden: entweder --conf oder Standard-Hierarchie
let config = if let Some(config_path) = &args.config_file {
Config::from_file(config_path, args.verbose)
@ -150,7 +172,7 @@ fn main() -> Result<()> {
// Dateiname ermitteln und bereinigen
let filename = old_path.file_name()?;
let new_name = clean_filename(filename, &config, false)?;
let new_name = clean_filename(filename, &config, &sequence, false)?;
let new_path = old_path.with_file_name(&new_name);
Some(RenameOperation {
@ -176,7 +198,7 @@ fn main() -> Result<()> {
}
let filename = old_path.file_name()?;
let new_name = clean_filename(filename, &config, false)?;
let new_name = clean_filename(filename, &config, &sequence, false)?;
let new_path = old_path.with_file_name(&new_name);
Some(RenameOperation {
@ -264,3 +286,41 @@ fn main() -> Result<()> {
Ok(())
}
/// Prints every sequence returned by `Sequence::all()` to stdout.
///
/// Each entry starts with the sequence name in bold. Without `args.verbose`
/// the name is followed by the description only, and a closing hint about
/// `-L -v` is printed after the list. With `args.verbose` every entry also
/// shows the umlaut, case, emoji and mode settings of the sequence.
fn list_sequences(args: &Cli) {
    let detailed = args.verbose;

    println!("Verfügbare Sequences:");
    println!();

    for seq in Sequence::all() {
        println!(" {}", seq.name.bold());

        // Short listing: description only, then move on to the next entry.
        if !detailed {
            println!(" {}", seq.description);
            println!();
            continue;
        }

        // Translate the boolean switches into the labels shown to the user.
        let umlauts = if seq.apply_umlauts { "yes" } else { "no" };
        let emojis = if seq.apply_emojis { "replace" } else { "keep" };
        let mode = if seq.minimal_mode { "minimal" } else { "full" };

        println!(" Description: {}", seq.description);
        println!(" Umlauts → ASCII: {}", umlauts);
        println!(" Case transform: {:?}", seq.apply_case);
        println!(" Emoji handling: {}", emojis);
        println!(" Mode: {}", mode);
        println!();
    }

    if !detailed {
        println!("Nutze -L -v für detaillierte Informationen über jede Sequence.");
    }
}

View file

@ -13,6 +13,82 @@ static RE_INVALID: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^\w.\-]").unwrap());
static RE_ADJACENT: Lazy<Regex> = Lazy::new(|| Regex::new(r"_\.|\._").unwrap());
static RE_MULTI: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_\.]{2,}").unwrap());
/// A named bundle of switches that controls how `clean_filename` rewrites a name.
///
/// The built-in sequences are returned by [`Sequence::all`]; [`Sequence::find`]
/// looks one up by its exact name, and the [`Default`] implementation yields
/// the sequence called `"default"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Sequence {
    /// Identifier used to select the sequence; matched exactly by [`Sequence::find`].
    pub name: &'static str,
    /// Human-readable summary shown when the sequences are listed.
    pub description: &'static str,
    /// Whether German umlauts and `ß` are transliterated to ASCII.
    pub apply_umlauts: bool,
    /// Case conversion applied to both the base name and the extension.
    pub apply_case: CaseTransform,
    /// Whether the emoji / superscript replacement step runs.
    pub apply_emojis: bool,
    /// When `true`, only spaces, `/`, `\`, NUL and newline are replaced by `_`
    /// instead of every character outside the allowed set.
    pub minimal_mode: bool,
}

/// Case conversion a [`Sequence`] applies to a file name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CaseTransform {
    /// Leave the case untouched.
    None,
    /// Convert to lowercase.
    Lower,
    /// Convert to uppercase.
    Upper,
}

impl Sequence {
    /// The sequence used when none is selected explicitly.
    ///
    /// Kept as a constant so that `Default::default()` needs neither a string
    /// lookup nor an `unwrap()`.
    const DEFAULT: Sequence = Sequence {
        name: "default",
        description: "Standard transformation: spaces→underscores, umlauts→ASCII, remove special chars",
        apply_umlauts: true,
        apply_case: CaseTransform::None,
        apply_emojis: true,
        minimal_mode: false,
    };

    /// Every built-in sequence, in the order in which they are listed.
    const BUILT_IN: [Sequence; 5] = [
        Self::DEFAULT,
        Sequence {
            name: "lower",
            description: "Like default, but convert everything to lowercase",
            apply_umlauts: true,
            apply_case: CaseTransform::Lower,
            apply_emojis: true,
            minimal_mode: false,
        },
        Sequence {
            name: "upper",
            description: "Like default, but convert everything to UPPERCASE",
            apply_umlauts: true,
            apply_case: CaseTransform::Upper,
            apply_emojis: true,
            minimal_mode: false,
        },
        Sequence {
            name: "minimal",
            description: "Minimal changes: only replace spaces, keep umlauts and UTF-8",
            apply_umlauts: false,
            apply_case: CaseTransform::None,
            apply_emojis: false,
            minimal_mode: true,
        },
        Sequence {
            name: "utf-8",
            description: "UTF-8 friendly: spaces→underscores, keep umlauts, remove special chars",
            apply_umlauts: false,
            apply_case: CaseTransform::None,
            apply_emojis: true,
            minimal_mode: false,
        },
    ];

    /// Returns all available sequences, with `"default"` first.
    pub fn all() -> Vec<Sequence> {
        Self::BUILT_IN.to_vec()
    }

    /// Looks up a sequence by its exact (case-sensitive) name.
    ///
    /// Returns `None` when no built-in sequence carries that name.
    pub fn find(name: &str) -> Option<Sequence> {
        // Scan the constant table directly instead of allocating a Vec per lookup.
        Self::BUILT_IN.iter().find(|seq| seq.name == name).cloned()
    }
}

/// `Sequence::default()` keeps working at existing call sites; implementing
/// the trait additionally enables `Default::default()`, struct-update syntax
/// and `unwrap_or_default()`.
impl Default for Sequence {
    /// Returns the sequence named `"default"`.
    fn default() -> Self {
        Self::DEFAULT
    }
}
// Bekannte Doppel-Extensions (z.B. .tar.gz)
const DOUBLE_EXTENSIONS: &[&str] = &[
".tar.gz",
@ -45,8 +121,13 @@ fn split_filename(filename: &str) -> (String, String) {
}
}
/// Bereinigt den übergebenen Dateinamen oder Verzeichnisnamen.
pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<String> {
/// Bereinigt den übergebenen Dateinamen mit gegebener Sequence.
pub fn clean_filename(
name: &OsStr,
config: &Config,
sequence: &Sequence,
verbose: bool,
) -> Option<String> {
let original = name.to_string_lossy();
// Versteckte Dateien (mit führendem Punkt) korrekt behandeln
@ -62,35 +143,64 @@ pub fn clean_filename(name: &OsStr, config: &Config, verbose: bool) -> Option<St
base = preserve_special_identifiers(&base);
ext = preserve_special_identifiers(&ext);
// 1) Konfig-Replacements anwenden (zuerst)
// 1) Config-Replacements anwenden (immer zuerst)
for (k, v) in &config.replacements {
base = base.replace(k, v);
}
// 2) Danach hart-codierte Ersetzungen anwenden
// 2) Sequence-basierte Umlaut-Ersetzung
if sequence.apply_umlauts {
base = apply_umlaut_replacements(&base);
}
// 3) Hardcoded replacements (Apostroph etc.)
base = apply_hardcoded_replacements(&base);
// 3) Emojis und hochgestellte Zeichen ersetzen
base = replace_emojis_and_superscript(&base);
// 4) Case-Transformation (auf base UND extension anwenden)
match sequence.apply_case {
CaseTransform::Lower => {
base = base.to_lowercase();
ext = ext.to_lowercase();
}
CaseTransform::Upper => {
base = base.to_uppercase();
ext = ext.to_uppercase();
}
CaseTransform::None => {}
}
// 4) Entfernen/Ersetzen aller übrigen ungültigen Zeichen
base = RE_INVALID.replace_all(&base, "_").to_string();
// 5) Emojis ersetzen (wenn aktiviert)
if sequence.apply_emojis {
base = replace_emojis_and_superscript(&base);
}
// 6) Ungültige Zeichen behandeln
if sequence.minimal_mode {
// Minimal: Nur Leerzeichen und gefährliche Zeichen
base = base.replace(' ', "_");
// Entferne nur absolut gefährliche Zeichen
base = base
.replace('/', "_")
.replace('\\', "_")
.replace('\0', "_")
.replace('\n', "_");
} else {
// Standard: Alle ungültigen Zeichen → Unterstrich
base = RE_INVALID.replace_all(&base, "_").to_string();
}
// Ungültige Kombinationen aus Punkt und Unterstrich
base = RE_ADJACENT.replace_all(&base, ".").to_string();
// Mehrfache Punkte/Unterstriche auf einen reduzieren
base = RE_MULTI
.replace_all(
&base,
|caps: &Captures| {
if caps[0].contains('.') {
"."
} else {
"_"
}
},
)
.replace_all(&base, |caps: &Captures| {
if caps[0].contains('.') {
"."
} else {
"_"
}
})
.to_string();
// Führender Punkt soll bleiben, führende Unterstriche sollen verschwinden
@ -149,6 +259,18 @@ fn apply_hardcoded_replacements(input: &str) -> String {
.replace("ˆ", "_")
}
/// Transliterates German umlauts and `ß` to their ASCII spellings.
///
/// `ä`/`ö`/`ü` become `ae`/`oe`/`ue`, the capital forms become `Ae`/`Oe`/`Ue`
/// and `ß` becomes `ss`. Every other character is copied through unchanged.
fn apply_umlaut_replacements(input: &str) -> String {
    let mut transliterated = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            'ä' => transliterated.push_str("ae"),
            'ö' => transliterated.push_str("oe"),
            'ü' => transliterated.push_str("ue"),
            'Ä' => transliterated.push_str("Ae"),
            'Ö' => transliterated.push_str("Oe"),
            'Ü' => transliterated.push_str("Ue"),
            'ß' => transliterated.push_str("ss"),
            other => transliterated.push(other),
        }
    }
    transliterated
}
/// Entfernt am Anfang nur Unterstriche, einen führenden Punkt (.) bewahrt es.
fn trim_leading_underscores_preserve_leading_dot(s: &str) -> String {
let mut chars = s.chars().peekable();
@ -290,33 +412,29 @@ mod tests {
use std::ffi::OsStr;
fn make_test_config() -> Config {
let mut replacements = std::collections::HashMap::new();
replacements.insert("ä".to_string(), "ae".to_string());
replacements.insert("ö".to_string(), "oe".to_string());
replacements.insert("ü".to_string(), "ue".to_string());
replacements.insert("ß".to_string(), "ss".to_string());
Config { replacements }
Config::default()
}
#[test]
fn test_clean_filename_basic() {
let config = Config::default();
let sequence = Sequence::default();
// Spaces should become underscores
assert_eq!(
clean_filename(OsStr::new("test file.txt"), &config, false),
clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
Some("test_file.txt".to_string())
);
// Parentheses should become underscores
assert_eq!(
clean_filename(OsStr::new("file (1).txt"), &config, false),
clean_filename(OsStr::new("file (1).txt"), &config, &sequence, false),
Some("file_1.txt".to_string())
);
// Multiple underscores should be collapsed
assert_eq!(
clean_filename(OsStr::new("test__file.txt"), &config, false),
clean_filename(OsStr::new("test__file.txt"), &config, &sequence, false),
Some("test_file.txt".to_string())
);
}
@ -324,28 +442,29 @@ mod tests {
#[test]
fn test_clean_filename_hidden_files() {
let config = Config::default();
let sequence = Sequence::default();
// Hidden files should keep their leading dot
assert_eq!(
clean_filename(OsStr::new(".gitignore"), &config, false),
clean_filename(OsStr::new(".gitignore"), &config, &sequence, false),
None // No change needed
);
// Hidden files with spaces
assert_eq!(
clean_filename(OsStr::new(".my config"), &config, false),
clean_filename(OsStr::new(".my config"), &config, &sequence, false),
Some(".my_config".to_string())
);
// Hidden files with extension
assert_eq!(
clean_filename(OsStr::new(".test file.txt"), &config, false),
clean_filename(OsStr::new(".test file.txt"), &config, &sequence, false),
Some(".test_file.txt".to_string())
);
// Multiple leading dots
assert_eq!(
clean_filename(OsStr::new("...strange"), &config, false),
clean_filename(OsStr::new("...strange"), &config, &sequence, false),
Some(".unnamed.strange".to_string())
);
}
@ -353,20 +472,21 @@ mod tests {
#[test]
fn test_clean_filename_umlauts() {
let config = make_test_config();
let sequence = Sequence::default();
// German umlauts
assert_eq!(
clean_filename(OsStr::new("Müller.pdf"), &config, false),
clean_filename(OsStr::new("Müller.pdf"), &config, &sequence, false),
Some("Mueller.pdf".to_string())
);
assert_eq!(
clean_filename(OsStr::new("schön.txt"), &config, false),
clean_filename(OsStr::new("schön.txt"), &config, &sequence, false),
Some("schoen.txt".to_string())
);
assert_eq!(
clean_filename(OsStr::new("Größe.doc"), &config, false),
clean_filename(OsStr::new("Größe.doc"), &config, &sequence, false),
Some("Groesse.doc".to_string())
);
}
@ -374,33 +494,34 @@ mod tests {
#[test]
fn test_clean_filename_extensions() {
let config = Config::default();
let sequence = Sequence::default();
// Single extension
assert_eq!(
clean_filename(OsStr::new("test file.txt"), &config, false),
clean_filename(OsStr::new("test file.txt"), &config, &sequence, false),
Some("test_file.txt".to_string())
);
// Double extension with spaces in base name
assert_eq!(
clean_filename(OsStr::new("my archive.tar.gz"), &config, false),
clean_filename(OsStr::new("my archive.tar.gz"), &config, &sequence, false),
Some("my_archive.tar.gz".to_string())
);
// Other double extensions
assert_eq!(
clean_filename(OsStr::new("backup file.tar.bz2"), &config, false),
clean_filename(OsStr::new("backup file.tar.bz2"), &config, &sequence, false),
Some("backup_file.tar.bz2".to_string())
);
assert_eq!(
clean_filename(OsStr::new("data set.tar.xz"), &config, false),
clean_filename(OsStr::new("data set.tar.xz"), &config, &sequence, false),
Some("data_set.tar.xz".to_string())
);
// Multiple dots (not a double extension)
assert_eq!(
clean_filename(OsStr::new("foo..bar.txt"), &config, false),
clean_filename(OsStr::new("foo..bar.txt"), &config, &sequence, false),
Some("foo.bar.txt".to_string())
);
}
@ -434,16 +555,17 @@ mod tests {
#[test]
fn test_clean_filename_special_identifiers() {
let config = Config::default();
let sequence = Sequence::default();
// C++ should be preserved
assert_eq!(
clean_filename(OsStr::new("test C++.txt"), &config, false),
clean_filename(OsStr::new("test C++.txt"), &config, &sequence, false),
Some("test_C++.txt".to_string())
);
// C# should be preserved
assert_eq!(
clean_filename(OsStr::new("guide C#.pdf"), &config, false),
clean_filename(OsStr::new("guide C#.pdf"), &config, &sequence, false),
Some("guide_C#.pdf".to_string())
);
}
@ -451,15 +573,16 @@ mod tests {
#[test]
fn test_clean_filename_no_change_needed() {
let config = Config::default();
let sequence = Sequence::default();
// Already clean filenames should return None
assert_eq!(
clean_filename(OsStr::new("clean_file.txt"), &config, false),
clean_filename(OsStr::new("clean_file.txt"), &config, &sequence, false),
None
);
assert_eq!(
clean_filename(OsStr::new("another-file.pdf"), &config, false),
clean_filename(OsStr::new("another-file.pdf"), &config, &sequence, false),
None
);
}
@ -467,10 +590,11 @@ mod tests {
#[test]
fn test_clean_filename_empty_after_cleaning() {
let config = Config::default();
let sequence = Sequence::default();
// File with only special chars should become "unnamed"
assert_eq!(
clean_filename(OsStr::new("###.txt"), &config, false),
clean_filename(OsStr::new("###.txt"), &config, &sequence, false),
Some("unnamed.txt".to_string())
);
}
@ -478,10 +602,11 @@ mod tests {
#[test]
fn test_clean_filename_apostrophe() {
let config = Config::default();
let sequence = Sequence::default();
// Apostrophes should be removed (not replaced with underscore)
assert_eq!(
clean_filename(OsStr::new("O'Reilly.pdf"), &config, false),
clean_filename(OsStr::new("O'Reilly.pdf"), &config, &sequence, false),
Some("OReilly.pdf".to_string())
);
}