diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..7935796
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,38 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+**Musiksammlung** is a Python CLI tool that automates digitizing physical CD collections for use with Jellyfin. It orchestrates: CD ripping (via `abcde`), OCR of cover/back images (via Tesseract), LLM-based tracklist extraction, file renaming/tagging, and M3U playlist generation.
+
+## Build & Development Commands
+
+```bash
+pip install -e ".[dev]"          # Install in editable mode with dev deps
+pytest tests/ -v                 # Run all tests
+pytest tests/test_models.py -v   # Run a single test module
+ruff check src/ tests/           # Lint
+musiksammlung --help             # CLI entry point
+```
+
+## Architecture
+
+The pipeline flows: **OCR → LLM → Organize → Tag → Playlist**
+
+- `models.py` — Pydantic models (`Album`, `Disc`, `Track`) shared across all modules; the LLM JSON output validates directly into `Album`
+- `cli.py` — Typer CLI with three commands: `scan` (OCR+LLM→JSON), `apply` (JSON→files), `process` (full pipeline)
+- `ocr.py` — Tesseract wrapper with Pillow-based image preprocessing
+- `llm_parser.py` — Sends OCR text to LLM (Ollama or OpenAI-compatible), enforces JSON output, retries on parse failure
+- `organizer.py` — Builds source→target file mapping, handles single-disc and multi-disc layouts
+- `tagger.py` — Sets audio tags via mutagen (format-agnostic), optional cover embedding for FLAC/MP3
+- `playlist.py` — Generates M3U playlists with relative paths
+- `ripper.py` — Drives `abcde` via subprocess for CD ripping
+- `cover.py` — Resizes/converts cover images to JPEG for Jellyfin
+
+## Conventions
+
+- Python 3.11+, German variable names and comments are acceptable
+- Pydantic for data models, Typer for CLI, mutagen for audio tagging
+- External tools required at runtime: `tesseract`, `abcde`
+- The two-step workflow (`scan` → review JSON → `apply`) is the recommended default over the one-shot `process` command
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ffcae8f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "musiksammlung"
+version = "0.1.0"
+description = "CLI-Tool zum Digitalisieren von CD-Sammlungen für Jellyfin"
+requires-python = ">=3.11"
+dependencies = [
+    "typer>=0.12",
+    "pydantic>=2.0",
+    "mutagen>=1.47",
+    "Pillow>=10.0",
+    "httpx>=0.27",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0",
+    "pytest-cov",
+    "ruff",
+]
+
+[project.scripts]
+musiksammlung = "musiksammlung.cli:app"
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "W"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/src/musiksammlung/__init__.py b/src/musiksammlung/__init__.py
new file mode 100644
index 0000000..22aa402
--- /dev/null
+++ b/src/musiksammlung/__init__.py
@@ -0,0 +1,3 @@
+"""Musiksammlung – CLI tool for digitizing CD collections for Jellyfin."""
+
+__version__ = "0.1.0"
diff --git a/src/musiksammlung/cli.py b/src/musiksammlung/cli.py
new file mode 100644
index 0000000..f7404cb
--- /dev/null
+++ b/src/musiksammlung/cli.py
@@ -0,0 +1,187 @@
+"""CLI interface built with Typer."""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+
+import typer
+
+from musiksammlung.cover import copy_covers
+from musiksammlung.llm_parser import parse_tracklist
+from musiksammlung.models import Album
+from musiksammlung.ocr import ocr_images
+from musiksammlung.organizer import apply_mapping, build_mapping
+from musiksammlung.playlist import generate_playlist
+from musiksammlung.tagger import tag_album
+
+# NOTE: Typer renders the docstrings of the commands below as their --help text, so they
+# are runtime output and stay in German like every other user-facing string in this module.
+app = typer.Typer(
+    name="musiksammlung",
+    help="CLI-Tool zum Digitalisieren von CD-Sammlungen für Jellyfin.",
+)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+)
+
+
+@app.command()
+def scan(
+    images: list[Path] = typer.Argument(..., help="Bilder der CD-Rückseite/Booklet"),
+    output: Path = typer.Option("album.json", "--output", "-o", help="Ausgabe-JSON-Datei"),
+    languages: str = typer.Option("deu+eng", "--lang", "-l", help="OCR-Sprachen"),
+    backend: str = typer.Option("ollama", "--backend", "-b", help="LLM-Backend"),
+    model: str = typer.Option("llama3", "--model", "-m", help="LLM-Modell"),
+    base_url: str = typer.Option("http://localhost:11434", "--url", help="LLM-API-URL"),
+) -> None:
+    """OCR + LLM → Album-JSON erzeugen (zur Prüfung vor dem Anwenden)."""
+    # Validate the input images before any expensive work starts
+    for img in images:
+        if not img.exists():
+            typer.echo(f"Fehler: Bild nicht gefunden: {img}", err=True)
+            raise typer.Exit(1)
+
+    typer.echo("Starte OCR...")
+    ocr_text = ocr_images(images, languages)
+    typer.echo(f"OCR-Text ({len(ocr_text)} Zeichen) erkannt.")
+
+    typer.echo("Starte LLM-Parsing...")
+    album = parse_tracklist(ocr_text, backend=backend, model=model, base_url=base_url)
+
+    output.write_text(album.model_dump_json(indent=2), encoding="utf-8")
+    typer.echo(f"Album-JSON gespeichert: {output}")
+    typer.echo(f" Artist: {album.artist}")
+    typer.echo(f" Album: {album.album}")
+    typer.echo(f" Year: {album.year}")
+    for disc in album.discs:
+        typer.echo(f" Disc {disc.disc_number}: {len(disc.tracks)} Tracks")
+
+
+@app.command()
+def apply(
+    input_dir: Path = typer.Argument(..., help="Verzeichnis mit gerippten Audiodateien"),
+    album_json: Path = typer.Argument(..., help="Album-JSON aus 'scan'"),
+    output_dir: Path = typer.Argument(..., help="Jellyfin-Musikverzeichnis"),
+    front: Path | None = typer.Option(None, "--front", help="Front-Cover-Bild"),
+    back: Path | None = typer.Option(None, "--back", help="Rückseiten-Cover-Bild"),
+    dry_run: bool = typer.Option(False, "--dry-run", help="Nur anzeigen, nichts ändern"),
+) -> None:
+    """Album-JSON + Audiodateien → Jellyfin-Struktur aufbauen."""
+    # Load and validate the JSON
+    raw = json.loads(album_json.read_text(encoding="utf-8"))
+    album = Album.model_validate(raw)
+
+    # Compute and display the mapping
+    mapping = build_mapping(album, input_dir, output_dir)
+    typer.echo(f"Mapping: {len(mapping)} Dateien")
+    for src, dst in mapping.items():
+        typer.echo(f" {src.name} → {dst.relative_to(output_dir)}")
+
+    if dry_run:
+        typer.echo("[DRY-RUN] Keine Änderungen vorgenommen.")
+        return
+
+    # No mapped file means no album directory to derive; next() below would raise StopIteration
+    if not mapping:
+        typer.echo(f"Fehler: Keine Audiodateien in {input_dir} gefunden", err=True)
+        raise typer.Exit(1)
+
+    # Move the files
+    apply_mapping(mapping)
+
+    # Determine the album directory
+    first_target = next(iter(mapping.values()))
+    if len(album.discs) > 1:
+        album_dir = first_target.parent.parent  # CD1/ → Album/
+    else:
+        album_dir = first_target.parent
+
+    # Set the tags
+    typer.echo("Setze Audio-Tags...")
+    tag_album(album, album_dir)
+
+    # Copy the covers
+    copy_covers(front, back, album_dir)
+
+    # Create the playlist
+    generate_playlist(album, album_dir)
+
+    typer.echo(f"Fertig! Album liegt in: {album_dir}")
+
+
+@app.command()
+def process(
+    input_dir: Path = typer.Argument(..., help="Verzeichnis mit Audiodateien und Bildern"),
+    output_dir: Path = typer.Argument(..., help="Jellyfin-Musikverzeichnis"),
+    front: Path | None = typer.Option(None, "--front", help="Front-Cover-Bild"),
+    back: Path | None = typer.Option(None, "--back", help="Rückseiten-Bild (für OCR + Cover)"),
+    images: list[Path] | None = typer.Option(
+        None, "--image", "-i", help="Zusätzliche Bilder für OCR"
+    ),
+    languages: str = typer.Option("deu+eng", "--lang", "-l"),
+    backend: str = typer.Option("ollama", "--backend", "-b"),
+    model: str = typer.Option("llama3", "--model", "-m"),
+    base_url: str = typer.Option("http://localhost:11434", "--url"),
+    dry_run: bool = typer.Option(False, "--dry-run"),
+) -> None:
+    """Komplett-Pipeline: OCR → LLM → Organize → Tag → Playlist."""
+    # Collect the images that feed the OCR step
+    ocr_sources: list[Path] = []
+    if back and back.exists():
+        ocr_sources.append(back)
+    if images:
+        ocr_sources.extend(images)
+
+    if not ocr_sources:
+        typer.echo("Fehler: Mindestens ein Bild für OCR nötig (--back oder --image)", err=True)
+        raise typer.Exit(1)
+
+    # 1. OCR
+    typer.echo("Schritt 1/5: OCR...")
+    ocr_text = ocr_images(ocr_sources, languages)
+
+    # 2. LLM parsing
+    typer.echo("Schritt 2/5: LLM-Parsing...")
+    album = parse_tracklist(ocr_text, backend=backend, model=model, base_url=base_url)
+    typer.echo(f" → {album.artist} – {album.album} ({album.year})")
+
+    # Save the JSON next to the input files so it can be reviewed
+    json_path = input_dir / "album.json"
+    json_path.write_text(album.model_dump_json(indent=2), encoding="utf-8")
+
+    # 3. Organise the files
+    typer.echo("Schritt 3/5: Dateien organisieren...")
+    mapping = build_mapping(album, input_dir, output_dir)
+    apply_mapping(mapping, dry_run=dry_run)
+
+    if dry_run:
+        typer.echo("[DRY-RUN] Abbruch nach Mapping-Anzeige.")
+        return
+
+    # No mapped file means no album directory to derive; next() below would raise StopIteration
+    if not mapping:
+        typer.echo(f"Fehler: Keine Audiodateien in {input_dir} gefunden", err=True)
+        raise typer.Exit(1)
+
+    # Determine the album directory
+    first_target = next(iter(mapping.values()))
+    album_dir = first_target.parent.parent if len(album.discs) > 1 else first_target.parent
+
+    # 4. Tags + cover
+    typer.echo("Schritt 4/5: Tags & Cover...")
+    tag_album(album, album_dir)
+    copy_covers(front, back, album_dir)
+
+    # 5. Playlist
+    typer.echo("Schritt 5/5: Playlist...")
+    generate_playlist(album, album_dir)
+
+    typer.echo(f"Fertig! Album: {album_dir}")
+
+
+if __name__ == "__main__":
+    app()
diff --git a/src/musiksammlung/config.py b/src/musiksammlung/config.py
new file mode 100644
index 0000000..483c22d
--- /dev/null
+++ b/src/musiksammlung/config.py
@@ -0,0 +1,28 @@
+"""Configuration and defaults."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from pydantic import BaseModel
+
+# Supported audio formats
+AUDIO_EXTENSIONS = {".flac", ".mp3", ".ogg", ".opus", ".wav", ".m4a"}
+
+# Default image file patterns that are recognised as front cover / back side
+DEFAULT_FRONT_PATTERNS = ["cover_front.*", "front.*", "cover.*"]
+DEFAULT_BACK_PATTERNS = ["cover_back.*", "back.*", "inlay.*", "booklet.*"]
+
+
+class AppConfig(BaseModel):
+    """Global configuration for a single run."""
+
+    input_dir: Path
+    output_dir: Path
+    audio_format: str = "flac"
+    cd_device: str = "/dev/cdrom"
+    ocr_languages: str = "deu+eng"
+    llm_backend: str = "ollama"  # "ollama", "openai", "anthropic"
+    llm_model: str = "llama3"
+    llm_base_url: str = "http://localhost:11434"
+    dry_run: bool = False
diff --git a/src/musiksammlung/cover.py b/src/musiksammlung/cover.py
new file mode 100644
index 0000000..6910442
--- /dev/null
+++ b/src/musiksammlung/cover.py
@@ -0,0 +1,51 @@
+"""Process cover images and copy them into the album directory."""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from PIL import Image
+
+logger = logging.getLogger(__name__)
+
+# Jellyfin picks up these file names automatically
+FRONT_COVER_NAME = "cover.jpg"
+BACK_COVER_NAME = "back.jpg"
+
+
+def prepare_cover(source: Path, target: Path, max_size: int = 1200) -> None:
+    """Copy and optimise a cover image.
+
+    Converts to JPEG and limits the longest side to max_size pixels.
+    """
+    with Image.open(source) as img:
+        # Scale down to max_size while keeping the aspect ratio
+        if max(img.size) > max_size:
+            img.thumbnail((max_size, max_size), Image.LANCZOS)
+
+        # JPEG cannot store alpha, palette or other exotic modes, so convert those to RGB
+        if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
+            img = img.convert("RGB")
+
+        target.parent.mkdir(parents=True, exist_ok=True)
+        img.save(target, "JPEG", quality=90)
+
+    logger.info("Cover gespeichert: %s → %s", source.name, target)
+
+
+def copy_covers(
+    front_image: Path | None,
+    back_image: Path | None,
+    album_dir: Path,
+) -> None:
+    """Copy the front and back cover into the album directory."""
+    if front_image and front_image.exists():
+        prepare_cover(front_image, album_dir / FRONT_COVER_NAME)
+    else:
+        logger.warning("Kein Front-Cover gefunden")
+
+    if back_image and back_image.exists():
+        prepare_cover(back_image, album_dir / BACK_COVER_NAME)
+    else:
+        logger.debug("Kein Back-Cover angegeben")
diff --git a/src/musiksammlung/llm_parser.py b/src/musiksammlung/llm_parser.py
new file mode 100644
index 0000000..295c7c7
--- /dev/null
+++ b/src/musiksammlung/llm_parser.py
@@ -0,0 +1,126 @@
+"""LLM-based parsing of OCR text into structured album data."""
+
+from __future__ import annotations
+
+import json
+import logging
+
+import httpx
+from pydantic import ValidationError
+
+from musiksammlung.models import Album
+
+logger = logging.getLogger(__name__)
+
+SYSTEM_PROMPT = """\
+Du bist ein Parser für CD-Rückseiten und Tracklisten.
+Analysiere den OCR-Text und extrahiere: Artist, Albumtitel, Jahr (falls vorhanden) \
+und für jede CD die Tracks in korrekter Reihenfolge.
+Ignoriere Werbung, Copyright-Hinweise und Kleingedrucktes.
+
+Regeln:
+- Wenn es Hinweise wie "CD 1", "CD 2", "Disc 1", "Disc 2" gibt, ordne die Tracks \
+  der entsprechenden disc_number zu.
+- Ohne Disc-Angabe: alles als disc_number=1 behandeln.
+- Zusätze wie "live", "bonus track", "remastered" gehören in den Tracktitel.
+- Bei Unsicherheit: Feld weglassen oder null setzen, nichts erfinden.
+
+Gib ausschließlich valides JSON zurück, kein anderer Text. Format:
+{
+  "artist": "...",
+  "album": "...",
+  "year": 1987,
+  "discs": [
+    {
+      "disc_number": 1,
+      "name": null,
+      "tracks": [
+        {"track_number": 1, "title": "..."},
+        {"track_number": 2, "title": "..."}
+      ]
+    }
+  ]
+}
+"""
+
+
+def _call_ollama(ocr_text: str, model: str, base_url: str) -> str:
+    """Call the Ollama API and return the response as a string."""
+    response = httpx.post(
+        f"{base_url}/api/generate",
+        json={
+            "model": model,
+            "system": SYSTEM_PROMPT,
+            "prompt": ocr_text,
+            "stream": False,
+            "format": "json",
+        },
+        timeout=120.0,
+    )
+    response.raise_for_status()
+    return response.json()["response"]
+
+
+def _call_openai_compatible(
+    ocr_text: str, model: str, base_url: str, api_key: str | None = None
+) -> str:
+    """Call an OpenAI-compatible API (OpenAI, Anthropic via proxy, etc.)."""
+    headers = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    response = httpx.post(
+        f"{base_url}/v1/chat/completions",
+        headers=headers,
+        json={
+            "model": model,
+            "messages": [
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": ocr_text},
+            ],
+            "response_format": {"type": "json_object"},
+        },
+        timeout=120.0,
+    )
+    response.raise_for_status()
+    return response.json()["choices"][0]["message"]["content"]
+
+
+def parse_tracklist(
+    ocr_text: str,
+    backend: str = "ollama",
+    model: str = "llama3",
+    base_url: str = "http://localhost:11434",
+    api_key: str | None = None,
+    max_retries: int = 2,
+) -> Album:
+    """Parse OCR text into an Album model via an LLM.
+
+    Args:
+        ocr_text: Raw text from the OCR step
+        backend: 'ollama'; any other value selects the OpenAI-compatible API
+        model: Model name
+        base_url: API base URL
+        api_key: API key (only used by OpenAI-compatible backends)
+        max_retries: Number of retries when the answer is not valid Album JSON
+
+    Returns:
+        Validated Album object
+    """
+    for attempt in range(max_retries + 1):
+        try:
+            if backend == "ollama":
+                raw = _call_ollama(ocr_text, model, base_url)
+            else:
+                raw = _call_openai_compatible(ocr_text, model, base_url, api_key)
+
+            data = json.loads(raw)
+            album = Album.model_validate(data)
+            logger.info("LLM-Parsing erfolgreich: %s - %s", album.artist, album.album)
+            return album
+
+        except (json.JSONDecodeError, ValidationError) as e:
+            logger.warning("Versuch %d/%d fehlgeschlagen: %s", attempt + 1, max_retries + 1, e)
+            if attempt == max_retries:
+                msg = f"LLM lieferte nach {max_retries + 1} Versuchen kein valides JSON"
+                raise ValueError(msg) from e
diff --git a/src/musiksammlung/models.py b/src/musiksammlung/models.py
new file mode 100644
index 0000000..f459f2e
--- /dev/null
+++ b/src/musiksammlung/models.py
@@ -0,0 +1,38 @@
+"""Central data models for album, disc and track."""
+
+from __future__ import annotations
+
+import re
+
+from pydantic import BaseModel, field_validator
+
+
+class Track(BaseModel):
+    track_number: int
+    title: str
+
+
+class Disc(BaseModel):
+    disc_number: int
+    name: str | None = None  # e.g. "Live in Berlin"
+    tracks: list[Track]
+
+
+class Album(BaseModel):
+    artist: str
+    album: str
+    year: int | None = None
+    discs: list[Disc]
+
+    @field_validator("album", "artist")
+    @classmethod
+    def sanitize_name(cls, v: str) -> str:
+        """Replace characters that are problematic in file names with underscores."""
+        return re.sub(r'[<>:"/\\|?*]', "_", v).strip()
+
+    @property
+    def folder_name(self) -> str:
+        """Jellyfin-style folder name: 'Album (Year)' or just 'Album'."""
+        if self.year:
+            return f"{self.album} ({self.year})"
+        return self.album
diff --git a/src/musiksammlung/ocr.py b/src/musiksammlung/ocr.py
new file mode 100644
index 0000000..8cbab2a
--- /dev/null
+++ b/src/musiksammlung/ocr.py
@@ -0,0 +1,73 @@
+"""OCR via Tesseract with optional image preprocessing."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+import tempfile
+from pathlib import Path
+
+from PIL import Image, ImageFilter, ImageOps
+
+logger = logging.getLogger(__name__)
+
+
+def preprocess_image(image_path: Path) -> Path:
+    """Improve contrast and sharpness for better OCR results.
+
+    Returns:
+        Path to the preprocessed image (a temporary file the caller must delete).
+    """
+    with Image.open(image_path) as source:
+        img = ImageOps.grayscale(source)
+    img = ImageOps.autocontrast(img, cutoff=2)
+    img = img.filter(ImageFilter.SHARPEN)
+
+    # Close the handle right away: it would otherwise leak, and a still-open temporary file
+    # cannot be reopened for writing on Windows.
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+        tmp_path = Path(tmp.name)
+    img.save(tmp_path, dpi=(300, 300))
+    logger.debug("Vorverarbeitetes Bild: %s → %s", image_path, tmp_path)
+    return tmp_path
+
+
+def run_ocr(image_path: Path, languages: str = "deu+eng") -> str:
+    """Run Tesseract OCR on a single image.
+
+    Args:
+        image_path: Path to the image
+        languages: Tesseract language codes, e.g. 'deu+eng'
+
+    Returns:
+        Recognised text as a string.
+    """
+    cmd = [
+        "tesseract",
+        str(image_path),
+        "stdout",
+        "-l", languages,
+        "--psm", "6",  # treat the image as one uniform block of text
+    ]
+
+    logger.info("OCR: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True)
+
+    if result.returncode != 0:
+        raise RuntimeError(f"Tesseract fehlgeschlagen: {result.stderr}")
+
+    return result.stdout.strip()
+
+
+def ocr_images(image_paths: list[Path], languages: str = "deu+eng") -> str:
+    """Run OCR on several images and join the recognised texts."""
+    texts = []
+    for path in image_paths:
+        preprocessed = preprocess_image(path)
+        try:
+            text = run_ocr(preprocessed, languages)
+            if text:
+                texts.append(text)
+        finally:
+            preprocessed.unlink(missing_ok=True)
+    return "\n\n".join(texts)
diff --git a/src/musiksammlung/organizer.py b/src/musiksammlung/organizer.py
new file mode 100644
index 0000000..22a6a3e
--- /dev/null
+++ b/src/musiksammlung/organizer.py
@@ -0,0 +1,104 @@
+"""Create the directory structure and rename audio files."""
+
+from __future__ import annotations
+
+import logging
+import re
+import shutil
+from pathlib import Path
+
+from musiksammlung.config import AUDIO_EXTENSIONS
+from musiksammlung.models import Album
+
+logger = logging.getLogger(__name__)
+
+
+def _sanitize_filename(name: str) -> str:
+    """Replace characters that are problematic in file names with underscores."""
+    return re.sub(r'[<>:"/\\|?*]', "_", name).strip()
+
+
+def discover_audio_files(directory: Path) -> list[Path]:
+    """Find audio files and sort them numerically by track number.
+
+    A missing directory yields an empty list, so that an absent CDn folder is
+    reported by the caller's track-count check instead of raising here.
+    """
+    if not directory.is_dir():
+        return []
+
+    files = [
+        f for f in directory.iterdir() if f.is_file() and f.suffix.lower() in AUDIO_EXTENSIONS
+    ]
+
+    # Sort by the first number in the file name (e.g. Track_01 → 1); the name itself
+    # breaks ties so the order does not depend on the file-system listing order.
+    def extract_number(p: Path) -> tuple[int, str]:
+        match = re.search(r"(\d+)", p.stem)
+        return (int(match.group(1)) if match else 0, p.name)
+
+    return sorted(files, key=extract_number)
+
+
+def build_mapping(
+    album: Album,
+    input_dir: Path,
+    output_root: Path,
+) -> dict[Path, Path]:
+    """Compute the source→target mapping for all audio files.
+
+    Args:
+        album: Validated album model
+        input_dir: Directory containing the ripped files
+        output_root: Jellyfin music directory
+
+    Returns:
+        Dict of source path → target path
+    """
+    artist_dir = _sanitize_filename(album.artist)
+    album_dir = output_root / artist_dir / _sanitize_filename(album.folder_name)
+    mapping: dict[Path, Path] = {}
+    multi_disc = len(album.discs) > 1
+
+    for disc in album.discs:
+        # Source directory: for multi-disc albums e.g. input_dir/CD1, otherwise input_dir itself
+        if multi_disc:
+            source_dir = input_dir / f"CD{disc.disc_number}"
+            target_dir = album_dir / f"CD{disc.disc_number}"
+        else:
+            source_dir = input_dir
+            target_dir = album_dir
+
+        audio_files = discover_audio_files(source_dir)
+
+        if len(audio_files) != len(disc.tracks):
+            logger.warning(
+                "Disc %d: %d Dateien gefunden, aber %d Tracks im JSON",
+                disc.disc_number,
+                len(audio_files),
+                len(disc.tracks),
+            )
+
+        for audio_file, track in zip(audio_files, disc.tracks):
+            safe_title = _sanitize_filename(track.title)
+            new_name = f"{track.track_number:02d} {safe_title}{audio_file.suffix}"
+            mapping[audio_file] = target_dir / new_name
+
+    return mapping
+
+
+def apply_mapping(mapping: dict[Path, Path], dry_run: bool = False) -> None:
+    """Move/rename the files according to the mapping.
+
+    Args:
+        mapping: Source path → target path
+        dry_run: If True, only log without moving any file
+    """
+    for source, target in mapping.items():
+        if dry_run:
+            logger.info("[DRY-RUN] %s → %s", source, target)
+            continue
+
+        target.parent.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(source), str(target))
+        logger.info("Verschoben: %s → %s", source, target)
diff --git a/src/musiksammlung/playlist.py b/src/musiksammlung/playlist.py
new file mode 100644
index 0000000..9f3bc70
--- /dev/null
+++ b/src/musiksammlung/playlist.py
@@ -0,0 +1,59 @@
+"""M3U playlist generation for Jellyfin."""
+
+from __future__ import annotations
+
+import glob
+import logging
+from pathlib import Path
+
+from musiksammlung.models import Album
+from musiksammlung.organizer import _sanitize_filename
+
+logger = logging.getLogger(__name__)
+
+
+def generate_playlist(album: Album, album_dir: Path) -> Path:
+    """Create an M3U playlist for the whole album.
+
+    The playlist lives in the album root and references all tracks
+    via relative paths (CD1/01 Titel.flac, CD2/01 Titel.flac, ...).
+
+    Returns:
+        Path to the generated playlist file.
+    """
+    playlist_name = _sanitize_filename(album.album) + ".m3u"
+    playlist_path = album_dir / playlist_name
+    multi_disc = len(album.discs) > 1
+
+    lines = ["#EXTM3U"]
+
+    for disc in album.discs:
+        if multi_disc:
+            disc_prefix = f"CD{disc.disc_number}/"
+        else:
+            disc_prefix = ""
+
+        for track in disc.tracks:
+            safe_title = _sanitize_filename(track.title)
+            # Locate the audio file in the target directory; the title is escaped because
+            # characters such as "[" are legal in file names but special in glob patterns
+            pattern = f"{track.track_number:02d} {glob.escape(safe_title)}.*"
+            if multi_disc:
+                search_dir = album_dir / f"CD{disc.disc_number}"
+            else:
+                search_dir = album_dir
+
+            matches = sorted(search_dir.glob(pattern))
+            if matches:
+                filename = matches[0].name
+            else:
+                # Fallback: generic name with .flac
+                filename = f"{track.track_number:02d} {safe_title}.flac"
+                logger.warning("Datei nicht gefunden, Fallback: %s", filename)
+
+            lines.append(f"#EXTINF:0,{track.title}")
+            lines.append(f"{disc_prefix}{filename}")
+
+    playlist_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    logger.info("Playlist erstellt: %s", playlist_path)
+    return playlist_path
diff --git a/src/musiksammlung/ripper.py b/src/musiksammlung/ripper.py
new file mode 100644
index 0000000..a83db15
--- /dev/null
+++ b/src/musiksammlung/ripper.py
@@ -0,0 +1,56 @@
+"""CD ripping via abcde."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def rip_disc(
+    device: str,
+    output_dir: Path,
+    audio_format: str = "flac",
+    eject: bool = True,
+) -> Path:
+    """Rip a CD into output_dir using abcde.
+
+    Args:
+        device: CD drive, e.g. '/dev/cdrom'
+        output_dir: Target directory for the ripped files
+        audio_format: Output format (flac, mp3, ogg, opus)
+        eject: Eject the CD after ripping
+
+    Returns:
+        Path to the directory containing the ripped files
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    cmd = [
+        "abcde",
+        "-n",  # no CDDB lookup
+        "-N",  # non-interactive
+        "-p",  # zero-padded track numbers
+        "-o", audio_format,
+        "-d", device,
+    ]
+    if eject:
+        cmd.append("-x")
+
+    logger.info("Starte Ripping: %s", " ".join(cmd))
+
+    result = subprocess.run(
+        cmd,
+        cwd=str(output_dir),
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        logger.error("abcde Fehler: %s", result.stderr)
+        raise RuntimeError(f"abcde fehlgeschlagen (exit {result.returncode}): {result.stderr}")
+
+    logger.info("Ripping abgeschlossen: %s", output_dir)
+    return output_dir
diff --git a/src/musiksammlung/tagger.py b/src/musiksammlung/tagger.py
new file mode 100644
index 0000000..ac03139
--- /dev/null
+++ b/src/musiksammlung/tagger.py
@@ -0,0 +1,103 @@
+"""Audio tagging with mutagen."""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+from mutagen import File as MutagenFile
+from mutagen.flac import FLAC, Picture
+from mutagen.id3 import APIC, ID3, ID3NoHeaderError
+
+from musiksammlung.models import Album, Disc, Track
+
+logger = logging.getLogger(__name__)
+
+
+def tag_file(
+    path: Path,
+    album: Album,
+    disc: Disc,
+    track: Track,
+) -> None:
+    """Set the audio tags on a single file.
+
+    Uses mutagen in easy mode for format-independent tagging.
+    """
+    audio = MutagenFile(str(path), easy=True)
+    if audio is None:
+        logger.warning("Kann Datei nicht öffnen: %s", path)
+        return
+
+    audio["artist"] = album.artist
+    audio["album"] = album.album
+    audio["albumartist"] = album.artist
+    audio["title"] = track.title
+    audio["tracknumber"] = f"{track.track_number}/{len(disc.tracks)}"
+    audio["discnumber"] = str(disc.disc_number)
+
+    if album.year:
+        audio["date"] = str(album.year)
+
+    audio.save()
+    logger.info("Tags gesetzt: %s", path.name)
+
+
+def tag_album(album: Album, album_dir: Path) -> None:
+    """Set the tags on all audio files of an album."""
+    multi_disc = len(album.discs) > 1
+
+    for disc in album.discs:
+        if multi_disc:
+            disc_dir = album_dir / f"CD{disc.disc_number}"
+        else:
+            disc_dir = album_dir
+
+        for track in disc.tracks:
+            # File name pattern: "01 Title.ext"
+            pattern = f"{track.track_number:02d} *"
+            matches = list(disc_dir.glob(pattern))
+            if matches:
+                tag_file(matches[0], album, disc, track)
+            else:
+                logger.warning(
+                    "Keine Datei für Track %d: %s", track.track_number, track.title
+                )
+
+
+def embed_cover(audio_path: Path, cover_path: Path) -> None:
+    """Embed a cover image into an audio file (FLAC and MP3 only)."""
+    cover_data = cover_path.read_bytes()
+    mime = "image/jpeg" if cover_path.suffix.lower() in (".jpg", ".jpeg") else "image/png"
+
+    suffix = audio_path.suffix.lower()
+
+    if suffix == ".flac":
+        audio = FLAC(str(audio_path))
+        pic = Picture()
+        pic.type = 3  # Front cover
+        pic.mime = mime
+        pic.data = cover_data
+        audio.add_picture(pic)
+        audio.save()
+
+    elif suffix == ".mp3":
+        try:
+            audio = ID3(str(audio_path))
+        except ID3NoHeaderError:
+            # The MP3 carries no ID3 tag yet: start a fresh one instead of failing
+            audio = ID3()
+        audio.add(APIC(
+            encoding=3,
+            mime=mime,
+            type=3,
+            desc="Front cover",
+            data=cover_data,
+        ))
+        audio.save(str(audio_path))
+
+    else:
+        logger.debug("Cover-Embedding für %s nicht unterstützt", suffix)
+        return
+
+    logger.info("Cover eingebettet: %s", audio_path.name)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..c3c2634
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,42 @@
+"""Tests for the data models."""
+
+from musiksammlung.models import Album
+
+
+def test_album_folder_name_with_year():
+    album = Album(artist="Test", album="Mein Album", year=1987, discs=[])
+    assert album.folder_name == "Mein Album (1987)"
+
+
+def test_album_folder_name_without_year():
+    album = Album(artist="Test", album="Mein Album", year=None, discs=[])
+    assert album.folder_name == "Mein Album"
+
+
+def test_sanitize_name():
+    album = Album(artist='Art:ist', album='Al/bum?', year=None, discs=[])
+    assert ":" not in album.artist
+    assert "/" not in album.album
+    assert "?" not in album.album
+
+
+def test_album_from_json():
+    data = {
+        "artist": "Die Toten Hosen",
+        "album": "Opium fürs Volk",
+        "year": 1996,
+        "discs": [
+            {
+                "disc_number": 1,
+                "tracks": [
+                    {"track_number": 1, "title": "Bonnie & Clyde"},
+                    {"track_number": 2, "title": "Zehn kleine Jägermeister"},
+                ],
+            }
+        ],
+    }
+    album = Album.model_validate(data)
+    assert album.artist == "Die Toten Hosen"
+    assert len(album.discs) == 1
+    assert len(album.discs[0].tracks) == 2
+    assert album.discs[0].tracks[1].title == "Zehn kleine Jägermeister"
diff --git a/tests/test_organizer.py b/tests/test_organizer.py
new file mode 100644
index 0000000..c5b077a
--- /dev/null
+++ b/tests/test_organizer.py
@@ -0,0 +1,78 @@
+"""Tests for the organizer."""
+
+from pathlib import Path
+
+from musiksammlung.models import Album, Disc, Track
+from musiksammlung.organizer import build_mapping, discover_audio_files
+
+
+def test_discover_audio_files(tmp_path: Path):
+    """Audio files are found and sorted correctly."""
+    (tmp_path / "Track_03.flac").touch()
+    (tmp_path / "Track_01.flac").touch()
+    (tmp_path / "Track_02.flac").touch()
+    (tmp_path / "cover.jpg").touch()  # must be ignored
+
+    files = discover_audio_files(tmp_path)
+    assert len(files) == 3
+    assert files[0].name == "Track_01.flac"
+    assert files[2].name == "Track_03.flac"
+
+
+def test_build_mapping_single_disc(tmp_path: Path):
+    """Mapping for a single-CD album."""
+    (tmp_path / "Track_01.flac").touch()
+    (tmp_path / "Track_02.flac").touch()
+
+    album = Album(
+        artist="TestArtist",
+        album="TestAlbum",
+        year=2000,
+        discs=[
+            Disc(
+                disc_number=1,
+                tracks=[
+                    Track(track_number=1, title="Erster Song"),
+                    Track(track_number=2, title="Zweiter Song"),
+                ],
+            )
+        ],
+    )
+
+    output = tmp_path / "output"
+    mapping = build_mapping(album, tmp_path, output)
+
+    assert len(mapping) == 2
+    targets = list(mapping.values())
+    assert targets[0].name == "01 Erster Song.flac"
+    assert targets[1].name == "02 Zweiter Song.flac"
+    # Single disc: no CD1 subfolder
+    assert "CD1" not in str(targets[0])
+
+
+def test_build_mapping_multi_disc(tmp_path: Path):
+    """Mapping for a multi-CD album."""
+    cd1 = tmp_path / "CD1"
+    cd2 = tmp_path / "CD2"
+    cd1.mkdir()
+    cd2.mkdir()
+    (cd1 / "Track_01.flac").touch()
+    (cd2 / "Track_01.flac").touch()
+
+    album = Album(
+        artist="Artist",
+        album="Box Set",
+        year=1999,
+        discs=[
+            Disc(disc_number=1, tracks=[Track(track_number=1, title="Song A")]),
+            Disc(disc_number=2, tracks=[Track(track_number=1, title="Song B")]),
+        ],
+    )
+
+    output = tmp_path / "output"
+    mapping = build_mapping(album, tmp_path, output)
+
+    assert len(mapping) == 2
+    targets = list(mapping.values())
+    assert "CD1" in str(targets[0])
+    assert "CD2" in str(targets[1])
diff --git a/tests/test_playlist.py b/tests/test_playlist.py
new file mode 100644
index 0000000..4fe2d83
--- /dev/null
+++ b/tests/test_playlist.py
@@ -0,0 +1,37 @@
+"""Tests for the playlist generation."""
+
+from pathlib import Path
+
+from musiksammlung.models import Album, Disc, Track
+from musiksammlung.playlist import generate_playlist
+
+
+def test_generate_playlist_single_disc(tmp_path: Path):
+    """An M3U playlist is created for a single-CD album."""
+    album = Album(
+        artist="Artist",
+        album="TestAlbum",
+        year=2000,
+        discs=[
+            Disc(
+                disc_number=1,
+                tracks=[
+                    Track(track_number=1, title="Song Eins"),
+                    Track(track_number=2, title="Song Zwei"),
+                ],
+            )
+        ],
+    )
+
+    # Create dummy audio files
+    (tmp_path / "01 Song Eins.flac").touch()
+    (tmp_path / "02 Song Zwei.flac").touch()
+
+    playlist_path = generate_playlist(album, tmp_path)
+    assert playlist_path.exists()
+    content = playlist_path.read_text(encoding="utf-8")
+    assert "#EXTM3U" in content
+    assert "01 Song Eins.flac" in content
+    assert "02 Song Zwei.flac" in content
+    # No CD prefix for a single disc
+    assert "CD1/" not in content