Add project skeleton: CLI pipeline for CD digitization

Modular Python package with Typer CLI (scan/apply/process commands),
Pydantic data models, OCR via Tesseract, LLM-based tracklist parsing,
mutagen audio tagging, M3U playlist generation, and cover processing.
Includes 8 passing tests and ruff lint config.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-15 00:47:54 +01:00
commit 3e073250ca
17 changed files with 1027 additions and 0 deletions

View file

@ -0,0 +1,38 @@
"""Zentrale Datenmodelle für Album, Disc und Track."""
from __future__ import annotations
import re
from pydantic import BaseModel, field_validator
class Track(BaseModel):
    """A single track on a disc, identified by its number and title."""

    # Number of the track as given in the tracklist.
    track_number: int
    # Track title.
    title: str
class Disc(BaseModel):
    """One disc of an album together with its tracks."""

    # Number of the disc within the album.
    disc_number: int
    # Optional disc title, e.g. "Live in Berlin"; defaults to None when absent.
    name: str | None = None
    # Tracks contained on this disc.
    tracks: list[Track]
class Album(BaseModel):
    """An album: artist, title, optional release year, and its discs.

    The ``artist`` and ``album`` fields are passed through
    :meth:`sanitize_name` during validation.
    """

    artist: str
    album: str
    # Release year; defaults to None when not provided.
    year: int | None = None
    discs: list[Disc]

    @field_validator("album", "artist")
    @classmethod
    def sanitize_name(cls, v: str) -> str:
        """Remove characters that are problematic in file names.

        Each of ``< > : " / \\ | ? *`` is replaced by an underscore, then
        leading and trailing whitespace is stripped from the result.
        """
        replaced = re.sub(r'[<>:"/\\|?*]', "_", v)
        return replaced.strip()

    @property
    def folder_name(self) -> str:
        """Return the Jellyfin-style folder name: 'Album (Year)' or just 'Album'."""
        # A falsy year (None or 0) yields the bare album title.
        return f"{self.album} ({self.year})" if self.year else self.album