diff --git a/cover_handler.py b/cover_handler.py index 8ebdce7..4debeae 100755 --- a/cover_handler.py +++ b/cover_handler.py @@ -48,6 +48,7 @@ def _image_ok(path: Path) -> bool: def find_local_cover(image_files: List[Path]) -> Optional[Path]: priority = ("folder", "front", "cover", "album") + # Sort by priority keyword, then size descending def key(p: Path): name = p.name.lower() score = next((i for i, kw in enumerate(priority) if kw in name), len(priority)) @@ -60,41 +61,6 @@ def find_local_cover(image_files: List[Path]) -> Optional[Path]: return None -def normalize_cover_to_folder_jpg(cover_path: Path) -> Path: - """ - Stellt sicher dass das Cover als folder.jpg (JPEG) im Album-Verzeichnis liegt. - - Ist es bereits folder.jpg → unverändert zurückgeben. - - Ist es eine andere JPEG → umbenennen. - - Ist es WebP oder PNG → zu JPEG konvertieren, Original löschen. - Gibt den Pfad zur folder.jpg zurück. - """ - dest = cover_path.parent / "folder.jpg" - if cover_path.resolve() == dest.resolve(): - return dest - - suffix = cover_path.suffix.lower() - try: - if suffix in (".jpg", ".jpeg"): - cover_path.rename(dest) - elif HAS_PIL: - import io - with cover_path.open("rb") as f: - raw = f.read() - with Image.open(io.BytesIO(raw)) as img: - buf = io.BytesIO() - img.convert("RGB").save(buf, format="JPEG", quality=92) - dest.write_bytes(buf.getvalue()) - cover_path.unlink() - else: - # PIL nicht verfügbar: einfach umbenennen, auch wenn es kein JPEG ist - cover_path.rename(dest) - print(f" 🖼️ Cover normalisiert → folder.jpg ({cover_path.name})") - except Exception as e: - print(f" ⚠️ Cover-Normalisierung fehlgeschlagen: {e}", file=sys.stderr) - return cover_path - return dest - - def _mb_cover_url(release_mbid: str) -> Optional[str]: url = f"https://coverartarchive.org/release/{release_mbid}/front" if not HAS_REQUESTS: @@ -221,72 +187,14 @@ def embed_cover(audio_path: Path, cover_path: Path) -> bool: return False -def _discogs_cover_url(artist: Optional[str], album: Optional[str]) -> Optional[str]: - """Sucht auf Discogs nach artist+album und gibt die primäre Image-URL zurück.""" - if not HAS_REQUESTS or not artist or not album: - return None - import os - token = os.getenv("DISCOGS_TOKEN", "") - headers = {"User-Agent": "MusicMetadataEnricher/1.0"} - if token: - headers["Authorization"] = f"Discogs token={token}" - try: - r = requests.get( - "https://api.discogs.com/database/search", - params={"artist": artist, "release_title": album, "type": "release", "per_page": 3}, - headers=headers, - timeout=10, - ) - if r.status_code != 200: - return None - results = r.json().get("results", []) - for result in results: - cover = result.get("cover_image") or result.get("thumb") - if cover and "spacer" not in cover: - return cover - except Exception as e: - print(f" ⚠️ Discogs-Suchfehler: {e}", file=sys.stderr) - return None - - -def download_discogs_cover(artist: Optional[str], album: Optional[str], dest_dir: Path) -> Optional[Path]: - url = _discogs_cover_url(artist, album) - if not url: - return None - dest = dest_dir / "folder.jpg" - try: - r = requests.get(url, timeout=15, headers={"User-Agent": "MusicMetadataEnricher/1.0"}) - if r.status_code != 200: - return None - ct = r.headers.get("content-type", "") - if ("png" in ct or url.lower().endswith(".png")) and HAS_PIL: - import io - with Image.open(io.BytesIO(r.content)) as img: - buf = io.BytesIO() - img.convert("RGB").save(buf, format="JPEG", quality=92) - dest.write_bytes(buf.getvalue()) - else: - dest.write_bytes(r.content) - if _image_ok(dest): - return dest - dest.unlink(missing_ok=True) - except Exception as e: - print(f" ⚠️ Discogs-Cover-Fehler: {e}", file=sys.stderr) - dest.unlink(missing_ok=True) - return None - - def resolve_cover( image_files: List[Path], release_mbid: Optional[str], album_dir: Path, - artist: Optional[str] = None, - album: Optional[str] = None, ) -> tuple[Optional[Path], Optional[str]]: """Returns (cover_path, source_label).""" local = find_local_cover(image_files) if local: - local = normalize_cover_to_folder_jpg(local) return local, "local" if release_mbid: @@ -294,9 +202,4 @@ def resolve_cover( if downloaded: return downloaded, "musicbrainz" - if artist or album: - downloaded = download_discogs_cover(artist, album, album_dir) - if downloaded: - return downloaded, "discogs" - return None, None diff --git a/executor.py b/executor.py index 5a7a2cc..cfa559c 100755 --- a/executor.py +++ b/executor.py @@ -42,52 +42,21 @@ def _safe_name(s: str) -> str: return re.sub(r"\s+", "_", s).strip("._-") -_CLASSICAL_GENRE_KEYWORDS = { - "classical", "klassik", "baroque", "barock", "romantic", "romantik", - "opera", "oper", "operetta", "operette", "chamber", "kammermusik", - "symphon", "concerto", "oratorio", "sacred", "kirchenmusik", - "renaissance", "medieval", "contemporary classical", -} - -_CLASSICAL_COMPOSER_KEYWORDS = { - # Bekannte Komponisten als Signal (Nachname reicht) - "bach", "beethoven", "mozart", "handel", "haydn", "schubert", "brahms", - "chopin", "liszt", "schumann", "wagner", "verdi", "puccini", "vivaldi", - "telemann", "buxtehude", "monteverdi", "palestrina", "purcell", - "mahler", "bruckner", "dvorak", "tchaikovsky", "tschaikowski", - "debussy", "ravel", "satie", "strauss", "sibelius", "grieg", -} - - def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool: """ - Klassik-Schema (Performer_-_Komponist_-_Werk) wird angewendet wenn: - 1. Genre explizit klassisch ist, ODER - 2. track_artist ist ein bekannter Komponist (und ≠ albumartist), ODER - 3. albumartist ≠ track_artist UND beide sind bekannte Komponistennamen. - Reine Performer≠Komponist-Heuristik ohne Genre-Bestätigung ist abgeschaltet - (zu viele Falschpositive bei Samplern, Jazz, Volksmusik). + Classical schema applies when performer (albumartist) ≠ composer (track_artist), + which covers both 'real' classical music and jazz-on-classical-themes albums. + Genre keyword matching is used as additional signal but not required. """ aa = (albumartist or "").casefold().strip() ta = (track_artist or "").casefold().strip() - g = (genre or "").casefold().strip() - if not aa or aa in ("various artists", "unknown artist", "unknown"): return False if not ta or ta in ("unknown artist", "unknown"): - return False + return False # placeholder, not a real composer if aa == ta: return False - - # Primäres Signal: Genre-Keyword - if any(kw in g for kw in _CLASSICAL_GENRE_KEYWORDS): - return True - - # Sekundäres Signal: track_artist enthält bekannten Komponistennamen - if any(kw in ta for kw in _CLASSICAL_COMPOSER_KEYWORDS): - return True - - return False + return True # performer ≠ composer → classical naming def _proposed_filename( diff --git a/metadata_resolver.py b/metadata_resolver.py index 94a1ca7..f109d04 100755 --- a/metadata_resolver.py +++ b/metadata_resolver.py @@ -33,63 +33,6 @@ try: except ImportError: HAS_ANTHROPIC = False -# --------------------------------------------------------------------------- -# Genre normalization -# --------------------------------------------------------------------------- - -_GENRE_MAP: Dict[str, str] = { - # Deutsch → Englisch (Jellyfin-Standardbegriffe) - "volksmusik": "Folk", - "volkslieder": "Folk", - "volkslied": "Folk", - "heimatlieder": "Folk", - "schlager": "Schlager", - "deutsche schlager": "Schlager", - "marsch": "March", - "marschmusik": "March", - "militaermusik": "March", - "militärmusik": "March", - "kirchenmusik": "Sacred", - "chormusik": "Choral", - "kinderlieder": "Children", - "weihnachtslieder": "Christmas", - "weihnachtsmusik": "Christmas", - "blasmusik": "Brass Band", - "operette": "Operetta", - "oper": "Opera", - "kammermusik": "Chamber Music", - "klassik": "Classical", - "classic": "Classical", - "klassische musik": "Classical", - "barock": "Baroque", - "romantik": "Romantic", - # Englische Varianten vereinheitlichen - "rhythm and blues": "R&B", - "rhythmic soul": "R&B", - "rock and roll": "Rock 'n' Roll", - "rock & roll": "Rock 'n' Roll", - "easy listening": "Easy Listening", - "vocal pop": "Pop", - "adult contemporary": "Pop", - "big band": "Big Band", - "swing music": "Swing", - "latin jazz": "Latin Jazz", - "bossa nova": "Bossa Nova", - "nueva cancion": "Nueva Canción", -} - - -def normalize_genre(genre: Optional[str]) -> Optional[str]: - if not genre: - return genre - key = genre.strip().lower() - normalized = _GENRE_MAP.get(key) - if normalized: - return normalized - # Titlcase wenn nicht in der Map (verhindert ALL CAPS oder all lowercase) - return genre.strip().title() if genre == genre.upper() or genre == genre.lower() else genre.strip() - - _MB_RATE_LIMIT = 1.1 # seconds between MusicBrainz requests _last_mb_call = 0.0 ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "") @@ -562,7 +505,7 @@ def resolve( album=album, albumartist=albumartist, date=year, - genre=normalize_genre(genre), + genre=genre, label=label, mbid=release_mbid, cover_path=None, diff --git a/music_enricher.py b/music_enricher.py index c99af9e..e14645f 100755 --- a/music_enricher.py +++ b/music_enricher.py @@ -148,8 +148,6 @@ def process_album( hints.cover_images, proposal.mbid, album_dir, - artist=proposal.albumartist, - album=proposal.album, ) if cover_path and not args.no_cover: proposal.cover_path = cover_path @@ -230,71 +228,6 @@ def process_album( return stats -def _print_status(args: argparse.Namespace) -> None: - """Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten.""" - from mutagen import File as MutagenFile - - IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"} - AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"} - - album_dirs: List[Path] = [] - if args.album: - album_dirs.append(args.album.expanduser().resolve()) - for raw in args.paths: - root = Path(raw).expanduser().resolve() - if root.is_dir(): - album_dirs.extend(collect_album_dirs(root)) - - no_cover, bad_tags, ok = [], [], [] - - for album_dir in sorted(album_dirs): - has_cover = any( - f.suffix.lower() in IMAGE_EXTS - for f in album_dir.rglob("*") if f.is_file() - ) - audio_files = [ - f for f in sorted(album_dir.rglob("*")) - if f.is_file() and f.suffix.lower() in AUDIO_EXTS - ] - missing_tags = [] - for af in audio_files[:3]: # nur erste 3 prüfen (schnell) - try: - tags = MutagenFile(str(af), easy=True) - if tags is None: - missing_tags.append(af.name) - continue - title = (tags.get("title") or [""])[0].strip() - artist = (tags.get("artist") or [""])[0].strip() - if not title or title.lower() in ("unknown", "audiotrack", "") \ - or not artist or artist.lower() in ("unknown", ""): - missing_tags.append(af.name) - except Exception: - missing_tags.append(af.name) - - problems = [] - if not has_cover: - problems.append("kein Cover") - if missing_tags: - problems.append(f"schlechte Tags ({len(missing_tags)}/{min(3,len(audio_files))} geprüft)") - - if problems: - bad_tags.append((album_dir, problems)) - else: - ok.append(album_dir) - - print(f"\n{'=' * 60}") - print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben") - print(f"{'=' * 60}") - print(f" ✅ In Ordnung: {len(ok)}") - print(f" ⚠️ Mit Problemen: {len(bad_tags)}") - print() - for album_dir, problems in bad_tags: - print(f" 💿 {album_dir.name}") - for p in problems: - print(f" → {p}") - print("=" * 60) - - def main() -> None: parser = argparse.ArgumentParser( description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin", @@ -329,17 +262,9 @@ def main() -> None: parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator", help="Pfad zu jellyfin_playlist_generator.py\n" "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)") - parser.add_argument("--status", action="store_true", - help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben") args = parser.parse_args() - if args.status: - if not args.paths and not args.album: - parser.error("--status benötigt mindestens einen Pfad.") - _print_status(args) - return - if not args.album and not args.paths: parser.error("Mindestens ein Pfad oder --album erforderlich.") diff --git a/test_suite_enricher.py b/test_suite_enricher.py index e61121b..71bc588 100755 --- a/test_suite_enricher.py +++ b/test_suite_enricher.py @@ -188,140 +188,6 @@ def test_extract_hints_multi_disc() -> str: return f"disc numbers detected: {disc_nums}" -# --------------------------------------------------------------------------- -# Vertical tracklist parser Tests -# --------------------------------------------------------------------------- - -def test_vertical_tracklist_basic() -> str: - from hint_extractor import _normalize_vertical_tracklist - text = "1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka pro trubku\n4:01" - result = _normalize_vertical_tracklist(text) - assert result is not None, "should recognize vertical format" - assert "1. Katka" in result, f"got: {result!r}" - assert "2. Záludná" in result, f"got: {result!r}" - return f"normalized: {result[:60]!r}" - - -def test_vertical_tracklist_without_duration() -> str: - from hint_extractor import _normalize_vertical_tracklist - text = "1\nFirst Song\n2\nSecond Song\n3\nThird Song" - result = _normalize_vertical_tracklist(text) - assert result is not None, "should work without durations" - assert "1. First Song" in result, f"got: {result!r}" - return f"no-duration OK: {result[:60]!r}" - - -def test_vertical_tracklist_not_triggered_for_normal() -> str: - from hint_extractor import _normalize_vertical_tracklist - text = "1. Dancing Queen\n2. Waterloo\n3. Fernando" - result = _normalize_vertical_tracklist(text) - assert result is None, f"should return None for normal format, got: {result!r}" - return "correctly returns None for standard format" - - -# --------------------------------------------------------------------------- -# Single-CD disc handling Tests -# --------------------------------------------------------------------------- - -def test_single_cd_tracklist_match() -> str: - """Track-Nummer-Match darf nicht disc_num erfordern (Single-CD hat disc=None).""" - from hint_extractor import _parse_tracklist - from models import TrackHints, AlbumHints, AlbumScan - from pathlib import Path - import tempfile - - with tempfile.TemporaryDirectory() as tmpdir: - root = Path(tmpdir) / "Tufaranka_-_Katka_dovadi" - root.mkdir() - (root / "01_-_Tufaranka_-_AudioTrack_01.mp3").write_bytes(b"\x00" * 100) - (root / "tracklist.txt").write_text("1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka\n4:01") - - from scanner import scan_album - from hint_extractor import extract_hints - scan = scan_album(root) - hints = extract_hints(scan, use_ocr=False) - track = hints.tracks[0] - assert track.title == "Katka dovádí", f"expected tracklist title, got: {track.title!r}" - return f"single-CD match OK: title={track.title!r}" - - -# --------------------------------------------------------------------------- -# Genre normalization Tests -# --------------------------------------------------------------------------- - -def test_genre_normalize_german() -> str: - from metadata_resolver import normalize_genre - assert normalize_genre("volksmusik") == "Folk", "volksmusik → Folk" - assert normalize_genre("klassik") == "Classical", "klassik → Classical" - assert normalize_genre("marschmusik") == "March", "marschmusik → March" - return "German genres normalized correctly" - - -def test_genre_normalize_english_variants() -> str: - from metadata_resolver import normalize_genre - assert normalize_genre("rhythm and blues") == "R&B" - assert normalize_genre("rock and roll") == "Rock 'n' Roll" - return "English variants normalized correctly" - - -def test_genre_normalize_titlecase() -> str: - from metadata_resolver import normalize_genre - assert normalize_genre("JAZZ") == "Jazz", f"got: {normalize_genre('JAZZ')!r}" - assert normalize_genre("folk") == "Folk", f"got: {normalize_genre('folk')!r}" - assert normalize_genre("Big Band") == "Big Band" # unchanged - return "Titlecase normalization OK" - - -# --------------------------------------------------------------------------- -# _is_classical() Tests -# --------------------------------------------------------------------------- - -def test_is_classical_by_genre() -> str: - from executor import _is_classical - assert _is_classical("Gardiner", "Bach", "Classical"), "Classical genre should trigger" - assert _is_classical("Herreweghe", "Handel", "Baroque"), "Baroque should trigger" - return "genre-based detection OK" - - -def test_is_classical_by_composer() -> str: - from executor import _is_classical - assert _is_classical("Gardiner", "Bach", ""), "Bach as track_artist should trigger" - assert _is_classical("Hurford", "beethoven", ""), "beethoven should trigger" - return "composer-name detection OK" - - -def test_is_classical_false_for_pop() -> str: - from executor import _is_classical - assert not _is_classical("Trini Lopez", "Trini Lopez", "Pop"), "same artist = not classical" - assert not _is_classical("ABBA", "ABBA", "Pop"), "ABBA is not classical" - assert not _is_classical("Trini Lopez", "", "R&B"), "empty track_artist = not classical" - return "pop albums correctly not classical" - - -def test_is_classical_false_for_folk() -> str: - from executor import _is_classical - assert not _is_classical("Tufaranka", "Tufaranka", "Folk"), "Folk is not classical" - return "Folk correctly not classical" - - -# --------------------------------------------------------------------------- -# cover normalize Tests -# --------------------------------------------------------------------------- - -def test_normalize_cover_renames_front_jpg() -> str: - from cover_handler import normalize_cover_to_folder_jpg - import tempfile, shutil - with tempfile.TemporaryDirectory() as tmpdir: - root = Path(tmpdir) - front = root / "Front.jpg" - front.write_bytes(b"\xff\xd8" + b"\x00" * 200) - result = normalize_cover_to_folder_jpg(front) - assert result.name == "folder.jpg", f"expected folder.jpg, got {result.name!r}" - assert (root / "folder.jpg").exists(), "folder.jpg should exist" - assert not front.exists(), "Front.jpg should be gone" - return "Front.jpg → folder.jpg rename OK" - - # --------------------------------------------------------------------------- # executor Tests # --------------------------------------------------------------------------- @@ -386,19 +252,6 @@ def main() -> None: ("UNIT_15_proposed_filename_single_disc", test_proposed_filename_single_disc), ("UNIT_16_proposed_filename_multi_disc", test_proposed_filename_multi_disc), ("UNIT_17_proposed_filename_sanitizes_chars", test_proposed_filename_sanitizes_chars), - # Neue Tests - ("UNIT_18_vertical_tracklist_basic", test_vertical_tracklist_basic), - ("UNIT_19_vertical_tracklist_no_duration", test_vertical_tracklist_without_duration), - ("UNIT_20_vertical_tracklist_no_false_pos", test_vertical_tracklist_not_triggered_for_normal), - ("UNIT_21_single_cd_tracklist_match", test_single_cd_tracklist_match), - ("UNIT_22_genre_normalize_german", test_genre_normalize_german), - ("UNIT_23_genre_normalize_english", test_genre_normalize_english_variants), - ("UNIT_24_genre_normalize_titlecase", test_genre_normalize_titlecase), - ("UNIT_25_is_classical_by_genre", test_is_classical_by_genre), - ("UNIT_26_is_classical_by_composer", test_is_classical_by_composer), - ("UNIT_27_is_classical_false_pop", test_is_classical_false_for_pop), - ("UNIT_28_is_classical_false_folk", test_is_classical_false_for_folk), - ("UNIT_29_normalize_cover_renames", test_normalize_cover_renames_front_jpg), ] for test_id, fn in cases: