diff --git a/cover_handler.py b/cover_handler.py index 4debeae..8ebdce7 100755 --- a/cover_handler.py +++ b/cover_handler.py @@ -48,7 +48,6 @@ def _image_ok(path: Path) -> bool: def find_local_cover(image_files: List[Path]) -> Optional[Path]: priority = ("folder", "front", "cover", "album") - # Sort by priority keyword, then size descending def key(p: Path): name = p.name.lower() score = next((i for i, kw in enumerate(priority) if kw in name), len(priority)) @@ -61,6 +60,41 @@ def find_local_cover(image_files: List[Path]) -> Optional[Path]: return None +def normalize_cover_to_folder_jpg(cover_path: Path) -> Path: + """ + Stellt sicher dass das Cover als folder.jpg (JPEG) im Album-Verzeichnis liegt. + - Ist es bereits folder.jpg → unverändert zurückgeben. + - Ist es eine andere JPEG → umbenennen. + - Ist es WebP oder PNG → zu JPEG konvertieren, Original löschen. + Gibt den Pfad zur folder.jpg zurück. + """ + dest = cover_path.parent / "folder.jpg" + if cover_path.resolve() == dest.resolve(): + return dest + + suffix = cover_path.suffix.lower() + try: + if suffix in (".jpg", ".jpeg"): + cover_path.rename(dest) + elif HAS_PIL: + import io + with cover_path.open("rb") as f: + raw = f.read() + with Image.open(io.BytesIO(raw)) as img: + buf = io.BytesIO() + img.convert("RGB").save(buf, format="JPEG", quality=92) + dest.write_bytes(buf.getvalue()) + cover_path.unlink() + else: + # PIL nicht verfügbar: einfach umbenennen, auch wenn es kein JPEG ist + cover_path.rename(dest) + print(f" 🖼️ Cover normalisiert → folder.jpg ({cover_path.name})") + except Exception as e: + print(f" ⚠️ Cover-Normalisierung fehlgeschlagen: {e}", file=sys.stderr) + return cover_path + return dest + + def _mb_cover_url(release_mbid: str) -> Optional[str]: url = f"https://coverartarchive.org/release/{release_mbid}/front" if not HAS_REQUESTS: @@ -187,14 +221,72 @@ def embed_cover(audio_path: Path, cover_path: Path) -> bool: return False +def _discogs_cover_url(artist: Optional[str], album: 
Optional[str]) -> Optional[str]: + """Sucht auf Discogs nach artist+album und gibt die primäre Image-URL zurück.""" + if not HAS_REQUESTS or not artist or not album: + return None + import os + token = os.getenv("DISCOGS_TOKEN", "") + headers = {"User-Agent": "MusicMetadataEnricher/1.0"} + if token: + headers["Authorization"] = f"Discogs token={token}" + try: + r = requests.get( + "https://api.discogs.com/database/search", + params={"artist": artist, "release_title": album, "type": "release", "per_page": 3}, + headers=headers, + timeout=10, + ) + if r.status_code != 200: + return None + results = r.json().get("results", []) + for result in results: + cover = result.get("cover_image") or result.get("thumb") + if cover and "spacer" not in cover: + return cover + except Exception as e: + print(f" ⚠️ Discogs-Suchfehler: {e}", file=sys.stderr) + return None + + +def download_discogs_cover(artist: Optional[str], album: Optional[str], dest_dir: Path) -> Optional[Path]: + url = _discogs_cover_url(artist, album) + if not url: + return None + dest = dest_dir / "folder.jpg" + try: + r = requests.get(url, timeout=15, headers={"User-Agent": "MusicMetadataEnricher/1.0"}) + if r.status_code != 200: + return None + ct = r.headers.get("content-type", "") + if ("png" in ct or url.lower().endswith(".png")) and HAS_PIL: + import io + with Image.open(io.BytesIO(r.content)) as img: + buf = io.BytesIO() + img.convert("RGB").save(buf, format="JPEG", quality=92) + dest.write_bytes(buf.getvalue()) + else: + dest.write_bytes(r.content) + if _image_ok(dest): + return dest + dest.unlink(missing_ok=True) + except Exception as e: + print(f" ⚠️ Discogs-Cover-Fehler: {e}", file=sys.stderr) + dest.unlink(missing_ok=True) + return None + + def resolve_cover( image_files: List[Path], release_mbid: Optional[str], album_dir: Path, + artist: Optional[str] = None, + album: Optional[str] = None, ) -> tuple[Optional[Path], Optional[str]]: """Returns (cover_path, source_label).""" local = 
find_local_cover(image_files) if local: + local = normalize_cover_to_folder_jpg(local) return local, "local" if release_mbid: @@ -202,4 +294,9 @@ def resolve_cover( if downloaded: return downloaded, "musicbrainz" + if artist or album: + downloaded = download_discogs_cover(artist, album, album_dir) + if downloaded: + return downloaded, "discogs" + return None, None diff --git a/executor.py b/executor.py index cfa559c..5a7a2cc 100755 --- a/executor.py +++ b/executor.py @@ -42,21 +42,52 @@ def _safe_name(s: str) -> str: return re.sub(r"\s+", "_", s).strip("._-") +_CLASSICAL_GENRE_KEYWORDS = { + "classical", "klassik", "baroque", "barock", "romantic", "romantik", + "opera", "oper", "operetta", "operette", "chamber", "kammermusik", + "symphon", "concerto", "oratorio", "sacred", "kirchenmusik", + "renaissance", "medieval", "contemporary classical", +} + +_CLASSICAL_COMPOSER_KEYWORDS = { + # Bekannte Komponisten als Signal (Nachname reicht) + "bach", "beethoven", "mozart", "handel", "haydn", "schubert", "brahms", + "chopin", "liszt", "schumann", "wagner", "verdi", "puccini", "vivaldi", + "telemann", "buxtehude", "monteverdi", "palestrina", "purcell", + "mahler", "bruckner", "dvorak", "tchaikovsky", "tschaikowski", + "debussy", "ravel", "satie", "strauss", "sibelius", "grieg", +} + + def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool: """ - Classical schema applies when performer (albumartist) ≠ composer (track_artist), - which covers both 'real' classical music and jazz-on-classical-themes albums. - Genre keyword matching is used as additional signal but not required. + Klassik-Schema (Performer_-_Komponist_-_Werk) wird angewendet wenn: + 1. Genre explizit klassisch ist, ODER + 2. track_artist ist ein bekannter Komponist (und ≠ albumartist), ODER + 3. albumartist ≠ track_artist UND beide sind bekannte Komponistennamen. 
+ Reine Performer≠Komponist-Heuristik ohne Genre-Bestätigung ist abgeschaltet + (zu viele Falschpositive bei Samplern, Jazz, Volksmusik). """ aa = (albumartist or "").casefold().strip() ta = (track_artist or "").casefold().strip() + g = (genre or "").casefold().strip() + if not aa or aa in ("various artists", "unknown artist", "unknown"): return False if not ta or ta in ("unknown artist", "unknown"): - return False # placeholder, not a real composer + return False if aa == ta: return False - return True # performer ≠ composer → classical naming + + # Primäres Signal: Genre-Keyword + if any(kw in g for kw in _CLASSICAL_GENRE_KEYWORDS): + return True + + # Sekundäres Signal: track_artist enthält bekannten Komponistennamen + if any(kw in ta for kw in _CLASSICAL_COMPOSER_KEYWORDS): + return True + + return False def _proposed_filename( diff --git a/metadata_resolver.py b/metadata_resolver.py index f109d04..94a1ca7 100755 --- a/metadata_resolver.py +++ b/metadata_resolver.py @@ -33,6 +33,63 @@ try: except ImportError: HAS_ANTHROPIC = False +# --------------------------------------------------------------------------- +# Genre normalization +# --------------------------------------------------------------------------- + +_GENRE_MAP: Dict[str, str] = { + # Deutsch → Englisch (Jellyfin-Standardbegriffe) + "volksmusik": "Folk", + "volkslieder": "Folk", + "volkslied": "Folk", + "heimatlieder": "Folk", + "schlager": "Schlager", + "deutsche schlager": "Schlager", + "marsch": "March", + "marschmusik": "March", + "militaermusik": "March", + "militärmusik": "March", + "kirchenmusik": "Sacred", + "chormusik": "Choral", + "kinderlieder": "Children", + "weihnachtslieder": "Christmas", + "weihnachtsmusik": "Christmas", + "blasmusik": "Brass Band", + "operette": "Operetta", + "oper": "Opera", + "kammermusik": "Chamber Music", + "klassik": "Classical", + "classic": "Classical", + "klassische musik": "Classical", + "barock": "Baroque", + "romantik": "Romantic", + # Englische Varianten 
vereinheitlichen + "rhythm and blues": "R&B", + "rhythmic soul": "R&B", + "rock and roll": "Rock 'n' Roll", + "rock & roll": "Rock 'n' Roll", + "easy listening": "Easy Listening", + "vocal pop": "Pop", + "adult contemporary": "Pop", + "big band": "Big Band", + "swing music": "Swing", + "latin jazz": "Latin Jazz", + "bossa nova": "Bossa Nova", + "nueva cancion": "Nueva Canción", +} + + +def normalize_genre(genre: Optional[str]) -> Optional[str]: + if not genre: + return genre + key = genre.strip().lower() + normalized = _GENRE_MAP.get(key) + if normalized: + return normalized + # Titlecase wenn nicht in der Map (verhindert ALL CAPS oder all lowercase) + return genre.strip().title() if genre == genre.upper() or genre == genre.lower() else genre.strip() + + _MB_RATE_LIMIT = 1.1 # seconds between MusicBrainz requests _last_mb_call = 0.0 ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "") @@ -505,7 +562,7 @@ def resolve( album=album, albumartist=albumartist, date=year, - genre=genre, + genre=normalize_genre(genre), label=label, mbid=release_mbid, cover_path=None, diff --git a/music_enricher.py b/music_enricher.py index e14645f..c99af9e 100755 --- a/music_enricher.py +++ b/music_enricher.py @@ -148,6 +148,8 @@ def process_album( hints.cover_images, proposal.mbid, album_dir, + artist=proposal.albumartist, + album=proposal.album, ) if cover_path and not args.no_cover: proposal.cover_path = cover_path @@ -228,6 +230,71 @@ def process_album( return stats +def _print_status(args: argparse.Namespace) -> None: + """Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten.""" + from mutagen import File as MutagenFile + + IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"} + AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"} + + album_dirs: List[Path] = [] + if args.album: + album_dirs.append(args.album.expanduser().resolve()) + for raw in args.paths: + root = Path(raw).expanduser().resolve() + if root.is_dir(): + 
album_dirs.extend(collect_album_dirs(root)) + + no_cover, bad_tags, ok = [], [], [] + + for album_dir in sorted(album_dirs): + has_cover = any( + f.suffix.lower() in IMAGE_EXTS + for f in album_dir.rglob("*") if f.is_file() + ) + audio_files = [ + f for f in sorted(album_dir.rglob("*")) + if f.is_file() and f.suffix.lower() in AUDIO_EXTS + ] + missing_tags = [] + for af in audio_files[:3]: # nur erste 3 prüfen (schnell) + try: + tags = MutagenFile(str(af), easy=True) + if tags is None: + missing_tags.append(af.name) + continue + title = (tags.get("title") or [""])[0].strip() + artist = (tags.get("artist") or [""])[0].strip() + if not title or title.lower() in ("unknown", "audiotrack", "") \ + or not artist or artist.lower() in ("unknown", ""): + missing_tags.append(af.name) + except Exception: + missing_tags.append(af.name) + + problems = [] + if not has_cover: + problems.append("kein Cover") + if missing_tags: + problems.append(f"schlechte Tags ({len(missing_tags)}/{min(3,len(audio_files))} geprüft)") + + if problems: + bad_tags.append((album_dir, problems)) + else: + ok.append(album_dir) + + print(f"\n{'=' * 60}") + print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben") + print(f"{'=' * 60}") + print(f" ✅ In Ordnung: {len(ok)}") + print(f" ⚠️ Mit Problemen: {len(bad_tags)}") + print() + for album_dir, problems in bad_tags: + print(f" 💿 {album_dir.name}") + for p in problems: + print(f" → {p}") + print("=" * 60) + + def main() -> None: parser = argparse.ArgumentParser( description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin", @@ -262,9 +329,17 @@ def main() -> None: parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator", help="Pfad zu jellyfin_playlist_generator.py\n" "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)") + parser.add_argument("--status", action="store_true", + help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben") args = parser.parse_args() + if args.status: 
+ if not args.paths and not args.album: + parser.error("--status benötigt mindestens einen Pfad.") + _print_status(args) + return + if not args.album and not args.paths: parser.error("Mindestens ein Pfad oder --album erforderlich.") diff --git a/test_suite_enricher.py b/test_suite_enricher.py index 71bc588..e61121b 100755 --- a/test_suite_enricher.py +++ b/test_suite_enricher.py @@ -188,6 +188,140 @@ def test_extract_hints_multi_disc() -> str: return f"disc numbers detected: {disc_nums}" +# --------------------------------------------------------------------------- +# Vertical tracklist parser Tests +# --------------------------------------------------------------------------- + +def test_vertical_tracklist_basic() -> str: + from hint_extractor import _normalize_vertical_tracklist + text = "1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka pro trubku\n4:01" + result = _normalize_vertical_tracklist(text) + assert result is not None, "should recognize vertical format" + assert "1. Katka" in result, f"got: {result!r}" + assert "2. Záludná" in result, f"got: {result!r}" + return f"normalized: {result[:60]!r}" + + +def test_vertical_tracklist_without_duration() -> str: + from hint_extractor import _normalize_vertical_tracklist + text = "1\nFirst Song\n2\nSecond Song\n3\nThird Song" + result = _normalize_vertical_tracklist(text) + assert result is not None, "should work without durations" + assert "1. First Song" in result, f"got: {result!r}" + return f"no-duration OK: {result[:60]!r}" + + +def test_vertical_tracklist_not_triggered_for_normal() -> str: + from hint_extractor import _normalize_vertical_tracklist + text = "1. Dancing Queen\n2. Waterloo\n3. 
Fernando" + result = _normalize_vertical_tracklist(text) + assert result is None, f"should return None for normal format, got: {result!r}" + return "correctly returns None for standard format" + + +# --------------------------------------------------------------------------- +# Single-CD disc handling Tests +# --------------------------------------------------------------------------- + +def test_single_cd_tracklist_match() -> str: + """Track-Nummer-Match darf nicht disc_num erfordern (Single-CD hat disc=None).""" + from hint_extractor import _parse_tracklist + from models import TrackHints, AlbumHints, AlbumScan + from pathlib import Path + import tempfile + + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) / "Tufaranka_-_Katka_dovadi" + root.mkdir() + (root / "01_-_Tufaranka_-_AudioTrack_01.mp3").write_bytes(b"\x00" * 100) + (root / "tracklist.txt").write_text("1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka\n4:01") + + from scanner import scan_album + from hint_extractor import extract_hints + scan = scan_album(root) + hints = extract_hints(scan, use_ocr=False) + track = hints.tracks[0] + assert track.title == "Katka dovádí", f"expected tracklist title, got: {track.title!r}" + return f"single-CD match OK: title={track.title!r}" + + +# --------------------------------------------------------------------------- +# Genre normalization Tests +# --------------------------------------------------------------------------- + +def test_genre_normalize_german() -> str: + from metadata_resolver import normalize_genre + assert normalize_genre("volksmusik") == "Folk", "volksmusik → Folk" + assert normalize_genre("klassik") == "Classical", "klassik → Classical" + assert normalize_genre("marschmusik") == "March", "marschmusik → March" + return "German genres normalized correctly" + + +def test_genre_normalize_english_variants() -> str: + from metadata_resolver import normalize_genre + assert normalize_genre("rhythm and blues") == "R&B" + assert 
normalize_genre("rock and roll") == "Rock 'n' Roll" + return "English variants normalized correctly" + + +def test_genre_normalize_titlecase() -> str: + from metadata_resolver import normalize_genre + assert normalize_genre("JAZZ") == "Jazz", f"got: {normalize_genre('JAZZ')!r}" + assert normalize_genre("folk") == "Folk", f"got: {normalize_genre('folk')!r}" + assert normalize_genre("Big Band") == "Big Band" # unchanged + return "Titlecase normalization OK" + + +# --------------------------------------------------------------------------- +# _is_classical() Tests +# --------------------------------------------------------------------------- + +def test_is_classical_by_genre() -> str: + from executor import _is_classical + assert _is_classical("Gardiner", "Bach", "Classical"), "Classical genre should trigger" + assert _is_classical("Herreweghe", "Handel", "Baroque"), "Baroque should trigger" + return "genre-based detection OK" + + +def test_is_classical_by_composer() -> str: + from executor import _is_classical + assert _is_classical("Gardiner", "Bach", ""), "Bach as track_artist should trigger" + assert _is_classical("Hurford", "beethoven", ""), "beethoven should trigger" + return "composer-name detection OK" + + +def test_is_classical_false_for_pop() -> str: + from executor import _is_classical + assert not _is_classical("Trini Lopez", "Trini Lopez", "Pop"), "same artist = not classical" + assert not _is_classical("ABBA", "ABBA", "Pop"), "ABBA is not classical" + assert not _is_classical("Trini Lopez", "", "R&B"), "empty track_artist = not classical" + return "pop albums correctly not classical" + + +def test_is_classical_false_for_folk() -> str: + from executor import _is_classical + assert not _is_classical("Tufaranka", "Tufaranka", "Folk"), "Folk is not classical" + return "Folk correctly not classical" + + +# --------------------------------------------------------------------------- +# cover normalize Tests +# 
--------------------------------------------------------------------------- + +def test_normalize_cover_renames_front_jpg() -> str: + from cover_handler import normalize_cover_to_folder_jpg + import tempfile, shutil + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + front = root / "Front.jpg" + front.write_bytes(b"\xff\xd8" + b"\x00" * 200) + result = normalize_cover_to_folder_jpg(front) + assert result.name == "folder.jpg", f"expected folder.jpg, got {result.name!r}" + assert (root / "folder.jpg").exists(), "folder.jpg should exist" + assert not front.exists(), "Front.jpg should be gone" + return "Front.jpg → folder.jpg rename OK" + + # --------------------------------------------------------------------------- # executor Tests # --------------------------------------------------------------------------- @@ -252,6 +386,19 @@ def main() -> None: ("UNIT_15_proposed_filename_single_disc", test_proposed_filename_single_disc), ("UNIT_16_proposed_filename_multi_disc", test_proposed_filename_multi_disc), ("UNIT_17_proposed_filename_sanitizes_chars", test_proposed_filename_sanitizes_chars), + # Neue Tests + ("UNIT_18_vertical_tracklist_basic", test_vertical_tracklist_basic), + ("UNIT_19_vertical_tracklist_no_duration", test_vertical_tracklist_without_duration), + ("UNIT_20_vertical_tracklist_no_false_pos", test_vertical_tracklist_not_triggered_for_normal), + ("UNIT_21_single_cd_tracklist_match", test_single_cd_tracklist_match), + ("UNIT_22_genre_normalize_german", test_genre_normalize_german), + ("UNIT_23_genre_normalize_english", test_genre_normalize_english_variants), + ("UNIT_24_genre_normalize_titlecase", test_genre_normalize_titlecase), + ("UNIT_25_is_classical_by_genre", test_is_classical_by_genre), + ("UNIT_26_is_classical_by_composer", test_is_classical_by_composer), + ("UNIT_27_is_classical_false_pop", test_is_classical_false_for_pop), + ("UNIT_28_is_classical_false_folk", test_is_classical_false_for_folk), + 
("UNIT_29_normalize_cover_renames", test_normalize_cover_renames_front_jpg), ] for test_id, fn in cases: