Music_Metadata_Enricher/music_enricher.py
dschlueter 80472653b4 Add 4 new cover/tracklist sources: MB back cover, iTunes, Last.fm, Discogs tracklist
cover_handler.py:
- _download_image(): shared helper replaces duplicated download logic
- download_back_cover(): fetches back cover from MusicBrainz CAA (/back endpoint),
  saves as back.jpg; skips if already present
- _itunes_cover_url() / download_itunes_cover(): iTunes Search API (no auth),
  requests 600x600 artwork; fallback after Discogs
- _lastfm_cover_url() / download_lastfm_cover(): Last.fm album.getinfo
  (LASTFM_API_KEY env var); last cover fallback, skips placeholder images
- resolve_cover(): extended with iTunes → Last.fm fallback chain

metadata_resolver.py:
- _discogs_get_tracklist(): fetches full Discogs release via REST API,
  parses tracklist[] including heading-based disc detection
- _lastfm_tracklist(): fetches Last.fm album.getinfo tracks (LASTFM_API_KEY)
- resolve(): uses Discogs tracklist → Last.fm tracklist as fallback when
  MusicBrainz returns no tracks; LASTFM_API_KEY added to env var block

music_enricher.py:
- process_album(): calls download_back_cover() after execute_album() when MBID known

New cover priority:  local → MusicBrainz front → Discogs → iTunes → Last.fm
New tracklist priority: local → YouTube → MusicBrainz → Discogs → Last.fm → OCR
Test suite: 29 → 33 tests (all pass)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-29 08:55:17 +02:00

406 lines
16 KiB
Python
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
music_enricher.py
KI-gestützter Musik-Metadaten-Enricher für Jellyfin-Bibliotheken.
Pipeline pro Album:
Scan → HintExtractor → MetadataResolver → CoverHandler → Review → Executor
→ (optional) Jellyfin Playlist Generator
"""
from __future__ import annotations
import argparse
import importlib.util
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
try:
from tqdm import tqdm
HAS_TQDM = True
except ImportError:
HAS_TQDM = False
from models import AlbumProposal
from scanner import scan_album, collect_album_dirs
from hint_extractor import extract_hints
from metadata_resolver import resolve
from cover_handler import resolve_cover, download_back_cover
from executor import execute_album, write_report
def maybe_tqdm(iterable, show: bool, **kwargs):
    """Optionally wrap ``iterable`` in a tqdm progress bar.

    Args:
        iterable: The object to iterate over.
        show: When truthy, ``iterable`` is wrapped in ``tqdm``.
        **kwargs: Forwarded unchanged to ``tqdm``.

    Returns:
        ``tqdm(iterable, **kwargs)`` if ``show`` is truthy, otherwise the
        very same ``iterable`` object.
    """
    if not show:
        return iterable
    return tqdm(iterable, **kwargs)
# ---------------------------------------------------------------------------
# Jellyfin Playlist Generator integration
# ---------------------------------------------------------------------------
def _find_jellyfin_generator(album_dir: Path, explicit: Optional[Path]) -> Optional[Path]:
"""Sucht jellyfin_playlist_generator.py — explizit oder im Geschwister-Verzeichnis."""
if explicit:
return explicit.expanduser().resolve() if explicit.exists() else None
# Auto-Discover: ../Jellyfin_Playlist_Generator/ relativ zum Album-Root
candidate = album_dir.parent / "Jellyfin_Playlist_Generator" / "jellyfin_playlist_generator.py"
return candidate if candidate.exists() else None
def _run_jellyfin_generator(album_dir: Path, generator_path: Path) -> None:
"""
Importiert den Jellyfin Playlist Generator und erstellt die Playlist für album_dir.
Kein subprocess, kein cleanup_all_playlists — nur gezielt dieses eine Album.
"""
try:
spec = importlib.util.spec_from_file_location("jellyfin_pg", generator_path)
mod = importlib.util.module_from_spec(spec) # type: ignore[arg-type]
sys.modules["jellyfin_pg"] = mod # muss vor exec_module stehen (für @dataclass)
spec.loader.exec_module(mod) # type: ignore[union-attr]
media_files = mod.collect_media_recursive(album_dir)
if not media_files:
print(f" ⚠️ Jellyfin-Generator: keine Mediendateien in {album_dir.name}", file=sys.stderr)
return
deduped = sorted(set(media_files), key=mod.natural_sort_key)
tracks = mod.enrich_tracks(
[mod.TrackInfo(p, p.stem, p.suffix.lower()) for p in deduped],
album_dir,
)
tracks = mod.sort_tracks_for_playlist(tracks, album_dir)
pl_path = mod.generate_playlist(album_dir, tracks, None, dry_run=False)
print(f" 🎵 Jellyfin-Playlist erstellt: {pl_path.name}")
except Exception as e:
print(f" ⚠️ Jellyfin-Generator-Fehler ({album_dir.name}): {e}", file=sys.stderr)
# ---------------------------------------------------------------------------
# Review / Display
# ---------------------------------------------------------------------------
def _print_proposal(proposal: AlbumProposal) -> None:
conf_bar = "" * int(proposal.confidence * 10) + "" * (10 - int(proposal.confidence * 10))
print(f"\n{'' * 60}")
print(f"💿 {proposal.album_dir.name}")
print(f" Album: {proposal.album}")
print(f" Artist: {proposal.albumartist}")
print(f" Jahr: {proposal.date or ''}")
print(f" Genre: {proposal.genre or ''}")
print(f" Label: {proposal.label or ''}")
print(f" Cover: {proposal.cover_source or ''} ({proposal.cover_path.name if proposal.cover_path else 'keins'})")
print(f" Konfidenz: [{conf_bar}] {proposal.confidence:.0%} Quellen: {', '.join(proposal.sources) or ''}")
if proposal.notes:
for n in proposal.notes:
print(f" {n}")
print(f" Tracks ({len(proposal.tracks)}):")
for tp in proposal.tracks[:8]:
tn = f"{tp.disc_number}-{tp.track_number:02d}" if tp.disc_number and tp.disc_number > 1 else (
f"{tp.track_number:02d}" if tp.track_number else "??")
display_artist = tp.artist or proposal.albumartist or "Unknown"
print(f" {tn} {display_artist} {tp.title}")
if len(proposal.tracks) > 8:
print(f" … und {len(proposal.tracks) - 8} weitere")
def _interactive_review(proposal: AlbumProposal) -> bool:
    """Show ``proposal`` and ask the user how to proceed.

    Returns:
        ``True`` when the user accepts (empty input, ``j`` or ``y``) and
        ``False`` when the user skips (``s``).

    Entering ``q`` terminates the program via ``sys.exit(0)``. Any other
    input is ignored and the prompt is shown again.
    """
    _print_proposal(proposal)

    prompt = "\n [Enter] Akzeptieren [s] Überspringen [q] Abbrechen: "
    # Maps every terminating answer (except "q") to the function's result.
    decisions = {"": True, "j": True, "y": True, "s": False}

    while True:
        choice = input(prompt).strip().lower()
        if choice in decisions:
            return decisions[choice]
        if choice == "q":
            sys.exit(0)
# ---------------------------------------------------------------------------
# Main pipeline
# ---------------------------------------------------------------------------
def _dry_run_report_row(proposal: AlbumProposal, album_dir: Path, tp: Any) -> Dict[str, Any]:
    """Build the report row describing the proposed changes for one track."""
    return {
        "status": "dry-run",
        "album_dir": str(album_dir.name),
        "track_path": str(tp.path),
        "old_title": tp.path.stem,
        "new_title": tp.title,
        "old_artist": "",
        "new_artist": tp.artist,
        "album": proposal.album,
        "albumartist": proposal.albumartist,
        "date": proposal.date or "",
        "genre": proposal.genre or "",
        "label": proposal.label or "",
        "track_number": tp.track_number or "",
        "disc_number": tp.disc_number or "",
        "cover_embedded": False,
        "renamed_to": tp.new_filename or "",
        "confidence": f"{proposal.confidence:.2f}",
        "sources": ", ".join(proposal.sources),
    }


def process_album(
    album_dir: Path,
    args: argparse.Namespace,
    report_data: List[Dict[str, Any]],
) -> Dict[str, int]:
    """Run the enrichment pipeline for a single album directory.

    Steps: scan -> extract hints -> resolve metadata -> resolve cover ->
    review (dry-run / interactive / auto) -> execute -> back cover ->
    optional Jellyfin playlist.

    Args:
        album_dir: Directory of the album to process.
        args: Parsed command-line options (see ``main``).
        report_data: List that report rows are appended to; in dry-run mode
            this function appends one row per track itself, otherwise the
            list is handed to ``execute_album``.

    Returns:
        Counters with the keys ``tags_written``, ``covers_embedded``,
        ``files_renamed``, ``errors`` and ``skipped``. Any ``Exception``
        raised while processing is printed to stderr with a traceback and
        counted under ``errors`` instead of propagating.
    """
    stats = {"tags_written": 0, "covers_embedded": 0, "files_renamed": 0,
             "errors": 0, "skipped": 0}
    try:
        scan = scan_album(album_dir)
        if not scan.audio_files:
            stats["skipped"] += 1
            return stats

        hints = extract_hints(scan, use_ocr=not args.no_api)
        proposal = resolve(
            hints,
            use_fingerprint=not args.no_fingerprint,
            use_api=not args.no_api,
            use_claude=not args.no_api,
        )

        # Cover art. With --no-cover the result was never used, so the
        # resolver is not invoked at all in that case.
        # NOTE(review): resolve_cover presumably may download files even in
        # dry-run mode — confirm against cover_handler.
        if not args.no_cover:
            cover_path, cover_source = resolve_cover(
                hints.cover_images,
                proposal.mbid,
                album_dir,
                artist=proposal.albumartist,
                album=proposal.album,
            )
            if cover_path:
                proposal.cover_path = cover_path
                proposal.cover_source = cover_source

        # Set proposed filenames if --rename
        if args.rename:
            from executor import _proposed_filename
            for tp in proposal.tracks:
                tp.new_filename = _proposed_filename(
                    tp, tp.path.suffix,
                    albumartist=proposal.albumartist or "",
                    genre=proposal.genre or "",
                )

        # Dry run: show the proposal, record report rows, write nothing.
        if args.dry_run:
            _print_proposal(proposal)
            report_data.extend(
                _dry_run_report_row(proposal, album_dir, tp) for tp in proposal.tracks
            )
            return stats

        # Review step
        if args.auto:
            if proposal.confidence < args.confidence:
                print(f" ⏭️ Konfidenz {proposal.confidence:.0%} < {args.confidence:.0%} → übersprungen: {album_dir.name}")
                stats["skipped"] += 1
                return stats
            _print_proposal(proposal)
        elif not _interactive_review(proposal):
            stats["skipped"] += 1
            return stats

        album_stats = execute_album(
            proposal=proposal,
            backup_dir=args.backup,
            do_rename=args.rename,
            embed_cover_art=args.embed_cover,
            dry_run=False,
            report_data=report_data,
        )
        for k, v in album_stats.items():
            stats[k] = stats.get(k, 0) + v

        # Fetch the back cover when an MBID is known. Dry-run has already
        # returned above, so only --no-cover needs to be checked here.
        if proposal.mbid and not args.no_cover:
            back = download_back_cover(proposal.mbid, album_dir)
            if back:
                print(f" 🖼️ Back-Cover heruntergeladen: {back.name}")

        # Invoke the Jellyfin Playlist Generator if one can be found.
        generator_path = _find_jellyfin_generator(album_dir, getattr(args, "playlist_generator", None))
        if generator_path:
            _run_jellyfin_generator(album_dir, generator_path)
    except Exception as e:
        stats["errors"] += 1
        print(f" ❌ Fehler in {album_dir.name}: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
    return stats
def _print_status(args: argparse.Namespace) -> None:
    """Scan the library and list albums with missing covers or poor tags.

    Reads ``args.album`` and ``args.paths`` to collect album directories,
    inspects each one and prints a summary to stdout.

    Per album the check is:
      * cover: at least one image file (by suffix) anywhere below the album
        directory;
      * tags: the first three audio files (sorted by path) are opened with
        mutagen; a file counts as badly tagged when it cannot be read, or
        its title/artist is empty or a known placeholder.
    """
    from mutagen import File as MutagenFile

    IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
    AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
    SAMPLE_SIZE = 3  # only probe the first few files to keep the scan fast

    def _has_bad_tags(audio_file: Path) -> bool:
        """Return True if title/artist are unreadable, empty or placeholders."""
        try:
            tags = MutagenFile(str(audio_file), easy=True)
            if tags is None:
                return True
            title = (tags.get("title") or [""])[0].strip()
            artist = (tags.get("artist") or [""])[0].strip()
        except Exception:
            # Best effort: any file that cannot be inspected counts as bad.
            return True
        if not title or title.lower() in ("unknown", "audiotrack"):
            return True
        return not artist or artist.lower() == "unknown"

    album_dirs: List[Path] = []
    if args.album:
        album_dirs.append(args.album.expanduser().resolve())
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if root.is_dir():
            album_dirs.extend(collect_album_dirs(root))
        else:
            # Same warning main() prints, instead of silently ignoring it.
            print(f"⚠️ Kein Verzeichnis: {root}")

    with_problems, ok = [], []
    for album_dir in sorted(album_dirs):
        # Walk the album tree once and derive both checks from the result.
        files = [f for f in album_dir.rglob("*") if f.is_file()]
        has_cover = any(f.suffix.lower() in IMAGE_EXTS for f in files)
        audio_files = sorted(f for f in files if f.suffix.lower() in AUDIO_EXTS)

        sample = audio_files[:SAMPLE_SIZE]
        missing_tags = [af.name for af in sample if _has_bad_tags(af)]

        problems = []
        if not has_cover:
            problems.append("kein Cover")
        if missing_tags:
            problems.append(f"schlechte Tags ({len(missing_tags)}/{len(sample)} geprüft)")

        if problems:
            with_problems.append((album_dir, problems))
        else:
            ok.append(album_dir)

    print(f"\n{'=' * 60}")
    print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
    print(f"{'=' * 60}")
    print(f" ✅ In Ordnung: {len(ok)}")
    print(f" ⚠️ Mit Problemen: {len(with_problems)}")
    print()
    for album_dir, problems in with_problems:
        print(f" 💿 {album_dir.name}")
        for p in problems:
            print(f"{p}")
    print("=" * 60)
def main() -> None:
    """Command-line entry point.

    Parses the arguments, collects the album directories from ``--album``
    and the positional root paths, runs ``process_album`` for each of them
    and prints a summary. With ``--status`` only the library status is
    printed.

    Exits via ``parser.error`` (status 2) on invalid arguments and with
    status 1 when no album directory was found.
    """
    parser = argparse.ArgumentParser(
        description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("paths", nargs="*",
                        help="Root-Verzeichnisse (rekursiv nach Alben durchsucht)")
    parser.add_argument("--album", type=Path,
                        help="Einzelnes Album-Verzeichnis verarbeiten")
    parser.add_argument("--dry-run", action="store_true",
                        help="Vorschläge anzeigen, nichts schreiben")
    parser.add_argument("--auto", action="store_true",
                        help="Kein interaktiver Review-Schritt")
    parser.add_argument("--confidence", type=float, default=0.85,
                        help="Min-Konfidenz für --auto (default: 0.85)")
    parser.add_argument("--rename", action="store_true",
                        help="Dateien nach Schema umbenennen: TT_-_Artist_-_Titel.ext")
    parser.add_argument("--embed-cover", action="store_true",
                        help="Cover-Art in Audiodatei einbetten")
    parser.add_argument("--backup", type=Path,
                        help="Backup-Verzeichnis vor Änderungen")
    parser.add_argument("--report", type=Path,
                        help="CSV-Report der Änderungen")
    parser.add_argument("--no-fingerprint", action="store_true",
                        help="AcoustID-Fingerprinting überspringen")
    parser.add_argument("--no-api", action="store_true",
                        help="Keine externen API-Calls")
    parser.add_argument("--no-cover", action="store_true",
                        help="Kein Cover-Art-Download")
    parser.add_argument("--no-tqdm", action="store_true",
                        help="Fortschrittsanzeige deaktivieren")
    parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator",
                        help="Pfad zu jellyfin_playlist_generator.py\n"
                             "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
    parser.add_argument("--status", action="store_true",
                        help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
    args = parser.parse_args()

    # The confidence is compared against values shown as 0-100 %; anything
    # outside 0..1 (or NaN) would silently skip or accept every album.
    if not 0.0 <= args.confidence <= 1.0:
        parser.error("--confidence muss zwischen 0.0 und 1.0 liegen.")

    if args.status:
        if not args.paths and not args.album:
            parser.error("--status benötigt mindestens einen Pfad.")
        _print_status(args)
        return

    if not args.album and not args.paths:
        parser.error("Mindestens ein Pfad oder --album erforderlich.")

    # The progress bar is only used in non-interactive (--auto) runs.
    show_progress = HAS_TQDM and not args.no_tqdm and args.auto
    report_data: List[Dict[str, Any]] = []
    totals: Dict[str, int] = {
        "albums": 0, "skipped": 0, "tags_written": 0,
        "covers_embedded": 0, "files_renamed": 0, "errors": 0,
    }

    # Collect album directories
    album_dirs: List[Path] = []
    if args.album:
        album_dirs.append(args.album.expanduser().resolve())
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))

    # Drop duplicates (e.g. --album inside one of the roots) while keeping
    # the order, so no album is processed twice.
    album_dirs = list(dict.fromkeys(album_dirs))

    if not album_dirs:
        print("⚠️ Keine Album-Verzeichnisse gefunden.")
        sys.exit(1)

    print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
    # Printed unconditionally: the former OLLAMA_HOST check was always true.
    print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
    if not args.no_api:
        print("🔍 MusicBrainz-Lookup aktiv.")
    if args.dry_run:
        print("🧪 DRY-RUN — nichts wird geschrieben.")

    for album_dir in maybe_tqdm(album_dirs, show_progress,
                                desc="Alben", unit="album", dynamic_ncols=True):
        stats = process_album(album_dir, args, report_data)
        totals["albums"] += 1
        for k in ("skipped", "tags_written", "covers_embedded", "files_renamed", "errors"):
            totals[k] += stats.get(k, 0)

    if args.report and report_data:
        write_report(report_data, args.report)

    print(f"\n{'=' * 50}")
    print("✅ Zusammenfassung:")
    print(f" 💿 Alben verarbeitet: {totals['albums']}")
    print(f" ⏭️ Übersprungen: {totals['skipped']}")
    print(f" 🏷️ Tags geschrieben: {totals['tags_written']}")
    print(f" 🖼️ Cover eingebettet: {totals['covers_embedded']}")
    print(f" 📝 Dateien umbenannt: {totals['files_renamed']}")
    print(f" ❌ Fehler: {totals['errors']}")
    if args.dry_run:
        print(" 🧪 Modus: DRY-RUN")
    print("=" * 50)


if __name__ == "__main__":
    main()