Add 4 new cover/tracklist sources: MB back cover, iTunes, Last.fm, Discogs tracklist

cover_handler.py:
- _download_image(): shared helper replaces duplicated download logic
- download_back_cover(): fetches back cover from MusicBrainz CAA (/back endpoint),
  saves as back.jpg; skips if already present
- _itunes_cover_url() / download_itunes_cover(): iTunes Search API (no auth),
  requests 600x600 artwork; fallback after Discogs
- _lastfm_cover_url() / download_lastfm_cover(): Last.fm album.getinfo
  (requires the LASTFM_API_KEY env var); final cover fallback, skips placeholder images
- resolve_cover(): extended with iTunes → Last.fm fallback chain

metadata_resolver.py:
- _discogs_get_tracklist(): fetches full Discogs release via REST API,
  parses tracklist[] including heading-based disc detection
- _lastfm_tracklist(): fetches Last.fm album.getinfo tracks (LASTFM_API_KEY)
- resolve(): uses Discogs tracklist → Last.fm tracklist as fallback when
  MusicBrainz returns no tracks; LASTFM_API_KEY added to env var block

music_enricher.py:
- process_album(): calls download_back_cover() after execute_album() when MBID known

New cover priority: local → MusicBrainz front → Discogs → iTunes → Last.fm
New tracklist priority: local → YouTube → MusicBrainz → Discogs → Last.fm → OCR
Test suite: 29 → 33 tests (all pass)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-04-29 08:55:17 +02:00
commit 80472653b4
4 changed files with 273 additions and 33 deletions

View file

@ -92,11 +92,12 @@ def normalize_genre(genre: Optional[str]) -> Optional[str]:
_MB_RATE_LIMIT = 1.1 # seconds between MusicBrainz requests
_last_mb_call = 0.0  # presumably the time of the previous MusicBrainz call — confirm at the rate limiter
# API credentials and service endpoints, read from the environment once at
# import time. Each lookup falls back to the default given as the second
# argument (an empty string for keys/tokens) when the variable is unset.
# NOTE(review): several assignments below appear twice in this view; the
# repeated assignments produce the same values.
ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
DISCOGS_TOKEN = os.getenv("DISCOGS_TOKEN", "")
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
DISCOGS_TOKEN = os.getenv("DISCOGS_TOKEN", "")
LASTFM_API_KEY = os.getenv("LASTFM_API_KEY", "")
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
# qwen3:8b (5.2 GB) is sufficient for simple JSON metadata completion and loads quickly (~10 s)
OLLAMA_RESOLVE_MODEL = os.getenv("OLLAMA_RESOLVE_MODEL", "qwen3:8b")
@ -231,6 +232,78 @@ def _discogs_search(artist: Optional[str], album: Optional[str]) -> Optional[Dic
return None
def _parse_discogs_tracklist(entries) -> List[Dict]:
    """Convert the ``tracklist`` array of a Discogs release into track dicts.

    Args:
        entries: Iterable of tracklist entries (dicts) as returned by the
            Discogs release endpoint.

    Returns:
        One dict per non-heading entry with the keys ``disc``, ``number``,
        ``title`` and ``artist`` (``artist`` is always an empty string).
    """
    result: List[Dict] = []
    disc = 1
    track_num = 0
    tracks_on_disc = 0  # tracks emitted since the current disc was opened
    for entry in entries:
        if entry.get("type_") == "heading":
            # A heading ("CD 1", "Side A", ...) opens a new section. Only
            # advance the disc counter once the current disc actually holds
            # tracks, so a heading in front of the very first track (or two
            # headings in a row) does not skip disc 1.
            if tracks_on_disc:
                disc += 1
                tracks_on_disc = 0
            track_num = 0
            continue

        # ``position`` may be missing or null; treat both as "no position".
        pos = entry.get("position") or ""

        # Hyphenated multi-disc positions ("1-2", "CD2-5"): the part before
        # the hyphen is the disc, the part after it is the track number.
        disc_track = re.match(r"\s*(?:[A-Za-z]+\s*)?(\d+)\s*-\s*(\d+)", pos)
        if disc_track and int(disc_track.group(1)) >= 1:
            pos_disc = int(disc_track.group(1))
            if pos_disc != disc:
                disc = pos_disc
                tracks_on_disc = 0
            track_num = int(disc_track.group(2))
        else:
            # Plain positions such as "A1", "1", "B3": first digit group, or
            # simply count upwards when the position carries no number.
            num_match = re.search(r"\d+", pos)
            track_num = int(num_match.group()) if num_match else track_num + 1

        tracks_on_disc += 1
        result.append({
            "disc": disc,
            "number": track_num,
            "title": entry.get("title", ""),
            "artist": "",
        })
    return result


def _discogs_get_tracklist(release_id) -> List[Dict]:
    """Fetch the full tracklist of a Discogs release via the REST API.

    Sends the ``DISCOGS_TOKEN`` as an ``Authorization`` header when it is set.

    Args:
        release_id: Discogs release id, interpolated into the request URL.

    Returns:
        A list of track dicts (``disc``, ``number``, ``title``, ``artist``).
        An empty list is returned on a non-200 response or on any error; errors
        are reported on stderr instead of being raised (best-effort lookup).
    """
    try:
        import requests as _req

        headers = {"User-Agent": "MusicMetadataEnricher/1.0"}
        if DISCOGS_TOKEN:
            headers["Authorization"] = f"Discogs token={DISCOGS_TOKEN}"
        r = _req.get(
            f"https://api.discogs.com/releases/{release_id}",
            headers=headers,
            timeout=10,
        )
        if r.status_code != 200:
            return []
        # ``tracklist`` may be absent or null; both mean "no tracks".
        return _parse_discogs_tracklist(r.json().get("tracklist") or [])
    except Exception as e:
        print(f" ⚠️ Discogs-Tracklist-Fehler: {e}", file=sys.stderr)
        return []
def _lastfm_tracklist(artist: Optional[str], album: Optional[str]) -> List[Dict]:
    """Fetch an album's track listing from the Last.fm ``album.getinfo`` method.

    The API key is read from the ``LASTFM_API_KEY`` environment variable.

    Args:
        artist: Album artist name used for the lookup.
        album: Album title used for the lookup.

    Returns:
        A list of track dicts with the keys ``disc`` (always 1), ``number``
        (the track's ``@attr.rank``, 0 when absent), ``title`` and ``artist``.
        An empty list is returned when the API key, artist or album is
        missing, on a non-200 response, or on any error (reported on stderr).
    """
    lastfm_key = os.getenv("LASTFM_API_KEY", "")
    if not (lastfm_key and artist and album):
        return []

    def _artist_name(track):
        # The per-track artist is only used when it is an object with a name.
        performer = track.get("artist")
        return performer.get("name", "") if isinstance(performer, dict) else ""

    try:
        import requests as _req

        query = {
            "method": "album.getinfo",
            "api_key": lastfm_key,
            "artist": artist,
            "album": album,
            "format": "json",
        }
        response = _req.get(
            "https://ws.audioscrobbler.com/2.0/", params=query, timeout=8
        )
        if response.status_code != 200:
            return []

        album_info = response.json().get("album", {})
        entries = album_info.get("tracks", {}).get("track", [])
        if isinstance(entries, dict):
            # A single track is delivered as an object rather than a list.
            entries = [entries]

        return [
            {
                "disc": 1,
                "number": int(item.get("@attr", {}).get("rank", 0)),
                "title": item.get("name", ""),
                "artist": _artist_name(item),
            }
            for item in entries
        ]
    except Exception as e:
        print(f" ⚠️ Last.fm-Tracklist-Fehler: {e}", file=sys.stderr)
        return []
# ---------------------------------------------------------------------------
# Claude API reasoning (optional)
# ---------------------------------------------------------------------------
@ -485,6 +558,7 @@ def resolve(
})
# Discogs fallback
discogs_release_id = None
if use_api and HAS_DISCOGS and DISCOGS_TOKEN and not release_mbid:
dg = _discogs_search(artist, album)
if dg:
@ -493,9 +567,23 @@ def resolve(
year = year or dg.get("year")
genre = genre or dg.get("genre")
label = label or dg.get("label")
discogs_release_id = dg.get("id")
confidence += 0.15
sources.append("discogs")
# Tracklist-Fallbacks: Discogs → Last.fm (wenn MusicBrainz keine Tracks geliefert hat)
if use_api and not mb_tracks:
if discogs_release_id:
dg_tracks = _discogs_get_tracklist(discogs_release_id)
if dg_tracks:
mb_tracks = dg_tracks
sources.append("discogs-tracklist")
if not mb_tracks:
lfm_tracks = _lastfm_tracklist(artist, album)
if lfm_tracks:
mb_tracks = lfm_tracks
sources.append("lastfm-tracklist")
# LLM-Reasoning für verbleibende Lücken:
# Reihenfolge: Ollama lokal → OpenRouter (DeepSeek, günstig) → Claude API
cl_albumartist: Optional[str] = None