Robust tracklist matching: fuzzy titles, catalog numbers, correct disc/track
hint_extractor:
- _norm_for_match(): strips all non-alphanumeric characters for punctuation-agnostic comparison
- _catalog_key(): extracts the BWV/Op./K./HWV/... catalog number for matching (fixes abbreviated filenames like "Fantasia_Cm_BWV_562" vs "Fantasia In C Minor, BWV 562")
- Matching priority: exact number+disc → exact title → fuzzy title → catalog number
- Tracklist disc+track OVERRIDE the M3U position when a match is found (M3U is only used as a last fallback; fixes wrong alphabetical ordering)

metadata_resolver:
- LLM prompt now defines artist/albumartist roles explicitly (artist = composer for classical; albumartist = performer/interpreter)
- LLM albumartist can override dir_artist when confidence < 0.4
- _build_track_proposals: when track artist == albumartist (performer from filename), the composer (album-level artist) is used as the track artist instead
- Tracklist header (first lines before the tracks) is included in the LLM prompt for label/year/album-title discovery
- import re added (was missing)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5011cef4db
commit
d1391fc36a
2 changed files with 134 additions and 30 deletions
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
|
|
@ -179,20 +180,42 @@ def _discogs_search(artist: Optional[str], album: Optional[str]) -> Optional[Dic
|
|||
|
||||
# Lines that look like tracklist entries rather than header text:
#   "12 Title" / "1- Title"   (pattern kept from the previous implementation)
#   "1-1 Title" / "2-03. Title" (disc-track form; the previous pattern missed it)
#   "1. Title" / "12) Title"  (numbered list)
_TRACKLIST_ENTRY_RE = re.compile(
    r"^(?:\d[\d\-]\s+\S|\d{1,2}-\d{1,3}[.)]?\s+\S|\d{1,3}[.)]\s+)"
)


def _extract_tracklist_header(
    tracklist_text: Optional[str],
    max_chars: int = 400,
    max_lines: int = 15,
) -> str:
    """Return the non-empty lines that precede the first track entry.

    Lines are stripped and blank lines are skipped. Collection stops at the
    first line matching ``_TRACKLIST_ENTRY_RE`` or once the collected lines
    exceed ``max_chars`` characters in total (the line that crosses the limit
    is kept). At most ``max_lines`` lines are returned, joined with newlines.
    Returns an empty string when there is no text or no header line.
    """
    if not tracklist_text:
        return ""

    header_lines: List[str] = []
    collected_chars = 0
    for raw_line in tracklist_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if _TRACKLIST_ENTRY_RE.match(line):
            # First line that looks like a track: the header is over.
            break
        header_lines.append(line)
        collected_chars += len(line)
        if collected_chars > max_chars:
            break
    return "\n".join(header_lines[:max_lines])


def _build_resolve_prompt(hints: AlbumHints, partial: Dict) -> str:
    """Build the (German) LLM prompt used to resolve album metadata.

    Args:
        hints: Album hints; this function reads ``tracks`` (each with
            ``disc_number``, ``track_number``, ``title``, ``artist`` and
            ``path``), ``tracklist_text``, ``album_dir``, ``dir_artist``,
            ``dir_album`` and ``dir_year``.
        partial: Metadata resolved so far; ``"album"`` and ``"year"`` are used
            as fallbacks when the directory name yields nothing.

    Returns:
        The prompt text. It lists at most the first 20 tracks and, when
        ``hints.tracklist_text`` has lines before its first track entry,
        a "Tracklist-Kopf" section with those lines.
    """

    def _track_line(t) -> str:
        disc_prefix = f"D{t.disc_number}-" if t.disc_number else ""
        artist_suffix = f" [{t.artist}]" if t.artist else ""
        return (
            f" - {disc_prefix}T{t.track_number or '?'}: "
            f"{t.title or t.path.stem}{artist_suffix}"
        )

    tracks_summary = "\n".join(_track_line(t) for t in hints.tracks[:20])

    # Header lines of the tracklist (before the track entries) often carry
    # label / year / album-title information.
    tracklist_header = _extract_tracklist_header(hints.tracklist_text)
    header_section = (
        f"Tracklist-Kopf (Label/Jahr/Albumtitel):\n{tracklist_header}\n\n"
        if tracklist_header
        else ""
    )

    # ``partial`` may contain the keys with a None value, in which case
    # ``dict.get(key, default)`` would return None and the prompt would show
    # the literal text "None"; fall back on falsy values instead.
    album_hint = hints.dir_album or partial.get("album") or "?"
    year_hint = hints.dir_year or partial.get("year") or "unbekannt"

    return (
        "Du bist ein Musikexperte. Analysiere diese Album-Daten und gib korrekte Metadaten zurück.\n"
        "Korrigiere auch erkennbare Tippfehler (Verzeichnisnamen enthalten oft Schreibfehler).\n\n"
        "WICHTIGE FELDDEFINITIONEN:\n"
        '- "artist" = Komponist (Klassik) ODER Band/Sänger (Pop/Rock/Jazz)\n'
        '- "albumartist" = Interpret/Performer/Dirigent (Klassik) ODER gleich wie artist (Pop)\n'
        " Beispiel Klassik: artist='Johann Sebastian Bach', albumartist='Peter Hurford'\n"
        " Beispiel Pop: artist='ABBA', albumartist='ABBA'\n\n"
        f"Verzeichnisname: {hints.album_dir.name}\n"
        f"Hinweis Künstler/Titel (aus Verzeichnis, kann vertauscht oder falsch sein): "
        f"{hints.dir_artist or '?'} / {album_hint}\n"
        f"Jahr: {year_hint}\n"
        f"{header_section}"
        f"Tracks:\n{tracks_summary}\n\n"
        'Antworte NUR mit einem JSON-Objekt (null wenn unbekannt):\n'
        '{"artist": ..., "album": ..., "albumartist": ..., "year": ..., "genre": ..., "label": ...}'
    )
|
||||
|
||||
|
|
@ -408,6 +431,7 @@ def resolve(
|
|||
|
||||
# LLM-Reasoning für verbleibende Lücken:
|
||||
# Reihenfolge: Ollama lokal → OpenRouter (DeepSeek, günstig) → Claude API
|
||||
cl_albumartist: Optional[str] = None
|
||||
partial = {"artist": artist, "album": album, "year": year}
|
||||
if use_claude and use_api:
|
||||
if not artist or not album or confidence < 0.5:
|
||||
|
|
@ -427,18 +451,28 @@ def resolve(
|
|||
year = year or cl.get("year")
|
||||
genre = genre or cl.get("genre")
|
||||
label = label or cl.get("label")
|
||||
cl_albumartist = cl.get("albumartist") or None
|
||||
confidence += 0.10
|
||||
sources.append("llm-resolve")
|
||||
|
||||
# Finalize albumartist
|
||||
# dir_artist hat Vorrang: wenn der Verzeichnisname einen Künstler nennt
|
||||
# (z.B. "Eugen_Cicero_-_Jazz_meets_Classic"), ist das der Albumkünstler —
|
||||
# auch wenn die Track-Dateinamen die Komponisten-Namen enthalten.
|
||||
# Priorität: (1) LLM-albumartist bei niedriger Konfidenz
|
||||
# (2) dir_artist wenn Verzeichnisname einen Künstler nennt
|
||||
# (3) Heuristiken (Various Artists, Mehrheitsabstimmung)
|
||||
# Rationale: "Bach_Organ_-_Peter_Hurford" → dir_artist="Bach Organ" ist kein Künstler,
|
||||
# aber der Verzeichnisname sieht aus wie Künstler; LLM kann das korrekt auflösen.
|
||||
track_artists = [t.artist for t in hints.tracks if t.artist]
|
||||
from collections import Counter
|
||||
distinct_artists = set(a for a in track_artists if a)
|
||||
if hints.dir_artist:
|
||||
# Verzeichnisname nennt explizit einen Künstler → immer verwenden
|
||||
|
||||
_bad_aa = {"various artists", "unknown artist", "unknown", "va"}
|
||||
def _good_aa(s: Optional[str]) -> bool:
|
||||
return bool(s) and s.casefold().strip() not in _bad_aa
|
||||
|
||||
if _good_aa(cl_albumartist) and confidence < 0.4:
|
||||
# LLM kennt den echten Albumkünstler besser als der Verzeichnisname
|
||||
albumartist = cl_albumartist # type: ignore[assignment]
|
||||
elif hints.dir_artist:
|
||||
albumartist = hints.dir_artist
|
||||
elif len(distinct_artists) >= 3:
|
||||
albumartist = "Various Artists"
|
||||
|
|
@ -452,7 +486,9 @@ def resolve(
|
|||
confidence = min(confidence, 1.0)
|
||||
|
||||
# Build track proposals
|
||||
track_proposals = _build_track_proposals(hints, mb_tracks, album, artist)
|
||||
# `artist` = Komponist/Hauptkünstler (LLM-aufgelöst), `albumartist` = Performer
|
||||
# Werden beide weitergegeben damit _build_track_proposals richtig zuordnen kann.
|
||||
track_proposals = _build_track_proposals(hints, mb_tracks, album, albumartist, composer=artist)
|
||||
|
||||
return AlbumProposal(
|
||||
album_dir=hints.album_dir,
|
||||
|
|
@ -476,15 +512,26 @@ def _build_track_proposals(
|
|||
mb_tracks: Optional[List],
|
||||
album: str,
|
||||
album_artist: str,
|
||||
composer: Optional[str] = None,
|
||||
) -> List[TrackProposal]:
|
||||
proposals: List[TrackProposal] = []
|
||||
|
||||
for th in sorted(hints.tracks, key=lambda t: (t.disc_number or 1, t.track_number or 9999, str(t.path))):
|
||||
title = th.title
|
||||
artist = th.artist or album_artist
|
||||
track_num = th.track_number
|
||||
disc_num = th.disc_number
|
||||
|
||||
# Klassik-Fall: Performer aus Dateiname, Komponist aus LLM
|
||||
# Wenn th.artist == albumartist (Performer), und wir den Komponisten kennen,
|
||||
# wird der Komponist als Track-Artist gesetzt → Filename: TT_-_Performer_-_Komponist_-_Werk
|
||||
th_artist_cf = (th.artist or "").casefold().strip()
|
||||
aa_cf = album_artist.casefold().strip()
|
||||
if composer and th_artist_cf == aa_cf and th_artist_cf:
|
||||
# Performer == albumartist → Komponist als Track-Artist
|
||||
artist = composer
|
||||
else:
|
||||
artist = th.artist or album_artist
|
||||
|
||||
# Try to match from MusicBrainz track list
|
||||
if mb_tracks and track_num:
|
||||
for mb_t in mb_tracks:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue