Robust tracklist matching: fuzzy titles, catalog numbers, correct disc/track
hint_extractor:
- _norm_for_match(): strips all non-alphanumeric characters for punctuation-agnostic comparison
- _catalog_key(): extracts the BWV/Op./K./HWV/... catalog number for matching (fixes abbreviated filenames like "Fantasia_Cm_BWV_562" vs "Fantasia In C Minor, BWV 562")
- Matching priority: exact number+disc → exact title → fuzzy title → catalog number
- Tracklist disc+track OVERRIDE the M3U position when a match is found (M3U is only used as a last fallback; fixes wrong alphabetical ordering)

metadata_resolver:
- LLM prompt now defines the artist/albumartist roles explicitly (artist = composer for classical; albumartist = performer/interpreter)
- LLM albumartist can override dir_artist when confidence < 0.4
- _build_track_proposals: when track artist == albumartist (performer from filename), the composer (album-level artist) is used as track artist instead
- Tracklist header (first lines before the tracks) is included in the LLM prompt for label/year/album-title discovery
- import re added (was missing)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5011cef4db
commit
d1391fc36a
2 changed files with 134 additions and 30 deletions
|
|
@ -56,6 +56,26 @@ def _clean(s: Optional[str]) -> str:
|
|||
return re.sub(r"\s+", " ", s.replace("_", " ")).strip(" -._")
|
||||
|
||||
|
||||
def _norm_for_match(s: str) -> str:
|
||||
"""Nur Buchstaben und Ziffern — für fuzzy Titelvergleich (Interpunktion-agnostisch)."""
|
||||
return re.sub(r"[^a-z0-9]", "", s.casefold())
|
||||
|
||||
|
||||
# Klassische Werkverzeichnis-Nummern: BWV 565, Op. 27, K. 331, HWV 56, …
|
||||
_CATALOG_RE = re.compile(
|
||||
r"\b(bwv|hwv|op|k|kv|d|sz|wq|bbwv|rv|twv|hob)\W*(\d+[a-z]?(?:[\/\.]\d+)?)",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _catalog_key(s: str) -> Optional[str]:
|
||||
"""Extrahiert normalisierte Katalognummer, z.B. 'bwv565' oder 'op27'."""
|
||||
m = _CATALOG_RE.search(s)
|
||||
if m:
|
||||
return m.group(1).lower() + re.sub(r"\W", "", m.group(2))
|
||||
return None
|
||||
|
||||
|
||||
def _is_good(v: Optional[str]) -> bool:
|
||||
if not v:
|
||||
return False
|
||||
|
|
@ -249,12 +269,21 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
|
|||
except Exception as e:
|
||||
print(f" ⚠️ Playlist-Lesefehler {pf.name}: {e}", file=sys.stderr)
|
||||
|
||||
# Tracklist-Lookup: normalisierter Titel → Eintrag (für titelbasiertes Matching)
|
||||
# Tracklist-Lookup: exakter Titel, fuzzy Titel, Katalognummer (BWV, Op., K., …)
|
||||
tl_by_title: Dict[str, Dict[str, str]] = {}
|
||||
tl_by_title_norm: Dict[str, Dict[str, str]] = {}
|
||||
tl_by_catalog: Dict[str, Dict[str, str]] = {}
|
||||
for entry in parsed_tracklist:
|
||||
key = _clean(entry.get("title", "")).casefold()
|
||||
if key:
|
||||
tl_by_title[key] = entry
|
||||
raw_title = entry.get("title", "")
|
||||
exact_key = _clean(raw_title).casefold()
|
||||
if exact_key:
|
||||
tl_by_title[exact_key] = entry
|
||||
norm_key = _norm_for_match(raw_title)
|
||||
if norm_key:
|
||||
tl_by_title_norm[norm_key] = entry
|
||||
cat_key = _catalog_key(raw_title)
|
||||
if cat_key:
|
||||
tl_by_catalog[cat_key] = entry
|
||||
|
||||
# Build TrackHints per audio file
|
||||
for audio_path in sorted(scan.audio_files):
|
||||
|
|
@ -274,12 +303,6 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
|
|||
except ValueError:
|
||||
pass
|
||||
|
||||
# Track number aus M3U-Reihenfolge (Vorrang vor Dateiname, aber nicht vor Tag)
|
||||
if track_num is None:
|
||||
stem_key = _clean(audio_path.stem).casefold()
|
||||
if stem_key in m3u_order:
|
||||
track_num = m3u_order[stem_key]
|
||||
|
||||
# Disc number: tag > filename > path segment
|
||||
raw_dn = tags.get("discnumber") or fn_hints.get("disc")
|
||||
if raw_dn:
|
||||
|
|
@ -297,21 +320,55 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
|
|||
title = tags.get("title") or fn_hints.get("title")
|
||||
artist = tags.get("artist") or fn_hints.get("artist")
|
||||
|
||||
# Tracklist: erst nach Nummer, dann nach Titel
|
||||
# Tracklist-Matching: Nummer → exakter Titel → fuzzy Titel
|
||||
# Wenn ein Match gefunden: disc+track aus Tracklist übernehmen (Tracklist ist
|
||||
# autoritativer als M3U-Reihenfolge bei Alben mit expliziter Disc-Nummerierung).
|
||||
if parsed_tracklist:
|
||||
matched_tl: Optional[Dict[str, str]] = None
|
||||
if track_num:
|
||||
|
||||
# 1. Exakt per Tracknummer + Disc (nur wenn beides aus Tag/Dateiname bekannt)
|
||||
if track_num and disc_num:
|
||||
for tl_entry in parsed_tracklist:
|
||||
tl_track = tl_entry.get("track")
|
||||
tl_disc = tl_entry.get("disc", "1")
|
||||
if (tl_track and int(tl_track) == track_num
|
||||
and int(tl_disc) == (disc_num or 1)):
|
||||
and int(tl_disc) == disc_num):
|
||||
matched_tl = tl_entry
|
||||
break
|
||||
|
||||
# 2. Exakter Titelvergleich
|
||||
if matched_tl is None and title:
|
||||
matched_tl = tl_by_title.get(_clean(title).casefold())
|
||||
if matched_tl and not _is_good(title) and _is_good(matched_tl.get("title")):
|
||||
title = matched_tl["title"]
|
||||
|
||||
# 3. Fuzzy Titelvergleich (ignoriert Kommas, Apostrophe, Groß-/Kleinschreibung)
|
||||
if matched_tl is None and title:
|
||||
matched_tl = tl_by_title_norm.get(_norm_for_match(title))
|
||||
|
||||
# 4. Katalognummer (BWV, Op., K. …) — greift bei abgekürzten Dateinamen
|
||||
if matched_tl is None and title:
|
||||
cat = _catalog_key(title)
|
||||
if cat:
|
||||
matched_tl = tl_by_catalog.get(cat)
|
||||
|
||||
if matched_tl:
|
||||
# Titel aus Tracklist übernehmen wenn besser
|
||||
if _is_good(matched_tl.get("title")):
|
||||
title = matched_tl["title"]
|
||||
# disc+track aus Tracklist sind autoritativer als M3U-Reihenfolge
|
||||
try:
|
||||
tl_track_n = int(matched_tl["track"]) if matched_tl.get("track") else None
|
||||
tl_disc_n = int(matched_tl.get("disc", "1"))
|
||||
if tl_track_n:
|
||||
track_num = tl_track_n
|
||||
disc_num = tl_disc_n
|
||||
except (ValueError, KeyError):
|
||||
pass
|
||||
|
||||
# M3U-Reihenfolge nur als letzter Fallback (wenn Tracklist kein Match liefert)
|
||||
if track_num is None:
|
||||
stem_key = _clean(audio_path.stem).casefold()
|
||||
if stem_key in m3u_order:
|
||||
track_num = m3u_order[stem_key]
|
||||
|
||||
# M3U-Titel als Fallback (enthält "Composer - Title" — nur nutzen wenn kein besserer Titel)
|
||||
if not _is_good(title):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue