fix: korrekte Track-Nummerierung, Scanner-Rekursion, M3U-Reihenfolge

scanner: nicht in Unterordner wenn Root Audio-Dateien enthält (verhindert
  Doppel-Scan bei versehentlichen Unterordner-Kopien); nur Disc-Ordner
  (CD1, Disc 2…) werden bei Multi-CD-Alben rekursiert.

hint_extractor: M3U/Playlist-Dateien als Track-Reihenfolge-Quelle; BOM-
  Bereinigung; Tracklist-Matching auch per Titel (nicht nur per Nummer);
  tracknumber=0 wird als 'keine Nummer' gewertet.

metadata_resolver: sequenzielle Fallback-Nummerierung (1,2,3…) für Tracks
  ohne Tracknummer — verhindert '00'-Präfix beim --rename; dir_artist hat
  Vorrang vor 'Various Artists'-Heuristik; LLM darf bei Konfidenz <0.3
  auch bestehende Werte korrigieren (Tippfehler im Verzeichnisnamen).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-04-28 21:49:00 +02:00
commit d91eb36007
4 changed files with 189 additions and 48 deletions

View file

@ -51,6 +51,8 @@ _DISC_SECTION_RE = re.compile(r"(?i)(?:cd|disc|disk|side)[_ \-]*(\d{1,2})")
def _clean(s: Optional[str]) -> str:
if not s:
return ""
# BOM (U+FEFF), Zero-Width-Space (U+200B), Soft-Hyphen (U+00AD) entfernen
s = re.sub(r"[­]", "", s)
return re.sub(r"\s+", " ", s.replace("_", " ")).strip(" -._")
@ -143,6 +145,34 @@ def _parse_tracklist(text: str) -> List[Dict[str, str]]:
return tracks
def _parse_m3u(text: str) -> List[Dict[str, str]]:
"""M3U/M3U8 → geordnete Liste: [{filename, title, position}].
Reihenfolge der Einträge = gewünschte Trackreihenfolge.
"""
tracks: List[Dict[str, str]] = []
pending_title: Optional[str] = None
position = 0
for line in text.splitlines():
line = line.strip()
if not line:
continue
if line.upper().startswith("#EXTINF:"):
parts = line.split(",", 1)
pending_title = parts[1].strip() if len(parts) > 1 else None
elif not line.startswith("#"):
filename = Path(line.replace("\\", "/")).name
if not filename:
continue
position += 1
tracks.append({
"position": str(position),
"filename": filename,
"title": pending_title or "",
})
pending_title = None
return tracks
def _read_tracklist_file(path: Path) -> Optional[str]:
try:
if path.suffix.lower() in (".htm", ".html"):
@ -203,6 +233,29 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
parsed_tracklist = _parse_tracklist(hints.tracklist_text) if hints.tracklist_text else []
# M3U/Playlist-Reihenfolge: filename (stem, normalisiert) → Tracknummer
m3u_order: Dict[str, int] = {}
m3u_titles: Dict[str, str] = {}
for pf in scan.playlist_files:
try:
text = pf.read_text(encoding="utf-8", errors="replace")
for entry in _parse_m3u(text):
stem = _clean(Path(entry["filename"]).stem).casefold()
pos = int(entry["position"])
if stem and stem not in m3u_order:
m3u_order[stem] = pos
if entry.get("title"):
m3u_titles[stem] = entry["title"]
except Exception as e:
print(f" ⚠️ Playlist-Lesefehler {pf.name}: {e}", file=sys.stderr)
# Tracklist-Lookup: normalisierter Titel → Eintrag (für titelbasiertes Matching)
tl_by_title: Dict[str, Dict[str, str]] = {}
for entry in parsed_tracklist:
key = _clean(entry.get("title", "")).casefold()
if key:
tl_by_title[key] = entry
# Build TrackHints per audio file
for audio_path in sorted(scan.audio_files):
tags, duration = _read_tags(audio_path)
@ -215,10 +268,18 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
raw_tn = tags.get("tracknumber") or fn_hints.get("track")
if raw_tn:
try:
track_num = int(str(raw_tn).split("/")[0])
tn_int = int(str(raw_tn).split("/")[0])
if tn_int > 0: # 0 gilt als "keine Nummer"
track_num = tn_int
except ValueError:
pass
# Track number from M3U order (fallback only: applied when the tag/filename lookup above produced no usable number)
if track_num is None:
stem_key = _clean(audio_path.stem).casefold()
if stem_key in m3u_order:
track_num = m3u_order[stem_key]
# Disc number: tag > filename > path segment
raw_dn = tags.get("discnumber") or fn_hints.get("disc")
if raw_dn:
@ -236,16 +297,27 @@ def extract_hints(scan: AlbumScan) -> AlbumHints:
title = tags.get("title") or fn_hints.get("title")
artist = tags.get("artist") or fn_hints.get("artist")
# Enrich from parsed tracklist if track_num matches
if parsed_tracklist and track_num:
for tl_entry in parsed_tracklist:
tl_track = tl_entry.get("track")
tl_disc = tl_entry.get("disc", "1")
if (tl_track and int(tl_track) == track_num
and int(tl_disc) == (disc_num or 1)):
if not _is_good(title) and _is_good(tl_entry.get("title")):
title = tl_entry["title"]
break
# Tracklist: erst nach Nummer, dann nach Titel
if parsed_tracklist:
matched_tl: Optional[Dict[str, str]] = None
if track_num:
for tl_entry in parsed_tracklist:
tl_track = tl_entry.get("track")
tl_disc = tl_entry.get("disc", "1")
if (tl_track and int(tl_track) == track_num
and int(tl_disc) == (disc_num or 1)):
matched_tl = tl_entry
break
if matched_tl is None and title:
matched_tl = tl_by_title.get(_clean(title).casefold())
if matched_tl and not _is_good(title) and _is_good(matched_tl.get("title")):
title = matched_tl["title"]
# M3U-Titel als Fallback (enthält "Composer - Title" — nur nutzen wenn kein besserer Titel)
if not _is_good(title):
stem_key = _clean(audio_path.stem).casefold()
if stem_key in m3u_titles:
title = m3u_titles[stem_key]
hints.tracks.append(TrackHints(
path=audio_path,