Fix YouTube ID detection: use last _-token instead of broken lookbehind regex

The previous negative lookbehind (?<![A-Za-z0-9_-]) counted _ as part of the ID
alphabet, so an ID directly preceded by an underscore was never matched. New approach:
split the stem by _ and check whether the last token is an 11-character YouTube ID
(at least one uppercase letter and at least one lowercase letter or digit).
Also strips the ID token from the stem before _parse_filename() to prevent it
from leaking into the track title or being misread as an artist-title separator.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-04-29 05:57:27 +02:00
commit 1960989eef

View file

@ -321,18 +321,21 @@ def _check_cover_images(paths: List[Path]) -> List[Path]:
return good
# YouTube video ID: exactly 11 characters from [A-Za-z0-9_-], typically the
# last "_"-separated token of the file name.
_YT_ID_CHARS = re.compile(r"^[A-Za-z0-9_-]{11}$")


def _extract_youtube_id(path: Path) -> Optional[str]:
    """Return the YouTube video ID carried as the last ``_``-token of the stem.

    The stem of *path* (file name without its final suffix) is cut at the
    last underscore and the trailing token is accepted as an ID when it

    * is exactly 11 characters long,
    * consists only of ``[A-Za-z0-9_-]``,
    * contains at least one uppercase letter, and
    * contains at least one lowercase letter or digit.

    The last two conditions are a plausibility check that rejects ordinary
    all-lowercase or all-uppercase words that happen to be 11 characters long.

    Args:
        path: Path of the file whose name is inspected; only ``path.stem``
            is read, the file itself is never opened.

    Returns:
        The 11-character ID, or ``None`` when the last token does not look
        like one.
    """
    # NOTE: an ID that itself contains "_" is cut apart here, so the trailing
    # token is shorter than 11 characters and the ID is not recognised.
    # With no underscore in the stem, rpartition yields the whole stem.
    candidate = path.stem.rpartition("_")[2]
    if (
        len(candidate) == 11
        and _YT_ID_CHARS.match(candidate)
        and re.search(r"[A-Z]", candidate)
        and re.search(r"[0-9a-z]", candidate)
    ):
        return candidate
    return None
@ -470,8 +473,18 @@ def extract_hints(scan: AlbumScan, use_ocr: bool = True) -> AlbumHints:
# Build TrackHints per audio file
for audio_path in sorted(scan.audio_files):
stem_key = _clean(audio_path.stem).casefold()
yt_id_for_file = yt_ids_by_stem.get(stem_key)
# Stem ohne YouTube-ID für Dateiname-Parsing
parse_stem = audio_path.stem
if yt_id_for_file:
tokens = parse_stem.rsplit("_", 1)
if len(tokens) == 2 and tokens[1] == yt_id_for_file:
parse_stem = tokens[0]
tags, duration = _read_tags(audio_path)
fn_hints = _parse_filename(audio_path.stem)
fn_hints = _parse_filename(parse_stem)
track_num: Optional[int] = None
disc_num: Optional[int] = None
@ -549,20 +562,17 @@ def extract_hints(scan: AlbumScan, use_ocr: bool = True) -> AlbumHints:
# M3U-Reihenfolge nur als letzter Fallback (wenn Tracklist kein Match liefert)
if track_num is None:
stem_key = _clean(audio_path.stem).casefold()
if stem_key in m3u_order:
track_num = m3u_order[stem_key]
# M3U-Titel als Fallback (enthält "Composer - Title" — nur nutzen wenn kein besserer Titel)
if not _is_good(title):
stem_key = _clean(audio_path.stem).casefold()
if stem_key in m3u_titles:
title = m3u_titles[stem_key]
# YouTube-Titel als letzter Fallback (bei einzelner Datei = das ganze Video)
if not _is_good(title):
stem_key = _clean(audio_path.stem).casefold()
yt_id = yt_ids_by_stem.get(stem_key)
yt_id = yt_id_for_file
if yt_id:
meta = yt_meta_by_id.get(yt_id)
if meta: