Fix YouTube ID detection: use last _-token instead of broken lookbehind regex
The previous regex lookbehind (?<![A-Za-z0-9_-]) excluded _ as a valid preceding character, so IDs that follow an underscore were never matched. New approach: split the stem on _ and check whether the last token is an 11-character YouTube ID (at least one uppercase letter and at least one lowercase letter or digit). Also strips the ID token from the stem before _parse_filename() to prevent it from leaking into the track title or being misread as an artist-title separator. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f86db982a5
commit
1960989eef
1 changed files with 24 additions and 14 deletions
|
|
@ -321,18 +321,21 @@ def _check_cover_images(paths: List[Path]) -> List[Path]:
|
||||||
return good
|
return good
|
||||||
|
|
||||||
|
|
||||||
# YouTube-Video-ID: 11 Zeichen aus [A-Za-z0-9_-], eingebettet im Dateinamen
|
# YouTube-Video-ID: exakt 11 Zeichen aus [A-Za-z0-9_-], typischerweise letztes _-Token
|
||||||
_YT_ID_RE = re.compile(r"(?<![A-Za-z0-9_-])([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])")
|
_YT_ID_CHARS = re.compile(r"^[A-Za-z0-9_-]{11}$")
|
||||||
|
|
||||||
|
|
||||||
def _extract_youtube_id(path: Path) -> Optional[str]:
|
def _extract_youtube_id(path: Path) -> Optional[str]:
|
||||||
"""Sucht eine YouTube-Video-ID im Dateinamen (Stem oder Suffix)."""
|
"""
|
||||||
name = path.stem + path.suffix
|
Erkennt YouTube-Video-ID als letztes '_'-getrenntes Token im Dateinamen.
|
||||||
for m in _YT_ID_RE.finditer(name):
|
Plausibilitätsprüfung: mind. ein Großbuchstabe UND mind. ein Kleinbuchstabe/Ziffer.
|
||||||
candidate = m.group(1)
|
"""
|
||||||
# Einfache Plausibilitätsprüfung: muss gemischte Zeichen haben
|
candidate = path.stem.split("_")[-1] # letztes Token nach Unterstrich
|
||||||
if re.search(r"[A-Z]", candidate) and re.search(r"[0-9a-z]", candidate):
|
if (len(candidate) == 11
|
||||||
return candidate
|
and _YT_ID_CHARS.match(candidate)
|
||||||
|
and re.search(r"[A-Z]", candidate)
|
||||||
|
and re.search(r"[0-9a-z]", candidate)):
|
||||||
|
return candidate
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -470,8 +473,18 @@ def extract_hints(scan: AlbumScan, use_ocr: bool = True) -> AlbumHints:
|
||||||
|
|
||||||
# Build TrackHints per audio file
|
# Build TrackHints per audio file
|
||||||
for audio_path in sorted(scan.audio_files):
|
for audio_path in sorted(scan.audio_files):
|
||||||
|
stem_key = _clean(audio_path.stem).casefold()
|
||||||
|
yt_id_for_file = yt_ids_by_stem.get(stem_key)
|
||||||
|
|
||||||
|
# Stem ohne YouTube-ID für Dateiname-Parsing
|
||||||
|
parse_stem = audio_path.stem
|
||||||
|
if yt_id_for_file:
|
||||||
|
tokens = parse_stem.rsplit("_", 1)
|
||||||
|
if len(tokens) == 2 and tokens[1] == yt_id_for_file:
|
||||||
|
parse_stem = tokens[0]
|
||||||
|
|
||||||
tags, duration = _read_tags(audio_path)
|
tags, duration = _read_tags(audio_path)
|
||||||
fn_hints = _parse_filename(audio_path.stem)
|
fn_hints = _parse_filename(parse_stem)
|
||||||
|
|
||||||
track_num: Optional[int] = None
|
track_num: Optional[int] = None
|
||||||
disc_num: Optional[int] = None
|
disc_num: Optional[int] = None
|
||||||
|
|
@ -549,20 +562,17 @@ def extract_hints(scan: AlbumScan, use_ocr: bool = True) -> AlbumHints:
|
||||||
|
|
||||||
# M3U-Reihenfolge nur als letzter Fallback (wenn Tracklist kein Match liefert)
|
# M3U-Reihenfolge nur als letzter Fallback (wenn Tracklist kein Match liefert)
|
||||||
if track_num is None:
|
if track_num is None:
|
||||||
stem_key = _clean(audio_path.stem).casefold()
|
|
||||||
if stem_key in m3u_order:
|
if stem_key in m3u_order:
|
||||||
track_num = m3u_order[stem_key]
|
track_num = m3u_order[stem_key]
|
||||||
|
|
||||||
# M3U-Titel als Fallback (enthält "Composer - Title" — nur nutzen wenn kein besserer Titel)
|
# M3U-Titel als Fallback (enthält "Composer - Title" — nur nutzen wenn kein besserer Titel)
|
||||||
if not _is_good(title):
|
if not _is_good(title):
|
||||||
stem_key = _clean(audio_path.stem).casefold()
|
|
||||||
if stem_key in m3u_titles:
|
if stem_key in m3u_titles:
|
||||||
title = m3u_titles[stem_key]
|
title = m3u_titles[stem_key]
|
||||||
|
|
||||||
# YouTube-Titel als letzter Fallback (bei einzelner Datei = das ganze Video)
|
# YouTube-Titel als letzter Fallback (bei einzelner Datei = das ganze Video)
|
||||||
if not _is_good(title):
|
if not _is_good(title):
|
||||||
stem_key = _clean(audio_path.stem).casefold()
|
yt_id = yt_id_for_file
|
||||||
yt_id = yt_ids_by_stem.get(stem_key)
|
|
||||||
if yt_id:
|
if yt_id:
|
||||||
meta = yt_meta_by_id.get(yt_id)
|
meta = yt_meta_by_id.get(yt_id)
|
||||||
if meta:
|
if meta:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue