Fix CDDB parser: only ' / ' splits artist/title, never ' - '
Classical titles such as 'Sonate: I. Largo - Allegro' were incorrectly split at the movement-separator dash, producing wrong artist/title pairs. Now only ' / ' (the CDDB standard for compilation track titles) is treated as the artist/title separator; ' - ' is always kept as part of the title, so tracks on regular albums are parsed with an empty artist.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9e61b01f92
commit
12bf67e977
2 changed files with 39 additions and 21 deletions
|
|
@ -71,9 +71,13 @@ def _sanitize_name(name: str) -> str:
|
||||||
def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
|
def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
|
||||||
"""Parse CDDB track list from abcde output lines.
|
"""Parse CDDB track list from abcde output lines.
|
||||||
|
|
||||||
Matches lines like:
|
Zwei Formate:
|
||||||
"1: Wolfgang Anheisser - Wer recht in Freuden wandern will" (regular albums)
|
Sampler: "1: Trini Lopez / This Land Is Your Land" → artist / title
|
||||||
"1: Trini Lopez / This Land Is Your Land (live)" (compilations)
|
Reguläres Album: "1: Sonate Nr. 14 - I. Adagio sostenuto" → title (kein Split)
|
||||||
|
|
||||||
|
Nur ' / ' (Slash mit Leerzeichen) gilt als Künstler-Trenner — das ist der
|
||||||
|
CDDB-Standard für Sampler-TTITLEs. ' - ' wird NIE gesplittet, da es in
|
||||||
|
Klassik-Titeln als Satztrenner vorkommt.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
lines: Lines collected from abcde stdout+stderr
|
lines: Lines collected from abcde stdout+stderr
|
||||||
|
|
@ -82,15 +86,26 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
|
||||||
List of TrackInfo (may be empty if CDDB lookup failed)
|
List of TrackInfo (may be empty if CDDB lookup failed)
|
||||||
"""
|
"""
|
||||||
tracks = []
|
tracks = []
|
||||||
pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
|
# Sampler-Format: "N: Artist / Title"
|
||||||
|
compilation = re.compile(r"^\s*(\d+):\s*(.+?)\s+/\s+(.+)$")
|
||||||
|
# Reguläres Format: "N: Track Title" (Titel kann ' - ' enthalten)
|
||||||
|
regular = re.compile(r"^\s*(\d+):\s*(.+)$")
|
||||||
for line in lines:
|
for line in lines:
|
||||||
m = pattern.match(line)
|
m = compilation.match(line)
|
||||||
if m:
|
if m:
|
||||||
tracks.append(TrackInfo(
|
tracks.append(TrackInfo(
|
||||||
track_number=int(m.group(1)),
|
track_number=int(m.group(1)),
|
||||||
artist=m.group(2).strip(),
|
artist=m.group(2).strip(),
|
||||||
title=m.group(3).strip(),
|
title=m.group(3).strip(),
|
||||||
))
|
))
|
||||||
|
else:
|
||||||
|
m = regular.match(line)
|
||||||
|
if m:
|
||||||
|
tracks.append(TrackInfo(
|
||||||
|
track_number=int(m.group(1)),
|
||||||
|
artist="",
|
||||||
|
title=m.group(2).strip(),
|
||||||
|
))
|
||||||
return tracks
|
return tracks
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -143,8 +158,8 @@ def _stream_abcde(
|
||||||
grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
|
grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
|
||||||
tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
|
tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
|
||||||
sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
|
sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
|
||||||
# Handle both "Artist - Title" and "Artist / Title" (compilations)
|
# Jede "N: Inhalt"-Zeile — Parsing (Sampler vs. regulär) in _parse_cddb_lines
|
||||||
cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
|
cddb_re = re.compile(r"^\s*\d+:\s+\S")
|
||||||
header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ----
|
header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ----
|
||||||
total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I)
|
total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,31 +63,34 @@ class TestCleanInput:
|
||||||
class TestParseCddbLines:
|
class TestParseCddbLines:
|
||||||
"""Tests für _parse_cddb_lines."""
|
"""Tests für _parse_cddb_lines."""
|
||||||
|
|
||||||
def test_parse_single_track(self) -> None:
|
def test_parse_single_track_title_only(self) -> None:
|
||||||
lines = ["1: Artist - Title"]
|
"""Reguläres Album: Zeile ohne ' / ' → artist leer, gesamter Inhalt = Titel."""
|
||||||
|
lines = ["1: Für Elise"]
|
||||||
tracks = _parse_cddb_lines(lines)
|
tracks = _parse_cddb_lines(lines)
|
||||||
assert len(tracks) == 1
|
assert len(tracks) == 1
|
||||||
assert tracks[0].track_number == 1
|
assert tracks[0].track_number == 1
|
||||||
assert tracks[0].artist == "Artist"
|
assert tracks[0].artist == ""
|
||||||
assert tracks[0].title == "Title"
|
assert tracks[0].title == "Für Elise"
|
||||||
|
|
||||||
def test_parse_multiple_tracks(self) -> None:
|
def test_parse_regular_multiple_tracks(self) -> None:
|
||||||
|
"""Mehrere reguläre Tracks werden korrekt geparst."""
|
||||||
lines = [
|
lines = [
|
||||||
"1: Artist One - Title One",
|
"1: First Title",
|
||||||
"2: Artist Two - Title Two",
|
"2: Second Title",
|
||||||
"3: Artist Three - Title Three",
|
"3: Third Title",
|
||||||
]
|
]
|
||||||
tracks = _parse_cddb_lines(lines)
|
tracks = _parse_cddb_lines(lines)
|
||||||
assert len(tracks) == 3
|
assert len(tracks) == 3
|
||||||
assert tracks[2].track_number == 3
|
assert tracks[2].track_number == 3
|
||||||
assert tracks[2].artist == "Artist Three"
|
assert tracks[2].artist == ""
|
||||||
assert tracks[2].title == "Title Three"
|
assert tracks[2].title == "Third Title"
|
||||||
|
|
||||||
def test_parse_with_spaces_in_title(self) -> None:
|
def test_dash_in_title_not_split(self) -> None:
|
||||||
lines = ["1: Wolfgang Anheisser - Wer recht in Freuden wandern will"]
|
"""' - ' in klassischen Titeln wird NICHT als Künstler-Separator behandelt."""
|
||||||
|
lines = ['1: Sonata "Tempest": I. Largo - Allegro']
|
||||||
tracks = _parse_cddb_lines(lines)
|
tracks = _parse_cddb_lines(lines)
|
||||||
assert tracks[0].artist == "Wolfgang Anheisser"
|
assert tracks[0].artist == ""
|
||||||
assert tracks[0].title == "Wer recht in Freuden wandern will"
|
assert tracks[0].title == 'Sonata "Tempest": I. Largo - Allegro'
|
||||||
|
|
||||||
def test_ignores_non_matching_lines(self) -> None:
|
def test_ignores_non_matching_lines(self) -> None:
|
||||||
lines = [
|
lines = [
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue