Fix CDDB parser: only ' / ' splits artist/title, never ' - '

Classical titles like 'Sonate: I. Largo - Allegro' were incorrectly split
at the movement-separator dash, producing wrong artist/title pairs.
Now only ' / ' (the CDDB compilation standard) is treated as the artist/title
separator; ' - ' is always part of the title.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-18 22:45:38 +01:00
commit 12bf67e977
2 changed files with 39 additions and 21 deletions

View file

@ -71,9 +71,13 @@ def _sanitize_name(name: str) -> str:
def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]: def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
"""Parse CDDB track list from abcde output lines. """Parse CDDB track list from abcde output lines.
Matches lines like: Zwei Formate:
"1: Wolfgang Anheisser - Wer recht in Freuden wandern will" (regular albums) Sampler: "1: Trini Lopez / This Land Is Your Land" artist / title
"1: Trini Lopez / This Land Is Your Land (live)" (compilations) Reguläres Album: "1: Sonate Nr. 14 - I. Adagio sostenuto" title (kein Split)
Nur ' / ' (Slash mit Leerzeichen) gilt als Künstler-Trenner — das ist der
CDDB-Standard für Sampler-TTITLEs. ' - ' wird NIE gesplittet, da es in
Klassik-Titeln als Satztrenner vorkommt.
Args: Args:
lines: Lines collected from abcde stdout+stderr lines: Lines collected from abcde stdout+stderr
@ -82,15 +86,26 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
List of TrackInfo (may be empty if CDDB lookup failed) List of TrackInfo (may be empty if CDDB lookup failed)
""" """
tracks = [] tracks = []
pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$") # Sampler-Format: "N: Artist / Title"
compilation = re.compile(r"^\s*(\d+):\s*(.+?)\s+/\s+(.+)$")
# Reguläres Format: "N: Track Title" (Titel kann ' - ' enthalten)
regular = re.compile(r"^\s*(\d+):\s*(.+)$")
for line in lines: for line in lines:
m = pattern.match(line) m = compilation.match(line)
if m: if m:
tracks.append(TrackInfo( tracks.append(TrackInfo(
track_number=int(m.group(1)), track_number=int(m.group(1)),
artist=m.group(2).strip(), artist=m.group(2).strip(),
title=m.group(3).strip(), title=m.group(3).strip(),
)) ))
else:
m = regular.match(line)
if m:
tracks.append(TrackInfo(
track_number=int(m.group(1)),
artist="",
title=m.group(2).strip(),
))
return tracks return tracks
@ -143,8 +158,8 @@ def _stream_abcde(
grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I) grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I) tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]") sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
# Handle both "Artist - Title" and "Artist / Title" (compilations) # Jede "N: Inhalt"-Zeile — Parsing (Sampler vs. regulär) in _parse_cddb_lines
cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$") cddb_re = re.compile(r"^\s*\d+:\s+\S")
header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ---- header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ----
total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I) total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I)

View file

@ -63,31 +63,34 @@ class TestCleanInput:
class TestParseCddbLines: class TestParseCddbLines:
"""Tests für _parse_cddb_lines.""" """Tests für _parse_cddb_lines."""
def test_parse_single_track(self) -> None: def test_parse_single_track_title_only(self) -> None:
lines = ["1: Artist - Title"] """Reguläres Album: Zeile ohne ' / ' → artist leer, gesamter Inhalt = Titel."""
lines = ["1: Für Elise"]
tracks = _parse_cddb_lines(lines) tracks = _parse_cddb_lines(lines)
assert len(tracks) == 1 assert len(tracks) == 1
assert tracks[0].track_number == 1 assert tracks[0].track_number == 1
assert tracks[0].artist == "Artist" assert tracks[0].artist == ""
assert tracks[0].title == "Title" assert tracks[0].title == "Für Elise"
def test_parse_multiple_tracks(self) -> None: def test_parse_regular_multiple_tracks(self) -> None:
"""Mehrere reguläre Tracks werden korrekt geparst."""
lines = [ lines = [
"1: Artist One - Title One", "1: First Title",
"2: Artist Two - Title Two", "2: Second Title",
"3: Artist Three - Title Three", "3: Third Title",
] ]
tracks = _parse_cddb_lines(lines) tracks = _parse_cddb_lines(lines)
assert len(tracks) == 3 assert len(tracks) == 3
assert tracks[2].track_number == 3 assert tracks[2].track_number == 3
assert tracks[2].artist == "Artist Three" assert tracks[2].artist == ""
assert tracks[2].title == "Title Three" assert tracks[2].title == "Third Title"
def test_parse_with_spaces_in_title(self) -> None: def test_dash_in_title_not_split(self) -> None:
lines = ["1: Wolfgang Anheisser - Wer recht in Freuden wandern will"] """' - ' in klassischen Titeln wird NICHT als Künstler-Separator behandelt."""
lines = ['1: Sonata "Tempest": I. Largo - Allegro']
tracks = _parse_cddb_lines(lines) tracks = _parse_cddb_lines(lines)
assert tracks[0].artist == "Wolfgang Anheisser" assert tracks[0].artist == ""
assert tracks[0].title == "Wer recht in Freuden wandern will" assert tracks[0].title == 'Sonata "Tempest": I. Largo - Allegro'
def test_ignores_non_matching_lines(self) -> None: def test_ignores_non_matching_lines(self) -> None:
lines = [ lines = [