Fix CDDB parser: only ' / ' splits artist/title, never ' - '
Classical titles like 'Sonate: I. Largo - Allegro' were incorrectly split at the movement-separator dash, producing wrong artist/title pairs. Now only ' / ' (the CDDB convention for compilations) is treated as the artist/title separator; ' - ' is always kept as part of the title.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9e61b01f92
commit
12bf67e977
2 changed files with 39 additions and 21 deletions
|
|
@ -71,9 +71,13 @@ def _sanitize_name(name: str) -> str:
|
|||
def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
|
||||
"""Parse CDDB track list from abcde output lines.
|
||||
|
||||
Matches lines like:
|
||||
"1: Wolfgang Anheisser - Wer recht in Freuden wandern will" (regular albums)
|
||||
"1: Trini Lopez / This Land Is Your Land (live)" (compilations)
|
||||
Zwei Formate:
|
||||
Sampler: "1: Trini Lopez / This Land Is Your Land" → artist / title
|
||||
Reguläres Album: "1: Sonate Nr. 14 - I. Adagio sostenuto" → title (kein Split)
|
||||
|
||||
Nur ' / ' (Slash mit Leerzeichen) gilt als Künstler-Trenner — das ist der
|
||||
CDDB-Standard für Sampler-TTITLEs. ' - ' wird NIE gesplittet, da es in
|
||||
Klassik-Titeln als Satztrenner vorkommt.
|
||||
|
||||
Args:
|
||||
lines: Lines collected from abcde stdout+stderr
|
||||
|
|
@ -82,15 +86,26 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
|
|||
List of TrackInfo (may be empty if CDDB lookup failed)
|
||||
"""
|
||||
tracks = []
|
||||
pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
|
||||
# Sampler-Format: "N: Artist / Title"
|
||||
compilation = re.compile(r"^\s*(\d+):\s*(.+?)\s+/\s+(.+)$")
|
||||
# Reguläres Format: "N: Track Title" (Titel kann ' - ' enthalten)
|
||||
regular = re.compile(r"^\s*(\d+):\s*(.+)$")
|
||||
for line in lines:
|
||||
m = pattern.match(line)
|
||||
m = compilation.match(line)
|
||||
if m:
|
||||
tracks.append(TrackInfo(
|
||||
track_number=int(m.group(1)),
|
||||
artist=m.group(2).strip(),
|
||||
title=m.group(3).strip(),
|
||||
))
|
||||
else:
|
||||
m = regular.match(line)
|
||||
if m:
|
||||
tracks.append(TrackInfo(
|
||||
track_number=int(m.group(1)),
|
||||
artist="",
|
||||
title=m.group(2).strip(),
|
||||
))
|
||||
return tracks
|
||||
|
||||
|
||||
|
|
@ -143,8 +158,8 @@ def _stream_abcde(
|
|||
grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
|
||||
tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
|
||||
sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
|
||||
# Handle both "Artist - Title" and "Artist / Title" (compilations)
|
||||
cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
|
||||
# Jede "N: Inhalt"-Zeile — Parsing (Sampler vs. regulär) in _parse_cddb_lines
|
||||
cddb_re = re.compile(r"^\s*\d+:\s+\S")
|
||||
header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ----
|
||||
total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I)
|
||||
|
||||
|
|
|
|||
|
|
@ -63,31 +63,34 @@ class TestCleanInput:
|
|||
class TestParseCddbLines:
|
||||
"""Tests für _parse_cddb_lines."""
|
||||
|
||||
def test_parse_single_track(self) -> None:
|
||||
lines = ["1: Artist - Title"]
|
||||
def test_parse_single_track_title_only(self) -> None:
|
||||
"""Reguläres Album: Zeile ohne ' / ' → artist leer, gesamter Inhalt = Titel."""
|
||||
lines = ["1: Für Elise"]
|
||||
tracks = _parse_cddb_lines(lines)
|
||||
assert len(tracks) == 1
|
||||
assert tracks[0].track_number == 1
|
||||
assert tracks[0].artist == "Artist"
|
||||
assert tracks[0].title == "Title"
|
||||
assert tracks[0].artist == ""
|
||||
assert tracks[0].title == "Für Elise"
|
||||
|
||||
def test_parse_multiple_tracks(self) -> None:
|
||||
def test_parse_regular_multiple_tracks(self) -> None:
|
||||
"""Mehrere reguläre Tracks werden korrekt geparst."""
|
||||
lines = [
|
||||
"1: Artist One - Title One",
|
||||
"2: Artist Two - Title Two",
|
||||
"3: Artist Three - Title Three",
|
||||
"1: First Title",
|
||||
"2: Second Title",
|
||||
"3: Third Title",
|
||||
]
|
||||
tracks = _parse_cddb_lines(lines)
|
||||
assert len(tracks) == 3
|
||||
assert tracks[2].track_number == 3
|
||||
assert tracks[2].artist == "Artist Three"
|
||||
assert tracks[2].title == "Title Three"
|
||||
assert tracks[2].artist == ""
|
||||
assert tracks[2].title == "Third Title"
|
||||
|
||||
def test_parse_with_spaces_in_title(self) -> None:
|
||||
lines = ["1: Wolfgang Anheisser - Wer recht in Freuden wandern will"]
|
||||
def test_dash_in_title_not_split(self) -> None:
|
||||
"""' - ' in klassischen Titeln wird NICHT als Künstler-Separator behandelt."""
|
||||
lines = ['1: Sonata "Tempest": I. Largo - Allegro']
|
||||
tracks = _parse_cddb_lines(lines)
|
||||
assert tracks[0].artist == "Wolfgang Anheisser"
|
||||
assert tracks[0].title == "Wer recht in Freuden wandern will"
|
||||
assert tracks[0].artist == ""
|
||||
assert tracks[0].title == 'Sonata "Tempest": I. Largo - Allegro'
|
||||
|
||||
def test_ignores_non_matching_lines(self) -> None:
|
||||
lines = [
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue