Fix CDDB parser: only ' / ' splits artist/title, never ' - '

Classical titles like 'Sonate: I. Largo - Allegro' were incorrectly split at the movement-separator dash, producing wrong artist/title pairs. Now only ' / ' (CDDB compilation standard) is treated as the artist/title separator; ' - ' is always part of the title.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-18 22:45:38 +01:00 · 2026-02-18 22:45:38 +01:00 · 12bf67e977
commit 12bf67e977
parent 9e61b01f92
2 changed files with 39 additions and 21 deletions
--- a/src/musiksammlung/ripper.py
+++ b/src/musiksammlung/ripper.py
@@ -71,9 +71,13 @@ def _sanitize_name(name: str) -> str:
 def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
    """Parse CDDB track list from abcde output lines.

-    Matches lines like:
-      "1: Wolfgang Anheisser - Wer recht in Freuden wandern will"  (regular albums)
-      "1: Trini Lopez / This Land Is Your Land (live)"              (compilations)
+    Zwei Formate:
+      Sampler:        "1: Trini Lopez / This Land Is Your Land"  →  artist / title
+      Reguläres Album: "1: Sonate Nr. 14 - I. Adagio sostenuto"  →  title (kein Split)
+
+    Nur ' / ' (Slash mit Leerzeichen) gilt als Künstler-Trenner — das ist der
+    CDDB-Standard für Sampler-TTITLEs. ' - ' wird NIE gesplittet, da es in
+    Klassik-Titeln als Satztrenner vorkommt.

    Args:
        lines: Lines collected from abcde stdout+stderr
@@ -82,15 +86,26 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
        List of TrackInfo (may be empty if CDDB lookup failed)
    """
    tracks = []
-    pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
+    # Sampler-Format: "N: Artist / Title"
+    compilation = re.compile(r"^\s*(\d+):\s*(.+?)\s+/\s+(.+)$")
+    # Reguläres Format: "N: Track Title" (Titel kann ' - ' enthalten)
+    regular = re.compile(r"^\s*(\d+):\s*(.+)$")
    for line in lines:
-        m = pattern.match(line)
+        m = compilation.match(line)
        if m:
            tracks.append(TrackInfo(
                track_number=int(m.group(1)),
                artist=m.group(2).strip(),
                title=m.group(3).strip(),
            ))
+        else:
+            m = regular.match(line)
+            if m:
+                tracks.append(TrackInfo(
+                    track_number=int(m.group(1)),
+                    artist="",
+                    title=m.group(2).strip(),
+                ))
    return tracks


@@ -143,8 +158,8 @@ def _stream_abcde(
    grab_re   = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
    tag_re    = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
    sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
-    # Handle both "Artist - Title" and "Artist / Title" (compilations)
-    cddb_re   = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
+    # Jede "N: Inhalt"-Zeile — Parsing (Sampler vs. regulär) in _parse_cddb_lines
+    cddb_re   = re.compile(r"^\s*\d+:\s+\S")
    header_re = re.compile(r"-{2,}.+-{2,}")   # ---- Artist / Album ----
    total_re  = re.compile(r"tracks?:\s+([\d\s]+)", re.I)

--- a/tests/test_ripper.py
+++ b/tests/test_ripper.py
@@ -63,31 +63,34 @@ class TestCleanInput:
 class TestParseCddbLines:
    """Tests für _parse_cddb_lines."""

-    def test_parse_single_track(self) -> None:
-        lines = ["1: Artist - Title"]
+    def test_parse_single_track_title_only(self) -> None:
+        """Reguläres Album: Zeile ohne ' / ' → artist leer, gesamter Inhalt = Titel."""
+        lines = ["1: Für Elise"]
        tracks = _parse_cddb_lines(lines)
        assert len(tracks) == 1
        assert tracks[0].track_number == 1
-        assert tracks[0].artist == "Artist"
-        assert tracks[0].title == "Title"
+        assert tracks[0].artist == ""
+        assert tracks[0].title == "Für Elise"

-    def test_parse_multiple_tracks(self) -> None:
+    def test_parse_regular_multiple_tracks(self) -> None:
+        """Mehrere reguläre Tracks werden korrekt geparst."""
        lines = [
-            "1: Artist One - Title One",
-            "2: Artist Two - Title Two",
-            "3: Artist Three - Title Three",
+            "1: First Title",
+            "2: Second Title",
+            "3: Third Title",
        ]
        tracks = _parse_cddb_lines(lines)
        assert len(tracks) == 3
        assert tracks[2].track_number == 3
-        assert tracks[2].artist == "Artist Three"
-        assert tracks[2].title == "Title Three"
+        assert tracks[2].artist == ""
+        assert tracks[2].title == "Third Title"

-    def test_parse_with_spaces_in_title(self) -> None:
-        lines = ["1: Wolfgang Anheisser - Wer recht in Freuden wandern will"]
+    def test_dash_in_title_not_split(self) -> None:
+        """' - ' in klassischen Titeln wird NICHT als Künstler-Separator behandelt."""
+        lines = ['1: Sonata "Tempest": I. Largo - Allegro']
        tracks = _parse_cddb_lines(lines)
-        assert tracks[0].artist == "Wolfgang Anheisser"
-        assert tracks[0].title == "Wer recht in Freuden wandern will"
+        assert tracks[0].artist == ""
+        assert tracks[0].title == 'Sonata "Tempest": I. Largo - Allegro'

    def test_ignores_non_matching_lines(self) -> None:
        lines = [