def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
    """Parse the CDDB track list from abcde output lines.

    Two line formats are recognised:
        Compilation:   "1: Trini Lopez / This Land Is Your Land"  -> artist / title
        Regular album: "1: Sonate Nr. 14 - I. Adagio sostenuto"   -> title only

    Only " / " (a slash surrounded by whitespace) counts as the artist
    separator; that is the CDDB convention for compilation TTITLE entries.
    " - " is never split on, because it occurs inside classical titles as a
    movement separator.

    A line is only accepted when the track number is followed by a colon, at
    least one whitespace character and non-blank content. Timestamps such as
    "12:34:56" and empty entries such as "1:   " are therefore ignored
    instead of being turned into bogus tracks.

    Args:
        lines: Lines collected from abcde stdout+stderr

    Returns:
        List of TrackInfo (may be empty if CDDB lookup failed). ``artist`` is
        the empty string for lines without a " / " separator.
    """
    # group 1: track number
    # group 2: artist (optional; lazy, so the FIRST " / " is the separator)
    # group 3: title (everything after the separator, or the whole content)
    entry = re.compile(r"^\s*(\d+):\s+(?:(\S.*?)\s+/\s+)?(\S.*)$")

    tracks = []
    for line in lines:
        m = entry.match(line)
        if m is None:
            continue
        number, artist, title = m.groups()
        tracks.append(TrackInfo(
            track_number=int(number),
            # The artist group is None when the line has no " / " separator.
            artist=(artist or "").strip(),
            title=title.strip(),
        ))
    return tracks
"Artist / Title" (compilations) - cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$") + # Jede "N: Inhalt"-Zeile — Parsing (Sampler vs. regulär) in _parse_cddb_lines + cddb_re = re.compile(r"^\s*\d+:\s+\S") header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ---- total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I) diff --git a/tests/test_ripper.py b/tests/test_ripper.py index 93c9203..ffb1bde 100644 --- a/tests/test_ripper.py +++ b/tests/test_ripper.py @@ -63,31 +63,34 @@ class TestCleanInput: class TestParseCddbLines: """Tests für _parse_cddb_lines.""" - def test_parse_single_track(self) -> None: - lines = ["1: Artist - Title"] + def test_parse_single_track_title_only(self) -> None: + """Reguläres Album: Zeile ohne ' / ' → artist leer, gesamter Inhalt = Titel.""" + lines = ["1: Für Elise"] tracks = _parse_cddb_lines(lines) assert len(tracks) == 1 assert tracks[0].track_number == 1 - assert tracks[0].artist == "Artist" - assert tracks[0].title == "Title" + assert tracks[0].artist == "" + assert tracks[0].title == "Für Elise" - def test_parse_multiple_tracks(self) -> None: + def test_parse_regular_multiple_tracks(self) -> None: + """Mehrere reguläre Tracks werden korrekt geparst.""" lines = [ - "1: Artist One - Title One", - "2: Artist Two - Title Two", - "3: Artist Three - Title Three", + "1: First Title", + "2: Second Title", + "3: Third Title", ] tracks = _parse_cddb_lines(lines) assert len(tracks) == 3 assert tracks[2].track_number == 3 - assert tracks[2].artist == "Artist Three" - assert tracks[2].title == "Title Three" + assert tracks[2].artist == "" + assert tracks[2].title == "Third Title" - def test_parse_with_spaces_in_title(self) -> None: - lines = ["1: Wolfgang Anheisser - Wer recht in Freuden wandern will"] + def test_dash_in_title_not_split(self) -> None: + """' - ' in klassischen Titeln wird NICHT als Künstler-Separator behandelt.""" + lines = ['1: Sonata "Tempest": I. 
Largo - Allegro'] tracks = _parse_cddb_lines(lines) - assert tracks[0].artist == "Wolfgang Anheisser" - assert tracks[0].title == "Wer recht in Freuden wandern will" + assert tracks[0].artist == "" + assert tracks[0].title == 'Sonata "Tempest": I. Largo - Allegro' def test_ignores_non_matching_lines(self) -> None: lines = [