Fix CDDB parser for compilations and add grab-progress fallback

- _parse_cddb_lines now handles both 'Artist - Title' and 'Artist / Title'
  (slash separator used by abcde for compilation albums like Various Artists)
- _stream_abcde collects grab-progress lines (track N: Artist / Title)
  as a fallback TrackInfo source when no CDDB lines are found
- New _parse_grab_tracks() splits grab titles on ' / ' into artist+title
- 5 new tests (TestParseCddbLines.test_compilation_slash_separator,
  TestParseGrabTracks.*)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-18 09:42:03 +01:00
commit 09c01c9370
2 changed files with 96 additions and 7 deletions

View file

@ -71,7 +71,9 @@ def _sanitize_name(name: str) -> str:
def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
"""Parse CDDB track list from abcde output lines.
Matches lines like: "1: Wolfgang Anheisser - Wer recht in Freuden wandern will"
Matches lines like:
"1: Wolfgang Anheisser - Wer recht in Freuden wandern will" (regular albums)
"1: Trini Lopez / This Land Is Your Land (live)" (compilations)
Args:
lines: Lines collected from abcde stdout+stderr
@ -80,7 +82,7 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
List of TrackInfo (may be empty if CDDB lookup failed)
"""
tracks = []
pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+-\s+(.+)$")
pattern = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
for line in lines:
m = pattern.match(line)
if m:
@ -92,6 +94,30 @@ def _parse_cddb_lines(lines: list[str]) -> list[TrackInfo]:
return tracks
def _parse_grab_tracks(grab_data: list[tuple[int, str]]) -> list[TrackInfo]:
    """Convert captured grab-progress entries into TrackInfo objects.

    Used as a fallback track list when no CDDB lines were collected.
    Each raw text is divided at its first whitespace-delimited slash
    ("Artist / Title"): the left part becomes the artist, the right part
    the title. Text without such a separator is taken as the title and
    the artist is left empty.

    Args:
        grab_data: (track_number, raw_title) pairs taken from grab-progress
            lines.

    Returns:
        One TrackInfo per input pair, in input order.
    """
    slash = re.compile(r"\s+/\s+")

    def _artist_and_title(text: str) -> tuple[str, str]:
        # Only the first separator counts; later slashes stay in the title.
        hit = slash.search(text)
        if hit is None:
            return "", text.strip()
        return text[: hit.start()].strip(), text[hit.end():].strip()

    result = []
    for number, text in grab_data:
        artist, title = _artist_and_title(text)
        result.append(TrackInfo(track_number=number, artist=artist, title=title))
    return result
def _stream_abcde(
process: subprocess.Popen,
use_cddb: bool,
@ -103,22 +129,28 @@ def _stream_abcde(
- Sector progress bar from cdparanoia
- CDDB/MusicBrainz info lines
Handles both regular albums ("Artist - Title") and compilations
("Artist / Title") in CDDB output. Grab-progress lines are stored as
a fallback in case CDDB lines are absent.
Args:
process: Running abcde subprocess
use_cddb: Whether to expect and parse CDDB output
Returns:
Tuple (list of TrackInfo or None, total track count)
Tuple (list of TrackInfo or None, return code)
"""
grab_re = re.compile(r"Grabbing.*track\s+(\d+)(?:\s+of\s+(\d+))?[:\s]*(.*)", re.I)
tag_re = re.compile(r"Tagging track\s+(\d+)\s+of\s+(\d+)", re.I)
sector_re = re.compile(r"\(== PROGRESS ==.*\|\s*(\d+)\s+(\d+)\s*\]")
cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+-\s+(.+)$")
# Handle both "Artist - Title" and "Artist / Title" (compilations)
cddb_re = re.compile(r"^\s*(\d+):\s*(.+?)\s+(?:-|/)\s+(.+)$")
header_re = re.compile(r"-{2,}.+-{2,}") # ---- Artist / Album ----
total_re = re.compile(r"tracks?:\s+([\d\s]+)", re.I)
all_lines: list[str] = []
cddb_lines: list[str] = []
grab_data: list[tuple[int, str]] = [] # (track_number, raw_title) fallback
total_tracks = 0
current_track = 0
track_end_sector = 0
@ -143,6 +175,8 @@ def _stream_abcde(
title = m.group(3).strip().rstrip(".")
counter = f"{current_track}/{total_tracks}" if total_tracks else str(current_track)
print(f"\n Track {counter} {title}", flush=True)
if title:
grab_data.append((current_track, title))
track_end_sector = 0 # reset sector bar for new track
continue
@ -172,7 +206,7 @@ def _stream_abcde(
print(f"\n {line.strip()}", flush=True)
continue
# ── CDDB track lines "1: Artist - Title"
# ── CDDB track lines "1: Artist - Title" or "1: Artist / Title"
m = cddb_re.match(line)
if m:
cddb_lines.append(line)
@ -191,8 +225,17 @@ def _stream_abcde(
# Newline after last progress bar
print(flush=True)
tracks = _parse_cddb_lines(cddb_lines) if use_cddb else None
return tracks, returncode
if not use_cddb:
return None, returncode
tracks = _parse_cddb_lines(cddb_lines)
if not tracks and grab_data:
# CDDB lines absent but grab progress contained track titles
tracks = _parse_grab_tracks(grab_data)
if tracks:
print(f" (Tracklist aus Grab-Fortschritt: {len(tracks)} Tracks)", flush=True)
return tracks or None, returncode
def _extract_tracks(output_dir: Path, audio_format: AudioFormat) -> list[Path]:

View file

@ -10,6 +10,7 @@ from musiksammlung.ripper import (
_clean_input,
_extract_tracks,
_parse_cddb_lines,
_parse_grab_tracks,
_rename_files,
_sanitize_name,
interactive_rip,
@ -97,10 +98,55 @@ class TestParseCddbLines:
tracks = _parse_cddb_lines(lines)
assert len(tracks) == 1
def test_compilation_slash_separator(self) -> None:
    """Compilation format: 'N: Artist / Title' is parsed correctly."""
    expected = [
        ("Trini Lopez", "This Land Is Your Land (live)"),
        ("The Foundations", "In the Bad Bad Old Days"),
    ]
    # Build the input lines in the "N: Artist / Title" shape from the table.
    raw_lines = [
        f"{index}: {artist} / {title}"
        for index, (artist, title) in enumerate(expected, start=1)
    ]
    parsed = _parse_cddb_lines(raw_lines)
    assert len(parsed) == 2
    for track, (artist, title) in zip(parsed, expected):
        assert track.artist == artist
        assert track.title == title
def test_empty_input(self) -> None:
    """An empty list of lines yields an empty track list."""
    parsed = _parse_cddb_lines([])
    assert parsed == []
class TestParseGrabTracks:
    """Tests for _parse_grab_tracks."""

    def test_artist_slash_title(self) -> None:
        """A single 'Artist / Title' entry is split into artist and title."""
        result = _parse_grab_tracks(
            [(1, "Trini Lopez / This Land Is Your Land (live)")]
        )
        assert len(result) == 1
        first = result[0]
        assert first.track_number == 1
        assert first.artist == "Trini Lopez"
        assert first.title == "This Land Is Your Land (live)"

    def test_title_only_no_slash(self) -> None:
        """Without a slash the artist is empty and the title is the whole string."""
        result = _parse_grab_tracks([(3, "Beethoven 5. Sinfonie")])
        first = result[0]
        assert first.artist == ""
        assert first.title == "Beethoven 5. Sinfonie"

    def test_multiple_tracks(self) -> None:
        """Several entries are all converted; the second one is checked in detail."""
        entries = [
            (1, "KC and the Sunshine Band / Give It Up"),
            (2, "Sam & Dave / Can't You Find Another Way"),
        ]
        result = _parse_grab_tracks(entries)
        assert len(result) == 2
        second = result[1]
        assert second.artist == "Sam & Dave"
        assert second.title == "Can't You Find Another Way"

    def test_empty_input(self) -> None:
        """An empty input list yields an empty track list."""
        result = _parse_grab_tracks([])
        assert result == []
class TestRipperConfig:
"""Tests für RipperConfig."""