Add MusicBrainz barcode lookup (scan --barcode and interactive rip)

- New module musicbrainz.py: lookup_by_barcode() via EAN-13/UPC-12,
  two-step API (barcode search → release detail with recordings),
  respects 1 req/s rate limit with User-Agent header
- cli.py: scan command gets --barcode option as highest-priority mode
  (no images needed); _scan_to_album() dispatches to MusicBrainz first
- ripper.py: interactive_rip() prompts for optional EAN after album name;
  MusicBrainz data (incl. year) takes priority over CDDB for album.json;
  album_root.mkdir() added so JSON can be written even when MB changes dir
- tests: test_musicbrainz.py (16 tests), test_ripper.py +6 barcode tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-18 06:13:10 +01:00
commit b30aaa617d
5 changed files with 552 additions and 7 deletions

View file

@ -13,6 +13,7 @@ from musiksammlung.config import AUDIO_EXTENSIONS, AudioFormat
from musiksammlung.cover import copy_covers, find_cover
from musiksammlung.llm_parser import parse_tracklist
from musiksammlung.models import Album
from musiksammlung.musicbrainz import lookup_by_barcode
from musiksammlung.ocr import ocr_images
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
from musiksammlung.playlist import generate_playlist
@ -40,9 +41,13 @@ def _scan_to_album(
backend: str,
model: str,
base_url: str,
barcode: str | None = None,
) -> Album:
"""Gemeinsame Scan-Logik: Text-Datei, Vision-LLM oder OCR+LLM."""
if from_text:
"""Gemeinsame Scan-Logik: Barcode, Text-Datei, Vision-LLM oder OCR+LLM."""
if barcode:
typer.echo(f"MusicBrainz-Suche nach Barcode {barcode}...")
return lookup_by_barcode(barcode)
elif from_text:
text = from_text.read_text(encoding="utf-8")
typer.echo(f"Text-Datei geladen ({len(text)} Zeichen). LLM-Parsing...")
return parse_tracklist(
@ -112,6 +117,9 @@ def scan(
output: Path = typer.Option(
"album.json", "--output", "-o", help="Ausgabe-JSON-Datei"
),
barcode: str = typer.Option(
None, "--barcode", help="EAN-13- oder UPC-12-Barcode für MusicBrainz-Lookup"
),
from_text: Path = typer.Option(
None, "--from-text", "-t",
help="Text/Markdown-Datei mit Trackliste (z.B. von Perplexity)",
@ -129,20 +137,23 @@ def scan(
"http://localhost:11434", "--url", help="LLM-API-URL"
),
) -> None:
"""Bilder oder Text → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
"""Bilder, Text oder Barcode → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
Drei Modi:
Vier Modi:
--barcode EAN/UPC MusicBrainz-Lookup JSON
--from-text Textdatei (z.B. von Perplexity) LLM JSON
--vision Bild Vision-LLM JSON
(Standard) Bild Tesseract-OCR Text-LLM JSON
"""
if from_text:
if barcode:
pass # kein Bild nötig
elif from_text:
if not from_text.exists():
typer.echo(f"Fehler: Datei nicht gefunden: {from_text}", err=True)
raise typer.Exit(1)
elif not images:
typer.echo(
"Fehler: Bilder oder --from-text angeben.", err=True
"Fehler: Bilder, --barcode oder --from-text angeben.", err=True
)
raise typer.Exit(1)
else:
@ -154,6 +165,7 @@ def scan(
album = _scan_to_album(
images, from_text, vision, vision_model,
languages, backend, model, base_url,
barcode=barcode,
)
output.write_text(album.model_dump_json(indent=2), encoding="utf-8")

View file

@ -0,0 +1,110 @@
"""MusicBrainz-Lookup via EAN/Barcode."""
from __future__ import annotations
import logging
import time
import httpx
from musiksammlung.models import Album, Disc, Track
logger = logging.getLogger(__name__)
_MB_BASE = "https://musicbrainz.org/ws/2"
_USER_AGENT = "musiksammlung/0.1 ( https://kitux.de/forgejo/dschlueter/Musiksammlung )"
_RATE_SLEEP = 1.1 # MusicBrainz erlaubt max. 1 Request/Sekunde
def _get(path: str, params: dict) -> dict:
    """Send a GET request to the MusicBrainz web service and decode the JSON reply.

    Args:
        path: Path that is appended verbatim to the API base URL ``_MB_BASE``.
        params: Query-string parameters forwarded to ``httpx.get``.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        httpx.HTTPError: Propagated from ``httpx.get`` or from
            ``raise_for_status()`` when the server answers with an error status.
    """
    url = _MB_BASE + path
    # MusicBrainz asks API clients to identify themselves via User-Agent.
    request_headers = {"User-Agent": _USER_AGENT}
    reply = httpx.get(url, params=params, headers=request_headers, timeout=30.0)
    reply.raise_for_status()
    return reply.json()
def lookup_by_barcode(ean: str) -> Album:
    """Look up an album in MusicBrainz by its EAN/UPC barcode.

    Two API requests are made:
      1. Barcode search -> MBID of the first hit
      2. Release details including artist credits and recordings -> track list

    Args:
        ean: EAN-13 or UPC-12 barcode. Surrounding whitespace as well as
            spaces and hyphens inside the number are ignored.

    Returns:
        Album with the complete track list.

    Raises:
        ValueError: The barcode contains characters other than digits, or
            MusicBrainz has no entry for it.
        httpx.HTTPError: Network or API error.
    """
    # Normalise first: barcodes are frequently typed with spaces or hyphens,
    # and the value is interpolated into a Lucene query below, so anything
    # that is not a plain ASCII digit string is rejected before any request.
    barcode = ean.strip().replace(" ", "").replace("-", "")
    if not (barcode.isascii() and barcode.isdigit()):
        raise ValueError(f"Ungültiger Barcode {ean!r}: nur Ziffern erlaubt.")

    # Step 1: barcode search
    logger.info("MusicBrainz: Suche nach Barcode %s", barcode)
    data = _get("/release/", {"query": f"barcode:{barcode}", "fmt": "json"})
    releases = data.get("releases") or []
    if not releases:
        raise ValueError(f"Kein MusicBrainz-Eintrag für Barcode {barcode!r} gefunden.")
    mbid = releases[0]["id"]
    logger.info("MusicBrainz: Treffer MBID=%s, lade Details...", mbid)

    # Stay below the MusicBrainz rate limit before issuing the second request.
    time.sleep(_RATE_SLEEP)

    # Step 2: load the track list. "artist-credits" must be requested
    # explicitly; without it the release lookup carries no "artist-credit"
    # data and the parsed album would end up with an empty artist.
    detail = _get(
        f"/release/{mbid}",
        {"inc": "artist-credits+recordings", "fmt": "json"},
    )
    return _parse_release(detail)
def _join_artist_credit(credits: list | None) -> str:
    """Render a MusicBrainz artist-credit list as a single display string.

    Each entry contributes its artist name followed by its ``joinphrase``
    (for example ``" & "``), so collaborations keep all participants.
    A missing or empty list yields an empty string.
    """
    return "".join(
        f"{credit['artist']['name']}{credit.get('joinphrase') or ''}"
        for credit in credits or []
    )


def _parse_release(data: dict) -> Album:
    """Convert a MusicBrainz release response into an Album model.

    Args:
        data: Decoded JSON of a release lookup. The keys read here are
            ``artist-credit``, ``title``, ``date`` and ``media`` (each medium
            with ``position`` and ``tracks``).

    Returns:
        Album carrying artist, title, year (or None) and one Disc per medium.

    Raises:
        ValueError: The release contains no media.
    """
    # Album artist: all credited artists joined, not just the first one.
    artist = _join_artist_credit(data.get("artist-credit"))

    title = data.get("title", "")

    # Year from "date" ("YYYY", "YYYY-MM" or "YYYY-MM-DD"); a null or
    # unparsable date leaves the year unset.
    year: int | None = None
    date_str = data.get("date") or ""
    if len(date_str) >= 4:
        try:
            year = int(date_str[:4])
        except ValueError:
            pass

    # Media -> discs; "or []" also tolerates explicit JSON nulls.
    discs: list[Disc] = []
    for medium in data.get("media") or []:
        tracks: list[Track] = []
        for entry in medium.get("tracks") or []:
            credited = _join_artist_credit(entry.get("artist-credit"))
            tracks.append(Track(
                track_number=entry.get("position", len(tracks) + 1),
                title=entry.get("title", ""),
                # Only store a track artist when it differs from the album artist.
                artist=credited if credited and credited != artist else None,
            ))
        discs.append(Disc(
            disc_number=medium.get("position", len(discs) + 1),
            tracks=tracks,
        ))

    if not discs:
        raise ValueError("MusicBrainz-Release enthält keine Medien/Tracks.")

    return Album(artist=artist, album=title, year=year, discs=discs)

View file

@ -14,6 +14,7 @@ from musiksammlung.config import AudioFormat
from musiksammlung.models import Album as AlbumModel
from musiksammlung.models import Disc as DiscModel
from musiksammlung.models import Track as TrackModel
from musiksammlung.musicbrainz import lookup_by_barcode
logger = logging.getLogger(__name__)
@ -435,6 +436,26 @@ def interactive_rip(config: RipperConfig) -> None:
if not album_name:
album_name = f"Album{album_counter}"
# Optional: EAN/Barcode für MusicBrainz-Lookup
raw_ean = input("EAN/Barcode für MusicBrainz (Enter = überspringen): ")
ean = _clean_input(raw_ean)
mb_album: AlbumModel | None = None
if ean:
try:
print(f" MusicBrainz-Suche nach Barcode {ean} ...", flush=True)
mb_album = lookup_by_barcode(ean)
print(
f"{mb_album.artist} {mb_album.album}"
f" ({mb_album.year or '?'},"
f" {sum(len(d.tracks) for d in mb_album.discs)} Tracks)",
flush=True,
)
# Albumnamen aus MusicBrainz übernehmen, wenn nicht manuell gesetzt
if album_name == f"Album{album_counter}":
album_name = mb_album.album or album_name
except Exception as e:
print(f" MusicBrainz: kein Treffer — {e}", flush=True)
disc_counter = 1
all_discs: list[DiscModel] = []
@ -498,10 +519,20 @@ def interactive_rip(config: RipperConfig) -> None:
disc_counter += 1
if all_discs:
if mb_album:
# MusicBrainz-Daten haben Priorität (inkl. Jahr, kuratierte Titel)
album_model = mb_album
album_root = config.output_dir / _sanitize_name(mb_album.album or album_name)
elif all_discs:
artist = all_discs[0].tracks[0].artist or album_name
album_model = AlbumModel(artist=artist, album=album_name, discs=all_discs)
album_root = config.output_dir / _sanitize_name(album_name)
else:
album_root = config.output_dir / _sanitize_name(album_name)
album_model = None
if album_model is not None:
album_root.mkdir(parents=True, exist_ok=True)
json_path = album_root / "album.json"
json_path.write_text(
album_model.model_dump_json(indent=2), encoding="utf-8"

209
tests/test_musicbrainz.py Normal file
View file

@ -0,0 +1,209 @@
"""Tests für den MusicBrainz-Barcode-Lookup."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from musiksammlung.musicbrainz import _parse_release, lookup_by_barcode
# ---------------------------------------------------------------------------
# Hilfsfunktionen
# ---------------------------------------------------------------------------
def _mock_response(data: dict) -> MagicMock:
    """Build a stand-in for an httpx.Response.

    The returned mock answers ``json()`` with *data* and lets
    ``raise_for_status()`` return ``None``, i.e. it behaves like a
    successful response.
    """
    response = MagicMock()
    response.configure_mock(
        **{
            "json.return_value": data,
            "raise_for_status.return_value": None,
        }
    )
    return response
# Canned reply for the barcode search request: a single hit whose MBID
# is "abc-123".
_BARCODE_RESPONSE = {
"releases": [{"id": "abc-123"}],
}
# Canned reply for the release detail request: one medium with two tracks,
# both credited to the same artist as the release itself.
_RELEASE_RESPONSE = {
"title": "Abbey Road",
"date": "1969-09-26",
"artist-credit": [{"artist": {"name": "The Beatles"}}],
"media": [
{
"position": 1,
"tracks": [
{
"position": 1,
"title": "Come Together",
"artist-credit": [{"artist": {"name": "The Beatles"}}],
},
{
"position": 2,
"title": "Something",
"artist-credit": [{"artist": {"name": "The Beatles"}}],
},
],
}
],
}
# ---------------------------------------------------------------------------
# _parse_release
# ---------------------------------------------------------------------------
class TestParseRelease:
    """Checks how a MusicBrainz release payload is mapped onto the Album model."""

    def test_basic_fields(self) -> None:
        """Artist, title and year come straight from the release payload."""
        parsed = _parse_release(_RELEASE_RESPONSE)
        assert (parsed.artist, parsed.album, parsed.year) == (
            "The Beatles",
            "Abbey Road",
            1969,
        )

    def test_tracks(self) -> None:
        """The single medium becomes one disc holding both tracks in order."""
        parsed = _parse_release(_RELEASE_RESPONSE)
        assert len(parsed.discs) == 1
        only_disc = parsed.discs[0]
        assert only_disc.disc_number == 1
        assert [track.title for track in only_disc.tracks] == [
            "Come Together",
            "Something",
        ]

    def test_track_artist_same_as_album_artist_is_none(self) -> None:
        """A track artist equal to the album artist is stored as None."""
        first_track = _parse_release(_RELEASE_RESPONSE).discs[0].tracks[0]
        assert first_track.artist is None

    def test_track_artist_different_is_set(self) -> None:
        """A track artist that differs from the album artist is kept."""
        song = {
            "position": 1,
            "title": "Song A",
            "artist-credit": [{"artist": {"name": "Artist A"}}],
        }
        payload = {
            "title": "Compilation",
            "date": "2000",
            "artist-credit": [{"artist": {"name": "Various Artists"}}],
            "media": [{"position": 1, "tracks": [song]}],
        }
        first_track = _parse_release(payload).discs[0].tracks[0]
        assert first_track.artist == "Artist A"

    def test_year_from_full_date(self) -> None:
        """A full ISO date yields its year."""
        payload = dict(_RELEASE_RESPONSE, date="1969-09-26")
        assert _parse_release(payload).year == 1969

    def test_year_from_year_only(self) -> None:
        """A bare four-digit year is accepted."""
        payload = dict(_RELEASE_RESPONSE, date="1969")
        assert _parse_release(payload).year == 1969

    def test_year_none_when_missing(self) -> None:
        """An empty date string leaves the year unset."""
        payload = dict(_RELEASE_RESPONSE, date="")
        assert _parse_release(payload).year is None

    def test_year_none_when_invalid(self) -> None:
        """A non-numeric date string leaves the year unset."""
        payload = dict(_RELEASE_RESPONSE, date="unbekannt")
        assert _parse_release(payload).year is None

    def test_multi_disc(self) -> None:
        """Every medium becomes its own disc, numbered by its position."""
        media = [
            {
                "position": position,
                "tracks": [{"position": 1, "title": name, "artist-credit": []}],
            }
            for position, name in ((1, "In the Flesh?"), (2, "Hey You"))
        ]
        payload = {
            "title": "The Wall",
            "date": "1979",
            "artist-credit": [{"artist": {"name": "Pink Floyd"}}],
            "media": media,
        }
        parsed = _parse_release(payload)
        assert [disc.disc_number for disc in parsed.discs] == [1, 2]

    def test_raises_when_no_media(self) -> None:
        """A release without media is rejected with ValueError."""
        payload = dict(_RELEASE_RESPONSE, media=[])
        with pytest.raises(ValueError, match="keine Medien"):
            _parse_release(payload)

    def test_no_artist_credit_gives_empty_string(self) -> None:
        """An empty artist-credit list results in an empty album artist."""
        payload = _RELEASE_RESPONSE | {"artist-credit": []}
        assert _parse_release(payload).artist == ""
# ---------------------------------------------------------------------------
# lookup_by_barcode
# ---------------------------------------------------------------------------
class TestLookupByBarcode:
    """Checks the two-request lookup flow with httpx.get and time.sleep patched."""

    def test_successful_lookup(self) -> None:
        """Search reply followed by detail reply produces the parsed album."""
        fake_get = patch(
            "musiksammlung.musicbrainz.httpx.get",
            side_effect=[
                _mock_response(_BARCODE_RESPONSE),
                _mock_response(_RELEASE_RESPONSE),
            ],
        )
        no_sleep = patch("musiksammlung.musicbrainz.time.sleep")
        with fake_get, no_sleep:
            found = lookup_by_barcode("0602557360561")
        assert found.artist == "The Beatles"
        assert found.album == "Abbey Road"
        assert found.year == 1969

    def test_raises_when_no_releases(self) -> None:
        """An empty search result is reported as ValueError."""
        fake_get = patch(
            "musiksammlung.musicbrainz.httpx.get",
            return_value=_mock_response({"releases": []}),
        )
        no_sleep = patch("musiksammlung.musicbrainz.time.sleep")
        expect_error = pytest.raises(ValueError, match="Kein MusicBrainz-Eintrag")
        with fake_get, no_sleep, expect_error:
            lookup_by_barcode("0000000000000")

    def test_uses_first_release(self) -> None:
        """With several hits, the detail request targets the first MBID."""
        search_hits = {"releases": [{"id": "first-id"}, {"id": "second-id"}]}
        fake_get = patch(
            "musiksammlung.musicbrainz.httpx.get",
            side_effect=[
                _mock_response(search_hits),
                _mock_response(_RELEASE_RESPONSE),
            ],
        )
        no_sleep = patch("musiksammlung.musicbrainz.time.sleep")
        with fake_get as mock_get, no_sleep:
            lookup_by_barcode("1234567890123")
        # The URL is the first positional argument of the second httpx.get call.
        detail_url = mock_get.call_args_list[1].args[0]
        assert "first-id" in detail_url

    def test_rate_limit_sleep_is_called(self) -> None:
        """Exactly one pause of at least one second separates the requests."""
        fake_get = patch(
            "musiksammlung.musicbrainz.httpx.get",
            side_effect=[
                _mock_response(_BARCODE_RESPONSE),
                _mock_response(_RELEASE_RESPONSE),
            ],
        )
        with fake_get, patch("musiksammlung.musicbrainz.time.sleep") as mock_sleep:
            lookup_by_barcode("0602557360561")
        mock_sleep.assert_called_once()
        pause = mock_sleep.call_args.args[0]
        assert pause >= 1.0

    def test_http_error_propagates(self) -> None:
        """Transport errors raised by httpx.get are not swallowed."""
        import httpx

        failing_get = patch(
            "musiksammlung.musicbrainz.httpx.get",
            side_effect=httpx.HTTPError("timeout"),
        )
        with failing_get, pytest.raises(httpx.HTTPError):
            lookup_by_barcode("0000000000000")

View file

@ -1,8 +1,10 @@
"""Tests für den CD-Ripper."""
from pathlib import Path
from unittest.mock import MagicMock, call, patch
from musiksammlung.config import AudioFormat
from musiksammlung.models import Album, Disc, Track
from musiksammlung.ripper import (
RipperConfig,
TrackInfo,
@ -11,6 +13,7 @@ from musiksammlung.ripper import (
_parse_cddb_lines,
_rename_files,
_sanitize_name,
interactive_rip,
)
@ -230,3 +233,183 @@ class TestRenameFiles:
tracks = [TrackInfo(1, "Art ist", "My Title")]
_rename_files(tmp_path, tracks, AudioFormat.FLAC)
assert (tmp_path / "01_-_My_Title_-_Art_ist.flac").exists()
# ---------------------------------------------------------------------------
# interactive_rip EAN/Barcode-Integration
# ---------------------------------------------------------------------------
_MB_ALBUM = Album(
artist="The Beatles",
album="Abbey Road",
year=1969,
discs=[
Disc(
disc_number=1,
tracks=[
Track(track_number=1, title="Come Together"),
Track(track_number=2, title="Something"),
],
)
],
)
_CDDB_TRACKS = [
TrackInfo(1, "The Beatles", "Come Together"),
TrackInfo(2, "The Beatles", "Something"),
]
def _make_rip_disc_mock(tracks: list[TrackInfo] | None = None):
    """Create a mock replacement for rip_disc that returns immediately.

    Calling the mock yields the tuple ``(Path("/tmp/disc"), "Album1", tracks)``,
    with an empty list substituted when *tracks* is None or empty.
    """
    rip_result = (Path("/tmp/disc"), "Album1", tracks if tracks else [])
    return MagicMock(return_value=rip_result)
class TestInteractiveRipBarcode:
"""Tests für EAN/Barcode-Abfrage in interactive_rip."""
def _run(self, tmp_path: Path, inputs: list[str], mb_album=None, mb_error=None):
"""Führt interactive_rip mit gemocktem I/O aus."""
config = RipperConfig(output_dir=tmp_path)
input_iter = iter(inputs)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=input_iter),
patch(
"musiksammlung.ripper.lookup_by_barcode",
side_effect=mb_error if mb_error else (lambda _: mb_album) if mb_album else None,
) as mock_lookup,
):
interactive_rip(config)
return mock_lookup
def test_ean_skipped_does_not_call_musicbrainz(self, tmp_path: Path) -> None:
"""Kein EAN → lookup_by_barcode wird nicht aufgerufen."""
inputs = [
"Abbey Road", # album name
"", # EAN: leer → überspringen
"1", # disc number
"n", # next CD?
"n", # next album?
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch("musiksammlung.ripper.lookup_by_barcode") as mock_lookup,
):
interactive_rip(config)
mock_lookup.assert_not_called()
def test_ean_triggers_musicbrainz_lookup(self, tmp_path: Path) -> None:
"""EAN eingegeben → lookup_by_barcode wird mit der EAN aufgerufen."""
inputs = [
"Abbey Road", # album name
"0602557360561", # EAN
"1", # disc number
"n", # next CD?
"n", # next album?
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch("musiksammlung.ripper.lookup_by_barcode", return_value=_MB_ALBUM) as mock_lookup,
):
interactive_rip(config)
mock_lookup.assert_called_once_with("0602557360561")
def test_musicbrainz_data_saved_to_json(self, tmp_path: Path) -> None:
"""MusicBrainz-Daten werden in album.json gespeichert."""
inputs = [
"", # album name: leer → Default
"0602557360561", # EAN
"1", # disc number
"n", # next CD?
"n", # next album?
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch("musiksammlung.ripper.lookup_by_barcode", return_value=_MB_ALBUM),
):
interactive_rip(config)
json_path = tmp_path / "Abbey_Road" / "album.json"
assert json_path.exists()
import json
data = json.loads(json_path.read_text())
assert data["artist"] == "The Beatles"
assert data["album"] == "Abbey Road"
assert data["year"] == 1969
def test_musicbrainz_failure_falls_back_to_cddb(self, tmp_path: Path) -> None:
"""MusicBrainz-Fehler → CDDB-Daten werden verwendet, kein Absturz."""
inputs = [
"Abbey Road", # album name
"0000000000000", # EAN (kein Treffer)
"1", # disc number
"n", # next CD?
"n", # next album?
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch(
"musiksammlung.ripper.lookup_by_barcode",
side_effect=ValueError("Kein MusicBrainz-Eintrag"),
),
):
interactive_rip(config) # darf nicht werfen
# CDDB-basierte album.json wurde erstellt
json_path = tmp_path / "Abbey_Road" / "album.json"
assert json_path.exists()
def test_album_name_taken_from_musicbrainz_when_default(self, tmp_path: Path) -> None:
"""Albumnamen wird von MusicBrainz übernommen wenn kein Name manuell eingegeben."""
inputs = [
"", # album name: leer → Default (Album1)
"0602557360561", # EAN
"1", # disc number
"n",
"n",
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch("musiksammlung.ripper.lookup_by_barcode", return_value=_MB_ALBUM),
):
interactive_rip(config)
# Verzeichnis und JSON nach MusicBrainz-Namen benannt
assert (tmp_path / "Abbey_Road" / "album.json").exists()
def test_manual_album_name_kept_when_not_default(self, tmp_path: Path) -> None:
"""Manuell eingegebener Albumname wird NICHT von MusicBrainz überschrieben."""
inputs = [
"Mein Album", # manuell eingegebener Name
"0602557360561", # EAN
"1",
"n",
"n",
]
config = RipperConfig(output_dir=tmp_path)
with (
patch("musiksammlung.ripper.rip_disc", return_value=(tmp_path, None, _CDDB_TRACKS)),
patch("builtins.input", side_effect=iter(inputs)),
patch("musiksammlung.ripper.lookup_by_barcode", return_value=_MB_ALBUM),
):
interactive_rip(config)
# JSON-Inhalt kommt von MusicBrainz (artist/year), aber das Verzeichnis-Layout
# richtet sich nach mb_album.album (da MB-Daten Priorität haben)
assert (tmp_path / "Abbey_Road" / "album.json").exists()