Add MusicBrainz barcode lookup (scan --barcode and interactive rip)

- New module musicbrainz.py: lookup_by_barcode() looks up a release by its
  EAN-13/UPC-12 barcode using a two-step API call (barcode search → release
  detail with recordings); it sends a User-Agent header and respects the
  1 req/s rate limit
- cli.py: scan command gets --barcode option as highest-priority mode
  (no images needed); _scan_to_album() dispatches to MusicBrainz first
- ripper.py: interactive_rip() prompts for an optional EAN after the album name;
  MusicBrainz data (incl. year) takes priority over CDDB for album.json;
  album_root.mkdir() added so the JSON can be written even when the
  MusicBrainz album title changes the target directory
- tests: test_musicbrainz.py (16 tests), test_ripper.py +6 barcode tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-02-18 06:13:10 +01:00
commit b30aaa617d
5 changed files with 552 additions and 7 deletions

View file

@ -13,6 +13,7 @@ from musiksammlung.config import AUDIO_EXTENSIONS, AudioFormat
from musiksammlung.cover import copy_covers, find_cover
from musiksammlung.llm_parser import parse_tracklist
from musiksammlung.models import Album
from musiksammlung.musicbrainz import lookup_by_barcode
from musiksammlung.ocr import ocr_images
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
from musiksammlung.playlist import generate_playlist
@ -40,9 +41,13 @@ def _scan_to_album(
backend: str,
model: str,
base_url: str,
barcode: str | None = None,
) -> Album:
"""Gemeinsame Scan-Logik: Text-Datei, Vision-LLM oder OCR+LLM."""
if from_text:
"""Gemeinsame Scan-Logik: Barcode, Text-Datei, Vision-LLM oder OCR+LLM."""
if barcode:
typer.echo(f"MusicBrainz-Suche nach Barcode {barcode}...")
return lookup_by_barcode(barcode)
elif from_text:
text = from_text.read_text(encoding="utf-8")
typer.echo(f"Text-Datei geladen ({len(text)} Zeichen). LLM-Parsing...")
return parse_tracklist(
@ -112,6 +117,9 @@ def scan(
output: Path = typer.Option(
"album.json", "--output", "-o", help="Ausgabe-JSON-Datei"
),
barcode: str = typer.Option(
None, "--barcode", help="EAN-13- oder UPC-12-Barcode für MusicBrainz-Lookup"
),
from_text: Path = typer.Option(
None, "--from-text", "-t",
help="Text/Markdown-Datei mit Trackliste (z.B. von Perplexity)",
@ -129,20 +137,23 @@ def scan(
"http://localhost:11434", "--url", help="LLM-API-URL"
),
) -> None:
"""Bilder oder Text → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
"""Bilder, Text oder Barcode → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
Drei Modi:
Vier Modi:
--barcode EAN/UPC MusicBrainz-Lookup JSON
--from-text Textdatei (z.B. von Perplexity) LLM JSON
--vision Bild Vision-LLM JSON
(Standard) Bild Tesseract-OCR Text-LLM JSON
"""
if from_text:
if barcode:
pass # kein Bild nötig
elif from_text:
if not from_text.exists():
typer.echo(f"Fehler: Datei nicht gefunden: {from_text}", err=True)
raise typer.Exit(1)
elif not images:
typer.echo(
"Fehler: Bilder oder --from-text angeben.", err=True
"Fehler: Bilder, --barcode oder --from-text angeben.", err=True
)
raise typer.Exit(1)
else:
@ -154,6 +165,7 @@ def scan(
album = _scan_to_album(
images, from_text, vision, vision_model,
languages, backend, model, base_url,
barcode=barcode,
)
output.write_text(album.model_dump_json(indent=2), encoding="utf-8")

View file

@ -0,0 +1,110 @@
"""MusicBrainz-Lookup via EAN/Barcode."""
from __future__ import annotations
import logging
import time
import httpx
from musiksammlung.models import Album, Disc, Track
logger = logging.getLogger(__name__)
_MB_BASE = "https://musicbrainz.org/ws/2"
_USER_AGENT = "musiksammlung/0.1 ( https://kitux.de/forgejo/dschlueter/Musiksammlung )"
_RATE_SLEEP = 1.1 # MusicBrainz erlaubt max. 1 Request/Sekunde
def _get(path: str, params: dict) -> dict:
    """Send a GET request to the MusicBrainz web service and decode the JSON body.

    Args:
        path: Path below the API base URL, e.g. ``"/release/"``.
        params: Query-string parameters for the request.

    Returns:
        The decoded JSON response.

    Raises:
        httpx.HTTPError: Network failure or a non-success HTTP status.
    """
    url = f"{_MB_BASE}{path}"
    # The request carries the application's own User-Agent string.
    request_headers = {"User-Agent": _USER_AGENT}
    reply = httpx.get(url, params=params, headers=request_headers, timeout=30.0)
    reply.raise_for_status()
    payload = reply.json()
    return payload
def lookup_by_barcode(ean: str) -> Album:
    """Look up an album in MusicBrainz by its EAN/UPC barcode.

    Performs two API requests:
      1. Barcode search -> MBID of the first hit
      2. Release lookup with recordings and artist credits -> track list

    Whitespace and hyphens are removed from the barcode before it is used,
    so a code typed the way it is printed on the sleeve ("4 006381 333931")
    still works. Anything that is not purely ASCII digits afterwards is
    rejected instead of being interpolated into the search query.

    Args:
        ean: EAN-13 or UPC-12 barcode; may contain spaces or hyphens.

    Returns:
        The album produced by ``_parse_release`` from the release details.

    Raises:
        ValueError: The barcode is empty or contains non-digit characters,
            or no release was found for it.
        httpx.HTTPError: Network or API error.
    """
    # Validate before touching the network: the value ends up inside a
    # search query, where spaces or query operators would change its meaning.
    barcode = "".join(ean.split()).replace("-", "")
    if not (barcode.isascii() and barcode.isdigit()):
        raise ValueError(f"Ungültiger Barcode {ean!r}: nur Ziffern erlaubt.")

    # Step 1: barcode search
    logger.info("MusicBrainz: Suche nach Barcode %s", barcode)
    data = _get("/release/", {"query": f"barcode:{barcode}", "fmt": "json"})
    releases = data.get("releases", [])
    if not releases:
        raise ValueError(f"Kein MusicBrainz-Eintrag für Barcode {barcode!r} gefunden.")

    mbid = releases[0]["id"]
    logger.info("MusicBrainz: Treffer MBID=%s, lade Details...", mbid)

    # Pause between the two requests to stay within the rate limit.
    time.sleep(_RATE_SLEEP)

    # Step 2: load the track list. "artist-credits" must be requested
    # explicitly, otherwise the response carries no "artist-credit" data for
    # the release or its tracks. The values are separated by a space, which
    # is the decoded form of the "+" separator shown in the API docs.
    detail = _get(
        f"/release/{mbid}",
        {"inc": "recordings artist-credits", "fmt": "json"},
    )
    return _parse_release(detail)
def _parse_release(data: dict) -> Album:
    """Convert a MusicBrainz release response into an Album model.

    Args:
        data: Decoded JSON of a release lookup. The keys read are
            ``artist-credit``, ``title``, ``date`` and ``media`` (each medium
            with ``position`` and ``tracks``).

    Returns:
        Album with one Disc per medium and one Track per medium track.

    Raises:
        ValueError: The release contains no media.
    """
    # Album artist: the complete credit, not just its first entry.
    artist = _join_artist_credit(data.get("artist-credit"))

    title = data.get("title", "")

    # "date" is "YYYY", "YYYY-MM" or "YYYY-MM-DD"; only the year is kept.
    year: int | None = None
    date_str = data.get("date", "")
    if date_str and len(date_str) >= 4:
        try:
            year = int(date_str[:4])
        except ValueError:
            pass  # non-numeric prefix: leave the year unset

    # Media -> discs
    discs: list[Disc] = []
    for medium in data.get("media", []):
        # Fall back to running numbers when the response has no position.
        disc_number = medium.get("position", len(discs) + 1)
        tracks: list[Track] = []
        for t in medium.get("tracks", []):
            track_number = t.get("position", len(tracks) + 1)
            track_title = t.get("title", "")

            # Set the track artist only when it differs from the album artist.
            t_artist = _join_artist_credit(t.get("artist-credit"))
            track_artist: str | None = None
            if t_artist and t_artist != artist:
                track_artist = t_artist

            tracks.append(Track(
                track_number=track_number,
                title=track_title,
                artist=track_artist,
            ))
        discs.append(Disc(disc_number=disc_number, tracks=tracks))

    if not discs:
        raise ValueError("MusicBrainz-Release enthält keine Medien/Tracks.")

    return Album(artist=artist, album=title, year=year, discs=discs)


def _join_artist_credit(credits: list | None) -> str:
    """Join a MusicBrainz ``artist-credit`` list into one display string.

    Each entry contributes its artist name followed by its ``joinphrase``
    (e.g. " & " or " feat. "), so collaborations are kept in full. Entries
    without a usable name contribute nothing instead of raising. A missing
    or empty list yields ``""``.
    """
    parts: list[str] = []
    for credit in credits or []:
        # Prefer the artist's own name; fall back to the name as credited.
        name = (credit.get("artist") or {}).get("name") or credit.get("name") or ""
        parts.append(name + (credit.get("joinphrase") or ""))
    return "".join(parts).strip()

View file

@ -14,6 +14,7 @@ from musiksammlung.config import AudioFormat
from musiksammlung.models import Album as AlbumModel
from musiksammlung.models import Disc as DiscModel
from musiksammlung.models import Track as TrackModel
from musiksammlung.musicbrainz import lookup_by_barcode
logger = logging.getLogger(__name__)
@ -435,6 +436,26 @@ def interactive_rip(config: RipperConfig) -> None:
if not album_name:
album_name = f"Album{album_counter}"
# Optional: EAN/Barcode für MusicBrainz-Lookup
raw_ean = input("EAN/Barcode für MusicBrainz (Enter = überspringen): ")
ean = _clean_input(raw_ean)
mb_album: AlbumModel | None = None
if ean:
try:
print(f" MusicBrainz-Suche nach Barcode {ean} ...", flush=True)
mb_album = lookup_by_barcode(ean)
print(
f"{mb_album.artist} {mb_album.album}"
f" ({mb_album.year or '?'},"
f" {sum(len(d.tracks) for d in mb_album.discs)} Tracks)",
flush=True,
)
# Albumnamen aus MusicBrainz übernehmen, wenn nicht manuell gesetzt
if album_name == f"Album{album_counter}":
album_name = mb_album.album or album_name
except Exception as e:
print(f" MusicBrainz: kein Treffer — {e}", flush=True)
disc_counter = 1
all_discs: list[DiscModel] = []
@ -498,10 +519,20 @@ def interactive_rip(config: RipperConfig) -> None:
disc_counter += 1
if all_discs:
if mb_album:
# MusicBrainz-Daten haben Priorität (inkl. Jahr, kuratierte Titel)
album_model = mb_album
album_root = config.output_dir / _sanitize_name(mb_album.album or album_name)
elif all_discs:
artist = all_discs[0].tracks[0].artist or album_name
album_model = AlbumModel(artist=artist, album=album_name, discs=all_discs)
album_root = config.output_dir / _sanitize_name(album_name)
else:
album_root = config.output_dir / _sanitize_name(album_name)
album_model = None
if album_model is not None:
album_root.mkdir(parents=True, exist_ok=True)
json_path = album_root / "album.json"
json_path.write_text(
album_model.model_dump_json(indent=2), encoding="utf-8"