Add MusicBrainz barcode lookup (scan --barcode and interactive rip)
- New module musicbrainz.py: lookup_by_barcode() via EAN-13/UPC-12, two-step API (barcode search → release detail with recordings), respects the 1 req/s rate limit and sends a User-Agent header
- cli.py: scan command gets a --barcode option as the highest-priority mode (no images needed); _scan_to_album() dispatches to MusicBrainz first
- ripper.py: interactive_rip() prompts for an optional EAN after the album name; MusicBrainz data (incl. year) takes priority over CDDB for album.json; album_root.mkdir() added so the JSON can be written even when MusicBrainz changes the directory
- tests: test_musicbrainz.py (16 tests), test_ripper.py +6 barcode tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
6aba30c0e5
commit
b30aaa617d
5 changed files with 552 additions and 7 deletions
|
|
@ -13,6 +13,7 @@ from musiksammlung.config import AUDIO_EXTENSIONS, AudioFormat
|
|||
from musiksammlung.cover import copy_covers, find_cover
|
||||
from musiksammlung.llm_parser import parse_tracklist
|
||||
from musiksammlung.models import Album
|
||||
from musiksammlung.musicbrainz import lookup_by_barcode
|
||||
from musiksammlung.ocr import ocr_images
|
||||
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
|
||||
from musiksammlung.playlist import generate_playlist
|
||||
|
|
@ -40,9 +41,13 @@ def _scan_to_album(
|
|||
backend: str,
|
||||
model: str,
|
||||
base_url: str,
|
||||
barcode: str | None = None,
|
||||
) -> Album:
|
||||
"""Gemeinsame Scan-Logik: Text-Datei, Vision-LLM oder OCR+LLM."""
|
||||
if from_text:
|
||||
"""Gemeinsame Scan-Logik: Barcode, Text-Datei, Vision-LLM oder OCR+LLM."""
|
||||
if barcode:
|
||||
typer.echo(f"MusicBrainz-Suche nach Barcode {barcode}...")
|
||||
return lookup_by_barcode(barcode)
|
||||
elif from_text:
|
||||
text = from_text.read_text(encoding="utf-8")
|
||||
typer.echo(f"Text-Datei geladen ({len(text)} Zeichen). LLM-Parsing...")
|
||||
return parse_tracklist(
|
||||
|
|
@ -112,6 +117,9 @@ def scan(
|
|||
output: Path = typer.Option(
|
||||
"album.json", "--output", "-o", help="Ausgabe-JSON-Datei"
|
||||
),
|
||||
barcode: str = typer.Option(
|
||||
None, "--barcode", help="EAN-13- oder UPC-12-Barcode für MusicBrainz-Lookup"
|
||||
),
|
||||
from_text: Path = typer.Option(
|
||||
None, "--from-text", "-t",
|
||||
help="Text/Markdown-Datei mit Trackliste (z.B. von Perplexity)",
|
||||
|
|
@ -129,20 +137,23 @@ def scan(
|
|||
"http://localhost:11434", "--url", help="LLM-API-URL"
|
||||
),
|
||||
) -> None:
|
||||
"""Bilder oder Text → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
|
||||
"""Bilder, Text oder Barcode → Album-JSON erzeugen (zur Prüfung vor dem Anwenden).
|
||||
|
||||
Drei Modi:
|
||||
Vier Modi:
|
||||
--barcode EAN/UPC → MusicBrainz-Lookup → JSON
|
||||
--from-text Textdatei (z.B. von Perplexity) → LLM → JSON
|
||||
--vision Bild → Vision-LLM → JSON
|
||||
(Standard) Bild → Tesseract-OCR → Text-LLM → JSON
|
||||
"""
|
||||
if from_text:
|
||||
if barcode:
|
||||
pass # kein Bild nötig
|
||||
elif from_text:
|
||||
if not from_text.exists():
|
||||
typer.echo(f"Fehler: Datei nicht gefunden: {from_text}", err=True)
|
||||
raise typer.Exit(1)
|
||||
elif not images:
|
||||
typer.echo(
|
||||
"Fehler: Bilder oder --from-text angeben.", err=True
|
||||
"Fehler: Bilder, --barcode oder --from-text angeben.", err=True
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
else:
|
||||
|
|
@ -154,6 +165,7 @@ def scan(
|
|||
album = _scan_to_album(
|
||||
images, from_text, vision, vision_model,
|
||||
languages, backend, model, base_url,
|
||||
barcode=barcode,
|
||||
)
|
||||
|
||||
output.write_text(album.model_dump_json(indent=2), encoding="utf-8")
|
||||
|
|
|
|||
110
src/musiksammlung/musicbrainz.py
Normal file
110
src/musiksammlung/musicbrainz.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
"""MusicBrainz-Lookup via EAN/Barcode."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from musiksammlung.models import Album, Disc, Track
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_MB_BASE = "https://musicbrainz.org/ws/2"
|
||||
_USER_AGENT = "musiksammlung/0.1 ( https://kitux.de/forgejo/dschlueter/Musiksammlung )"
|
||||
_RATE_SLEEP = 1.1 # MusicBrainz erlaubt max. 1 Request/Sekunde
|
||||
|
||||
|
||||
def _get(path: str, params: dict) -> dict:
    """Send a GET request to the MusicBrainz API and return the JSON body.

    Args:
        path: Resource path that is appended to the API base URL.
        params: Query parameters passed along with the request.

    Returns:
        The response body decoded from JSON.

    Raises:
        httpx.HTTPError: The request failed or the server answered with an
            error status (raised by ``raise_for_status()``).
    """
    url = f"{_MB_BASE}{path}"
    # Every request carries the project's User-Agent string.
    headers = {"User-Agent": _USER_AGENT}
    reply = httpx.get(url, params=params, headers=headers, timeout=30.0)
    reply.raise_for_status()
    return reply.json()
|
||||
|
||||
|
||||
def lookup_by_barcode(ean: str) -> Album:
    """Look up an album in MusicBrainz by its EAN/UPC barcode.

    Two API requests are made:
      1. Barcode search -> MBID of the first hit
      2. Release details with recordings and artist credits -> track list

    Whitespace and hyphens are removed from ``ean`` first, and anything that
    is not purely ASCII digits is rejected before a request is sent, so the
    value cannot change the syntax of the search query.

    Args:
        ean: EAN-13 or UPC-12 barcode.

    Returns:
        Album built from the release details of the first search hit.

    Raises:
        ValueError: The barcode is not numeric, or no entry was found for it.
        httpx.HTTPError: Network or API error.
    """
    # Normalise before validating: printed barcodes are often typed in with
    # spaces or hyphens between the digit groups.
    code = "".join(ean.split()).replace("-", "")
    if not (code.isascii() and code.isdigit()):
        raise ValueError(f"Ungültiger Barcode {ean!r}: nur Ziffern erlaubt.")

    # Step 1: barcode search
    logger.info("MusicBrainz: Suche nach Barcode %s", code)
    data = _get("/release/", {"query": f"barcode:{code}", "fmt": "json"})

    releases = data.get("releases", [])
    if not releases:
        raise ValueError(f"Kein MusicBrainz-Eintrag für Barcode {code!r} gefunden.")

    mbid = releases[0]["id"]
    logger.info("MusicBrainz: Treffer MBID=%s, lade Details...", mbid)
    # Pause between the two requests to stay within the rate limit.
    time.sleep(_RATE_SLEEP)

    # Step 2: load the track list. "artist-credits" must be requested
    # explicitly; without it the release lookup carries no "artist-credit"
    # data and the parsed album artist would be empty.
    detail = _get(
        f"/release/{mbid}",
        {"inc": "recordings+artist-credits", "fmt": "json"},
    )
    return _parse_release(detail)
|
||||
|
||||
|
||||
def _credit_name(credits: list | None) -> str:
    """Join all artists of an ``artist-credit`` list into one display string."""
    # Each entry names one artist; "joinphrase" holds the text that links it
    # to the next entry (e.g. " & " or " feat. ") and may be missing or null.
    return "".join(
        f"{entry['artist']['name']}{entry.get('joinphrase') or ''}"
        for entry in credits or []
    )


def _parse_release(data: dict) -> Album:
    """Convert a MusicBrainz release response into an Album model.

    Args:
        data: Decoded JSON of a release lookup. The keys read here are
            "artist-credit", "title", "date" and "media" (each medium with
            "position" and "tracks").

    Returns:
        Album with one Disc per medium and one Track per medium track.

    Raises:
        ValueError: The release contains no media.
    """
    # Album artist: keep every credited artist, not only the first one, so
    # collaborations are not truncated to a single name.
    artist = _credit_name(data.get("artist-credit", []))

    # Album title
    title = data.get("title", "")

    # Extract the year from "date" ("YYYY", "YYYY-MM" or "YYYY-MM-DD")
    year: int | None = None
    date_str = data.get("date", "")
    if date_str and len(date_str) >= 4:
        try:
            year = int(date_str[:4])
        except ValueError:
            # Leading characters are not a number: leave the year unset.
            pass

    # Media -> discs
    discs: list[Disc] = []
    for medium in data.get("media", []):
        # Fall back to running numbers when "position" is absent.
        disc_number = medium.get("position", len(discs) + 1)
        tracks: list[Track] = []
        for t in medium.get("tracks", []):
            track_number = t.get("position", len(tracks) + 1)
            track_title = t.get("title", "")

            # Only set the track artist when it differs from the album artist
            t_artist = _credit_name(t.get("artist-credit", []))
            track_artist: str | None = None
            if t_artist and t_artist != artist:
                track_artist = t_artist

            tracks.append(Track(
                track_number=track_number,
                title=track_title,
                artist=track_artist,
            ))
        discs.append(Disc(disc_number=disc_number, tracks=tracks))

    if not discs:
        raise ValueError("MusicBrainz-Release enthält keine Medien/Tracks.")

    return Album(artist=artist, album=title, year=year, discs=discs)
|
||||
|
|
@ -14,6 +14,7 @@ from musiksammlung.config import AudioFormat
|
|||
from musiksammlung.models import Album as AlbumModel
|
||||
from musiksammlung.models import Disc as DiscModel
|
||||
from musiksammlung.models import Track as TrackModel
|
||||
from musiksammlung.musicbrainz import lookup_by_barcode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -435,6 +436,26 @@ def interactive_rip(config: RipperConfig) -> None:
|
|||
if not album_name:
|
||||
album_name = f"Album{album_counter}"
|
||||
|
||||
# Optional: EAN/Barcode für MusicBrainz-Lookup
|
||||
raw_ean = input("EAN/Barcode für MusicBrainz (Enter = überspringen): ")
|
||||
ean = _clean_input(raw_ean)
|
||||
mb_album: AlbumModel | None = None
|
||||
if ean:
|
||||
try:
|
||||
print(f" MusicBrainz-Suche nach Barcode {ean} ...", flush=True)
|
||||
mb_album = lookup_by_barcode(ean)
|
||||
print(
|
||||
f" ✓ {mb_album.artist} – {mb_album.album}"
|
||||
f" ({mb_album.year or '?'},"
|
||||
f" {sum(len(d.tracks) for d in mb_album.discs)} Tracks)",
|
||||
flush=True,
|
||||
)
|
||||
# Albumnamen aus MusicBrainz übernehmen, wenn nicht manuell gesetzt
|
||||
if album_name == f"Album{album_counter}":
|
||||
album_name = mb_album.album or album_name
|
||||
except Exception as e:
|
||||
print(f" MusicBrainz: kein Treffer — {e}", flush=True)
|
||||
|
||||
disc_counter = 1
|
||||
all_discs: list[DiscModel] = []
|
||||
|
||||
|
|
@ -498,10 +519,20 @@ def interactive_rip(config: RipperConfig) -> None:
|
|||
|
||||
disc_counter += 1
|
||||
|
||||
if all_discs:
|
||||
if mb_album:
|
||||
# MusicBrainz-Daten haben Priorität (inkl. Jahr, kuratierte Titel)
|
||||
album_model = mb_album
|
||||
album_root = config.output_dir / _sanitize_name(mb_album.album or album_name)
|
||||
elif all_discs:
|
||||
artist = all_discs[0].tracks[0].artist or album_name
|
||||
album_model = AlbumModel(artist=artist, album=album_name, discs=all_discs)
|
||||
album_root = config.output_dir / _sanitize_name(album_name)
|
||||
else:
|
||||
album_root = config.output_dir / _sanitize_name(album_name)
|
||||
album_model = None
|
||||
|
||||
if album_model is not None:
|
||||
album_root.mkdir(parents=True, exist_ok=True)
|
||||
json_path = album_root / "album.json"
|
||||
json_path.write_text(
|
||||
album_model.model_dump_json(indent=2), encoding="utf-8"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue