Compare commits

..

No commits in common. "071f4c5e1d89e6b601014b36d523eceb38f616d3" and "7516de439f30fb38d33a67a7158be6f4c11f9afb" have entirely different histories.

5 changed files with 7 additions and 414 deletions

View file

@ -48,6 +48,7 @@ def _image_ok(path: Path) -> bool:
def find_local_cover(image_files: List[Path]) -> Optional[Path]: def find_local_cover(image_files: List[Path]) -> Optional[Path]:
priority = ("folder", "front", "cover", "album") priority = ("folder", "front", "cover", "album")
# Sort by priority keyword, then size descending
def key(p: Path): def key(p: Path):
name = p.name.lower() name = p.name.lower()
score = next((i for i, kw in enumerate(priority) if kw in name), len(priority)) score = next((i for i, kw in enumerate(priority) if kw in name), len(priority))
@ -60,41 +61,6 @@ def find_local_cover(image_files: List[Path]) -> Optional[Path]:
return None return None
def normalize_cover_to_folder_jpg(cover_path: Path) -> Path:
    """Ensure the cover is stored as ``folder.jpg`` next to ``cover_path``.

    - Already ``folder.jpg``: returned unchanged.
    - Another ``.jpg``/``.jpeg`` file: moved to ``folder.jpg``.
    - Any other suffix with PIL available: re-encoded as JPEG (quality 92),
      then the original file is deleted.
    - Any other suffix without PIL: moved to ``folder.jpg`` as-is, even
      though the content is not JPEG.

    An existing ``folder.jpg`` is overwritten on every platform.

    Returns the path to ``folder.jpg``, or the untouched ``cover_path`` when
    normalization fails (the error is reported on stderr).
    """
    dest = cover_path.parent / "folder.jpg"
    if cover_path.resolve() == dest.resolve():
        return dest

    suffix = cover_path.suffix.lower()
    try:
        if suffix in (".jpg", ".jpeg") or not HAS_PIL:
            # Path.replace overwrites an existing target everywhere, whereas
            # Path.rename raises FileExistsError on Windows. Without PIL a
            # non-JPEG file is moved anyway, even though it is not a JPEG.
            cover_path.replace(dest)
        else:
            import io

            # Read the whole file first so no handle on the original is
            # still open when it gets deleted below.
            raw = cover_path.read_bytes()
            with Image.open(io.BytesIO(raw)) as img:
                buf = io.BytesIO()
                img.convert("RGB").save(buf, format="JPEG", quality=92)
            dest.write_bytes(buf.getvalue())
            cover_path.unlink()
        print(f" 🖼️ Cover normalisiert → folder.jpg ({cover_path.name})")
    except Exception as e:
        # Best effort: keep using the original cover if anything goes wrong.
        print(f" ⚠️ Cover-Normalisierung fehlgeschlagen: {e}", file=sys.stderr)
        return cover_path
    return dest
def _mb_cover_url(release_mbid: str) -> Optional[str]: def _mb_cover_url(release_mbid: str) -> Optional[str]:
url = f"https://coverartarchive.org/release/{release_mbid}/front" url = f"https://coverartarchive.org/release/{release_mbid}/front"
if not HAS_REQUESTS: if not HAS_REQUESTS:
@ -221,72 +187,14 @@ def embed_cover(audio_path: Path, cover_path: Path) -> bool:
return False return False
def _discogs_cover_url(artist: Optional[str], album: Optional[str]) -> Optional[str]:
    """Search Discogs for artist + album and return the first usable image URL.

    Returns ``None`` when ``requests`` is unavailable, when artist or album
    is missing, when the API does not answer with HTTP 200, when no result
    carries a non-"spacer" image, or when the lookup raises (the error is
    reported on stderr). A ``DISCOGS_TOKEN`` environment variable, if set,
    is sent as the authorization token.
    """
    if not (HAS_REQUESTS and artist and album):
        return None

    import os

    request_headers = {"User-Agent": "MusicMetadataEnricher/1.0"}
    api_token = os.getenv("DISCOGS_TOKEN", "")
    if api_token:
        request_headers["Authorization"] = f"Discogs token={api_token}"

    query = {"artist": artist, "release_title": album, "type": "release", "per_page": 3}
    try:
        response = requests.get(
            "https://api.discogs.com/database/search",
            params=query,
            headers=request_headers,
            timeout=10,
        )
        if response.status_code != 200:
            return None
        # Prefer the full cover image and fall back to the thumbnail.
        image_urls = (
            hit.get("cover_image") or hit.get("thumb")
            for hit in response.json().get("results", [])
        )
        # URLs containing "spacer" are skipped.
        return next((u for u in image_urls if u and "spacer" not in u), None)
    except Exception as e:
        print(f" ⚠️ Discogs-Suchfehler: {e}", file=sys.stderr)
    return None
def download_discogs_cover(artist: Optional[str], album: Optional[str], dest_dir: Path) -> Optional[Path]:
    """Download the Discogs cover for artist/album to ``dest_dir/folder.jpg``.

    The image URL comes from ``_discogs_cover_url``. When PIL is available,
    any payload that is not already a JPEG (PNG, WebP, ...) is re-encoded as
    JPEG (quality 92), so the bytes match the ``.jpg`` file name. Without
    PIL the payload is written unchanged.

    Returns the path to the written file if ``_image_ok`` accepts it,
    otherwise ``None``. On any failure the target file is removed and the
    error is reported on stderr.
    """
    url = _discogs_cover_url(artist, album)
    if not url:
        return None

    dest = dest_dir / "folder.jpg"
    try:
        r = requests.get(url, timeout=15, headers={"User-Agent": "MusicMetadataEnricher/1.0"})
        if r.status_code != 200:
            return None
        payload = r.content
        # JPEG streams start with the bytes FF D8 FF. Checking the payload
        # itself is more reliable than the content-type header or the URL
        # suffix, and also catches non-PNG formats.
        if HAS_PIL and not payload.startswith(b"\xff\xd8\xff"):
            import io

            with Image.open(io.BytesIO(payload)) as img:
                buf = io.BytesIO()
                img.convert("RGB").save(buf, format="JPEG", quality=92)
            payload = buf.getvalue()
        dest.write_bytes(payload)
        if _image_ok(dest):
            return dest
        dest.unlink(missing_ok=True)
    except Exception as e:
        print(f" ⚠️ Discogs-Cover-Fehler: {e}", file=sys.stderr)
        dest.unlink(missing_ok=True)
    return None
def resolve_cover( def resolve_cover(
image_files: List[Path], image_files: List[Path],
release_mbid: Optional[str], release_mbid: Optional[str],
album_dir: Path, album_dir: Path,
artist: Optional[str] = None,
album: Optional[str] = None,
) -> tuple[Optional[Path], Optional[str]]: ) -> tuple[Optional[Path], Optional[str]]:
"""Returns (cover_path, source_label).""" """Returns (cover_path, source_label)."""
local = find_local_cover(image_files) local = find_local_cover(image_files)
if local: if local:
local = normalize_cover_to_folder_jpg(local)
return local, "local" return local, "local"
if release_mbid: if release_mbid:
@ -294,9 +202,4 @@ def resolve_cover(
if downloaded: if downloaded:
return downloaded, "musicbrainz" return downloaded, "musicbrainz"
if artist or album:
downloaded = download_discogs_cover(artist, album, album_dir)
if downloaded:
return downloaded, "discogs"
return None, None return None, None

View file

@ -42,52 +42,21 @@ def _safe_name(s: str) -> str:
return re.sub(r"\s+", "_", s).strip("._-") return re.sub(r"\s+", "_", s).strip("._-")
# Lower-case genre fragments (English and German). _is_classical tests each
# one as a substring of the casefolded genre, so stems such as "symphon"
# also match longer words.
_CLASSICAL_GENRE_KEYWORDS = {
"classical", "klassik", "baroque", "barock", "romantic", "romantik",
"opera", "oper", "operetta", "operette", "chamber", "kammermusik",
"symphon", "concerto", "oratorio", "sacred", "kirchenmusik",
"renaissance", "medieval", "contemporary classical",
}
# Lower-case composer surnames. _is_classical tests each one as a substring
# of the casefolded track artist.
_CLASSICAL_COMPOSER_KEYWORDS = {
# Well-known composers used as a signal (the surname is enough)
"bach", "beethoven", "mozart", "handel", "haydn", "schubert", "brahms",
"chopin", "liszt", "schumann", "wagner", "verdi", "puccini", "vivaldi",
"telemann", "buxtehude", "monteverdi", "palestrina", "purcell",
"mahler", "bruckner", "dvorak", "tchaikovsky", "tschaikowski",
"debussy", "ravel", "satie", "strauss", "sibelius", "grieg",
}
def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool: def _is_classical(albumartist: str, track_artist: str, genre: str) -> bool:
""" """
Klassik-Schema (Performer_-_Komponist_-_Werk) wird angewendet wenn: Classical schema applies when performer (albumartist) composer (track_artist),
1. Genre explizit klassisch ist, ODER which covers both 'real' classical music and jazz-on-classical-themes albums.
2. track_artist ist ein bekannter Komponist (und albumartist), ODER Genre keyword matching is used as additional signal but not required.
3. albumartist track_artist UND beide sind bekannte Komponistennamen.
Reine PerformerKomponist-Heuristik ohne Genre-Bestätigung ist abgeschaltet
(zu viele Falschpositive bei Samplern, Jazz, Volksmusik).
""" """
aa = (albumartist or "").casefold().strip() aa = (albumartist or "").casefold().strip()
ta = (track_artist or "").casefold().strip() ta = (track_artist or "").casefold().strip()
g = (genre or "").casefold().strip()
if not aa or aa in ("various artists", "unknown artist", "unknown"): if not aa or aa in ("various artists", "unknown artist", "unknown"):
return False return False
if not ta or ta in ("unknown artist", "unknown"): if not ta or ta in ("unknown artist", "unknown"):
return False return False # placeholder, not a real composer
if aa == ta: if aa == ta:
return False return False
return True # performer ≠ composer → classical naming
# Primäres Signal: Genre-Keyword
if any(kw in g for kw in _CLASSICAL_GENRE_KEYWORDS):
return True
# Sekundäres Signal: track_artist enthält bekannten Komponistennamen
if any(kw in ta for kw in _CLASSICAL_COMPOSER_KEYWORDS):
return True
return False
def _proposed_filename( def _proposed_filename(

View file

@ -33,63 +33,6 @@ try:
except ImportError: except ImportError:
HAS_ANTHROPIC = False HAS_ANTHROPIC = False
# ---------------------------------------------------------------------------
# Genre normalization
# ---------------------------------------------------------------------------
# Canonical genre names keyed by lower-case spelling. normalize_genre looks
# up the stripped, lower-cased input here, so every key must be lower-case.
_GENRE_MAP: Dict[str, str] = {
# German → English (Jellyfin standard terms)
"volksmusik": "Folk",
"volkslieder": "Folk",
"volkslied": "Folk",
"heimatlieder": "Folk",
"schlager": "Schlager",
"deutsche schlager": "Schlager",
"marsch": "March",
"marschmusik": "March",
"militaermusik": "March",
"militärmusik": "March",
"kirchenmusik": "Sacred",
"chormusik": "Choral",
"kinderlieder": "Children",
"weihnachtslieder": "Christmas",
"weihnachtsmusik": "Christmas",
"blasmusik": "Brass Band",
"operette": "Operetta",
"oper": "Opera",
"kammermusik": "Chamber Music",
"klassik": "Classical",
"classic": "Classical",
"klassische musik": "Classical",
"barock": "Baroque",
"romantik": "Romantic",
# Unify English variants
"rhythm and blues": "R&B",
"rhythmic soul": "R&B",
"rock and roll": "Rock 'n' Roll",
"rock & roll": "Rock 'n' Roll",
"easy listening": "Easy Listening",
"vocal pop": "Pop",
"adult contemporary": "Pop",
"big band": "Big Band",
"swing music": "Swing",
"latin jazz": "Latin Jazz",
"bossa nova": "Bossa Nova",
"nueva cancion": "Nueva Canción",
}
def normalize_genre(genre: Optional[str]) -> Optional[str]:
    """Return a canonical spelling for ``genre``.

    Falsy input (``None`` or ``""``) is returned unchanged. Otherwise the
    stripped, lower-cased value is looked up in ``_GENRE_MAP``. Genres not in
    the map are title-cased when they are entirely upper- or lower-case, and
    merely stripped when they already use mixed case.
    """
    if not genre:
        return genre

    trimmed = genre.strip()
    mapped = _GENRE_MAP.get(trimmed.lower())
    if mapped:
        return mapped

    # Title-case only single-case input (avoids ALL CAPS or all lowercase);
    # mixed-case spellings are assumed to be intentional.
    is_single_case = genre in (genre.upper(), genre.lower())
    if is_single_case:
        return trimmed.title()
    return trimmed
_MB_RATE_LIMIT = 1.1 # seconds between MusicBrainz requests _MB_RATE_LIMIT = 1.1 # seconds between MusicBrainz requests
_last_mb_call = 0.0 _last_mb_call = 0.0
ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "") ACOUSTID_API_KEY = os.getenv("ACOUSTID_API_KEY", "")
@ -562,7 +505,7 @@ def resolve(
album=album, album=album,
albumartist=albumartist, albumartist=albumartist,
date=year, date=year,
genre=normalize_genre(genre), genre=genre,
label=label, label=label,
mbid=release_mbid, mbid=release_mbid,
cover_path=None, cover_path=None,

View file

@ -148,8 +148,6 @@ def process_album(
hints.cover_images, hints.cover_images,
proposal.mbid, proposal.mbid,
album_dir, album_dir,
artist=proposal.albumartist,
album=proposal.album,
) )
if cover_path and not args.no_cover: if cover_path and not args.no_cover:
proposal.cover_path = cover_path proposal.cover_path = cover_path
@ -230,71 +228,6 @@ def process_album(
return stats return stats
def _print_status(args: argparse.Namespace) -> None:
    """Scan the library and print albums with a missing cover or poor tags.

    Album directories come from ``args.album`` (if set) plus everything
    ``collect_album_dirs`` finds below each directory in ``args.paths``.
    An album is flagged when it contains no image file, or when one of its
    first few audio files cannot be read or lacks a usable title or artist.
    This function itself only reads files and prints a report to stdout.
    """
    from mutagen import File as MutagenFile

    IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
    AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
    SAMPLE_SIZE = 3  # only the first few tracks per album are checked (fast)

    album_dirs: List[Path] = []
    if args.album:
        album_dirs.append(args.album.expanduser().resolve())
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if root.is_dir():
            album_dirs.extend(collect_album_dirs(root))

    flagged, ok = [], []
    for album_dir in sorted(album_dirs):
        # Walk the tree once and derive both the cover check and the audio
        # file list from the same listing.
        files = sorted(f for f in album_dir.rglob("*") if f.is_file())
        has_cover = any(f.suffix.lower() in IMAGE_EXTS for f in files)
        audio_files = [f for f in files if f.suffix.lower() in AUDIO_EXTS]
        sample = audio_files[:SAMPLE_SIZE]

        missing_tags = []
        for af in sample:
            try:
                tags = MutagenFile(str(af), easy=True)
                if tags is None:
                    missing_tags.append(af.name)
                    continue
                title = (tags.get("title") or [""])[0].strip()
                artist = (tags.get("artist") or [""])[0].strip()
                if not title or title.lower() in ("unknown", "audiotrack") \
                        or not artist or artist.lower() == "unknown":
                    missing_tags.append(af.name)
            except Exception:
                # An unreadable file counts as badly tagged.
                missing_tags.append(af.name)

        problems = []
        if not has_cover:
            problems.append("kein Cover")
        if missing_tags:
            problems.append(f"schlechte Tags ({len(missing_tags)}/{len(sample)} geprüft)")

        if problems:
            flagged.append((album_dir, problems))
        else:
            ok.append(album_dir)

    print(f"\n{'=' * 60}")
    print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
    print(f"{'=' * 60}")
    print(f" ✅ In Ordnung: {len(ok)}")
    print(f" ⚠️ Mit Problemen: {len(flagged)}")
    print()
    for album_dir, problems in flagged:
        print(f" 💿 {album_dir.name}")
        for p in problems:
            print(f"{p}")
    print("=" * 60)
def main() -> None: def main() -> None:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin", description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
@ -329,17 +262,9 @@ def main() -> None:
parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator", parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator",
help="Pfad zu jellyfin_playlist_generator.py\n" help="Pfad zu jellyfin_playlist_generator.py\n"
"(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)") "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
parser.add_argument("--status", action="store_true",
help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
args = parser.parse_args() args = parser.parse_args()
if args.status:
if not args.paths and not args.album:
parser.error("--status benötigt mindestens einen Pfad.")
_print_status(args)
return
if not args.album and not args.paths: if not args.album and not args.paths:
parser.error("Mindestens ein Pfad oder --album erforderlich.") parser.error("Mindestens ein Pfad oder --album erforderlich.")

View file

@ -188,140 +188,6 @@ def test_extract_hints_multi_disc() -> str:
return f"disc numbers detected: {disc_nums}" return f"disc numbers detected: {disc_nums}"
# ---------------------------------------------------------------------------
# Vertical tracklist parser Tests
# ---------------------------------------------------------------------------
def test_vertical_tracklist_basic() -> str:
    """A number/title/duration tracklist, one item per line, is normalized."""
    from hint_extractor import _normalize_vertical_tracklist

    vertical = "1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka pro trubku\n4:01"
    normalized = _normalize_vertical_tracklist(vertical)

    assert normalized is not None, "should recognize vertical format"
    for expected in ("1. Katka", "2. Záludná"):
        assert expected in normalized, f"got: {normalized!r}"
    return f"normalized: {normalized[:60]!r}"
def test_vertical_tracklist_without_duration() -> str:
    """A vertical tracklist with only numbers and titles is still normalized."""
    from hint_extractor import _normalize_vertical_tracklist

    vertical = "1\nFirst Song\n2\nSecond Song\n3\nThird Song"
    normalized = _normalize_vertical_tracklist(vertical)

    assert normalized is not None, "should work without durations"
    assert "1. First Song" in normalized, f"got: {normalized!r}"
    return f"no-duration OK: {normalized[:60]!r}"
def test_vertical_tracklist_not_triggered_for_normal() -> str:
    """A conventional "N. Title" tracklist must not be treated as vertical."""
    from hint_extractor import _normalize_vertical_tracklist

    conventional = "1. Dancing Queen\n2. Waterloo\n3. Fernando"
    outcome = _normalize_vertical_tracklist(conventional)

    assert outcome is None, f"should return None for normal format, got: {outcome!r}"
    return "correctly returns None for standard format"
# ---------------------------------------------------------------------------
# Single-CD disc handling Tests
# ---------------------------------------------------------------------------
def test_single_cd_tracklist_match() -> str:
    """Track-number matching must not require a disc number.

    On a single-CD album the disc is ``None``; the title from
    ``tracklist.txt`` must still be applied to the first track.
    """
    from pathlib import Path
    import tempfile

    from hint_extractor import extract_hints
    from scanner import scan_album

    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir) / "Tufaranka_-_Katka_dovadi"
        root.mkdir()
        # Dummy audio file: only its name and presence matter here.
        (root / "01_-_Tufaranka_-_AudioTrack_01.mp3").write_bytes(b"\x00" * 100)
        (root / "tracklist.txt").write_text("1\nKatka dovádí\n3:22\n2\nZáludná\n2:15\n3\nPolka\n4:01")

        scan = scan_album(root)
        hints = extract_hints(scan, use_ocr=False)
        track = hints.tracks[0]
        assert track.title == "Katka dovádí", f"expected tracklist title, got: {track.title!r}"
        return f"single-CD match OK: title={track.title!r}"
# ---------------------------------------------------------------------------
# Genre normalization Tests
# ---------------------------------------------------------------------------
def test_genre_normalize_german() -> str:
    """German genre names are mapped to their English canonical names."""
    from metadata_resolver import normalize_genre

    expectations = (
        ("volksmusik", "Folk", "volksmusik → Folk"),
        ("klassik", "Classical", "klassik → Classical"),
        ("marschmusik", "March", "marschmusik → March"),
    )
    for raw, wanted, message in expectations:
        assert normalize_genre(raw) == wanted, message
    return "German genres normalized correctly"
def test_genre_normalize_english_variants() -> str:
    """English spelling variants collapse to one canonical genre name."""
    from metadata_resolver import normalize_genre

    expectations = {
        "rhythm and blues": "R&B",
        "rock and roll": "Rock 'n' Roll",
    }
    for raw, wanted in expectations.items():
        assert normalize_genre(raw) == wanted
    return "English variants normalized correctly"
def test_genre_normalize_titlecase() -> str:
    """Unmapped single-case genres are title-cased; mixed case is kept."""
    from metadata_resolver import normalize_genre

    for raw, wanted in (("JAZZ", "Jazz"), ("folk", "Folk")):
        assert normalize_genre(raw) == wanted, f"got: {normalize_genre(raw)!r}"
    # Mixed-case input must come back unchanged.
    assert normalize_genre("Big Band") == "Big Band"
    return "Titlecase normalization OK"
# ---------------------------------------------------------------------------
# _is_classical() Tests
# ---------------------------------------------------------------------------
def test_is_classical_by_genre() -> str:
    """A classical genre tag makes ``_is_classical`` return a truthy value."""
    from executor import _is_classical

    cases = (
        ("Gardiner", "Bach", "Classical", "Classical genre should trigger"),
        ("Herreweghe", "Handel", "Baroque", "Baroque should trigger"),
    )
    for performer, composer, genre, message in cases:
        assert _is_classical(performer, composer, genre), message
    return "genre-based detection OK"
def test_is_classical_by_composer() -> str:
    """A known composer as track artist is enough, even with an empty genre."""
    from executor import _is_classical

    cases = (
        ("Gardiner", "Bach", "Bach as track_artist should trigger"),
        ("Hurford", "beethoven", "beethoven should trigger"),
    )
    for performer, composer, message in cases:
        assert _is_classical(performer, composer, ""), message
    return "composer-name detection OK"
def test_is_classical_false_for_pop() -> str:
    """Pop albums (same or empty track artist) are not classified as classical."""
    from executor import _is_classical

    cases = (
        ("Trini Lopez", "Trini Lopez", "Pop", "same artist = not classical"),
        ("ABBA", "ABBA", "Pop", "ABBA is not classical"),
        ("Trini Lopez", "", "R&B", "empty track_artist = not classical"),
    )
    for albumartist, track_artist, genre, message in cases:
        assert not _is_classical(albumartist, track_artist, genre), message
    return "pop albums correctly not classical"
def test_is_classical_false_for_folk() -> str:
    """A folk album with identical album and track artist is not classical."""
    from executor import _is_classical

    verdict = _is_classical("Tufaranka", "Tufaranka", "Folk")
    assert not verdict, "Folk is not classical"
    return "Folk correctly not classical"
# ---------------------------------------------------------------------------
# cover normalize Tests
# ---------------------------------------------------------------------------
def test_normalize_cover_renames_front_jpg() -> str:
    """``Front.jpg`` is renamed to ``folder.jpg`` in the same directory."""
    from cover_handler import normalize_cover_to_folder_jpg
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        root = Path(tmpdir)
        front = root / "Front.jpg"
        # Starts with the JPEG marker bytes, followed by padding.
        front.write_bytes(b"\xff\xd8" + b"\x00" * 200)

        result = normalize_cover_to_folder_jpg(front)

        assert result.name == "folder.jpg", f"expected folder.jpg, got {result.name!r}"
        assert (root / "folder.jpg").exists(), "folder.jpg should exist"
        assert not front.exists(), "Front.jpg should be gone"
    return "Front.jpg → folder.jpg rename OK"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# executor Tests # executor Tests
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -386,19 +252,6 @@ def main() -> None:
("UNIT_15_proposed_filename_single_disc", test_proposed_filename_single_disc), ("UNIT_15_proposed_filename_single_disc", test_proposed_filename_single_disc),
("UNIT_16_proposed_filename_multi_disc", test_proposed_filename_multi_disc), ("UNIT_16_proposed_filename_multi_disc", test_proposed_filename_multi_disc),
("UNIT_17_proposed_filename_sanitizes_chars", test_proposed_filename_sanitizes_chars), ("UNIT_17_proposed_filename_sanitizes_chars", test_proposed_filename_sanitizes_chars),
# Neue Tests
("UNIT_18_vertical_tracklist_basic", test_vertical_tracklist_basic),
("UNIT_19_vertical_tracklist_no_duration", test_vertical_tracklist_without_duration),
("UNIT_20_vertical_tracklist_no_false_pos", test_vertical_tracklist_not_triggered_for_normal),
("UNIT_21_single_cd_tracklist_match", test_single_cd_tracklist_match),
("UNIT_22_genre_normalize_german", test_genre_normalize_german),
("UNIT_23_genre_normalize_english", test_genre_normalize_english_variants),
("UNIT_24_genre_normalize_titlecase", test_genre_normalize_titlecase),
("UNIT_25_is_classical_by_genre", test_is_classical_by_genre),
("UNIT_26_is_classical_by_composer", test_is_classical_by_composer),
("UNIT_27_is_classical_false_pop", test_is_classical_false_for_pop),
("UNIT_28_is_classical_false_folk", test_is_classical_false_for_folk),
("UNIT_29_normalize_cover_renames", test_normalize_cover_renames_front_jpg),
] ]
for test_id, fn in cases: for test_id, fn in cases: