- --except filters albums by directory name (glob or substring, repeatable) - README.md: new options table entries, new cover sources, updated pipeline, corrected test count (33), added batch example - BEDIENUNGSANLEITUNG.md: new options table, sections E (batch+except), F (--status), LASTFM_API_KEY env var, corrected test count Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
456 lines
18 KiB
Python
Executable file
456 lines
18 KiB
Python
Executable file
#!/usr/bin/env python3
|
||
"""
|
||
music_enricher.py
|
||
KI-gestützter Musik-Metadaten-Enricher für Jellyfin-Bibliotheken.
|
||
|
||
Pipeline pro Album:
|
||
Scan → HintExtractor → MetadataResolver → CoverHandler → Review → Executor
|
||
→ (optional) Jellyfin Playlist Generator
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import fnmatch
|
||
import importlib.util
|
||
import os
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
try:
|
||
from tqdm import tqdm
|
||
HAS_TQDM = True
|
||
except ImportError:
|
||
HAS_TQDM = False
|
||
|
||
from models import AlbumProposal
|
||
from scanner import scan_album, collect_album_dirs
|
||
from hint_extractor import extract_hints
|
||
from metadata_resolver import resolve
|
||
from cover_handler import resolve_cover, download_back_cover
|
||
from executor import execute_album, write_report
|
||
|
||
|
||
def maybe_tqdm(iterable, show: bool, **kwargs):
    """Optionally wrap *iterable* in a tqdm progress bar.

    Args:
        iterable: The iterable to loop over.
        show: When true, return ``tqdm(iterable, **kwargs)``.
        **kwargs: Forwarded to ``tqdm``; ignored when *show* is false.

    Returns:
        The tqdm wrapper, or *iterable* itself when *show* is false.
    """
    if not show:
        return iterable
    return tqdm(iterable, **kwargs)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Jellyfin Playlist Generator integration
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _find_jellyfin_generator(album_dir: Path, explicit: Optional[Path]) -> Optional[Path]:
|
||
"""Sucht jellyfin_playlist_generator.py — explizit oder im Geschwister-Verzeichnis."""
|
||
if explicit:
|
||
return explicit.expanduser().resolve() if explicit.exists() else None
|
||
# Auto-Discover: ../Jellyfin_Playlist_Generator/ relativ zum Album-Root
|
||
candidate = album_dir.parent / "Jellyfin_Playlist_Generator" / "jellyfin_playlist_generator.py"
|
||
return candidate if candidate.exists() else None
|
||
|
||
|
||
def _run_jellyfin_generator(album_dir: Path, generator_path: Path) -> None:
|
||
"""
|
||
Importiert den Jellyfin Playlist Generator und erstellt die Playlist für album_dir.
|
||
Kein subprocess, kein cleanup_all_playlists — nur gezielt dieses eine Album.
|
||
"""
|
||
try:
|
||
spec = importlib.util.spec_from_file_location("jellyfin_pg", generator_path)
|
||
mod = importlib.util.module_from_spec(spec) # type: ignore[arg-type]
|
||
sys.modules["jellyfin_pg"] = mod # muss vor exec_module stehen (für @dataclass)
|
||
spec.loader.exec_module(mod) # type: ignore[union-attr]
|
||
|
||
media_files = mod.collect_media_recursive(album_dir)
|
||
if not media_files:
|
||
print(f" ⚠️ Jellyfin-Generator: keine Mediendateien in {album_dir.name}", file=sys.stderr)
|
||
return
|
||
|
||
deduped = sorted(set(media_files), key=mod.natural_sort_key)
|
||
tracks = mod.enrich_tracks(
|
||
[mod.TrackInfo(p, p.stem, p.suffix.lower()) for p in deduped],
|
||
album_dir,
|
||
)
|
||
tracks = mod.sort_tracks_for_playlist(tracks, album_dir)
|
||
pl_path = mod.generate_playlist(album_dir, tracks, None, dry_run=False)
|
||
print(f" 🎵 Jellyfin-Playlist erstellt: {pl_path.name}")
|
||
except Exception as e:
|
||
print(f" ⚠️ Jellyfin-Generator-Fehler ({album_dir.name}): {e}", file=sys.stderr)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Review / Display
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _print_proposal(proposal: AlbumProposal) -> None:
|
||
conf_bar = "█" * int(proposal.confidence * 10) + "░" * (10 - int(proposal.confidence * 10))
|
||
print(f"\n{'─' * 60}")
|
||
print(f"💿 {proposal.album_dir.name}")
|
||
print(f" Album: {proposal.album}")
|
||
print(f" Artist: {proposal.albumartist}")
|
||
print(f" Jahr: {proposal.date or '–'}")
|
||
print(f" Genre: {proposal.genre or '–'}")
|
||
print(f" Label: {proposal.label or '–'}")
|
||
print(f" Cover: {proposal.cover_source or '–'} ({proposal.cover_path.name if proposal.cover_path else 'keins'})")
|
||
print(f" Konfidenz: [{conf_bar}] {proposal.confidence:.0%} Quellen: {', '.join(proposal.sources) or '–'}")
|
||
if proposal.notes:
|
||
for n in proposal.notes:
|
||
print(f" ℹ️ {n}")
|
||
print(f" Tracks ({len(proposal.tracks)}):")
|
||
for tp in proposal.tracks[:8]:
|
||
tn = f"{tp.disc_number}-{tp.track_number:02d}" if tp.disc_number and tp.disc_number > 1 else (
|
||
f"{tp.track_number:02d}" if tp.track_number else "??")
|
||
display_artist = tp.artist or proposal.albumartist or "Unknown"
|
||
print(f" {tn} {display_artist} – {tp.title}")
|
||
if len(proposal.tracks) > 8:
|
||
print(f" … und {len(proposal.tracks) - 8} weitere")
|
||
|
||
|
||
def _interactive_review(proposal: AlbumProposal) -> bool:
    """Show *proposal* and ask the user whether to apply it.

    Prompts repeatedly until a recognised answer is entered.

    Returns:
        True if the user accepts the proposal (Enter, "j" or "y"),
        False if the user skips it ("s"). Entering "q" exits the program
        with status 0.
    """
    decisions = {"": True, "j": True, "y": True, "s": False}
    _print_proposal(proposal)
    while True:
        choice = input("\n [Enter] Akzeptieren [s] Überspringen [q] Abbrechen: ").strip().lower()
        if choice == "q":
            sys.exit(0)
        if choice in decisions:
            return decisions[choice]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main pipeline
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _dry_run_rows(proposal: AlbumProposal, album_dir: Path) -> List[Dict[str, Any]]:
    """Build one report row per track describing the proposed (unwritten) changes."""
    return [
        {
            "status": "dry-run",
            "album_dir": str(album_dir.name),
            "track_path": str(tp.path),
            "old_title": tp.path.stem,
            "new_title": tp.title,
            "old_artist": "",
            "new_artist": tp.artist,
            "album": proposal.album,
            "albumartist": proposal.albumartist,
            "date": proposal.date or "",
            "genre": proposal.genre or "",
            "label": proposal.label or "",
            "track_number": tp.track_number or "",
            "disc_number": tp.disc_number or "",
            "cover_embedded": False,
            "renamed_to": tp.new_filename or "",
            "confidence": f"{proposal.confidence:.2f}",
            "sources": ", ".join(proposal.sources),
        }
        for tp in proposal.tracks
    ]


def process_album(
    album_dir: Path,
    args: argparse.Namespace,
    report_data: List[Dict[str, Any]],
) -> Dict[str, int]:
    """Run the enrichment pipeline for a single album directory.

    Steps: scan, hint extraction, metadata resolution, cover resolution,
    optional filename proposal, review (dry-run / interactive / automatic),
    execution, back-cover download and optional Jellyfin playlist generation.

    Args:
        album_dir: Album directory to process.
        args: Parsed command-line options.
        report_data: List that report rows are appended to (mutated in place).

    Returns:
        Counters with the keys ``tags_written``, ``covers_embedded``,
        ``files_renamed``, ``errors`` and ``skipped``, plus any further keys
        returned by ``execute_album``. Exceptions are caught, counted under
        ``errors`` and reported on stderr with a traceback.
    """
    stats = {"tags_written": 0, "covers_embedded": 0, "files_renamed": 0,
             "errors": 0, "skipped": 0}

    try:
        scan = scan_album(album_dir)
        if not scan.audio_files:
            stats["skipped"] += 1
            return stats

        hints = extract_hints(scan, use_ocr=not args.no_api)

        proposal = resolve(
            hints,
            use_fingerprint=not args.no_fingerprint,
            use_api=not args.no_api,
            use_claude=not args.no_api,
        )

        # Cover art. With --no-cover the result was always discarded, so the
        # lookup is skipped entirely instead of being run for nothing.
        if not args.no_cover:
            cover_path, cover_source = resolve_cover(
                hints.cover_images,
                proposal.mbid,
                album_dir,
                artist=proposal.albumartist,
                album=proposal.album,
            )
            if cover_path:
                proposal.cover_path = cover_path
                proposal.cover_source = cover_source

        # Set proposed filenames if --rename
        if args.rename:
            from executor import _proposed_filename
            for tp in proposal.tracks:
                tp.new_filename = _proposed_filename(
                    tp, tp.path.suffix,
                    albumartist=proposal.albumartist or "",
                    genre=proposal.genre or "",
                )

        # Review step
        if args.dry_run:
            _print_proposal(proposal)
            report_data.extend(_dry_run_rows(proposal, album_dir))
            return stats

        if args.auto:
            if proposal.confidence < args.confidence:
                print(f" ⏭️ Konfidenz {proposal.confidence:.0%} < {args.confidence:.0%} → übersprungen: {album_dir.name}")
                stats["skipped"] += 1
                return stats
            _print_proposal(proposal)
        elif not _interactive_review(proposal):
            stats["skipped"] += 1
            return stats

        album_stats = execute_album(
            proposal=proposal,
            backup_dir=args.backup,
            do_rename=args.rename,
            embed_cover_art=args.embed_cover,
            dry_run=False,
            report_data=report_data,
        )
        for k, v in album_stats.items():
            stats[k] = stats.get(k, 0) + v

        # Fetch the back cover when an MBID is known. Dry runs have already
        # returned above, so no dry-run check is needed here.
        if proposal.mbid and not args.no_cover:
            back = download_back_cover(proposal.mbid, album_dir)
            if back:
                print(f" 🖼️ Back-Cover heruntergeladen: {back.name}")

        # Invoke the Jellyfin Playlist Generator if one can be found
        generator_path = _find_jellyfin_generator(album_dir, getattr(args, "playlist_generator", None))
        if generator_path:
            _run_jellyfin_generator(album_dir, generator_path)

    except Exception as e:
        # Per-album boundary: one failing album must not stop the whole run.
        stats["errors"] += 1
        print(f" ❌ Fehler in {album_dir.name}: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)

    return stats
|
||
|
||
|
||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
|
||
_AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
|
||
_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}
|
||
|
||
|
||
def _sample_indices(count: int, sample: int) -> List[int]:
    """Return evenly spaced indices into a sequence of *count* items.

    At most ``max(sample, 2)`` indices are returned; for ``count >= 2`` the
    first and the last index are always included.
    """
    if count <= 0:
        return []
    picks = min(count, max(sample, 2))
    if picks == 1:
        return [0]
    # Integer arithmetic keeps the spacing deterministic (no float rounding).
    return sorted({(i * (count - 1)) // (picks - 1) for i in range(picks)})


def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
    """Check whether an album already looks fully enriched.

    Criteria:
      - at least one image file exists anywhere below *album_dir*
      - a sample of the audio files has usable ``title`` and ``artist`` tags

    Args:
        album_dir: Album directory, searched recursively.
        sample: Number of audio files to inspect. They are spread evenly over
            the sorted file list so the first, middle and last files are
            covered (previously only the first three and the last were read).

    Returns:
        ``(is_complete, problems)`` where *problems* lists the human-readable
        reasons why the album is not complete.
    """
    from mutagen import File as MutagenFile

    problems: List[str] = []

    # One directory walk collects both the cover flag and the audio files.
    has_cover = False
    audio_files: List[Path] = []
    for entry in album_dir.rglob("*"):
        if not entry.is_file():
            continue
        suffix = entry.suffix.lower()
        if suffix in _IMAGE_EXTS:
            has_cover = True
        elif suffix in _AUDIO_EXTS:
            audio_files.append(entry)
    audio_files.sort()

    if not has_cover:
        problems.append("kein Cover")

    if audio_files:
        sampled = [audio_files[i] for i in _sample_indices(len(audio_files), sample)]
        bad = []
        for af in sampled:
            try:
                tags = MutagenFile(str(af), easy=True)
                if tags is None:
                    bad.append(af.name)
                    continue
                title = (tags.get("title") or [""])[0].strip().lower()
                artist = (tags.get("artist") or [""])[0].strip().lower()
                if (
                    title in _BAD_TAG_VALUES
                    or artist in _BAD_TAG_VALUES
                    or title.startswith(("audiotrack", "track "))
                ):
                    bad.append(af.name)
            except Exception:
                # An unreadable file counts as badly tagged rather than
                # aborting the status check.
                bad.append(af.name)
        if bad:
            problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")

    return len(problems) == 0, problems
|
||
|
||
|
||
def _print_status(args: argparse.Namespace) -> None:
|
||
"""Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten."""
|
||
album_dirs: List[Path] = []
|
||
if args.album:
|
||
album_dirs.append(args.album.expanduser().resolve())
|
||
for raw in args.paths:
|
||
root = Path(raw).expanduser().resolve()
|
||
if root.is_dir():
|
||
album_dirs.extend(collect_album_dirs(root))
|
||
|
||
bad_list, ok = [], []
|
||
for album_dir in sorted(album_dirs):
|
||
complete, problems = _album_is_complete(album_dir)
|
||
if complete:
|
||
ok.append(album_dir)
|
||
else:
|
||
bad_list.append((album_dir, problems))
|
||
|
||
print(f"\n{'=' * 60}")
|
||
print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
|
||
print(f"{'=' * 60}")
|
||
print(f" ✅ In Ordnung: {len(ok)}")
|
||
print(f" ⚠️ Mit Problemen: {len(bad_list)}")
|
||
print()
|
||
for album_dir, problems in bad_list:
|
||
print(f" 💿 {album_dir.name}")
|
||
for p in problems:
|
||
print(f" → {p}")
|
||
print("=" * 60)
|
||
|
||
|
||
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with all command-line options."""
    parser = argparse.ArgumentParser(
        description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("paths", nargs="*",
                        help="Root-Verzeichnisse (rekursiv nach Alben durchsucht)")
    parser.add_argument("--album", type=Path,
                        help="Einzelnes Album-Verzeichnis verarbeiten")
    parser.add_argument("--dry-run", action="store_true",
                        help="Vorschläge anzeigen, nichts schreiben")
    parser.add_argument("--auto", action="store_true",
                        help="Kein interaktiver Review-Schritt")
    parser.add_argument("--confidence", type=float, default=0.85,
                        help="Min-Konfidenz für --auto (default: 0.85)")
    parser.add_argument("--rename", action="store_true",
                        help="Dateien nach Schema umbenennen: TT_-_Artist_-_Titel.ext")
    parser.add_argument("--embed-cover", action="store_true",
                        help="Cover-Art in Audiodatei einbetten")
    parser.add_argument("--backup", type=Path,
                        help="Backup-Verzeichnis vor Änderungen")
    parser.add_argument("--report", type=Path,
                        help="CSV-Report der Änderungen")
    parser.add_argument("--no-fingerprint", action="store_true",
                        help="AcoustID-Fingerprinting überspringen")
    parser.add_argument("--no-api", action="store_true",
                        help="Keine externen API-Calls")
    parser.add_argument("--no-cover", action="store_true",
                        help="Kein Cover-Art-Download")
    parser.add_argument("--no-tqdm", action="store_true",
                        help="Fortschrittsanzeige deaktivieren")
    parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator",
                        help="Pfad zu jellyfin_playlist_generator.py\n"
                             "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
    parser.add_argument("--status", action="store_true",
                        help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
    parser.add_argument("--skip-complete", action="store_true", dest="skip_complete",
                        help="Alben überspringen die bereits Cover + gute Tags haben")
    parser.add_argument("--except", action="append", dest="exclude_patterns",
                        metavar="PATTERN", default=[],
                        help="Album ausschließen dessen Verzeichnisname das Muster enthält\n"
                             "(Glob oder Substring, mehrfach verwendbar, z.B. --except 'Abba*')")
    return parser


def main() -> None:
    """Command-line entry point.

    Parses the options, collects the album directories, applies the
    ``--skip-complete`` and ``--except`` filters, processes every album and
    prints a summary. With ``--status`` only the library status is printed.

    Exits with status 2 on invalid arguments (via ``parser.error``) and with
    status 1 when no album directory is found.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.status:
        if not args.paths and not args.album:
            parser.error("--status benötigt mindestens einen Pfad.")
        _print_status(args)
        return

    if not args.album and not args.paths:
        parser.error("Mindestens ein Pfad oder --album erforderlich.")

    report_data: List[Dict[str, Any]] = []
    totals: Dict[str, int] = {
        "albums": 0, "skipped": 0, "tags_written": 0,
        "covers_embedded": 0, "files_renamed": 0, "errors": 0,
    }

    # Collect album directories
    album_dirs: List[Path] = []
    if args.album:
        album_dirs.append(args.album.expanduser().resolve())
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))

    # Overlapping roots (or --album inside a given root) would otherwise
    # process the same album twice; keep the first occurrence, preserve order.
    album_dirs = list(dict.fromkeys(album_dirs))

    if not album_dirs:
        print("⚠️ Keine Album-Verzeichnisse gefunden.")
        sys.exit(1)

    # --skip-complete: drop albums that are already fully enriched
    if args.skip_complete:
        before = len(album_dirs)
        album_dirs = [d for d in album_dirs if not _album_is_complete(d)[0]]
        skipped_upfront = before - len(album_dirs)
        print(f"⏭️ {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")

    # --except: exclude albums whose directory name matches a pattern
    if args.exclude_patterns:
        before_exc = len(album_dirs)

        def _is_excluded(d: Path) -> bool:
            name = d.name
            return any(
                fnmatch.fnmatch(name, pat) or pat in name
                for pat in args.exclude_patterns
            )

        album_dirs = [d for d in album_dirs if not _is_excluded(d)]
        excluded_count = before_exc - len(album_dirs)
        patterns_str = ", ".join(repr(p) for p in args.exclude_patterns)
        print(f"🚫 {excluded_count}/{before_exc} Alben ausgeschlossen ({patterns_str}).")

    print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
    # Printed unconditionally; the former `os.getenv("OLLAMA_HOST") or True`
    # condition was always true.
    print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
    if not args.no_api:
        print("🔍 MusicBrainz-Lookup aktiv.")
    if args.dry_run:
        print("🧪 DRY-RUN — nichts wird geschrieben.")

    # The progress bar is only shown in --auto mode.
    show_progress = HAS_TQDM and not args.no_tqdm and args.auto

    try:
        for album_dir in maybe_tqdm(album_dirs, show_progress,
                                    desc="Alben", unit="album", dynamic_ncols=True):
            stats = process_album(album_dir, args, report_data)
            totals["albums"] += 1
            for k in ("skipped", "tags_written", "covers_embedded", "files_renamed", "errors"):
                totals[k] += stats.get(k, 0)
    finally:
        # Write the report even when the run is aborted (review answer "q",
        # Ctrl-C), so changes already applied are still recorded.
        if args.report and report_data:
            write_report(report_data, args.report)

    print(f"\n{'=' * 50}")
    print("✅ Zusammenfassung:")
    print(f" 💿 Alben verarbeitet: {totals['albums']}")
    print(f" ⏭️ Übersprungen: {totals['skipped']}")
    print(f" 🏷️ Tags geschrieben: {totals['tags_written']}")
    print(f" 🖼️ Cover eingebettet: {totals['covers_embedded']}")
    print(f" 📝 Dateien umbenannt: {totals['files_renamed']}")
    print(f" ❌ Fehler: {totals['errors']}")
    if args.dry_run:
        print(" 🧪 Modus: DRY-RUN")
    print("=" * 50)
|
||
|
||
|
||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|