Music_Metadata_Enricher/music_enricher.py

325 lines
13 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
music_enricher.py
AI-assisted music metadata enricher for Jellyfin libraries.
Pipeline per album:
Scan → HintExtractor → MetadataResolver → CoverHandler → Review → Executor
→ (optional) Jellyfin Playlist Generator
"""
from __future__ import annotations
import argparse
import importlib.util
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
try:
from tqdm import tqdm
HAS_TQDM = True
except ImportError:
HAS_TQDM = False
from models import AlbumProposal
from scanner import scan_album, collect_album_dirs
from hint_extractor import extract_hints
from metadata_resolver import resolve
from cover_handler import resolve_cover
from executor import execute_album, write_report
def maybe_tqdm(iterable, show: bool, **kwargs):
    """Optionally wrap *iterable* in a tqdm progress bar.

    Args:
        iterable: The iterable to pass through.
        show: When falsy, *iterable* is returned unchanged and tqdm is
            never touched.
        **kwargs: Forwarded verbatim to ``tqdm`` when *show* is truthy.

    Returns:
        ``tqdm(iterable, **kwargs)`` if *show* is truthy, otherwise the
        very same *iterable* object.
    """
    if not show:
        return iterable
    # The tqdm import at module top is optional, so callers are expected
    # to pass show=True only when that import succeeded.
    return tqdm(iterable, **kwargs)
# ---------------------------------------------------------------------------
# Jellyfin Playlist Generator integration
# ---------------------------------------------------------------------------
def _find_jellyfin_generator(album_dir: Path, explicit: Optional[Path]) -> Optional[Path]:
    """Locate ``jellyfin_playlist_generator.py``.

    Args:
        album_dir: Album directory; used only for auto-discovery.
        explicit: User-supplied path to the generator script, or ``None``
            to auto-discover it.

    Returns:
        The path of the generator script, or ``None`` when no such file
        exists. An explicit path is returned expanded and resolved.
    """
    if explicit:
        # Expand "~" *before* probing the filesystem: checking the raw path
        # first would reject every "~/..." argument that the shell did not
        # expand (e.g. ``--playlist-generator=~/gen.py``).
        resolved = Path(explicit).expanduser().resolve()
        # is_file() rather than exists(): a directory cannot be imported.
        return resolved if resolved.is_file() else None
    # Auto-discover: <parent of album_dir>/Jellyfin_Playlist_Generator/<script>
    candidate = album_dir.parent / "Jellyfin_Playlist_Generator" / "jellyfin_playlist_generator.py"
    return candidate if candidate.is_file() else None
def _load_jellyfin_generator(generator_path: Path):
    """Import the generator script as module ``jellyfin_pg``, reusing a prior import of the same file."""
    wanted = generator_path.resolve()
    cached = sys.modules.get("jellyfin_pg")
    cached_file = getattr(cached, "__file__", None)
    if cached_file and Path(cached_file).resolve() == wanted:
        # Same script already loaded for an earlier album: do not re-execute it.
        return cached
    spec = importlib.util.spec_from_file_location("jellyfin_pg", generator_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Modul kann nicht geladen werden: {generator_path}")
    mod = importlib.util.module_from_spec(spec)
    # Must be registered before exec_module (needed for @dataclass in the script).
    sys.modules["jellyfin_pg"] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Never leave a half-initialised module behind; it is re-raised below.
        sys.modules.pop("jellyfin_pg", None)
        raise
    return mod


def _run_jellyfin_generator(album_dir: Path, generator_path: Path) -> None:
    """Create the Jellyfin playlist for exactly one album directory.

    The generator script at *generator_path* is imported in-process (no
    subprocess, and no ``cleanup_all_playlists`` call) and driven through
    its ``collect_media_recursive``, ``enrich_tracks``,
    ``sort_tracks_for_playlist`` and ``generate_playlist`` functions.

    This step is best effort: any exception is reported on stderr and
    swallowed so that a playlist problem never fails the album itself.

    Args:
        album_dir: Album directory to build the playlist for.
        generator_path: Path to ``jellyfin_playlist_generator.py``.
    """
    try:
        mod = _load_jellyfin_generator(generator_path)
        media_files = mod.collect_media_recursive(album_dir)
        if not media_files:
            print(f" ⚠️ Jellyfin-Generator: keine Mediendateien in {album_dir.name}", file=sys.stderr)
            return
        # Drop duplicate paths, then order them with the generator's own key.
        deduped = sorted(set(media_files), key=mod.natural_sort_key)
        tracks = mod.enrich_tracks(
            [mod.TrackInfo(p, p.stem, p.suffix.lower()) for p in deduped],
            album_dir,
        )
        tracks = mod.sort_tracks_for_playlist(tracks, album_dir)
        pl_path = mod.generate_playlist(album_dir, tracks, None, dry_run=False)
        print(f" 🎵 Jellyfin-Playlist erstellt: {pl_path.name}")
    except Exception as e:
        print(f" ⚠️ Jellyfin-Generator-Fehler ({album_dir.name}): {e}", file=sys.stderr)
# ---------------------------------------------------------------------------
# Review / Display
# ---------------------------------------------------------------------------
def _print_proposal(proposal: AlbumProposal) -> None:
    """Print a human-readable summary of *proposal* to stdout.

    Shows the album-level fields, a ten-segment confidence bar, any notes
    and at most the first eight tracks (followed by a count of the rest).

    Args:
        proposal: The album proposal to display.
    """
    # Clamp to the bar width so an out-of-range confidence cannot produce
    # a malformed bar.
    filled = max(0, min(10, int(proposal.confidence * 10)))
    conf_bar = "█" * filled + "░" * (10 - filled)
    print(f"\n{'─' * 60}")
    print(f"💿 {proposal.album_dir.name}")
    print(f" Album: {proposal.album}")
    print(f" Artist: {proposal.albumartist}")
    print(f" Jahr: {proposal.date or ''}")
    print(f" Genre: {proposal.genre or ''}")
    print(f" Label: {proposal.label or ''}")
    print(f" Cover: {proposal.cover_source or ''} ({proposal.cover_path.name if proposal.cover_path else 'keins'})")
    print(f" Konfidenz: [{conf_bar}] {proposal.confidence:.0%} Quellen: {', '.join(proposal.sources) or ''}")
    for n in proposal.notes or ():
        print(f" {n}")
    print(f" Tracks ({len(proposal.tracks)}):")
    for tp in proposal.tracks[:8]:
        # Format the track number on its own first: a missing number must
        # render as "??" on multi-disc albums too instead of raising.
        number = f"{tp.track_number:02d}" if tp.track_number else "??"
        # The disc prefix is only shown from disc 2 onwards.
        tn = f"{tp.disc_number}-{number}" if tp.disc_number and tp.disc_number > 1 else number
        display_artist = tp.artist or proposal.albumartist or "Unknown"
        print(f" {tn} {display_artist} {tp.title}")
    if len(proposal.tracks) > 8:
        print(f" … und {len(proposal.tracks) - 8} weitere")
def _interactive_review(proposal: AlbumProposal) -> bool:
    """Show *proposal* and ask the user whether to apply it.

    The prompt repeats until a recognised answer is given: empty input,
    ``j`` or ``y`` accepts; ``s`` skips; ``q`` terminates the program via
    ``sys.exit(0)``.

    Args:
        proposal: The album proposal under review.

    Returns:
        ``True`` if the user accepts the proposal, ``False`` if it is
        skipped (including when stdin is closed and no answer can be read).

    Raises:
        SystemExit: When the user answers ``q``.
    """
    _print_proposal(proposal)
    while True:
        try:
            answer = input("\n [Enter] Akzeptieren [s] Überspringen [q] Abbrechen: ").strip().lower()
        except EOFError:
            # stdin is closed (pipe, cron, ...): nobody can confirm, so take
            # the safe option and skip instead of letting the caller report
            # an error with a traceback for every album.
            print("\n ⚠️ Keine Eingabe möglich (stdin geschlossen) → übersprungen", file=sys.stderr)
            return False
        if answer in ("", "j", "y"):
            return True
        if answer == "s":
            return False
        if answer == "q":
            sys.exit(0)
# ---------------------------------------------------------------------------
# Main pipeline
# ---------------------------------------------------------------------------
def _dry_run_rows(proposal: AlbumProposal, album_dir: Path) -> List[Dict[str, Any]]:
    """Build one report row per track describing what a real run would write."""
    return [
        {
            "status": "dry-run",
            "album_dir": str(album_dir.name),
            "track_path": str(tp.path),
            "old_title": tp.path.stem,
            "new_title": tp.title,
            "old_artist": "",
            "new_artist": tp.artist,
            "album": proposal.album,
            "albumartist": proposal.albumartist,
            "date": proposal.date or "",
            "genre": proposal.genre or "",
            "label": proposal.label or "",
            "track_number": tp.track_number or "",
            "disc_number": tp.disc_number or "",
            "cover_embedded": False,
            "renamed_to": tp.new_filename or "",
            "confidence": f"{proposal.confidence:.2f}",
            "sources": ", ".join(proposal.sources),
        }
        for tp in proposal.tracks
    ]


def process_album(
    album_dir: Path,
    args: argparse.Namespace,
    report_data: List[Dict[str, Any]],
) -> Dict[str, int]:
    """Run the enrichment pipeline for a single album directory.

    Steps: scan, hint extraction, metadata resolution, cover lookup
    (unless ``--no-cover``), optional filename proposal, review (dry-run
    print, interactive prompt, or confidence threshold with ``--auto``),
    execution, and finally the optional Jellyfin playlist generator.

    Args:
        album_dir: Album directory to process.
        args: Parsed command-line options. Reads ``no_api``,
            ``no_fingerprint``, ``no_cover``, ``rename``, ``dry_run``,
            ``auto``, ``confidence``, ``backup``, ``embed_cover`` and, if
            present, ``playlist_generator``.
        report_data: Shared list that report rows are appended to.

    Returns:
        Counters for ``tags_written``, ``covers_embedded``,
        ``files_renamed``, ``errors`` and ``skipped``. Any exception
        raised while processing is printed with a traceback to stderr and
        counted under ``errors`` instead of propagating.
    """
    stats = {"tags_written": 0, "covers_embedded": 0, "files_renamed": 0,
             "errors": 0, "skipped": 0}
    try:
        scan = scan_album(album_dir)
        if not scan.audio_files:
            stats["skipped"] += 1
            return stats
        hints = extract_hints(scan, use_ocr=not args.no_api)
        proposal = resolve(
            hints,
            use_fingerprint=not args.no_fingerprint,
            use_api=not args.no_api,
            use_claude=not args.no_api,
        )
        # Cover art. With --no-cover the lookup is skipped entirely rather
        # than performed and thrown away; the result was never used then.
        if not args.no_cover:
            cover_path, cover_source = resolve_cover(
                hints.cover_images,
                proposal.mbid,
                album_dir,
            )
            if cover_path:
                proposal.cover_path = cover_path
                proposal.cover_source = cover_source
        # Set proposed filenames if --rename
        if args.rename:
            from executor import _proposed_filename
            for tp in proposal.tracks:
                tp.new_filename = _proposed_filename(
                    tp, tp.path.suffix,
                    albumartist=proposal.albumartist or "",
                    genre=proposal.genre or "",
                )
        # Review step
        if args.dry_run:
            _print_proposal(proposal)
            report_data.extend(_dry_run_rows(proposal, album_dir))
            return stats
        if args.auto:
            if proposal.confidence < args.confidence:
                print(f" ⏭️ Konfidenz {proposal.confidence:.0%} < {args.confidence:.0%} → übersprungen: {album_dir.name}")
                stats["skipped"] += 1
                return stats
            _print_proposal(proposal)
        elif not _interactive_review(proposal):
            stats["skipped"] += 1
            return stats
        album_stats = execute_album(
            proposal=proposal,
            backup_dir=args.backup,
            do_rename=args.rename,
            embed_cover_art=args.embed_cover,
            dry_run=False,
            report_data=report_data,
        )
        for k, v in album_stats.items():
            stats[k] = stats.get(k, 0) + v
        # Invoke the Jellyfin playlist generator when one can be found.
        generator_path = _find_jellyfin_generator(album_dir, getattr(args, "playlist_generator", None))
        if generator_path:
            _run_jellyfin_generator(album_dir, generator_path)
    except Exception as e:
        # Per-album boundary: one broken album must not abort the whole run.
        stats["errors"] += 1
        print(f" ❌ Fehler in {album_dir.name}: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
    return stats
def main() -> None:
    """Command-line entry point.

    Parses the options, collects the album directories from ``--album``
    and the positional root paths, runs ``process_album`` on each of them,
    optionally writes the CSV report and prints a summary.

    If the run is terminated from inside the loop via ``SystemExit`` (the
    ``[q]`` answer of the interactive review), the report and the summary
    for the albums handled so far are still emitted before the exit is
    re-raised.

    Raises:
        SystemExit: On invalid arguments (argparse, exit code 2), when no
            album directory is found (exit code 1), or when the user
            aborts the review.
    """
    parser = argparse.ArgumentParser(
        description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("paths", nargs="*",
                        help="Root-Verzeichnisse (rekursiv nach Alben durchsucht)")
    parser.add_argument("--album", type=Path,
                        help="Einzelnes Album-Verzeichnis verarbeiten")
    parser.add_argument("--dry-run", action="store_true",
                        help="Vorschläge anzeigen, nichts schreiben")
    parser.add_argument("--auto", action="store_true",
                        help="Kein interaktiver Review-Schritt")
    parser.add_argument("--confidence", type=float, default=0.85,
                        help="Min-Konfidenz für --auto (default: 0.85)")
    parser.add_argument("--rename", action="store_true",
                        help="Dateien nach Schema umbenennen: TT_-_Artist_-_Titel.ext")
    parser.add_argument("--embed-cover", action="store_true",
                        help="Cover-Art in Audiodatei einbetten")
    parser.add_argument("--backup", type=Path,
                        help="Backup-Verzeichnis vor Änderungen")
    parser.add_argument("--report", type=Path,
                        help="CSV-Report der Änderungen")
    parser.add_argument("--no-fingerprint", action="store_true",
                        help="AcoustID-Fingerprinting überspringen")
    parser.add_argument("--no-api", action="store_true",
                        help="Keine externen API-Calls")
    parser.add_argument("--no-cover", action="store_true",
                        help="Kein Cover-Art-Download")
    parser.add_argument("--no-tqdm", action="store_true",
                        help="Fortschrittsanzeige deaktivieren")
    parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator",
                        help="Pfad zu jellyfin_playlist_generator.py\n"
                             "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
    args = parser.parse_args()
    if not args.album and not args.paths:
        parser.error("Mindestens ein Pfad oder --album erforderlich.")
    # The threshold is compared against a 0..1 confidence; anything outside
    # that range (or NaN) would silently accept or reject every album.
    if not 0.0 <= args.confidence <= 1.0:
        parser.error("--confidence muss zwischen 0 und 1 liegen.")
    # A progress bar only makes sense when no interactive prompt interleaves.
    show_progress = HAS_TQDM and not args.no_tqdm and args.auto
    report_data: List[Dict[str, Any]] = []
    totals: Dict[str, int] = {
        "albums": 0, "skipped": 0, "tags_written": 0,
        "covers_embedded": 0, "files_renamed": 0, "errors": 0,
    }
    # Collect album directories
    album_dirs: List[Path] = []
    if args.album:
        album = args.album.expanduser().resolve()
        # Same directory check as for the positional root paths below.
        if album.is_dir():
            album_dirs.append(album)
        else:
            print(f"⚠️ Kein Verzeichnis: {album}")
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))
    # Overlapping roots / --album must not process an album twice;
    # dict.fromkeys keeps the first occurrence and the original order.
    album_dirs = list(dict.fromkeys(album_dirs))
    if not album_dirs:
        print("⚠️ Keine Album-Verzeichnisse gefunden.")
        sys.exit(1)
    print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
    # Printed unconditionally: the original guard was always true.
    print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
    if not args.no_api:
        print("🔍 MusicBrainz-Lookup aktiv.")
    if args.dry_run:
        print("🧪 DRY-RUN — nichts wird geschrieben.")
    abort: Optional[SystemExit] = None
    try:
        for album_dir in maybe_tqdm(album_dirs, show_progress,
                                    desc="Alben", unit="album", dynamic_ncols=True):
            stats = process_album(album_dir, args, report_data)
            totals["albums"] += 1
            for k in ("skipped", "tags_written", "covers_embedded", "files_renamed", "errors"):
                totals[k] += stats.get(k, 0)
    except SystemExit as exc:
        # Raised by the [q] answer of the review. Changes made to earlier
        # albums are real, so still write the report and summary first.
        abort = exc
    if args.report and report_data:
        write_report(report_data, args.report)
    print(f"\n{'=' * 50}")
    print("✅ Zusammenfassung:")
    print(f" 💿 Alben verarbeitet: {totals['albums']}")
    print(f" ⏭️ Übersprungen: {totals['skipped']}")
    print(f" 🏷️ Tags geschrieben: {totals['tags_written']}")
    print(f" 🖼️ Cover eingebettet: {totals['covers_embedded']}")
    print(f" 📝 Dateien umbenannt: {totals['files_renamed']}")
    print(f" ❌ Fehler: {totals['errors']}")
    if args.dry_run:
        print(" 🧪 Modus: DRY-RUN")
    print("=" * 50)
    if abort is not None:
        raise abort


if __name__ == "__main__":
    main()