# Music_Metadata_Enricher/music_enricher.py

#!/usr/bin/env python3
"""
music_enricher.py
KI-gestützter Musik-Metadaten-Enricher für Jellyfin-Bibliotheken.

Pipeline pro Album:
  Scan → HintExtractor → MetadataResolver → CoverHandler → Review → Executor
  → (optional) Jellyfin Playlist Generator
"""
from __future__ import annotations

import argparse
import importlib.util
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    from tqdm import tqdm
    HAS_TQDM = True
except ImportError:
    HAS_TQDM = False

from models import AlbumProposal
from scanner import scan_album, collect_album_dirs
from hint_extractor import extract_hints
from metadata_resolver import resolve
from cover_handler import resolve_cover, download_back_cover
from executor import execute_album, write_report


def maybe_tqdm(iterable, show: bool, **kwargs):
    """Return *iterable* wrapped in a tqdm progress bar when *show* is true.

    When *show* is false the iterable is handed back untouched. Extra keyword
    arguments are forwarded to ``tqdm`` only in the wrapped case.
    """
    if not show:
        return iterable
    return tqdm(iterable, **kwargs)


# ---------------------------------------------------------------------------
# Jellyfin Playlist Generator integration
# ---------------------------------------------------------------------------

def _find_jellyfin_generator(album_dir: Path, explicit: Optional[Path]) -> Optional[Path]:
    """Locate ``jellyfin_playlist_generator.py``.

    Args:
        album_dir: Album directory; used as the anchor for auto-discovery.
        explicit: User-supplied path to the generator script, or ``None``.

    Returns:
        The resolved *explicit* path if it points to an existing file, else the
        auto-discovered
        ``<album_dir>/../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py``
        if that file exists, else ``None``. When *explicit* is given but
        missing, no auto-discovery is attempted.
    """
    if explicit:
        # Expand "~" *before* testing for existence; otherwise a path such as
        # "~/tools/jellyfin_playlist_generator.py" is always reported missing.
        expanded = explicit.expanduser()
        return expanded.resolve() if expanded.is_file() else None

    # Auto-discover: sibling directory next to the album directory.
    candidate = album_dir.parent / "Jellyfin_Playlist_Generator" / "jellyfin_playlist_generator.py"
    return candidate if candidate.is_file() else None


def _run_jellyfin_generator(album_dir: Path, generator_path: Path) -> None:
    """Import the Jellyfin playlist generator and build the playlist for *album_dir*.

    The generator script is loaded in-process from *generator_path* (no
    subprocess) under the module name ``jellyfin_pg`` and only its per-album
    functions are called, so just this one album is touched.

    This is best-effort: any failure is reported on stderr and swallowed, so a
    broken generator never aborts the enrichment run.

    Args:
        album_dir: Album directory to create the playlist for.
        generator_path: Path to ``jellyfin_playlist_generator.py``.
    """
    module_name = "jellyfin_pg"
    try:
        spec = importlib.util.spec_from_file_location(module_name, generator_path)
        if spec is None or spec.loader is None:
            # Happens e.g. for a file without a recognised Python suffix.
            raise ImportError(f"Modul konnte nicht geladen werden: {generator_path}")

        mod = importlib.util.module_from_spec(spec)
        # Must be registered before exec_module so that @dataclass (which looks
        # up the defining module in sys.modules) works inside the generator.
        sys.modules[module_name] = mod
        try:
            spec.loader.exec_module(mod)
        except BaseException:
            # Do not leave a half-initialised module behind.
            sys.modules.pop(module_name, None)
            raise

        media_files = mod.collect_media_recursive(album_dir)
        if not media_files:
            print(f"  ⚠️ Jellyfin-Generator: keine Mediendateien in {album_dir.name}", file=sys.stderr)
            return

        deduped = sorted(set(media_files), key=mod.natural_sort_key)
        tracks = mod.enrich_tracks(
            [mod.TrackInfo(p, p.stem, p.suffix.lower()) for p in deduped],
            album_dir,
        )
        tracks = mod.sort_tracks_for_playlist(tracks, album_dir)
        pl_path = mod.generate_playlist(album_dir, tracks, None, dry_run=False)
        print(f"  🎵 Jellyfin-Playlist erstellt: {pl_path.name}")
    except Exception as e:
        print(f"  ⚠️ Jellyfin-Generator-Fehler ({album_dir.name}): {e}", file=sys.stderr)


# ---------------------------------------------------------------------------
# Review / Display
# ---------------------------------------------------------------------------

def _print_proposal(proposal: AlbumProposal) -> None:
    """Print a human-readable summary of *proposal* to stdout.

    Shows the album-level fields, a 10-cell confidence bar, any notes and the
    first eight tracks (with a count of the remaining ones).

    Args:
        proposal: The resolved album proposal to display.
    """
    # Clamp so an out-of-range confidence can never produce a bar that is not
    # exactly ten cells wide.
    filled = max(0, min(10, int(proposal.confidence * 10)))
    conf_bar = "█" * filled + "░" * (10 - filled)
    cover_name = proposal.cover_path.name if proposal.cover_path else "keins"

    print(f"\n{'─' * 60}")
    print(f"💿 {proposal.album_dir.name}")
    print(f"   Album:       {proposal.album}")
    print(f"   Artist:      {proposal.albumartist}")
    print(f"   Jahr:        {proposal.date or '–'}")
    print(f"   Genre:       {proposal.genre or '–'}")
    print(f"   Label:       {proposal.label or '–'}")
    print(f"   Cover:       {proposal.cover_source or '–'} ({cover_name})")
    print(f"   Konfidenz:   [{conf_bar}] {proposal.confidence:.0%}  Quellen: {', '.join(proposal.sources) or '–'}")
    if proposal.notes:
        for n in proposal.notes:
            print(f"   ℹ️  {n}")
    print(f"   Tracks ({len(proposal.tracks)}):")
    for tp in proposal.tracks[:8]:
        # A missing track number must not crash the ":02d" format, even on
        # multi-disc albums where only the disc number is known.
        tn = f"{tp.track_number:02d}" if tp.track_number else "??"
        if tp.disc_number and tp.disc_number > 1:
            tn = f"{tp.disc_number}-{tn}"
        display_artist = tp.artist or proposal.albumartist or "Unknown"
        print(f"     {tn}  {display_artist} – {tp.title}")
    if len(proposal.tracks) > 8:
        print(f"     … und {len(proposal.tracks) - 8} weitere")


def _interactive_review(proposal: AlbumProposal) -> bool:
    """Show *proposal* and ask the user what to do with it.

    Returns True when the user accepts (empty input, "j" or "y") and False
    when the user skips ("s"). Entering "q" terminates the program with exit
    status 0. Any other input repeats the prompt.
    """
    _print_proposal(proposal)
    prompt = "\n   [Enter] Akzeptieren  [s] Überspringen  [q] Abbrechen: "
    accept_answers = ("", "j", "y")
    while True:
        choice = input(prompt).strip().lower()
        if choice == "q":
            sys.exit(0)
        if choice == "s":
            return False
        if choice in accept_answers:
            return True


# ---------------------------------------------------------------------------
# Main pipeline
# ---------------------------------------------------------------------------

def _dry_run_report_rows(album_dir: Path, proposal: AlbumProposal) -> List[Dict[str, Any]]:
    """Build one report row per proposed track for a dry run (nothing written)."""
    return [
        {
            "status": "dry-run",
            "album_dir": str(album_dir.name),
            "track_path": str(tp.path),
            "old_title": tp.path.stem,
            "new_title": tp.title,
            "old_artist": "",
            "new_artist": tp.artist,
            "album": proposal.album,
            "albumartist": proposal.albumartist,
            "date": proposal.date or "",
            "genre": proposal.genre or "",
            "label": proposal.label or "",
            "track_number": tp.track_number or "",
            "disc_number": tp.disc_number or "",
            "cover_embedded": False,
            "renamed_to": tp.new_filename or "",
            "confidence": f"{proposal.confidence:.2f}",
            "sources": ", ".join(proposal.sources),
        }
        for tp in proposal.tracks
    ]


def process_album(
    album_dir: Path,
    args: argparse.Namespace,
    report_data: List[Dict[str, Any]],
) -> Dict[str, int]:
    """Run the enrichment pipeline for a single album directory.

    Steps: scan → hint extraction → metadata resolution → cover resolution →
    (optional) filename proposal → review (dry-run / auto / interactive) →
    execution → back-cover download → Jellyfin playlist generation.

    Args:
        album_dir: Directory of the album to process.
        args: Parsed command-line options (see ``main``).
        report_data: List that report rows are appended to (mutated in place).

    Returns:
        Counters for this album: ``tags_written``, ``covers_embedded``,
        ``files_renamed``, ``errors`` and ``skipped``. Exceptions raised by the
        pipeline are caught, printed with a traceback to stderr and counted
        under ``errors``.
    """
    stats = {"tags_written": 0, "covers_embedded": 0, "files_renamed": 0,
             "errors": 0, "skipped": 0}

    try:
        scan = scan_album(album_dir)
        if not scan.audio_files:
            stats["skipped"] += 1
            return stats

        hints = extract_hints(scan, use_ocr=not args.no_api)

        proposal = resolve(
            hints,
            use_fingerprint=not args.no_fingerprint,
            use_api=not args.no_api,
            use_claude=not args.no_api,
        )

        # Cover art. With --no-cover the result was discarded anyway, so the
        # lookup (and any download it may trigger) is skipped entirely.
        if not args.no_cover:
            cover_path, cover_source = resolve_cover(
                hints.cover_images,
                proposal.mbid,
                album_dir,
                artist=proposal.albumartist,
                album=proposal.album,
            )
            if cover_path:
                proposal.cover_path = cover_path
                proposal.cover_source = cover_source

        # Set proposed filenames if --rename
        if args.rename:
            from executor import _proposed_filename
            for tp in proposal.tracks:
                tp.new_filename = _proposed_filename(
                    tp, tp.path.suffix,
                    albumartist=proposal.albumartist or "",
                    genre=proposal.genre or "",
                )

        # Dry run: show the proposal, record it in the report, write nothing.
        if args.dry_run:
            _print_proposal(proposal)
            report_data.extend(_dry_run_report_rows(album_dir, proposal))
            return stats

        # Review step: in --auto mode the confidence threshold decides,
        # otherwise the user does.
        if args.auto:
            if proposal.confidence < args.confidence:
                print(f"  ⏭️ Konfidenz {proposal.confidence:.0%} < {args.confidence:.0%} → übersprungen: {album_dir.name}")
                stats["skipped"] += 1
                return stats
            _print_proposal(proposal)
        elif not _interactive_review(proposal):
            stats["skipped"] += 1
            return stats

        album_stats = execute_album(
            proposal=proposal,
            backup_dir=args.backup,
            do_rename=args.rename,
            embed_cover_art=args.embed_cover,
            dry_run=False,
            report_data=report_data,
        )
        for k, v in album_stats.items():
            stats[k] = stats.get(k, 0) + v

        # Fetch the back cover when an MBID is known (dry runs returned above).
        if proposal.mbid and not args.no_cover:
            back = download_back_cover(proposal.mbid, album_dir)
            if back:
                print(f"  🖼️  Back-Cover heruntergeladen: {back.name}")

        # Invoke the Jellyfin playlist generator, if one can be found.
        generator_path = _find_jellyfin_generator(album_dir, getattr(args, "playlist_generator", None))
        if generator_path:
            _run_jellyfin_generator(album_dir, generator_path)

    except Exception as e:
        # Top-level boundary per album: one broken album must not stop the run.
        stats["errors"] += 1
        print(f"  ❌ Fehler in {album_dir.name}: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)

    return stats


# File suffixes (lower-case) treated as cover images / audio files.
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
_AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
# Tag values (lower-cased, stripped) that count as "not really tagged".
_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}


def _sample_indices(count: int, sample: int) -> List[int]:
    """Return up to *sample* evenly spaced indices into a sequence of *count* items.

    The first and the last index are always included when ``count >= 2``, even
    if *sample* is smaller than 2.
    """
    if count <= 0:
        return []
    picks = min(max(sample, 2), count)
    if picks == 1:
        return [0]
    # Integer arithmetic keeps the result deterministic; because
    # (count - 1) / (picks - 1) >= 1 all indices are distinct and ascending.
    return [(i * (count - 1)) // (picks - 1) for i in range(picks)]


def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
    """Check whether an album already looks fully enriched.

    Criteria:
    - at least one image file (see ``_IMAGE_EXTS``) exists anywhere below
      *album_dir*;
    - a spot check of up to *sample* audio files, spread evenly from the first
      to the last file, finds a usable ``title`` and ``artist`` tag in each.

    Args:
        album_dir: Album directory to inspect (searched recursively).
        sample: Maximum number of audio files to spot-check.

    Returns:
        ``(is_complete, problems)`` where *problems* lists a short description
        for every failed criterion.
    """
    problems: List[str] = []

    # One walk over the tree collects both the cover flag and the audio files.
    has_cover = False
    audio_files: List[Path] = []
    for entry in album_dir.rglob("*"):
        if not entry.is_file():
            continue
        suffix = entry.suffix.lower()
        if suffix in _IMAGE_EXTS:
            has_cover = True
        elif suffix in _AUDIO_EXTS:
            audio_files.append(entry)
    audio_files.sort()

    if not has_cover:
        problems.append("kein Cover")

    if audio_files:
        # Imported lazily: mutagen is only needed when there is audio to read.
        from mutagen import File as MutagenFile

        sampled = [audio_files[i] for i in _sample_indices(len(audio_files), sample)]
        bad = []
        for af in sampled:
            try:
                tags = MutagenFile(str(af), easy=True)
                if tags is None:
                    bad.append(af.name)
                    continue
                title = (tags.get("title") or [""])[0].strip().lower()
                artist = (tags.get("artist") or [""])[0].strip().lower()
                if title in _BAD_TAG_VALUES or artist in _BAD_TAG_VALUES:
                    bad.append(af.name)
                elif title.startswith(("audiotrack", "track ")):
                    bad.append(af.name)
            except Exception:
                # Best effort: a file that cannot be read counts as badly tagged.
                bad.append(af.name)
        if bad:
            problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")

    return len(problems) == 0, problems


def _print_status(args: argparse.Namespace) -> None:
    """Scan the library and list albums with missing or poor metadata.

    Collects album directories from ``args.album`` and ``args.paths``, checks
    each one with ``_album_is_complete`` and prints a summary to stdout.
    Nothing is written to the library.

    Args:
        args: Parsed command-line options; reads ``album`` and ``paths``.
    """
    album_dirs: List[Path] = []
    if args.album:
        album_root = args.album.expanduser().resolve()
        if album_root.is_dir():
            album_dirs.append(album_root)
        else:
            print(f"⚠️ Kein Verzeichnis: {album_root}")
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            # Same warning as the main pipeline, so a mistyped path does not
            # silently produce an empty report.
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))

    bad_list, ok = [], []
    for album_dir in sorted(album_dirs):
        complete, problems = _album_is_complete(album_dir)
        if complete:
            ok.append(album_dir)
        else:
            bad_list.append((album_dir, problems))

    print(f"\n{'=' * 60}")
    print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
    print(f"{'=' * 60}")
    print(f"  ✅ In Ordnung:    {len(ok)}")
    print(f"  ⚠️  Mit Problemen: {len(bad_list)}")
    print()
    for album_dir, problems in bad_list:
        print(f"  💿 {album_dir.name}")
        for p in problems:
            print(f"       → {p}")
    print("=" * 60)


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with all enricher options."""
    parser = argparse.ArgumentParser(
        description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("paths", nargs="*",
                        help="Root-Verzeichnisse (rekursiv nach Alben durchsucht)")
    parser.add_argument("--album", type=Path,
                        help="Einzelnes Album-Verzeichnis verarbeiten")
    parser.add_argument("--dry-run", action="store_true",
                        help="Vorschläge anzeigen, nichts schreiben")
    parser.add_argument("--auto", action="store_true",
                        help="Kein interaktiver Review-Schritt")
    parser.add_argument("--confidence", type=float, default=0.85,
                        help="Min-Konfidenz für --auto (default: 0.85)")
    parser.add_argument("--rename", action="store_true",
                        help="Dateien nach Schema umbenennen: TT_-_Artist_-_Titel.ext")
    parser.add_argument("--embed-cover", action="store_true",
                        help="Cover-Art in Audiodatei einbetten")
    parser.add_argument("--backup", type=Path,
                        help="Backup-Verzeichnis vor Änderungen")
    parser.add_argument("--report", type=Path,
                        help="CSV-Report der Änderungen")
    parser.add_argument("--no-fingerprint", action="store_true",
                        help="AcoustID-Fingerprinting überspringen")
    parser.add_argument("--no-api", action="store_true",
                        help="Keine externen API-Calls")
    parser.add_argument("--no-cover", action="store_true",
                        help="Kein Cover-Art-Download")
    parser.add_argument("--no-tqdm", action="store_true",
                        help="Fortschrittsanzeige deaktivieren")
    parser.add_argument("--playlist-generator", type=Path, dest="playlist_generator",
                        help="Pfad zu jellyfin_playlist_generator.py\n"
                             "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
    parser.add_argument("--status", action="store_true",
                        help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
    parser.add_argument("--skip-complete", action="store_true", dest="skip_complete",
                        help="Alben überspringen die bereits Cover + gute Tags haben")
    return parser


def main() -> None:
    """Command-line entry point: parse options, process all albums, print a summary.

    Exits with status 2 on invalid arguments (via ``parser.error``) and with
    status 1 when no album directory could be found.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    # The confidence is compared against values in [0, 1]; anything else
    # (e.g. "85" meant as a percentage) would silently skip every album.
    if not 0.0 <= args.confidence <= 1.0:
        parser.error("--confidence muss zwischen 0 und 1 liegen.")

    if args.status:
        if not args.paths and not args.album:
            parser.error("--status benötigt mindestens einen Pfad.")
        _print_status(args)
        return

    if not args.album and not args.paths:
        parser.error("Mindestens ein Pfad oder --album erforderlich.")

    # A progress bar only makes sense when no interactive prompts interleave.
    show_progress = HAS_TQDM and not args.no_tqdm and args.auto
    report_data: List[Dict[str, Any]] = []
    totals: Dict[str, int] = {
        "albums": 0, "skipped": 0, "tags_written": 0,
        "covers_embedded": 0, "files_renamed": 0, "errors": 0,
    }

    # Collect album directories
    album_dirs: List[Path] = []
    if args.album:
        album_root = args.album.expanduser().resolve()
        if album_root.is_dir():
            album_dirs.append(album_root)
        else:
            # Treat --album like the positional paths: warn and skip.
            print(f"⚠️ Kein Verzeichnis: {album_root}")
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))

    if not album_dirs:
        print("⚠️ Keine Album-Verzeichnisse gefunden.")
        sys.exit(1)

    # --skip-complete: filter out albums that are already fully enriched.
    if args.skip_complete:
        before = len(album_dirs)
        album_dirs = [d for d in album_dirs if not _album_is_complete(d)[0]]
        skipped_upfront = before - len(album_dirs)
        print(f"⏭️  {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")

    print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
    # Always printed: the previous guard ("OLLAMA_HOST set or True") was a
    # tautology, so the message never depended on the environment.
    print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
    if not args.no_api:
        print("🔍 MusicBrainz-Lookup aktiv.")
    if args.dry_run:
        print("🧪 DRY-RUN — nichts wird geschrieben.")

    for album_dir in maybe_tqdm(album_dirs, show_progress,
                                 desc="Alben", unit="album", dynamic_ncols=True):
        stats = process_album(album_dir, args, report_data)
        totals["albums"] += 1
        for k in ("skipped", "tags_written", "covers_embedded", "files_renamed", "errors"):
            totals[k] += stats.get(k, 0)

    if args.report and report_data:
        write_report(report_data, args.report)

    print(f"\n{'=' * 50}")
    print("✅ Zusammenfassung:")
    print(f"  💿 Alben verarbeitet:   {totals['albums']}")
    print(f"  ⏭️  Übersprungen:         {totals['skipped']}")
    print(f"  🏷️  Tags geschrieben:     {totals['tags_written']}")
    print(f"  🖼️  Cover eingebettet:    {totals['covers_embedded']}")
    print(f"  📝 Dateien umbenannt:    {totals['files_renamed']}")
    print(f"  ❌ Fehler:               {totals['errors']}")
    if args.dry_run:
        print("  🧪 Modus: DRY-RUN")
    print("=" * 50)


if __name__ == "__main__":
    main()