Music_Metadata_Enricher/music_enricher.py
dschlueter 388a9ffd08 Add --skip-complete: skip already-enriched albums in batch runs
- _album_is_complete(album_dir): checks cover presence + sampled tag quality
  (first/last/middle files); returns (bool, problems_list)
  Sampling strategy: covers first, last and up to 3 middle files to catch
  albums where only some tracks were tagged
- _print_status() now uses _album_is_complete() internally (DRY)
- --skip-complete flag: filters album_dirs before the main loop, prints
  how many were skipped upfront
- Typical batch command:
    python3 music_enricher.py --auto --confidence 0.1 --rename --embed-cover \
        --no-fingerprint --skip-complete ~/nvme2n1p7_home/Musik

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-29 09:05:51 +02:00

437 lines
17 KiB
Python
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
music_enricher.py
KI-gestützter Musik-Metadaten-Enricher für Jellyfin-Bibliotheken.
Pipeline pro Album:
Scan → HintExtractor → MetadataResolver → CoverHandler → Review → Executor
→ (optional) Jellyfin Playlist Generator
"""
from __future__ import annotations
import argparse
import importlib.util
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
try:
from tqdm import tqdm
HAS_TQDM = True
except ImportError:
HAS_TQDM = False
from models import AlbumProposal
from scanner import scan_album, collect_album_dirs
from hint_extractor import extract_hints
from metadata_resolver import resolve
from cover_handler import resolve_cover, download_back_cover
from executor import execute_album, write_report
def maybe_tqdm(iterable, show: bool, **kwargs):
    """Optionally wrap *iterable* in a tqdm progress bar.

    When *show* is false the iterable is handed back untouched and
    ``kwargs`` are ignored; otherwise ``kwargs`` are forwarded to ``tqdm``.
    """
    if not show:
        return iterable
    return tqdm(iterable, **kwargs)
# ---------------------------------------------------------------------------
# Jellyfin Playlist Generator integration
# ---------------------------------------------------------------------------
def _find_jellyfin_generator(album_dir: Path, explicit: Optional[Path]) -> Optional[Path]:
    """Locate ``jellyfin_playlist_generator.py``.

    Args:
        album_dir: Album directory; auto-discovery looks for a
            ``Jellyfin_Playlist_Generator`` directory next to it.
        explicit: Script path supplied by the user, or ``None`` to
            auto-discover.

    Returns:
        The path of the generator script, or ``None`` when it does not
        exist. An explicit path is returned user-expanded and resolved.
        If an explicit path is given but missing, no auto-discovery
        fallback is attempted.
    """
    if explicit is not None:
        # Expand "~" *before* the existence check; otherwise a path such as
        # "~/tools/generator.py" is tested literally and never found.
        expanded = explicit.expanduser()
        return expanded.resolve() if expanded.exists() else None
    # Auto-discover: ../Jellyfin_Playlist_Generator/ relative to the album.
    candidate = album_dir.parent / "Jellyfin_Playlist_Generator" / "jellyfin_playlist_generator.py"
    return candidate if candidate.exists() else None
def _run_jellyfin_generator(album_dir: Path, generator_path: Path) -> None:
    """Create the Jellyfin playlist for ``album_dir`` via the generator script.

    The generator is loaded in-process from ``generator_path`` (no
    subprocess) and only its per-album functions are called, so nothing
    but this one album is touched.

    This is best-effort: any failure is reported on stderr and swallowed,
    so a broken generator never aborts the enrichment run.

    Args:
        album_dir: Album directory to build the playlist for.
        generator_path: Path to ``jellyfin_playlist_generator.py``.
    """
    module_name = "jellyfin_pg"
    try:
        spec = importlib.util.spec_from_file_location(module_name, generator_path)
        if spec is None or spec.loader is None:
            # Happens e.g. for a path without a recognised Python suffix.
            raise ImportError(f"Modul kann nicht geladen werden: {generator_path}")
        mod = importlib.util.module_from_spec(spec)
        # Must be registered before exec_module (required for @dataclass).
        sys.modules[module_name] = mod
        try:
            spec.loader.exec_module(mod)
        except BaseException:
            # Do not leave a half-initialised module behind in sys.modules.
            sys.modules.pop(module_name, None)
            raise
        media_files = mod.collect_media_recursive(album_dir)
        if not media_files:
            print(f" ⚠️ Jellyfin-Generator: keine Mediendateien in {album_dir.name}", file=sys.stderr)
            return
        deduped = sorted(set(media_files), key=mod.natural_sort_key)
        tracks = mod.enrich_tracks(
            [mod.TrackInfo(p, p.stem, p.suffix.lower()) for p in deduped],
            album_dir,
        )
        tracks = mod.sort_tracks_for_playlist(tracks, album_dir)
        pl_path = mod.generate_playlist(album_dir, tracks, None, dry_run=False)
        print(f" 🎵 Jellyfin-Playlist erstellt: {pl_path.name}")
    except Exception as e:
        print(f" ⚠️ Jellyfin-Generator-Fehler ({album_dir.name}): {e}", file=sys.stderr)
# ---------------------------------------------------------------------------
# Review / Display
# ---------------------------------------------------------------------------
def _print_proposal(proposal: AlbumProposal) -> None:
    """Print a human-readable summary of an album proposal to stdout.

    Shows the album-level fields, the confidence with its sources, any
    notes, and the first eight tracks followed by a count of the rest.

    Args:
        proposal: The resolved proposal to display.
    """
    # Clamp so the bar always has exactly ten cells, even if the confidence
    # is outside 0..1.
    filled = max(0, min(10, int(proposal.confidence * 10)))
    # NOTE(review): both bar glyphs and the separator glyph below are empty
    # strings here, so they render as nothing — presumably the characters
    # were lost in transit; confirm against the repository.
    conf_bar = "" * filled + "" * (10 - filled)
    print(f"\n{'' * 60}")
    print(f"💿 {proposal.album_dir.name}")
    print(f" Album: {proposal.album}")
    print(f" Artist: {proposal.albumartist}")
    print(f" Jahr: {proposal.date or ''}")
    print(f" Genre: {proposal.genre or ''}")
    print(f" Label: {proposal.label or ''}")
    print(f" Cover: {proposal.cover_source or ''} ({proposal.cover_path.name if proposal.cover_path else 'keins'})")
    print(f" Konfidenz: [{conf_bar}] {proposal.confidence:.0%} Quellen: {', '.join(proposal.sources) or ''}")
    if proposal.notes:
        for n in proposal.notes:
            print(f" {n}")
    print(f" Tracks ({len(proposal.tracks)}):")
    for tp in proposal.tracks[:8]:
        if tp.disc_number and tp.disc_number > 1:
            # A missing track number must not crash the ":02d" format.
            track_part = "??" if tp.track_number is None else f"{tp.track_number:02d}"
            tn = f"{tp.disc_number}-{track_part}"
        else:
            tn = f"{tp.track_number:02d}" if tp.track_number else "??"
        display_artist = tp.artist or proposal.albumartist or "Unknown"
        print(f" {tn} {display_artist} {tp.title}")
    if len(proposal.tracks) > 8:
        print(f" … und {len(proposal.tracks) - 8} weitere")
def _interactive_review(proposal: AlbumProposal) -> bool:
    """Show the proposal and ask the user how to proceed.

    Returns True when the user accepts (Enter, "j" or "y") and False when
    the album is skipped ("s"). Entering "q" ends the program with exit
    status 0; any other input repeats the prompt.
    """
    _print_proposal(proposal)
    prompt = "\n [Enter] Akzeptieren [s] Überspringen [q] Abbrechen: "
    decisions = {"": True, "j": True, "y": True, "s": False}
    while True:
        choice = input(prompt).strip().lower()
        if choice == "q":
            sys.exit(0)
        if choice in decisions:
            return decisions[choice]
# ---------------------------------------------------------------------------
# Main pipeline
# ---------------------------------------------------------------------------
def process_album(
    album_dir: Path,
    args: argparse.Namespace,
    report_data: List[Dict[str, Any]],
) -> Dict[str, int]:
    """Run the enrichment pipeline for one album directory.

    Steps: scan → extract hints → resolve metadata → resolve cover →
    review (dry-run listing, interactive prompt, or the --auto confidence
    gate) → execute → back-cover download → Jellyfin playlist generation.

    Args:
        album_dir: Directory of the album to process.
        args: Parsed command-line options.
        report_data: Shared list of report rows. In dry-run mode one row
            per track is appended here; otherwise the list is handed to
            ``execute_album``.

    Returns:
        Counters for this album ("tags_written", "covers_embedded",
        "files_renamed", "errors", "skipped", plus any further keys
        returned by ``execute_album``). Exceptions are not propagated:
        they are printed with a traceback to stderr and counted under
        "errors". ``SystemExit`` raised by the interactive review is not
        an ``Exception`` and therefore passes through.
    """
    stats = {"tags_written": 0, "covers_embedded": 0, "files_renamed": 0,
             "errors": 0, "skipped": 0}
    try:
        scan = scan_album(album_dir)
        if not scan.audio_files:
            stats["skipped"] += 1
            return stats
        hints = extract_hints(scan, use_ocr=not args.no_api)
        proposal = resolve(
            hints,
            use_fingerprint=not args.no_fingerprint,
            use_api=not args.no_api,
            use_claude=not args.no_api,
        )
        # Cover art. With --no-cover the result was always discarded, so the
        # lookup is skipped entirely.
        # NOTE(review): resolve_cover presumably may fetch art remotely, which
        # --no-cover is meant to prevent — confirm in cover_handler.
        if not args.no_cover:
            cover_path, cover_source = resolve_cover(
                hints.cover_images,
                proposal.mbid,
                album_dir,
                artist=proposal.albumartist,
                album=proposal.album,
            )
            if cover_path:
                proposal.cover_path = cover_path
                proposal.cover_source = cover_source
        # Set proposed filenames if --rename
        if args.rename:
            from executor import _proposed_filename
            for tp in proposal.tracks:
                tp.new_filename = _proposed_filename(
                    tp, tp.path.suffix,
                    albumartist=proposal.albumartist or "",
                    genre=proposal.genre or "",
                )
        # Review step: a dry run only displays and records the proposal.
        if args.dry_run:
            _print_proposal(proposal)
            for tp in proposal.tracks:
                report_data.append({
                    "status": "dry-run",
                    "album_dir": str(album_dir.name),
                    "track_path": str(tp.path),
                    "old_title": tp.path.stem,
                    "new_title": tp.title,
                    "old_artist": "",
                    "new_artist": tp.artist,
                    "album": proposal.album,
                    "albumartist": proposal.albumartist,
                    "date": proposal.date or "",
                    "genre": proposal.genre or "",
                    "label": proposal.label or "",
                    "track_number": tp.track_number or "",
                    "disc_number": tp.disc_number or "",
                    "cover_embedded": False,
                    "renamed_to": tp.new_filename or "",
                    "confidence": f"{proposal.confidence:.2f}",
                    "sources": ", ".join(proposal.sources),
                })
            return stats
        if not args.auto:
            # Interactive mode: the user decides per album.
            if not _interactive_review(proposal):
                stats["skipped"] += 1
                return stats
        elif proposal.confidence < args.confidence:
            # --auto: proposals below the confidence threshold are skipped.
            print(f" ⏭️ Konfidenz {proposal.confidence:.0%} < {args.confidence:.0%} → übersprungen: {album_dir.name}")
            stats["skipped"] += 1
            return stats
        else:
            _print_proposal(proposal)
        album_stats = execute_album(
            proposal=proposal,
            backup_dir=args.backup,
            do_rename=args.rename,
            embed_cover_art=args.embed_cover,
            dry_run=False,
            report_data=report_data,
        )
        for k, v in album_stats.items():
            stats[k] = stats.get(k, 0) + v
        # Fetch the back cover when an MBID is known. Dry runs returned
        # above, so no dry-run check is needed here.
        if proposal.mbid and not args.no_cover:
            back = download_back_cover(proposal.mbid, album_dir)
            if back:
                print(f" 🖼️ Back-Cover heruntergeladen: {back.name}")
        # Invoke the Jellyfin playlist generator if one can be found.
        generator_path = _find_jellyfin_generator(album_dir, getattr(args, "playlist_generator", None))
        if generator_path:
            _run_jellyfin_generator(album_dir, generator_path)
    except Exception as e:
        stats["errors"] += 1
        print(f" ❌ Fehler in {album_dir.name}: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
    return stats
# File suffixes (lower-case) that count as cover images / audio tracks.
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
_AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
# Lower-cased title/artist values that are treated as "not really tagged".
_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}


def _sample_indices(total: int, sample: int) -> List[int]:
    """Return up to ``sample`` indices spread evenly over ``range(total)``.

    The first and the last index are always included when ``total`` >= 2,
    even if ``sample`` is smaller than 2.
    """
    if total <= 0:
        return []
    count = min(total, max(sample, 2))
    if count == 1:
        return [0]
    # Integer interpolation between 0 and total - 1; because
    # count <= total the step is >= 1 and all indices are distinct.
    return sorted({i * (total - 1) // (count - 1) for i in range(count)})


def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
    """Check whether an album already looks fully enriched.

    Criteria:
      - at least one image file exists anywhere below ``album_dir``;
      - a sample of the audio files has usable ``title`` and ``artist`` tags.

    The sample covers the first file, the last file and evenly spaced files
    in between, so albums where only some tracks were tagged are caught.

    Args:
        album_dir: Album directory, searched recursively.
        sample: Number of audio files to inspect (at least first and last).

    Returns:
        ``(is_complete, problems)`` where ``problems`` lists the
        human-readable reasons; it is empty when the album is complete.
    """
    problems: List[str] = []
    has_cover = False
    audio_files: List[Path] = []
    # One directory walk collects both the cover flag and the audio files.
    for f in album_dir.rglob("*"):
        if not f.is_file():
            continue
        suffix = f.suffix.lower()
        if suffix in _IMAGE_EXTS:
            has_cover = True
        elif suffix in _AUDIO_EXTS:
            audio_files.append(f)
    audio_files.sort()
    if not has_cover:
        problems.append("kein Cover")
    if audio_files:
        # Imported only when there is something to read, so directories
        # without audio files can be checked without the tag library.
        from mutagen import File as MutagenFile
        sampled = [audio_files[i] for i in _sample_indices(len(audio_files), sample)]
        bad = []
        for af in sampled:
            try:
                tags = MutagenFile(str(af), easy=True)
                if tags is None:
                    bad.append(af.name)
                    continue
                title = (tags.get("title") or [""])[0].strip().lower()
                artist = (tags.get("artist") or [""])[0].strip().lower()
                if (
                    title in _BAD_TAG_VALUES
                    or artist in _BAD_TAG_VALUES
                    or title.startswith(("audiotrack", "track "))
                ):
                    bad.append(af.name)
            except Exception:
                # Unreadable or malformed files count as badly tagged.
                bad.append(af.name)
        if bad:
            problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")
    return not problems, problems
def _print_status(args: argparse.Namespace) -> None:
    """Scan the library and report albums with missing or poor metadata.

    Album directories are taken from ``args.album`` and collected from
    every directory in ``args.paths``. Each one is checked with
    ``_album_is_complete``; the summary and the per-album problems are
    printed to stdout.

    Args:
        args: Parsed command-line options (uses ``album`` and ``paths``).
    """
    album_dirs: List[Path] = []
    if args.album:
        album_dirs.append(args.album.expanduser().resolve())
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if not root.is_dir():
            # Same warning main() prints for its own path collection, so a
            # mistyped path is not silently reported as an empty library.
            print(f"⚠️ Kein Verzeichnis: {root}")
            continue
        album_dirs.extend(collect_album_dirs(root))
    bad_list, ok = [], []
    for album_dir in sorted(album_dirs):
        complete, problems = _album_is_complete(album_dir)
        if complete:
            ok.append(album_dir)
        else:
            bad_list.append((album_dir, problems))
    print(f"\n{'=' * 60}")
    print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
    print(f"{'=' * 60}")
    print(f" ✅ In Ordnung: {len(ok)}")
    print(f" ⚠️ Mit Problemen: {len(bad_list)}")
    print()
    for album_dir, problems in bad_list:
        print(f" 💿 {album_dir.name}")
        for p in problems:
            print(f"{p}")
    print("=" * 60)
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with all options of the enricher."""
    parser = argparse.ArgumentParser(
        description="KI-gestützter Musik-Metadaten-Enricher für Jellyfin",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    add = parser.add_argument
    add("paths", nargs="*",
        help="Root-Verzeichnisse (rekursiv nach Alben durchsucht)")
    add("--album", type=Path,
        help="Einzelnes Album-Verzeichnis verarbeiten")
    add("--dry-run", action="store_true",
        help="Vorschläge anzeigen, nichts schreiben")
    add("--auto", action="store_true",
        help="Kein interaktiver Review-Schritt")
    add("--confidence", type=float, default=0.85,
        help="Min-Konfidenz für --auto (default: 0.85)")
    add("--rename", action="store_true",
        help="Dateien nach Schema umbenennen: TT_-_Artist_-_Titel.ext")
    add("--embed-cover", action="store_true",
        help="Cover-Art in Audiodatei einbetten")
    add("--backup", type=Path,
        help="Backup-Verzeichnis vor Änderungen")
    add("--report", type=Path,
        help="CSV-Report der Änderungen")
    add("--no-fingerprint", action="store_true",
        help="AcoustID-Fingerprinting überspringen")
    add("--no-api", action="store_true",
        help="Keine externen API-Calls")
    add("--no-cover", action="store_true",
        help="Kein Cover-Art-Download")
    add("--no-tqdm", action="store_true",
        help="Fortschrittsanzeige deaktivieren")
    add("--playlist-generator", type=Path, dest="playlist_generator",
        help="Pfad zu jellyfin_playlist_generator.py\n"
             "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
    add("--status", action="store_true",
        help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
    add("--skip-complete", action="store_true", dest="skip_complete",
        help="Alben überspringen die bereits Cover + gute Tags haben")
    return parser


def main() -> None:
    """Command-line entry point.

    Parses the options, handles the read-only ``--status`` mode, collects
    the album directories, optionally drops albums that are already
    complete, processes every remaining album and prints a summary.
    Exits with status 1 when no album directory was found.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    nothing_given = not args.album and not args.paths
    if args.status:
        if nothing_given:
            parser.error("--status benötigt mindestens einen Pfad.")
        _print_status(args)
        return
    if nothing_given:
        parser.error("Mindestens ein Pfad oder --album erforderlich.")

    show_progress = HAS_TQDM and not args.no_tqdm and args.auto
    report_data: List[Dict[str, Any]] = []
    counter_keys = ("skipped", "tags_written", "covers_embedded", "files_renamed", "errors")
    totals: Dict[str, int] = {"albums": 0}
    totals.update((key, 0) for key in counter_keys)

    # Gather the album directories from --album and the root paths.
    album_dirs: List[Path] = [args.album.expanduser().resolve()] if args.album else []
    for raw in args.paths:
        root = Path(raw).expanduser().resolve()
        if root.is_dir():
            album_dirs.extend(collect_album_dirs(root))
        else:
            print(f"⚠️ Kein Verzeichnis: {root}")
    if not album_dirs:
        print("⚠️ Keine Album-Verzeichnisse gefunden.")
        sys.exit(1)

    # --skip-complete: drop albums that are already fully enriched.
    if args.skip_complete:
        before = len(album_dirs)
        remaining = []
        for candidate in album_dirs:
            complete, _ = _album_is_complete(candidate)
            if not complete:
                remaining.append(candidate)
        album_dirs = remaining
        skipped_upfront = before - len(album_dirs)
        print(f"⏭️ {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")

    print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
    # Printed unconditionally: the original guard was always true.
    print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
    if not args.no_api:
        print("🔍 MusicBrainz-Lookup aktiv.")
    if args.dry_run:
        print("🧪 DRY-RUN — nichts wird geschrieben.")

    progress = maybe_tqdm(album_dirs, show_progress,
                          desc="Alben", unit="album", dynamic_ncols=True)
    for album_dir in progress:
        album_stats = process_album(album_dir, args, report_data)
        totals["albums"] += 1
        for key in counter_keys:
            totals[key] += album_stats.get(key, 0)

    if args.report and report_data:
        write_report(report_data, args.report)

    print(f"\n{'=' * 50}")
    print("✅ Zusammenfassung:")
    print(f" 💿 Alben verarbeitet: {totals['albums']}")
    print(f" ⏭️ Übersprungen: {totals['skipped']}")
    print(f" 🏷️ Tags geschrieben: {totals['tags_written']}")
    print(f" 🖼️ Cover eingebettet: {totals['covers_embedded']}")
    print(f" 📝 Dateien umbenannt: {totals['files_renamed']}")
    print(f" ❌ Fehler: {totals['errors']}")
    if args.dry_run:
        print(" 🧪 Modus: DRY-RUN")
    print("=" * 50)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()