diff --git a/music_enricher.py b/music_enricher.py
index f35ebe1..d14703f 100755
--- a/music_enricher.py
+++ b/music_enricher.py
@@ -236,13 +236,73 @@ def process_album(
     return stats
 
 
-def _print_status(args: argparse.Namespace) -> None:
-    """Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten."""
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
+_AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
+_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}
+
+
+def _sample_indices(count: int, sample: int) -> List[int]:
+    """Return up to `sample` evenly spaced indices into a sequence of `count` items.
+
+    The first and last index are always included (so at least two items are
+    checked when available); if `sample` covers the whole sequence, every
+    index is returned.
+    """
+    wanted = max(sample, 2)
+    if wanted >= count:
+        return list(range(count))
+    # Integer spacing keeps the picks deterministic and hits 0 and count - 1.
+    return sorted({(i * (count - 1)) // (wanted - 1) for i in range(wanted)})
+
+
+def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
+    """Check whether an album directory is already fully enriched.
+
+    Criteria:
+    - a cover image (any file with an extension in _IMAGE_EXTS) exists
+      somewhere below `album_dir`
+    - a spot check of up to `sample` audio files (first, last and evenly
+      spaced files in between) finds usable title and artist tags
+
+    Returns a tuple (is_complete, problems); `problems` holds the
+    user-facing descriptions of what is missing.
+    """
     from mutagen import File as MutagenFile
 
-    IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
-    AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
+    # Walk the tree once and reuse the listing for both checks.
+    files = [f for f in album_dir.rglob("*") if f.is_file()]
+
+    problems: List[str] = []
+    if not any(f.suffix.lower() in _IMAGE_EXTS for f in files):
+        problems.append("kein Cover")
+
+    audio_files = sorted(f for f in files if f.suffix.lower() in _AUDIO_EXTS)
+    sampled = [audio_files[i] for i in _sample_indices(len(audio_files), sample)]
+    bad = []
+    for af in sampled:
+        try:
+            tags = MutagenFile(str(af), easy=True)
+            if tags is None:
+                bad.append(af.name)
+                continue
+            title = (tags.get("title") or [""])[0].strip().lower()
+            artist = (tags.get("artist") or [""])[0].strip().lower()
+            if title in _BAD_TAG_VALUES or artist in _BAD_TAG_VALUES:
+                bad.append(af.name)
+            elif title.startswith(("audiotrack", "track ")):
+                bad.append(af.name)
+        except Exception:
+            # Best effort: an unreadable or corrupt file counts as badly tagged.
+            bad.append(af.name)
+    if bad:
+        problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")
+
+    return not problems, problems
+
+
+def _print_status(args: argparse.Namespace) -> None:
+    """Scan the library and list albums with missing or bad metadata."""
 
     album_dirs: List[Path] = []
     if args.album:
         album_dirs.append(args.album.expanduser().resolve())
@@ -251,50 +311,21 @@ def _print_status(args: argparse.Namespace) -> None:
             if root.is_dir():
                 album_dirs.extend(collect_album_dirs(root))
 
-    no_cover, bad_tags, ok = [], [], []
-
+    bad_list, ok = [], []
     for album_dir in sorted(album_dirs):
-        has_cover = any(
-            f.suffix.lower() in IMAGE_EXTS
-            for f in album_dir.rglob("*") if f.is_file()
-        )
-        audio_files = [
-            f for f in sorted(album_dir.rglob("*"))
-            if f.is_file() and f.suffix.lower() in AUDIO_EXTS
-        ]
-        missing_tags = []
-        for af in audio_files[:3]: # nur erste 3 prüfen (schnell)
-            try:
-                tags = MutagenFile(str(af), easy=True)
-                if tags is None:
-                    missing_tags.append(af.name)
-                    continue
-                title = (tags.get("title") or [""])[0].strip()
-                artist = (tags.get("artist") or [""])[0].strip()
-                if not title or title.lower() in ("unknown", "audiotrack", "") \
-                        or not artist or artist.lower() in ("unknown", ""):
-                    missing_tags.append(af.name)
-            except Exception:
-                missing_tags.append(af.name)
-
-        problems = []
-        if not has_cover:
-            problems.append("kein Cover")
-        if missing_tags:
-            problems.append(f"schlechte Tags ({len(missing_tags)}/{min(3,len(audio_files))} geprüft)")
-
-        if problems:
-            bad_tags.append((album_dir, problems))
-        else:
+        complete, problems = _album_is_complete(album_dir)
+        if complete:
             ok.append(album_dir)
+        else:
+            bad_list.append((album_dir, problems))
 
     print(f"\n{'=' * 60}")
     print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
     print(f"{'=' * 60}")
-    print(f" ✅ In Ordnung: {len(ok)}")
-    print(f" ⚠️ Mit Problemen: {len(bad_tags)}")
+    print(f" ✅ In Ordnung: {len(ok)}")
+    print(f" ⚠️ Mit Problemen: {len(bad_list)}")
     print()
-    for album_dir, problems in bad_tags:
+    for album_dir, problems in bad_list:
         print(f" 💿 {album_dir.name}")
         for p in problems:
             print(f" → {p}")
@@ -337,6 +368,8 @@ def main() -> None:
              "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
     parser.add_argument("--status", action="store_true",
                         help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
+    parser.add_argument("--skip-complete", action="store_true", dest="skip_complete",
+                        help="Alben überspringen die bereits Cover + gute Tags haben")
 
     args = parser.parse_args()
 
@@ -371,6 +404,13 @@ def main() -> None:
         print("⚠️ Keine Album-Verzeichnisse gefunden.")
         sys.exit(1)
 
+    # --skip-complete: filter out albums that are already fully enriched
+    if args.skip_complete:
+        before = len(album_dirs)
+        album_dirs = [d for d in album_dirs if not _album_is_complete(d)[0]]
+        skipped_upfront = before - len(album_dirs)
+        print(f"⏭️ {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")
+
     print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
     if os.getenv("OLLAMA_HOST") or True: # Ollama always attempted
         print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")