Add --skip-complete: skip already-enriched albums in batch runs

- _album_is_complete(album_dir): checks cover presence + sampled tag quality
  (first/last/middle files); returns (bool, problems_list)
  Sampling strategy: covers first, last and up to 3 middle files to catch
  albums where only some tracks were tagged
- _print_status() now uses _album_is_complete() internally (DRY)
- --skip-complete flag: filters album_dirs before the main loop, prints
  how many were skipped upfront
- Typical batch command:
    python3 music_enricher.py --auto --confidence 0.1 --rename --embed-cover \
        --no-fingerprint --skip-complete ~/nvme2n1p7_home/Musik

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-04-29 09:05:51 +02:00
commit 388a9ffd08

View file

@ -236,13 +236,64 @@ def process_album(
return stats return stats
def _print_status(args: argparse.Namespace) -> None: _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
"""Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten.""" _AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}
def _sample_indices(total: int, sample: int) -> List[int]:
    """Return up to `sample` evenly spaced indices into a sequence of `total` items.

    The first and the last index are always included (as soon as there are
    at least two items), so a sample never degenerates to "only the start
    of the album". The result is sorted and free of duplicates.
    """
    if total <= 0:
        return []
    if total == 1:
        return [0]
    # At least first + last, at most every file.
    count = max(2, min(sample, total))
    last = total - 1
    # Integer arithmetic keeps the spacing deterministic; because
    # count <= total the step is >= 1 and all indices are distinct.
    return sorted({(i * last) // (count - 1) for i in range(count)})


def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
    """Check whether an album directory is already fully enriched.

    Criteria:
      - at least one image file (extension in `_IMAGE_EXTS`) exists anywhere
        below `album_dir`
      - every sampled audio file has a meaningful title and artist tag

    Only a sample of the audio files is inspected to keep batch runs fast:
    up to `sample` files, evenly spread over the sorted file list and always
    including the first and the last file, so that albums where only some
    tracks were tagged are still detected.

    Args:
        album_dir: Directory of the album (searched recursively).
        sample: Maximum number of audio files to inspect; first and last
            file are always inspected.

    Returns:
        A tuple `(is_complete, problems)`. `problems` is a list of short
        human-readable descriptions and is empty when the album is complete.
    """
    from mutagen import File as MutagenFile

    problems: List[str] = []

    # Walk the directory tree once and reuse the result for both checks.
    files = [f for f in album_dir.rglob("*") if f.is_file()]

    if not any(f.suffix.lower() in _IMAGE_EXTS for f in files):
        problems.append("kein Cover")

    audio_files = sorted(f for f in files if f.suffix.lower() in _AUDIO_EXTS)

    if audio_files:
        sampled = [audio_files[i] for i in _sample_indices(len(audio_files), sample)]
        bad = []
        for af in sampled:
            try:
                tags = MutagenFile(str(af), easy=True)
                if tags is None:
                    # File type was not recognised by mutagen.
                    bad.append(af.name)
                    continue
                title = (tags.get("title") or [""])[0].strip().lower()
                artist = (tags.get("artist") or [""])[0].strip().lower()
                if (
                    title in _BAD_TAG_VALUES
                    or artist in _BAD_TAG_VALUES
                    # Placeholder titles such as "audiotrack 03" / "track 7".
                    or title.startswith(("audiotrack", "track "))
                ):
                    bad.append(af.name)
            except Exception:
                # Best effort: a file that cannot be parsed counts as badly tagged.
                bad.append(af.name)
        if bad:
            problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")

    return len(problems) == 0, problems
def _print_status(args: argparse.Namespace) -> None:
"""Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten."""
album_dirs: List[Path] = [] album_dirs: List[Path] = []
if args.album: if args.album:
album_dirs.append(args.album.expanduser().resolve()) album_dirs.append(args.album.expanduser().resolve())
@ -251,50 +302,21 @@ def _print_status(args: argparse.Namespace) -> None:
if root.is_dir(): if root.is_dir():
album_dirs.extend(collect_album_dirs(root)) album_dirs.extend(collect_album_dirs(root))
no_cover, bad_tags, ok = [], [], [] bad_list, ok = [], []
for album_dir in sorted(album_dirs): for album_dir in sorted(album_dirs):
has_cover = any( complete, problems = _album_is_complete(album_dir)
f.suffix.lower() in IMAGE_EXTS if complete:
for f in album_dir.rglob("*") if f.is_file()
)
audio_files = [
f for f in sorted(album_dir.rglob("*"))
if f.is_file() and f.suffix.lower() in AUDIO_EXTS
]
missing_tags = []
for af in audio_files[:3]: # nur erste 3 prüfen (schnell)
try:
tags = MutagenFile(str(af), easy=True)
if tags is None:
missing_tags.append(af.name)
continue
title = (tags.get("title") or [""])[0].strip()
artist = (tags.get("artist") or [""])[0].strip()
if not title or title.lower() in ("unknown", "audiotrack", "") \
or not artist or artist.lower() in ("unknown", ""):
missing_tags.append(af.name)
except Exception:
missing_tags.append(af.name)
problems = []
if not has_cover:
problems.append("kein Cover")
if missing_tags:
problems.append(f"schlechte Tags ({len(missing_tags)}/{min(3,len(audio_files))} geprüft)")
if problems:
bad_tags.append((album_dir, problems))
else:
ok.append(album_dir) ok.append(album_dir)
else:
bad_list.append((album_dir, problems))
print(f"\n{'=' * 60}") print(f"\n{'=' * 60}")
print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben") print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
print(f"{'=' * 60}") print(f"{'=' * 60}")
print(f" ✅ In Ordnung: {len(ok)}") print(f" ✅ In Ordnung: {len(ok)}")
print(f" ⚠️ Mit Problemen: {len(bad_tags)}") print(f" ⚠️ Mit Problemen: {len(bad_list)}")
print() print()
for album_dir, problems in bad_tags: for album_dir, problems in bad_list:
print(f" 💿 {album_dir.name}") print(f" 💿 {album_dir.name}")
for p in problems: for p in problems:
print(f"{p}") print(f"{p}")
@ -337,6 +359,8 @@ def main() -> None:
"(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)") "(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
parser.add_argument("--status", action="store_true", parser.add_argument("--status", action="store_true",
help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben") help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
parser.add_argument("--skip-complete", action="store_true", dest="skip_complete",
help="Alben überspringen die bereits Cover + gute Tags haben")
args = parser.parse_args() args = parser.parse_args()
@ -371,6 +395,13 @@ def main() -> None:
print("⚠️ Keine Album-Verzeichnisse gefunden.") print("⚠️ Keine Album-Verzeichnisse gefunden.")
sys.exit(1) sys.exit(1)
# --skip-complete: vollständig enrichte Alben herausfiltern
if args.skip_complete:
before = len(album_dirs)
album_dirs = [d for d in album_dirs if not _album_is_complete(d)[0]]
skipped_upfront = before - len(album_dirs)
print(f"⏭️ {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")
print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.") print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
if os.getenv("OLLAMA_HOST") or True: # Ollama always attempted if os.getenv("OLLAMA_HOST") or True: # Ollama always attempted
print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)") print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")