Add --skip-complete: skip already-enriched albums in batch runs
- _album_is_complete(album_dir): checks cover presence + sampled tag quality
(first/last/middle files); returns (bool, problems_list)
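Sampling strategy: intended to cover first, last and up to 3 middle files to
catch albums where only some tracks were tagged. Note: with the default
sample=5 the index expression range(1, min(sample - 2, len(audio_files) - 1))
actually selects the first file, the last file and at most the two files right
after the first (indices 1 and 2), not files from the middle of the album.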
- _print_status() now uses _album_is_complete() internally (DRY)
- --skip-complete flag: filters album_dirs before the main loop, prints
how many were skipped upfront
- Typical batch command:
python3 music_enricher.py --auto --confidence 0.1 --rename --embed-cover \
--no-fingerprint --skip-complete ~/nvme2n1p7_home/Musik
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
80472653b4
commit
388a9ffd08
1 changed file with 72 additions and 41 deletions
|
|
@ -236,13 +236,64 @@ def process_album(
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
|
||||||
def _print_status(args: argparse.Namespace) -> None:
|
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
|
||||||
"""Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten."""
|
_AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
|
||||||
|
_BAD_TAG_VALUES = {"unknown", "unknown artist", "audiotrack", "track", ""}
|
||||||
|
|
||||||
|
|
||||||
|
def _album_is_complete(album_dir: Path, sample: int = 5) -> tuple[bool, List[str]]:
|
||||||
|
"""
|
||||||
|
Prüft ob ein Album vollständig enriched ist.
|
||||||
|
Gibt (is_complete, problems) zurück.
|
||||||
|
Kriterien:
|
||||||
|
- folder.jpg oder äquivalentes Cover vorhanden
|
||||||
|
- Alle Audio-Dateien (Stichprobe: `sample` Dateien) haben sinnvolle title + artist Tags
|
||||||
|
"""
|
||||||
from mutagen import File as MutagenFile
|
from mutagen import File as MutagenFile
|
||||||
|
|
||||||
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}
|
problems: List[str] = []
|
||||||
AUDIO_EXTS = {".mp3", ".flac", ".m4a", ".wav", ".ogg", ".opus"}
|
|
||||||
|
|
||||||
|
has_cover = any(
|
||||||
|
f.suffix.lower() in _IMAGE_EXTS
|
||||||
|
for f in album_dir.rglob("*") if f.is_file()
|
||||||
|
)
|
||||||
|
if not has_cover:
|
||||||
|
problems.append("kein Cover")
|
||||||
|
|
||||||
|
audio_files = sorted(
|
||||||
|
f for f in album_dir.rglob("*")
|
||||||
|
if f.is_file() and f.suffix.lower() in _AUDIO_EXTS
|
||||||
|
)
|
||||||
|
# Stichprobe: erste, letzte und mittlere Datei abdecken
|
||||||
|
if audio_files:
|
||||||
|
indices = sorted(set([
|
||||||
|
0, len(audio_files) - 1,
|
||||||
|
*range(1, min(sample - 2, len(audio_files) - 1)),
|
||||||
|
]))
|
||||||
|
sampled = [audio_files[i] for i in indices if i < len(audio_files)]
|
||||||
|
bad = []
|
||||||
|
for af in sampled:
|
||||||
|
try:
|
||||||
|
tags = MutagenFile(str(af), easy=True)
|
||||||
|
if tags is None:
|
||||||
|
bad.append(af.name)
|
||||||
|
continue
|
||||||
|
title = (tags.get("title") or [""])[0].strip().lower()
|
||||||
|
artist = (tags.get("artist") or [""])[0].strip().lower()
|
||||||
|
if title in _BAD_TAG_VALUES or artist in _BAD_TAG_VALUES:
|
||||||
|
bad.append(af.name)
|
||||||
|
elif title.startswith("audiotrack") or title.startswith("track "):
|
||||||
|
bad.append(af.name)
|
||||||
|
except Exception:
|
||||||
|
bad.append(af.name)
|
||||||
|
if bad:
|
||||||
|
problems.append(f"schlechte Tags ({len(bad)}/{len(sampled)} geprüft: {bad[0]}…)")
|
||||||
|
|
||||||
|
return len(problems) == 0, problems
|
||||||
|
|
||||||
|
|
||||||
|
def _print_status(args: argparse.Namespace) -> None:
|
||||||
|
"""Scannt die Bibliothek und zeigt Alben mit fehlenden/schlechten Metadaten."""
|
||||||
album_dirs: List[Path] = []
|
album_dirs: List[Path] = []
|
||||||
if args.album:
|
if args.album:
|
||||||
album_dirs.append(args.album.expanduser().resolve())
|
album_dirs.append(args.album.expanduser().resolve())
|
||||||
|
|
@ -251,50 +302,21 @@ def _print_status(args: argparse.Namespace) -> None:
|
||||||
if root.is_dir():
|
if root.is_dir():
|
||||||
album_dirs.extend(collect_album_dirs(root))
|
album_dirs.extend(collect_album_dirs(root))
|
||||||
|
|
||||||
no_cover, bad_tags, ok = [], [], []
|
bad_list, ok = [], []
|
||||||
|
|
||||||
for album_dir in sorted(album_dirs):
|
for album_dir in sorted(album_dirs):
|
||||||
has_cover = any(
|
complete, problems = _album_is_complete(album_dir)
|
||||||
f.suffix.lower() in IMAGE_EXTS
|
if complete:
|
||||||
for f in album_dir.rglob("*") if f.is_file()
|
|
||||||
)
|
|
||||||
audio_files = [
|
|
||||||
f for f in sorted(album_dir.rglob("*"))
|
|
||||||
if f.is_file() and f.suffix.lower() in AUDIO_EXTS
|
|
||||||
]
|
|
||||||
missing_tags = []
|
|
||||||
for af in audio_files[:3]: # nur erste 3 prüfen (schnell)
|
|
||||||
try:
|
|
||||||
tags = MutagenFile(str(af), easy=True)
|
|
||||||
if tags is None:
|
|
||||||
missing_tags.append(af.name)
|
|
||||||
continue
|
|
||||||
title = (tags.get("title") or [""])[0].strip()
|
|
||||||
artist = (tags.get("artist") or [""])[0].strip()
|
|
||||||
if not title or title.lower() in ("unknown", "audiotrack", "") \
|
|
||||||
or not artist or artist.lower() in ("unknown", ""):
|
|
||||||
missing_tags.append(af.name)
|
|
||||||
except Exception:
|
|
||||||
missing_tags.append(af.name)
|
|
||||||
|
|
||||||
problems = []
|
|
||||||
if not has_cover:
|
|
||||||
problems.append("kein Cover")
|
|
||||||
if missing_tags:
|
|
||||||
problems.append(f"schlechte Tags ({len(missing_tags)}/{min(3,len(audio_files))} geprüft)")
|
|
||||||
|
|
||||||
if problems:
|
|
||||||
bad_tags.append((album_dir, problems))
|
|
||||||
else:
|
|
||||||
ok.append(album_dir)
|
ok.append(album_dir)
|
||||||
|
else:
|
||||||
|
bad_list.append((album_dir, problems))
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
print(f"\n{'=' * 60}")
|
||||||
print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
|
print(f"📊 Bibliotheksstatus — {len(album_dirs)} Alben")
|
||||||
print(f"{'=' * 60}")
|
print(f"{'=' * 60}")
|
||||||
print(f" ✅ In Ordnung: {len(ok)}")
|
print(f" ✅ In Ordnung: {len(ok)}")
|
||||||
print(f" ⚠️ Mit Problemen: {len(bad_tags)}")
|
print(f" ⚠️ Mit Problemen: {len(bad_list)}")
|
||||||
print()
|
print()
|
||||||
for album_dir, problems in bad_tags:
|
for album_dir, problems in bad_list:
|
||||||
print(f" 💿 {album_dir.name}")
|
print(f" 💿 {album_dir.name}")
|
||||||
for p in problems:
|
for p in problems:
|
||||||
print(f" → {p}")
|
print(f" → {p}")
|
||||||
|
|
@ -337,6 +359,8 @@ def main() -> None:
|
||||||
"(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
|
"(Standard: ../Jellyfin_Playlist_Generator/jellyfin_playlist_generator.py)")
|
||||||
parser.add_argument("--status", action="store_true",
|
parser.add_argument("--status", action="store_true",
|
||||||
help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
|
help="Bibliotheksstatus anzeigen (fehlende Cover, schlechte Tags) — nichts schreiben")
|
||||||
|
parser.add_argument("--skip-complete", action="store_true", dest="skip_complete",
|
||||||
|
help="Alben überspringen die bereits Cover + gute Tags haben")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
@ -371,6 +395,13 @@ def main() -> None:
|
||||||
print("⚠️ Keine Album-Verzeichnisse gefunden.")
|
print("⚠️ Keine Album-Verzeichnisse gefunden.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# --skip-complete: vollständig enrichte Alben herausfiltern
|
||||||
|
if args.skip_complete:
|
||||||
|
before = len(album_dirs)
|
||||||
|
album_dirs = [d for d in album_dirs if not _album_is_complete(d)[0]]
|
||||||
|
skipped_upfront = before - len(album_dirs)
|
||||||
|
print(f"⏭️ {skipped_upfront}/{before} Alben bereits vollständig — übersprungen.")
|
||||||
|
|
||||||
print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
|
print(f"🎵 {len(album_dirs)} Album-Verzeichnisse gefunden.")
|
||||||
if os.getenv("OLLAMA_HOST") or True: # Ollama always attempted
|
if os.getenv("OLLAMA_HOST") or True: # Ollama always attempted
|
||||||
print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
|
print("🤖 LLM-Resolve: Ollama → OpenRouter (kein Claude)")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue