Music_Metadata_Enricher/scanner.py

137 lines
4.5 KiB
Python
Raw Normal View History

from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import List
from models import AlbumScan, AUDIO_EXTENSIONS, IMAGE_EXTENSIONS, TRACKLIST_EXTENSIONS, PLAYLIST_EXTENSIONS
# Names of per-disc sub-folders inside a multi-disc album, matched
# case-insensitively against the whole folder name:
#   - "CD", "Disc", "Disk" or "Side", optional "_", " " or "-" separators,
#     then a one- or two-digit number ("CD1", "Disc 2", "Disk_1", "Side 1");
#   - "Side" followed by a single letter A-H ("Side A", "side_b"), the
#     usual labelling of vinyl and cassette rips. The letter range is kept
#     short so that ordinary words such as "Sides" are not mistaken for a
#     disc folder.
_DISC_DIR_RE = re.compile(
    r"(?i)^(?:(?:cd|disc|disk|side)[_ \-]*\d{1,2}|side[_ \-]*[a-h])$"
)
def _is_hidden(name: str) -> bool:
    """Return True for entry names that start with a dot or an underscore."""
    hidden_prefixes = (".", "_")
    return name.startswith(hidden_prefixes)
def _is_disc_dir(name: str) -> bool:
    """Return True when *name* is a disc folder name per ``_DISC_DIR_RE``.

    Examples of matching names: 'CD1', 'Disc 2', 'Disk_1', 'Side 1'.
    """
    return _DISC_DIR_RE.match(name) is not None
def scan_album(album_dir: Path) -> AlbumScan:
    """
    Scan one album directory and sort its files into an ``AlbumScan``.

    Recursion rule:
    - If the album directory itself contains audio files, sub-folders are
      not entered (single disc; folders such as artwork, scans or stray
      copies are ignored).
    - If the root has NO audio files, only disc sub-folders
      (CD1, Disc 2, ...) are entered.

    Hidden entries (see ``_is_hidden``) never count as audio.

    Args:
        album_dir: Directory of the album to scan.

    Returns:
        The populated ``AlbumScan``. Its audio, image, tracklist and
        playlist lists are sorted; ``other_files`` is left in the order in
        which the files were encountered.
    """
    result = AlbumScan(album_dir=album_dir)

    # Look at the root level only, to decide whether to descend. Only
    # regular files count: a *folder* whose name happens to end in an audio
    # suffix must not switch off the multi-disc descent. This mirrors the
    # is_file() test applied when album directories are discovered.
    root_has_audio = any(
        (album_dir / name).suffix.lower() in AUDIO_EXTENSIONS
        and (album_dir / name).is_file()
        for name in _listdir(album_dir)
        if not _is_hidden(name)
    )

    # Audio at the root  -> single disc: scan the root level only.
    # No audio at the root -> multi-disc layout: also enter disc folders.
    _scan_dir(album_dir, album_dir, result, recurse=not root_has_audio)

    result.audio_files.sort()
    result.image_files.sort()
    result.tracklist_files.sort()
    result.playlist_files.sort()
    return result
def _listdir(path: Path) -> List[str]:
    """Return the names of all entries directly inside *path*.

    A directory that cannot be read is reported on stderr and yields an
    empty list instead of raising.
    """
    names: List[str] = []
    try:
        for child in path.iterdir():
            names.append(child.name)
    except (PermissionError, OSError) as e:
        print(f"⚠️ Scan-Fehler: {e}", file=sys.stderr)
        return []
    return names
def _scan_dir(current: Path, album_dir: Path, result: AlbumScan, recurse: bool) -> None:
    """Sort the files found in *current* into the lists of *result*.

    Hidden entries are skipped. With *recurse* set, sub-folders whose name
    is a disc folder name (see ``_is_disc_dir``) are scanned the same way;
    every other sub-folder is ignored. A directory that cannot be listed is
    reported on stderr and contributes nothing.

    Args:
        current: Directory being scanned.
        album_dir: Root directory of the album; only handed on to the
            recursive calls.
        result: Scan result that receives the files.
        recurse: Whether disc sub-folders are entered.
    """
    try:
        children = sorted(current.iterdir())
    except (PermissionError, OSError) as e:
        print(f"⚠️ Scan-Fehler {current}: {e}", file=sys.stderr)
        return

    # Checked in this order; the first category whose extensions match wins.
    categories = (
        (AUDIO_EXTENSIONS, result.audio_files),
        (IMAGE_EXTENSIONS, result.image_files),
        (TRACKLIST_EXTENSIONS, result.tracklist_files),
        (PLAYLIST_EXTENSIONS, result.playlist_files),
    )

    for child in children:
        if _is_hidden(child.name):
            continue

        if child.is_dir():
            # Only disc folders are entered, and only when recursion is on;
            # other sub-folders (artwork, stray copies, ...) are skipped.
            if recurse and _is_disc_dir(child.name):
                _scan_dir(child, album_dir, result, recurse=True)
            continue

        if not child.is_file():
            continue

        suffix = child.suffix.lower()
        for extensions, bucket in categories:
            if suffix in extensions:
                bucket.append(child)
                break
        else:
            # No known category: keep the file under "other".
            result.other_files.append(child)
def collect_album_dirs(root: Path) -> List[Path]:
    """
    Recursively collect every album directory at or below *root*.

    A directory counts as an album when:
    - it directly contains audio files, OR
    - it has no audio files of its own but has disc sub-folders that
      contain audio (multi-disc album).
    Container directories without audio are searched recursively.

    Args:
        root: Directory at which the search starts.

    Returns:
        The album directories that were found.
    """
    albums: List[Path] = []
    _find_albums(root, albums)
    return albums
def _has_local_audio(path: Path) -> bool:
    """Tell whether *path* directly contains a visible audio file.

    Only regular, non-hidden files are considered and sub-folders are not
    entered. A directory that cannot be read counts as having no audio.
    """
    try:
        for child in path.iterdir():
            if not child.is_file() or _is_hidden(child.name):
                continue
            if child.suffix.lower() in AUDIO_EXTENSIONS:
                return True
    except (PermissionError, OSError):
        return False
    return False
def _find_albums(current: Path, result: List[Path]) -> None:
    """Append the album directories at or below *current* to *result*.

    *current* itself is an album, and the search stops there, when it
    directly contains a visible audio file or when one of its disc
    sub-folders does. Otherwise it is treated as a container and each of
    its visible sub-folders is searched in turn. A directory that cannot be
    read is reported on stderr and skipped.

    Args:
        current: Directory to examine.
        result: List that collects the album directories (mutated in place).
    """
    try:
        children = sorted(current.iterdir())
        visible_dirs = [
            child for child in children
            if child.is_dir() and not _is_hidden(child.name)
        ]
    except (PermissionError, OSError) as e:
        print(f"⚠️ Lesefehler {current}: {e}", file=sys.stderr)
        return

    # Audio directly inside this directory -> it is an album.
    for child in children:
        if (
            child.is_file()
            and not _is_hidden(child.name)
            and child.suffix.lower() in AUDIO_EXTENSIONS
        ):
            result.append(current)
            return

    # A disc sub-folder with audio -> multi-disc album (scan_album takes
    # care of the per-disc logic later).
    for folder in visible_dirs:
        if _is_disc_dir(folder.name) and _has_local_audio(folder):
            result.append(current)
            return

    # Plain container directory -> keep searching below it.
    for folder in visible_dirs:
        _find_albums(folder, result)