LLM-Parser-Tests, check-Befehl und Cover-Doku

- tests/test_llm_parser.py: 13 Tests für _call_ollama, _call_openai_compatible und parse_tracklist (Retry-Logik, Markdown-Block, Track-Artist, Mock)
- cli: neuer check-Befehl zeigt Tags und Cover-Status aller Audiodateien; ♪ markiert Dateien mit eingebettetem Cover
- BEDIENUNGSANLEITUNG: neuer Abschnitt 7 (check-Befehl), Cover-Konvention (frontcover.jpg/backcover.jpg, Embedding, 500px) in Schritt 3

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-18 00:45:49 +01:00 · 2026-02-18 00:45:49 +01:00 · 88b89fbb50
commit 88b89fbb50
parent 256be0ae33
3 changed files with 261 additions and 4 deletions
--- a/BEDIENUNGSANLEITUNG.md
+++ b/BEDIENUNGSANLEITUNG.md
@ -10,8 +10,9 @@
 4. [Schritt 1: CDs rippen](#4-schritt-1-cds-rippen)
 5. [Schritt 2: Album-Metadaten ermitteln](#5-schritt-2-album-metadaten-ermitteln)
 6. [Schritt 3: Dateien organisieren und taggen](#6-schritt-3-dateien-organisieren-und-taggen)
-7. [Komplett-Pipeline](#7-komplett-pipeline)
+7. [Tags und Cover prüfen](#7-tags-und-cover-prüfen)
-8. [Tipps und Hinweise](#8-tipps-und-hinweise)
+8. [Komplett-Pipeline](#8-komplett-pipeline)
 9. [Tipps und Hinweise](#9-tipps-und-hinweise)
 ---
@ -299,6 +300,19 @@ Ergebnis:
 | `--back back.jpg` | Rückseiten-Bild |
 | `--dry-run` | Nur anzeigen, nichts ändern |
 ### Cover-Konvention
 Im Album-Verzeichnis werden folgende Dateinamen erwartet:
 | Datei | Zweck |
 |-------|-------|
 | `frontcover.jpg` oder `frontcover.png` | Front-Cover |
 | `backcover.jpg` oder `backcover.png` | Rückseiten-Cover |
 Symbolische Links auf diese Namen sind erlaubt. `apply` kopiert die mit `--front`/`--back` angegebenen Bilder automatisch als `frontcover.jpg` bzw. `backcover.jpg` ins Album-Verzeichnis und bettet das Frontcover anschließend in alle Audio-Dateien ein (skaliert auf max. 500 px).
 Ist bereits ein `frontcover.*` vorhanden (z. B. bei erneutem `apply`), wird es auch ohne Angabe der `--front`-Option verwendet.
 ### Dateinamen-Schema
 ```
@ -314,7 +328,35 @@ Beispiel: `01_-_Allegro_con_brio_-_Karajan.flac`
 ---
-## 7. Komplett-Pipeline
+## 7. Tags und Cover prüfen
 Nach `apply` lässt sich der Zustand aller Dateien mit einem Befehl prüfen:
 ```bash
 musiksammlung check ~/rip/Beethoven_Sinfonien
 ```
 Ausgabe:
 ```
 Verzeichnis: ~/rip/Beethoven_Sinfonien
  frontcover: frontcover.jpg
  backcover:  backcover.jpg
  CD1/
    [♪] 01_-_Allegro_con_brio_-_Karajan.flac
         Titel:    Allegro con brio
         Künstler: Karajan  |  AlbumArtist: Karajan
         Album:    Beethoven Sinfonien  |  Jahr: 1963
         Track:    1/4  |  Disc: 1
    ...
 ```
 `[♪]` zeigt an, dass ein Cover eingebettet ist. `[ ]` bedeutet, dass kein Cover eingebettet ist.
 ---
 ## 8. Komplett-Pipeline
 Für einfache Fälle (alles in einem Schritt):
@ -324,7 +366,7 @@ musiksammlung process temp/Album/CD1 ~/Musik --back back.jpg
 ---
-## 8. Tipps und Hinweise
+## 9. Tipps und Hinweise
 **CDDB-Lookup schlägt fehl?**
 - Internetverbindung prüfen
--- a/src/musiksammlung/cli.py
+++ b/src/musiksammlung/cli.py
@ -16,6 +16,9 @@ from musiksammlung.ocr import ocr_images
 from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
 from musiksammlung.playlist import generate_playlist
 from musiksammlung.ripper import RipperConfig, interactive_rip
 from mutagen import File as MutagenFile
 from musiksammlung.config import AUDIO_EXTENSIONS
 from musiksammlung.tagger import embed_album_cover, tag_album
 from musiksammlung.vision_llm import parse_image
@ -373,5 +376,81 @@ def process(
    typer.echo(f"Fertig! Album: {album_dir}")
@app.command()
def check(
    directory: Path = typer.Argument(..., help="Album- oder Disc-Verzeichnis"),
) -> None:
    """Zeigt Audio-Tags und Cover-Status aller Dateien in einem Verzeichnis.

    Durchsucht das Verzeichnis rekursiv nach Audiodateien und gibt für jede
    Datei die wichtigsten Tags aus. Zeigt außerdem ob frontcover.jpg/backcover.jpg
    vorhanden sind und ob ein Cover eingebettet ist.
    """
    # NOTE: the docstring above is what Typer prints as this command's --help
    # text, so it is user-facing output and is deliberately left untouched.
    #
    # Flow: validate the directory, report the album-level cover files, then
    # list every audio file (grouped by sub-directory) with its main tags and
    # a marker telling whether a cover is embedded. Exits with status 1 when
    # `directory` is not an existing directory.
    from mutagen import MutagenError

    # `exists()` alone would accept a regular file, which then shows up as an
    # "empty album"; require an actual directory instead.
    if not directory.is_dir():
        typer.echo(f"Fehler: Verzeichnis nicht gefunden: {directory}", err=True)
        raise typer.Exit(1)

    # Cover status at album level.
    front = find_cover(directory, "front")
    back = find_cover(directory, "back")
    typer.echo(f"\nVerzeichnis: {directory}")
    typer.echo(f"  frontcover: {front.name if front else '—  (fehlt)'}")
    typer.echo(f"  backcover:  {back.name if back else '—  (fehlt)'}")

    # Collect audio files recursively. Sorting by the parent path *relative to
    # the album root* keeps files of one directory together even when two
    # nested directories share the same leaf name, and puts files that sit
    # directly in the root (empty parts tuple) first.
    audio_files: list[Path] = sorted(
        (
            f
            for f in directory.rglob("*")
            if f.is_file() and f.suffix.lower() in AUDIO_EXTENSIONS
        ),
        key=lambda p: (p.parent.relative_to(directory).parts, p.name),
    )

    if not audio_files:
        typer.echo("\n  Keine Audiodateien gefunden.")
        return

    current_group: str | None = None
    for path in audio_files:
        # "." for files directly in `directory`, otherwise e.g. "CD1".
        group = path.parent.relative_to(directory).as_posix()
        if group != current_group:
            current_group = group
            typer.echo(f"\n  {group}/")

        # A single corrupt file must not abort the whole report.
        try:
            audio = MutagenFile(str(path), easy=True)
        except MutagenError as exc:
            typer.echo(f"    {path.name}  [nicht lesbar: {exc}]")
            continue

        # MutagenFile returns None when it cannot identify the file type.
        if audio is None:
            typer.echo(f"    {path.name}  [unlesbares Format]")
            continue

        cover_marker = "♪" if _has_embedded_cover(path) else " "
        typer.echo(
            f"    [{cover_marker}] {path.name}\n"
            f"         Titel:    {_first_tag(audio, 'title')}\n"
            f"         Künstler: {_first_tag(audio, 'artist')}"
            f"  |  AlbumArtist: {_first_tag(audio, 'albumartist')}\n"
            f"         Album:    {_first_tag(audio, 'album')}"
            f"  |  Jahr: {_first_tag(audio, 'date')}\n"
            f"         Track:    {_first_tag(audio, 'tracknumber')}"
            f"  |  Disc: {_first_tag(audio, 'discnumber')}"
        )


def _first_tag(audio, key: str) -> str:
    """Return the first value stored under *key* in *audio*, or "—" if absent.

    *audio* is the object returned by ``MutagenFile(..., easy=True)``; only
    its ``get`` method is used.
    """
    values = audio.get(key)
    return values[0] if values else "—"
 def _has_embedded_cover(path: Path) -> bool:
    """Prüft ob eine Audiodatei ein eingebettetes Cover enthält."""
    from mutagen.flac import FLAC
    from mutagen.mp3 import MP3
    suffix = path.suffix.lower()
    try:
        if suffix == ".flac":
            return bool(FLAC(str(path)).pictures)
        if suffix == ".mp3":
            tags = MP3(str(path)).tags
            return tags is not None and any(k.startswith("APIC") for k in tags.keys())
    except Exception:
        pass
    return False
 # Invoke the CLI application object when this module is executed as a script.
 if __name__ == "__main__":
    app()
--- a/tests/test_llm_parser.py
+++ b/tests/test_llm_parser.py
@ -0,0 +1,136 @@
 """Tests für den LLM-Parser (HTTP-Calls via Mock)."""
 from unittest.mock import MagicMock, patch
 import pytest
 from musiksammlung.llm_parser import _call_ollama, _call_openai_compatible, parse_tracklist
 # Minimal well-formed album payload: one disc holding a single track, with an
 # album-level artist only (no per-track artist field).
 VALID_JSON = (
    '{"artist":"Karajan","album":"Beethoven","year":1963,'
    '"discs":[{"disc_number":1,"tracks":[{"track_number":1,"title":"Allegro"}]}]}'
 )
 # Compilation-style payload: every track carries its own "artist" field and
 # the album year is an explicit JSON null.
 VALID_JSON_WITH_TRACK_ARTIST = (
    '{"artist":"Various Artists","album":"Sampler","year":null,'
    '"discs":[{"disc_number":1,"tracks":['
    '{"track_number":1,"title":"Song A","artist":"Artist X"},'
    '{"track_number":2,"title":"Song B","artist":"Artist Y"}'
    ']}]}'
 )
 def _mock_response(content: str) -> MagicMock:
    """Erstellt eine Mock-httpx-Response mit dem gegebenen Content."""
    resp = MagicMock()
    resp.raise_for_status = MagicMock()
    resp.json.return_value = {"message": {"content": content}}
    return resp
 def _mock_openai_response(content: str) -> MagicMock:
    resp = MagicMock()
    resp.raise_for_status = MagicMock()
    resp.json.return_value = {"choices": [{"message": {"content": content}}]}
    return resp
 class TestCallOllama:
    """Checks for ``_call_ollama`` with ``httpx.post`` patched out."""

    def test_returns_message_content(self) -> None:
        """The content of the mocked reply is what the call returns."""
        fake_reply = _mock_response("some response")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply):
            answer = _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        assert answer == "some response"

    def test_sends_correct_url(self) -> None:
        """The first positional POST argument is the base URL plus /api/chat."""
        fake_reply = _mock_response("")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply) as post:
            _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        assert post.call_args.args[0] == "http://localhost:11434/api/chat"

    def test_sends_model_and_text(self) -> None:
        """Model name and input text appear in the JSON body of the POST."""
        fake_reply = _mock_response("")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply) as post:
            _call_ollama("mein text", "gemma3:12b", "http://localhost:11434")
        body = post.call_args.kwargs["json"]
        assert body["model"] == "gemma3:12b"
        # The input text is expected in the second message (index 1);
        # presumably index 0 is a system prompt -- verify against llm_parser.
        assert body["messages"][1]["content"] == "mein text"
 class TestCallOpenaiCompatible:
    """Checks for ``_call_openai_compatible`` with ``httpx.post`` patched out."""

    def test_returns_message_content(self) -> None:
        """The content of the first choice in the mocked reply is returned."""
        fake_reply = _mock_openai_response("openai reply")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply):
            answer = _call_openai_compatible("text", "gpt-4", "http://api.example.com")
        assert answer == "openai reply"

    def test_sends_bearer_token_if_api_key(self) -> None:
        """Passing ``api_key`` yields an ``Authorization: Bearer <key>`` header."""
        fake_reply = _mock_openai_response("")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply) as post:
            _call_openai_compatible("t", "m", "http://x", api_key="secret")
        sent_headers = post.call_args.kwargs["headers"]
        assert sent_headers["Authorization"] == "Bearer secret"

    def test_no_auth_header_without_api_key(self) -> None:
        """Without ``api_key`` no ``Authorization`` header is sent."""
        fake_reply = _mock_openai_response("")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake_reply) as post:
            _call_openai_compatible("t", "m", "http://x")
        sent_headers = post.call_args.kwargs["headers"]
        assert "Authorization" not in sent_headers
 class TestParseTracklist:
    """Checks for ``parse_tracklist`` with the backend call functions patched."""

    def test_successful_parse_ollama(self) -> None:
        """A valid reply from the default backend is turned into an album."""
        with patch("musiksammlung.llm_parser._call_ollama", return_value=VALID_JSON):
            parsed = parse_tracklist("tracklist text")
        assert parsed.artist == "Karajan"
        assert parsed.album == "Beethoven"
        assert parsed.year == 1963
        first_disc = parsed.discs[0]
        assert len(first_disc.tracks) == 1

    def test_successful_parse_openai(self) -> None:
        """``backend="openai"`` parses the reply of the OpenAI-compatible call."""
        openai_call = patch(
            "musiksammlung.llm_parser._call_openai_compatible", return_value=VALID_JSON
        )
        with openai_call:
            parsed = parse_tracklist("text", backend="openai")
        assert parsed.artist == "Karajan"

    def test_retries_on_invalid_json(self) -> None:
        """An unparsable first reply is followed by a second, valid attempt."""
        replies = ["definitely not json", VALID_JSON]
        with patch("musiksammlung.llm_parser._call_ollama", side_effect=replies):
            parsed = parse_tracklist("text", max_retries=2)
        assert parsed.artist == "Karajan"

    def test_raises_after_max_retries_exceeded(self) -> None:
        """A ``ValueError`` is raised once the retry budget is used up."""
        always_garbage = patch(
            "musiksammlung.llm_parser._call_ollama", return_value="no json here"
        )
        with always_garbage, pytest.raises(ValueError, match="kein valides JSON"):
            parse_tracklist("text", max_retries=1)

    def test_json_in_markdown_block(self) -> None:
        """JSON wrapped in a fenced ``json`` Markdown block is still parsed."""
        fenced = f"```json\n{VALID_JSON}\n```"
        with patch("musiksammlung.llm_parser._call_ollama", return_value=fenced):
            parsed = parse_tracklist("text")
        assert parsed.artist == "Karajan"

    def test_track_artist_field_parsed(self) -> None:
        """Per-track ``artist`` values end up on the individual tracks."""
        with patch(
            "musiksammlung.llm_parser._call_ollama",
            return_value=VALID_JSON_WITH_TRACK_ARTIST,
        ):
            parsed = parse_tracklist("text")
        first_track, second_track = parsed.discs[0].tracks[0], parsed.discs[0].tracks[1]
        assert first_track.artist == "Artist X"
        assert second_track.artist == "Artist Y"

    def test_missing_year_defaults_to_none(self) -> None:
        """A reply whose year is JSON null produces ``year is None``."""
        # NOTE(review): despite the test name, the payload sends an explicit
        # "year":null rather than omitting the key; a truly missing key is not
        # covered here -- confirm whether the model accepts that case.
        null_year_payload = (
            '{"artist":"A","album":"B","year":null,'
            '"discs":[{"disc_number":1,"tracks":[{"track_number":1,"title":"T"}]}]}'
        )
        with patch(
            "musiksammlung.llm_parser._call_ollama", return_value=null_year_payload
        ):
            parsed = parse_tracklist("text")
        assert parsed.year is None