LLM-Parser-Tests, check-Befehl und Cover-Doku
- tests/test_llm_parser.py: 13 Tests für _call_ollama, _call_openai_compatible und parse_tracklist (Retry-Logik, Markdown-Block, Track-Artist, Mock).
- cli: neuer check-Befehl zeigt Tags und Cover-Status aller Audiodateien; ♪ markiert Dateien mit eingebettetem Cover.
- BEDIENUNGSANLEITUNG: neuer Abschnitt 7 (check-Befehl); Cover-Konvention (frontcover.jpg/backcover.jpg, Embedding, 500 px) in Schritt 3.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
256be0ae33
commit
88b89fbb50
3 changed files with 261 additions and 4 deletions
|
|
@ -10,8 +10,9 @@
|
||||||
4. [Schritt 1: CDs rippen](#4-schritt-1-cds-rippen)
|
4. [Schritt 1: CDs rippen](#4-schritt-1-cds-rippen)
|
||||||
5. [Schritt 2: Album-Metadaten ermitteln](#5-schritt-2-album-metadaten-ermitteln)
|
5. [Schritt 2: Album-Metadaten ermitteln](#5-schritt-2-album-metadaten-ermitteln)
|
||||||
6. [Schritt 3: Dateien organisieren und taggen](#6-schritt-3-dateien-organisieren-und-taggen)
|
6. [Schritt 3: Dateien organisieren und taggen](#6-schritt-3-dateien-organisieren-und-taggen)
|
||||||
7. [Komplett-Pipeline](#7-komplett-pipeline)
|
7. [Tags und Cover prüfen](#7-tags-und-cover-prüfen)
|
||||||
8. [Tipps und Hinweise](#8-tipps-und-hinweise)
|
8. [Komplett-Pipeline](#8-komplett-pipeline)
|
||||||
|
9. [Tipps und Hinweise](#9-tipps-und-hinweise)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -299,6 +300,19 @@ Ergebnis:
|
||||||
| `--back back.jpg` | Rückseiten-Bild |
|
| `--back back.jpg` | Rückseiten-Bild |
|
||||||
| `--dry-run` | Nur anzeigen, nichts ändern |
|
| `--dry-run` | Nur anzeigen, nichts ändern |
|
||||||
|
|
||||||
|
### Cover-Konvention
|
||||||
|
|
||||||
|
Im Album-Verzeichnis werden folgende Dateinamen erwartet:
|
||||||
|
|
||||||
|
| Datei | Zweck |
|
||||||
|
|-------|-------|
|
||||||
|
| `frontcover.jpg` oder `frontcover.png` | Front-Cover |
|
||||||
|
| `backcover.jpg` oder `backcover.png` | Rückseiten-Cover |
|
||||||
|
|
||||||
|
Symbolische Links auf diese Namen sind erlaubt. `apply` kopiert die mit `--front`/`--back` angegebenen Bilder automatisch als `frontcover.jpg` bzw. `backcover.jpg` ins Album-Verzeichnis und bettet das Frontcover anschließend in alle Audio-Dateien ein (skaliert auf max. 500 px).
|
||||||
|
|
||||||
|
Ist bereits ein `frontcover.*` im Album-Verzeichnis vorhanden (z. B. bei erneutem `apply`), wird es auch ohne Angabe von `--front` verwendet und eingebettet.
|
||||||
|
|
||||||
### Dateinamen-Schema
|
### Dateinamen-Schema
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
@ -314,7 +328,35 @@ Beispiel: `01_-_Allegro_con_brio_-_Karajan.flac`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 7. Komplett-Pipeline
|
## 7. Tags und Cover prüfen
|
||||||
|
|
||||||
|
Nach `apply` lässt sich der Zustand aller Dateien mit einem Befehl prüfen:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
musiksammlung check ~/rip/Beethoven_Sinfonien
|
||||||
|
```
|
||||||
|
|
||||||
|
Ausgabe:
|
||||||
|
|
||||||
|
```
|
||||||
|
Verzeichnis: ~/rip/Beethoven_Sinfonien
|
||||||
|
frontcover: frontcover.jpg
|
||||||
|
backcover: backcover.jpg
|
||||||
|
|
||||||
|
CD1/
|
||||||
|
[♪] 01_-_Allegro_con_brio_-_Karajan.flac
|
||||||
|
Titel: Allegro con brio
|
||||||
|
Künstler: Karajan | AlbumArtist: Karajan
|
||||||
|
Album: Beethoven Sinfonien | Jahr: 1963
|
||||||
|
Track: 1/4 | Disc: 1
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
`[♪]` zeigt an, dass ein Cover eingebettet ist. `[ ]` bedeutet kein eingebettetes Cover.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Komplett-Pipeline
|
||||||
|
|
||||||
Für einfache Fälle (alles in einem Schritt):
|
Für einfache Fälle (alles in einem Schritt):
|
||||||
|
|
||||||
|
|
@ -324,7 +366,7 @@ musiksammlung process temp/Album/CD1 ~/Musik --back back.jpg
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 8. Tipps und Hinweise
|
## 9. Tipps und Hinweise
|
||||||
|
|
||||||
**CDDB-Lookup schlägt fehl?**
|
**CDDB-Lookup schlägt fehl?**
|
||||||
- Internetverbindung prüfen
|
- Internetverbindung prüfen
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ from musiksammlung.ocr import ocr_images
|
||||||
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
|
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
|
||||||
from musiksammlung.playlist import generate_playlist
|
from musiksammlung.playlist import generate_playlist
|
||||||
from musiksammlung.ripper import RipperConfig, interactive_rip
|
from musiksammlung.ripper import RipperConfig, interactive_rip
|
||||||
|
from mutagen import File as MutagenFile
|
||||||
|
|
||||||
|
from musiksammlung.config import AUDIO_EXTENSIONS
|
||||||
from musiksammlung.tagger import embed_album_cover, tag_album
|
from musiksammlung.tagger import embed_album_cover, tag_album
|
||||||
from musiksammlung.vision_llm import parse_image
|
from musiksammlung.vision_llm import parse_image
|
||||||
|
|
||||||
|
|
@ -373,5 +376,81 @@ def process(
|
||||||
typer.echo(f"Fertig! Album: {album_dir}")
|
typer.echo(f"Fertig! Album: {album_dir}")
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE: Typer renders the docstring of a command as its --help text, so the
# docstring below is runtime output and stays in German like the rest of the CLI.
@app.command()
def check(
    directory: Path = typer.Argument(..., help="Album- oder Disc-Verzeichnis"),
) -> None:
    """Zeigt Audio-Tags und Cover-Status aller Dateien in einem Verzeichnis.

    Durchsucht das Verzeichnis rekursiv nach Audiodateien und gibt für jede
    Datei die wichtigsten Tags aus. Zeigt außerdem ob frontcover.jpg/backcover.jpg
    vorhanden sind und ob ein Cover eingebettet ist.
    """
    # Imported locally so the block does not depend on a new module-level import.
    from mutagen import MutagenError

    # A regular file also "exists"; only a directory can be scanned.
    if not directory.is_dir():
        typer.echo(f"Fehler: Verzeichnis nicht gefunden: {directory}", err=True)
        raise typer.Exit(1)

    # Album-level cover status.
    front = find_cover(directory, "front")
    back = find_cover(directory, "back")
    typer.echo(f"\nVerzeichnis: {directory}")
    typer.echo(f" frontcover: {front.name if front else '— (fehlt)'}")
    typer.echo(f" backcover: {back.name if back else '— (fehlt)'}")

    # Collect audio files recursively. Directories that merely carry an audio
    # suffix are skipped. Sorting by the path relative to `directory` keeps all
    # files of one folder adjacent, even when two folders share a name.
    audio_files: list[Path] = sorted(
        (
            f
            for f in directory.rglob("*")
            if f.is_file() and f.suffix.lower() in AUDIO_EXTENSIONS
        ),
        key=lambda p: (p.parent.relative_to(directory).parts, p.name),
    )

    if not audio_files:
        typer.echo("\n Keine Audiodateien gefunden.")
        return

    current_parent: Path | None = None
    for path in audio_files:
        # Print a folder header whenever the containing folder changes;
        # files directly inside `directory` are listed under "./".
        if path.parent != current_parent:
            current_parent = path.parent
            label = path.parent.relative_to(directory).as_posix()
            typer.echo(f"\n {label}/")

        try:
            audio = MutagenFile(str(path), easy=True)
        except MutagenError as exc:
            # One damaged file must not abort the report for the others.
            typer.echo(f" {path.name} [Lesefehler: {exc}]")
            continue
        if audio is None:
            typer.echo(f" {path.name} [unlesbares Format]")
            continue

        cover_marker = "♪" if _has_embedded_cover(path) else " "

        typer.echo(
            f" [{cover_marker}] {path.name}\n"
            f" Titel: {_first_tag(audio, 'title')}\n"
            f" Künstler: {_first_tag(audio, 'artist')}"
            f" | AlbumArtist: {_first_tag(audio, 'albumartist')}\n"
            f" Album: {_first_tag(audio, 'album')} | Jahr: {_first_tag(audio, 'date')}\n"
            f" Track: {_first_tag(audio, 'tracknumber')}"
            f" | Disc: {_first_tag(audio, 'discnumber')}"
        )


def _first_tag(audio, key: str) -> str:
    """Return the first value stored under *key* in *audio*, or "—" if there is none."""
    values = audio.get(key)
    return values[0] if values else "—"
|
||||||
|
|
||||||
|
|
||||||
|
def _has_embedded_cover(path: Path) -> bool:
    """Report whether the audio file at *path* carries an embedded cover image.

    Only ``.flac`` and ``.mp3`` files are inspected. Any other suffix, and any
    error raised while reading the file, yields ``False``.
    """
    from mutagen.flac import FLAC
    from mutagen.mp3 import MP3

    extension = path.suffix.lower()
    try:
        if extension == ".flac":
            flac_pictures = FLAC(str(path)).pictures
            return bool(flac_pictures)
        if extension == ".mp3":
            id3 = MP3(str(path)).tags
            if id3 is None:
                return False
            # Picture frames are stored under keys that start with "APIC".
            return any(frame_id.startswith("APIC") for frame_id in id3.keys())
    except Exception:
        # Best-effort probe: a file that cannot be read counts as "no cover".
        return False
    return False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app()
|
app()
|
||||||
|
|
|
||||||
136
tests/test_llm_parser.py
Normal file
136
tests/test_llm_parser.py
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
"""Tests für den LLM-Parser (HTTP-Calls via Mock)."""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from musiksammlung.llm_parser import _call_ollama, _call_openai_compatible, parse_tracklist
|
||||||
|
|
||||||
|
# Album JSON with one disc and one track, used as the canned LLM reply.
VALID_JSON = "".join(
    [
        '{"artist":"Karajan","album":"Beethoven","year":1963,"discs":[',
        '{"disc_number":1,"tracks":[',
        '{"track_number":1,"title":"Allegro"}',
        "]}",
        "]}",
    ]
)

# Sampler-style album: every track names its own artist and the year is null.
VALID_JSON_WITH_TRACK_ARTIST = "".join(
    [
        '{"artist":"Various Artists","album":"Sampler","year":null,"discs":[',
        '{"disc_number":1,"tracks":[',
        '{"track_number":1,"title":"Song A","artist":"Artist X"},',
        '{"track_number":2,"title":"Song B","artist":"Artist Y"}',
        "]}",
        "]}",
    ]
)
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_response(content: str) -> MagicMock:
|
||||||
|
"""Erstellt eine Mock-httpx-Response mit dem gegebenen Content."""
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.raise_for_status = MagicMock()
|
||||||
|
resp.json.return_value = {"message": {"content": content}}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_openai_response(content: str) -> MagicMock:
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.raise_for_status = MagicMock()
|
||||||
|
resp.json.return_value = {"choices": [{"message": {"content": content}}]}
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
class TestCallOllama:
    """Exercise `_call_ollama` with `httpx.post` replaced by a mock."""

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_returns_message_content(self, post: MagicMock) -> None:
        """The content string of the mocked reply is returned unchanged."""
        post.return_value = _mock_response("some response")
        answer = _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        assert answer == "some response"

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_sends_correct_url(self, post: MagicMock) -> None:
        """The request goes to ``<base_url>/api/chat`` as first positional argument."""
        post.return_value = _mock_response("")
        _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        url = post.call_args.args[0]
        assert url == "http://localhost:11434/api/chat"

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_sends_model_and_text(self, post: MagicMock) -> None:
        """The JSON payload carries the model name and the text as second message."""
        post.return_value = _mock_response("")
        _call_ollama("mein text", "gemma3:12b", "http://localhost:11434")
        body = post.call_args.kwargs["json"]
        assert body["model"] == "gemma3:12b"
        assert body["messages"][1]["content"] == "mein text"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCallOpenaiCompatible:
    """Exercise `_call_openai_compatible` with `httpx.post` replaced by a mock."""

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_returns_message_content(self, post: MagicMock) -> None:
        """The content of the first choice in the mocked reply is returned."""
        post.return_value = _mock_openai_response("openai reply")
        answer = _call_openai_compatible("text", "gpt-4", "http://api.example.com")
        assert answer == "openai reply"

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_sends_bearer_token_if_api_key(self, post: MagicMock) -> None:
        """A given API key is sent as ``Authorization: Bearer <key>``."""
        post.return_value = _mock_openai_response("")
        _call_openai_compatible("t", "m", "http://x", api_key="secret")
        sent_headers = post.call_args.kwargs["headers"]
        assert sent_headers["Authorization"] == "Bearer secret"

    @patch("musiksammlung.llm_parser.httpx.post")
    def test_no_auth_header_without_api_key(self, post: MagicMock) -> None:
        """Without an API key no ``Authorization`` header is sent."""
        post.return_value = _mock_openai_response("")
        _call_openai_compatible("t", "m", "http://x")
        sent_headers = post.call_args.kwargs["headers"]
        assert "Authorization" not in sent_headers
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseTracklist:
    """Exercise `parse_tracklist` with the backend call functions patched out."""

    def test_successful_parse_ollama(self) -> None:
        """A valid JSON reply from the default backend becomes an album object."""
        with patch("musiksammlung.llm_parser._call_ollama", return_value=VALID_JSON):
            parsed = parse_tracklist("tracklist text")
        assert parsed.artist == "Karajan"
        assert parsed.album == "Beethoven"
        assert parsed.year == 1963
        assert len(parsed.discs[0].tracks) == 1

    def test_successful_parse_openai(self) -> None:
        """With ``backend="openai"`` the OpenAI-compatible call supplies the reply."""
        target = "musiksammlung.llm_parser._call_openai_compatible"
        with patch(target, return_value=VALID_JSON):
            parsed = parse_tracklist("text", backend="openai")
        assert parsed.artist == "Karajan"

    def test_retries_on_invalid_json(self) -> None:
        """An invalid first reply followed by a valid one still yields an album."""
        replies = iter(["definitely not json", VALID_JSON])
        with patch("musiksammlung.llm_parser._call_ollama", side_effect=replies):
            parsed = parse_tracklist("text", max_retries=2)
        assert parsed.artist == "Karajan"

    def test_raises_after_max_retries_exceeded(self) -> None:
        """Replies that never contain JSON end in a ``ValueError``."""
        with patch("musiksammlung.llm_parser._call_ollama", return_value="no json here"):
            with pytest.raises(ValueError, match="kein valides JSON"):
                parse_tracklist("text", max_retries=1)

    def test_json_in_markdown_block(self) -> None:
        """JSON wrapped in a fenced ```json block is still parsed."""
        fenced = f"```json\n{VALID_JSON}\n```"
        with patch("musiksammlung.llm_parser._call_ollama", return_value=fenced):
            parsed = parse_tracklist("text")
        assert parsed.artist == "Karajan"

    def test_track_artist_field_parsed(self) -> None:
        """Per-track ``artist`` values end up on the individual tracks."""
        with patch(
            "musiksammlung.llm_parser._call_ollama",
            return_value=VALID_JSON_WITH_TRACK_ARTIST,
        ):
            parsed = parse_tracklist("text")
        first, second = parsed.discs[0].tracks[0], parsed.discs[0].tracks[1]
        assert first.artist == "Artist X"
        assert second.artist == "Artist Y"

    def test_missing_year_defaults_to_none(self) -> None:
        """A ``null`` year in the reply is exposed as ``None``."""
        json_no_year = (
            '{"artist":"A","album":"B","year":null,'
            '"discs":[{"disc_number":1,"tracks":[{"track_number":1,"title":"T"}]}]}'
        )
        with patch("musiksammlung.llm_parser._call_ollama", return_value=json_no_year):
            parsed = parse_tracklist("text")
        assert parsed.year is None
|
||||||
Loading…
Add table
Add a link
Reference in a new issue