LLM-Parser-Tests, check-Befehl und Cover-Doku
- tests/test_llm_parser.py: 13 Tests für _call_ollama, _call_openai_compatible und parse_tracklist (Retry-Logik, Markdown-Block, Track-Artist, Mock)
- cli: neuer check-Befehl zeigt Tags und Cover-Status aller Audiodateien; ♪ markiert Dateien mit eingebettetem Cover
- BEDIENUNGSANLEITUNG: neuer Abschnitt 7 (check-Befehl), Cover-Konvention (frontcover.jpg/backcover.jpg, Embedding, 500px) in Schritt 3

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
256be0ae33
commit
88b89fbb50
3 changed files with 261 additions and 4 deletions
|
|
@ -10,8 +10,9 @@
|
|||
4. [Schritt 1: CDs rippen](#4-schritt-1-cds-rippen)
|
||||
5. [Schritt 2: Album-Metadaten ermitteln](#5-schritt-2-album-metadaten-ermitteln)
|
||||
6. [Schritt 3: Dateien organisieren und taggen](#6-schritt-3-dateien-organisieren-und-taggen)
|
||||
7. [Komplett-Pipeline](#7-komplett-pipeline)
|
||||
8. [Tipps und Hinweise](#8-tipps-und-hinweise)
|
||||
7. [Tags und Cover prüfen](#7-tags-und-cover-prüfen)
|
||||
8. [Komplett-Pipeline](#8-komplett-pipeline)
|
||||
9. [Tipps und Hinweise](#9-tipps-und-hinweise)
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -299,6 +300,19 @@ Ergebnis:
|
|||
| `--back back.jpg` | Rückseiten-Bild |
|
||||
| `--dry-run` | Nur anzeigen, nichts ändern |
|
||||
|
||||
### Cover-Konvention
|
||||
|
||||
Im Album-Verzeichnis werden folgende Dateinamen erwartet:
|
||||
|
||||
| Datei | Zweck |
|
||||
|-------|-------|
|
||||
| `frontcover.jpg` oder `frontcover.png` | Front-Cover |
|
||||
| `backcover.jpg` oder `backcover.png` | Rückseiten-Cover |
|
||||
|
||||
Symbolische Links auf diese Namen sind erlaubt. `apply` kopiert die mit `--front`/`--back` angegebenen Bilder automatisch als `frontcover.jpg` bzw. `backcover.jpg` ins Album-Verzeichnis und bettet das Frontcover anschließend in alle Audio-Dateien ein (skaliert auf max. 500 px).
|
||||
|
||||
Ist bereits ein `frontcover.*` vorhanden (z.B. bei erneutem `apply`), wird es ohne `--front`-Option verwendet.
|
||||
|
||||
### Dateinamen-Schema
|
||||
|
||||
```
|
||||
|
|
@ -314,7 +328,35 @@ Beispiel: `01_-_Allegro_con_brio_-_Karajan.flac`
|
|||
|
||||
---
|
||||
|
||||
## 7. Komplett-Pipeline
|
||||
## 7. Tags und Cover prüfen
|
||||
|
||||
Nach `apply` lässt sich der Zustand aller Dateien mit einem Befehl prüfen:
|
||||
|
||||
```bash
|
||||
musiksammlung check ~/rip/Beethoven_Sinfonien
|
||||
```
|
||||
|
||||
Ausgabe:
|
||||
|
||||
```
|
||||
Verzeichnis: ~/rip/Beethoven_Sinfonien
|
||||
frontcover: frontcover.jpg
|
||||
backcover: backcover.jpg
|
||||
|
||||
CD1/
|
||||
[♪] 01_-_Allegro_con_brio_-_Karajan.flac
|
||||
Titel: Allegro con brio
|
||||
Künstler: Karajan | AlbumArtist: Karajan
|
||||
Album: Beethoven Sinfonien | Jahr: 1963
|
||||
Track: 1/4 | Disc: 1
|
||||
...
|
||||
```
|
||||
|
||||
`[♪]` zeigt an, dass ein Cover eingebettet ist. `[ ]` bedeutet kein eingebettetes Cover.
|
||||
|
||||
---
|
||||
|
||||
## 8. Komplett-Pipeline
|
||||
|
||||
Für einfache Fälle (alles in einem Schritt):
|
||||
|
||||
|
|
@ -324,7 +366,7 @@ musiksammlung process temp/Album/CD1 ~/Musik --back back.jpg
|
|||
|
||||
---
|
||||
|
||||
## 8. Tipps und Hinweise
|
||||
## 9. Tipps und Hinweise
|
||||
|
||||
**CDDB-Lookup schlägt fehl?**
|
||||
- Internetverbindung prüfen
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@ from musiksammlung.ocr import ocr_images
|
|||
from musiksammlung.organizer import apply_mapping, build_mapping, check_disc_counts
|
||||
from musiksammlung.playlist import generate_playlist
|
||||
from musiksammlung.ripper import RipperConfig, interactive_rip
|
||||
from mutagen import File as MutagenFile
|
||||
|
||||
from musiksammlung.config import AUDIO_EXTENSIONS
|
||||
from musiksammlung.tagger import embed_album_cover, tag_album
|
||||
from musiksammlung.vision_llm import parse_image
|
||||
|
||||
|
|
@ -373,5 +376,81 @@ def process(
|
|||
typer.echo(f"Fertig! Album: {album_dir}")
|
||||
|
||||
|
||||
def _first_tag_value(audio, key: str) -> str:
    """Return the first value stored under *key*, or an em dash if there is none.

    Args:
        audio: Tag container as returned by ``MutagenFile(..., easy=True)``;
            only its ``get`` method is used.
        key: Tag name to look up (e.g. ``"title"``).

    Returns:
        The first element of the tag's value list, or ``"—"`` when the tag is
        missing or empty.
    """
    values = audio.get(key)
    return values[0] if values else "—"


@app.command()
def check(
    directory: Path = typer.Argument(..., help="Album- oder Disc-Verzeichnis"),
) -> None:
    """Zeigt Audio-Tags und Cover-Status aller Dateien in einem Verzeichnis.

    Durchsucht das Verzeichnis rekursiv nach Audiodateien und gibt für jede
    Datei die wichtigsten Tags aus. Zeigt außerdem ob frontcover.jpg/backcover.jpg
    vorhanden sind und ob ein Cover eingebettet ist.
    """
    # NOTE: the docstring above stays in German on purpose: Typer shows a
    # command's docstring as its --help text, so it is user-facing output.
    #
    # Summary: print the album-level cover files found by find_cover(), then
    # list every audio file below `directory` (recursively), grouped by its
    # directory relative to `directory`, with its main tags and a marker that
    # tells whether a cover is embedded. Exits with status 1 when `directory`
    # is not an existing directory.
    from mutagen import MutagenError

    # exists() would also accept a regular file; the command needs a directory.
    if not directory.is_dir():
        typer.echo(f"Fehler: Verzeichnis nicht gefunden: {directory}", err=True)
        raise typer.Exit(1)

    # Cover status on album level.
    front = find_cover(directory, "front")
    back = find_cover(directory, "back")
    typer.echo(f"\nVerzeichnis: {directory}")
    typer.echo(f" frontcover: {front.name if front else '— (fehlt)'}")
    typer.echo(f" backcover: {back.name if back else '— (fehlt)'}")

    # Collect audio files at any depth. Sorting by the parent path *relative
    # to the album root* (instead of only the parent's last component) keeps
    # equally named sub-directories apart and lists files in the root first.
    audio_files: list[Path] = sorted(
        (
            candidate
            for candidate in directory.rglob("*")
            if candidate.is_file() and candidate.suffix.lower() in AUDIO_EXTENSIONS
        ),
        key=lambda p: (p.parent.relative_to(directory).parts, p.name),
    )

    if not audio_files:
        typer.echo("\n Keine Audiodateien gefunden.")
        return

    current_group: str | None = None
    for path in audio_files:
        # "." for files directly in `directory`, otherwise e.g. "CD1".
        group = path.parent.relative_to(directory).as_posix()
        if group != current_group:
            current_group = group
            typer.echo(f"\n {group}/")

        # One damaged file must not abort the report for the whole album.
        try:
            audio = MutagenFile(str(path), easy=True)
        except (MutagenError, OSError) as exc:
            typer.echo(f" {path.name} [Lesefehler: {exc}]")
            continue
        if audio is None:
            typer.echo(f" {path.name} [unlesbares Format]")
            continue

        cover_marker = "♪" if _has_embedded_cover(path) else " "

        typer.echo(
            f" [{cover_marker}] {path.name}\n"
            f" Titel: {_first_tag_value(audio, 'title')}\n"
            f" Künstler: {_first_tag_value(audio, 'artist')} | AlbumArtist: {_first_tag_value(audio, 'albumartist')}\n"
            f" Album: {_first_tag_value(audio, 'album')} | Jahr: {_first_tag_value(audio, 'date')}\n"
            f" Track: {_first_tag_value(audio, 'tracknumber')} | Disc: {_first_tag_value(audio, 'discnumber')}"
        )
|
||||
|
||||
|
||||
def _has_embedded_cover(path: Path) -> bool:
|
||||
"""Prüft ob eine Audiodatei ein eingebettetes Cover enthält."""
|
||||
from mutagen.flac import FLAC
|
||||
from mutagen.mp3 import MP3
|
||||
|
||||
suffix = path.suffix.lower()
|
||||
try:
|
||||
if suffix == ".flac":
|
||||
return bool(FLAC(str(path)).pictures)
|
||||
if suffix == ".mp3":
|
||||
tags = MP3(str(path)).tags
|
||||
return tags is not None and any(k.startswith("APIC") for k in tags.keys())
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
|
|
|||
136
tests/test_llm_parser.py
Normal file
136
tests/test_llm_parser.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
"""Tests für den LLM-Parser (HTTP-Calls via Mock)."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from musiksammlung.llm_parser import _call_ollama, _call_openai_compatible, parse_tracklist
|
||||
|
||||
# Smallest well-formed album payload: one disc with one track, year set.
VALID_JSON = (
    '{"artist":"Karajan",'
    '"album":"Beethoven",'
    '"year":1963,'
    '"discs":[{"disc_number":1,'
    '"tracks":[{"track_number":1,"title":"Allegro"}]}]}'
)

# Sampler payload: null year and a per-track "artist" on both tracks.
VALID_JSON_WITH_TRACK_ARTIST = (
    '{"artist":"Various Artists",'
    '"album":"Sampler",'
    '"year":null,'
    '"discs":[{"disc_number":1,"tracks":['
    '{"track_number":1,"title":"Song A","artist":"Artist X"},'
    '{"track_number":2,"title":"Song B","artist":"Artist Y"}]}]}'
)
|
||||
|
||||
|
||||
def _mock_response(content: str) -> MagicMock:
|
||||
"""Erstellt eine Mock-httpx-Response mit dem gegebenen Content."""
|
||||
resp = MagicMock()
|
||||
resp.raise_for_status = MagicMock()
|
||||
resp.json.return_value = {"message": {"content": content}}
|
||||
return resp
|
||||
|
||||
|
||||
def _mock_openai_response(content: str) -> MagicMock:
|
||||
resp = MagicMock()
|
||||
resp.raise_for_status = MagicMock()
|
||||
resp.json.return_value = {"choices": [{"message": {"content": content}}]}
|
||||
return resp
|
||||
|
||||
|
||||
class TestCallOllama:
    """Checks for ``_call_ollama`` with ``httpx.post`` replaced by a mock."""

    def test_returns_message_content(self) -> None:
        """The content of the mocked response body is returned unchanged."""
        fake = _mock_response("some response")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake):
            reply = _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        assert reply == "some response"

    def test_sends_correct_url(self) -> None:
        """The first positional argument of the POST is ``<base>/api/chat``."""
        fake = _mock_response("")
        with patch(
            "musiksammlung.llm_parser.httpx.post", return_value=fake
        ) as post:
            _call_ollama("text", "gemma3:12b", "http://localhost:11434")
        target = post.call_args.args[0]
        assert target == "http://localhost:11434/api/chat"

    def test_sends_model_and_text(self) -> None:
        """The JSON payload names the model; the second message is the text."""
        fake = _mock_response("")
        with patch(
            "musiksammlung.llm_parser.httpx.post", return_value=fake
        ) as post:
            _call_ollama("mein text", "gemma3:12b", "http://localhost:11434")
        body = post.call_args.kwargs["json"]
        assert body["model"] == "gemma3:12b"
        assert body["messages"][1]["content"] == "mein text"
|
||||
|
||||
|
||||
class TestCallOpenaiCompatible:
    """Checks for ``_call_openai_compatible`` with ``httpx.post`` mocked."""

    def test_returns_message_content(self) -> None:
        """The content of the first choice in the mocked body is returned."""
        fake = _mock_openai_response("openai reply")
        with patch("musiksammlung.llm_parser.httpx.post", return_value=fake):
            reply = _call_openai_compatible("text", "gpt-4", "http://api.example.com")
        assert reply == "openai reply"

    def test_sends_bearer_token_if_api_key(self) -> None:
        """Passing ``api_key`` produces an ``Authorization: Bearer`` header."""
        fake = _mock_openai_response("")
        with patch(
            "musiksammlung.llm_parser.httpx.post", return_value=fake
        ) as post:
            _call_openai_compatible("t", "m", "http://x", api_key="secret")
        sent_headers = post.call_args.kwargs["headers"]
        assert sent_headers["Authorization"] == "Bearer secret"

    def test_no_auth_header_without_api_key(self) -> None:
        """Without ``api_key`` no ``Authorization`` header is sent."""
        fake = _mock_openai_response("")
        with patch(
            "musiksammlung.llm_parser.httpx.post", return_value=fake
        ) as post:
            _call_openai_compatible("t", "m", "http://x")
        sent_headers = post.call_args.kwargs["headers"]
        assert "Authorization" not in sent_headers
|
||||
|
||||
|
||||
class TestParseTracklist:
    """Checks for ``parse_tracklist`` with the backend call functions patched."""

    def test_successful_parse_ollama(self) -> None:
        """With the default backend, a valid payload becomes an album object."""
        with patch("musiksammlung.llm_parser._call_ollama", return_value=VALID_JSON):
            parsed = parse_tracklist("tracklist text")
        assert parsed.artist == "Karajan"
        assert parsed.album == "Beethoven"
        assert parsed.year == 1963
        first_disc = parsed.discs[0]
        assert len(first_disc.tracks) == 1

    def test_successful_parse_openai(self) -> None:
        """``backend="openai"`` goes through ``_call_openai_compatible``."""
        target = "musiksammlung.llm_parser._call_openai_compatible"
        with patch(target, return_value=VALID_JSON):
            parsed = parse_tracklist("text", backend="openai")
        assert parsed.artist == "Karajan"

    def test_retries_on_invalid_json(self) -> None:
        """An unparsable first reply is followed by a second, valid one."""
        replies = ["definitely not json", VALID_JSON]
        with patch("musiksammlung.llm_parser._call_ollama", side_effect=replies):
            parsed = parse_tracklist("text", max_retries=2)
        assert parsed.artist == "Karajan"

    def test_raises_after_max_retries_exceeded(self) -> None:
        """Replies that never contain JSON end in a ``ValueError``."""
        always_bad = patch(
            "musiksammlung.llm_parser._call_ollama", return_value="no json here"
        )
        with always_bad, pytest.raises(ValueError, match="kein valides JSON"):
            parse_tracklist("text", max_retries=1)

    def test_json_in_markdown_block(self) -> None:
        """A payload wrapped in a Markdown ``json`` code fence is accepted."""
        fenced = "```json\n" + VALID_JSON + "\n```"
        with patch("musiksammlung.llm_parser._call_ollama", return_value=fenced):
            parsed = parse_tracklist("text")
        assert parsed.artist == "Karajan"

    def test_track_artist_field_parsed(self) -> None:
        """Per-track ``artist`` values are available on the parsed tracks."""
        with patch(
            "musiksammlung.llm_parser._call_ollama",
            return_value=VALID_JSON_WITH_TRACK_ARTIST,
        ):
            parsed = parse_tracklist("text")
        tracks = parsed.discs[0].tracks
        assert tracks[0].artist == "Artist X"
        assert tracks[1].artist == "Artist Y"

    def test_missing_year_defaults_to_none(self) -> None:
        """A payload whose ``year`` is an explicit ``null`` yields ``None``."""
        payload = (
            '{"artist":"A","album":"B","year":null,'
            '"discs":[{"disc_number":1,"tracks":[{"track_number":1,"title":"T"}]}]}'
        )
        with patch("musiksammlung.llm_parser._call_ollama", return_value=payload):
            parsed = parse_tracklist("text")
        assert parsed.year is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue