Musiksammlung/tests/test_vision_llm.py
dschlueter 55c71823d1 Add tests for extract_barcode_from_image
9 new test cases covering: plain digit response, thinking-tag stripping,
digit extraction from surrounding text, empty/no-digit response → None,
exception handling → None, correct model/URL forwarding, EAN_PROMPT usage,
and base64 image encoding in request payload.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-19 14:34:59 +01:00

127 lines
4.9 KiB
Python

"""Tests für die Vision-LLM JSON-Extraktion."""
import base64
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from musiksammlung.vision_llm import EAN_PROMPT, _extract_json, extract_barcode_from_image
def test_extract_pure_json() -> None:
    """The result for a bare JSON object still contains the artist value."""
    text = '{"artist": "Test", "album": "Album"}'
    assert '"Test"' in _extract_json(text)
def test_extract_json_from_markdown_block() -> None:
    """JSON inside a fenced ```json block with text around it is still found."""
    text = 'Hier ist das Ergebnis:\n```json\n{"artist": "Test"}\n```\nFertig.'
    assert '"Test"' in _extract_json(text)
def test_extract_json_with_thinking_tags() -> None:
    """A leading <think>...</think> section does not hide the JSON that follows."""
    text = '<think>Ich denke nach...</think>\n{"artist": "Test", "album": "X"}'
    result = _extract_json(text)
    assert '"Test"' in result
def test_extract_json_with_surrounding_text() -> None:
    """JSON preceded and followed by plain text lines is still found."""
    text = 'Das JSON:\n{"artist": "A", "album": "B"}\nEnde.'
    result = _extract_json(text)
    assert '"A"' in result
def test_extract_json_empty_raises() -> None:
    """An empty string raises ValueError whose message matches "Leere Antwort"."""
    with pytest.raises(ValueError, match="Leere Antwort"):
        _extract_json("")
def test_extract_json_no_json_raises() -> None:
    """Text without any JSON raises ValueError whose message matches "Kein JSON"."""
    with pytest.raises(ValueError, match="Kein JSON"):
        _extract_json("Hier ist kein JSON, nur Text.")
# ---------------------------------------------------------------------------
# extract_barcode_from_image
# ---------------------------------------------------------------------------
def _mock_response(content: str) -> MagicMock:
    """Build a mock httpx response that carries the given LLM content.

    Args:
        content: Text to expose as the message content of the reply.

    Returns:
        A MagicMock whose ``json()`` returns
        ``{"message": {"content": content}}`` and whose
        ``raise_for_status()`` returns ``None``.
    """
    resp = MagicMock()
    resp.json.return_value = {"message": {"content": content}}
    resp.raise_for_status.return_value = None
    return resp
class TestExtractBarcodeFromImage:
    """Tests for extract_barcode_from_image.

    Every test patches ``httpx.post`` and feeds the function a small file
    created under pytest's ``tmp_path``.
    """

    @staticmethod
    def _write_image(tmp_path: Path, data: bytes = b"fake") -> Path:
        """Write *data* to ``cover.jpg`` inside *tmp_path* and return the path.

        The name has no ``test`` prefix, so pytest does not collect it.
        """
        img = tmp_path / "cover.jpg"
        img.write_bytes(data)
        return img

    def test_returns_digits_from_plain_response(self, tmp_path: Path) -> None:
        """A reply consisting only of digits is returned as-is."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", return_value=_mock_response("4006408262121")):
            result = extract_barcode_from_image(img)
        assert result == "4006408262121"

    def test_strips_thinking_tags(self, tmp_path: Path) -> None:
        """A leading <think>...</think> section is not part of the result."""
        img = self._write_image(tmp_path)
        raw = "<think>Ich suche den Barcode...</think>\n4006408262121"
        with patch("httpx.post", return_value=_mock_response(raw)):
            result = extract_barcode_from_image(img)
        assert result == "4006408262121"

    def test_extracts_digits_from_surrounding_text(self, tmp_path: Path) -> None:
        """Digits embedded in a sentence are extracted, leading zeros kept."""
        img = self._write_image(tmp_path)
        raw = "Der EAN-Barcode lautet: 0028943753227."
        with patch("httpx.post", return_value=_mock_response(raw)):
            result = extract_barcode_from_image(img)
        assert result == "0028943753227"

    def test_returns_none_when_no_digits(self, tmp_path: Path) -> None:
        """A reply without any digits yields None."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", return_value=_mock_response("Kein Barcode erkannt.")):
            result = extract_barcode_from_image(img)
        assert result is None

    def test_returns_none_on_empty_response(self, tmp_path: Path) -> None:
        """An empty reply yields None."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", return_value=_mock_response("")):
            result = extract_barcode_from_image(img)
        assert result is None

    def test_returns_none_on_exception(self, tmp_path: Path) -> None:
        """An exception raised by httpx.post results in None, not a raise."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", side_effect=Exception("connection refused")):
            result = extract_barcode_from_image(img)
        assert result is None

    def test_sends_correct_model_and_url(self, tmp_path: Path) -> None:
        """The model name and the base URL plus ``/api/chat`` reach httpx.post."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", return_value=_mock_response("1234567890123")) as mock_post:
            extract_barcode_from_image(img, model="my-model", base_url="http://myhost:11434")
        # Single-element unpacking also checks that exactly one positional
        # argument (the URL) was passed.
        (url,) = mock_post.call_args.args
        assert url == "http://myhost:11434/api/chat"
        assert mock_post.call_args.kwargs["json"]["model"] == "my-model"

    def test_uses_ean_prompt(self, tmp_path: Path) -> None:
        """The first message of the request carries EAN_PROMPT as content."""
        img = self._write_image(tmp_path)
        with patch("httpx.post", return_value=_mock_response("9783161484100")) as mock_post:
            extract_barcode_from_image(img)
        messages = mock_post.call_args.kwargs["json"]["messages"]
        assert messages[0]["content"] == EAN_PROMPT

    def test_image_is_base64_encoded_in_request(self, tmp_path: Path) -> None:
        """The first message carries the file bytes base64-encoded in ``images``."""
        img = self._write_image(tmp_path, b"test image data")
        with patch("httpx.post", return_value=_mock_response("0000000000000")) as mock_post:
            extract_barcode_from_image(img)
        messages = mock_post.call_args.kwargs["json"]["messages"]
        expected_b64 = base64.b64encode(b"test image data").decode()
        assert messages[0]["images"] == [expected_b64]