"""Tests für die Vision-LLM JSON-Extraktion.""" from pathlib import Path from unittest.mock import MagicMock, patch import pytest from musiksammlung.vision_llm import EAN_PROMPT, _extract_json, extract_barcode_from_image def test_extract_pure_json(): text = '{"artist": "Test", "album": "Album"}' assert '"Test"' in _extract_json(text) def test_extract_json_from_markdown_block(): text = 'Hier ist das Ergebnis:\n```json\n{"artist": "Test"}\n```\nFertig.' assert '"Test"' in _extract_json(text) def test_extract_json_with_thinking_tags(): text = 'Ich denke nach...\n{"artist": "Test", "album": "X"}' result = _extract_json(text) assert '"Test"' in result def test_extract_json_with_surrounding_text(): text = 'Das JSON:\n{"artist": "A", "album": "B"}\nEnde.' result = _extract_json(text) assert '"A"' in result def test_extract_json_empty_raises(): with pytest.raises(ValueError, match="Leere Antwort"): _extract_json("") def test_extract_json_no_json_raises(): with pytest.raises(ValueError, match="Kein JSON"): _extract_json("Hier ist kein JSON, nur Text.") # --------------------------------------------------------------------------- # extract_barcode_from_image # --------------------------------------------------------------------------- def _mock_response(content: str) -> MagicMock: """Erstellt einen Mock-httpx-Response mit gegebenem LLM-Inhalt.""" resp = MagicMock() resp.json.return_value = {"message": {"content": content}} resp.raise_for_status.return_value = None return resp class TestExtractBarcodeFromImage: """Tests für extract_barcode_from_image.""" def test_returns_digits_from_plain_response(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", return_value=_mock_response("4006408262121")): result = extract_barcode_from_image(img) assert result == "4006408262121" def test_strips_thinking_tags(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") raw = "Ich suche den Barcode...\n4006408262121" with patch("httpx.post", return_value=_mock_response(raw)): result = extract_barcode_from_image(img) assert result == "4006408262121" def test_extracts_digits_from_surrounding_text(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") raw = "Der EAN-Barcode lautet: 0028943753227." with patch("httpx.post", return_value=_mock_response(raw)): result = extract_barcode_from_image(img) assert result == "0028943753227" def test_returns_none_when_no_digits(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", return_value=_mock_response("Kein Barcode erkannt.")): result = extract_barcode_from_image(img) assert result is None def test_returns_none_on_empty_response(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", return_value=_mock_response("")): result = extract_barcode_from_image(img) assert result is None def test_returns_none_on_exception(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", side_effect=Exception("connection refused")): result = extract_barcode_from_image(img) assert result is None def test_sends_correct_model_and_url(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", return_value=_mock_response("1234567890123")) as mock_post: extract_barcode_from_image(img, model="my-model", base_url="http://myhost:11434") url, = mock_post.call_args.args assert url == "http://myhost:11434/api/chat" assert mock_post.call_args.kwargs["json"]["model"] == "my-model" def test_uses_ean_prompt(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"fake") with patch("httpx.post", return_value=_mock_response("9783161484100")) as mock_post: extract_barcode_from_image(img) messages = mock_post.call_args.kwargs["json"]["messages"] assert messages[0]["content"] == EAN_PROMPT def test_image_is_base64_encoded_in_request(self, tmp_path: Path) -> None: img = tmp_path / "cover.jpg" img.write_bytes(b"test image data") with patch("httpx.post", return_value=_mock_response("0000000000000")) as mock_post: extract_barcode_from_image(img) messages = mock_post.call_args.kwargs["json"]["messages"] import base64 expected_b64 = base64.b64encode(b"test image data").decode() assert messages[0]["images"] == [expected_b64]