Musiksammlung/tests/test_ocr.py

191 lines
7 KiB
Python
Raw Permalink Normal View History

"""Tests für OCR-Funktionen (subprocess via Mock)."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
from PIL import Image
from musiksammlung.ocr import _detect_and_fix_rotation, ocr_images, run_ocr
def _fake_run(stdout: str = "", returncode: int = 0) -> MagicMock:
"""Erstellt ein Mock-subprocess.CompletedProcess."""
result = MagicMock()
result.returncode = returncode
result.stdout = stdout
result.stderr = ""
return result
def _make_image(path: Path, size: tuple[int, int] = (100, 100)) -> Path:
    """Write a single-colour RGB JPEG to ``path`` and return ``path``.

    Args:
        path: Destination file for the generated image.
        size: Size tuple handed to ``Image.new``.

    Returns:
        The same ``path`` that was passed in.
    """
    picture = Image.new("RGB", size, color=(200, 100, 50))
    picture.save(str(path), "JPEG")
    return path
# ---------------------------------------------------------------------------
# run_ocr
# ---------------------------------------------------------------------------
class TestRunOcr:
    """Tests for ``run_ocr`` with ``subprocess.run`` replaced by a mock."""

    def test_returns_stdout_on_success(self, tmp_path: Path) -> None:
        """The mocked stdout is returned without its trailing newline."""
        img = _make_image(tmp_path / "test.jpg")
        completed = _fake_run("Trackliste\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=completed):
            result = run_ocr(img)
        assert result == "Trackliste"

    def test_calls_tesseract_with_correct_args(self, tmp_path: Path) -> None:
        """The command list names tesseract, the image, the languages and --psm."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="deu+eng")
        # First positional argument of subprocess.run is the command list.
        args = mock_run.call_args[0][0]
        assert args[0] == "tesseract"
        assert str(img) in args
        assert "deu+eng" in args
        assert "--psm" in args

    def test_raises_on_nonzero_returncode(self, tmp_path: Path) -> None:
        """A non-zero return code surfaces as a RuntimeError mentioning Tesseract."""
        img = _make_image(tmp_path / "test.jpg")
        failed = _fake_run("", returncode=1)
        with patch("musiksammlung.ocr.subprocess.run", return_value=failed):
            try:
                run_ocr(img)
            except RuntimeError as exc:
                assert "Tesseract" in str(exc)
            else:
                # Explicit raise instead of ``assert False``: the latter is
                # removed under ``python -O`` and would let a missing
                # exception go unnoticed.
                raise AssertionError("RuntimeError expected")

    def test_custom_language(self, tmp_path: Path) -> None:
        """A custom ``languages`` value shows up in the command list."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="fra")
        args = mock_run.call_args[0][0]
        assert "fra" in args
# ---------------------------------------------------------------------------
# _detect_and_fix_rotation
# ---------------------------------------------------------------------------
class TestDetectAndFixRotation:
    """Tests for ``_detect_and_fix_rotation`` with mocked subprocess calls."""

    def test_no_rotation_needed(self) -> None:
        """An OSD report of 0 degrees leaves the image size unchanged."""
        source = Image.new("L", (200, 100))
        osd = _fake_run("Rotate: 0\nOrientation in degrees: 0\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd):
            fixed = _detect_and_fix_rotation(source)
        assert fixed.size == (200, 100)

    def test_rotates_90_degrees(self) -> None:
        """An OSD report of 90 degrees swaps width and height."""
        source = Image.new("L", (200, 100))  # landscape
        osd = _fake_run("Rotate: 90\nOrientation in degrees: 90\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd):
            fixed = _detect_and_fix_rotation(source)
        # 200x100 rotated by 90 degrees -> 100x200
        assert fixed.size == (100, 200)

    def test_rotates_180_degrees(self) -> None:
        """An OSD report of 180 degrees keeps the image size."""
        source = Image.new("L", (200, 100))
        osd = _fake_run("Rotate: 180\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd):
            fixed = _detect_and_fix_rotation(source)
        # A 180 degree rotation does not change the size.
        assert fixed.size == (200, 100)

    def test_fallback_brute_force_when_osd_fails(self) -> None:
        """A failing first call must be followed by further subprocess calls."""
        source = Image.new("L", (200, 100))
        calls = 0

        def fake_subprocess(cmd, **kwargs):
            nonlocal calls
            calls += 1
            if calls == 1:
                # The first call (OSD) fails.
                return _fake_run("", returncode=1)
            # Later calls (brute force): the third call overall yields the
            # most letters; presumably that is the 90 degree attempt.
            if "--psm" in cmd and "6" in cmd:
                text = "Allegro Andante Beethoven" if calls == 3 else "a"
                return _fake_run(text)
            return _fake_run("")

        with patch("musiksammlung.ocr.subprocess.run", side_effect=fake_subprocess):
            _detect_and_fix_rotation(source)
        # Only checks that at least one more call followed the failed OSD call.
        # NOTE(review): the earlier comment spoke of at least 4 subprocess
        # calls, which is stricter than this bound - confirm which is intended.
        assert calls >= 2
# ---------------------------------------------------------------------------
# ocr_images
# ---------------------------------------------------------------------------
class TestOcrImages:
    """Tests for ``ocr_images`` with preprocessing and OCR both mocked."""

    @staticmethod
    def _stub_preprocessed(tmp_path: Path) -> Path:
        """Create the placeholder file that the mocked preprocessing returns."""
        stub = tmp_path / "pre.png"
        stub.write_bytes(b"fake")
        return stub

    def test_returns_text_for_single_image(self, tmp_path: Path) -> None:
        """The OCR text of a single image is returned as is."""
        picture = _make_image(tmp_path / "img.jpg")
        fake_pre = patch("musiksammlung.ocr.preprocess_image", return_value=picture)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="Track 1\nTrack 2")
        with fake_pre, fake_ocr:
            text = ocr_images([picture])
        assert text == "Track 1\nTrack 2"

    def test_concatenates_multiple_images(self, tmp_path: Path) -> None:
        """Texts of several images are joined with a blank line between them."""
        first = _make_image(tmp_path / "img1.jpg")
        second = _make_image(tmp_path / "img2.jpg")
        stub = self._stub_preprocessed(tmp_path)
        fake_pre = patch("musiksammlung.ocr.preprocess_image", return_value=stub)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", side_effect=["Text A", "Text B"])
        with fake_pre, fake_ocr:
            text = ocr_images([first, second])
        assert text == "Text A\n\nText B"

    def test_skips_empty_ocr_result(self, tmp_path: Path) -> None:
        """An empty OCR result produces an empty overall text."""
        picture = _make_image(tmp_path / "img.jpg")
        stub = self._stub_preprocessed(tmp_path)
        fake_pre = patch("musiksammlung.ocr.preprocess_image", return_value=stub)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="")
        with fake_pre, fake_ocr:
            text = ocr_images([picture])
        assert text == ""

    def test_cleans_up_temp_file(self, tmp_path: Path) -> None:
        """The file returned by preprocessing is gone after a successful run."""
        picture = _make_image(tmp_path / "img.jpg")
        stub = self._stub_preprocessed(tmp_path)
        fake_pre = patch("musiksammlung.ocr.preprocess_image", return_value=stub)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="text")
        with fake_pre, fake_ocr:
            ocr_images([picture])
        # The temporary file has been deleted.
        assert not stub.exists()

    def test_cleans_up_even_on_error(self, tmp_path: Path) -> None:
        """The file returned by preprocessing is gone even when OCR raises."""
        picture = _make_image(tmp_path / "img.jpg")
        stub = self._stub_preprocessed(tmp_path)
        failure = RuntimeError("Tesseract fehlgeschlagen")
        fake_pre = patch("musiksammlung.ocr.preprocess_image", return_value=stub)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", side_effect=failure)
        with fake_pre, fake_ocr:
            try:
                ocr_images([picture])
            except RuntimeError:
                # The mocked OCR error is ignored on purpose; only the
                # cleanup below is under test.
                pass
        assert not stub.exists()