"""Tests for the OCR functions (subprocess is mocked)."""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
from pathlib import Path
|
|||
|
|
from unittest.mock import MagicMock, patch
|
|||
|
|
|
|||
|
|
from PIL import Image
|
|||
|
|
|
|||
|
|
from musiksammlung.ocr import _detect_and_fix_rotation, ocr_images, run_ocr
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _fake_run(stdout: str = "", returncode: int = 0) -> MagicMock:
|
|||
|
|
"""Erstellt ein Mock-subprocess.CompletedProcess."""
|
|||
|
|
result = MagicMock()
|
|||
|
|
result.returncode = returncode
|
|||
|
|
result.stdout = stdout
|
|||
|
|
result.stderr = ""
|
|||
|
|
return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _make_image(path: Path, size: tuple[int, int] = (100, 100)) -> Path:
    """Write a solid-colour RGB JPEG to ``path`` and return ``path``.

    Args:
        path: Destination file for the generated image.
        size: ``(width, height)`` passed to ``Image.new``.

    Returns:
        The same ``path`` that was passed in, so calls can be used inline.
    """
    image = Image.new("RGB", size, color=(200, 100, 50))
    # Format is given explicitly so it does not depend on the file suffix.
    image.save(str(path), format="JPEG")
    return path
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# run_ocr
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestRunOcr:
    """Tests for ``run_ocr`` with ``musiksammlung.ocr.subprocess.run`` mocked."""

    def test_returns_stdout_on_success(self, tmp_path: Path) -> None:
        """The mocked stdout is returned without its trailing newline."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run("Trackliste\n")):
            result = run_ocr(img)
        assert result == "Trackliste"

    def test_calls_tesseract_with_correct_args(self, tmp_path: Path) -> None:
        """The command passed to ``subprocess.run`` names tesseract, image and language."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="deu+eng")
        # First positional argument of the recorded call is the command list.
        cmd = mock_run.call_args.args[0]
        assert cmd[0] == "tesseract"
        assert str(img) in cmd
        assert "deu+eng" in cmd
        assert "--psm" in cmd

    def test_raises_on_nonzero_returncode(self, tmp_path: Path) -> None:
        """A non-zero return code surfaces as ``RuntimeError`` mentioning Tesseract."""
        img = _make_image(tmp_path / "test.jpg")
        with patch(
            "musiksammlung.ocr.subprocess.run",
            return_value=_fake_run("", returncode=1),
        ):
            try:
                run_ocr(img)
            except RuntimeError as e:
                assert "Tesseract" in str(e)
            else:
                # Raised explicitly: ``assert False`` would vanish under ``-O``.
                raise AssertionError("RuntimeError expected")

    def test_custom_language(self, tmp_path: Path) -> None:
        """A caller-supplied language code is forwarded in the command."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="fra")
        cmd = mock_run.call_args.args[0]
        assert "fra" in cmd
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# _detect_and_fix_rotation
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestDetectAndFixRotation:
    """Tests for ``_detect_and_fix_rotation`` with ``subprocess.run`` mocked."""

    def test_no_rotation_needed(self) -> None:
        """An OSD report of 0 degrees leaves the image size untouched."""
        img = Image.new("L", (200, 100))
        osd_output = "Rotate: 0\nOrientation in degrees: 0\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        assert result.size == (200, 100)

    def test_rotates_90_degrees(self) -> None:
        """An OSD report of 90 degrees swaps width and height."""
        img = Image.new("L", (200, 100))  # landscape
        osd_output = "Rotate: 90\nOrientation in degrees: 90\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        # 200x100 rotated by 90 degrees becomes 100x200.
        assert result.size == (100, 200)

    def test_rotates_180_degrees(self) -> None:
        """An OSD report of 180 degrees keeps the image size."""
        img = Image.new("L", (200, 100))
        osd_output = "Rotate: 180\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        # A 180 degree rotation does not change the size.
        assert result.size == (200, 100)

    def test_fallback_brute_force_when_osd_fails(self) -> None:
        """After a failing first call, further subprocess calls are made."""
        img = Image.new("L", (200, 100))
        # Every command passed to the patched subprocess.run, in call order.
        calls: list[object] = []

        def side_effect(cmd, **kwargs):
            calls.append(cmd)
            if len(calls) == 1:
                # First call (OSD) fails.
                return _fake_run("", returncode=1)
            # Later calls (brute force): the third call overall returns the
            # most letters -- presumably the 90 degree candidate.
            if "--psm" in cmd and "6" in cmd:
                return _fake_run("Allegro Andante Beethoven" if len(calls) == 3 else "a")
            return _fake_run("")

        with patch("musiksammlung.ocr.subprocess.run", side_effect=side_effect):
            _detect_and_fix_rotation(img)
        # The fallback ran: at least one call followed the failed OSD call.
        assert len(calls) >= 2
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
# ocr_images
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestOcrImages:
    """Tests for ``ocr_images`` with ``preprocess_image`` and ``run_ocr`` mocked."""

    @staticmethod
    def _fake_preprocessed(tmp_path: Path) -> Path:
        """Create a placeholder file standing in for a preprocessed image."""
        preprocessed = tmp_path / "pre.png"
        preprocessed.write_bytes(b"fake")
        return preprocessed

    def test_returns_text_for_single_image(self, tmp_path: Path) -> None:
        """A single image yields exactly the text returned by ``run_ocr``."""
        img = _make_image(tmp_path / "img.jpg")

        # preprocess_image is stubbed to hand back the source image itself.
        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=img),
            patch("musiksammlung.ocr.run_ocr", return_value="Track 1\nTrack 2"),
        ):
            result = ocr_images([img])

        assert result == "Track 1\nTrack 2"

    def test_concatenates_multiple_images(self, tmp_path: Path) -> None:
        """Texts of several images are joined with a blank line in between."""
        img1 = _make_image(tmp_path / "img1.jpg")
        img2 = _make_image(tmp_path / "img2.jpg")
        preprocessed = self._fake_preprocessed(tmp_path)

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", side_effect=["Text A", "Text B"]),
        ):
            result = ocr_images([img1, img2])

        assert result == "Text A\n\nText B"

    def test_skips_empty_ocr_result(self, tmp_path: Path) -> None:
        """An empty OCR result produces an empty overall result."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = self._fake_preprocessed(tmp_path)

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", return_value=""),
        ):
            result = ocr_images([img])

        assert result == ""

    def test_cleans_up_temp_file(self, tmp_path: Path) -> None:
        """The file returned by ``preprocess_image`` is gone afterwards."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = self._fake_preprocessed(tmp_path)

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", return_value="text"),
        ):
            ocr_images([img])

        # The temporary file was deleted.
        assert not preprocessed.exists()

    def test_cleans_up_even_on_error(self, tmp_path: Path) -> None:
        """The preprocessed file is removed even when ``run_ocr`` raises."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = self._fake_preprocessed(tmp_path)

        err = RuntimeError("Tesseract fehlgeschlagen")
        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", side_effect=err),
        ):
            try:
                ocr_images([img])
            except RuntimeError:
                # Only the cleanup is under test here; a propagated error is
                # tolerated but not required.
                pass

        assert not preprocessed.exists()
|