Musiksammlung/tests/test_ocr.py
dschlueter 795be8609a Opus/M4A-Cover-Embedding, cover.py-Tests und OCR-Tests
- tagger.py: embed_cover() unterstützt jetzt .opus (Vorbis-Comment
  METADATA_BLOCK_PICTURE) und .m4a (MP4Cover); imports ergänzt
- test_tagger.py: 2 neue Tests für Opus/M4A; minimale Audio-Fixtures
  als base64-Konstanten (176 B Opus, 856 B M4A)
- test_cover.py: TestPrepareCover (5 Tests) und TestCopyCovers (6 Tests)
  für prepare_cover() und copy_covers()
- test_ocr.py: 13 Tests für run_ocr(), _detect_and_fix_rotation()
  und ocr_images(); Tesseract via subprocess.run gemockt

144 Tests, 0 Fehler

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 04:50:13 +01:00

191 lines
7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests für OCR-Funktionen (subprocess via Mock)."""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
from PIL import Image
from musiksammlung.ocr import _detect_and_fix_rotation, ocr_images, run_ocr
def _fake_run(stdout: str = "", returncode: int = 0) -> MagicMock:
"""Erstellt ein Mock-subprocess.CompletedProcess."""
result = MagicMock()
result.returncode = returncode
result.stdout = stdout
result.stderr = ""
return result
def _make_image(path: Path, size: tuple[int, int] = (100, 100)) -> Path:
    """Write a single-colour RGB image as JPEG to ``path`` and return ``path``.

    Args:
        path: Destination file for the generated image.
        size: ``(width, height)`` passed to ``Image.new``.

    Returns:
        The same ``path`` that was passed in, for convenient chaining.
    """
    canvas = Image.new("RGB", size, color=(200, 100, 50))
    canvas.save(str(path), "JPEG")
    return path
# ---------------------------------------------------------------------------
# run_ocr
# ---------------------------------------------------------------------------
class TestRunOcr:
    """Tests for ``run_ocr`` with ``subprocess.run`` replaced by a mock."""

    def test_returns_stdout_on_success(self, tmp_path: Path) -> None:
        """The mocked stdout ``"Trackliste\\n"`` is expected back as ``"Trackliste"``."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run("Trackliste\n")):
            result = run_ocr(img)
        assert result == "Trackliste"

    def test_calls_tesseract_with_correct_args(self, tmp_path: Path) -> None:
        """The command list starts with ``tesseract`` and carries image, language and ``--psm``."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="deu+eng")
        # First positional argument of the mocked subprocess.run call.
        args = mock_run.call_args[0][0]
        assert args[0] == "tesseract"
        assert str(img) in args
        assert "deu+eng" in args
        assert "--psm" in args

    def test_raises_on_nonzero_returncode(self, tmp_path: Path) -> None:
        """A mocked return code of 1 must surface as a ``RuntimeError`` mentioning Tesseract."""
        img = _make_image(tmp_path / "test.jpg")
        with patch(
            "musiksammlung.ocr.subprocess.run",
            return_value=_fake_run("", returncode=1),
        ):
            try:
                run_ocr(img)
            except RuntimeError as e:
                assert "Tesseract" in str(e)
            else:
                # Raise explicitly instead of ``assert False``: assert statements
                # are removed under ``python -O``, which would let a missing
                # exception go unnoticed.
                raise AssertionError("RuntimeError expected")

    def test_custom_language(self, tmp_path: Path) -> None:
        """A custom ``languages`` value shows up in the command list."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="fra")
        args = mock_run.call_args[0][0]
        assert "fra" in args
# ---------------------------------------------------------------------------
# _detect_and_fix_rotation
# ---------------------------------------------------------------------------
class TestDetectAndFixRotation:
    """Tests for ``_detect_and_fix_rotation`` with ``subprocess.run`` mocked."""

    def test_no_rotation_needed(self) -> None:
        """An OSD output of ``Rotate: 0`` must leave the image size untouched."""
        source = Image.new("L", (200, 100))
        osd_reply = _fake_run("Rotate: 0\nOrientation in degrees: 0\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd_reply):
            fixed = _detect_and_fix_rotation(source)
        assert fixed.size == (200, 100)

    def test_rotates_90_degrees(self) -> None:
        """An OSD output of ``Rotate: 90`` must swap width and height."""
        source = Image.new("L", (200, 100))  # landscape
        osd_reply = _fake_run("Rotate: 90\nOrientation in degrees: 90\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd_reply):
            fixed = _detect_and_fix_rotation(source)
        # 200x100 rotated by 90 degrees becomes 100x200.
        assert fixed.size == (100, 200)

    def test_rotates_180_degrees(self) -> None:
        """An OSD output of ``Rotate: 180`` must keep the image size."""
        source = Image.new("L", (200, 100))
        osd_reply = _fake_run("Rotate: 180\n")
        with patch("musiksammlung.ocr.subprocess.run", return_value=osd_reply):
            fixed = _detect_and_fix_rotation(source)
        # A 180 degree rotation does not change the dimensions.
        assert fixed.size == (200, 100)

    def test_fallback_brute_force_when_osd_fails(self) -> None:
        """A failing first (OSD) call must be followed by further subprocess calls."""
        source = Image.new("L", (200, 100))
        calls = 0

        def side_effect(cmd, **kwargs):
            nonlocal calls
            calls += 1
            if calls == 1:
                # The first invocation (OSD) reports failure.
                return _fake_run("", returncode=1)
            if "--psm" in cmd and "6" in cmd:
                # Brute-force invocations: only the third call overall returns
                # a long text (presumably the 90 degree candidate -- verify
                # against the implementation); all others return "a".
                text = "Allegro Andante Beethoven" if calls == 3 else "a"
                return _fake_run(text)
            return _fake_run("")

        with patch("musiksammlung.ocr.subprocess.run", side_effect=side_effect):
            _detect_and_fix_rotation(source)
        # The fallback ran if anything was invoked after the failed OSD call.
        # NOTE(review): only >= 2 calls are required here; tighten this if the
        # brute-force pass is known to always try a fixed number of rotations.
        assert calls >= 2
# ---------------------------------------------------------------------------
# ocr_images
# ---------------------------------------------------------------------------
class TestOcrImages:
    """Tests for ``ocr_images`` with ``preprocess_image`` and ``run_ocr`` mocked."""

    @staticmethod
    def _placeholder_file(tmp_path: Path) -> Path:
        """Create the dummy file handed out by the mocked ``preprocess_image``."""
        target = tmp_path / "pre.png"
        target.write_bytes(b"fake")
        return target

    def test_returns_text_for_single_image(self, tmp_path: Path) -> None:
        """For one image the mocked OCR text is returned unchanged."""
        source = _make_image(tmp_path / "img.jpg")
        fake_preprocess = patch("musiksammlung.ocr.preprocess_image", return_value=source)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="Track 1\nTrack 2")
        with fake_preprocess, fake_ocr:
            text = ocr_images([source])
        assert text == "Track 1\nTrack 2"

    def test_concatenates_multiple_images(self, tmp_path: Path) -> None:
        """Texts of several images are expected joined by a blank line."""
        first = _make_image(tmp_path / "img1.jpg")
        second = _make_image(tmp_path / "img2.jpg")
        placeholder = self._placeholder_file(tmp_path)
        fake_preprocess = patch("musiksammlung.ocr.preprocess_image", return_value=placeholder)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", side_effect=["Text A", "Text B"])
        with fake_preprocess, fake_ocr:
            text = ocr_images([first, second])
        assert text == "Text A\n\nText B"

    def test_skips_empty_ocr_result(self, tmp_path: Path) -> None:
        """An empty OCR result yields an empty overall text."""
        source = _make_image(tmp_path / "img.jpg")
        placeholder = self._placeholder_file(tmp_path)
        fake_preprocess = patch("musiksammlung.ocr.preprocess_image", return_value=placeholder)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="")
        with fake_preprocess, fake_ocr:
            text = ocr_images([source])
        assert text == ""

    def test_cleans_up_temp_file(self, tmp_path: Path) -> None:
        """The file returned by ``preprocess_image`` is gone after a successful run."""
        source = _make_image(tmp_path / "img.jpg")
        placeholder = self._placeholder_file(tmp_path)
        fake_preprocess = patch("musiksammlung.ocr.preprocess_image", return_value=placeholder)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", return_value="text")
        with fake_preprocess, fake_ocr:
            ocr_images([source])
        # The temporary file must have been deleted.
        assert not placeholder.exists()

    def test_cleans_up_even_on_error(self, tmp_path: Path) -> None:
        """The file returned by ``preprocess_image`` is gone even when OCR raises."""
        source = _make_image(tmp_path / "img.jpg")
        placeholder = self._placeholder_file(tmp_path)
        failure = RuntimeError("Tesseract fehlgeschlagen")
        fake_preprocess = patch("musiksammlung.ocr.preprocess_image", return_value=placeholder)
        fake_ocr = patch("musiksammlung.ocr.run_ocr", side_effect=failure)
        with fake_preprocess, fake_ocr:
            try:
                ocr_images([source])
            except RuntimeError:
                # Only the cleanup is under test here; the error itself is tolerated.
                pass
        assert not placeholder.exists()