Opus/M4A-Cover-Embedding, cover.py-Tests und OCR-Tests
- tagger.py: embed_cover() unterstützt jetzt .opus (Vorbis-Comment METADATA_BLOCK_PICTURE) und .m4a (MP4Cover); Imports ergänzt
- test_tagger.py: 2 neue Tests für Opus/M4A; minimale Audio-Fixtures als base64-Konstanten (176 B Opus, 856 B M4A)
- test_cover.py: TestPrepareCover (5 Tests) und TestCopyCovers (6 Tests) für prepare_cover() und copy_covers()
- test_ocr.py: 13 Tests für run_ocr(), _detect_and_fix_rotation() und ocr_images(); Tesseract via subprocess.run gemockt

144 Tests, 0 Fehler

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
cfc2a2018e
commit
795be8609a
4 changed files with 343 additions and 1 deletions
191
tests/test_ocr.py
Normal file
191
tests/test_ocr.py
Normal file
|
|
@@ -0,0 +1,191 @@
|
|||
"""Tests für OCR-Funktionen (subprocess via Mock)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from musiksammlung.ocr import _detect_and_fix_rotation, ocr_images, run_ocr
|
||||
|
||||
|
||||
def _fake_run(stdout: str = "", returncode: int = 0) -> MagicMock:
|
||||
"""Erstellt ein Mock-subprocess.CompletedProcess."""
|
||||
result = MagicMock()
|
||||
result.returncode = returncode
|
||||
result.stdout = stdout
|
||||
result.stderr = ""
|
||||
return result
|
||||
|
||||
|
||||
def _make_image(path: Path, size: tuple[int, int] = (100, 100)) -> Path:
    """Write a solid-colour RGB image to *path* as JPEG and return *path*.

    Args:
        path: Destination file for the generated image.
        size: ``(width, height)`` of the image in pixels.

    Returns:
        The same *path* that was passed in, so the call can be used inline.
    """
    image = Image.new("RGB", size, color=(200, 100, 50))
    image.save(str(path), "JPEG")
    return path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_ocr
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRunOcr:
    """Tests for ``run_ocr`` with ``subprocess.run`` replaced by a mock."""

    def test_returns_stdout_on_success(self, tmp_path: Path) -> None:
        """The mocked stdout is returned without its trailing newline."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run("Trackliste\n")):
            result = run_ocr(img)
        assert result == "Trackliste"

    def test_calls_tesseract_with_correct_args(self, tmp_path: Path) -> None:
        """The command list names tesseract, the image, the languages and ``--psm``."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="deu+eng")
        # First positional argument of subprocess.run is the command list.
        args = mock_run.call_args[0][0]
        assert args[0] == "tesseract"
        assert str(img) in args
        assert "deu+eng" in args
        assert "--psm" in args

    def test_raises_on_nonzero_returncode(self, tmp_path: Path) -> None:
        """A non-zero exit status must surface as a ``RuntimeError``."""
        img = _make_image(tmp_path / "test.jpg")
        with patch(
            "musiksammlung.ocr.subprocess.run",
            return_value=_fake_run("", returncode=1),
        ):
            try:
                run_ocr(img)
            except RuntimeError as exc:
                assert "Tesseract" in str(exc)
            else:
                # Explicit raise instead of ``assert False``: asserts are
                # stripped under ``python -O``, which would let a missing
                # exception pass silently.
                raise AssertionError("RuntimeError expected")

    def test_custom_language(self, tmp_path: Path) -> None:
        """A caller-supplied language code is forwarded in the command list."""
        img = _make_image(tmp_path / "test.jpg")
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run:
            run_ocr(img, languages="fra")
        args = mock_run.call_args[0][0]
        assert "fra" in args
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _detect_and_fix_rotation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDetectAndFixRotation:
    """Tests for ``_detect_and_fix_rotation`` with ``subprocess.run`` mocked."""

    def test_no_rotation_needed(self) -> None:
        """An OSD report of ``Rotate: 0`` leaves the image size unchanged."""
        img = Image.new("L", (200, 100))
        osd_output = "Rotate: 0\nOrientation in degrees: 0\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        assert result.size == (200, 100)

    def test_rotates_90_degrees(self) -> None:
        """An OSD report of ``Rotate: 90`` swaps width and height."""
        img = Image.new("L", (200, 100))  # landscape
        osd_output = "Rotate: 90\nOrientation in degrees: 90\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        # 200x100 rotated by 90 degrees becomes 100x200.
        assert result.size == (100, 200)

    def test_rotates_180_degrees(self) -> None:
        """An OSD report of ``Rotate: 180`` keeps the image size."""
        img = Image.new("L", (200, 100))
        osd_output = "Rotate: 180\n"
        with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)):
            result = _detect_and_fix_rotation(img)
        # A 180 degree rotation does not change the dimensions.
        assert result.size == (200, 100)

    def test_fallback_brute_force_when_osd_fails(self) -> None:
        """A failing first (OSD) call must be followed by further subprocess calls."""
        img = Image.new("L", (200, 100))
        calls = 0

        def side_effect(cmd, **kwargs):
            nonlocal calls
            calls += 1
            if calls == 1:
                # The first call (OSD) fails.
                return _fake_run("", returncode=1)
            # Later calls (brute force): the third call overall returns the
            # most letters; presumably that is the 90 degree candidate.
            if "--psm" in cmd and "6" in cmd:
                return _fake_run("Allegro Andante Beethoven" if calls == 3 else "a")
            return _fake_run("")

        with patch("musiksammlung.ocr.subprocess.run", side_effect=side_effect):
            _detect_and_fix_rotation(img)

        # The fallback ran: at least one subprocess call followed the failed
        # OSD call.
        # NOTE(review): the original comment claimed "at least 4 subprocess
        # calls" while the check is ">= 2"; tighten the threshold once the
        # number of brute-force attempts is confirmed against the
        # implementation.
        assert calls >= 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ocr_images
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestOcrImages:
    """Tests for ``ocr_images`` with ``preprocess_image`` and ``run_ocr`` patched."""

    def test_returns_text_for_single_image(self, tmp_path: Path) -> None:
        """The OCR text of a single image is returned unchanged."""
        img = _make_image(tmp_path / "img.jpg")

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=img),
            patch("musiksammlung.ocr.run_ocr", return_value="Track 1\nTrack 2"),
        ):
            result = ocr_images([img])

        assert result == "Track 1\nTrack 2"

    def test_concatenates_multiple_images(self, tmp_path: Path) -> None:
        """Texts of several images are joined with a blank line between them."""
        img1 = _make_image(tmp_path / "img1.jpg")
        img2 = _make_image(tmp_path / "img2.jpg")
        preprocessed = tmp_path / "pre.png"
        preprocessed.write_bytes(b"fake")

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", side_effect=["Text A", "Text B"]),
        ):
            result = ocr_images([img1, img2])

        assert result == "Text A\n\nText B"

    def test_skips_empty_ocr_result(self, tmp_path: Path) -> None:
        """An empty OCR result yields an empty overall string."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = tmp_path / "pre.png"
        preprocessed.write_bytes(b"fake")

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", return_value=""),
        ):
            result = ocr_images([img])

        assert result == ""

    def test_cleans_up_temp_file(self, tmp_path: Path) -> None:
        """The file returned by ``preprocess_image`` is gone after a successful run."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = tmp_path / "pre.png"
        preprocessed.write_bytes(b"fake")

        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", return_value="text"),
        ):
            ocr_images([img])

        # The temporary file has been deleted.
        assert not preprocessed.exists()

    def test_cleans_up_even_on_error(self, tmp_path: Path) -> None:
        """The preprocessed file is removed even when ``run_ocr`` raises."""
        img = _make_image(tmp_path / "img.jpg")
        preprocessed = tmp_path / "pre.png"
        preprocessed.write_bytes(b"fake")

        err = RuntimeError("Tesseract fehlgeschlagen")
        with (
            patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed),
            patch("musiksammlung.ocr.run_ocr", side_effect=err),
        ):
            try:
                ocr_images([img])
            except RuntimeError:
                # Deliberately tolerated: only the cleanup is under test here,
                # not whether the error propagates to the caller.
                pass

        assert not preprocessed.exists()
|
||||
Loading…
Add table
Add a link
Reference in a new issue