"""Tests für OCR-Funktionen (subprocess via Mock).""" from __future__ import annotations from pathlib import Path from unittest.mock import MagicMock, patch from PIL import Image from musiksammlung.ocr import _detect_and_fix_rotation, ocr_images, run_ocr def _fake_run(stdout: str = "", returncode: int = 0) -> MagicMock: """Erstellt ein Mock-subprocess.CompletedProcess.""" result = MagicMock() result.returncode = returncode result.stdout = stdout result.stderr = "" return result def _make_image(path: Path, size: tuple[int, int] = (100, 100)) -> Path: Image.new("RGB", size, color=(200, 100, 50)).save(str(path), "JPEG") return path # --------------------------------------------------------------------------- # run_ocr # --------------------------------------------------------------------------- class TestRunOcr: def test_returns_stdout_on_success(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "test.jpg") with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run("Trackliste\n")): result = run_ocr(img) assert result == "Trackliste" def test_calls_tesseract_with_correct_args(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "test.jpg") with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run: run_ocr(img, languages="deu+eng") args = mock_run.call_args[0][0] assert args[0] == "tesseract" assert str(img) in args assert "deu+eng" in args assert "--psm" in args def test_raises_on_nonzero_returncode(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "test.jpg") with patch( "musiksammlung.ocr.subprocess.run", return_value=_fake_run("", returncode=1), ): try: run_ocr(img) assert False, "RuntimeError expected" except RuntimeError as e: assert "Tesseract" in str(e) def test_custom_language(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "test.jpg") with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run()) as mock_run: run_ocr(img, languages="fra") args = mock_run.call_args[0][0] assert "fra" in args # --------------------------------------------------------------------------- # _detect_and_fix_rotation # --------------------------------------------------------------------------- class TestDetectAndFixRotation: def test_no_rotation_needed(self) -> None: img = Image.new("L", (200, 100)) osd_output = "Rotate: 0\nOrientation in degrees: 0\n" with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)): result = _detect_and_fix_rotation(img) assert result.size == (200, 100) def test_rotates_90_degrees(self) -> None: img = Image.new("L", (200, 100)) # breit osd_output = "Rotate: 90\nOrientation in degrees: 90\n" with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)): result = _detect_and_fix_rotation(img) # 200×100 rotiert um 90° → 100×200 assert result.size == (100, 200) def test_rotates_180_degrees(self) -> None: img = Image.new("L", (200, 100)) osd_output = "Rotate: 180\n" with patch("musiksammlung.ocr.subprocess.run", return_value=_fake_run(osd_output)): result = _detect_and_fix_rotation(img) # 180° Rotation ändert Größe nicht assert result.size == (200, 100) def test_fallback_brute_force_when_osd_fails(self) -> None: img = Image.new("L", (200, 100)) call_count = [0] def side_effect(cmd, **kwargs): call_count[0] += 1 if call_count[0] == 1: # Erster Aufruf (OSD) schlägt fehl return _fake_run("", returncode=1) # Weitere Aufrufe (brute-force): 90° liefert die meisten Buchstaben if "--psm" in cmd and "6" in cmd: return _fake_run("Allegro Andante Beethoven" if call_count[0] == 3 else "a") return _fake_run("") with patch("musiksammlung.ocr.subprocess.run", side_effect=side_effect): _detect_and_fix_rotation(img) # Brute-force wurde verwendet (mind. 4 Subprocess-Aufrufe) assert call_count[0] >= 2 # --------------------------------------------------------------------------- # ocr_images # --------------------------------------------------------------------------- class TestOcrImages: def test_returns_text_for_single_image(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "img.jpg") with ( patch("musiksammlung.ocr.preprocess_image", return_value=img), patch("musiksammlung.ocr.run_ocr", return_value="Track 1\nTrack 2"), ): result = ocr_images([img]) assert result == "Track 1\nTrack 2" def test_concatenates_multiple_images(self, tmp_path: Path) -> None: img1 = _make_image(tmp_path / "img1.jpg") img2 = _make_image(tmp_path / "img2.jpg") preprocessed = tmp_path / "pre.png" preprocessed.write_bytes(b"fake") with ( patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed), patch("musiksammlung.ocr.run_ocr", side_effect=["Text A", "Text B"]), ): result = ocr_images([img1, img2]) assert result == "Text A\n\nText B" def test_skips_empty_ocr_result(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "img.jpg") preprocessed = tmp_path / "pre.png" preprocessed.write_bytes(b"fake") with ( patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed), patch("musiksammlung.ocr.run_ocr", return_value=""), ): result = ocr_images([img]) assert result == "" def test_cleans_up_temp_file(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "img.jpg") preprocessed = tmp_path / "pre.png" preprocessed.write_bytes(b"fake") with ( patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed), patch("musiksammlung.ocr.run_ocr", return_value="text"), ): ocr_images([img]) # Temporäre Datei wurde gelöscht assert not preprocessed.exists() def test_cleans_up_even_on_error(self, tmp_path: Path) -> None: img = _make_image(tmp_path / "img.jpg") preprocessed = tmp_path / "pre.png" preprocessed.write_bytes(b"fake") err = RuntimeError("Tesseract fehlgeschlagen") with ( patch("musiksammlung.ocr.preprocess_image", return_value=preprocessed), patch("musiksammlung.ocr.run_ocr", side_effect=err), ): try: ocr_images([img]) except RuntimeError: pass assert not preprocessed.exists()