# Default endpoint of a locally running Ollama server.
_DEFAULT_OLLAMA_HOST = "http://localhost:11434"


def _normalize_ollama_host(raw: Optional[str]) -> str:
    """Turn an ``OLLAMA_HOST`` value into a base URL usable with urllib.

    Ollama itself accepts values such as ``0.0.0.0`` or ``127.0.0.1:11434``
    (no scheme, optional port). urllib rejects those with "unknown url type",
    so a missing scheme is filled in with ``http://`` and, for a bare host
    name, the Ollama default port 11434. A trailing slash is removed so that
    appending ``/api/chat`` never produces a double slash.

    Args:
        raw: The raw environment value; may be None, empty or whitespace.

    Returns:
        A base URL without a trailing slash. Falls back to the local default
        when *raw* is empty.
    """
    host = (raw or "").strip().rstrip("/")
    if not host:
        return _DEFAULT_OLLAMA_HOST
    if "://" not in host:
        # Scheme-less value: only add the default port for a bare host name
        # (no explicit port, no path component).
        if ":" not in host and "/" not in host:
            host = f"{host}:11434"
        host = f"http://{host}"
    return host


_OLLAMA_HOST = _normalize_ollama_host(os.getenv("OLLAMA_HOST"))
# Models in priority order; can be overridden via OLLAMA_OCR_MODEL
# (comma-separated list).
_OCR_MODELS = [m.strip() for m in os.getenv(
    "OLLAMA_OCR_MODEL",
    "qwen3-vl:latest,minicpm-v:latest,deepseek-ocr:latest"
).split(",") if m.strip()]

_OCR_PROMPT = (
    "This image shows a CD album back cover or booklet page. "
    "Your task: extract the complete tracklist as plain text.\n"
    "Rules:\n"
    "- Output track number and title per line, e.g. '1. Title' or '1-1 Title'\n"
    "- If multiple discs/CDs: include a header like 'CD 1' or 'Disc 1' before each group\n"
    "- Include durations if visible (e.g. '1. Title 4:32')\n"
    "- Do NOT include label info, barcodes, or other non-tracklist text\n"
    "- If no tracklist is visible, reply with: NO_TRACKLIST"
)

# Seconds to wait for a single model response.
_OCR_TIMEOUT_SECONDS = 180

# File-name fragments that suggest a back cover / booklet scan.
_BACK_COVER_KEYWORDS = ("back", "inlay", "booklet", "inside", "rear")
# File-name fragments that suggest a front cover (excluded in the fallback).
_FRONT_COVER_KEYWORDS = ("front", "folder", "cover")


def _select_ocr_candidates(image_files: List[Path]) -> List[Path]:
    """Return the images worth OCR-ing, in their original order.

    Images whose name looks like a back cover are preferred; if there are
    none, every image that does not look like a front cover is returned.
    """
    by_name = [
        p for p in image_files
        if any(kw in p.name.lower() for kw in _BACK_COVER_KEYWORDS)
    ]
    if by_name:
        return by_name
    return [
        p for p in image_files
        if not any(kw in p.name.lower() for kw in _FRONT_COVER_KEYWORDS)
    ]


def _ocr_back_cover(image_files: List[Path]) -> Optional[str]:
    """OCR a back-cover or booklet image via an Ollama vision model.

    The first readable candidate image (see ``_select_ocr_candidates``) is
    sent to each model in ``_OCR_MODELS`` in turn until one returns text.

    Args:
        image_files: Image paths belonging to the album.

    Returns:
        The recognised tracklist text, or None when there is no usable image,
        a model answers ``NO_TRACKLIST``, or every model fails or returns
        nothing. Errors are reported on stderr and never raised.
    """
    image_path: Optional[Path] = None
    img_b64 = ""
    for candidate in _select_ocr_candidates(image_files):
        try:
            img_b64 = base64.b64encode(candidate.read_bytes()).decode()
        except OSError as e:
            # An unreadable file should not block OCR of the next candidate.
            print(f" ⚠️ OCR-Bild lesen {candidate.name}: {e}", file=sys.stderr)
            continue
        image_path = candidate
        break
    if image_path is None:
        return None

    for model in _OCR_MODELS:
        payload = json.dumps({
            "model": model,
            "messages": [{
                "role": "user",
                "content": _OCR_PROMPT,
                "images": [img_b64],
            }],
            "stream": False,
            "options": {"temperature": 0.0},
        }).encode()
        try:
            req = urllib.request.Request(
                f"{_OLLAMA_HOST}/api/chat",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=_OCR_TIMEOUT_SECONDS) as resp:
                data = json.loads(resp.read())
            # Tolerate "message": null / "content": null in the response.
            message = data.get("message") or {}
            text = (message.get("content") or "").strip()
        except Exception as e:
            # Deliberately broad: OCR is a best-effort fallback, so any
            # failure (model missing, server down, malformed reply) just
            # moves on to the next model.
            print(f" ⚠️ OCR-Fehler ({model}) {image_path.name}: {e}", file=sys.stderr)
            continue

        if "NO_TRACKLIST" in text:
            print(f" 📷 OCR {image_path.name}: kein Tracklist-Text erkannt", file=sys.stderr)
            return None
        if text:
            print(f" 📷 OCR {image_path.name} via {model}: {len(text)} Zeichen extrahiert",
                  file=sys.stderr)
            return text
        # Empty reply: try the next model.
    return None
AlbumHints: texts.append(txt) hints.tracklist_text = "\n\n".join(texts) if texts else None + # OCR-Fallback: Back-Cover scannen wenn keine Tracklist-Textdatei vorhanden + if use_ocr and not hints.tracklist_text and scan.image_files: + ocr_text = _ocr_back_cover(scan.image_files) + if ocr_text: + hints.tracklist_text = ocr_text + parsed_tracklist = _parse_tracklist(hints.tracklist_text) if hints.tracklist_text else [] # M3U/Playlist-Reihenfolge: filename (stem, normalisiert) → Tracknummer diff --git a/music_enricher.py b/music_enricher.py index 30387cc..3e86d4c 100644 --- a/music_enricher.py +++ b/music_enricher.py @@ -90,7 +90,7 @@ def process_album( stats["skipped"] += 1 return stats - hints = extract_hints(scan) + hints = extract_hints(scan, use_ocr=not args.no_api) proposal = resolve( hints,