107 lines
3.2 KiB
Python
107 lines
3.2 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""Interaktiver EAN-Scan-Test: Foto hochladen → Vision-LLM → EAN ausgeben.
|
||
|
|
|
||
|
|
Starten: python3 test_ean_scan.py
|
||
|
|
Beenden: Strg+C
|
||
|
|
"""
|
||
|
|
|
||
|
|
import base64
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import httpx
|
||
|
|
|
||
|
|
from musiksammlung.scanner_server import ScannerServer, print_qr
|
||
|
|
from musiksammlung.vision_llm import EAN_PROMPT
|
||
|
|
|
||
|
|
# Directory handed to the scanner server as its upload target.
UPLOAD_DIR: Path = Path("/tmp/ean_scan_test")

# Port handed to the scanner server.
PORT: int = 8765

# Model name sent in the "model" field of the chat request.
MODEL: str = "qwen3-vl:235b-cloud"

# Base URL of the LLM HTTP API; "/api/chat" is appended for requests.
BASE_URL: str = "http://localhost:11434"

# Seconds to wait for a photo upload.
TIMEOUT_PHOTO: float = 300.0

# Seconds to wait for the LLM response.
TIMEOUT_LLM: float = 60.0
|
||
|
|
|
||
|
|
|
||
|
|
def query_llm(image_path: Path) -> str:
    """Send the image to the vision LLM and return its raw text answer.

    The file at *image_path* is read completely, base64-encoded and posted
    together with ``EAN_PROMPT`` as a single, non-streaming user message to
    ``{BASE_URL}/api/chat``. The request uses ``TIMEOUT_LLM`` as its timeout.

    Returns:
        The ``message.content`` field of the JSON response.

    Raises:
        OSError: If the image file cannot be read.
        Exception: Whatever ``httpx`` raises for transport problems or,
            via ``raise_for_status()``, for error status codes.
        KeyError: If the response JSON lacks ``message`` / ``content``.
    """
    encoded_image = base64.b64encode(image_path.read_bytes()).decode()

    user_message = {
        "role": "user",
        "content": EAN_PROMPT,
        "images": [encoded_image],
    }
    payload = {
        "model": MODEL,
        "messages": [user_message],
        "stream": False,
    }

    response = httpx.post(
        f"{BASE_URL}/api/chat",
        json=payload,
        timeout=TIMEOUT_LLM,
    )
    response.raise_for_status()

    reply = response.json()
    return reply["message"]["content"]
|
||
|
|
|
||
|
|
|
||
|
|
def extract_ean(raw: str) -> str | None:
|
||
|
|
"""Bereinigt Rohausgabe und extrahiert Ziffernfolge."""
|
||
|
|
cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
|
||
|
|
digits = re.sub(r"\D", "", cleaned)
|
||
|
|
return digits if digits else None
|
||
|
|
|
||
|
|
|
||
|
|
def _ask_retry(prompt: str) -> bool:
    """Print *prompt* and return True if the user answers yes.

    Accepted answers are "j", "ja", "y" and "yes" (case-insensitive).
    A closed stdin (EOF) counts as "no" so the caller can shut down cleanly
    instead of ending in a traceback.
    """
    print(prompt, end="", flush=True)
    try:
        answer = input()
    except EOFError:
        return False
    return answer.strip().lower() in ("j", "ja", "y", "yes")


def run() -> None:
    """Run the interactive scan loop until the user quits.

    Starts a ``ScannerServer`` on ``PORT`` writing to ``UPLOAD_DIR``, prints
    the upload URL as text and QR code, and then repeats:

    1. wait up to ``TIMEOUT_PHOTO`` seconds for an uploaded photo,
    2. send the photo to the vision LLM,
    3. print the raw answer, the answer without ``<think>`` sections (if
       different) and the extracted EAN,
    4. ask whether to do another round.

    An LLM error is printed and the next round starts immediately.
    Ctrl+C ends the loop. The server is stopped in every case once it has
    been started.
    """
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    server = ScannerServer(port=PORT, upload_dir=UPLOAD_DIR)
    server.start()

    # Everything after start() sits inside try/finally so that an interrupt
    # or error while printing the banner / QR code still stops the server.
    try:
        print(f"\nModel: {MODEL}")
        print(f"Prompt: {EAN_PROMPT!r}\n")
        print_qr(server.url())
        print(f"Upload-URL: {server.url()}\n")

        runde = 0
        while True:
            runde += 1
            print(f"\n{'─'*50}")
            print(f"Runde {runde}: Bitte Foto hochladen — URL: {server.url()}")
            print(
                f"(Timeout {TIMEOUT_PHOTO:.0f}s — Formular auf dem Handy "
                "lädt sich nach 3s selbst zurück)"
            )
            photo = server.get_photo(timeout=TIMEOUT_PHOTO)

            # get_photo() yields None when no upload arrived in time.
            if photo is None:
                if not _ask_retry("Timeout — kein Foto empfangen. Nochmal? (j/n) "):
                    break
                continue

            print(f"Foto empfangen: {photo} ({photo.stat().st_size} Bytes)")
            print(f"LLM-Anfrage läuft ({MODEL}) ...")

            # Interactive boundary: report any failure and keep the loop alive.
            try:
                raw = query_llm(photo)
            except Exception as exc:
                print(f"LLM-Fehler: {exc}")
                continue

            print(f"\nRohantwort:\n {raw!r}")

            # Show the answer without the model's reasoning section, if any.
            cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()
            if cleaned != raw.strip():
                print(f"Nach Think-Tag-Entfernung:\n {cleaned!r}")

            ean = extract_ean(raw)
            if ean:
                print(f"\n✓ EAN erkannt: {ean} ({len(ean)} Stellen)")
            else:
                print("\n✗ Kein Barcode erkannt")

            if not _ask_retry("\nNochmal? (j/n) "):
                break

    except KeyboardInterrupt:
        print("\nAbgebrochen.")
    finally:
        server.stop()
        print("Server gestoppt.")
|
||
|
|
|
||
|
|
|
||
|
|
# Start the interactive loop only when executed as a script, not on import.
if __name__ == "__main__":
    run()
|