Add HTTP service, MCP adapter, systemd autostart; fix bugs and docs

- chatterbox_cli_v4.py: cooperative stop/interrupt via threading.Event; fix force_split_sentence (word boundary instead of mid-word cut); fix synthesize_streaming normalization order (split before preprocess)
- tts_service.py: FastAPI service with job queue, model cache, worker thread; LAN-accessible on 0.0.0.0:9999; audio_device default None (auto)
- mcp_adapter.py: MCP adapter (stdio + streamable-http) wrapping REST API; update docstring and default TTS_URL to port 9999
- requirements.txt: add fastapi, uvicorn, httpx, mcp
- README.md, BEDIENUNGSANLEITUNG.md: document service, MCP, AI integrations (Claude, Ollama, Open WebUI, llama.cpp, Home Assistant), systemd autostart
- CLAUDE.md: reflect current architecture (service + adapter now implemented)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 10:19:00 +02:00 · 2026-05-16 10:19:00 +02:00 · d1971049ce
commit d1971049ce
parent bcf6374c29
7 changed files with 494 additions and 146 deletions
--- a/chatterbox_cli_v4.py
+++ b/chatterbox_cli_v4.py
@ -463,7 +463,9 @@ def force_split_sentence(text: str, max_len: int) -> List[str]:
    while len(remaining) > max_len:
        split_pos = remaining.rfind(" ", 0, max_len + 1)
        if split_pos <= 0:
-            split_pos = max_len
+            # Kein Leerzeichen vor max_len — vorwärts zum nächsten Wortende suchen
+            next_space = remaining.find(" ", max_len)
+            split_pos = next_space if next_space != -1 else len(remaining)
        parts.append(remaining[:split_pos].strip())
        remaining = remaining[split_pos:].strip()

@ -830,15 +832,9 @@ def synthesize_streaming(
    if voice_path and not Path(voice_path).exists():
        raise FileNotFoundError(f"Voice-Referenz nicht gefunden: {voice_path}")

-    text = preprocess_tts_text(
-        text=text,
-        lang=lang,
-        spell_uppercase_acronyms=spell_uppercase_acronyms,
-        acronym_mode=acronym_mode,
-        normalize_time_values=normalize_time_values,
-        normalize_year_values=normalize_year_values,
-        normalize_units_values=normalize_units_values,
-    )
+    # Erst bereinigen und splitten, dann pro Chunk normalisieren —
+    # sonst erzeugen Akronym-Punkte ("ARD" → "Ah Er De.") falsche Satzgrenzen.
+    text = clean_raw_text(text)

    model, model_kind, sr = load_model(lang, device)

@ -849,9 +845,20 @@ def synthesize_streaming(
        )

    if conversation_mode:
-        text_chunks = split_for_conversation(text, first_chunk_len=first_chunk_len, max_len=max_len)
+        raw_chunks = split_for_conversation(text, first_chunk_len=first_chunk_len, max_len=max_len)
    else:
-        text_chunks = split_long_text(text, max_len=max_len)
+        raw_chunks = split_long_text(text, max_len=max_len)
+
+    preprocess_kw = dict(
+        lang=lang,
+        spell_uppercase_acronyms=spell_uppercase_acronyms,
+        acronym_mode=acronym_mode,
+        normalize_time_values=normalize_time_values,
+        normalize_year_values=normalize_year_values,
+        normalize_units_values=normalize_units_values,
+    )
+    text_chunks = [preprocess_tts_text(c, **preprocess_kw) for c in raw_chunks]
+    text_chunks = [c for c in text_chunks if c.strip()]

    if not text_chunks:
        raise ValueError("Kein verwertbarer Text nach dem Einlesen gefunden.")