Add HTTP service, MCP adapter, systemd autostart; fix bugs and docs

- chatterbox_cli_v4.py: cooperative stop/interrupt via threading.Event;
  fix force_split_sentence (word boundary instead of mid-word cut);
  fix synthesize_streaming normalization order (split before preprocess)
- tts_service.py: FastAPI service with job queue, model cache, worker thread;
  LAN-accessible on 0.0.0.0:9999; audio_device default None (auto)
- mcp_adapter.py: MCP adapter (stdio + streamable-http) wrapping REST API;
  update docstring and default TTS_URL to port 9999
- requirements.txt: add fastapi, uvicorn, httpx, mcp
- README.md, BEDIENUNGSANLEITUNG.md: document service, MCP, AI integrations
  (Claude, Ollama, Open WebUI, llama.cpp, Home Assistant), systemd autostart
- CLAUDE.md: reflect current architecture (service + adapter now implemented)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Dieter Schlüter 2026-05-16 10:19:00 +02:00
commit d1971049ce
7 changed files with 494 additions and 146 deletions

View file

@@ -463,7 +463,9 @@ def force_split_sentence(text: str, max_len: int) -> List[str]:
while len(remaining) > max_len:
split_pos = remaining.rfind(" ", 0, max_len + 1)
if split_pos <= 0:
split_pos = max_len
# Kein Leerzeichen vor max_len — vorwärts zum nächsten Wortende suchen
next_space = remaining.find(" ", max_len)
split_pos = next_space if next_space != -1 else len(remaining)
parts.append(remaining[:split_pos].strip())
remaining = remaining[split_pos:].strip()
@@ -830,15 +832,9 @@ def synthesize_streaming(
if voice_path and not Path(voice_path).exists():
raise FileNotFoundError(f"Voice-Referenz nicht gefunden: {voice_path}")
text = preprocess_tts_text(
text=text,
lang=lang,
spell_uppercase_acronyms=spell_uppercase_acronyms,
acronym_mode=acronym_mode,
normalize_time_values=normalize_time_values,
normalize_year_values=normalize_year_values,
normalize_units_values=normalize_units_values,
)
# Erst bereinigen und splitten, dann pro Chunk normalisieren —
# sonst erzeugen Akronym-Punkte ("ARD" → "Ah Er De.") falsche Satzgrenzen.
text = clean_raw_text(text)
model, model_kind, sr = load_model(lang, device)
@@ -849,9 +845,20 @@ def synthesize_streaming(
)
if conversation_mode:
text_chunks = split_for_conversation(text, first_chunk_len=first_chunk_len, max_len=max_len)
raw_chunks = split_for_conversation(text, first_chunk_len=first_chunk_len, max_len=max_len)
else:
text_chunks = split_long_text(text, max_len=max_len)
raw_chunks = split_long_text(text, max_len=max_len)
preprocess_kw = dict(
lang=lang,
spell_uppercase_acronyms=spell_uppercase_acronyms,
acronym_mode=acronym_mode,
normalize_time_values=normalize_time_values,
normalize_year_values=normalize_year_values,
normalize_units_values=normalize_units_values,
)
text_chunks = [preprocess_tts_text(c, **preprocess_kw) for c in raw_chunks]
text_chunks = [c for c in text_chunks if c.strip()]
if not text_chunks:
raise ValueError("Kein verwertbarer Text nach dem Einlesen gefunden.")