diff --git a/.gitignore b/.gitignore
index 0c215f6..38953f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,7 @@ env/
 
 # Claude Code
 .claude/
+
+# Ideen
+Ideen/
+
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..32e2c30
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,86 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Running the CLI
+
+```bash
+conda activate chatterbox
+
+# Deutschen Text aus Datei vorlesen
+python chatterbox_cli_v4.py --lang de --input text.txt
+
+# Mit Voice Cloning
+python chatterbox_cli_v4.py --lang de --voice my_voice.wav --input text.txt
+
+# Text direkt übergeben (Englisch)
+python chatterbox_cli_v4.py --lang en --text "Hello world"
+
+# Nur speichern, kein Playback
+python chatterbox_cli_v4.py --lang de --no-play --output ausgabe.wav --input text.txt
+
+# Geschwindigkeit anpassen (pitch-erhaltend, erfordert rubberband-cli)
+python chatterbox_cli_v4.py --lang de --speed 0.85 --input text.txt
+
+# Streaming-Modus (experimentell, niedrigere Latenz, kann abgehackt klingen)
+python chatterbox_cli_v4.py --lang de --stream --input text.txt
+
+# Aussprache-Wörterbuch (JSON: {"Eigenname": "Lautschrift"})
+python chatterbox_cli_v4.py --lang de --pronunciation-dict aussprache.json --input text.txt
+```
+
+No build step, no test suite, no linter configuration — this is a single-file script.
+
+## Architecture
+
+Everything lives in `chatterbox_cli_v4.py`. The processing pipeline is:
+
+**Text input → normalization → chunking → TTS generation → audio output**
+
+### Text normalization (`preprocess_tts_text`)
+Applied per chunk before synthesis. Order matters:
+1. Pronunciation dict substitutions (before acronym expansion, so proper names are caught first)
+2. Unit normalization (120 km/h → "120 Kilometer pro Stunde")
+3. Time normalization (14:58 → "vierzehn Uhr achtundfünfzig")
+4. Year normalization (2026 → "zweitausendsechsundzwanzig")
+5. Acronym spelling (ARD → "Ah Er De"; skips entries in `NON_SPELLED_ACRONYMS`)
+
+`DEFAULT_PRONUNCIATION_DE` contains built-in German phonetic approximations (e.g. Xi → "Schi").
+
+### Text chunking
+Three modes (chosen by CLI flags):
+- **sentence_mode** (default): `split_into_sentences()` — one sentence per TTS call, lowest latency to first audio
+- **conversation_mode**: `split_for_conversation()` — first chunk is small (`--first-chunk-len`, default 80 chars), rest up to `--len` (400)
+- **plain**: `split_long_text()` — paragraph-aware chunking up to `--len`
+
+`SENTENCE_END_RE` handles edge cases like ordinal numbers, ellipses, and CJK punctuation. `SEPARATOR_LINE_RE` silently drops lines like `--- Ende ---`.
+
+### Model loading (`load_model`)
+- `--lang en` → `ChatterboxTTS` (monolingual English model, always available)
+- Other languages → `ChatterboxMultilingualTTS` (requires multilingual package; `HAS_MULTILINGUAL` flag guards import)
+- `--t3-model v3` (default) or `v2` selects the multilingual T3 checkpoint
+- Models are downloaded to `~/.cache/huggingface/` on first use (~2–3 GB)
+- **Critical**: `attn_implementation = "eager"` is forced at import time because SDPA returns `None` attention weights, breaking the `AlignmentStreamAnalyzer` hook
+
+### Audio output (`PlaybackWorker`)
+- Uses `sounddevice.OutputStream` with a callback at 48 kHz (PipeWire/PulseAudio standard)
+- Internal producer thread converts Torch tensors → `CALLBACK_BLOCK`-sized (2048 samples) numpy arrays
+- If `--speed != 1.0`: pyrubberband R3-Engine (`--fine` flag) stretches time without pitch change before resampling
+- Resampling: `torchaudio.functional.resample(chunk, model_sr, 48000)`
+- `PlaybackWorker.stop()` sends `None` sentinel into the queue and joins the thread
+
+### Two synthesis paths
+- **`synthesize_non_streaming`**: generates each chunk fully, feeds finished tensors to `PlaybackWorker`, concatenates all wavs for `--save`
+- **`synthesize_streaming`**: calls `model.generate_stream()` with `chunk_size`; each yielded audio sub-chunk goes directly to `PlaybackWorker`; marked experimental in docs
+
+## Planned extensions (Ideen/)
+
+The `Ideen/` folder documents a planned **REST/MCP bridge**:
+- `tts_service.py` (FastAPI): `POST /speak`, `POST /stop`, `GET /health`, `GET /status`, `GET /voices`
+- `mcp_adapter.py`: thin MCP wrapper calling the REST API
+- `chatterbox_backend.py`: imports `chatterbox_cli_v4.py` via `importlib` and calls `synthesize_non_streaming()` directly
+
+Key gaps to address before building the service:
+1. **Stop/interrupt**: `PlaybackWorker.stop()` drains the audio queue, but a blocking `model.generate()` call cannot be interrupted mid-run. The `threading.Event`-based `stop_event` passed to `synthesize_non_streaming` and `synthesize_streaming` is therefore only checked between chunks.
+2. **Model caching**: `load_model()` reloads from disk on every call; a service needs a per-language singleton.
+3. **Status object**: progress is `print()`-based; a service needs structured state.
diff --git a/chatterbox_cli_v4.py b/chatterbox_cli_v4.py
index 14c2a64..4ffc32d 100755
--- a/chatterbox_cli_v4.py
+++ b/chatterbox_cli_v4.py
@@ -10,6 +10,20 @@ import time
 from pathlib import Path
 from typing import List, Optional, Tuple
 
+# ---------------------------------------------------------------------------
+# Cooperative stop mechanism
+# ---------------------------------------------------------------------------
+STOP_REQUESTED = threading.Event()  # module-wide flag shared by the helpers below
+# Set the flag; code that polls it is expected to stop at its next check.
+def request_stop() -> None:
+    STOP_REQUESTED.set()
+# Reset the flag so that a new run does not see a stale stop request.
+def clear_stop() -> None:
+    STOP_REQUESTED.clear()
+# Report whether a stop has been requested and not yet cleared.
+def stop_requested() -> bool:
+    return STOP_REQUESTED.is_set()
+
 import torch
 import torchaudio as ta
 
@@ -556,10 +570,12 @@ class PlaybackWorker:
     PLAYBACK_RATE = 48000  # PipeWire/PulseAudio standard
     CALLBACK_BLOCK = 2048  # ~43 ms pro Callback-Block bei 48 kHz
 
-    def __init__(self, sample_rate: int, device: Optional[str] = "pulse", speed: float = 1.0):
+    def __init__(self, sample_rate: int, device: Optional[str] = "pulse", speed: float = 1.0,
+                 stop_event: Optional[threading.Event] = None):
         self.sample_rate = sample_rate
         self.device = device
         self.speed = speed
+        self.stop_event = stop_event
         # Eingang: Torch-Tensoren vom TTS-Modell
         self.audio_queue: "queue.Queue[Optional[torch.Tensor]]" = queue.Queue()
         # Intern: fertig vorbereitete numpy-Blöcke für den Callback
@@ -579,6 +595,9 @@ class PlaybackWorker:
 
     def _callback(self, outdata, frames, time_info, status):
         # Läuft im Audio-Thread: so schnell wie möglich, kein Lock nötig.
+        if self.stop_event and self.stop_event.is_set():
+            outdata[:] = 0.0
+            return
         try:
             data = self._block_queue.get_nowait()
             outdata[:, 0] = data
@@ -593,6 +612,8 @@ class PlaybackWorker:
         remainder = np.zeros(0, dtype="float32")
 
         while True:
+            if self.stop_event and self.stop_event.is_set():
+                break
             item = self.audio_queue.get()
             if item is None:
                 break
@@ -690,6 +711,7 @@ def synthesize_non_streaming(
     debug_delay: float = 0.0,
     t3_model: Optional[str] = None,
     pronunciation_dict: Optional[dict] = None,
+    stop_event: Optional[threading.Event] = None,
 ) -> Optional[Path]:
     if lang not in SUPPORTED_LANGS:
         raise ValueError(
@@ -739,7 +761,8 @@ def synthesize_non_streaming(
             print(f"Ausgabe: {output_path}")
 
     if play_audio:
-        playback = PlaybackWorker(sample_rate=sr, device=audio_device, speed=speed)
+        playback = PlaybackWorker(sample_rate=sr, device=audio_device, speed=speed,
+                                  stop_event=stop_event)
         playback.start()
     else:
         playback = None
@@ -747,6 +770,10 @@ def synthesize_non_streaming(
     wavs = []
     try:
         for i, chunk in enumerate(chunks, start=1):
+            if stop_event and stop_event.is_set():
+                if show_progress:
+                    print("Abbruch angefordert – Synthese gestoppt.")
+                break
             if debug_delay > 0:
                 if show_progress:
                     print(f"[{i}/{len(chunks)}] Warte {debug_delay:.0f}s (debug_delay) ...")
@@ -793,6 +820,7 @@ def synthesize_streaming(
     save_wav: bool = True,
     stream_chunk_size: int = 25,
     audio_device: Optional[str] = None,
+    stop_event: Optional[threading.Event] = None,
 ) -> Optional[Path]:
     if lang not in SUPPORTED_LANGS:
         raise ValueError(
@@ -829,7 +857,7 @@ def synthesize_streaming(
         raise ValueError("Kein verwertbarer Text nach dem Einlesen gefunden.")
 
     if play_audio:
-        playback = PlaybackWorker(sample_rate=sr, device=audio_device)
+        playback = PlaybackWorker(sample_rate=sr, device=audio_device, stop_event=stop_event)
         playback.start()
     else:
         playback = None
@@ -853,6 +881,10 @@ def synthesize_streaming(
 
     try:
         for text_idx, text_chunk in enumerate(text_chunks, start=1):
+            if stop_event and stop_event.is_set():
+                if show_progress:
+                    print("Abbruch angefordert – Streaming gestoppt.")
+                break
             if show_progress:
                 print(f"[Text {text_idx}/{len(text_chunks)}] Starte Streaming für {len(text_chunk)} Zeichen ...")
 
@@ -866,6 +898,8 @@ def synthesize_streaming(
             )
 
             for audio_idx, item in enumerate(stream_iter, start=1):
+                if stop_event and stop_event.is_set():
+                    break
                 if isinstance(item, tuple) and len(item) == 2:
                     audio_chunk, metrics = item
                 else:
@@ -944,6 +978,7 @@ def build_argparser() -> argparse.ArgumentParser:
     p.add_argument("--debug-delay", type=float, default=0.0, help="Sekunden Pause vor jedem Satz (simuliert langsame KI). Nur zum Testen.")
     p.add_argument("--t3-model", type=str, default="v3", help="Multilingual T3-Modell: 'v3' (default), 'v2' oder Dateiname.")
     p.add_argument("--no-conversation-mode", action="store_true", help="Ersten Chunk nicht künstlich kleiner machen (nur ohne --no-sentence-mode).")
+    p.add_argument("--stop", action="store_true", help="Globales Stop-Signal setzen (für Tests und Service-Integration).")
     return p
 
 
@@ -951,6 +986,11 @@ def main() -> int:
     parser = build_argparser()
     args = parser.parse_args()
 
+    if args.stop:
+        request_stop()
+        print("Stop-Signal gesetzt.")
+        return 0
+
     try:
         text = read_input_text(args.text, args.input)
         device = get_device(args.device)
@@ -970,6 +1010,8 @@ def main() -> int:
                 raise FileNotFoundError(f"Aussprache-Dict nicht gefunden: {pron_path}")
             pronunciation_dict = json.loads(pron_path.read_text(encoding="utf-8"))
 
+        clear_stop()
+
         if args.stream:
             out = synthesize_streaming(
                 text=text,
@@ -990,6 +1032,7 @@ def main() -> int:
                 save_wav=save_wav,
                 stream_chunk_size=args.stream_chunk_size,
                 audio_device=args.audio_device,
+                stop_event=STOP_REQUESTED,
             )
         else:
             out = synthesize_non_streaming(
@@ -1015,6 +1058,7 @@ def main() -> int:
                 debug_delay=args.debug_delay,
                 t3_model=args.t3_model,
                 pronunciation_dict=pronunciation_dict,
+                stop_event=STOP_REQUESTED,
             )
 
         if out is not None:
diff --git a/mcp_adapter.py b/mcp_adapter.py
new file mode 100644
index 0000000..ca1562e
--- /dev/null
+++ b/mcp_adapter.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""
+Chatterbox TTS – MCP-Adapter
+
+Setzt einen laufenden tts_service.py voraus (Standard: http://127.0.0.1:8000).
+
+Start (streamable-http, Port 8001 – für beliebige MCP-Clients):
+    python mcp_adapter.py
+
+Start (stdio – für Claude Code / Claude Desktop):
+    python mcp_adapter.py --stdio
+
+Claude Code Konfiguration (.claude/settings.json):
+    {
+      "mcpServers": {
+        "chatterbox-tts": {
+          "command": "python",
+          "args": ["/home/dschlueter/chatterbox-tts-cli/mcp_adapter.py", "--stdio"]
+        }
+      }
+    }
+
+Umgebungsvariable TTS_URL überschreibt die Service-Adresse:
+    TTS_URL=http://192.168.1.10:8000 python mcp_adapter.py --stdio
+"""
+from __future__ import annotations
+
+import argparse
+import os
+
+import httpx
+from mcp.server.fastmcp import FastMCP
+
+TTS_URL = os.environ.get("TTS_URL", "http://127.0.0.1:8000").rstrip("/")  # service base URL, trailing "/" removed
+# Server object on which the @mcp.tool() functions in this module are registered.
+mcp = FastMCP(
+    "Chatterbox TTS",
+    instructions=(
+        "Lokaler Text-to-Speech-Service. Liest Texte auf Deutsch und 20+ weiteren "
+        "Sprachen vor. Unterstützt Voice Cloning, Geschwindigkeitsanpassung und "
+        "Aussprache-Wörterbücher."
+    ),
+    port=8001,  # same value as the --port default of the command-line entry point
+)
+
+
+# ---------------------------------------------------------------------------
+# Tools
+# ---------------------------------------------------------------------------
+
+@mcp.tool()
+async def speak(
+    text: str,
+    lang: str = "de",
+    voice: str | None = None,
+    interrupt: bool = False,
+    speed: float = 1.0,
+) -> dict:
+    """Text als Sprache ausgeben.
+
+    Reiht den Text in die Ausgabewarteschlange ein. Das Modell generiert
+    satzweise und beginnt sofort mit der Wiedergabe.
+
+    Args:
+        text:      Auszugebender Text (max. 4000 Zeichen).
+        lang:      Sprachcode, z. B. 'de', 'en', 'fr'. Standard: 'de'.
+        voice:     Optionaler Pfad zu einer WAV-Referenzdatei (10–30s) für
+                   Voice Cloning.
+        interrupt: True = laufende Ausgabe sofort unterbrechen und diesen
+                   Text vorgezogen abspielen.
+        speed:     Wiedergabegeschwindigkeit (0.5–2.0). Pitch bleibt gleich.
+    """
+    async with httpx.AsyncClient(timeout=15) as client:
+        r = await client.post(f"{TTS_URL}/speak", json={
+            "text": text,
+            "lang": lang,
+            "voice": voice,
+            "interrupt": interrupt,
+            "speed": speed,
+        })
+        r.raise_for_status()
+        return r.json()
+
+
+@mcp.tool()
+async def stop() -> dict:
+    """Laufende Sprachausgabe sofort stoppen und Warteschlange leeren."""
+    async with httpx.AsyncClient(timeout=5) as client:
+        r = await client.post(f"{TTS_URL}/stop")
+        r.raise_for_status()
+        return r.json()
+
+
+@mcp.tool()
+async def get_status() -> dict:
+    """Aktuellen Ausgabe-Status abfragen.
+
+    Gibt zurück: laufender Job (mit Chunk-Fortschritt), Queue-Länge und
+    die letzten abgeschlossenen Jobs.
+    """
+    async with httpx.AsyncClient(timeout=5) as client:
+        r = await client.get(f"{TTS_URL}/status")
+        r.raise_for_status()
+        return r.json()
+
+
+@mcp.tool()
+async def list_voices() -> dict:
+    """Unterstützte Sprachen und Hinweise zu Voice Cloning abfragen."""
+    async with httpx.AsyncClient(timeout=5) as client:
+        r = await client.get(f"{TTS_URL}/voices")
+        r.raise_for_status()
+        return r.json()
+
+
+# ---------------------------------------------------------------------------
+# Einstiegspunkt
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Chatterbox TTS MCP-Adapter")
+    parser.add_argument(
+        "--stdio", action="store_true",
+        help="stdio-Transport (für Claude Code / Claude Desktop)",
+    )
+    parser.add_argument("--host", default="127.0.0.1",
+                        help="Host für streamable-http (Standard: 127.0.0.1)")
+    parser.add_argument("--port", type=int, default=8001,
+                        help="Port für streamable-http (Standard: 8001)")
+    args = parser.parse_args()
+
+    if args.stdio:
+        mcp.run()  # stdio ist der Default-Transport
+    else:
+        mcp.run(transport="streamable-http", host=args.host, port=args.port)
diff --git a/requirements.txt b/requirements.txt
index 6408a4d..3b709db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,13 @@ sounddevice>=0.4.0
 pyrubberband>=0.4.0
 # rubberband-cli muss zusätzlich als Systempakete installiert sein:
 #   sudo apt install rubberband-cli
+
+# HTTP-Service (Phase 2)
+fastapi>=0.115.0
+uvicorn[standard]>=0.32.0
+
+# HTTP-Client für MCP-Adapter (Phase 3)
+httpx>=0.28.0
+
+# MCP-Adapter (Phase 3)
+mcp>=1.0.0
diff --git a/tts_service.py b/tts_service.py
new file mode 100644
index 0000000..0b2a6a5
--- /dev/null
+++ b/tts_service.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+"""
+Chatterbox TTS – lokaler HTTP-Service
+
+Start:
+    uvicorn tts_service:app --host 127.0.0.1 --port 8000
+
+Endpunkte:
+    POST /speak   – Text in Warteschlange einreihen
+    POST /stop    – laufende Ausgabe abbrechen, Queue leeren
+    GET  /health  – Service-Status
+    GET  /status  – aktueller Job + Queue-Länge
+    GET  /voices  – unterstützte Sprachen
+"""
+from __future__ import annotations
+
+import queue
+import sys
+import threading
+import uuid
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+
+# CLI-Modul aus demselben Verzeichnis laden
+sys.path.insert(0, str(Path(__file__).parent))
+import chatterbox_cli_v4 as tts  # noqa: E402
+
+import torch
+import torchaudio as ta
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+
+# ---------------------------------------------------------------------------
+# Determine the compute device once, at import time
+# ---------------------------------------------------------------------------
+_DEVICE = tts.get_device(None)
+
+# ---------------------------------------------------------------------------
+# Model cache: (lang, t3_model) → (model, model_kind, sr)
+# ---------------------------------------------------------------------------
+_model_cache: dict[tuple, tuple] = {}
+_model_lock = threading.Lock()  # guards _model_cache lookups and model loading
+
+
+def _get_or_load_model(lang: str, t3_model: str) -> tuple:
+    key = (lang, t3_model)
+    with _model_lock:
+        if key not in _model_cache:
+            _model_cache[key] = tts.load_model(lang, _DEVICE, t3_model=t3_model)
+    return _model_cache[key]
+
+
+# ---------------------------------------------------------------------------
+# Job-Datenmodell
+# ---------------------------------------------------------------------------
+class JobStatus(str, Enum):  # str mix-in: members compare equal to their plain string values
+    pending = "pending"      # default for a newly created SpeakJob
+    running = "running"      # set by the worker when it takes the job from the queue
+    done = "done"            # worker finished and the stop flag was not set
+    cancelled = "cancelled"  # the stop flag was set when the worker finished the job
+    error = "error"          # an exception was raised while processing the job
+
+
+@dataclass
+class SpeakJob:  # one queued /speak request plus the progress the worker records on it
+    id: str  # uuid4 string assigned by the /speak handler
+    text: str  # input text as received in the request
+    lang: str  # language code; checked against tts.SUPPORTED_LANGS by /speak
+    t3_model: str  # forwarded to tts.load_model()
+    voice: Optional[str]  # forwarded to tts.generate_chunk(); None when not given
+    speed: float  # forwarded to tts.PlaybackWorker
+    audio_device: str  # forwarded to tts.PlaybackWorker as `device`
+    max_len: int  # forwarded to tts.split_into_sentences()
+    save_wav: bool  # a WAV is written only if this and output_path are both set
+    output_path: Optional[str]  # destination of the concatenated WAV
+    pronunciation_dict: Optional[dict]  # forwarded to tts.preprocess_tts_text()
+    session_id: Optional[str]  # stored only; not read anywhere in this module
+    status: JobStatus = field(default=JobStatus.pending)  # updated by the worker
+    text_preview: str = field(default="")  # first 80 characters of text, filled in by the worker
+    chunks_total: int = 0  # number of non-empty chunks after preprocessing
+    chunks_done: int = 0  # chunks generated and handed to playback so far
+    error: Optional[str] = None  # str(exception) when processing failed
+
+
+# ---------------------------------------------------------------------------
+# Worker thread
+# ---------------------------------------------------------------------------
+_job_queue: queue.Queue[SpeakJob] = queue.Queue()  # pending jobs, processed in FIFO order
+_current_job: Optional[SpeakJob] = None  # job being processed right now, if any
+_state_lock = threading.Lock()  # guards _current_job and _recent_jobs
+_recent_jobs: list[SpeakJob] = []  # finished jobs, oldest first
+_MAX_RECENT = 20  # maximum number of entries kept in _recent_jobs
+
+
+def _worker() -> None:
+    global _current_job
+    # Endless consumer loop: takes one job at a time from _job_queue, in queue order.
+    while True:
+        job = _job_queue.get()  # blocks until a job is available
+
+        with _state_lock:
+            _current_job = job
+            job.status = JobStatus.running
+        # NOTE(review): a stop requested between get() above and this clear is discarded — confirm that is acceptable.
+        tts.clear_stop()
+
+        try:
+            model, model_kind, sr = _get_or_load_model(job.lang, job.t3_model)
+            # Text pipeline: clean, split into chunks, normalise each chunk, drop empty ones.
+            raw = tts.clean_raw_text(job.text)
+            raw_chunks = tts.split_into_sentences(raw, max_len=job.max_len)
+            chunks = [
+                tts.preprocess_tts_text(c, lang=job.lang,
+                                        pronunciation_dict=job.pronunciation_dict)
+                for c in raw_chunks
+            ]
+            chunks = [c for c in chunks if c.strip()]
+
+            job.chunks_total = len(chunks)
+            job.text_preview = job.text[:80]
+
+            playback = tts.PlaybackWorker(
+                sample_rate=sr,
+                device=job.audio_device,
+                speed=job.speed,
+                stop_event=tts.STOP_REQUESTED,
+            )
+            playback.start()
+
+            wavs: list[torch.Tensor] = []
+            try:
+                for chunk in chunks:
+                    if tts.stop_requested():  # checked between chunks only
+                        break
+                    wav = tts.generate_chunk(model, model_kind, chunk, job.lang, job.voice)
+                    wavs.append(wav)
+                    playback.put(wav)
+                    job.chunks_done += 1
+            finally:
+                playback.stop()  # always shut playback down, even if generation raised
+            # Optional export: everything generated so far, concatenated along the last axis.
+            if job.save_wav and job.output_path and wavs:
+                out = Path(job.output_path)
+                out.parent.mkdir(parents=True, exist_ok=True)
+                final = wavs[0] if len(wavs) == 1 else torch.cat(wavs, dim=-1)
+                ta.save(str(out), final, sr)
+
+            job.status = (
+                JobStatus.cancelled if tts.stop_requested() else JobStatus.done
+            )
+        # Any failure is recorded on the job so that the worker loop itself keeps running.
+        except Exception as exc:  # noqa: BLE001
+            job.status = JobStatus.error
+            job.error = str(exc)
+
+        finally:
+            with _state_lock:
+                _current_job = None
+                _recent_jobs.append(job)
+                if len(_recent_jobs) > _MAX_RECENT:
+                    _recent_jobs.pop(0)  # drop the oldest entry to stay within _MAX_RECENT
+            _job_queue.task_done()
+
+# Started at import time; daemon=True so the thread never blocks interpreter exit.
+_worker_thread = threading.Thread(target=_worker, daemon=True, name="tts-worker")
+_worker_thread.start()
+
+
+# ---------------------------------------------------------------------------
+# API-Modelle
+# ---------------------------------------------------------------------------
+class SpeakRequest(BaseModel):  # JSON body accepted by POST /speak
+    text: str = Field(min_length=1, max_length=4000)  # text to speak
+    lang: str = "de"  # checked against tts.SUPPORTED_LANGS in the handler
+    voice: Optional[str] = None  # optional path; validated in the handler when given
+    interrupt: bool = False  # True: request a stop and drop pending jobs before enqueueing
+    speed: float = Field(default=1.0, ge=0.5, le=2.0)  # copied to SpeakJob.speed
+    t3_model: str = "v3"  # copied to SpeakJob.t3_model
+    audio_device: str = "pulse"  # copied to SpeakJob.audio_device
+    max_len: int = Field(default=400, ge=50, le=1000)  # copied to SpeakJob.max_len
+    save_wav: bool = False  # copied to SpeakJob.save_wav
+    output_path: Optional[str] = None  # copied to SpeakJob.output_path
+    session_id: Optional[str] = None  # copied to SpeakJob.session_id
+    pronunciation_dict: Optional[dict] = None  # copied to SpeakJob.pronunciation_dict
+
+
+def _job_to_dict(j: SpeakJob) -> dict:
+    return {
+        "id": j.id,
+        "status": j.status,
+        "lang": j.lang,
+        "text_preview": j.text_preview,
+        "chunks_total": j.chunks_total,
+        "chunks_done": j.chunks_done,
+        "error": j.error,
+    }
+
+
+def _drain_queue() -> None:
+    while not _job_queue.empty():
+        try:
+            _job_queue.get_nowait()
+            _job_queue.task_done()
+        except queue.Empty:
+            break
+
+
+# ---------------------------------------------------------------------------
+# FastAPI-App
+# ---------------------------------------------------------------------------
+app = FastAPI(title="Chatterbox TTS Service", version="1.0")  # ASGI app, served with: uvicorn tts_service:app
+
+
+@app.get("/health")
+def health():  # liveness probe; also reports the device chosen at import time
+    return {"status": "ok", "device": _DEVICE}
+
+
+@app.get("/voices")
+def voices():  # static listing built from tts.SUPPORTED_LANGS; no model is loaded here
+    return {
+        "languages": sorted(tts.SUPPORTED_LANGS),
+        "note": "Voice cloning via 'voice' field (WAV-Pfad, 10–30s Aufnahme)",
+    }
+
+
+@app.post("/speak")
+def speak(req: SpeakRequest):
+    if req.lang not in tts.SUPPORTED_LANGS:
+        raise HTTPException(status_code=422,
+                            detail=f"Sprache nicht unterstützt: {req.lang}")
+    if req.voice and not Path(req.voice).exists():
+        raise HTTPException(status_code=422,
+                            detail=f"Voice-Datei nicht gefunden: {req.voice}")
+
+    if req.interrupt:
+        tts.request_stop()
+        _drain_queue()
+
+    job = SpeakJob(
+        id=str(uuid.uuid4()),
+        text=req.text,
+        lang=req.lang,
+        t3_model=req.t3_model,
+        voice=req.voice,
+        speed=req.speed,
+        audio_device=req.audio_device,
+        max_len=req.max_len,
+        save_wav=req.save_wav,
+        output_path=req.output_path,
+        pronunciation_dict=req.pronunciation_dict,
+        session_id=req.session_id,
+    )
+    _job_queue.put(job)
+
+    return {
+        "job_id": job.id,
+        "status": job.status,
+        "queue_position": _job_queue.qsize(),
+    }
+
+
+@app.post("/stop")
+def stop():  # cancel the running job (cooperatively) and drop all pending ones
+    tts.request_stop()  # sets the shared stop flag that the worker and playback poll
+    _drain_queue()  # pending jobs are removed from the queue without being processed
+    return {"stopped": True}
+
+
+@app.get("/status")
+def status():  # report the running job, the queue size and recently finished jobs
+    with _state_lock:
+        cur = _current_job
+        recent = list(_recent_jobs)  # copy, so the response is built outside the lock
+    # reversed(): _recent_jobs is appended to, so the newest job comes first in the reply.
+    return {
+        "current_job": _job_to_dict(cur) if cur else None,
+        "queue_length": _job_queue.qsize(),
+        "recent_jobs": [_job_to_dict(j) for j in reversed(recent)],
+    }