diff --git a/mcp_adapter.py b/mcp_adapter.py
index 04e168b..09fc653 100644
--- a/mcp_adapter.py
+++ b/mcp_adapter.py
@@ -61,6 +61,7 @@ async def speak(
     interrupt: bool = False,
     speed: float = 1.0,
     session_id: str | None = None,
+    keep_audio: bool = False,
 ) -> dict:
     """Text als Sprache ausgeben.
 
@@ -68,14 +69,16 @@ async def speak(
     satzweise und beginnt sofort mit der Wiedergabe.
 
     Args:
-        text:       Auszugebender Text (max. 4000 Zeichen).
-        lang:       Sprachcode, z. B. 'de', 'en', 'fr'. Standard: 'de'.
-        voice:      Optionaler Pfad zu einer WAV-Referenzdatei (10–30s) für
-                    Voice Cloning.
-        interrupt:  True = laufende Ausgabe sofort unterbrechen und diesen
-                    Text vorgezogen abspielen.
-        speed:      Wiedergabegeschwindigkeit (0.5–2.0). Pitch bleibt gleich.
-        session_id: Optionale Session-ID für Job-Tracking im TTS-Service.
+        text:        Auszugebender Text (max. 4000 Zeichen).
+        lang:        Sprachcode, z. B. 'de', 'en', 'fr'. Standard: 'de'.
+        voice:       Optionaler Pfad zu einer WAV-Referenzdatei (10–30s) für
+                     Voice Cloning.
+        interrupt:   True = laufende Ausgabe sofort unterbrechen und diesen
+                     Text vorgezogen abspielen.
+        speed:       Wiedergabegeschwindigkeit (0.5–2.0). Pitch bleibt gleich.
+        session_id:  Optionale Session-ID für Job-Tracking im TTS-Service.
+        keep_audio:  True = WAV-Datei nach der Synthese im Cache behalten;
+                     abrufbar via GET /audio/{job_id}.
     """
     async with httpx.AsyncClient(timeout=30) as client:
         r = await client.post(f"{TTS_URL}/speak", json={
@@ -85,6 +88,7 @@ async def speak(
             "interrupt": interrupt,
             "speed": speed,
             "session_id": session_id,
+            "keep_audio": keep_audio,
         })
         _raise_for_status(r)
         return r.json()
diff --git a/tts_service.py b/tts_service.py
index 0161fda..f71abc8 100644
--- a/tts_service.py
+++ b/tts_service.py
@@ -6,14 +6,18 @@ Start:
     uvicorn tts_service:app --host 0.0.0.0 --port 9999
 
 Endpunkte:
-    POST /speak   – Text in Warteschlange einreihen
-    POST /stop    – laufende Ausgabe abbrechen, Queue leeren
-    GET  /health  – Service-Status
-    GET  /status  – aktueller Job + Queue-Länge
-    GET  /voices  – unterstützte Sprachen
+    POST /speak          – Text in Warteschlange einreihen
+    POST /stop           – laufende Ausgabe abbrechen, Queue leeren
+    POST /pause          – Ausgabe pausieren (ohne Datenverlust)
+    POST /resume         – pausierte Ausgabe fortsetzen
+    GET  /audio/{job_id} – fertige WAV herunterladen (nur wenn keep_audio=true)
+    GET  /health         – Service-Status
+    GET  /status         – aktueller Job + Queue-Länge
+    GET  /voices         – unterstützte Sprachen
 """
 from __future__ import annotations
 
+import os
 import queue
 import sys
 import threading
@@ -30,8 +34,13 @@ import chatterbox_cli_v4 as tts  # noqa: E402
 import torch
 import torchaudio as ta
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field
 
+# Directory for temporary audio downloads (keep_audio=True)
+_AUDIO_CACHE_DIR = Path.home() / ".cache" / "chatterbox-tts"
+_AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)  # created at import time
+
 # ---------------------------------------------------------------------------
 # Gerät einmalig bestimmen
 # ---------------------------------------------------------------------------
@@ -89,11 +98,13 @@ class SpeakJob:
     output_path: Optional[str]
     pronunciation_dict: Optional[dict]
     session_id: Optional[str]
+    keep_audio: bool = False
     status: JobStatus = field(default=JobStatus.pending)
     text_preview: str = field(default="")
     chunks_total: int = 0
     chunks_done: int = 0
     error: Optional[str] = None
+    audio_path: Optional[str] = None  # gesetzt wenn keep_audio=True und Job fertig
 
 
 # ---------------------------------------------------------------------------
@@ -153,11 +164,18 @@ def _worker() -> None:
             finally:
                 playback.stop()
 
-            if job.save_wav and job.output_path and wavs:
-                out = Path(job.output_path)
-                out.parent.mkdir(parents=True, exist_ok=True)
+            if wavs:
                 final = wavs[0] if len(wavs) == 1 else torch.cat(wavs, dim=-1)
-                ta.save(str(out), final, sr)
+
+                if job.save_wav and job.output_path:
+                    out = Path(job.output_path)
+                    out.parent.mkdir(parents=True, exist_ok=True)
+                    ta.save(str(out), final, sr)
+
+                if job.keep_audio:
+                    cache_path = _AUDIO_CACHE_DIR / f"{job.id}.wav"
+                    ta.save(str(cache_path), final, sr)
+                    job.audio_path = str(cache_path)
 
             job.status = (
                 JobStatus.cancelled if tts.stop_requested() else JobStatus.done
@@ -196,6 +214,7 @@ class SpeakRequest(BaseModel):
     output_path: Optional[str] = None
     session_id: Optional[str] = None
     pronunciation_dict: Optional[dict] = None
+    keep_audio: bool = False  # WAV im Cache behalten für GET /audio/{job_id}
 
 
 def _job_to_dict(j: SpeakJob) -> dict:
@@ -264,6 +283,7 @@ def speak(req: SpeakRequest):
         output_path=req.output_path,
         pronunciation_dict=req.pronunciation_dict,
         session_id=req.session_id,
+        keep_audio=req.keep_audio,
     )
     _job_queue.put(job)
 
@@ -293,6 +313,58 @@ def resume():
     return {"resumed": True}
 
 
+@app.get("/audio/{job_id}")
+def download_audio(job_id: str):
+    """Download the finished WAV of a job submitted with keep_audio=true.
+
+    Responds 202 while the job is current, pending or running, and 404 when
+    the job or its cached file is unknown. The file is deleted after sending.
+    """
+    from starlette.background import BackgroundTask
+
+    with _state_lock:
+        cur = _current_job
+        recent = list(_recent_jobs)
+
+    # Still the current job: tell the client to retry later.
+    if cur and cur.id == job_id:
+        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")
+
+    # Otherwise look the job up in the snapshot of recent jobs.
+    job = next((j for j in recent if j.id == job_id), None)
+    if job is None:
+        raise HTTPException(status_code=404, detail=f"Job nicht gefunden: {job_id}")
+
+    if job.status in (JobStatus.pending, JobStatus.running):
+        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")
+
+    if not job.audio_path or not Path(job.audio_path).exists():
+        if not job.keep_audio:
+            raise HTTPException(
+                status_code=404,
+                detail="Keine Audio-Datei vorhanden. Bitte /speak mit keep_audio=true aufrufen.",
+            )
+        raise HTTPException(status_code=404, detail="Audio-Datei nicht mehr vorhanden.")
+
+    audio_path = Path(job.audio_path)
+
+    def cleanup_after_send():
+        try:
+            os.unlink(audio_path)
+        except FileNotFoundError:
+            pass  # already gone: still drop the stale reference below
+        except OSError:
+            return  # best effort: keep the path so a later request can retry
+        job.audio_path = None
+
+    return FileResponse(
+        path=str(audio_path),
+        media_type="audio/wav",
+        filename=f"tts_{job_id[:8]}.wav",
+        background=BackgroundTask(cleanup_after_send),  # runs after the response is sent
+    )
+
+
 @app.get("/status")
 def status():
     with _state_lock: