Audio-Download-Endpunkt GET /audio/{job_id} hinzufügen

- SpeakRequest: keep_audio=true speichert WAV in ~/.cache/chatterbox-tts/
- SpeakJob: audio_path-Feld für gespeicherte WAV-Datei
- GET /audio/{job_id}: liefert WAV als FileResponse, löscht Datei danach
- mcp_adapter: keep_audio-Parameter in speak() weitergereicht
- Docstring: neuen Endpunkt dokumentiert

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-03 21:09:06 +02:00 · 2026-06-03 21:09:06 +02:00 · fe74b84360
commit fe74b84360
parent 69de37d1a0
2 changed files with 93 additions and 17 deletions
--- a/tts_service.py
+++ b/tts_service.py
@@ -6,14 +6,18 @@ Start:
    uvicorn tts_service:app --host 0.0.0.0 --port 9999

 Endpunkte:
-    POST /speak   – Text in Warteschlange einreihen
-    POST /stop    – laufende Ausgabe abbrechen, Queue leeren
-    GET  /health  – Service-Status
-    GET  /status  – aktueller Job + Queue-Länge
-    GET  /voices  – unterstützte Sprachen
+    POST /speak          – Text in Warteschlange einreihen
+    POST /stop           – laufende Ausgabe abbrechen, Queue leeren
+    POST /pause          – Ausgabe pausieren (ohne Datenverlust)
+    POST /resume         – pausierte Ausgabe fortsetzen
+    GET  /audio/{job_id} – fertige WAV herunterladen (nur wenn keep_audio=true)
+    GET  /health         – Service-Status
+    GET  /status         – aktueller Job + Queue-Länge
+    GET  /voices         – unterstützte Sprachen
 """
 from __future__ import annotations

+import os
 import queue
 import sys
 import threading
@@ -30,8 +34,13 @@ import chatterbox_cli_v4 as tts  # noqa: E402
 import torch
 import torchaudio as ta
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
 from pydantic import BaseModel, Field

+# Directory for temporary audio downloads (keep_audio=True); created at import time if missing.
+_AUDIO_CACHE_DIR = Path.home() / ".cache" / "chatterbox-tts"
+_AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
 # ---------------------------------------------------------------------------
 # Gerät einmalig bestimmen
 # ---------------------------------------------------------------------------
@@ -89,11 +98,13 @@ class SpeakJob:
    output_path: Optional[str]
    pronunciation_dict: Optional[dict]
    session_id: Optional[str]
+    keep_audio: bool = False
    status: JobStatus = field(default=JobStatus.pending)
    text_preview: str = field(default="")
    chunks_total: int = 0
    chunks_done: int = 0
    error: Optional[str] = None
+    audio_path: Optional[str] = None  # gesetzt wenn keep_audio=True und Job fertig


 # ---------------------------------------------------------------------------
@@ -153,11 +164,18 @@ def _worker() -> None:
            finally:
                playback.stop()

-            if job.save_wav and job.output_path and wavs:
-                out = Path(job.output_path)
-                out.parent.mkdir(parents=True, exist_ok=True)
+            if wavs:
                final = wavs[0] if len(wavs) == 1 else torch.cat(wavs, dim=-1)
-                ta.save(str(out), final, sr)
+
+                if job.save_wav and job.output_path:
+                    out = Path(job.output_path)
+                    out.parent.mkdir(parents=True, exist_ok=True)
+                    ta.save(str(out), final, sr)
+
+                if job.keep_audio:
+                    cache_path = _AUDIO_CACHE_DIR / f"{job.id}.wav"
+                    ta.save(str(cache_path), final, sr)
+                    job.audio_path = str(cache_path)

            job.status = (
                JobStatus.cancelled if tts.stop_requested() else JobStatus.done
@@ -196,6 +214,7 @@ class SpeakRequest(BaseModel):
    output_path: Optional[str] = None
    session_id: Optional[str] = None
    pronunciation_dict: Optional[dict] = None
+    keep_audio: bool = False  # WAV im Cache behalten für GET /audio/{job_id}


 def _job_to_dict(j: SpeakJob) -> dict:
@@ -264,6 +283,7 @@ def speak(req: SpeakRequest):
        output_path=req.output_path,
        pronunciation_dict=req.pronunciation_dict,
        session_id=req.session_id,
+        keep_audio=req.keep_audio,
    )
    _job_queue.put(job)

@@ -293,6 +313,58 @@ def resume():
    return {"resumed": True}


+@app.get("/audio/{job_id}")
+def download_audio(job_id: str):
+    """Serve the cached WAV of a finished job, then delete it (one-shot download).
+
+    Audio is only cached when /speak was called with keep_audio=true.
+    Raises HTTPException 202 while the job is still pending or running, and
+    404 if the job is not among the recent jobs or has no audio file on disk.
+    """
+    # Function-scope import: the file does not import BackgroundTask at top level.
+    from starlette.background import BackgroundTask
+
+    # Snapshot the shared state under the lock; evaluate it afterwards.
+    with _state_lock:
+        cur = _current_job
+        recent = list(_recent_jobs)
+
+    if cur and cur.id == job_id:
+        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")
+
+    job = next((j for j in recent if j.id == job_id), None)
+    if job is None:
+        raise HTTPException(status_code=404, detail=f"Job nicht gefunden: {job_id}")
+    if job.status in (JobStatus.pending, JobStatus.running):
+        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")
+
+    if not job.audio_path or not Path(job.audio_path).exists():
+        if not job.keep_audio:
+            raise HTTPException(
+                status_code=404,
+                detail="Keine Audio-Datei vorhanden. Bitte /speak mit keep_audio=true aufrufen.",
+            )
+        raise HTTPException(status_code=404, detail="Audio-Datei nicht mehr vorhanden.")
+
+    audio_path = Path(job.audio_path)
+    def cleanup_after_send() -> None:
+        """Delete the cached file once the response has been sent."""
+        try:
+            os.unlink(audio_path)
+        except FileNotFoundError:
+            pass  # already gone: the stored reference is stale, clear it below
+        except OSError:
+            return  # file could not be removed; keep the reference to it
+        job.audio_path = None
+
+    return FileResponse(
+        path=str(audio_path),
+        media_type="audio/wav",
+        filename=f"tts_{job_id[:8]}.wav",
+        background=BackgroundTask(cleanup_after_send),
+    )
+
+
@app.get("/status")
 def status():
    with _state_lock: