Audio-Download-Endpunkt GET /audio/{job_id} hinzufügen
- SpeakRequest: keep_audio=true speichert WAV in ~/.cache/chatterbox-tts/
- SpeakJob: audio_path-Feld für gespeicherte WAV-Datei
- GET /audio/{job_id}: liefert WAV als FileResponse, löscht Datei danach
- mcp_adapter: keep_audio-Parameter in speak() weitergereicht
- Docstring: neuen Endpunkt dokumentiert
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
69de37d1a0
commit
fe74b84360
2 changed files with 93 additions and 17 deletions
|
|
@@ -61,6 +61,7 @@ async def speak(
|
||||||
interrupt: bool = False,
|
interrupt: bool = False,
|
||||||
speed: float = 1.0,
|
speed: float = 1.0,
|
||||||
session_id: str | None = None,
|
session_id: str | None = None,
|
||||||
|
keep_audio: bool = False,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Text als Sprache ausgeben.
|
"""Text als Sprache ausgeben.
|
||||||
|
|
||||||
|
|
@@ -76,6 +77,8 @@ async def speak(
|
||||||
Text vorgezogen abspielen.
|
Text vorgezogen abspielen.
|
||||||
speed: Wiedergabegeschwindigkeit (0.5–2.0). Pitch bleibt gleich.
|
speed: Wiedergabegeschwindigkeit (0.5–2.0). Pitch bleibt gleich.
|
||||||
session_id: Optionale Session-ID für Job-Tracking im TTS-Service.
|
session_id: Optionale Session-ID für Job-Tracking im TTS-Service.
|
||||||
|
keep_audio: True = WAV-Datei nach der Synthese im Cache behalten;
|
||||||
|
abrufbar via GET /audio/{job_id}.
|
||||||
"""
|
"""
|
||||||
async with httpx.AsyncClient(timeout=30) as client:
|
async with httpx.AsyncClient(timeout=30) as client:
|
||||||
r = await client.post(f"{TTS_URL}/speak", json={
|
r = await client.post(f"{TTS_URL}/speak", json={
|
||||||
|
|
@@ -85,6 +88,7 @@ async def speak(
|
||||||
"interrupt": interrupt,
|
"interrupt": interrupt,
|
||||||
"speed": speed,
|
"speed": speed,
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
|
"keep_audio": keep_audio,
|
||||||
})
|
})
|
||||||
_raise_for_status(r)
|
_raise_for_status(r)
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
|
||||||
|
|
@@ -8,12 +8,16 @@ Start:
|
||||||
Endpunkte:
|
Endpunkte:
|
||||||
POST /speak – Text in Warteschlange einreihen
|
POST /speak – Text in Warteschlange einreihen
|
||||||
POST /stop – laufende Ausgabe abbrechen, Queue leeren
|
POST /stop – laufende Ausgabe abbrechen, Queue leeren
|
||||||
|
POST /pause – Ausgabe pausieren (ohne Datenverlust)
|
||||||
|
POST /resume – pausierte Ausgabe fortsetzen
|
||||||
|
GET /audio/{job_id} – fertige WAV herunterladen (nur wenn keep_audio=true)
|
||||||
GET /health – Service-Status
|
GET /health – Service-Status
|
||||||
GET /status – aktueller Job + Queue-Länge
|
GET /status – aktueller Job + Queue-Länge
|
||||||
GET /voices – unterstützte Sprachen
|
GET /voices – unterstützte Sprachen
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
import queue
|
import queue
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
|
|
@@ -30,8 +34,13 @@ import chatterbox_cli_v4 as tts # noqa: E402
|
||||||
import torch
|
import torch
|
||||||
import torchaudio as ta
|
import torchaudio as ta
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
# Verzeichnis für temporäre Audio-Downloads (keep_audio=True)
|
||||||
|
_AUDIO_CACHE_DIR = Path.home() / ".cache" / "chatterbox-tts"
|
||||||
|
_AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Gerät einmalig bestimmen
|
# Gerät einmalig bestimmen
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@@ -89,11 +98,13 @@ class SpeakJob:
|
||||||
output_path: Optional[str]
|
output_path: Optional[str]
|
||||||
pronunciation_dict: Optional[dict]
|
pronunciation_dict: Optional[dict]
|
||||||
session_id: Optional[str]
|
session_id: Optional[str]
|
||||||
|
keep_audio: bool = False
|
||||||
status: JobStatus = field(default=JobStatus.pending)
|
status: JobStatus = field(default=JobStatus.pending)
|
||||||
text_preview: str = field(default="")
|
text_preview: str = field(default="")
|
||||||
chunks_total: int = 0
|
chunks_total: int = 0
|
||||||
chunks_done: int = 0
|
chunks_done: int = 0
|
||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
|
audio_path: Optional[str] = None # gesetzt wenn keep_audio=True und Job fertig
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@@ -153,12 +164,19 @@ def _worker() -> None:
|
||||||
finally:
|
finally:
|
||||||
playback.stop()
|
playback.stop()
|
||||||
|
|
||||||
if job.save_wav and job.output_path and wavs:
|
if wavs:
|
||||||
|
final = wavs[0] if len(wavs) == 1 else torch.cat(wavs, dim=-1)
|
||||||
|
|
||||||
|
if job.save_wav and job.output_path:
|
||||||
out = Path(job.output_path)
|
out = Path(job.output_path)
|
||||||
out.parent.mkdir(parents=True, exist_ok=True)
|
out.parent.mkdir(parents=True, exist_ok=True)
|
||||||
final = wavs[0] if len(wavs) == 1 else torch.cat(wavs, dim=-1)
|
|
||||||
ta.save(str(out), final, sr)
|
ta.save(str(out), final, sr)
|
||||||
|
|
||||||
|
if job.keep_audio:
|
||||||
|
cache_path = _AUDIO_CACHE_DIR / f"{job.id}.wav"
|
||||||
|
ta.save(str(cache_path), final, sr)
|
||||||
|
job.audio_path = str(cache_path)
|
||||||
|
|
||||||
job.status = (
|
job.status = (
|
||||||
JobStatus.cancelled if tts.stop_requested() else JobStatus.done
|
JobStatus.cancelled if tts.stop_requested() else JobStatus.done
|
||||||
)
|
)
|
||||||
|
|
@@ -196,6 +214,7 @@ class SpeakRequest(BaseModel):
|
||||||
output_path: Optional[str] = None
|
output_path: Optional[str] = None
|
||||||
session_id: Optional[str] = None
|
session_id: Optional[str] = None
|
||||||
pronunciation_dict: Optional[dict] = None
|
pronunciation_dict: Optional[dict] = None
|
||||||
|
keep_audio: bool = False # WAV im Cache behalten für GET /audio/{job_id}
|
||||||
|
|
||||||
|
|
||||||
def _job_to_dict(j: SpeakJob) -> dict:
|
def _job_to_dict(j: SpeakJob) -> dict:
|
||||||
|
|
@@ -264,6 +283,7 @@ def speak(req: SpeakRequest):
|
||||||
output_path=req.output_path,
|
output_path=req.output_path,
|
||||||
pronunciation_dict=req.pronunciation_dict,
|
pronunciation_dict=req.pronunciation_dict,
|
||||||
session_id=req.session_id,
|
session_id=req.session_id,
|
||||||
|
keep_audio=req.keep_audio,
|
||||||
)
|
)
|
||||||
_job_queue.put(job)
|
_job_queue.put(job)
|
||||||
|
|
||||||
|
|
@@ -293,6 +313,58 @@ def resume():
|
||||||
return {"resumed": True}
|
return {"resumed": True}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/audio/{job_id}")
def download_audio(job_id: str):
    """Download the finished WAV file of a job submitted with keep_audio=true.

    The cached file is removed by a background task that runs after the
    response has been sent, so a recording is effectively a one-shot download.

    Args:
        job_id: ID of the job whose audio should be returned.

    Returns:
        A FileResponse that serves the cached WAV as ``audio/wav``.

    Raises:
        HTTPException: 202 if the job is the current job or is still
            pending/running; 404 if the job is unknown, was not submitted
            with keep_audio=true, or its file is no longer available.
    """
    # Function-scope import (as in the original) so the block does not depend
    # on a module-level import being present.
    from starlette.background import BackgroundTask

    # Snapshot the shared state under the lock; all checks run on the copy.
    with _state_lock:
        cur = _current_job
        recent = list(_recent_jobs)

    # The job that is currently being processed cannot be downloaded yet.
    if cur and cur.id == job_id:
        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")

    # NOTE(review): only the current job and the recent-jobs list are searched;
    # a job that is still waiting in the queue presumably ends up as 404 here
    # rather than 202 — confirm against how _recent_jobs is populated.
    job = next((j for j in recent if j.id == job_id), None)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job nicht gefunden: {job_id}")

    if job.status in (JobStatus.pending, JobStatus.running):
        raise HTTPException(status_code=202, detail="Job läuft noch — bitte später erneut abrufen.")

    if not job.audio_path or not Path(job.audio_path).exists():
        # Distinguish "never requested" from "already downloaded / removed".
        if not job.keep_audio:
            raise HTTPException(
                status_code=404,
                detail="Keine Audio-Datei vorhanden. Bitte /speak mit keep_audio=true aufrufen.",
            )
        raise HTTPException(status_code=404, detail="Audio-Datei nicht mehr vorhanden.")

    audio_path = Path(job.audio_path)

    def cleanup_after_send() -> None:
        """Delete the cached WAV and forget its path on the job."""
        try:
            # missing_ok: a file that is already gone still counts as cleaned
            # up, so the stale path is cleared below instead of lingering.
            audio_path.unlink(missing_ok=True)
        except OSError:
            # Best effort: the file could not be removed, so keep audio_path
            # pointing at it and let a later request retry.
            return
        job.audio_path = None

    # Hand the cleanup to the response directly instead of patching
    # response.background after construction.
    return FileResponse(
        path=str(audio_path),
        media_type="audio/wav",
        filename=f"tts_{job_id[:8]}.wav",
        background=BackgroundTask(cleanup_after_send),
    )
|
||||||
|
|
||||||
|
|
||||||
@app.get("/status")
|
@app.get("/status")
|
||||||
def status():
|
def status():
|
||||||
with _state_lock:
|
with _state_lock:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue