Bugfixes, Verbesserungen und Mixed-Language-Support
Bugfixes: - Abkürzungen (z.B., d.h., Dr., Prof.) werden nicht mehr als Satzenden erkannt (_ABBREV_MASK_RE) - Multilingual-Import: except Exception → except (ImportError, ModuleNotFoundError) - tts_agent: ReAct-Schleife auf max. 10 Iterationen begrenzt, model_dump → explizites Dict - tts_service: audio_device=None fällt auf 'pulse' zurück - JSON-Fehlerbehandlung für --pronunciation-dict mit aussagekräftiger Meldung - PlaybackWorker: Audio-Device wird vor Stream-Start via sd.query_devices() geprüft - mcp_adapter: Fehlerbehandlung für HTTP-Fehler, Timeout erhöht, session_id ergänzt - tts_agent: Health-Check beim Start, --speed/--first-chunk-len Validierung Neue Features: - Gemischtsprachige Texte: [en]...[/en]-Markierungen für per-Segment language_id - strip_markdown(): entfernt Markdown-Formatierung vor der Synthese (--no-strip-markdown) - Emoji-Entfernung in clean_raw_text() via unicodedata - Pause/Resume: request_pause()/request_resume(), POST /pause, POST /resume, MCP-Tools - Neue Einheiten: °C, °F, kWh, kW, W, V, A, J, kPa, bar, m², m³, m/s, rpm - number_to_words_de/en bis Milliarden - DEFAULT_PRONUNCIATION_DE erweitert (GitHub, YouTube, LinkedIn, Wi-Fi, iPhone, ChatGPT, …) - NON_SPELLED_ACRONYMS erweitert (USB, CPU, GPU, API, CEO, HTML, …) - Nummerierte Listen als separate Chunks behandelt - Modell-Warmup via TTS_PRELOAD_LANG Env-Variable - requirements.txt: Upper-Bounds für fastapi und uvicorn Dokumentation: CLAUDE.md, README.md, BEDIENUNGSANLEITUNG.md vollständig aktualisiert Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d1971049ce
commit
34a34907a8
8 changed files with 778 additions and 114 deletions
|
|
@ -11,19 +11,45 @@ from pathlib import Path
|
|||
from typing import List, Optional, Tuple
|
||||
|
||||
# ---------------------------------------------------------------------------
# Cooperative stop and pause mechanism
# ---------------------------------------------------------------------------
STOP_REQUESTED = threading.Event()
PAUSE_REQUESTED = threading.Event()


def request_stop() -> None:
    """Set the global stop flag and lift any active pause."""
    STOP_REQUESTED.set()
    # A stop cancels a running pause so that paused waiters are released.
    PAUSE_REQUESTED.clear()


def clear_stop() -> None:
    """Reset the global stop flag."""
    STOP_REQUESTED.clear()


def stop_requested() -> bool:
    """Return True while the global stop flag is set."""
    return STOP_REQUESTED.is_set()


def request_pause() -> None:
    """Set the global pause flag."""
    PAUSE_REQUESTED.set()


def request_resume() -> None:
    """Clear the global pause flag."""
    PAUSE_REQUESTED.clear()


def is_paused() -> bool:
    """Return True while the global pause flag is set."""
    return PAUSE_REQUESTED.is_set()


def _wait_while_paused(stop_event: Optional[threading.Event] = None) -> bool:
    """Block while the pause flag is set.

    Args:
        stop_event: Optional caller-specific stop flag that is honoured in
            addition to the global STOP_REQUESTED flag.

    Returns:
        True if a stop was requested while waiting, otherwise False. When no
        pause is active on entry the function returns False immediately.
    """

    def _stop_pending() -> bool:
        return (stop_event is not None and stop_event.is_set()) or STOP_REQUESTED.is_set()

    waited = False
    while PAUSE_REQUESTED.is_set():
        waited = True
        if _stop_pending():
            return True
        time.sleep(0.05)
    # request_stop() clears the pause flag itself, so the loop can end *because*
    # of a stop. Re-check after waiting so that stop is not reported as a resume.
    return waited and _stop_pending()
|
||||
|
||||
import torch
|
||||
import torchaudio as ta
|
||||
|
||||
|
|
@ -37,7 +63,7 @@ from chatterbox.tts import ChatterboxTTS
|
|||
# Optional dependency: the multilingual model class may be missing from the
# installed chatterbox package.
try:
    from chatterbox.mtl_tts import ChatterboxMultilingualTTS
    HAS_MULTILINGUAL = True
except ImportError:
    # ModuleNotFoundError is a subclass of ImportError, so this single clause
    # covers both; any other exception is deliberately not swallowed.
    ChatterboxMultilingualTTS = None
    HAS_MULTILINGUAL = False
|
||||
|
||||
|
|
@ -58,11 +84,26 @@ SENTENCE_END_RE = re.compile(
|
|||
re.DOTALL
|
||||
)
|
||||
|
||||
# Abbreviations whose trailing period is NOT a sentence end.
# split_into_sentences() temporarily masks the final period of every match.
_ABBREV_MASK_RE = re.compile(
    r'\b(?:'
    # Connectives and short forms. Matched case-insensitively because they are
    # capitalised at the start of a sentence ("Z.B.", "Ca.", "Vgl.").
    r'(?i:z\.B|d\.h|u\.a|z\.T|u\.U|s\.o|s\.u|m\.E|i\.d\.R'
    r'|ggf|vgl|etc|ca|usw|bzw|sog|inkl|exkl|bzgl|zzgl)'
    # Titles (case-sensitive).
    r'|Dr|Prof|Hr|Fr|Hrsg|Dipl|Ing'
    # Reference and quantity abbreviations (case-sensitive).
    r'|Abs|Nr|Art|Bd|Abb|Kap|Mrd|Mio|Std|Tel|Str'
    r')\.'
)
|
||||
|
||||
NON_SPELLED_ACRONYMS = {
    # International organisations / proper names that are spoken as a word.
    "NATO", "NASA", "UNESCO", "OPEC", "IAEA", "UNICEF",
    # Tech acronyms. Original author's note: these sound wrong when spelled with
    # German letter names, so they are exempted here and are meant to be spelled
    # as Latin letters instead (the note refers to the "period_space" mode).
    # NOTE(review): the consuming code is not visible in this excerpt — confirm.
    "USB", "SSD", "RAM", "CPU", "GPU", "URL", "API", "PDF", "LAN", "WLAN",
    "HTML", "HTTP", "HTTPS", "JSON", "SQL", "VPN", "SSH", "FTP",
    "CEO", "CFO", "CTO", "COO",
}
|
||||
|
||||
GERMAN_LETTER_NAMES = {
|
||||
|
|
@ -92,36 +133,76 @@ TIME_RE = re.compile(r'\b([01]?\d|2[0-3])([:.])([0-5]\d)(?:\s*Uhr)?\b', re.IGNOR
|
|||
# Vierstellige Jahreszahlen
|
||||
YEAR_RE = re.compile(r'\b(19\d{2}|20\d{2}|21\d{2})\b')
|
||||
|
||||
# Simple German unit expansions (unit symbol -> spoken form).
# Per the original note, normalize_units() takes care of trying longer symbols
# first, so the order below is for readability only — NOTE(review): confirm,
# normalize_units() is not visible in this excerpt.
UNIT_REPLACEMENTS = {
    # Speed
    "km/h": "Kilometer pro Stunde",
    "m/s": "Meter pro Sekunde",
    "rpm": "Umdrehungen pro Minute",
    # Length
    "km": "Kilometer",
    "cm": "Zentimeter",
    "mm": "Millimeter",
    "m": "Meter",
    # Area / volume
    "cm²": "Quadratzentimeter",
    "m²": "Quadratmeter",
    "m³": "Kubikmeter",
    # Mass
    "kg": "Kilogramm",
    "mg": "Milligramm",
    "g": "Gramm",
    # Temperature
    "°C": "Grad Celsius",
    "°F": "Grad Fahrenheit",
    # Electricity / energy
    "kWh": "Kilowattstunde",
    "kW": "Kilowatt",
    "W": "Watt",
    "V": "Volt",
    "A": "Ampere",
    "J": "Joule",
    # Frequency
    "GHz": "Gigahertz",
    "MHz": "Megahertz",
    "kHz": "Kilohertz",
    "Hz": "Hertz",
    # Pressure
    "kPa": "Kilopascal",
    "bar": "bar",
    # Data storage. The conventional byte symbols (MB, KB, kB) are listed next
    # to the historical "Mb"/"Kb" spellings, which are kept for compatibility.
    "PB": "Petabyte",
    "TB": "Terabyte",
    "GB": "Gigabyte",
    "MB": "Megabyte",
    "Mb": "Megabyte",
    "KB": "Kilobyte",
    "kB": "Kilobyte",
    "Kb": "Kilobyte",
    # Miscellaneous
    "€": "Euro",
    "$": "Dollar",
    "%": "Prozent",
}
|
||||
|
||||
# Built-in phonetic approximations for frequent foreign names and anglicisms
# (German). Original author's guidance: only add terms that the German TTS
# mispronounces; anglicisms such as "Cloud", "Update" or "Meeting" sound
# acceptable in German and therefore get no entry.
DEFAULT_PRONUNCIATION_DE: dict[str, str] = {
    # Chinese proper names
    "Xi Jinping": "Schi Jinping",
    "Xi": "Schi",
    "Jinping": "Jinping",
    "Peking": "Peking",
    # Tech brand names with problematic pronunciation
    "GitHub": "Git Hab",
    "LinkedIn": "Linked In",
    "YouTube": "Jutjub",
    "Wi-Fi": "Wai Fai",
    "iPhone": "Aiphone",
    "MacBook": "Mäk Buk",
    "ChatGPT": "Tschet Dschie Pie Tie",
    "OpenAI": "Open A I",
    # AI terms
    "GPT": "Dschie Pie Tie",
    "LLM": "El El Em",
}
|
||||
|
||||
|
||||
|
|
@ -131,13 +212,79 @@ def apply_pronunciation_dict(text: str, pron_dict: dict[str, str]) -> str:
|
|||
return text
|
||||
|
||||
|
||||
import unicodedata as _unicodedata
|
||||
|
||||
# Markdown-Muster für strip_markdown()
|
||||
_MD_CODE_BLOCK = re.compile(r'```[\s\S]*?```')
|
||||
_MD_INLINE_CODE = re.compile(r'`([^`\n]+)`')
|
||||
_MD_BOLD_ITALIC = re.compile(r'[*_]{1,3}([^*_\n]+)[*_]{1,3}')
|
||||
_MD_HEADING = re.compile(r'^#{1,6}\s+', re.MULTILINE)
|
||||
_MD_LIST_ITEM = re.compile(r'^\s*[-*+]\s+', re.MULTILINE)
|
||||
_MD_LINK = re.compile(r'\[([^\]]+)\]\([^\)]+\)')
|
||||
_MD_IMAGE = re.compile(r'!\[([^\]]*)\]\([^\)]+\)')
|
||||
_MD_BLOCKQUOTE = re.compile(r'^\s*>\s?', re.MULTILINE)
|
||||
_MD_HR = re.compile(r'^\s*[-*_]{3,}\s*$', re.MULTILINE)
|
||||
|
||||
# Unicode-Kategorien, die Emojis und nicht druckbare Symbole abdecken
|
||||
_EMOJI_CATEGORIES = frozenset({"So", "Cn", "Co"})
|
||||
|
||||
|
||||
def strip_markdown(text: str) -> str:
|
||||
"""Entfernt Markdown-Formatierung und gibt lesbaren Klartext zurück."""
|
||||
text = _MD_CODE_BLOCK.sub('', text) # ```...``` komplett entfernen
|
||||
text = _MD_INLINE_CODE.sub(r'\1', text) # `code` → code
|
||||
text = _MD_IMAGE.sub(r'\1', text) #  → alt
|
||||
text = _MD_LINK.sub(r'\1', text) # [text](url) → text
|
||||
text = _MD_BOLD_ITALIC.sub(r'\1', text) # **fett** / *kursiv* → Inhalt
|
||||
text = _MD_HEADING.sub('', text) # # Überschrift → Überschrift
|
||||
text = _MD_BLOCKQUOTE.sub('', text) # > Zitat → Zitat
|
||||
text = _MD_LIST_ITEM.sub('', text) # - Punkt → Punkt
|
||||
text = _MD_HR.sub('', text) # --- / *** komplett entfernen
|
||||
return text
|
||||
|
||||
|
||||
def clean_raw_text(text: str) -> str:
|
||||
"""Unsichtbare Steuerzeichen entfernen, die Splitting oder TTS stoeren."""
|
||||
"""Unsichtbare Steuerzeichen und Emojis entfernen, die Splitting oder TTS stoeren."""
|
||||
for ch in ('', '', '', ''):
|
||||
text = text.replace(ch, '')
|
||||
# Emojis und nicht-druckbare Sondersymbole entfernen
|
||||
text = ''.join(
|
||||
ch for ch in text
|
||||
if _unicodedata.category(ch) not in _EMOJI_CATEGORIES
|
||||
)
|
||||
return text
|
||||
|
||||
|
||||
# Regex for language markers [xx]...[/xx] in the text (two lowercase letters,
# closing tag must repeat the opening tag).
_LANG_SPAN_RE = re.compile(r'\[([a-z]{2})\](.*?)\[/\1\]', re.DOTALL)


def extract_language_spans(text: str, default_lang: str) -> List[Tuple[str, str]]:
    """Split text containing [xx]...[/xx] markers into (segment, lang) tuples.

    Example:
        "Das [en]Machine Learning[/en] Modell."
        -> [("Das", "de"), ("Machine Learning", "en"), ("Modell.", "de")]

    Segments are stripped of surrounding whitespace and empty segments are
    skipped. A marker whose tag is not in SUPPORTED_LANGS keeps its content but
    is assigned ``default_lang`` instead of being dropped. If no segment
    results at all, ``[(text, default_lang)]`` is returned unchanged.

    Args:
        text: Input text, optionally containing language markers.
        default_lang: Language used for all unmarked text.

    Returns:
        The segments in document order, each paired with its language code.
    """
    segments: List[Tuple[str, str]] = []
    cursor = 0
    for match in _LANG_SPAN_RE.finditer(text):
        leading = text[cursor:match.start()].strip()
        if leading:
            segments.append((leading, default_lang))
        tag = match.group(1)
        content = match.group(2).strip()
        if content:
            # Never lose text: unknown tags fall back to the default language.
            segments.append((content, tag if tag in SUPPORTED_LANGS else default_lang))
        cursor = match.end()
    trailing = text[cursor:].strip()
    if trailing:
        segments.append((trailing, default_lang))
    return segments if segments else [(text, default_lang)]
|
||||
|
||||
|
||||
def has_module(name: str) -> bool:
    """Return True if the module ``name`` can be located, without importing it.

    For dotted names importlib imports the parent package first; a missing
    parent raises ModuleNotFoundError, and a module with ``__spec__`` set to
    None raises ValueError. Both cases are reported as "not available".
    """
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        return False
|
||||
|
||||
|
|
@ -197,6 +344,18 @@ def number_to_words_de(n: int) -> str:
|
|||
prefix = "eintausend" if th == 1 else f"{number_to_words_de(th)}tausend"
|
||||
return prefix if r == 0 else f"{prefix}{number_to_words_de(r)}"
|
||||
|
||||
if n < 1_000_000_000:
|
||||
m = n // 1_000_000
|
||||
r = n % 1_000_000
|
||||
prefix = "eine Million" if m == 1 else f"{number_to_words_de(m)} Millionen"
|
||||
return prefix if r == 0 else f"{prefix} {number_to_words_de(r)}"
|
||||
|
||||
if n < 1_000_000_000_000:
|
||||
b = n // 1_000_000_000
|
||||
r = n % 1_000_000_000
|
||||
prefix = "eine Milliarde" if b == 1 else f"{number_to_words_de(b)} Milliarden"
|
||||
return prefix if r == 0 else f"{prefix} {number_to_words_de(r)}"
|
||||
|
||||
return str(n)
|
||||
|
||||
|
||||
|
|
@ -233,6 +392,18 @@ def number_to_words_en(n: int) -> str:
|
|||
prefix = f"{number_to_words_en(th)} thousand"
|
||||
return prefix if r == 0 else f"{prefix} {number_to_words_en(r)}"
|
||||
|
||||
if n < 1_000_000_000:
|
||||
m = n // 1_000_000
|
||||
r = n % 1_000_000
|
||||
prefix = f"{number_to_words_en(m)} million"
|
||||
return prefix if r == 0 else f"{prefix} {number_to_words_en(r)}"
|
||||
|
||||
if n < 1_000_000_000_000:
|
||||
b = n // 1_000_000_000
|
||||
r = n % 1_000_000_000
|
||||
prefix = f"{number_to_words_en(b)} billion"
|
||||
return prefix if r == 0 else f"{prefix} {number_to_words_en(r)}"
|
||||
|
||||
return str(n)
|
||||
|
||||
|
||||
|
|
@ -476,6 +647,10 @@ def force_split_sentence(text: str, max_len: int) -> List[str]:
|
|||
|
||||
|
||||
def split_into_sentences(text: str, max_len: int = 200) -> List[str]:
|
||||
# Nummerierte Listenpunkte ("1. ...", "2. ...") als eigene Absätze normalisieren,
|
||||
# damit sie nicht mit benachbarten Sätzen zusammengefasst werden.
|
||||
text = re.sub(r'(?m)^(\d+\.\s+)', r'\n\n\1', text)
|
||||
|
||||
result = []
|
||||
for part in text.split("\n\n"):
|
||||
part = part.strip()
|
||||
|
|
@ -483,13 +658,15 @@ def split_into_sentences(text: str, max_len: int = 200) -> List[str]:
|
|||
continue
|
||||
if SEPARATOR_LINE_RE.match(part):
|
||||
continue
|
||||
sentences = SENTENCE_END_RE.findall(part)
|
||||
# Abkürzungspunkte temporär maskieren, damit sie nicht als Satzenden gelten.
|
||||
masked = _ABBREV_MASK_RE.sub(lambda m: m.group(0)[:-1] + "\x00", part)
|
||||
sentences = SENTENCE_END_RE.findall(masked)
|
||||
consumed = "".join(sentences).strip()
|
||||
rest = part[len(consumed):].strip()
|
||||
rest = masked[len(consumed):].strip()
|
||||
if rest:
|
||||
sentences.append(rest)
|
||||
for sentence in sentences:
|
||||
sentence = sentence.strip()
|
||||
sentence = sentence.replace("\x00", ".").strip()
|
||||
if not sentence:
|
||||
continue
|
||||
if len(sentence) > max_len:
|
||||
|
|
@ -592,12 +769,21 @@ class PlaybackWorker:
|
|||
raise RuntimeError(
|
||||
"Für Live-Wiedergabe ist das Modul 'sounddevice' nötig. Installiere z. B. 'pip install sounddevice'."
|
||||
)
|
||||
if self.device is not None:
|
||||
import sounddevice as sd
|
||||
try:
|
||||
sd.query_devices(self.device)
|
||||
except ValueError:
|
||||
available = [d["name"] for d in sd.query_devices()]
|
||||
raise RuntimeError(
|
||||
f"Audio-Gerät nicht gefunden: '{self.device}'. Verfügbare Geräte: {available}"
|
||||
)
|
||||
self.thread = threading.Thread(target=self._run, daemon=True)
|
||||
self.thread.start()
|
||||
|
||||
def _callback(self, outdata, frames, time_info, status):
|
||||
# Läuft im Audio-Thread: so schnell wie möglich, kein Lock nötig.
|
||||
if self.stop_event and self.stop_event.is_set():
|
||||
if (self.stop_event and self.stop_event.is_set()) or PAUSE_REQUESTED.is_set():
|
||||
outdata[:] = 0.0
|
||||
return
|
||||
try:
|
||||
|
|
@ -729,15 +915,10 @@ def synthesize_non_streaming(
|
|||
|
||||
model, model_kind, sr = load_model(lang, device, t3_model=t3_model)
|
||||
|
||||
if sentence_mode:
|
||||
raw_chunks = split_into_sentences(text, max_len=max_len)
|
||||
elif conversation_mode:
|
||||
raw_chunks = split_for_conversation(text, first_chunk_len=first_chunk_len, max_len=max_len)
|
||||
else:
|
||||
raw_chunks = split_long_text(text, max_len=max_len)
|
||||
# [xx]...[/xx]-Sprachmarkierungen extrahieren; ohne Markierungen → ein Span in default lang.
|
||||
lang_spans = extract_language_spans(text, lang)
|
||||
|
||||
preprocess_kw = dict(
|
||||
lang=lang,
|
||||
spell_uppercase_acronyms=spell_uppercase_acronyms,
|
||||
acronym_mode=acronym_mode,
|
||||
normalize_time_values=normalize_time_values,
|
||||
|
|
@ -745,12 +926,25 @@ def synthesize_non_streaming(
|
|||
normalize_units_values=normalize_units_values,
|
||||
pronunciation_dict=pronunciation_dict,
|
||||
)
|
||||
chunks = [preprocess_tts_text(c, **preprocess_kw) for c in raw_chunks]
|
||||
chunks = [c for c in chunks if c.strip()]
|
||||
# chunk_pairs: Liste von (verarbeiteter_Text, chunk_lang)
|
||||
chunk_pairs: List[Tuple[str, str]] = []
|
||||
for span_idx, (span_text, span_lang) in enumerate(lang_spans):
|
||||
if sentence_mode:
|
||||
raw = split_into_sentences(span_text, max_len=max_len)
|
||||
elif conversation_mode and span_idx == 0:
|
||||
raw = split_for_conversation(span_text, first_chunk_len=first_chunk_len, max_len=max_len)
|
||||
else:
|
||||
raw = split_long_text(span_text, max_len=max_len)
|
||||
for c in raw:
|
||||
processed = preprocess_tts_text(c, lang=span_lang, **preprocess_kw)
|
||||
if processed.strip():
|
||||
chunk_pairs.append((processed, span_lang))
|
||||
|
||||
if not chunks:
|
||||
if not chunk_pairs:
|
||||
raise ValueError("Kein verwertbarer Text nach dem Einlesen gefunden.")
|
||||
|
||||
chunks = [t for t, _ in chunk_pairs] # für Progress-Anzeige
|
||||
|
||||
if show_progress:
|
||||
print(f"Sprache: {lang}")
|
||||
print(f"Gerät: {device}")
|
||||
|
|
@ -771,18 +965,21 @@ def synthesize_non_streaming(
|
|||
|
||||
wavs = []
|
||||
try:
|
||||
for i, chunk in enumerate(chunks, start=1):
|
||||
for i, (chunk, chunk_lang) in enumerate(chunk_pairs, start=1):
|
||||
if stop_event and stop_event.is_set():
|
||||
if show_progress:
|
||||
print("Abbruch angefordert – Synthese gestoppt.")
|
||||
break
|
||||
if _wait_while_paused(stop_event):
|
||||
break
|
||||
if debug_delay > 0:
|
||||
if show_progress:
|
||||
print(f"[{i}/{len(chunks)}] Warte {debug_delay:.0f}s (debug_delay) ...")
|
||||
time.sleep(debug_delay)
|
||||
if show_progress:
|
||||
print(f"[{i}/{len(chunks)}] Generiere ({len(chunk)} Zeichen) ...")
|
||||
wav = generate_chunk(model, model_kind, chunk, lang, voice_path)
|
||||
lang_hint = f" [{chunk_lang}]" if chunk_lang != lang else ""
|
||||
print(f"[{i}/{len(chunks)}] Generiere ({len(chunk)} Zeichen){lang_hint} ...")
|
||||
wav = generate_chunk(model, model_kind, chunk, chunk_lang, voice_path)
|
||||
wavs.append(wav)
|
||||
if playback is not None:
|
||||
playback.put(wav)
|
||||
|
|
@ -985,6 +1182,7 @@ def build_argparser() -> argparse.ArgumentParser:
|
|||
p.add_argument("--debug-delay", type=float, default=0.0, help="Sekunden Pause vor jedem Satz (simuliert langsame KI). Nur zum Testen.")
|
||||
p.add_argument("--t3-model", type=str, default="v3", help="Multilingual T3-Modell: 'v3' (default), 'v2' oder Dateiname.")
|
||||
p.add_argument("--no-conversation-mode", action="store_true", help="Ersten Chunk nicht künstlich kleiner machen (nur ohne --no-sentence-mode).")
|
||||
p.add_argument("--no-strip-markdown", action="store_true", help="Markdown-Formatierung (**, *, #, etc.) nicht aus dem Text entfernen.")
|
||||
p.add_argument("--stop", action="store_true", help="Globales Stop-Signal setzen (für Tests und Service-Integration).")
|
||||
return p
|
||||
|
||||
|
|
@ -998,8 +1196,17 @@ def main() -> int:
|
|||
print("Stop-Signal gesetzt.")
|
||||
return 0
|
||||
|
||||
if args.speed <= 0:
|
||||
parser.error(f"--speed muss positiv sein, erhalten: {args.speed}")
|
||||
if args.first_chunk_len > args.max_len:
|
||||
parser.error(
|
||||
f"--first-chunk-len ({args.first_chunk_len}) darf nicht größer sein als --len ({args.max_len})"
|
||||
)
|
||||
|
||||
try:
|
||||
text = read_input_text(args.text, args.input)
|
||||
if not args.no_strip_markdown:
|
||||
text = strip_markdown(text)
|
||||
device = get_device(args.device)
|
||||
output_path = Path(args.output) if args.output else default_output_path(args.input, args.lang)
|
||||
|
||||
|
|
@ -1015,7 +1222,10 @@ def main() -> int:
|
|||
pron_path = Path(args.pronunciation_dict)
|
||||
if not pron_path.exists():
|
||||
raise FileNotFoundError(f"Aussprache-Dict nicht gefunden: {pron_path}")
|
||||
pronunciation_dict = json.loads(pron_path.read_text(encoding="utf-8"))
|
||||
try:
|
||||
pronunciation_dict = json.loads(pron_path.read_text(encoding="utf-8"))
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Ungültiges JSON in {pron_path}: {e}") from e
|
||||
|
||||
clear_stop()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue