Vollständiges Multi-Agenten-System für Fact-Checking, Artikelschreiben und Argumentationsanalyse. Zwei Backends: llama.cpp (★ bevorzugt) und Ollama. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
299 lines
10 KiB
TypeScript
299 lines
10 KiB
TypeScript
/**
 * lib/router.ts
 * Model router: decides whether a local Ollama model or OpenRouter is used.
 *
 * Strategy:
 * - Local (Ollama): claim extraction, structuring, simple classification,
 *   article writing (default), verdict synthesis, OCR/vision
 * - OpenRouter: deep argumentation analysis, complex reasoning,
 *   demanding writing/copy-editing
 *
 * Prefers inexpensive Chinese models (DeepSeek, Qwen3) where available —
 * Gemini only as a fallback / explicit choice.
 *
 * Configuration via environment variables:
 *   ROUTER_FORCE_LOCAL=1 → always Ollama (for tests / offline)
 *   ROUTER_FORCE_CLOUD=1 → always OpenRouter (only effective when OPENROUTER_API_KEY is set)
 *   OPENROUTER_API_KEY   → OpenRouter key (required for cloud calls)
 *   OLLAMA_HOST          → Ollama URL (default: http://localhost:11434)
 */
|
|
|
|
/** Kinds of work the router knows how to place on a model. */
export type TaskType =
  | "claim_extraction" // text → structured claims (ideal for local)
  | "verdict_synthesis" // claims + evidence → verdict (local is good enough)
  | "article_writing" // verified claims → article text
  | "logic_analysis" // argumentation analysis (reasoning-heavy)
  | "deep_reasoning" // complex multi-step analysis
  | "style_editing" // style improvement, copy-editing
  | "ocr" // OCR / text recognition from an image → local (vision model)
  | "vision_analysis"; // image description / image analysis → local preferred
|
|
|
|
/** Caller-supplied difficulty hint that the router uses when picking a model. */
export type ComplexityHint =
  | "low"
  | "medium"
  | "high";
|
|
|
|
/** Outcome of a routing decision: which backend and model to use, and why. */
export type RouterDecision = {
  /** Backend that should serve the request. */
  provider: "ollama" | "openrouter";
  /** Model identifier to pass to the chosen provider. */
  model: string;
  /** Short human-readable explanation of the choice. */
  reason: string;
};
|
|
|
|
// ---------------------------------------------------------------------------
// Local models (Ollama, RTX 3090 24GB)
// ---------------------------------------------------------------------------
|
|
|
|
/** Ollama model tags used for local inference, keyed by role. */
const LOCAL_MODELS = {
  // Text
  fast: "qwen3.5:27b", // 17GB — default all-rounder
  reasoning: "deepseek-r1:32b", // 19GB — built-in reasoning
  small: "qwen3:8b", // 5.2GB — fast for simple tasks
  // Vision / OCR
  ocr: "fredrezones55/chandra-ocr-2:patch", // 5.8GB — OCR-specialised
  vision: "qwen3-vl:latest", // 6.1GB — general vision-language
  vision_small: "minicpm-v:latest", // 5.5GB — lightweight
} as const;
|
|
|
|
// ---------------------------------------------------------------------------
// OpenRouter models — ordered by cost/performance (as of 2025/2026)
// Prices in USD per 1M tokens: https://openrouter.ai/models
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * OpenRouter model slugs, keyed by role.
 * The prices in the comments are the original author's approximations.
 */
const CLOUD_MODELS = {
  // DeepSeek — extremely cheap, very capable
  /** DeepSeek V3 — ~$0.014/M in, $0.028/M out — best price/performance ratio */
  cheap: "deepseek/deepseek-chat-v3-0324",
  /** DeepSeek R1 — ~$0.55/M in, $2.19/M out — strong reasoning, cheaper than Gemini Pro */
  reasoning: "deepseek/deepseek-r1",

  // Qwen3 (Alibaba) — good and inexpensive
  /** Qwen3 235B A22B MoE — ~$0.13/M in, $0.60/M out — Alibaba's flagship */
  qwen_large: "qwen/qwen3-235b-a22b",
  /** Qwen3 30B A3B — ~$0.03/M in, $0.10/M out — faster and cheaper */
  qwen_fast: "qwen/qwen3-30b-a3b",

  // Google Gemini — fallback / explicit use
  /** Gemini 2.5 Flash — ~$0.15/M in, $0.60/M out */
  gemini_flash: "google/gemini-2.5-flash",
  /** Gemini 2.5 Flash Lite — ~$0.075/M in, $0.30/M out */
  gemini_lite: "google/gemini-2.5-flash-lite",
  /** Gemini 2.5 Pro — ~$1.25/M in, $10.0/M out — only for delicate high-stakes cases */
  gemini_pro: "google/gemini-2.5-pro",
} as const;
|
|
|
|
// ---------------------------------------------------------------------------
// Routing rules
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Decides which provider and model should handle a task, based on the task
 * type and a complexity hint.
 *
 * Cloud routes prefer inexpensive Chinese models (DeepSeek, Qwen3) over more
 * expensive Western alternatives.
 *
 * Environment switches are read on every call:
 * - `ROUTER_FORCE_LOCAL=1` always yields an Ollama model and takes precedence
 *   over `ROUTER_FORCE_CLOUD`.
 * - `ROUTER_FORCE_CLOUD=1` yields an OpenRouter model, but only when
 *   `OPENROUTER_API_KEY` is set; without a key the regular rules below apply.
 *
 * @param task - Kind of work to route.
 * @param complexity - Difficulty hint; defaults to `"medium"`.
 * @returns Provider, model identifier and a short reason for the choice.
 * @throws Error when `task` is not a known task type (only reachable from
 *   untyped callers).
 */
export function routeModel(task: TaskType, complexity: ComplexityHint = "medium"): RouterDecision {
  const forceLocal = process.env.ROUTER_FORCE_LOCAL === "1";
  const forceCloud = process.env.ROUTER_FORCE_CLOUD === "1";
  const hasOpenRouter = !!process.env.OPENROUTER_API_KEY;
  const isHighComplexity = complexity === "high";

  if (forceLocal) {
    let localModel: string = LOCAL_MODELS.fast;
    if (task === "ocr") {
      localModel = LOCAL_MODELS.ocr;
    } else if (task === "vision_analysis") {
      localModel = LOCAL_MODELS.vision;
    } else if (task === "deep_reasoning" || task === "logic_analysis") {
      localModel = LOCAL_MODELS.reasoning;
    }
    return { provider: "ollama", model: localModel, reason: "ROUTER_FORCE_LOCAL gesetzt" };
  }

  if (forceCloud && hasOpenRouter) {
    if (task === "ocr" || task === "vision_analysis") {
      // Image tasks need a model that accepts image input.
      // NOTE(review): DeepSeek V3/R1 are listed as text-only on OpenRouter while
      // Gemini 2.5 Flash is multimodal — confirm on the OpenRouter models page.
      return {
        provider: "openrouter",
        model: CLOUD_MODELS.gemini_flash,
        reason: "ROUTER_FORCE_CLOUD gesetzt → Gemini 2.5 Flash (Vision-fähig)",
      };
    }
    return {
      provider: "openrouter",
      model: isHighComplexity ? CLOUD_MODELS.reasoning : CLOUD_MODELS.cheap,
      reason: "ROUTER_FORCE_CLOUD gesetzt",
    };
  }

  switch (task) {
    // --- Always local ---
    case "claim_extraction":
    case "verdict_synthesis":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.fast,
        reason: "Strukturierter Extraktions-Task → Ollama optimal",
      };

    // --- Always local (vision models) ---
    case "ocr":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.ocr,
        reason: "OCR → lokales Chandra-OCR-2 (5.8GB, RTX 3090)",
      };

    case "vision_analysis":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.vision,
        reason: "Bildanalyse → lokales qwen3-vl (6.1GB, RTX 3090)",
      };

    // --- Local preferred, cloud when needed ---
    case "article_writing":
    case "style_editing":
      if (complexity === "low") {
        return {
          provider: "ollama",
          model: LOCAL_MODELS.fast,
          reason: "Einfaches Schreiben → Ollama ausreichend",
        };
      }
      if (hasOpenRouter) {
        return {
          provider: "openrouter",
          // DeepSeek V3 is very cheap and writes very good text.
          model: CLOUD_MODELS.cheap,
          reason: "Anspruchsvolles Schreiben → DeepSeek V3 (günstig, stark)",
        };
      }
      return {
        provider: "ollama",
        model: LOCAL_MODELS.fast,
        reason: "OpenRouter nicht verfügbar → Ollama Fallback",
      };

    // --- Cloud preferred for reasoning ---
    case "logic_analysis":
      if (!hasOpenRouter) {
        return {
          provider: "ollama",
          model: LOCAL_MODELS.reasoning,
          reason: "Argumentationsanalyse → deepseek-r1 lokal (kein OpenRouter-Key)",
        };
      }
      // DeepSeek R1 is a dedicated reasoning model and much cheaper than Gemini Pro;
      // DeepSeek V3 covers everything below "high".
      return isHighComplexity
        ? {
            provider: "openrouter",
            model: CLOUD_MODELS.reasoning,
            reason: "Komplexe Argumentationsanalyse → DeepSeek R1 (Reasoning-Modell)",
          }
        : {
            provider: "openrouter",
            model: CLOUD_MODELS.cheap,
            reason: "Argumentationsanalyse → DeepSeek V3 (günstig + kompetent)",
          };

    case "deep_reasoning":
      if (!hasOpenRouter) {
        return {
          provider: "ollama",
          model: LOCAL_MODELS.reasoning,
          reason: "Deep Reasoning → deepseek-r1 lokal (kein OpenRouter-Key)",
        };
      }
      // DeepSeek R1 for "high"; Qwen3 235B for every other complexity level
      // (the reason string says "medium" for "low" as well).
      return isHighComplexity
        ? {
            provider: "openrouter",
            model: CLOUD_MODELS.reasoning,
            reason: "Deep Reasoning (high) → DeepSeek R1 (günstig, stark)",
          }
        : {
            provider: "openrouter",
            model: CLOUD_MODELS.qwen_large,
            reason: "Deep Reasoning (medium) → Qwen3 235B A22B",
          };

    default: {
      // Compile-time exhaustiveness check; at runtime this guards untyped callers
      // that would otherwise silently receive `undefined`.
      const unknownTask: never = task;
      throw new Error(`Unbekannter Task-Typ: ${String(unknownTask)}`);
    }
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
// OpenRouter API call (generic)
// ---------------------------------------------------------------------------
|
|
|
|
/** One chat message in the list sent to OpenRouter. */
export type OpenRouterMessage = {
  role: "system" | "user" | "assistant";
  content: string;
};
|
|
|
|
/**
 * Calls a model through the OpenRouter chat-completions endpoint.
 *
 * @param model - OpenRouter model slug, sent as-is in the request body.
 * @param messages - Chat messages to send.
 * @param options - Optional settings: `temperature` (default 0.3),
 *   `maxTokens` (default 2000, sent as `max_tokens`) and an `AbortSignal`
 *   that is handed to `fetch`.
 * @returns The trimmed text of the first choice, the token counts reported in
 *   `usage` (0 when absent) and the wall-clock latency in milliseconds.
 * @throws Error when `OPENROUTER_API_KEY` is not set, when the HTTP status is
 *   not OK, when the response body carries an `error` object, or when the
 *   first choice has no text.
 */
export async function callOpenRouter(
  model: string,
  messages: OpenRouterMessage[],
  options?: {
    temperature?: number;
    maxTokens?: number;
    signal?: AbortSignal;
  }
): Promise<{ text: string; promptTokens: number; completionTokens: number; latencyMs: number }> {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) throw new Error("OPENROUTER_API_KEY ist nicht gesetzt");

  const startedAt = Date.now();

  const resp = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
      "HTTP-Referer": "https://pi.local",
      "X-Title": "Pi Text-Agent",
    },
    body: JSON.stringify({
      model,
      messages,
      temperature: options?.temperature ?? 0.3,
      max_tokens: options?.maxTokens ?? 2000,
    }),
    signal: options?.signal,
  });

  if (!resp.ok) {
    // Best effort: an unreadable error body must not mask the HTTP status.
    const errorBody = await resp.text().catch(() => "");
    throw new Error(`OpenRouter Fehler ${resp.status}: ${errorBody}`);
  }

  const data = await resp.json() as {
    choices?: Array<{ message?: { content?: string } }>;
    usage?: { prompt_tokens?: number; completion_tokens?: number };
    error?: { code?: number | string; message?: string };
  };

  // A 200 response can still carry an error object; report that instead of
  // the misleading "empty response" below.
  if (data.error) {
    const code = data.error.code ?? resp.status;
    throw new Error(`OpenRouter Fehler ${code}: ${data.error.message ?? ""}`);
  }

  const text = data.choices?.[0]?.message?.content?.trim() ?? "";
  if (!text) throw new Error("Leere Antwort von OpenRouter");

  return {
    text,
    promptTokens: data.usage?.prompt_tokens ?? 0,
    completionTokens: data.usage?.completion_tokens ?? 0,
    latencyMs: Date.now() - startedAt,
  };
}
|
|
|
|
// ---------------------------------------------------------------------------
// Cost estimation
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Approximate OpenRouter prices in USD per 1M tokens as [input, output],
 * as of 2025/2026. A Map is used so that model names such as "constructor"
 * or "__proto__" cannot resolve to inherited object properties.
 */
const OPENROUTER_PRICING_USD_PER_MTOK = new Map<string, readonly [number, number]>([
  // DeepSeek — extremely cheap
  ["deepseek/deepseek-chat-v3-0324", [0.014, 0.028]],
  ["deepseek/deepseek-chat", [0.014, 0.028]], // alias
  ["deepseek/deepseek-r1", [0.55, 2.19]],

  // Qwen3 (Alibaba)
  ["qwen/qwen3-235b-a22b", [0.13, 0.60]],
  ["qwen/qwen3-30b-a3b", [0.03, 0.10]],

  // Google Gemini
  ["google/gemini-2.5-flash", [0.15, 0.60]],
  ["google/gemini-2.5-flash-lite", [0.075, 0.30]],
  ["google/gemini-2.5-pro", [1.25, 10.0]],
]);

/** Conservative [input, output] price used for models missing from the table. */
const OPENROUTER_FALLBACK_PRICING: readonly [number, number] = [1.0, 3.0];

/**
 * Estimates the approximate cost of an OpenRouter call in USD.
 * Prices are approximations — for exact figures see the OpenRouter dashboard.
 *
 * @param model - OpenRouter model slug; unknown slugs use a conservative
 *   fallback price of $1.00 in / $3.00 out per 1M tokens.
 * @param promptTokens - Number of input tokens.
 * @param completionTokens - Number of output tokens.
 * @returns Estimated cost in USD.
 */
export function estimateOpenRouterCost(
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  const [inPrice, outPrice] =
    OPENROUTER_PRICING_USD_PER_MTOK.get(model) ?? OPENROUTER_FALLBACK_PRICING;
  return (promptTokens / 1_000_000) * inPrice
    + (completionTokens / 1_000_000) * outPrice;
}
|