feat: Pi Text-Agent — initialer Commit (sauberes Repo)
Vollständiges Multi-Agenten-System für Fact-Checking, Artikelschreiben und Argumentationsanalyse. Zwei Backends: llama.cpp (★ bevorzugt) und Ollama. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
5146b7fa30
62 changed files with 11279 additions and 0 deletions
162
lib/cache.ts
Normal file
162
lib/cache.ts
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
/**
|
||||
* lib/cache.ts
|
||||
* Hash-basierter File-Cache für Perplexity-Ergebnisse.
|
||||
*
|
||||
* Vermeidet doppelte Perplexity-Kosten wenn derselbe Claim in mehreren Artikeln
|
||||
* oder in Wiederholungsläufen geprüft wird.
|
||||
*
|
||||
* Ablageort: ~/.pi/agent/cache/perplexity/<sha256>.json
|
||||
* TTL: 7 Tage (ältere Einträge werden beim Lesen ignoriert)
|
||||
* Schlüssel: SHA256 des normalisierten Claim-Textes
|
||||
*
|
||||
* Verwendung in verify-article.ts:
|
||||
* import { getCached, setCached } from "../lib/cache.js";
|
||||
* const cached = getCached(claimText);
|
||||
* if (cached) return cached;
|
||||
* const result = await searchPerplexity(claimText, opts);
|
||||
* setCached(claimText, result);
|
||||
*/
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
import { mkdirSync, writeFileSync, readFileSync, statSync, readdirSync, unlinkSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Konstanten
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Directory that holds one `<sha256>.json` file per cached Perplexity result. */
export const CACHE_DIR = join(homedir(), ".pi", "agent", "cache", "perplexity");

/** Maximum age of a cache entry in milliseconds (7 days). */
const TTL_MS = 7 * 24 * 3600 * 1000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Interner Typ (Cache-Datei)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** On-disk shape of a single cache file. */
type CacheEntry<T> = {
  /** ISO timestamp recorded when the entry was written. */
  cachedAt: string;
  /** The cached payload. */
  data: T;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hilfsfunktionen
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creates the cache directory together with any missing parent directories. */
function ensureCacheDir(): void {
  const mkdirOptions = { recursive: true };
  mkdirSync(CACHE_DIR, mkdirOptions);
}
|
||||
|
||||
/**
 * Normalizes a claim text so that equivalent spellings produce the same hash:
 *   - Unicode NFC normalization (a decomposed "a" + U+0308 and the precomposed
 *     "ä" would otherwise hash differently although they render identically)
 *   - lower-casing
 *   - every run of whitespace collapsed to a single space
 *   - leading/trailing whitespace removed
 *
 * NFC is applied first; text that already is in NFC therefore hashes exactly
 * as it did before this step existed.
 */
function normalizeText(text: string): string {
  return text.normalize("NFC").toLowerCase().replace(/\s+/g, " ").trim();
}

/**
 * SHA-256 hash of the normalized claim text.
 *
 * @param claimText Raw claim text; it is normalized before hashing.
 * @returns The digest as a 64-character lowercase hex string.
 */
export function claimHash(claimText: string): string {
  return createHash("sha256").update(normalizeText(claimText), "utf8").digest("hex");
}
|
||||
|
||||
/** Builds the path of the cache file that belongs to the given hash. */
function cachePath(hash: string): string {
  const fileName = hash + ".json";
  return join(CACHE_DIR, fileName);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Öffentliche API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Reads the cached Perplexity value for the given claim text.
 *
 * Returns null when
 *   - no cache file exists for the claim,
 *   - the file is older than TTL_MS (age is taken from the file's mtime),
 *   - the file cannot be read or parsed, or
 *   - the parsed content is not an object carrying a `data` field.
 *
 * The payload itself is not validated against `T`; the caller decides which
 * type it expects.
 *
 * @param claimText Raw claim text (normalized and hashed internally).
 * @returns The cached payload, or null as described above.
 */
export function getCached<T>(claimText: string): T | null {
  try {
    const path = cachePath(claimHash(claimText));
    const ageMs = Date.now() - statSync(path).mtimeMs;
    if (ageMs > TTL_MS) return null; // expired

    const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
    // A file that parses but is not a cache entry (e.g. `{}`, `[]`, `42`) counts as
    // corrupt; without this check such a file would leak `undefined` to the caller.
    if (typeof parsed !== "object" || parsed === null || !("data" in parsed)) {
      return null;
    }
    return (parsed as CacheEntry<T>).data;
  } catch {
    // Missing file, unreadable file or invalid JSON: treat as a cache miss.
    return null;
  }
}
|
||||
|
||||
/**
 * Stores a Perplexity result in the cache, keyed by the hash of the claim text.
 * Any failure while writing is swallowed on purpose: the cache is optional.
 *
 * @param claimText Raw claim text (normalized and hashed internally).
 * @param data      Payload to store; it is serialized as JSON.
 */
export function setCached<T>(claimText: string, data: T): void {
  try {
    ensureCacheDir();
    const record: CacheEntry<T> = { cachedAt: new Date().toISOString(), data };
    const target = cachePath(claimHash(claimText));
    const serialized = JSON.stringify(record, null, 2);
    writeFileSync(target, serialized, "utf8");
  } catch {
    // Cache errors must never interrupt the program flow.
  }
}
|
||||
|
||||
/**
 * Deletes every `.json` file in the cache directory whose mtime is older than TTL_MS.
 *
 * @returns The number of files that were deleted; 0 when the directory cannot be
 *          created or listed.
 */
export function pruneCache(): number {
  // Removes one file when it is expired; reports whether it was removed.
  const removeIfExpired = (fileName: string): boolean => {
    try {
      const fullPath = join(CACHE_DIR, fileName);
      const ageMs = Date.now() - statSync(fullPath).mtimeMs;
      if (!(ageMs > TTL_MS)) return false;
      unlinkSync(fullPath);
      return true;
    } catch {
      // A failure on a single file is ignored.
      return false;
    }
  };

  try {
    ensureCacheDir();
    let removed = 0;
    for (const name of readdirSync(CACHE_DIR)) {
      if (name.endsWith(".json") && removeIfExpired(name)) removed += 1;
    }
    return removed;
  } catch {
    return 0;
  }
}
|
||||
|
||||
/**
 * Collects statistics about the `.json` files in the cache directory.
 *
 * @returns `total` = number of cache files, `expired` = how many of them are older
 *          than TTL_MS (by mtime), `sizeBytes` = summed file size. Files that cannot
 *          be stat'ed still count towards `total` but contribute nothing else.
 *          All zeros when the directory cannot be created or listed.
 */
export function cacheStats(): { total: number; expired: number; sizeBytes: number } {
  const stats = { total: 0, expired: 0, sizeBytes: 0 };
  try {
    ensureCacheDir();
    const cacheFiles = readdirSync(CACHE_DIR).filter((name) => name.endsWith(".json"));
    stats.total = cacheFiles.length;
    cacheFiles.forEach((name) => {
      try {
        const info = statSync(join(CACHE_DIR, name));
        stats.sizeBytes += info.size;
        if (Date.now() - info.mtimeMs > TTL_MS) stats.expired += 1;
      } catch {
        // A file that vanished or is unreadable is skipped.
      }
    });
    return stats;
  } catch {
    return { total: 0, expired: 0, sizeBytes: 0 };
  }
}
|
||||
308
lib/jobs.ts
Normal file
308
lib/jobs.ts
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
/**
|
||||
* lib/jobs.ts
|
||||
* Job-Speicher für die Pipeline-Agenten.
|
||||
*
|
||||
* Verzeichnisstruktur:
|
||||
* ~/.pi/agent/jobs/<datum>_<slug>/
|
||||
* ├── input.txt ← Originaltext
|
||||
* ├── claims.json ← Ausgabe ollama-claim-extractor (ClaimSet)
|
||||
* ├── perplexity/
|
||||
* │ ├── c001.json ← Perplexity-Ergebnis pro Claim (PerplexityResult)
|
||||
* │ └── c002.json
|
||||
* ├── report.json ← Ausgabe verify-article (VerificationReport)
|
||||
* ├── article.md ← Ausgabe writer
|
||||
* └── meta.json ← Timestamp, Modell, Kosten, Status
|
||||
*
|
||||
* Verwendung:
|
||||
* import { createJob, findJobDir, saveJobFile, loadJobFile, updateJobMeta } from "../lib/jobs.js";
|
||||
* const jobDir = createJob("umerziehung", "qwen3.5:27b");
|
||||
* saveJobFile(jobDir, "claims.json", claimSet);
|
||||
* const cached = loadJobFile<ClaimSet>(jobDir, "claims.json");
|
||||
* updateJobMeta(jobDir, { status: "verifying" });
|
||||
*/
|
||||
|
||||
import { mkdirSync, writeFileSync, readFileSync, readdirSync, statSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Konstanten
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Root directory under which every job gets its own `<date>_<slug>` sub-directory. */
export const JOBS_DIR = join(homedir(), ".pi", "agent", "jobs");
|
||||
|
||||
/** Lifecycle state of a job as stored in meta.json. */
export type JobStatus =
  | "created"
  | "extracting"
  | "verifying"
  | "writing"
  | "completed"
  | "failed";

/** Content of a job's meta.json. */
export type JobMeta = {
  slug: string;
  /** Directory name of the job: `<date>_<slug>`. */
  jobId: string;
  model: string;
  createdAt: string;
  updatedAt: string;
  status: JobStatus;
  /** Per-step results; a key is present once the corresponding step has been recorded. */
  steps: {
    extract?: {
      completedAt: string;
      totalClaims: number;
      checkableClaims: number;
      latencyMs: number;
    };
    verify?: {
      completedAt: string;
      claimsVerified: number;
      totalCostUSD: number;
      latencyMs: number;
    };
    write?: {
      completedAt: string;
      style: string;
      wordCount: number;
      provider: string;
      costUSD: number;
    };
  };
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Interne Hilfsfunktionen
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Creates `dir` together with any missing parent directories. */
function ensureDir(dir: string): void {
  const mkdirOptions = { recursive: true };
  mkdirSync(dir, mkdirOptions);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Job erstellen
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Creates a job directory `<JOBS_DIR>/<date>_<slug>` (with its `perplexity`
 * sub-directory) and writes a fresh meta.json into it.
 *
 * If the directory already exists, its meta.json is overwritten with a fresh
 * "created" record.
 *
 * @param slug  Short, human-readable name (e.g. "umerziehung", "klimaartikel").
 *              It is lower-cased, every character outside a-z, 0-9, "-" and "_"
 *              is replaced by "_", and the result is cut to 40 characters.
 * @param model Name of the model used for the job; stored in meta.json.
 * @returns Path of the job directory.
 */
export function createJob(slug: string, model: string): string {
  // One timestamp for everything, so the date prefix, createdAt and updatedAt
  // of a new job can never disagree with each other.
  const now = new Date().toISOString();
  const date = now.slice(0, 10); // e.g. "2026-04-16"
  const safeSlug = slug.toLowerCase().replace(/[^a-z0-9_-]/g, "_").slice(0, 40);
  const jobId = `${date}_${safeSlug}`;
  const jobDir = join(JOBS_DIR, jobId);

  // ensureDir is recursive, so this also creates JOBS_DIR and jobDir.
  ensureDir(join(jobDir, "perplexity"));

  const meta: JobMeta = {
    slug: safeSlug,
    jobId,
    model,
    createdAt: now,
    updatedAt: now,
    status: "created",
    steps: {},
  };

  writeFileSync(join(jobDir, "meta.json"), JSON.stringify(meta, null, 2), "utf8");
  return jobDir;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Job finden
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Finds the newest job directory that belongs to the given slug.
 *
 * A directory matches only when its name is exactly `<YYYY-MM-DD>_<slug>`, with
 * the slug sanitized the same way createJob does it. A plain suffix check is not
 * enough: it would make slug "artikel" also match "2026-04-16_klima_artikel".
 *
 * @param slug Job slug as passed to createJob.
 * @returns Path of the newest matching directory, or null when there is none or
 *          the jobs directory cannot be read.
 */
export function findJobDir(slug: string): string | null {
  const datePrefix = /^\d{4}-\d{2}-\d{2}_/;
  const datePrefixLength = "YYYY-MM-DD_".length;

  try {
    const safeSlug = slug.toLowerCase().replace(/[^a-z0-9_-]/g, "_").slice(0, 40);
    const matches = readdirSync(JOBS_DIR).filter((name) => {
      if (!datePrefix.test(name) || name.slice(datePrefixLength) !== safeSlug) {
        return false;
      }
      try {
        return statSync(join(JOBS_DIR, name)).isDirectory();
      } catch {
        return false;
      }
    });

    // The ISO date prefix makes lexicographic order chronological: last = newest.
    const newest = matches.sort().pop();
    return newest === undefined ? null : join(JOBS_DIR, newest);
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Returns the job directory for a slug, creating the job when none exists yet.
 *
 * @param slug  Job slug.
 * @param model Model name; only used when a new job has to be created.
 * @returns `jobDir` plus `isNew`: false when an existing job is reused (resume),
 *          true when the job was just created.
 */
export function getOrCreateJob(slug: string, model: string): { jobDir: string; isNew: boolean } {
  const jobDir = findJobDir(slug);
  return jobDir
    ? { jobDir, isNew: false }
    : { jobDir: createJob(slug, model), isNew: true };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dateien lesen / schreiben
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Writes a file into the job directory.
 *
 * @param jobDir   Job directory.
 * @param filename File name relative to `jobDir`.
 * @param data     A string is written unchanged; anything else is serialized
 *                 as JSON with 2-space indentation.
 */
export function saveJobFile(jobDir: string, filename: string, data: unknown): void {
  let content: string;
  if (typeof data === "string") {
    content = data;
  } else {
    content = JSON.stringify(data, null, 2);
  }
  writeFileSync(join(jobDir, filename), content, "utf8");
}
|
||||
|
||||
/**
 * Reads a file from the job directory and parses it as JSON.
 *
 * @param jobDir   Job directory.
 * @param filename File name relative to `jobDir`.
 * @returns The parsed value cast to `T` (the shape is not validated), or null
 *          when the file cannot be read or does not contain valid JSON.
 */
export function loadJobFile<T>(jobDir: string, filename: string): T | null {
  let raw: string;
  try {
    raw = readFileSync(join(jobDir, filename), "utf8");
  } catch {
    return null;
  }

  try {
    return JSON.parse(raw) as T;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Reads a file from the job directory as a raw UTF-8 string.
 *
 * @returns The file content, or null when the file cannot be read.
 */
export function loadJobText(jobDir: string, filename: string): string | null {
  let text: string | null = null;
  try {
    text = readFileSync(join(jobDir, filename), "utf8");
  } catch {
    // Missing or unreadable file: keep null.
  }
  return text;
}
|
||||
|
||||
/**
 * Checks whether `filename` exists inside the job directory.
 * The check is a plain stat, so a directory of that name counts as existing too.
 */
export function jobFileExists(jobDir: string, filename: string): boolean {
  let found = true;
  try {
    statSync(join(jobDir, filename));
  } catch {
    found = false;
  }
  return found;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Meta-Daten aktualisieren
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Merges `updates` into the job's meta.json and rewrites the file.
 *
 * Top-level fields are merged shallowly. `steps` is merged one level deep: a step
 * given in `updates.steps` replaces that whole step and leaves the other steps
 * untouched. `updatedAt` is always set to the current time.
 *
 * When meta.json is missing or unparsable, the merge starts from a blank record
 * (empty slug/jobId/model, status "created").
 *
 * @param jobDir  Job directory containing meta.json.
 * @param updates Fields to change.
 */
export function updateJobMeta(
  jobDir: string,
  updates: Partial<Omit<JobMeta, "slug" | "jobId" | "createdAt">> & { steps?: Partial<JobMeta["steps"]> }
): void {
  const metaPath = join(jobDir, "meta.json");

  // Loads the stored record, or falls back to a blank one.
  const readCurrent = (): JobMeta => {
    try {
      return JSON.parse(readFileSync(metaPath, "utf8")) as JobMeta;
    } catch {
      return {
        slug: "",
        jobId: "",
        model: "",
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
        status: "created",
        steps: {},
      };
    }
  };

  const current = readCurrent();
  const mergedSteps = { ...current.steps, ...updates.steps };
  const updated: JobMeta = {
    ...current,
    ...updates,
    steps: mergedSteps,
    updatedAt: new Date().toISOString(),
  };

  writeFileSync(metaPath, JSON.stringify(updated, null, 2), "utf8");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Jobs auflisten
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Lists the meta.json content of every job directory, sorted by directory name in
 * descending order (newest first, thanks to the date prefix).
 *
 * Directories without a readable, parsable meta.json are left out.
 *
 * @returns The job metadata, or an empty array when the jobs directory cannot be
 *          created or listed.
 */
export function listJobs(): JobMeta[] {
  const isDirectory = (name: string): boolean => {
    try {
      return statSync(join(JOBS_DIR, name)).isDirectory();
    } catch {
      return false;
    }
  };

  try {
    ensureDir(JOBS_DIR);
    const dirNames = readdirSync(JOBS_DIR).filter(isDirectory).sort().reverse();

    const jobs: JobMeta[] = [];
    for (const dirName of dirNames) {
      try {
        const metaFile = join(JOBS_DIR, dirName, "meta.json");
        const meta = JSON.parse(readFileSync(metaFile, "utf8")) as JobMeta | null;
        if (meta !== null) jobs.push(meta);
      } catch {
        // No usable meta.json: skip this directory.
      }
    }
    return jobs;
  } catch {
    return [];
  }
}
|
||||
|
||||
/**
 * Renders a job list as plain text for CLI output: a header line followed by one
 * line per job with status icon, job id, status and a summary of recorded steps.
 *
 * @param jobs Jobs to render, in display order.
 * @returns The rendered text, or a fixed message when `jobs` is empty.
 */
export function formatJobList(jobs: JobMeta[]): string {
  if (jobs.length === 0) return "Keine Jobs gefunden.";

  const STATUS_ICON: Record<JobStatus, string> = {
    created: "○",
    extracting: "⟳",
    verifying: "⟳",
    writing: "⟳",
    completed: "✓",
    failed: "✗",
  };

  const rows = jobs.map((job) => {
    // "?" covers status values that are not part of JobStatus (e.g. hand-edited files).
    const icon = STATUS_ICON[job.status] ?? "?";
    const { extract, verify, write } = job.steps;
    const details = [
      extract ? `${extract.totalClaims} Claims` : null,
      verify ? `$${verify.totalCostUSD.toFixed(4)} Perplexity` : null,
      write ? `${write.wordCount}w ${write.style}` : null,
    ].filter((part): part is string => part !== null);
    const suffix = details.length ? " " + details.join(", ") : "";
    return `${icon} ${job.jobId} [${job.status}]${suffix}`;
  });

  return [`Jobs in ${JOBS_DIR}:\n`, ...rows].join("\n");
}
|
||||
107
lib/logger.ts
Normal file
107
lib/logger.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
|
||||
* lib/logger.ts
|
||||
* Einfacher File-Logger für alle Agenten.
|
||||
*
|
||||
* Schreibt strukturierte Log-Einträge nach ~/.pi/agent/logs/<timestamp>[_<jobId>].log
|
||||
* Im verbose-Modus werden alle Einträge zusätzlich auf stderr ausgegeben.
|
||||
* Warnung/Fehler gehen immer auf stderr (unabhängig von verbose).
|
||||
*
|
||||
* Verwendung:
|
||||
* import { createLogger } from "../lib/logger.js";
|
||||
* const log = createLogger({ verbose: cliFlags.verbose });
|
||||
* log.info("Claims extrahieren...", { model, numChunks: 3 });
|
||||
* log.warn("0 Claims in Chunk", { chunk: 2 });
|
||||
* log.error("Ollama nicht erreichbar", { url: OLLAMA_HOST });
|
||||
*/
|
||||
|
||||
import { appendFileSync, mkdirSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Konstanten
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Directory that receives the log files. */
export const LOG_DIR = join(homedir(), ".pi", "agent", "logs");

/** Severity of a log entry. */
export type LogLevel = "info" | "warn" | "error" | "debug";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Logger-Klasse
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Line-oriented logger that appends to an optional log file and mirrors entries
 * to stderr.
 *
 * Each entry has the form `[<ISO timestamp>] [<LEVEL>] <message>[ <JSON data>]`.
 * "warn" and "error" entries always go to stderr; "info" and "debug" only in
 * verbose mode.
 */
export class Logger {
  private logFile: string | null;
  private verbose: boolean;

  /**
   * @param opts.logFile Path of the file to append to; no file is written when omitted.
   * @param opts.verbose When true, every entry is also written to stderr.
   */
  constructor(opts?: { logFile?: string; verbose?: boolean }) {
    const target = opts?.logFile;
    const chatty = opts?.verbose;
    this.logFile = target ?? null;
    this.verbose = chatty ?? false;
  }

  /**
   * Formats one entry and emits it.
   *
   * @param level   Severity; upper-cased and padded to 5 characters in the output.
   * @param message Free-text message.
   * @param data    Optional structured data, appended as JSON.
   */
  log(level: LogLevel, message: string, data?: Record<string, unknown>): void {
    const ts = new Date().toISOString();
    const payload = data ? " " + JSON.stringify(data) : "";
    const tag = level.toUpperCase().padEnd(5);
    const line = "[" + ts + "] [" + tag + "] " + message + payload + "\n";

    // Synchronous append; a failing write is ignored so that logging can never
    // break the program flow.
    if (this.logFile) {
      try {
        appendFileSync(this.logFile, line);
      } catch {
        // ignored on purpose
      }
    }

    const alwaysOnStderr = level === "warn" || level === "error";
    if (alwaysOnStderr || this.verbose) {
      process.stderr.write(line);
    }
  }

  /** Logs an "info" entry. */
  info(message: string, data?: Record<string, unknown>): void {
    this.log("info", message, data);
  }

  /** Logs a "warn" entry (always mirrored to stderr). */
  warn(message: string, data?: Record<string, unknown>): void {
    this.log("warn", message, data);
  }

  /** Logs an "error" entry (always mirrored to stderr). */
  error(message: string, data?: Record<string, unknown>): void {
    this.log("error", message, data);
  }

  /** Logs a "debug" entry. */
  debug(message: string, data?: Record<string, unknown>): void {
    this.log("debug", message, data);
  }
}

// Logger without a log file and with verbose off, for contexts where no logging is
// wanted: "info"/"debug" entries are dropped, "warn"/"error" still reach stderr.
export const nullLogger = new Logger();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Creates a Logger that writes to `<LOG_DIR>/<timestamp>[_<jobId>].log`.
 *
 * The timestamp has second resolution (e.g. "2026-04-16_14-30-00").
 *
 * @param opts.jobId   Optional suffix for the file name (e.g. "umerziehung").
 *                     Path separators and other characters that are not valid in
 *                     file names are replaced by "_"; otherwise the log file
 *                     could not be created and, because Logger ignores write
 *                     errors, every entry would be lost silently.
 * @param opts.verbose When true, all entries are also written to stderr.
 * @returns The configured Logger.
 */
export function createLogger(opts?: { jobId?: string; verbose?: boolean }): Logger {
  try {
    mkdirSync(LOG_DIR, { recursive: true });
  } catch {
    // E.g. no write access. Not fatal: Logger ignores failing file writes.
  }

  const ts = new Date()
    .toISOString()
    .replace(/T/, "_")
    .replace(/:/g, "-")
    .slice(0, 19); // "2026-04-16_14-30-00"

  const safeJobId = opts?.jobId?.replace(/[\\/:*?"<>|\x00-\x1f]/g, "_");
  const suffix = safeJobId ? `_${safeJobId}` : "";
  const logFile = join(LOG_DIR, `${ts}${suffix}.log`);

  return new Logger({ logFile, verbose: opts?.verbose ?? false });
}
|
||||
237
lib/ollama.ts
Normal file
237
lib/ollama.ts
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
/**
|
||||
* lib/ollama.ts
|
||||
* Zentraler Ollama-Client: Text-Chat und Vision/OCR-Aufrufe.
|
||||
*
|
||||
* Neu angelegte Agenten nutzen diesen Client statt inline-fetch.
|
||||
* Bestehende Agenten (ollama-claim-extractor, verifier) können schrittweise migriert werden.
|
||||
*
|
||||
* Konfiguration:
|
||||
* OLLAMA_HOST → Ollama-URL (Standard: http://localhost:11434)
|
||||
*/
|
||||
|
||||
/**
 * Turns the raw OLLAMA_HOST setting into a base URL that can be prefixed to
 * "/api/...":
 *   - unset, empty or whitespace-only → "http://localhost:11434"
 *   - a value without a scheme (e.g. "127.0.0.1:11434") → "http://" is prepended
 *   - trailing slashes are removed, so no "//api/..." URLs are built
 */
function normalizeOllamaHost(raw: string | undefined): string {
  const trimmed = raw?.trim() ?? "";
  if (trimmed === "") return "http://localhost:11434";

  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  const withScheme = hasScheme ? trimmed : `http://${trimmed}`;
  return withScheme.replace(/\/+$/, "");
}

/** Base URL of the Ollama server, taken from the OLLAMA_HOST environment variable. */
export const OLLAMA_HOST = normalizeOllamaHost(process.env.OLLAMA_HOST);
|
||||
|
||||
/** One chat message as sent to the Ollama chat endpoint. */
export type OllamaMessage = {
  role: "system" | "user" | "assistant";
  content: string;
  /** Base64-encoded images (vision calls). */
  images?: string[];
};
|
||||
|
||||
/** Normalized result of an Ollama chat call. */
export type OllamaResult = {
  /** Response text. */
  text: string;
  promptTokens: number;
  completionTokens: number;
  /** Elapsed time of the call in milliseconds. */
  latencyMs: number;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Intern
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Maximum number of attempts per Ollama call. */
const MAX_RETRIES = 3;

/** Pause between two attempts, in milliseconds (15 seconds). */
const RETRY_DELAY_MS = 15 * 1000;
|
||||
|
||||
/** Returns a promise that resolves once a timer of `ms` milliseconds has fired. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Haupt-Aufruf
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Generic Ollama chat call (text or vision) against `${OLLAMA_HOST}/api/chat`.
 * For vision, set `images` on the messages or use callOllamaVision().
 *
 * Retry policy: up to MAX_RETRIES attempts with RETRY_DELAY_MS between them for
 * network errors, unparsable responses and HTTP 408/429/5xx. Two cases are NOT
 * retried, because waiting cannot fix them:
 *   - the caller's AbortSignal is aborted → the abort error is rethrown at once
 *   - any other non-OK HTTP status (e.g. 400, 404) → thrown immediately
 *
 * @param model    Name of the Ollama model.
 * @param messages Chat history to send.
 * @param options.format      "json" or a JSON schema for structured output.
 * @param options.temperature Sampling temperature (default 0.1).
 * @param options.numCtx      Sent as `num_ctx` when set to a non-zero value.
 * @param options.numPredict  Sent as `num_predict` when set to a non-zero value.
 * @param options.think       Sent as the request's `think` field (default false).
 *                            Thinking is unwanted for structured output (JSON,
 *                            extraction), hence the default.
 * @param options.signal      AbortSignal passed to fetch.
 * @returns Response text, token counts and the elapsed time since the call
 *          started (retries and pauses included).
 * @throws Error when the request body cannot be serialized, when a request fails
 *         with a non-retryable status, when the call is aborted, or when all
 *         attempts have failed.
 */
export async function callOllamaChat(
  model: string,
  messages: OllamaMessage[],
  options?: {
    /** JSON schema for structured output (Ollama >= 0.5) */
    format?: "json" | Record<string, unknown>;
    temperature?: number;
    numCtx?: number;
    numPredict?: number;
    /** Thinking mode for qwen3/deepseek-r1 models (default: false). */
    think?: boolean;
    signal?: AbortSignal;
  }
): Promise<OllamaResult> {
  const t0 = Date.now();
  const think = options?.think ?? false;

  const body: Record<string, unknown> = {
    model,
    messages,
    stream: false,
    think,
    options: {
      temperature: options?.temperature ?? 0.1,
      ...(options?.numCtx ? { num_ctx: options.numCtx } : {}),
      ...(options?.numPredict ? { num_predict: options.numPredict } : {}),
    },
  };
  if (options?.format !== undefined) {
    body.format = options.format;
  }
  // Serialized once: the payload is identical for every attempt and can be large
  // (base64 images).
  const payload = JSON.stringify(body);

  let lastError: unknown;
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    let retryable = true;
    try {
      const resp = await fetch(`${OLLAMA_HOST}/api/chat`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: payload,
        signal: options?.signal,
      });

      if (!resp.ok) {
        const errText = await resp.text().catch(() => "");
        retryable = resp.status === 408 || resp.status === 429 || resp.status >= 500;
        throw new Error(`Ollama HTTP ${resp.status}: ${errText}`);
      }

      const data = (await resp.json()) as {
        message?: { content?: string; thinking?: string };
        prompt_eval_count?: number;
        eval_count?: number;
      };

      // With thinking enabled the answer may be empty; fall back to the thinking
      // field in that case. `||` is intentional: an empty string must fall through.
      const text =
        data.message?.content?.trim() ||
        (think ? data.message?.thinking?.trim() : "") ||
        "";

      return {
        text,
        promptTokens: data.prompt_eval_count ?? 0,
        completionTokens: data.eval_count ?? 0,
        latencyMs: Date.now() - t0,
      };
    } catch (err) {
      // Neither a cancelled call nor a permanent HTTP error gets better by waiting.
      if (!retryable || options?.signal?.aborted) throw err;
      lastError = err;
      if (attempt < MAX_RETRIES) await sleep(RETRY_DELAY_MS);
    }
  }

  throw new Error(
    `Ollama fehlgeschlagen nach ${MAX_RETRIES} Versuchen: ${
      lastError instanceof Error ? lastError.message : String(lastError)
    }`
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vision / OCR
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Resolves an image source to the raw base64 string Ollama expects.
 *
 * Accepted forms:
 *   - data URL ("data:image/png;base64,....") → the base64 part
 *   - path starting with "~" → leading "~" replaced by $HOME, file is read
 *   - path starting with "/" → file is read
 *   - anything else → taken as base64 already
 *
 * "/" is part of the base64 alphabet and every base64-encoded JPEG starts with
 * "/9j/", so a leading "/" alone does not prove that the value is a path. A
 * value that consists only of base64 characters is therefore used as base64
 * when it is too long to be a path, or when reading it as a file fails.
 */
async function loadImageAsBase64(imageSource: string): Promise<string> {
  const dataUrlPrefix = /^data:[^,]*;base64,/i.exec(imageSource);
  if (dataUrlPrefix) return imageSource.slice(dataUrlPrefix[0].length);

  const isPathLike = imageSource.startsWith("/") || imageSource.startsWith("~");
  if (!isPathLike) return imageSource; // already base64

  const looksLikeBase64 =
    imageSource.length % 4 === 0 && /^[A-Za-z0-9+/]+={0,2}$/.test(imageSource);
  // Longer than a filesystem path can plausibly be (4096 assumed as upper bound):
  // skip the pointless file access.
  if (looksLikeBase64 && imageSource.length > 4096) return imageSource;

  const { readFile } = await import("node:fs/promises");
  const resolvedPath = imageSource.startsWith("~")
    ? imageSource.replace(/^~/, process.env.HOME ?? "/root")
    : imageSource;

  try {
    const buf = await readFile(resolvedPath);
    return buf.toString("base64");
  } catch (err) {
    if (looksLikeBase64) return imageSource;
    throw err;
  }
}

/**
 * Ollama call with an image input (vision / OCR).
 *
 * Recommended models (all fit on an RTX 3090 24GB):
 *   fredrezones55/chandra-ocr-2:patch  5.8GB — OCR-specialized, documents/scans
 *   qwen3-vl:latest                    6.1GB — vision-language, description + OCR
 *   qwen2.5vl:7b                       6.0GB — alternative to qwen3-vl
 *   minicpm-v:latest                   5.5GB — lightweight, good for simple OCR
 *
 * @param model       Name of the vision model.
 * @param imageSource Absolute file path ("/…"), "~"-prefixed path, base64 string
 *                    or base64 data URL.
 * @param prompt      User prompt sent together with the image.
 * @param options.systemPrompt Optional system message placed before the prompt.
 * @param options.temperature  Sampling temperature (default 0.1).
 * @param options.signal       AbortSignal passed through to the chat call.
 * @returns The result of the underlying callOllamaChat() call.
 * @throws The file-system error when `imageSource` is a path that cannot be read,
 *         plus everything callOllamaChat() throws.
 */
export async function callOllamaVision(
  model: string,
  imageSource: string,
  prompt: string,
  options?: {
    systemPrompt?: string;
    temperature?: number;
    signal?: AbortSignal;
  }
): Promise<OllamaResult> {
  const imageBase64 = await loadImageAsBase64(imageSource);

  const messages: OllamaMessage[] = [];
  if (options?.systemPrompt) {
    messages.push({ role: "system", content: options.systemPrompt });
  }
  messages.push({ role: "user", content: prompt, images: [imageBase64] });

  return callOllamaChat(model, messages, {
    temperature: options?.temperature ?? 0.1,
    signal: options?.signal,
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Modell-Infos (lokal installiert, passend für RTX 3090 24GB)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * All known local Ollama models, grouped by category.
 * `vramGB` is the model size in GB as listed, `gpus` the number of GPUs.
 */
export const LOCAL_CATALOG = {
  // --- Text / reasoning ---
  text: {
    /** 17GB — main all-rounder, 1 GPU */
    "qwen3.5:27b": { vramGB: 17, gpus: 1 },
    /** 19GB — built-in reasoning (DeepSeek R1), 1 GPU */
    "deepseek-r1:32b": { vramGB: 19, gpus: 1 },
    /** 18GB — code + general purpose, 128k context, 1 GPU */
    "qwen3-coder-30b-128k:latest": { vramGB: 18, gpus: 1 },
    /** 18GB — GPU-optimized variant of the Qwen3 coder, 1 GPU */
    "qwen3-coder-30b-gpu:latest": { vramGB: 18, gpus: 1 },
    /** 18GB — GLM-4.7 Flash, Chinese model, 1 GPU */
    "glm-4.7-flash:latest": { vramGB: 18, gpus: 1 },
    /** 17GB — Gemma4 26B by Google, 1 GPU */
    "gemma4:26b": { vramGB: 17, gpus: 1 },
    /** 9.6GB — Gemma4 E4B (efficiency variant), 1 GPU */
    "gemma4:e4b": { vramGB: 9.6, gpus: 1 },
    /** 9.0GB — Qwen2.5 14B Instruct, 1 GPU */
    "qwen2.5:14b-instruct": { vramGB: 9, gpus: 1 },
    /** 5.2GB — Qwen3 8B, fast for simple tasks */
    "qwen3:8b": { vramGB: 5.2, gpus: 1 },
    /** 4.9GB — Llama 3.1 8B */
    "llama3.1:8b": { vramGB: 4.9, gpus: 1 },
    /** 7.1GB — Mistral Nemo */
    "mistral-nemo:latest": { vramGB: 7.1, gpus: 1 },
  },

  // --- Code ---
  code: {
    /** 9.0GB — Qwen2.5-Coder 14B, 1 GPU */
    "qwen2.5-coder:14b": { vramGB: 9, gpus: 1 },
    /** 4.7GB — Qwen2.5-Coder 7B, fast */
    "qwen2.5-coder:7b": { vramGB: 4.7, gpus: 1 },
  },

  // --- Vision / OCR ---
  vision: {
    /** 5.8GB — OCR-specialized (Chandra OCR 2) */
    "fredrezones55/chandra-ocr-2:patch": { vramGB: 5.8, gpus: 1 },
    /** 6.1GB — Qwen3 vision-language model */
    "qwen3-vl:latest": { vramGB: 6.1, gpus: 1 },
    /** 6.0GB — Qwen2.5 vision-language 7B */
    "qwen2.5vl:7b": { vramGB: 6, gpus: 1 },
    /** 5.5GB — MiniCPM-V, lightweight */
    "minicpm-v:latest": { vramGB: 5.5, gpus: 1 },
    /** 3.3GB — Qwen3-VL 4B, very small */
    "qwen3-vl:4b": { vramGB: 3.3, gpus: 1 },
  },

  // --- Embedding ---
  embedding: {
    /** 4.7GB — Qwen3 Embedding */
    "qwen3-embedding:latest": { vramGB: 4.7, gpus: 1 },
  },
} as const;
|
||||
175
lib/perplexity.ts
Normal file
175
lib/perplexity.ts
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
/**
|
||||
* lib/perplexity.ts
|
||||
* Gemeinsamer Perplexity-Sonar-Wrapper für alle Agenten.
|
||||
* Wird von verifier.ts und verify-article.ts genutzt.
|
||||
*/
|
||||
|
||||
/** Token prices per model, in USD per one million tokens. */
const PRICING = {
  sonar: { inputPerM: 1, outputPerM: 1 },
  "sonar-pro": { inputPerM: 3, outputPerM: 15 },
} as const;

/** Flat fee in USD charged per search query. */
const SEARCH_COST_PER_CALL = 0.005;
|
||||
|
||||
/** One source backing a Perplexity answer. */
export type PerplexitySource = {
  url: string;
  title: string | undefined;
  snippet: string | undefined;
};

/** Normalized result of a Perplexity search. */
export type PerplexityResult = {
  summary: string;
  sources: PerplexitySource[];
  model: string;
  promptTokens: number;
  completionTokens: number;
  searchQueries: number;
  estimatedCostUSD: number;
};
|
||||
|
||||
/**
 * The fields of the Perplexity chat-completions response that this module reads.
 * Everything is optional because the response is not validated.
 */
type PerplexityApiResponse = {
  model?: string;
  citations?: string[];
  search_results?: Array<{ url?: string; title?: string; snippet?: string }>;
  choices?: Array<{ message?: { content?: string } }>;
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
    search_queries?: number;
  };
};
|
||||
|
||||
/** Marks a failure that is worth retrying (used for HTTP 429 and 5xx responses). */
class RetryableError extends Error {}
|
||||
|
||||
/** Suspends the awaiting caller until a timer of `ms` milliseconds has fired. */
async function sleep(ms: number) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Estimates the cost of one Perplexity call in USD.
 *
 * @param model            Model name; models without a PRICING entry are billed
 *                         at the "sonar" rates.
 * @param promptTokens     Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @param searchQueries    Number of search queries, each charged SEARCH_COST_PER_CALL.
 * @returns Token cost (per-million rates) plus search fees.
 */
function estimateCost(model: string, promptTokens: number, completionTokens: number, searchQueries: number): number {
  const rates = PRICING[model as keyof typeof PRICING] ?? PRICING["sonar"];
  const inputCost = (promptTokens / 1_000_000) * rates.inputPerM;
  const outputCost = (completionTokens / 1_000_000) * rates.outputPerM;
  const searchCost = searchQueries * SEARCH_COST_PER_CALL;
  return inputCost + outputCost + searchCost;
}
|
||||
|
||||
/**
 * Extracts up to 8 unique sources from a Perplexity response.
 *
 * `search_results` entries that carry a URL are preferred because they include
 * title and snippet. Only when they yield nothing are the bare `citations` used,
 * restricted to http(s) URLs.
 *
 * @param data Raw API response.
 * @returns De-duplicated sources in their original order, at most 8.
 */
function parseSources(data: PerplexityApiResponse): PerplexitySource[] {
  const seen = new Set<string>();

  const fromSearch: PerplexitySource[] = [];
  for (const result of data.search_results ?? []) {
    const url = result?.url;
    if (!url || seen.has(url)) continue;
    seen.add(url);
    fromSearch.push({ url, title: result.title, snippet: result.snippet });
  }
  if (fromSearch.length > 0) return fromSearch.slice(0, 8);

  const fromCitations: PerplexitySource[] = [];
  for (const citation of data.citations ?? []) {
    const isHttpUrl = typeof citation === "string" && /^https?:\/\//.test(citation);
    if (!isHttpUrl || seen.has(citation)) continue;
    seen.add(citation);
    fromCitations.push({ url: citation, title: undefined, snippet: undefined });
  }
  return fromCitations.slice(0, 8);
}
|
||||
|
||||
/**
 * Calls the Perplexity Sonar chat-completions API and returns a normalized result.
 *
 * Mode "fast" (default) uses the `sonar` model with a low search context and a
 * 350-token answer limit; mode "deep" uses `sonar-pro`, a high search context
 * and a 600-token limit. `maxTokens` overrides the limit in either mode.
 *
 * Up to three attempts are made. HTTP 429, HTTP 5xx, an empty answer and
 * network-level fetch failures are retried after 400 ms and 800 ms; any other
 * error (including an aborted request) ends the call immediately.
 *
 * @param query   Research question that is embedded into the user message.
 * @param options Optional mode, recency filter (sent as `search_recency_filter`),
 *                abort signal and answer token limit.
 * @returns Summary text, parsed sources, token/search usage and an estimated cost.
 * @throws Error if PERPLEXITY_API_KEY is not set, otherwise the last error that occurred.
 */
export async function searchPerplexity(
  query: string,
  options?: {
    mode?: "fast" | "deep";
    recency?: string;
    signal?: AbortSignal;
    maxTokens?: number;
  }
): Promise<PerplexityResult> {
  const apiKey = process.env.PERPLEXITY_API_KEY;
  if (!apiKey) throw new Error("PERPLEXITY_API_KEY ist nicht gesetzt");

  const maxAttempts = 3;
  const mode = options?.mode ?? "fast";
  const model: "sonar" | "sonar-pro" = mode === "deep" ? "sonar-pro" : "sonar";
  const contextSize = mode === "deep" ? "high" : "low";
  const maxTokens = options?.maxTokens ?? (mode === "deep" ? 600 : 350);

  const body: Record<string, unknown> = {
    model,
    messages: [
      {
        role: "system",
        content:
          "Du bist ein Recherche-Tool für Fact-Checking. " +
          "Recherchiere präzise und faktisch. " +
          "Setze Inline-Zitierungen [1][2][3] direkt nach jedem belegten Satz. " +
          "Fokussiere auf überprüfbare Fakten und Primärquellen.",
      },
      {
        role: "user",
        content: `Recherchefrage zum Fact-Checking:\n\n${query}`,
      },
    ],
    max_tokens: maxTokens,
    temperature: 0.1,
    web_search_options: { search_context_size: contextSize },
  };

  if (options?.recency) body.search_recency_filter = options.recency;

  let lastError: unknown;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    // Set only when fetch itself rejected with a network-level error.
    let networkFailure = false;
    try {
      const resp = await fetch("https://api.perplexity.ai/chat/completions", {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
        body: JSON.stringify(body),
        signal: options?.signal,
      }).catch((err: unknown) => {
        // fetch rejects with a TypeError on connection problems; an abort rejects
        // with an AbortError instead and must not be retried.
        networkFailure = err instanceof TypeError && !options?.signal?.aborted;
        throw err;
      });

      if (!resp.ok) {
        const text = await resp.text().catch(() => "");
        // Rate limiting and server errors are transient; other statuses are final.
        if (resp.status === 429 || resp.status >= 500) throw new RetryableError(`Perplexity ${resp.status}: ${text}`);
        throw new Error(`Perplexity ${resp.status}: ${text}`);
      }

      const data = (await resp.json()) as PerplexityApiResponse;
      const summary = data.choices?.[0]?.message?.content?.trim() ?? "";
      if (!summary) throw new RetryableError("Leere Antwort von Perplexity");

      const sources = parseSources(data);
      const usage = data.usage ?? {};
      const promptTokens = usage.prompt_tokens ?? 0;
      const completionTokens = usage.completion_tokens ?? 0;
      // A missing search count is treated as a single search.
      const searchQueries = usage.search_queries ?? 1;
      // Prefer the model name reported by the API over the requested one.
      const finalModel = data.model ?? model;

      return {
        summary,
        sources,
        model: finalModel,
        promptTokens,
        completionTokens,
        searchQueries,
        estimatedCostUSD: estimateCost(finalModel, promptTokens, completionTokens, searchQueries),
      };
    } catch (err) {
      lastError = err;
      const retryable = networkFailure || err instanceof RetryableError;
      if (!retryable || attempt === maxAttempts) break;
      // Exponential backoff: 400 ms, then 800 ms.
      await sleep(400 * 2 ** (attempt - 1));
    }
  }

  throw lastError instanceof Error ? lastError : new Error("Perplexity-Fehler");
}
|
||||
|
||||
/**
 * Renders sources as a compact numbered list for embedding into prompts.
 *
 * Each entry reads `[n] title (url)`; the URL stands in for a missing title.
 * A snippet, if present, follows on its own quoted line, cut to
 * `maxSnippetLen` characters with a trailing "…" when it was shortened.
 *
 * @param sources       Sources to list, numbered from 1 in the given order.
 * @param maxSnippetLen Maximum number of snippet characters to keep (default 250).
 * @returns The entries joined by newlines; an empty string for an empty list.
 */
export function formatSourcesForPrompt(sources: PerplexitySource[], maxSnippetLen = 250): string {
  const entries: string[] = [];
  sources.forEach((source, index) => {
    let entry = `[${index + 1}] ${source.title ?? source.url} (${source.url})`;
    if (source.snippet) {
      const ellipsis = source.snippet.length > maxSnippetLen ? "…" : "";
      entry += `\n "${source.snippet.slice(0, maxSnippetLen)}${ellipsis}"`;
    }
    entries.push(entry);
  });
  return entries.join("\n");
}
|
||||
299
lib/router.ts
Normal file
299
lib/router.ts
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
/**
|
||||
* lib/router.ts
|
||||
* Model-Router: Entscheidet ob lokales Ollama oder OpenRouter verwendet wird.
|
||||
*
|
||||
* Strategie:
|
||||
* - Lokal (Ollama): Claim-Extraktion, Strukturierung, einfache Klassifizierung,
|
||||
* Artikelschreiben (Standard), Verdict-Synthese, OCR/Vision
|
||||
* - OpenRouter: Tiefe Argumentationsanalyse, komplexes Reasoning,
|
||||
* anspruchsvolles Schreiben/Lektorat
|
||||
*
|
||||
* Bevorzugt günstige chinesische Modelle (DeepSeek, Qwen3) wo verfügbar —
|
||||
* Gemini nur als Fallback / explizite Wahl.
|
||||
*
|
||||
* Konfiguration via Env-Variablen:
|
||||
* ROUTER_FORCE_LOCAL=1 → immer Ollama (für Tests / Offline)
|
||||
* ROUTER_FORCE_CLOUD=1 → immer OpenRouter
|
||||
* OPENROUTER_API_KEY → OpenRouter-Key (Pflicht für Cloud-Aufrufe)
|
||||
* OLLAMA_HOST → Ollama-URL (Standard: http://localhost:11434)
|
||||
*/
|
||||
|
||||
/** Kind of work a request represents; routeModel picks provider and model based on it. */
export type TaskType =
  /** Text → structured claims (ideal for a local model). */
  | "claim_extraction"
  /** Claims + evidence → verdict (a local model is good enough). */
  | "verdict_synthesis"
  /** Verified claims → article text. */
  | "article_writing"
  /** Argumentation analysis (reasoning-heavy). */
  | "logic_analysis"
  /** Complex multi-step analysis. */
  | "deep_reasoning"
  /** Style improvement, copy editing. */
  | "style_editing"
  /** OCR / text recognition from an image → local vision model. */
  | "ocr"
  /** Image description and analysis → local preferred. */
  | "vision_analysis";
|
||||
|
||||
/** Coarse complexity estimate supplied by the caller; routeModel defaults it to "medium". */
export type ComplexityHint = "low" | "medium" | "high";
|
||||
|
||||
/** Outcome of a routing decision as returned by routeModel. */
export type RouterDecision = {
  /** Backend that should serve the request. */
  provider: "ollama" | "openrouter";
  /** Model identifier to request from that backend. */
  model: string;
  /** Human-readable explanation of why this route was chosen. */
  reason: string;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Lokale Modelle (Ollama, RTX 3090 24GB)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Ollama model tags used for local inference, keyed by role. */
const LOCAL_MODELS = {
  // --- Text ---
  /** 17GB — default all-rounder. */
  fast: "qwen3.5:27b",
  /** 19GB — built-in reasoning. */
  reasoning: "deepseek-r1:32b",
  /** 5.2GB — quick model for simple tasks. */
  small: "qwen3:8b",

  // --- Vision / OCR ---
  /** 5.8GB — specialised in OCR. */
  ocr: "fredrezones55/chandra-ocr-2:patch",
  /** 6.1GB — general vision-language model. */
  vision: "qwen3-vl:latest",
  /** 5.5GB — lightweight vision model. */
  vision_small: "minicpm-v:latest",
} as const;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenRouter-Modelle — nach Kosten/Leistung (Stand 2025/2026)
|
||||
// Preise in USD/1M Tokens: https://openrouter.ai/models
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * OpenRouter model identifiers, keyed by role.
 * Prices in the comments are approximate USD per 1M tokens (input / output).
 */
const CLOUD_MODELS = {
  // --- DeepSeek: extremely cheap, very capable ---
  /** DeepSeek V3 — ~$0.014 in / $0.028 out — best price/performance ratio. */
  cheap: "deepseek/deepseek-chat-v3-0324",
  /** DeepSeek R1 — ~$0.55 in / $2.19 out — strong reasoning, cheaper than Gemini Pro. */
  reasoning: "deepseek/deepseek-r1",

  // --- Qwen3 (Alibaba): good and cheap ---
  /** Qwen3 235B A22B MoE — ~$0.13 in / $0.60 out — Alibaba's flagship. */
  qwen_large: "qwen/qwen3-235b-a22b",
  /** Qwen3 30B A3B — ~$0.03 in / $0.10 out — faster and cheaper. */
  qwen_fast: "qwen/qwen3-30b-a3b",

  // --- Google Gemini: fallback / explicit use only ---
  /** Gemini 2.5 Flash — ~$0.15 in / $0.60 out. */
  gemini_flash: "google/gemini-2.5-flash",
  /** Gemini 2.5 Flash Lite — ~$0.075 in / $0.30 out. */
  gemini_lite: "google/gemini-2.5-flash-lite",
  /** Gemini 2.5 Pro — ~$1.25 in / $10.0 out — only for delicate high-stakes cases. */
  gemini_pro: "google/gemini-2.5-pro",
} as const;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Routing-Regeln
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Decides which provider and model should handle a task.
 *
 * Order of evaluation:
 *  1. `ROUTER_FORCE_LOCAL=1` → always Ollama, with a task-appropriate local model.
 *  2. `ROUTER_FORCE_CLOUD=1` → OpenRouter, but only if `OPENROUTER_API_KEY` is set;
 *     without a key the flag has no effect and normal routing applies.
 *  3. Per-task rules: extraction, verdict synthesis, OCR and vision stay local;
 *     writing/editing goes to the cloud unless complexity is "low"; reasoning
 *     tasks prefer the cloud. Every cloud route falls back to Ollama when no
 *     OpenRouter key is configured.
 *
 * @param task       Kind of work to route.
 * @param complexity Complexity hint, "medium" by default.
 * @returns Provider, model identifier and a short explanation of the choice.
 * @throws Error if `task` is not a known TaskType at runtime (reachable only in
 *         the per-task rules, i.e. when no force flag short-circuits routing).
 */
export function routeModel(task: TaskType, complexity: ComplexityHint = "medium"): RouterDecision {
  const forceLocal = process.env.ROUTER_FORCE_LOCAL === "1";
  const forceCloud = process.env.ROUTER_FORCE_CLOUD === "1";
  const hasOpenRouter = !!process.env.OPENROUTER_API_KEY;

  if (forceLocal) {
    const localModel = (task === "ocr")
      ? LOCAL_MODELS.ocr
      : (task === "vision_analysis")
        ? LOCAL_MODELS.vision
        : (task === "deep_reasoning" || task === "logic_analysis")
          ? LOCAL_MODELS.reasoning
          : LOCAL_MODELS.fast;
    return { provider: "ollama", model: localModel, reason: "ROUTER_FORCE_LOCAL gesetzt" };
  }

  if (forceCloud && hasOpenRouter) {
    const cloudModel = complexity === "high"
      ? CLOUD_MODELS.reasoning
      : CLOUD_MODELS.cheap;
    return { provider: "openrouter", model: cloudModel, reason: "ROUTER_FORCE_CLOUD gesetzt" };
  }

  switch (task) {
    // --- Always local ---
    case "claim_extraction":
    case "verdict_synthesis":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.fast,
        reason: "Strukturierter Extraktions-Task → Ollama optimal",
      };

    // --- Always local (vision models) ---
    case "ocr":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.ocr,
        reason: "OCR → lokales Chandra-OCR-2 (5.8GB, RTX 3090)",
      };

    case "vision_analysis":
      return {
        provider: "ollama",
        model: LOCAL_MODELS.vision,
        reason: "Bildanalyse → lokales qwen3-vl (6.1GB, RTX 3090)",
      };

    // --- Local for simple jobs, cloud otherwise ---
    case "article_writing":
    case "style_editing":
      if (complexity === "low") {
        return {
          provider: "ollama",
          model: LOCAL_MODELS.fast,
          reason: "Einfaches Schreiben → Ollama ausreichend",
        };
      }
      if (hasOpenRouter) {
        return {
          provider: "openrouter",
          model: CLOUD_MODELS.cheap,
          reason: "Anspruchsvolles Schreiben → DeepSeek V3 (günstig, stark)",
        };
      }
      return {
        provider: "ollama",
        model: LOCAL_MODELS.fast,
        reason: "OpenRouter nicht verfügbar → Ollama Fallback",
      };

    // --- Cloud preferred for reasoning ---
    case "logic_analysis":
      if (hasOpenRouter) {
        // Dedicated reasoning model only for high complexity; the cheap model otherwise.
        const model = complexity === "high"
          ? CLOUD_MODELS.reasoning
          : CLOUD_MODELS.cheap;
        return {
          provider: "openrouter",
          model,
          reason: complexity === "high"
            ? "Komplexe Argumentationsanalyse → DeepSeek R1 (Reasoning-Modell)"
            : "Argumentationsanalyse → DeepSeek V3 (günstig + kompetent)",
        };
      }
      return {
        provider: "ollama",
        model: LOCAL_MODELS.reasoning,
        reason: "Argumentationsanalyse → deepseek-r1 lokal (kein OpenRouter-Key)",
      };

    case "deep_reasoning":
      if (hasOpenRouter) {
        return {
          provider: "openrouter",
          model: complexity === "high"
            ? CLOUD_MODELS.reasoning
            : CLOUD_MODELS.qwen_large,
          reason: complexity === "high"
            ? "Deep Reasoning (high) → DeepSeek R1 (günstig, stark)"
            : "Deep Reasoning (medium) → Qwen3 235B A22B",
        };
      }
      return {
        provider: "ollama",
        model: LOCAL_MODELS.reasoning,
        reason: "Deep Reasoning → deepseek-r1 lokal (kein OpenRouter-Key)",
      };

    default: {
      // Compile-time exhaustiveness check; at runtime this guards against
      // untyped callers passing an unknown task instead of returning undefined.
      const unhandled: never = task;
      throw new Error(`Unbekannter Task-Typ: ${String(unhandled)}`);
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenRouter API-Aufruf (generisch)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single chat message as passed to callOpenRouter and sent in the request's `messages` array. */
export type OpenRouterMessage = { role: "system" | "user" | "assistant"; content: string };
|
||||
|
||||
/**
 * Sends a chat-completion request to OpenRouter and returns the first answer.
 *
 * @param model    OpenRouter model identifier.
 * @param messages Conversation to send.
 * @param options  Optional sampling temperature (default 0.3), answer token
 *                 limit (default 2000) and abort signal.
 * @returns Trimmed answer text, token usage (0 when not reported) and the
 *          elapsed wall-clock time in milliseconds.
 * @throws Error if OPENROUTER_API_KEY is not set, the HTTP status is not OK,
 *         or the answer text is empty.
 */
export async function callOpenRouter(
  model: string,
  messages: OpenRouterMessage[],
  options?: {
    temperature?: number;
    maxTokens?: number;
    signal?: AbortSignal;
  }
): Promise<{ text: string; promptTokens: number; completionTokens: number; latencyMs: number }> {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY ist nicht gesetzt");
  }

  // Latency covers request construction, the round trip and response parsing.
  const startedAt = Date.now();

  const requestHeaders = {
    Authorization: `Bearer ${apiKey}`,
    "Content-Type": "application/json",
    "HTTP-Referer": "https://pi.local",
    "X-Title": "Pi Text-Agent",
  };
  const payload = JSON.stringify({
    model,
    messages,
    temperature: options?.temperature ?? 0.3,
    max_tokens: options?.maxTokens ?? 2000,
  });

  const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: requestHeaders,
    body: payload,
    signal: options?.signal,
  });

  if (!response.ok) {
    // The error body is best effort; an unreadable body becomes an empty string.
    const errorBody = await response.text().catch(() => "");
    throw new Error(`OpenRouter Fehler ${response.status}: ${errorBody}`);
  }

  const parsed = await response.json() as {
    choices?: Array<{ message?: { content?: string } }>;
    usage?: { prompt_tokens?: number; completion_tokens?: number };
  };

  const answer = parsed.choices?.[0]?.message?.content?.trim() ?? "";
  if (!answer) {
    throw new Error("Leere Antwort von OpenRouter");
  }

  const promptTokens = parsed.usage?.prompt_tokens ?? 0;
  const completionTokens = parsed.usage?.completion_tokens ?? 0;
  return { text: answer, promptTokens, completionTokens, latencyMs: Date.now() - startedAt };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Kostenabschätzung
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Approximate OpenRouter prices in USD per 1M tokens as [input, output] (as of 2025/2026).
 * Kept in a Map so that lookups never resolve inherited object keys.
 */
const OPENROUTER_PRICING_USD_PER_M: ReadonlyMap<string, readonly [number, number]> = new Map<
  string,
  readonly [number, number]
>([
  // DeepSeek — extremely cheap
  ["deepseek/deepseek-chat-v3-0324", [0.014, 0.028]],
  ["deepseek/deepseek-chat", [0.014, 0.028]], // alias
  ["deepseek/deepseek-r1", [0.55, 2.19]],

  // Qwen3 (Alibaba)
  ["qwen/qwen3-235b-a22b", [0.13, 0.60]],
  ["qwen/qwen3-30b-a3b", [0.03, 0.10]],

  // Google Gemini
  ["google/gemini-2.5-flash", [0.15, 0.60]],
  ["google/gemini-2.5-flash-lite", [0.075, 0.30]],
  ["google/gemini-2.5-pro", [1.25, 10.0]],
]);

/** Conservative [input, output] price used for models missing from the table. */
const OPENROUTER_FALLBACK_PRICE_USD_PER_M: readonly [number, number] = [1.0, 3.0];

/**
 * Estimates the approximate cost of one OpenRouter call in USD.
 * Prices are rough values — consult the OpenRouter dashboard for exact figures.
 *
 * @param model            OpenRouter model identifier; unknown models use the
 *                         conservative fallback of $1.00 in / $3.00 out per 1M tokens.
 * @param promptTokens     Number of input tokens.
 * @param completionTokens Number of output tokens.
 * @returns Estimated cost in USD.
 */
export function estimateOpenRouterCost(
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  const [inPrice, outPrice] = OPENROUTER_PRICING_USD_PER_M.get(model) ?? OPENROUTER_FALLBACK_PRICE_USD_PER_M;
  return (promptTokens / 1_000_000) * inPrice
    + (completionTokens / 1_000_000) * outPrice;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue