feat: Pi Text-Agent — initialer Commit (sauberes Repo)

Vollständiges Multi-Agenten-System für Fact-Checking, Artikelschreiben und Argumentationsanalyse. Zwei Backends: llama.cpp (★ bevorzugt) und Ollama. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 04:21:48 +02:00 · 2026-05-12 04:21:48 +02:00 · 5146b7fa30
commit 5146b7fa30
62 changed files with 11279 additions and 0 deletions
--- a/lib/ollama.ts
+++ b/lib/ollama.ts
@ -0,0 +1,237 @@
+/**
+ * lib/ollama.ts
+ * Zentraler Ollama-Client: Text-Chat und Vision/OCR-Aufrufe.
+ *
+ * Neu angelegte Agenten nutzen diesen Client statt inline-fetch.
+ * Bestehende Agenten (ollama-claim-extractor, verifier) können schrittweise migriert werden.
+ *
+ * Konfiguration:
+ *   OLLAMA_HOST   → Ollama-URL (Standard: http://localhost:11434)
+ */
+
+/**
+ * Base URL of the Ollama server, without a trailing slash.
+ *
+ * Read once at module load from the OLLAMA_HOST environment variable. An unset,
+ * empty or whitespace-only value falls back to the local default, and trailing
+ * slashes are stripped so that `${OLLAMA_HOST}/api/...` never contains "//".
+ */
+export const OLLAMA_HOST =
+	process.env.OLLAMA_HOST?.trim().replace(/\/+$/, "") || "http://localhost:11434";
+
+/** One chat message in the shape this client sends to Ollama. */
+export type OllamaMessage = {
+	/** Author of the message. */
+	role: "system" | "user" | "assistant";
+	/** Text content of the message. */
+	content: string;
+	/** Base64-encoded images attached to the message (vision calls). */
+	images?: Array<string>;
+};
+
+/** Outcome of one Ollama chat call as returned by this client. */
+export type OllamaResult = {
+	/** Reply text of the model. */
+	text: string;
+	/** Number of prompt tokens reported by Ollama. */
+	promptTokens: number;
+	/** Number of generated tokens reported by Ollama. */
+	completionTokens: number;
+	/** Elapsed wall-clock time of the call in milliseconds. */
+	latencyMs: number;
+};
+
+// ---------------------------------------------------------------------------
+// Intern
+// ---------------------------------------------------------------------------
+
+/** Total number of attempts a chat call makes before giving up. */
+const MAX_RETRIES = 3;
+/** Pause between two consecutive attempts, in milliseconds (15 s). */
+const RETRY_DELAY_MS = 15 * 1000;
+
+/** Returns a promise that resolves, without a value, after `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+	return new Promise<void>((done) => {
+		setTimeout(done, ms);
+	});
+}
+
+// ---------------------------------------------------------------------------
+// Haupt-Aufruf
+// ---------------------------------------------------------------------------
+
+/**
+ * Waits `ms` milliseconds before the next attempt, but returns early (without
+ * throwing) as soon as `signal` is aborted; the caller decides how to react.
+ */
+function waitBeforeRetry(ms: number, signal?: AbortSignal): Promise<void> {
+	if (signal === undefined) return sleep(ms);
+	const activeSignal = signal;
+	if (activeSignal.aborted) return Promise.resolve();
+
+	return new Promise<void>(resolve => {
+		const onAbort = (): void => {
+			clearTimeout(timer);
+			resolve();
+		};
+		const timer = setTimeout(() => {
+			// Timer won: drop the listener so it does not pile up on a long-lived signal.
+			activeSignal.removeEventListener("abort", onAbort);
+			resolve();
+		}, ms);
+		activeSignal.addEventListener("abort", onAbort, { once: true });
+	});
+}
+
+/**
+ * Sends a non-streaming chat request (text or vision) to Ollama's `/api/chat`
+ * endpoint and returns the reply text together with the reported token counts.
+ *
+ * For vision calls, set `images` on the messages or use callOllamaVision().
+ *
+ * A failed attempt (network error, non-2xx status, unparsable body) is retried
+ * until MAX_RETRIES attempts have been made, with RETRY_DELAY_MS between them.
+ * A call cancelled through `options.signal` is never retried: the error of the
+ * aborted attempt is rethrown unchanged and a pending retry wait ends at once.
+ *
+ * @param model     Name of the Ollama model to query.
+ * @param messages  Conversation to send, in order.
+ * @param options   Optional request settings; see the individual fields.
+ * @returns The trimmed reply text, the token counts reported by Ollama (0 when
+ *          missing) and the time elapsed since the call started, which
+ *          includes any retry waits.
+ * @throws The error of the aborted attempt when `options.signal` is aborted;
+ *         otherwise an Error describing the last failure once all attempts
+ *         are used up.
+ */
+export async function callOllamaChat(
+	model: string,
+	messages: OllamaMessage[],
+	options?: {
+		/** "json" or a JSON schema for structured output (Ollama >= 0.5) */
+		format?: "json" | Record<string, unknown>;
+		/** Sampling temperature (default: 0.1). */
+		temperature?: number;
+		/** Sent as `num_ctx` when set to a non-zero value. */
+		numCtx?: number;
+		/** Sent as `num_predict` when set to a non-zero value. */
+		numPredict?: number;
+		/**
+		 * Thinking mode for qwen3 / deepseek-r1 style models (default: false).
+		 * false → answer only, no chain-of-thought requested
+		 * true  → the model thinks first; answer in `content`, reasoning in the
+		 *         separate `thinking` field
+		 */
+		think?: boolean;
+		/** Cancels the in-flight request and any pending retry. */
+		signal?: AbortSignal;
+	}
+): Promise<OllamaResult> {
+	const t0 = Date.now();
+	const signal = options?.signal;
+	let lastError: unknown;
+
+	// qwen3 and deepseek-r1 think by default; for structured output
+	// (JSON, extraction) that is unwanted, hence the explicit default of false.
+	const think = options?.think ?? false;
+
+	for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
+		try {
+			const body: Record<string, unknown> = {
+				model,
+				messages,
+				stream: false,
+				think,
+				options: {
+					temperature: options?.temperature ?? 0.1,
+					...(options?.numCtx    ? { num_ctx:     options.numCtx    } : {}),
+					...(options?.numPredict ? { num_predict: options.numPredict } : {}),
+				},
+			};
+			if (options?.format !== undefined) {
+				body.format = options.format;
+			}
+
+			const resp = await fetch(`${OLLAMA_HOST}/api/chat`, {
+				method: "POST",
+				headers: { "Content-Type": "application/json" },
+				body: JSON.stringify(body),
+				signal,
+			});
+
+			if (!resp.ok) {
+				// The error body is best-effort context only; ignore read failures.
+				const errText = await resp.text().catch(() => "");
+				throw new Error(`Ollama HTTP ${resp.status}: ${errText}`);
+			}
+
+			const data = await resp.json() as {
+				message?: { content?: string; thinking?: string };
+				prompt_eval_count?: number;
+				eval_count?: number;
+			};
+
+			// With thinking enabled the content may come back empty (seen with
+			// very short answers); fall back to the thinking field in that case.
+			const text = data.message?.content?.trim()
+				|| (think ? data.message?.thinking?.trim() : "")
+				|| "";
+
+			return {
+				text,
+				promptTokens:     data.prompt_eval_count ?? 0,
+				completionTokens: data.eval_count        ?? 0,
+				latencyMs: Date.now() - t0,
+			};
+		} catch (err) {
+			// A cancelled call must not be retried or delayed: surface it right away.
+			if (signal?.aborted) throw err;
+			lastError = err;
+			// If the signal fires during the wait, the next fetch rejects
+			// immediately and the check above rethrows that abort error.
+			if (attempt < MAX_RETRIES) await waitBeforeRetry(RETRY_DELAY_MS, signal);
+		}
+	}
+
+	throw new Error(
+		`Ollama fehlgeschlagen nach ${MAX_RETRIES} Versuchen: ${
+			lastError instanceof Error ? lastError.message : String(lastError)
+		}`
+	);
+}
+
+// ---------------------------------------------------------------------------
+// Vision / OCR
+// ---------------------------------------------------------------------------
+
+/** Matches a base64 payload starting with "/9j/", the prefix of every base64-encoded JPEG. */
+const BASE64_JPEG_RE = /^\/9j\/[A-Za-z0-9+\/\r\n]*={0,2}\s*$/;
+
+/** Matches the header of a base64 data URL, e.g. "data:image/png;base64,". */
+const DATA_URL_BASE64_RE = /^data:[^,]*;base64,/i;
+
+/**
+ * Turns the `imageSource` argument of callOllamaVision() into raw base64.
+ * Only strings that look like a file path are read from disk.
+ */
+async function loadImageAsBase64(imageSource: string): Promise<string> {
+	const dataUrlHeader = DATA_URL_BASE64_RE.exec(imageSource);
+	if (dataUrlHeader) return imageSource.slice(dataUrlHeader[0].length);
+
+	const looksLikePath = imageSource.startsWith("/") || imageSource.startsWith("~");
+	// Base64 JPEG data also starts with "/" and must not be mistaken for a path.
+	if (!looksLikePath || BASE64_JPEG_RE.test(imageSource)) return imageSource;
+
+	let resolvedPath = imageSource;
+	if (imageSource.startsWith("~")) {
+		const { homedir } = await import("node:os");
+		// Plain concatenation: String.replace would interpret "$" sequences
+		// that happen to occur in the home directory path.
+		resolvedPath = homedir() + imageSource.slice(1);
+	}
+	const { readFile } = await import("node:fs/promises");
+	const buf = await readFile(resolvedPath);
+	return buf.toString("base64");
+}
+
+/**
+ * Ollama call with a single image as input (vision / OCR).
+ *
+ * Recommended models (each fits an RTX 3090 with 24 GB):
+ *   fredrezones55/chandra-ocr-2:patch   5.8GB  — OCR-specialised, documents/scans
+ *   qwen3-vl:latest                     6.1GB  — vision-language, image description + OCR
+ *   qwen2.5vl:7b                        6.0GB  — alternative to qwen3-vl
+ *   minicpm-v:latest                    5.5GB  — lightweight, good for simple OCR
+ *
+ * How `imageSource` is interpreted:
+ *   - "data:<mime>;base64,<payload>"  → the payload is used
+ *   - base64 text starting with "/9j/" (JPEG) → used as base64
+ *   - any other string starting with "/" or "~" → read from disk; a leading
+ *     "~" is replaced by the current user's home directory
+ *   - anything else → assumed to be base64 already
+ *
+ * @param model        Name of the vision-capable Ollama model.
+ * @param imageSource  File path ("/…" or "~…") or base64 string, see above.
+ * @param prompt       User prompt sent together with the image.
+ * @param options      Optional system prompt, temperature (default 0.1) and abort signal.
+ * @returns The result of the underlying callOllamaChat() call.
+ * @throws When the image file cannot be read, or when callOllamaChat() fails.
+ */
+export async function callOllamaVision(
+	model: string,
+	imageSource: string,
+	prompt: string,
+	options?: {
+		systemPrompt?: string;
+		temperature?: number;
+		signal?: AbortSignal;
+	}
+): Promise<OllamaResult> {
+	const imageBase64 = await loadImageAsBase64(imageSource);
+
+	const messages: OllamaMessage[] = [];
+	if (options?.systemPrompt) {
+		messages.push({ role: "system", content: options.systemPrompt });
+	}
+	messages.push({ role: "user", content: prompt, images: [imageBase64] });
+
+	return callOllamaChat(model, messages, {
+		temperature: options?.temperature ?? 0.1,
+		signal: options?.signal,
+	});
+}
+
+// ---------------------------------------------------------------------------
+// Modell-Infos (lokal installiert, passend für RTX 3090 24GB)
+// ---------------------------------------------------------------------------
+
+/**
+ * Known local Ollama models, grouped by category.
+ * Each entry records the model's VRAM footprint in GB and the GPU count.
+ */
+export const LOCAL_CATALOG = {
+	// --- Text / reasoning ---
+	text: {
+		/** 17 GB — main all-rounder, 1 GPU */
+		"qwen3.5:27b": { vramGB: 17, gpus: 1 },
+		/** 19 GB — built-in reasoning (DeepSeek R1), 1 GPU */
+		"deepseek-r1:32b": { vramGB: 19, gpus: 1 },
+		/** 18 GB — code + general purpose, 128k context, 1 GPU */
+		"qwen3-coder-30b-128k:latest": { vramGB: 18, gpus: 1 },
+		/** 18 GB — GPU-optimised variant of the Qwen3 coder, 1 GPU */
+		"qwen3-coder-30b-gpu:latest": { vramGB: 18, gpus: 1 },
+		/** 18 GB — GLM-4.7 Flash, Chinese model, 1 GPU */
+		"glm-4.7-flash:latest": { vramGB: 18, gpus: 1 },
+		/** 17 GB — Gemma4 26B by Google, 1 GPU */
+		"gemma4:26b": { vramGB: 17, gpus: 1 },
+		/** 9.6 GB — Gemma4 E4B (efficiency variant), 1 GPU */
+		"gemma4:e4b": { vramGB: 9.6, gpus: 1 },
+		/** 9.0 GB — Qwen2.5 14B Instruct, 1 GPU */
+		"qwen2.5:14b-instruct": { vramGB: 9, gpus: 1 },
+		/** 5.2 GB — Qwen3 8B, fast for simple tasks */
+		"qwen3:8b": { vramGB: 5.2, gpus: 1 },
+		/** 4.9 GB — Llama 3.1 8B */
+		"llama3.1:8b": { vramGB: 4.9, gpus: 1 },
+		/** 7.1 GB — Mistral Nemo */
+		"mistral-nemo:latest": { vramGB: 7.1, gpus: 1 },
+	},
+
+	// --- Code ---
+	code: {
+		/** 9.0 GB — Qwen2.5-Coder 14B, 1 GPU */
+		"qwen2.5-coder:14b": { vramGB: 9, gpus: 1 },
+		/** 4.7 GB — Qwen2.5-Coder 7B, fast */
+		"qwen2.5-coder:7b": { vramGB: 4.7, gpus: 1 },
+	},
+
+	// --- Vision / OCR ---
+	vision: {
+		/** 5.8 GB — OCR-specialised (Chandra OCR 2) */
+		"fredrezones55/chandra-ocr-2:patch": { vramGB: 5.8, gpus: 1 },
+		/** 6.1 GB — Qwen3 vision-language model */
+		"qwen3-vl:latest": { vramGB: 6.1, gpus: 1 },
+		/** 6.0 GB — Qwen2.5 vision-language 7B */
+		"qwen2.5vl:7b": { vramGB: 6, gpus: 1 },
+		/** 5.5 GB — MiniCPM-V, lightweight */
+		"minicpm-v:latest": { vramGB: 5.5, gpus: 1 },
+		/** 3.3 GB — Qwen3-VL 4B, very small */
+		"qwen3-vl:4b": { vramGB: 3.3, gpus: 1 },
+	},
+
+	// --- Embedding ---
+	embedding: {
+		/** 4.7 GB — Qwen3 embedding */
+		"qwen3-embedding:latest": { vramGB: 4.7, gpus: 1 },
+	},
+} as const;