// Text_Agent/agenten/llama-verify-article.ts

/**
 * llama-verify-article.ts
 * Pi-Extension + CLI: Vollständige Fact-Check-Pipeline via llama.cpp
 *
 * Ablauf:
 *   1. Claim-Extraktion via llama.cpp (lokal, Port 8000)
 *   2. Perplexity-Recherche für alle prüfbaren Claims (parallel)
 *   3. Batch-Urteilssynthese via llama.cpp (1 Aufruf für alle Claims)
 *   4. Verifikationsbericht formatieren
 *
 * Als Pi-Extension: ~/.pi/agent/extensions/fact-checker/llama-verify-article.ts
 * Als CLI:
 *   npx tsx agenten/llama-verify-article.ts "$(cat artikel.txt)"
 *   npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep
 *   npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
 */

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { fileURLToPath } from "node:url";
import { readFile } from "node:fs/promises";
import {
	searchPerplexity,
	formatSourcesForPrompt,
	type PerplexityResult,
} from "../lib/perplexity.js";
import { callLlamaClaimExtract, type ClaimSet } from "./llama-claim-extractor.js";
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";
import {
	saveJobFile,
	loadJobFile,
	jobFileExists,
	updateJobMeta,
	getOrCreateJob,
} from "../lib/jobs.js";
import { getCached, setCached } from "../lib/cache.js";

// ---------------------------------------------------------------------------
// Typen
// ---------------------------------------------------------------------------

/**
 * Outcome categories for a single claim.
 *
 * The first five values are the ones offered to the model in the batch-verdict
 * system prompt. `not_checkable` is assigned by `verifyArticle` itself to claims
 * whose `checkability` is not `"checkable"`.
 */
type VerificationStatus =
	| "supported"
	| "contradicted"
	| "mixed"
	| "insufficient_evidence"
	| "needs_human_review"
	| "not_checkable";

/** Confidence level attached to a verdict (the system prompt ties it to source quality). */
type Confidence = "high" | "medium" | "low";

/**
 * One verdict as requested from the model in the batch-verdict prompt.
 * `claim_id` links the verdict back to the extracted claim; `supporting_urls`
 * is later compared against the Perplexity sources to flag supporting ones.
 */
type VerdictItem = {
	claim_id: string;
	status: VerificationStatus;
	confidence: Confidence;
	summary: string;
	counter_evidence: string | null;
	notes: string | null;
	supporting_urls: string[];
};

/** Top-level JSON object the model is instructed to answer with. */
type BatchVerdictRaw = { verdicts: VerdictItem[] };

/**
 * Final result of `verifyArticle`.
 *
 * - `verified_at`: ISO timestamp taken when the report is assembled.
 * - `source_text_summary`: the first 200 characters of the input text (with an ellipsis if truncated).
 * - `summary`: one-line German overview built from the status counters.
 * - `results`: one entry per extracted claim (researched, failed and non-checkable ones).
 * - `stats`: `total` plus one counter per status value.
 * - `totalCostUSD`: sum of the estimated Perplexity costs of all successful searches.
 * - `latencyMs`: wall-clock duration of the whole `verifyArticle` call.
 */
export type VerificationReport = {
	schema_version: "1.0.0";
	verified_at: string;
	source_text_summary: string;
	summary: string;
	results: Array<{
		claim_id: string;
		claim_text: string;
		status: VerificationStatus;
		confidence: Confidence;
		summary: string;
		sources: Array<{ url: string; title: string | null; supports_claim: boolean }>;
		counter_evidence: string | null;
		notes: string | null;
	}>;
	stats: Record<string, number>;
	totalCostUSD: number;
	latencyMs: number;
};

// llama.cpp OpenAI-compatible API format
/**
 * Subset of the chat-completion response that this file reads.
 * `reasoning_content` is only consulted as a fallback when `content` is empty.
 */
type LlamaResponse = {
	choices: Array<{
		message?: { content?: string; reasoning_content?: string };
		finish_reason?: string;
	}>;
	usage?: { prompt_tokens?: number; completion_tokens?: number };
};

// ---------------------------------------------------------------------------
// Konfiguration
// ---------------------------------------------------------------------------

/** Model name sent to llama.cpp when the caller does not specify one. */
const DEFAULT_MODEL = "Qwopus3.6-35B-A3B-v1-Q4_K_M.gguf";
/** Base URL of the llama.cpp server; can be overridden through the LLAMA_HOST environment variable. */
const LLAMA_HOST = process.env.LLAMA_HOST ?? "http://localhost:8000";
/** Claim limit used when no `maxClaims` is given (`verifyArticle` additionally caps the value at 20). */
const DEFAULT_MAX_CLAIMS = 15;
/** Default language code for the verdict texts. */
const DEFAULT_USER_LANGUAGE = "de";
/** Upper bound of concurrently running Perplexity searches. */
const MAX_PARALLEL_PERPLEXITY = 5;
// Batch verdicts: many claims + Perplexity texts → large context window
/** Sent as `max_tokens` of the batch-verdict request. */
const MAX_TOKENS_BATCH = 32768;
/** Sampling temperature of the batch-verdict request. */
const TEMPERATURE = 0.1;
/** Number of fetch attempts for the batch-verdict request. */
const MAX_RETRIES = 3;
/** Pause between two fetch attempts, in milliseconds. */
const RETRY_DELAY_MS = 15_000;

// ---------------------------------------------------------------------------
// Batch-Urteilssynthese via llama.cpp
// ---------------------------------------------------------------------------

/**
 * Translates a language code into the German language name that is inserted
 * into the system prompt. Codes other than de/en/fr/es are returned unchanged.
 */
function langLabel(userLanguage: string): string {
	switch (userLanguage) {
		case "de":
			return "Deutsch";
		case "en":
			return "Englisch";
		case "fr":
			return "Französisch";
		case "es":
			return "Spanisch";
		default:
			return userLanguage;
	}
}

/**
 * Builds the (German) system prompt for the batch-verdict request.
 *
 * The prompt defines the status scale, the confidence levels, the rules for
 * using "contradicted", and the exact JSON shape the model must answer with.
 *
 * @param userLanguage Language code; its label (via `langLabel`) is inserted as
 *   the language for `summary`, `counter_evidence` and `notes`.
 * @returns The complete system prompt text.
 */
function buildBatchVerdictSystemPrompt(userLanguage: string): string {
	return `Du bist ein erfahrener Fact-Checker. Bewerte jede Behauptung anhand der bereitgestellten Recherche-Ergebnisse.

Status-Skala:
- supported: Quellen bestätigen klar und konsistent
- contradicted: Quellen widersprechen klar und SUBSTANZIELL
- mixed: Widersprüchliche Quellenlage ODER Behauptung technisch ungenau aber im Kern korrekt
- insufficient_evidence: Zu wenig oder schwache Quellen
- needs_human_review: Komplex, politisch heikel, stark kontextabhängig

Confidence: high (eindeutige Primärquellen), medium (begrenzte/sekundäre Quellen), low (sehr unklar)

WICHTIGE REGELN für "contradicted":
- Nur bei klar substanziellen Fehlern: falsche Person, Zahl >5% abweichend, falsch zugeordnetes Ereignis
- Gerundete/allgemein akzeptierte Näherungswerte → "supported" (z.B. "21 Millionen Bitcoin" ist korrekte Rundung)
- Zeitzonendifferenzen historischer Ereignisse → "supported" wenn im üblichen regionalen Kontext korrekt
- Technische Präzisierungen zu korrekten Aussagen → "mixed", nicht "contradicted"
- Im Zweifel immer "mixed" statt "contradicted"

AUSGABESPRACHE: Schreibe summary, counter_evidence und notes auf ${langLabel(userLanguage)}.
Die Enum-Werte status und confidence bleiben englisch.

summary: 1-3 präzise Sätze. Nicht spekulieren.
counter_evidence: Gegenbelege als Satz, sonst null.
notes: Zeitabhängigkeit, Einschränkungen, sonst null.
supporting_urls: URLs der stützenden Quellen (leeres Array wenn keine).

Antworte NUR mit diesem JSON-Objekt — kein Freitext davor oder danach:
{
  "verdicts": [
    {
      "claim_id": "c001",
      "status": "supported|contradicted|mixed|insufficient_evidence|needs_human_review",
      "confidence": "high|medium|low",
      "summary": "...",
      "counter_evidence": "..." | null,
      "notes": "..." | null,
      "supporting_urls": ["url1"]
    }
  ]
}`;
}

/**
 * Builds the user message of the batch-verdict request.
 *
 * Every claim becomes one section consisting of the claim text, the Perplexity
 * summary and the formatted source list (or a placeholder when that list is
 * empty). The message starts with `/no_think` and ends with the instruction to
 * evaluate all claims.
 *
 * @param claims Claims together with their Perplexity research result.
 * @returns The complete user prompt text.
 */
function buildBatchVerdictUserPrompt(
	claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>
): string {
	const sections: string[] = [];

	for (const entry of claims) {
		const sourceList = formatSourcesForPrompt(entry.perplexity.sources, 200);
		const sectionLines = [
			"---",
			`BEHAUPTUNG ${entry.id}: "${entry.text}"`,
			"RECHERCHE:",
			`${entry.perplexity.summary}`,
			"",
			"QUELLEN:",
			`${sourceList || "(keine Quellen gefunden)"}`,
		];
		sections.push(sectionLines.join("\n"));
	}

	const allSections = sections.join("\n\n");
	const instruction = `Bewerte alle ${claims.length} Behauptungen.`;
	return "/no_think\n" + allSections + "\n\n" + instruction;
}

/** Status values accepted from the model; anything else is replaced by a fallback. */
const VERDICT_STATUSES = [
	"supported",
	"contradicted",
	"mixed",
	"insufficient_evidence",
	"needs_human_review",
	"not_checkable",
] as const;

/** Confidence values accepted from the model. */
const VERDICT_CONFIDENCES = ["high", "medium", "low"] as const;

/**
 * Converts one untrusted element of the model's `verdicts` array into a
 * well-formed verdict. Returns null when the element cannot be attributed to a
 * claim (not an object, or no usable `claim_id`).
 */
function normalizeVerdict(value: unknown): VerdictItem | null {
	if (typeof value !== "object" || value === null) return null;
	const item = value as Record<string, unknown>;
	if (typeof item.claim_id !== "string" || item.claim_id === "") return null;

	return {
		claim_id: item.claim_id,
		// Unknown enum values fall back to the most cautious option.
		status: VERDICT_STATUSES.find((s) => s === item.status) ?? "insufficient_evidence",
		confidence: VERDICT_CONFIDENCES.find((c) => c === item.confidence) ?? "low",
		summary: typeof item.summary === "string" ? item.summary : "Keine Urteilssynthese verfügbar.",
		counter_evidence: typeof item.counter_evidence === "string" ? item.counter_evidence : null,
		notes: typeof item.notes === "string" ? item.notes : null,
		supporting_urls: Array.isArray(item.supporting_urls)
			? item.supporting_urls.filter((u): u is string => typeof u === "string")
			: [],
	};
}

/**
 * Requests verdicts for all researched claims from llama.cpp in a single
 * chat-completion call and returns them in validated form.
 *
 * Network failures of the fetch itself are retried up to `MAX_RETRIES` times
 * with a pause of `RETRY_DELAY_MS` in between. A cancelled request (aborted
 * `signal`) is never retried. If the model leaves `content` empty, the JSON is
 * looked up in `reasoning_content` instead. Verdict elements returned by the
 * model are normalized: invalid enum values, missing arrays and wrong field
 * types are replaced by safe defaults, and elements without a `claim_id` are
 * dropped.
 *
 * @param claims Claims with their Perplexity result; an empty list returns `[]` without any request.
 * @param model Model name passed to llama.cpp.
 * @param userLanguage Language code for the free-text fields of the verdicts.
 * @param signal Optional abort signal for the HTTP request and the retry pauses.
 * @param logger Optional logger; defaults to `nullLogger`.
 * @returns The normalized verdicts (possibly fewer than `claims`).
 * @throws Error when the request keeps failing, the server answers with a
 *   non-OK status, the answer is empty, or the answer is not valid JSON. When
 *   the signal is aborted, the original fetch error is rethrown.
 */
async function synthesizeBatchVerdicts(
	claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>,
	model: string,
	userLanguage: string,
	signal?: AbortSignal,
	logger?: Logger
): Promise<VerdictItem[]> {
	if (claims.length === 0) return [];

	const log = logger ?? nullLogger;

	const body = {
		model,
		messages: [
			{ role: "system", content: buildBatchVerdictSystemPrompt(userLanguage) },
			{ role: "user", content: buildBatchVerdictUserPrompt(claims) },
		],
		stream: false,
		temperature: TEMPERATURE,
		max_tokens: MAX_TOKENS_BATCH,
	};

	let resp: Response | undefined;

	for (let attempt = 1; attempt <= MAX_RETRIES && !resp; attempt++) {
		try {
			resp = await fetch(`${LLAMA_HOST}/v1/chat/completions`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify(body),
				signal,
			});
		} catch (err) {
			// A cancelled request can never succeed — fail right away instead of retrying.
			if (signal?.aborted) throw err;

			const message = err instanceof Error ? err.message : String(err);
			log.warn(`llama.cpp Batch-Verdict fetch fehlgeschlagen (Versuch ${attempt}/${MAX_RETRIES})`, {
				error: message,
			});
			if (attempt === MAX_RETRIES) {
				throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: ${message}`);
			}

			// Pause before the next attempt; an abort ends the pause early so that
			// the following fetch rejects immediately.
			await new Promise<void>((resolve) => {
				const wake = () => {
					clearTimeout(timer);
					signal?.removeEventListener("abort", wake);
					resolve();
				};
				const timer = setTimeout(wake, RETRY_DELAY_MS);
				signal?.addEventListener("abort", wake, { once: true });
			});
		}
	}

	if (!resp) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen`);

	if (!resp.ok) {
		const errText = await resp.text().catch(() => "");
		throw new Error(`llama.cpp Batch-Verdict Fehler ${resp.status}: ${errText}`);
	}

	const data = (await resp.json()) as LlamaResponse;
	const choice = data.choices?.[0];
	let raw = choice?.message?.content ?? "";

	// Reasoning fallback: if content is empty, extract the JSON from reasoning_content.
	if (!raw.trim() && choice?.message?.reasoning_content) {
		const rc = choice.message.reasoning_content;
		// Prefer the last object that starts with a "verdicts" key (the final answer
		// rather than an earlier draft); otherwise take any object mentioning it.
		const allMatches = [...rc.matchAll(/\{[^{}]*"verdicts"\s*:/g)];
		const lastIdx = allMatches.length > 0
			? rc.lastIndexOf(allMatches[allMatches.length - 1][0])
			: -1;
		const extracted = lastIdx >= 0
			? rc.slice(lastIdx).match(/\{[\s\S]*\}/)?.[0]
			: rc.match(/\{[\s\S]*"verdicts"[\s\S]*\}/)?.[0];
		if (extracted) {
			raw = extracted;
			log.warn("Batch-Verdict: JSON aus reasoning_content extrahiert", {
				finishReason: choice.finish_reason,
			});
		}
	}

	// Strip a surrounding Markdown code fence, if present.
	const cleanedRaw = raw
		.replace(/^```(?:json)?\s*/i, "")
		.replace(/\s*```$/i, "")
		.trim();

	log.debug("Batch-Verdict erhalten", {
		promptTokens: data.usage?.prompt_tokens,
		outputTokens: data.usage?.completion_tokens,
		finishReason: choice?.finish_reason,
		rawLength: raw.length,
	});

	if (!cleanedRaw) throw new Error("Leere llama.cpp-Antwort für Batch-Verdicts");

	let parsed: unknown;
	try {
		parsed = JSON.parse(cleanedRaw);
	} catch {
		throw new Error(`Kein gültiges JSON von llama.cpp: ${cleanedRaw.slice(0, 300)}`);
	}

	// The payload is model output: treat it as untrusted and validate every element.
	const rawVerdicts: unknown =
		typeof parsed === "object" && parsed !== null
			? (parsed as Partial<BatchVerdictRaw>).verdicts
			: undefined;
	if (!Array.isArray(rawVerdicts)) return [];

	return rawVerdicts
		.map((entry) => normalizeVerdict(entry))
		.filter((entry): entry is VerdictItem => entry !== null);
}

// ---------------------------------------------------------------------------
// Parallel-Limiter für Perplexity
// ---------------------------------------------------------------------------

/**
 * Runs all tasks with at most `limit` of them in flight at the same time.
 *
 * @param tasks Factories that start the actual work when called.
 * @param limit Maximum number of concurrently running tasks. Values below 1 and
 *   NaN are treated as 1, so the tasks always run (a limit of 0 used to start no
 *   worker at all and returned an array of empty slots).
 * @returns The task results, in the order of `tasks`.
 * @throws Rejects with the first task rejection (other workers keep draining the queue).
 */
async function runWithConcurrencyLimit<T>(
	tasks: Array<() => Promise<T>>,
	limit: number
): Promise<T[]> {
	const results = new Array<T>(tasks.length);
	const effectiveLimit = Number.isNaN(limit) ? 1 : Math.max(1, Math.floor(limit));
	const workerCount = Math.min(effectiveLimit, tasks.length);
	let next = 0;

	// Each worker repeatedly claims the next unclaimed index. Claiming is a
	// synchronous read-and-increment, so no index is handed out twice.
	const worker = async (): Promise<void> => {
		while (next < tasks.length) {
			const current = next++;
			results[current] = await tasks[current]();
		}
	};

	await Promise.all(Array.from({ length: workerCount }, () => worker()));
	return results;
}

// ---------------------------------------------------------------------------
// Hauptfunktion
// ---------------------------------------------------------------------------

/**
 * Runs the complete fact-check pipeline for one text:
 * claim extraction → Perplexity research → batch verdict synthesis → report.
 *
 * When `jobDir` is set, intermediate results (`claims.json`,
 * `perplexity/<claim_id>.json`, `report.json`) are read from and written to
 * that job directory, so an interrupted run can be resumed. Independently of
 * that, Perplexity results are looked up in and stored to the global cache
 * (keyed by claim text) unless `noCache` is set.
 *
 * @param text The full article text.
 * @param options Optional settings:
 *   - `maxClaims`: claim limit; capped at 20. Values that are not a finite
 *     number ≥ 1 fall back to `DEFAULT_MAX_CLAIMS`, fractions are rounded down.
 *   - `mode`: Perplexity mode, defaults to `"fast"`.
 *   - `model`: llama.cpp model, defaults to `DEFAULT_MODEL`.
 *   - `userLanguage`: language of the verdict texts, defaults to `DEFAULT_USER_LANGUAGE`.
 *   - `signal`: abort signal handed to all network calls.
 *   - `onProgress`: receives every progress message (also written to the logger).
 *   - `logger`: defaults to `nullLogger`.
 *   - `jobDir`: job directory for intermediate results.
 *   - `noCache`: disables the global Perplexity cache.
 * @returns The verification report.
 * @throws Error when claim extraction or verdict synthesis fails, or when the
 *   run was aborted during the research phase. Failures of individual
 *   Perplexity searches do not throw; they are reported per claim.
 */
export async function verifyArticle(
	text: string,
	options?: {
		maxClaims?: number;
		mode?: "fast" | "deep";
		model?: string;
		userLanguage?: string;
		signal?: AbortSignal;
		onProgress?: (msg: string) => void;
		logger?: Logger;
		jobDir?: string;
		noCache?: boolean;
	}
): Promise<VerificationReport> {
	const t0 = Date.now();
	const model = options?.model ?? DEFAULT_MODEL;
	// CLI parsing or a tool call may hand over NaN, a fraction or a value below 1.
	// None of these is a usable limit, so fall back to the default.
	const requestedMaxClaims = options?.maxClaims;
	const maxClaims = Math.min(
		requestedMaxClaims !== undefined && Number.isFinite(requestedMaxClaims) && requestedMaxClaims >= 1
			? Math.floor(requestedMaxClaims)
			: DEFAULT_MAX_CLAIMS,
		20
	);
	const mode = options?.mode ?? "fast";
	const userLanguage = options?.userLanguage ?? DEFAULT_USER_LANGUAGE;
	const log = options?.logger ?? nullLogger;
	const jobDir = options?.jobDir;
	const useCache = !(options?.noCache ?? false);
	const progress = (msg: string) => {
		options?.onProgress?.(msg);
		log.info(msg);
	};

	log.info("llama-verify-article gestartet", { textLength: text.length, model, maxClaims, mode, userLanguage, jobDir });

	// Step 1: claim extraction (or load from the job directory)
	let claimSet: ClaimSet;
	if (jobDir) {
		const cached = loadJobFile<ClaimSet>(jobDir, "claims.json");
		if (cached) {
			claimSet = cached;
			const checkable = claimSet.claims.filter((c) => c.checkability === "checkable").length;
			progress(`Claims aus Job geladen (${claimSet.total_claims} total, ${checkable} prüfbar) — Extraktion übersprungen.`);
		} else {
			updateJobMeta(jobDir, { status: "extracting" });
			progress("Claims extrahieren (llama.cpp)...");
			const { claimSet: extracted, tokensIn, tokensOut, latencyMs: extractLatency } = await callLlamaClaimExtract(
				text, model, maxClaims, options?.signal, log
			);
			claimSet = extracted;
			saveJobFile(jobDir, "claims.json", claimSet);
			updateJobMeta(jobDir, {
				status: "verifying",
				steps: {
					extract: {
						completedAt: new Date().toISOString(),
						totalClaims: claimSet.total_claims,
						checkableClaims: claimSet.claims.filter((c) => c.checkability === "checkable").length,
						latencyMs: extractLatency,
					},
				},
			});
			log.info("Claims extrahiert + gespeichert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
		}
	} else {
		progress("Claims extrahieren (llama.cpp)...");
		const { claimSet: extracted, tokensIn, tokensOut, latencyMs: extractLatency } = await callLlamaClaimExtract(
			text, model, maxClaims, options?.signal, log
		);
		claimSet = extracted;
		log.info("Claims extrahiert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
	}

	const checkableClaims = claimSet.claims.filter((c) => c.checkability === "checkable");
	const uncheckedClaims = claimSet.claims.filter((c) => c.checkability !== "checkable");
	progress(
		`${claimSet.total_claims} Claims — ${checkableClaims.length} prüfbar, ` +
		`${uncheckedClaims.length} nicht prüfbar.`
	);

	if (checkableClaims.length === 0) {
		progress("⚠ Keine prüfbaren Claims gefunden — Verifikation nicht möglich.");
	}

	// Step 2: Perplexity in parallel (with a limit) — using job and global cache
	let doneCount = 0;
	const total = checkableClaims.length;

	if (jobDir && total > 0) {
		const cachedCount = checkableClaims.filter((c) =>
			jobFileExists(jobDir, `perplexity/${c.claim_id}.json`)
		).length;
		if (cachedCount > 0) {
			progress(`${cachedCount}/${total} Perplexity-Ergebnisse aus Job-Cache geladen.`);
		}
	}

	const perplexityTasks = checkableClaims.map((claim) => async () => {
		// Shortened claim text for the progress line.
		const short = claim.text.length > 55 ? claim.text.slice(0, 52) + "..." : claim.text;

		// 1st choice: result stored in the job directory by an earlier run.
		if (jobDir) {
			const cached = loadJobFile<PerplexityResult>(jobDir, `perplexity/${claim.claim_id}.json`);
			if (cached) {
				doneCount++;
				progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cached) "${short}"`);
				return { claim, result: cached, error: null };
			}
		}

		// 2nd choice: global cache keyed by the claim text.
		if (useCache) {
			const globalCached = getCached<PerplexityResult>(claim.text);
			if (globalCached) {
				doneCount++;
				progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cache) "${short}"`);
				return { claim, result: globalCached, error: null };
			}
		}

		try {
			const result = await searchPerplexity(claim.text, { mode, signal: options?.signal });
			doneCount++;
			if (useCache) setCached(claim.text, result);
			if (jobDir) {
				saveJobFile(jobDir, `perplexity/${claim.claim_id}.json`, result);
			}
			progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ "${short}"`);
			return { claim, result, error: null };
		} catch (err: unknown) {
			// A failed search must not stop the other claims; it is reported per claim.
			doneCount++;
			const errMsg = err instanceof Error ? err.message : "Perplexity-Fehler";
			progress(`[${doneCount}/${total}] ${claim.claim_id} ✗ "${short}" — ${errMsg}`);
			return { claim, result: null as PerplexityResult | null, error: errMsg };
		}
	});

	if (total > 0) progress(`Recherche läuft (${total} Claims, max. ${MAX_PARALLEL_PERPLEXITY} parallel)...`);
	const perplexityOutcomes = await runWithConcurrencyLimit(perplexityTasks, MAX_PARALLEL_PERPLEXITY);

	// The per-claim catch above also swallows cancellations. Without this check an
	// aborted run would be turned into a report full of "failed" claims and be
	// stored as a completed job.
	if (options?.signal?.aborted) {
		throw new Error("Verifikation abgebrochen.");
	}

	const successful = perplexityOutcomes.filter((o) => o.result !== null) as Array<{
		claim: (typeof checkableClaims)[number];
		result: PerplexityResult;
		error: null;
	}>;
	const failed = perplexityOutcomes.filter((o) => o.error !== null);
	const totalPerplexityCost = successful.reduce((sum, o) => sum + o.result.estimatedCostUSD, 0);

	log.info("Perplexity abgeschlossen", {
		successful: successful.length,
		failed: failed.length,
		totalCostUSD: totalPerplexityCost.toFixed(4),
	});

	// Step 3: batch verdict synthesis via llama.cpp
	progress(`Urteilssynthese (llama.cpp, ${successful.length} Claims, Sprache: ${userLanguage})...`);
	const verdicts = await synthesizeBatchVerdicts(
		successful.map((o) => ({ id: o.claim.claim_id, text: o.claim.text, perplexity: o.result })),
		model,
		userLanguage,
		options?.signal,
		log
	);

	// Step 4: assemble the report
	const verdictMap = new Map(verdicts.map((v) => [v.claim_id, v]));

	// Verdicts are model output: enum values outside these lists are replaced by
	// the cautious defaults so that every claim lands in a known report group.
	const knownStatuses: readonly VerificationStatus[] = [
		"supported",
		"contradicted",
		"mixed",
		"insufficient_evidence",
		"needs_human_review",
		"not_checkable",
	];
	const knownConfidences: readonly Confidence[] = ["high", "medium", "low"];

	const results: VerificationReport["results"] = [
		...successful.map((o) => {
			const verdict = verdictMap.get(o.claim.claim_id);
			// The model may omit supporting_urls or send a non-array value.
			const rawSupportingUrls: unknown = verdict?.supporting_urls;
			const supportingUrls = new Set<unknown>(Array.isArray(rawSupportingUrls) ? rawSupportingUrls : []);
			const sources = o.result.sources.map((s) => ({
				url: s.url,
				title: s.title ?? null,
				supports_claim: supportingUrls.has(s.url),
			}));
			return {
				claim_id: o.claim.claim_id,
				claim_text: o.claim.text,
				status: knownStatuses.find((s) => s === verdict?.status) ?? "insufficient_evidence",
				confidence: knownConfidences.find((c) => c === verdict?.confidence) ?? "low",
				summary: verdict?.summary ?? "Keine Urteilssynthese verfügbar.",
				sources,
				counter_evidence: verdict?.counter_evidence ?? null,
				notes: verdict?.notes ?? null,
			};
		}),
		...failed.map((o) => ({
			claim_id: o.claim.claim_id,
			claim_text: o.claim.text,
			status: "insufficient_evidence" as VerificationStatus,
			confidence: "low" as Confidence,
			summary: `Recherche fehlgeschlagen: ${o.error}`,
			sources: [],
			counter_evidence: null,
			notes: null,
		})),
		...uncheckedClaims.map((c) => ({
			claim_id: c.claim_id,
			claim_text: c.text,
			status: "not_checkable" as VerificationStatus,
			confidence: "high" as Confidence,
			summary: `Nicht empirisch prüfbar (${c.claim_type}).`,
			sources: [],
			counter_evidence: null,
			notes: null,
		})),
	];

	const stats: Record<string, number> = {
		total: results.length,
		supported: 0,
		contradicted: 0,
		mixed: 0,
		insufficient_evidence: 0,
		needs_human_review: 0,
		not_checkable: 0,
	};
	for (const r of results) stats[r.status] = (stats[r.status] ?? 0) + 1;

	const checkedCount = successful.length;
	const summaryParts = [
		`${claimSet.total_claims} Claims extrahiert, ${checkedCount} recherchiert.`,
		stats.supported > 0 ? `${stats.supported} bestätigt` : "",
		stats.contradicted > 0 ? `${stats.contradicted} widerlegt` : "",
		stats.mixed > 0 ? `${stats.mixed} gemischt` : "",
		stats.needs_human_review > 0 ? `${stats.needs_human_review} → Menschliche Prüfung nötig` : "",
		stats.insufficient_evidence > 0 ? `${stats.insufficient_evidence} ohne ausreichende Belege` : "",
	]
		.filter(Boolean)
		.join(". ");

	const totalLatencyMs = Date.now() - t0;
	log.info("llama-verify-article abgeschlossen", {
		...stats,
		totalCostUSD: totalPerplexityCost.toFixed(4),
		latencyMs: totalLatencyMs,
	});

	const report: VerificationReport = {
		schema_version: "1.0.0",
		verified_at: new Date().toISOString(),
		source_text_summary: text.slice(0, 200) + (text.length > 200 ? "…" : ""),
		summary: summaryParts,
		results,
		stats,
		totalCostUSD: totalPerplexityCost,
		latencyMs: totalLatencyMs,
	};

	if (jobDir) {
		saveJobFile(jobDir, "report.json", report);
		updateJobMeta(jobDir, {
			status: "completed",
			steps: {
				verify: {
					completedAt: new Date().toISOString(),
					claimsVerified: successful.length,
					totalCostUSD: totalPerplexityCost,
					latencyMs: totalLatencyMs,
				},
			},
		});
		log.info("Report in Job gespeichert", { jobDir });
	}

	return report;
}

// ---------------------------------------------------------------------------
// Formatierung
// ---------------------------------------------------------------------------

/** German display label (with symbol) per status; used as the group heading in `formatReport`. */
const STATUS_ICON: Record<VerificationStatus, string> = {
	supported: "✓ BESTÄTIGT",
	contradicted: "✗ WIDERLEGT",
	mixed: "~ GEMISCHT",
	insufficient_evidence: "? BELEGE UNZUREICHEND",
	needs_human_review: "⚠ MENSCHLICHE PRÜFUNG NÖTIG",
	not_checkable: "— NICHT PRÜFBAR",
};

/**
 * Renders a verification report as Markdown text.
 *
 * The output starts with a heading and the report summary, followed by one
 * section per status (in a fixed order, empty groups skipped) and a footer line
 * with Perplexity cost, model name and total duration. For claims that are not
 * `not_checkable`, the verdict summary, counter evidence, notes and the
 * supporting sources are listed below the claim.
 *
 * @param report The report to render.
 * @param model Model name shown in the footer.
 * @returns The Markdown text.
 */
function formatReport(report: VerificationReport, model: string): string {
	const out: string[] = [`## Verifikationsbericht (llama.cpp)`, report.summary, ""];

	const sectionOrder: VerificationStatus[] = [
		"supported",
		"contradicted",
		"mixed",
		"needs_human_review",
		"insufficient_evidence",
		"not_checkable",
	];

	sectionOrder.forEach((status) => {
		const matching = report.results.filter((entry) => entry.status === status);
		if (matching.length === 0) return;

		out.push(`**${STATUS_ICON[status]} (${matching.length}):**`);

		matching.forEach((entry) => {
			out.push(`\`${entry.claim_id}\` "${entry.claim_text}"`);

			if (entry.status !== "not_checkable") {
				out.push(`  → ${entry.summary}`);
				if (entry.counter_evidence) out.push(`  ✗ Gegenbeleg: ${entry.counter_evidence}`);
				if (entry.notes) out.push(`  ℹ ${entry.notes}`);

				// Only sources marked as supporting are listed.
				const links = entry.sources
					.filter((source) => source.supports_claim)
					.map((source) => `[${source.title ?? source.url}](${source.url})`);
				if (links.length > 0) out.push(`  Quellen: ${links.join(", ")}`);
			}

			out.push("");
		});
	});

	const seconds = (report.latencyMs / 1000).toFixed(0);
	const cost = report.totalCostUSD.toFixed(4);
	out.push(`_[Perplexity: ~$${cost} | llama.cpp: ${model} | Gesamt: ${seconds}s]_`);

	return out.join("\n");
}

// ---------------------------------------------------------------------------
// Pi-Extension: Default Export
// ---------------------------------------------------------------------------

/**
 * Parameter schema of the `verify_article_llama` tool.
 * Only `text` is required; the descriptions are the (German) hints shown for each parameter.
 */
const PARAMS = Type.Object({
	text: Type.String({
		description:
			"Der vollständige Artikel- oder Blogtext, der auf Fakten geprüft werden soll. " +
			"Nicht kürzen — der Originaltext wird für die Claim-Extraktion benötigt.",
	}),
	maxClaims: Type.Optional(
		Type.Number({
			description: `Maximale Anzahl zu prüfender Claims. Standard: ${DEFAULT_MAX_CLAIMS}. Max: 20.`,
		})
	),
	mode: Type.Optional(
		Type.Union([Type.Literal("fast"), Type.Literal("deep")], {
			description:
				"fast (Standard): sonar, kostengünstig. deep: sonar-pro, für investigative Inhalte.",
		})
	),
	model: Type.Optional(
		Type.String({
			description: `llama.cpp-Modell. Standard: ${DEFAULT_MODEL}.`,
		})
	),
	userLanguage: Type.Optional(
		Type.String({
			description: `Sprache für Urteilstext (summary, counter_evidence, notes). Standard: ${DEFAULT_USER_LANGUAGE}.`,
		})
	),
});

/**
 * Pi extension entry point: registers the `verify_article_llama` tool.
 *
 * The tool runs `verifyArticle` on the given text and answers with the
 * formatted report plus a few key figures in `details`. Any error is turned
 * into a plain text answer instead of being thrown.
 *
 * @param pi Extension API used to register the tool.
 */
export default function llamaVerifyArticleExtension(pi: ExtensionAPI) {
	const toolDescription = [
		"Vollständige Fact-Check-Pipeline via llama.cpp: ",
		"Claims extrahieren → Perplexity-Recherche (parallel) → llama.cpp-Urteil (batch) → Bericht. ",
		"Effizienter als verify_claim_llama für mehrere Claims. ",
		"Typische Kosten: $0.05–0.15 für einen Artikel mit 10–15 Claims (nur Perplexity, llama.cpp lokal).",
	].join("");

	pi.registerTool({
		name: "verify_article_llama",
		label: "Artikel-Verifikation (llama.cpp)",
		description: toolDescription,
		promptGuidelines: [
			"Use verify_article_llama when the user wants to fact-check an entire article, blog post, or longer text.",
			"Use verify_claim_llama instead when the user wants to check a single specific claim.",
			"Pass the FULL article text — do not summarize it first.",
			"Use mode=deep for scientific, medical, legal, or politically sensitive content.",
			"Set userLanguage to match the user's preferred language (e.g. 'de' for German, 'en' for English).",
			"Always show the full formatted report including the cost/latency line.",
			"Highlight contradicted claims and claims needing human review prominently.",
			"If needs_human_review claims exist, explain that they require manual fact-checking.",
			"After the report, offer to show full sources for specific claims if the user wants details.",
		],
		parameters: PARAMS,
		execute: async (_toolCallId, params, signal) => {
			const { text, maxClaims, mode, userLanguage } = params;
			const model = params.model ?? DEFAULT_MODEL;

			try {
				const report = await verifyArticle(text, { maxClaims, mode, model, userLanguage, signal });
				const { stats, totalCostUSD, latencyMs } = report;

				return {
					content: [{ type: "text", text: formatReport(report, model) }],
					details: {
						totalClaims: stats.total,
						supported: stats.supported,
						contradicted: stats.contradicted,
						needsHumanReview: stats.needs_human_review,
						totalCostUSD,
						latencyMs,
					},
				};
			} catch (err) {
				// Report the failure as tool output rather than throwing.
				const reason = err instanceof Error ? err.message : "Unbekannter Fehler";
				return {
					content: [{ type: "text", text: `Artikel-Verifikation (llama.cpp) fehlgeschlagen: ${reason}` }],
				};
			}
		},
	});
}

// ---------------------------------------------------------------------------
// CLI-Modus
// ---------------------------------------------------------------------------

/**
 * Command-line entry point.
 *
 * Parses `process.argv`, reads the article either from `--file` or from the
 * positional arguments, optionally opens a job directory (`--job-id`), runs
 * `verifyArticle` and prints the report to stdout — as JSON with `--json`,
 * otherwise as formatted text. Status and progress output goes to stderr so
 * that stdout only carries the report.
 *
 * Invalid values for `--mode` and `--max-claims` are reported on stderr and the
 * respective default is kept (they used to be ignored silently, and a
 * non-numeric `--max-claims` ended up as NaN).
 *
 * Exits with code 0 after printing the help text and with code 1 when the file
 * cannot be read, no text was given, or the verification fails.
 */
async function runCli() {
	const args = process.argv.slice(2);

	if (args.length === 0 || args[0] === "--help" || args[0] === "-h") {
		console.log(`
Artikel-Verifikator (llama.cpp) — Vollständige Fact-Check-Pipeline

Verwendung:
  npx tsx agenten/llama-verify-article.ts [Optionen] "Artikeltext..."
  npx tsx agenten/llama-verify-article.ts --file artikel.txt [Optionen]

Optionen:
  --file, -f <pfad>        Text aus Datei lesen
  --mode fast|deep         Perplexity-Modus (Standard: fast)
  --model <name>           llama.cpp-Modell (Standard: ${DEFAULT_MODEL})
  --max-claims <n>         Max. Claims (Standard: ${DEFAULT_MAX_CLAIMS})
  --user-language <lang>   Sprache für Urteilstext, z.B. "de", "en" (Standard: ${DEFAULT_USER_LANGUAGE})
  --job-id <slug>          Job-Speicher: Zwischenergebnisse nach ~/.pi/agent/jobs/<datum>_<slug>/
  --no-cache               Globalen Claim-Cache deaktivieren
  --json                   Ausgabe als JSON
  --verbose, -v            Ausführliche Ausgabe + Log-Datei
  --help                   Diese Hilfe

Umgebungsvariablen:
  LLAMA_HOST               llama.cpp-Server-URL (Standard: http://localhost:8000)
  PERPLEXITY_API_KEY       Perplexity API-Key (erforderlich)

Beispiele:
  npx tsx agenten/llama-verify-article.ts --file artikel.txt
  npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep --user-language en
  npx tsx agenten/llama-verify-article.ts --file artikel.txt --job-id mein-artikel --verbose
  npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
`);
		process.exit(0);
	}

	let mode: "fast" | "deep" = "fast";
	let model = DEFAULT_MODEL;
	let maxClaims = DEFAULT_MAX_CLAIMS;
	let userLanguage = DEFAULT_USER_LANGUAGE;
	let jobId: string | undefined;
	let jsonOutput = false;
	let verbose = false;
	let noCache = false;
	let file: string | null = null;
	const textParts: string[] = [];

	// Options that take a value consume the following argument (`args[++i]`).
	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		if (arg === "--mode" && args[i + 1]) {
			const m = args[++i];
			if (m === "fast" || m === "deep") {
				mode = m;
			} else {
				console.error(`Warnung: Ungültiger Wert für --mode: '${m}' — verwende '${mode}'.`);
			}
		} else if (arg === "--model" && args[i + 1]) {
			model = args[++i];
		} else if (arg === "--max-claims" && args[i + 1]) {
			const rawMax = args[++i];
			const parsedMax = Number.parseInt(rawMax, 10);
			if (Number.isInteger(parsedMax) && parsedMax > 0) {
				maxClaims = parsedMax;
			} else {
				// parseInt yields NaN for non-numeric input; never pass that on.
				console.error(`Warnung: Ungültiger Wert für --max-claims: '${rawMax}' — verwende ${maxClaims}.`);
			}
		} else if (arg === "--user-language" && args[i + 1]) {
			userLanguage = args[++i];
		} else if (arg === "--job-id" && args[i + 1]) {
			jobId = args[++i];
		} else if ((arg === "--file" || arg === "-f") && args[i + 1]) {
			file = args[++i];
		} else if (arg === "--json") {
			jsonOutput = true;
		} else if (arg === "--verbose" || arg === "-v") {
			verbose = true;
		} else if (arg === "--no-cache") {
			noCache = true;
		} else if (!arg.startsWith("--")) {
			// Everything that is not a long option counts as part of the article text.
			textParts.push(arg);
		}
	}

	let text: string;
	if (file) {
		try {
			text = await readFile(file, "utf-8");
		} catch (err) {
			console.error(`Fehler: Datei '${file}' konnte nicht gelesen werden: ${err instanceof Error ? err.message : err}`);
			process.exit(1);
		}
	} else {
		text = textParts.join(" ").trim();
	}

	if (!text.trim()) {
		console.error("Fehler: Kein Text übergeben. Nutze --file <pfad> oder übergib den Text direkt.");
		process.exit(1);
	}

	if (!jsonOutput) {
		const src = file ? `Datei: ${file}` : "Direkteingabe";
		console.error(`\nModus: ${mode} | Modell: ${model} | Max. Claims: ${maxClaims} | Sprache: ${userLanguage} | ${src}${jobId ? ` | Job: ${jobId}` : ""}\n`);
	}

	const log = createLogger({ verbose, jobId });
	// In JSON mode no progress lines are printed.
	const onProgress = jsonOutput ? undefined : (msg: string) => process.stderr.write(`  ${msg}\n`);

	let jobDir: string | undefined;
	if (jobId) {
		const { jobDir: dir, isNew } = getOrCreateJob(jobId, model);
		jobDir = dir;
		// The input text is only stored when the job is created, not on resume.
		if (isNew) saveJobFile(jobDir, "input.txt", text);
		if (!jsonOutput) {
			process.stderr.write(`  Job: ${jobDir} (${isNew ? "neu" : "fortgesetzt"})\n\n`);
		}
	}

	try {
		const report = await verifyArticle(text, { maxClaims, mode, model, userLanguage, onProgress, logger: log, jobDir, noCache });
		if (jsonOutput) {
			console.log(JSON.stringify(report, null, 2));
		} else {
			console.log(formatReport(report, model));
		}
	} catch (err) {
		if (jobDir) updateJobMeta(jobDir, { status: "failed" });
		console.error("Fehler:", err instanceof Error ? err.message : err);
		process.exit(1);
	}
}

// Run the CLI only when this file is the script that was started directly
// (process.argv[1] equals this module's path), not when it is imported.
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
	// runCli handles verification errors itself; this catch covers everything
	// that fails outside its try/catch (e.g. logger or job setup), so the promise
	// is never left floating.
	runCli().catch((err: unknown) => {
		console.error("Fehler:", err instanceof Error ? err.message : err);
		process.exit(1);
	});
}