Verifier-Prompts:
- "contradicted" nur bei substanziellen Fehlern (>5% Abweichung, nicht >10%)
- Gerundete Näherungswerte → "supported"
- Zeitzonendifferenzen → "supported", wenn regional korrekt
- Technische Nuancen → "mixed" statt "contradicted"

Testkorpus (expected.json):
- case_001 "Zielwert": supported → contradicted (2,2% ist nicht "deutlich über" 2%)
- case_002 "20 Mitgliedsstaaten": supported → contradicted (Beitritt Bulgariens Jan. 2026)
- case_003 Needle-Fix: "Collins im Mondorbit" → "Collins verblieb im Mondorbit"
- case_004 Needle-Fix: "drei Stadtstaaten" → "Stadtstaaten"
- case_007 "95 Prozent": supported → contradicted (gilt für symptomatische, nicht für schwere Verläufe)
- case_008 "Lindner": mixed → supported; "500 Milliarden": bleibt contradicted
- case_009 "zweimal beigetreten": supported → contradicted (2. Austritt der USA 2026)

run_corpus.sh: --job-id ergänzt (cacht Claim-Extraktion zwischen Läufen)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
838 lines
28 KiB
TypeScript
838 lines
28 KiB
TypeScript
/**
 * llama-verify-article.ts
 * Pi-Extension + CLI: Vollständige Fact-Check-Pipeline via llama.cpp
 *
 * Ablauf:
 *   1. Claim-Extraktion via llama.cpp (lokal, Port 8000)
 *   2. Perplexity-Recherche für alle prüfbaren Claims (parallel)
 *   3. Batch-Urteilssynthese via llama.cpp (1 Aufruf für alle Claims)
 *   4. Verifikationsbericht formatieren
 *
 * Als Pi-Extension: ~/.pi/agent/extensions/fact-checker/llama-verify-article.ts
 * Als CLI:
 *   npx tsx agenten/llama-verify-article.ts "$(cat artikel.txt)"
 *   npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep
 *   npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
 */

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { fileURLToPath } from "node:url";
import { readFile } from "node:fs/promises";
import { setTimeout as sleep } from "node:timers/promises";
import {
  searchPerplexity,
  formatSourcesForPrompt,
  type PerplexityResult,
} from "../lib/perplexity.js";
import { callLlamaClaimExtract, type ClaimSet } from "./llama-claim-extractor.js";
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";
import {
  saveJobFile,
  loadJobFile,
  jobFileExists,
  updateJobMeta,
  getOrCreateJob,
} from "../lib/jobs.js";
import { getCached, setCached } from "../lib/cache.js";

// ---------------------------------------------------------------------------
// Typen
// ---------------------------------------------------------------------------

/**
 * Verdict categories used in the report. The verdict prompt in this file only
 * offers the first five to the model; `not_checkable` is assigned by
 * `verifyArticle` to claims that were not classified as checkable.
 */
type VerificationStatus =
  | "supported"
  | "contradicted"
  | "mixed"
  | "insufficient_evidence"
  | "needs_human_review"
  | "not_checkable";

/** How certain a verdict is. */
type Confidence = "high" | "medium" | "low";

/** One verdict as requested from the model in the batch-verdict JSON. */
type VerdictItem = {
  claim_id: string;
  status: VerificationStatus;
  confidence: Confidence;
  summary: string;
  counter_evidence: string | null;
  notes: string | null;
  /** URLs of sources that support the claim (empty when there are none). */
  supporting_urls: string[];
};

/** Top-level shape of the model's batch-verdict JSON answer. */
type BatchVerdictRaw = { verdicts: VerdictItem[] };

/** Final report returned by `verifyArticle` and serialised by `--json`. */
export type VerificationReport = {
  schema_version: "1.0.0";
  /** ISO-8601 timestamp taken when the report object is built. */
  verified_at: string;
  /** First 200 characters of the input text (with an ellipsis when truncated). */
  source_text_summary: string;
  summary: string;
  results: Array<{
    claim_id: string;
    claim_text: string;
    status: VerificationStatus;
    confidence: Confidence;
    summary: string;
    sources: Array<{ url: string; title: string | null; supports_claim: boolean }>;
    counter_evidence: string | null;
    notes: string | null;
  }>;
  /** Result counts: `total` plus one counter per status. */
  stats: Record<string, number>;
  /** Sum of `estimatedCostUSD` over all research results that were used. */
  totalCostUSD: number;
  latencyMs: number;
};

// llama.cpp response in OpenAI-compatible chat-completions format
type LlamaResponse = {
  choices: Array<{
    message?: { content?: string; reasoning_content?: string };
    finish_reason?: string;
  }>;
  usage?: { prompt_tokens?: number; completion_tokens?: number };
};

// ---------------------------------------------------------------------------
// Konfiguration
// ---------------------------------------------------------------------------

/** Model name sent to llama.cpp when the caller does not pick one. */
const DEFAULT_MODEL = "Qwopus3.6-35B-A3B-v1-Q4_K_M.gguf";
/**
 * Base URL of the llama.cpp server. An empty or whitespace-only LLAMA_HOST is
 * treated as unset, and trailing slashes are removed so that appending
 * "/v1/chat/completions" never produces a double slash.
 */
const LLAMA_HOST = (process.env.LLAMA_HOST?.trim() || "http://localhost:8000").replace(/\/+$/, "");
/** Default number of claims to extract per article. */
const DEFAULT_MAX_CLAIMS = 15;
/** Default language for the free-text verdict fields. */
const DEFAULT_USER_LANGUAGE = "de";
/** Upper bound on simultaneously running Perplexity searches. */
const MAX_PARALLEL_PERPLEXITY = 5;
// Batch verdicts: many claims plus Perplexity texts need a large token budget
const MAX_TOKENS_BATCH = 32768;
/** Sampling temperature for the verdict call. */
const TEMPERATURE = 0.1;
/** Number of attempts for the verdict request when fetch itself fails. */
const MAX_RETRIES = 3;
/** Pause between two verdict request attempts. */
const RETRY_DELAY_MS = 15_000;

// ---------------------------------------------------------------------------
// Batch-Urteilssynthese via llama.cpp
// ---------------------------------------------------------------------------

/**
 * Maps a language code to the German language name used inside the prompt.
 *
 * @param userLanguage Language code such as "de" or "en" (matched exactly).
 * @returns The German name for the four known codes; any other value is
 *          returned unchanged.
 */
function langLabel(userLanguage: string): string {
  switch (userLanguage) {
    case "de":
      return "Deutsch";
    case "en":
      return "Englisch";
    case "fr":
      return "Französisch";
    case "es":
      return "Spanisch";
    default:
      return userLanguage;
  }
}

/**
 * Builds the (German) system prompt for the batch verdict call: status scale,
 * confidence scale, rules that restrict when "contradicted" may be used, the
 * output language, and the exact JSON shape the model must answer with.
 *
 * @param userLanguage Language code for summary / counter_evidence / notes;
 *                     rendered into the prompt via `langLabel`.
 * @returns The complete system prompt.
 */
function buildBatchVerdictSystemPrompt(userLanguage: string): string {
  return `Du bist ein erfahrener Fact-Checker. Bewerte jede Behauptung anhand der bereitgestellten Recherche-Ergebnisse.

Status-Skala:
- supported: Quellen bestätigen klar und konsistent
- contradicted: Quellen widersprechen klar und SUBSTANZIELL
- mixed: Widersprüchliche Quellenlage ODER Behauptung technisch ungenau aber im Kern korrekt
- insufficient_evidence: Zu wenig oder schwache Quellen
- needs_human_review: Komplex, politisch heikel, stark kontextabhängig

Confidence: high (eindeutige Primärquellen), medium (begrenzte/sekundäre Quellen), low (sehr unklar)

WICHTIGE REGELN für "contradicted":
- Nur bei klar substanziellen Fehlern: falsche Person, Zahl >5% abweichend, falsch zugeordnetes Ereignis
- Gerundete/allgemein akzeptierte Näherungswerte → "supported" (z.B. "21 Millionen Bitcoin" ist korrekte Rundung)
- Zeitzonendifferenzen historischer Ereignisse → "supported" wenn im üblichen regionalen Kontext korrekt
- Technische Präzisierungen zu korrekten Aussagen → "mixed", nicht "contradicted"
- Im Zweifel immer "mixed" statt "contradicted"

AUSGABESPRACHE: Schreibe summary, counter_evidence und notes auf ${langLabel(userLanguage)}.
Die Enum-Werte status und confidence bleiben englisch.

summary: 1-3 präzise Sätze. Nicht spekulieren.
counter_evidence: Gegenbelege als Satz, sonst null.
notes: Zeitabhängigkeit, Einschränkungen, sonst null.
supporting_urls: URLs der stützenden Quellen (leeres Array wenn keine).

Antworte NUR mit diesem JSON-Objekt — kein Freitext davor oder danach:
{
  "verdicts": [
    {
      "claim_id": "c001",
      "status": "supported|contradicted|mixed|insufficient_evidence|needs_human_review",
      "confidence": "high|medium|low",
      "summary": "...",
      "counter_evidence": "..." | null,
      "notes": "..." | null,
      "supporting_urls": ["url1"]
    }
  ]
}`;
}

/**
 * Builds the user message for the batch verdict call: one section per claim
 * containing the claim text, the Perplexity research summary and the formatted
 * source list, followed by the instruction to rate all claims.
 *
 * @param claims Claims with their research result, in the order to present.
 * @returns The prompt text; it starts with the "/no_think" marker line.
 */
function buildBatchVerdictUserPrompt(
  claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>
): string {
  const sections = claims.map(({ id, text, perplexity }) => {
    const sourcesFormatted = formatSourcesForPrompt(perplexity.sources, 200);
    return [
      "---",
      `BEHAUPTUNG ${id}: "${text}"`,
      "RECHERCHE:",
      `${perplexity.summary}`,
      "",
      "QUELLEN:",
      // Placeholder keeps the section shape stable when nothing was found.
      `${sourcesFormatted || "(keine Quellen gefunden)"}`,
    ].join("\n");
  });

  return `/no_think\n${sections.join("\n\n")}\n\nBewerte alle ${claims.length} Behauptungen.`;
}

/**
 * Coerces one element of the model's `verdicts` array into a well-formed
 * `VerdictItem`.
 *
 * Model output is untrusted: fields may be missing or of the wrong type.
 * Entries without a usable `claim_id` are dropped (returns null). Unknown
 * status / confidence values fall back to "insufficient_evidence" / "low",
 * and a missing `supporting_urls` becomes an empty array.
 */
function normalizeVerdict(candidate: unknown): VerdictItem | null {
  if (typeof candidate !== "object" || candidate === null) return null;
  const raw = candidate as Record<string, unknown>;
  if (typeof raw.claim_id !== "string" || raw.claim_id.trim() === "") return null;

  const knownStatuses: readonly string[] = [
    "supported",
    "contradicted",
    "mixed",
    "insufficient_evidence",
    "needs_human_review",
  ];
  const knownConfidences: readonly string[] = ["high", "medium", "low"];
  const textOrNull = (value: unknown): string | null =>
    typeof value === "string" && value.trim() !== "" ? value : null;

  const status = typeof raw.status === "string" ? raw.status.trim().toLowerCase() : "";
  const confidence = typeof raw.confidence === "string" ? raw.confidence.trim().toLowerCase() : "";

  return {
    claim_id: raw.claim_id.trim(),
    status: knownStatuses.includes(status) ? (status as VerificationStatus) : "insufficient_evidence",
    confidence: knownConfidences.includes(confidence) ? (confidence as Confidence) : "low",
    summary: textOrNull(raw.summary) ?? "Keine Urteilssynthese verfügbar.",
    counter_evidence: textOrNull(raw.counter_evidence),
    notes: textOrNull(raw.notes),
    supporting_urls: Array.isArray(raw.supporting_urls)
      ? raw.supporting_urls.filter((u): u is string => typeof u === "string")
      : [],
  };
}

/**
 * Sends all researched claims to llama.cpp in a single chat-completions call
 * and returns the parsed verdicts.
 *
 * @param claims       Claims with their Perplexity result; an empty list
 *                     short-circuits to `[]` without any request.
 * @param model        Model name passed through to llama.cpp.
 * @param userLanguage Language code for the free-text verdict fields.
 * @param signal       Optional abort signal; an abort is never retried and
 *                     also interrupts the pause between attempts.
 * @param logger       Optional logger (defaults to `nullLogger`).
 * @returns Normalised verdicts; `[]` when the answer has no `verdicts` array.
 * @throws Error when fetch fails on every attempt, the server answers with a
 *         non-2xx status, the answer is empty, or it contains no parseable JSON.
 */
async function synthesizeBatchVerdicts(
  claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>,
  model: string,
  userLanguage: string,
  signal?: AbortSignal,
  logger?: Logger
): Promise<VerdictItem[]> {
  if (claims.length === 0) return [];

  const log = logger ?? nullLogger;

  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: buildBatchVerdictSystemPrompt(userLanguage) },
      { role: "user", content: buildBatchVerdictUserPrompt(claims) },
    ],
    stream: false,
    temperature: TEMPERATURE,
    max_tokens: MAX_TOKENS_BATCH,
  });

  let resp: Response | undefined;
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      resp = await fetch(`${LLAMA_HOST}/v1/chat/completions`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: requestBody,
        signal,
      });
      break;
    } catch (err) {
      // A cancelled request must surface immediately instead of being retried.
      if (signal?.aborted) throw err;

      const message = err instanceof Error ? err.message : String(err);
      log.warn(`llama.cpp Batch-Verdict fetch fehlgeschlagen (Versuch ${attempt}/${MAX_RETRIES})`, {
        error: message,
      });
      if (attempt === MAX_RETRIES) {
        throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: ${message}`);
      }
      // Abortable pause: rejects right away if the signal fires while waiting.
      await sleep(RETRY_DELAY_MS, undefined, { signal });
    }
  }

  // Only reachable if MAX_RETRIES were configured below 1.
  if (!resp) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: keine Antwort`);

  if (!resp.ok) {
    const errText = await resp.text().catch(() => "");
    throw new Error(`llama.cpp Batch-Verdict Fehler ${resp.status}: ${errText}`);
  }

  const data = (await resp.json()) as LlamaResponse;
  const choice = data.choices?.[0];
  let raw = choice?.message?.content ?? "";

  // Reasoning fallback: when content is empty, pull the JSON out of
  // reasoning_content, preferring the last object that opens with "verdicts".
  const reasoning = choice?.message?.reasoning_content;
  if (!raw.trim() && reasoning) {
    const openers = [...reasoning.matchAll(/\{[^{}]*"verdicts"\s*:/g)];
    const lastOpener = openers.length > 0 ? openers[openers.length - 1] : undefined;
    const extracted =
      lastOpener?.index !== undefined
        ? reasoning.slice(lastOpener.index).match(/\{[\s\S]*\}/)?.[0]
        : reasoning.match(/\{[\s\S]*"verdicts"[\s\S]*\}/)?.[0];
    if (extracted) {
      raw = extracted;
      log.warn("Batch-Verdict: JSON aus reasoning_content extrahiert", {
        finishReason: choice?.finish_reason,
      });
    }
  }

  // Strip a surrounding Markdown code fence, if any.
  const cleanedRaw = raw
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/i, "")
    .trim();

  log.debug("Batch-Verdict erhalten", {
    promptTokens: data.usage?.prompt_tokens,
    outputTokens: data.usage?.completion_tokens,
    finishReason: choice?.finish_reason,
    rawLength: raw.length,
  });

  if (!cleanedRaw) throw new Error("Leere llama.cpp-Antwort für Batch-Verdicts");

  let parsed: unknown;
  try {
    parsed = JSON.parse(cleanedRaw);
  } catch {
    // Second chance: the model wrapped the object in stray text.
    const start = cleanedRaw.indexOf("{");
    const end = cleanedRaw.lastIndexOf("}");
    try {
      if (start < 0 || end <= start) throw new Error("no JSON object found");
      parsed = JSON.parse(cleanedRaw.slice(start, end + 1));
    } catch {
      throw new Error(`Kein gültiges JSON von llama.cpp: ${cleanedRaw.slice(0, 300)}`);
    }
  }

  const rawVerdicts =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { verdicts?: unknown }).verdicts
      : undefined;
  if (!Array.isArray(rawVerdicts)) {
    log.warn("Batch-Verdict: Antwort enthält kein verdicts-Array", { parsedType: typeof parsed });
    return [];
  }

  return rawVerdicts
    .map((item) => normalizeVerdict(item))
    .filter((item): item is VerdictItem => item !== null);
}

// ---------------------------------------------------------------------------
// Parallel-Limiter für Perplexity
// ---------------------------------------------------------------------------

/**
 * Runs the given task factories with at most `limit` of them in flight at any
 * time and returns their results in input order.
 *
 * @param tasks Zero-argument functions that start the work when called.
 * @param limit Maximum number of concurrently running tasks. Values below 1
 *              and NaN are treated as 1, so that every task still runs
 *              (previously no worker was started and the result array stayed
 *              unfilled).
 * @returns Results positioned by task index. Rejects with the first task
 *          error, as `Promise.all` does.
 */
async function runWithConcurrencyLimit<T>(
  tasks: Array<() => Promise<T>>,
  limit: number
): Promise<T[]> {
  const results = new Array<T>(tasks.length);
  const cap = Number.isNaN(limit) ? 1 : Math.max(1, Math.floor(limit));
  const workerCount = Math.min(cap, tasks.length);

  // Shared cursor: each worker claims the next unclaimed index. Claiming is
  // synchronous, so two workers can never take the same task.
  let next = 0;
  const worker = async (): Promise<void> => {
    while (next < tasks.length) {
      const current = next++;
      results[current] = await tasks[current]();
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

// ---------------------------------------------------------------------------
// Hauptfunktion
// ---------------------------------------------------------------------------

/**
 * Runs the complete fact-check pipeline for one text:
 * claim extraction → Perplexity research → batch verdicts → report.
 *
 * @param text    Article text to check.
 * @param options Optional settings:
 *   - `maxClaims`    upper bound for extracted claims; capped at 20. Values
 *                    that are not a finite number ≥ 1 fall back to the default.
 *   - `mode`         Perplexity search mode, "fast" (default) or "deep".
 *   - `model`        llama.cpp model name.
 *   - `userLanguage` language code for the free-text verdict fields.
 *   - `signal`       abort signal handed to extraction, research and synthesis.
 *   - `onProgress`   callback for human-readable progress lines.
 *   - `logger`       logger; progress lines are also logged at info level.
 *   - `jobDir`       job directory: claims.json, perplexity/<id>.json and
 *                    report.json are read from / written to it so that a
 *                    repeated run can reuse earlier steps.
 *   - `noCache`      when true, the global claim cache is neither read nor written.
 * @returns The verification report. Research failures do not throw; the
 *          affected claims are reported as "insufficient_evidence".
 * @throws Whatever claim extraction or verdict synthesis throws.
 */
export async function verifyArticle(
  text: string,
  options?: {
    maxClaims?: number;
    mode?: "fast" | "deep";
    model?: string;
    userLanguage?: string;
    signal?: AbortSignal;
    onProgress?: (msg: string) => void;
    logger?: Logger;
    jobDir?: string;
    noCache?: boolean;
  }
): Promise<VerificationReport> {
  const t0 = Date.now();
  const model = options?.model ?? DEFAULT_MODEL;
  // Guard against NaN / zero / negative values (e.g. from a mistyped CLI flag).
  const requestedMax = options?.maxClaims;
  const maxClaims = Math.min(
    typeof requestedMax === "number" && Number.isFinite(requestedMax) && requestedMax >= 1
      ? Math.floor(requestedMax)
      : DEFAULT_MAX_CLAIMS,
    20
  );
  const mode = options?.mode ?? "fast";
  const userLanguage = options?.userLanguage ?? DEFAULT_USER_LANGUAGE;
  const log = options?.logger ?? nullLogger;
  const jobDir = options?.jobDir;
  const useCache = !(options?.noCache ?? false);
  const progress = (msg: string) => {
    options?.onProgress?.(msg);
    log.info(msg);
  };

  log.info("llama-verify-article gestartet", { textLength: text.length, model, maxClaims, mode, userLanguage, jobDir });

  // Step 1: claim extraction (or reuse claims.json from the job directory)
  const extractClaims = () => {
    progress("Claims extrahieren (llama.cpp)...");
    return callLlamaClaimExtract(text, model, maxClaims, options?.signal, log);
  };

  let claimSet: ClaimSet;
  if (jobDir) {
    const cached = loadJobFile<ClaimSet>(jobDir, "claims.json");
    if (cached) {
      claimSet = cached;
      const checkable = claimSet.claims.filter((c) => c.checkability === "checkable").length;
      progress(`Claims aus Job geladen (${claimSet.total_claims} total, ${checkable} prüfbar) — Extraktion übersprungen.`);
    } else {
      updateJobMeta(jobDir, { status: "extracting" });
      const { claimSet: extracted, tokensIn, tokensOut, latencyMs: extractLatency } = await extractClaims();
      claimSet = extracted;
      saveJobFile(jobDir, "claims.json", claimSet);
      updateJobMeta(jobDir, {
        status: "verifying",
        steps: {
          extract: {
            completedAt: new Date().toISOString(),
            totalClaims: claimSet.total_claims,
            checkableClaims: claimSet.claims.filter((c) => c.checkability === "checkable").length,
            latencyMs: extractLatency,
          },
        },
      });
      log.info("Claims extrahiert + gespeichert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
    }
  } else {
    const { claimSet: extracted, tokensIn, tokensOut, latencyMs: extractLatency } = await extractClaims();
    claimSet = extracted;
    log.info("Claims extrahiert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
  }

  const checkableClaims = claimSet.claims.filter((c) => c.checkability === "checkable");
  const uncheckedClaims = claimSet.claims.filter((c) => c.checkability !== "checkable");
  progress(
    `${claimSet.total_claims} Claims — ${checkableClaims.length} prüfbar, ` +
      `${uncheckedClaims.length} nicht prüfbar.`
  );

  if (checkableClaims.length === 0) {
    progress("⚠ Keine prüfbaren Claims gefunden — Verifikation nicht möglich.");
  }

  // Step 2: Perplexity research, limited parallelism, with job and global cache
  type Claim = (typeof checkableClaims)[number];
  type ResearchOutcome =
    | { claim: Claim; result: PerplexityResult; error: null }
    | { claim: Claim; result: null; error: string };

  let doneCount = 0;
  const total = checkableClaims.length;

  if (jobDir && total > 0) {
    const cachedCount = checkableClaims.filter((c) =>
      jobFileExists(jobDir, `perplexity/${c.claim_id}.json`)
    ).length;
    if (cachedCount > 0) {
      progress(`${cachedCount}/${total} Perplexity-Ergebnisse aus Job-Cache geladen.`);
    }
  }

  const perplexityTasks = checkableClaims.map((claim) => async (): Promise<ResearchOutcome> => {
    const short = claim.text.length > 55 ? claim.text.slice(0, 52) + "..." : claim.text;

    // Lookup order: job directory first, then the global cache, then the API.
    if (jobDir) {
      const cached = loadJobFile<PerplexityResult>(jobDir, `perplexity/${claim.claim_id}.json`);
      if (cached) {
        doneCount++;
        progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cached) "${short}"`);
        return { claim, result: cached, error: null };
      }
    }

    if (useCache) {
      const globalCached = getCached<PerplexityResult>(claim.text);
      if (globalCached) {
        doneCount++;
        progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cache) "${short}"`);
        return { claim, result: globalCached, error: null };
      }
    }

    try {
      const result = await searchPerplexity(claim.text, { mode, signal: options?.signal });
      doneCount++;
      if (useCache) setCached(claim.text, result);
      if (jobDir) {
        saveJobFile(jobDir, `perplexity/${claim.claim_id}.json`, result);
      }
      progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ "${short}"`);
      return { claim, result, error: null };
    } catch (err: unknown) {
      // A failed search is recorded per claim instead of aborting the run.
      doneCount++;
      const errMsg = err instanceof Error ? err.message : "Perplexity-Fehler";
      progress(`[${doneCount}/${total}] ${claim.claim_id} ✗ "${short}" — ${errMsg}`);
      return { claim, result: null, error: errMsg };
    }
  });

  if (total > 0) progress(`Recherche läuft (${total} Claims, max. ${MAX_PARALLEL_PERPLEXITY} parallel)...`);
  const perplexityOutcomes = await runWithConcurrencyLimit(perplexityTasks, MAX_PARALLEL_PERPLEXITY);
  const successful = perplexityOutcomes.filter(
    (o): o is Extract<ResearchOutcome, { error: null }> => o.result !== null
  );
  const failed = perplexityOutcomes.filter(
    (o): o is Extract<ResearchOutcome, { result: null }> => o.error !== null
  );
  const totalPerplexityCost = successful.reduce((sum, o) => sum + o.result.estimatedCostUSD, 0);

  log.info("Perplexity abgeschlossen", {
    successful: successful.length,
    failed: failed.length,
    totalCostUSD: totalPerplexityCost.toFixed(4),
  });

  // Step 3: batch verdict synthesis via llama.cpp (no-op without researched claims)
  if (successful.length > 0) {
    progress(`Urteilssynthese (llama.cpp, ${successful.length} Claims, Sprache: ${userLanguage})...`);
  }
  const verdicts = await synthesizeBatchVerdicts(
    successful.map((o) => ({ id: o.claim.claim_id, text: o.claim.text, perplexity: o.result })),
    model,
    userLanguage,
    options?.signal,
    log
  );

  // Step 4: assemble the report
  const verdictMap = new Map(verdicts.map((v) => [v.claim_id, v]));
  // Verdicts come from model output, so enum fields are re-checked here;
  // anything unknown falls back to the same defaults as a missing verdict.
  const verdictStatuses: readonly string[] = [
    "supported",
    "contradicted",
    "mixed",
    "insufficient_evidence",
    "needs_human_review",
  ];
  const confidences: readonly string[] = ["high", "medium", "low"];

  const results: VerificationReport["results"] = [
    ...successful.map((o) => {
      const verdict = verdictMap.get(o.claim.claim_id);
      // The model may omit supporting_urls entirely.
      const supportingUrls: unknown[] = Array.isArray(verdict?.supporting_urls)
        ? verdict.supporting_urls
        : [];
      const sources = o.result.sources.map((s) => ({
        url: s.url,
        title: s.title ?? null,
        supports_claim: supportingUrls.includes(s.url),
      }));
      const status: VerificationStatus =
        verdict && verdictStatuses.includes(verdict.status) ? verdict.status : "insufficient_evidence";
      const confidence: Confidence =
        verdict && confidences.includes(verdict.confidence) ? verdict.confidence : "low";
      return {
        claim_id: o.claim.claim_id,
        claim_text: o.claim.text,
        status,
        confidence,
        summary: verdict?.summary ?? "Keine Urteilssynthese verfügbar.",
        sources,
        counter_evidence: verdict?.counter_evidence ?? null,
        notes: verdict?.notes ?? null,
      };
    }),
    ...failed.map((o) => ({
      claim_id: o.claim.claim_id,
      claim_text: o.claim.text,
      status: "insufficient_evidence" as VerificationStatus,
      confidence: "low" as Confidence,
      summary: `Recherche fehlgeschlagen: ${o.error}`,
      sources: [],
      counter_evidence: null,
      notes: null,
    })),
    ...uncheckedClaims.map((c) => ({
      claim_id: c.claim_id,
      claim_text: c.text,
      status: "not_checkable" as VerificationStatus,
      confidence: "high" as Confidence,
      summary: `Nicht empirisch prüfbar (${c.claim_type}).`,
      sources: [],
      counter_evidence: null,
      notes: null,
    })),
  ];

  const stats: Record<string, number> = {
    total: results.length,
    supported: 0,
    contradicted: 0,
    mixed: 0,
    insufficient_evidence: 0,
    needs_human_review: 0,
    not_checkable: 0,
  };
  for (const r of results) stats[r.status] = (stats[r.status] ?? 0) + 1;

  const checkedCount = successful.length;
  const summaryParts = [
    `${claimSet.total_claims} Claims extrahiert, ${checkedCount} recherchiert.`,
    stats.supported > 0 ? `${stats.supported} bestätigt` : "",
    stats.contradicted > 0 ? `${stats.contradicted} widerlegt` : "",
    stats.mixed > 0 ? `${stats.mixed} gemischt` : "",
    stats.needs_human_review > 0 ? `${stats.needs_human_review} → Menschliche Prüfung nötig` : "",
    stats.insufficient_evidence > 0 ? `${stats.insufficient_evidence} ohne ausreichende Belege` : "",
  ]
    .filter(Boolean)
    .join(". ");

  const totalLatencyMs = Date.now() - t0;
  log.info("llama-verify-article abgeschlossen", {
    ...stats,
    totalCostUSD: totalPerplexityCost.toFixed(4),
    latencyMs: totalLatencyMs,
  });

  const report: VerificationReport = {
    schema_version: "1.0.0",
    verified_at: new Date().toISOString(),
    source_text_summary: text.slice(0, 200) + (text.length > 200 ? "…" : ""),
    summary: summaryParts,
    results,
    stats,
    totalCostUSD: totalPerplexityCost,
    latencyMs: totalLatencyMs,
  };

  if (jobDir) {
    saveJobFile(jobDir, "report.json", report);
    updateJobMeta(jobDir, {
      status: "completed",
      steps: {
        verify: {
          completedAt: new Date().toISOString(),
          claimsVerified: successful.length,
          totalCostUSD: totalPerplexityCost,
          latencyMs: totalLatencyMs,
        },
      },
    });
    log.info("Report in Job gespeichert", { jobDir });
  }

  return report;
}

// ---------------------------------------------------------------------------
// Formatierung
// ---------------------------------------------------------------------------

/** Heading text (symbol + German label) for each status group in the Markdown report. */
const STATUS_ICON: Record<VerificationStatus, string> = {
  supported: "✓ BESTÄTIGT",
  contradicted: "✗ WIDERLEGT",
  mixed: "~ GEMISCHT",
  insufficient_evidence: "? BELEGE UNZUREICHEND",
  needs_human_review: "⚠ MENSCHLICHE PRÜFUNG NÖTIG",
  not_checkable: "— NICHT PRÜFBAR",
};

/**
 * Renders a verification report as Markdown text.
 *
 * Results are grouped by status in a fixed order; groups without entries are
 * skipped. For every claim except "not_checkable" ones the summary, optional
 * counter-evidence, optional notes and the supporting sources are listed.
 * The last line shows Perplexity cost, model name and total latency.
 *
 * @param report Report produced by `verifyArticle`.
 * @param model  Model name shown in the footer line.
 * @returns The Markdown text, lines joined with "\n".
 */
function formatReport(report: VerificationReport, model: string): string {
  const groupOrder: VerificationStatus[] = [
    "supported",
    "contradicted",
    "mixed",
    "needs_human_review",
    "insufficient_evidence",
    "not_checkable",
  ];

  const lines: string[] = [`## Verifikationsbericht (llama.cpp)`, report.summary, ""];

  for (const status of groupOrder) {
    const items = report.results.filter((r) => r.status === status);
    if (items.length === 0) continue;

    lines.push(`**${STATUS_ICON[status]} (${items.length}):**`);
    for (const item of items) {
      lines.push(`\`${item.claim_id}\` "${item.claim_text}"`);

      if (item.status !== "not_checkable") {
        lines.push(` → ${item.summary}`);
        if (item.counter_evidence) {
          lines.push(` ✗ Gegenbeleg: ${item.counter_evidence}`);
        }
        if (item.notes) {
          lines.push(` ℹ ${item.notes}`);
        }
        // Only sources flagged as supporting are linked; the title falls back to the URL.
        const supporting = item.sources.filter((s) => s.supports_claim);
        if (supporting.length > 0) {
          lines.push(` Quellen: ${supporting.map((s) => `[${s.title ?? s.url}](${s.url})`).join(", ")}`);
        }
      }
      lines.push("");
    }
  }

  const latSec = (report.latencyMs / 1000).toFixed(0);
  lines.push(`_[Perplexity: ~$${report.totalCostUSD.toFixed(4)} | llama.cpp: ${model} | Gesamt: ${latSec}s]_`);

  return lines.join("\n");
}

// ---------------------------------------------------------------------------
// Pi-Extension: Default Export
// ---------------------------------------------------------------------------

/**
 * TypeBox schema of the tool parameters for `verify_article_llama`.
 * Only `text` is required; the optional fields map onto the options of
 * `verifyArticle`.
 */
const PARAMS = Type.Object({
  text: Type.String({
    description:
      "Der vollständige Artikel- oder Blogtext, der auf Fakten geprüft werden soll. " +
      "Nicht kürzen — der Originaltext wird für die Claim-Extraktion benötigt.",
  }),
  maxClaims: Type.Optional(
    Type.Number({
      description: `Maximale Anzahl zu prüfender Claims. Standard: ${DEFAULT_MAX_CLAIMS}. Max: 20.`,
    })
  ),
  mode: Type.Optional(
    Type.Union([Type.Literal("fast"), Type.Literal("deep")], {
      description:
        "fast (Standard): sonar, kostengünstig. deep: sonar-pro, für investigative Inhalte.",
    })
  ),
  model: Type.Optional(
    Type.String({
      description: `llama.cpp-Modell. Standard: ${DEFAULT_MODEL}.`,
    })
  ),
  userLanguage: Type.Optional(
    Type.String({
      description: `Sprache für Urteilstext (summary, counter_evidence, notes). Standard: ${DEFAULT_USER_LANGUAGE}.`,
    })
  ),
});

/**
 * Pi extension entry point: registers the `verify_article_llama` tool.
 *
 * The tool runs `verifyArticle` with the given parameters and returns the
 * Markdown report as text content plus a few counters in `details`.
 * Errors are not rethrown; they are returned as a text message so the agent
 * can show them to the user.
 *
 * @param pi Extension API handed in by the host.
 */
export default function llamaVerifyArticleExtension(pi: ExtensionAPI) {
  pi.registerTool({
    name: "verify_article_llama",
    label: "Artikel-Verifikation (llama.cpp)",
    description:
      "Vollständige Fact-Check-Pipeline via llama.cpp: " +
      "Claims extrahieren → Perplexity-Recherche (parallel) → llama.cpp-Urteil (batch) → Bericht. " +
      "Effizienter als verify_claim_llama für mehrere Claims. " +
      "Typische Kosten: $0.05–0.15 für einen Artikel mit 10–15 Claims (nur Perplexity, llama.cpp lokal).",
    promptGuidelines: [
      "Use verify_article_llama when the user wants to fact-check an entire article, blog post, or longer text.",
      "Use verify_claim_llama instead when the user wants to check a single specific claim.",
      "Pass the FULL article text — do not summarize it first.",
      "Use mode=deep for scientific, medical, legal, or politically sensitive content.",
      "Set userLanguage to match the user's preferred language (e.g. 'de' for German, 'en' for English).",
      "Always show the full formatted report including the cost/latency line.",
      "Highlight contradicted claims and claims needing human review prominently.",
      "If needs_human_review claims exist, explain that they require manual fact-checking.",
      "After the report, offer to show full sources for specific claims if the user wants details.",
    ],
    parameters: PARAMS,
    async execute(_toolCallId, params, signal) {
      // Resolved here as well because the report footer needs the model name.
      const model = params.model ?? DEFAULT_MODEL;
      try {
        const report = await verifyArticle(params.text, {
          maxClaims: params.maxClaims,
          mode: params.mode,
          model,
          userLanguage: params.userLanguage,
          signal,
        });

        return {
          content: [{ type: "text", text: formatReport(report, model) }],
          details: {
            totalClaims: report.stats.total,
            supported: report.stats.supported,
            contradicted: report.stats.contradicted,
            needsHumanReview: report.stats.needs_human_review,
            totalCostUSD: report.totalCostUSD,
            latencyMs: report.latencyMs,
          },
        };
      } catch (err) {
        const msg = err instanceof Error ? err.message : "Unbekannter Fehler";
        return { content: [{ type: "text", text: `Artikel-Verifikation (llama.cpp) fehlgeschlagen: ${msg}` }] };
      }
    },
  });
}

// ---------------------------------------------------------------------------
// CLI-Modus
// ---------------------------------------------------------------------------

/**
 * Command-line entry point.
 *
 * Parses `process.argv`, reads the article text from `--file` or from the
 * positional arguments, optionally opens a job directory (`--job-id`), runs
 * `verifyArticle`, and prints either the Markdown report or (with `--json`)
 * the raw report to stdout. Progress and diagnostics go to stderr so that
 * stdout stays clean for redirection.
 *
 * Exits with code 0 after printing the help text and with code 1 on invalid
 * arguments, an unreadable file, missing text, or a pipeline error.
 */
async function runCli() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    console.log(`
Artikel-Verifikator (llama.cpp) — Vollständige Fact-Check-Pipeline

Verwendung:
  npx tsx agenten/llama-verify-article.ts [Optionen] "Artikeltext..."
  npx tsx agenten/llama-verify-article.ts --file artikel.txt [Optionen]

Optionen:
  --file, -f <pfad>       Text aus Datei lesen
  --mode fast|deep        Perplexity-Modus (Standard: fast)
  --model <name>          llama.cpp-Modell (Standard: ${DEFAULT_MODEL})
  --max-claims <n>        Max. Claims (Standard: ${DEFAULT_MAX_CLAIMS})
  --user-language <lang>  Sprache für Urteilstext, z.B. "de", "en" (Standard: ${DEFAULT_USER_LANGUAGE})
  --job-id <slug>         Job-Speicher: Zwischenergebnisse nach ~/.pi/agent/jobs/<datum>_<slug>/
  --no-cache              Globalen Claim-Cache deaktivieren
  --json                  Ausgabe als JSON
  --verbose, -v           Ausführliche Ausgabe + Log-Datei
  --help                  Diese Hilfe

Umgebungsvariablen:
  LLAMA_HOST              llama.cpp-Server-URL (Standard: http://localhost:8000)
  PERPLEXITY_API_KEY      Perplexity API-Key (erforderlich)

Beispiele:
  npx tsx agenten/llama-verify-article.ts --file artikel.txt
  npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep --user-language en
  npx tsx agenten/llama-verify-article.ts --file artikel.txt --job-id mein-artikel --verbose
  npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
`);
    process.exit(0);
  }

  let mode: "fast" | "deep" = "fast";
  let model = DEFAULT_MODEL;
  let maxClaims = DEFAULT_MAX_CLAIMS;
  let userLanguage = DEFAULT_USER_LANGUAGE;
  let jobId: string | undefined;
  let jsonOutput = false;
  let verbose = false;
  let noCache = false;
  let file: string | null = null;
  const textParts: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--mode" && args[i + 1]) {
      const value = args[++i];
      if (value !== "fast" && value !== "deep") {
        // Fail loudly instead of silently running in the default mode.
        console.error(`Fehler: Ungültiger Wert für --mode: '${value}' (erlaubt: fast, deep).`);
        process.exit(1);
      }
      mode = value;
    } else if (arg === "--model" && args[i + 1]) {
      model = args[++i];
    } else if (arg === "--max-claims" && args[i + 1]) {
      const value = args[++i];
      const parsed = Number.parseInt(value, 10);
      if (!Number.isInteger(parsed) || parsed < 1) {
        console.error(`Fehler: Ungültiger Wert für --max-claims: '${value}' (erwartet: ganze Zahl ≥ 1).`);
        process.exit(1);
      }
      maxClaims = parsed;
    } else if (arg === "--user-language" && args[i + 1]) {
      userLanguage = args[++i];
    } else if (arg === "--job-id" && args[i + 1]) {
      jobId = args[++i];
    } else if ((arg === "--file" || arg === "-f") && args[i + 1]) {
      file = args[++i];
    } else if (arg === "--json") {
      jsonOutput = true;
    } else if (arg === "--verbose" || arg === "-v") {
      verbose = true;
    } else if (arg === "--no-cache") {
      noCache = true;
    } else if (!arg.startsWith("--")) {
      // Everything that is not an option is treated as part of the text.
      textParts.push(arg);
    } else {
      // Also reached when a value-taking option is the last argument.
      console.error(`Warnung: Unbekannte oder unvollständige Option '${arg}' wird ignoriert.`);
    }
  }

  let text: string;
  if (file) {
    try {
      text = await readFile(file, "utf-8");
    } catch (err) {
      console.error(`Fehler: Datei '${file}' konnte nicht gelesen werden: ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  } else {
    text = textParts.join(" ").trim();
  }

  if (!text.trim()) {
    console.error("Fehler: Kein Text übergeben. Nutze --file <pfad> oder übergib den Text direkt.");
    process.exit(1);
  }

  if (!jsonOutput) {
    const src = file ? `Datei: ${file}` : "Direkteingabe";
    console.error(`\nModus: ${mode} | Modell: ${model} | Max. Claims: ${maxClaims} | Sprache: ${userLanguage} | ${src}${jobId ? ` | Job: ${jobId}` : ""}\n`);
  }

  const log = createLogger({ verbose, jobId });
  // With --json, progress output is suppressed entirely.
  const onProgress = jsonOutput ? undefined : (msg: string) => process.stderr.write(` ${msg}\n`);

  let jobDir: string | undefined;
  if (jobId) {
    const { jobDir: dir, isNew } = getOrCreateJob(jobId, model);
    jobDir = dir;
    // The input is stored only once; a resumed job keeps its original input.txt.
    if (isNew) saveJobFile(jobDir, "input.txt", text);
    if (!jsonOutput) {
      process.stderr.write(` Job: ${jobDir} (${isNew ? "neu" : "fortgesetzt"})\n\n`);
    }
  }

  try {
    const report = await verifyArticle(text, { maxClaims, mode, model, userLanguage, onProgress, logger: log, jobDir, noCache });
    if (jsonOutput) {
      console.log(JSON.stringify(report, null, 2));
    } else {
      console.log(formatReport(report, model));
    }
  } catch (err) {
    if (jobDir) updateJobMeta(jobDir, { status: "failed" });
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exit(1);
  }
}

// Run the CLI only when this file is the script Node was started with
// (not when it is imported as a Pi extension).
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
  // runCli handles its own pipeline errors; this catch covers anything that
  // escapes it (e.g. job setup), so the promise never rejects unhandled.
  runCli().catch((err: unknown) => {
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exit(1);
  });
}