feat: Pi Text-Agent — initialer Commit (sauberes Repo)
Vollständiges Multi-Agenten-System für Fact-Checking, Artikelschreiben und Argumentationsanalyse. Zwei Backends: llama.cpp (★ bevorzugt) und Ollama. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
5146b7fa30
62 changed files with 11279 additions and 0 deletions
838
agenten/llama-verify-article.ts
Normal file
838
agenten/llama-verify-article.ts
Normal file
|
|
@ -0,0 +1,838 @@
|
|||
/**
|
||||
* llama-verify-article.ts
|
||||
* Pi-Extension + CLI: Vollständige Fact-Check-Pipeline via llama.cpp
|
||||
*
|
||||
* Ablauf:
|
||||
* 1. Claim-Extraktion via llama.cpp (lokal, Port 8000)
|
||||
* 2. Perplexity-Recherche für alle prüfbaren Claims (parallel)
|
||||
* 3. Batch-Urteilssynthese via llama.cpp (1 Aufruf für alle Claims)
|
||||
* 4. Verifikationsbericht formatieren
|
||||
*
|
||||
* Als Pi-Extension: ~/.pi/agent/extensions/fact-checker/llama-verify-article.ts
|
||||
* Als CLI:
|
||||
* npx tsx agenten/llama-verify-article.ts "$(cat artikel.txt)"
|
||||
* npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep
|
||||
* npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
|
||||
*/
|
||||
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import {
|
||||
searchPerplexity,
|
||||
formatSourcesForPrompt,
|
||||
type PerplexityResult,
|
||||
} from "../lib/perplexity.js";
|
||||
import { callLlamaClaimExtract, type ClaimSet } from "./llama-claim-extractor.js";
|
||||
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";
|
||||
import {
|
||||
saveJobFile,
|
||||
loadJobFile,
|
||||
jobFileExists,
|
||||
updateJobMeta,
|
||||
getOrCreateJob,
|
||||
} from "../lib/jobs.js";
|
||||
import { getCached, setCached } from "../lib/cache.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Typen
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Outcome categories a claim can carry in the final report. */
type VerificationStatus =
  | "supported"
  | "contradicted"
  | "mixed"
  | "insufficient_evidence"
  | "needs_human_review"
  | "not_checkable";

/** Certainty level attached to a verdict. */
type Confidence = "high" | "medium" | "low";

/** One verdict entry, in the shape the batch prompt asks the model to produce. */
type VerdictItem = {
  claim_id: string;
  status: VerificationStatus;
  confidence: Confidence;
  summary: string;
  counter_evidence: string | null;
  notes: string | null;
  supporting_urls: string[];
};

/** Top-level JSON object requested by the batch verdict prompt. */
type BatchVerdictRaw = { verdicts: Array<VerdictItem> };

/** Final result of a verification run: one entry per claim plus aggregate numbers. */
export type VerificationReport = {
  schema_version: "1.0.0";
  verified_at: string;
  source_text_summary: string;
  summary: string;
  results: {
    claim_id: string;
    claim_text: string;
    status: VerificationStatus;
    confidence: Confidence;
    summary: string;
    sources: { url: string; title: string | null; supports_claim: boolean }[];
    counter_evidence: string | null;
    notes: string | null;
  }[];
  stats: Record<string, number>;
  totalCostUSD: number;
  latencyMs: number;
};

// llama.cpp OpenAI-compatible API format
type LlamaResponse = {
  choices: {
    message?: { content?: string; reasoning_content?: string };
    finish_reason?: string;
  }[];
  usage?: { prompt_tokens?: number; completion_tokens?: number };
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Konfiguration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Model name sent to llama.cpp when the caller does not choose one. */
const DEFAULT_MODEL = "Qwopus3.6-35B-A3B-v1-Q4_K_M.gguf";
/**
 * Base URL of the llama.cpp server.
 * `||` is deliberate: an empty or whitespace-only LLAMA_HOST falls back to the default
 * instead of producing a relative request URL. Trailing slashes are removed so that
 * appending "/v1/..." never yields a double slash.
 */
const LLAMA_HOST = (process.env.LLAMA_HOST?.trim() || "http://localhost:8000").replace(/\/+$/, "");
/** Number of claims requested from the extractor when the caller gives no limit. */
const DEFAULT_MAX_CLAIMS = 15;
/** Language code used for the verdict texts when the caller gives none. */
const DEFAULT_USER_LANGUAGE = "de";
/** Upper bound of Perplexity requests in flight at the same time. */
const MAX_PARALLEL_PERPLEXITY = 5;
// Batch verdicts: many claims plus Perplexity texts → large context window
const MAX_TOKENS_BATCH = 32768;
/** Sampling temperature for the verdict request. */
const TEMPERATURE = 0.1;
/** Number of attempts for the verdict request before giving up. */
const MAX_RETRIES = 3;
/** Pause between two attempts, in milliseconds. */
const RETRY_DELAY_MS = 15_000;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Batch-Urteilssynthese via llama.cpp
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Maps a language code to the German language name used inside the prompt.
 * Codes without an entry are returned unchanged.
 */
function langLabel(userLanguage: string): string {
  const germanNames = new Map<string, string>([
    ["de", "Deutsch"],
    ["en", "Englisch"],
    ["fr", "Französisch"],
    ["es", "Spanisch"],
  ]);
  return germanNames.get(userLanguage) ?? userLanguage;
}
|
||||
|
||||
/**
 * Builds the system prompt for the batch verdict request: status scale, rules for
 * "contradicted", output language and the JSON shape the model must answer with.
 *
 * @param userLanguage Language code; rendered into the prompt via `langLabel`.
 */
function buildBatchVerdictSystemPrompt(userLanguage: string): string {
  const outputLanguage = langLabel(userLanguage);

  const promptLines = [
    "Du bist ein erfahrener Fact-Checker. Bewerte jede Behauptung anhand der bereitgestellten Recherche-Ergebnisse.",
    "",
    "Status-Skala:",
    "- supported: Quellen bestätigen klar und konsistent",
    "- contradicted: Quellen widersprechen klar und SUBSTANZIELL",
    "- mixed: Widersprüchliche Quellenlage ODER Behauptung technisch ungenau aber im Kern korrekt",
    "- insufficient_evidence: Zu wenig oder schwache Quellen",
    "- needs_human_review: Komplex, politisch heikel, stark kontextabhängig",
    "",
    "Confidence: high (eindeutige Primärquellen), medium (begrenzte/sekundäre Quellen), low (sehr unklar)",
    "",
    'WICHTIGE REGELN für "contradicted":',
    "- Nur bei klar substanziellen Fehlern: falsche Person, Zahl >10% abweichend, falsch zugeordnetes Ereignis",
    '- Gerundete/allgemein akzeptierte Näherungswerte → "supported" (z.B. "21 Millionen Bitcoin" ist korrekte Rundung)',
    '- Zeitzonendifferenzen historischer Ereignisse → "supported" wenn im üblichen regionalen Kontext korrekt',
    '- Technische Präzisierungen zu korrekten Aussagen → "mixed", nicht "contradicted"',
    '- Im Zweifel immer "mixed" statt "contradicted"',
    "",
    `AUSGABESPRACHE: Schreibe summary, counter_evidence und notes auf ${outputLanguage}.`,
    "Die Enum-Werte status und confidence bleiben englisch.",
    "",
    "summary: 1-3 präzise Sätze. Nicht spekulieren.",
    "counter_evidence: Gegenbelege als Satz, sonst null.",
    "notes: Zeitabhängigkeit, Einschränkungen, sonst null.",
    "supporting_urls: URLs der stützenden Quellen (leeres Array wenn keine).",
    "",
    "Antworte NUR mit diesem JSON-Objekt — kein Freitext davor oder danach:",
    "{",
    '"verdicts": [',
    "{",
    '"claim_id": "c001",',
    '"status": "supported|contradicted|mixed|insufficient_evidence|needs_human_review",',
    '"confidence": "high|medium|low",',
    '"summary": "...",',
    '"counter_evidence": "..." | null,',
    '"notes": "..." | null,',
    '"supporting_urls": ["url1"]',
    "}",
    "]",
    "}",
  ];

  return promptLines.join("\n");
}
|
||||
|
||||
/**
 * Builds the user message for the batch verdict request: one section per claim
 * containing the claim text, the research summary and the formatted sources,
 * followed by the instruction to rate all claims.
 *
 * @param claims Claims together with their Perplexity research result.
 */
function buildBatchVerdictUserPrompt(
  claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>
): string {
  const sections: string[] = [];

  for (const claim of claims) {
    const sourceList = formatSourcesForPrompt(claim.perplexity.sources, 200) || "(keine Quellen gefunden)";
    const sectionLines = [
      "---",
      `BEHAUPTUNG ${claim.id}: "${claim.text}"`,
      "RECHERCHE:",
      `${claim.perplexity.summary}`,
      "",
      "QUELLEN:",
      `${sourceList}`,
    ];
    sections.push(sectionLines.join("\n"));
  }

  const claimsBlock = sections.join("\n\n");
  return "/no_think\n" + claimsBlock + "\n\n" + `Bewerte alle ${claims.length} Behauptungen.`;
}
|
||||
|
||||
/** Status values accepted from the model; entries with any other status are discarded. */
const ACCEPTED_VERDICT_STATUSES: readonly string[] = [
  "supported",
  "contradicted",
  "mixed",
  "insufficient_evidence",
  "needs_human_review",
  "not_checkable",
];

/** Confidence values accepted from the model; anything else is replaced by "low". */
const ACCEPTED_VERDICT_CONFIDENCES: readonly string[] = ["high", "medium", "low"];

/** Resolves after `ms` milliseconds and rejects early when `signal` is aborted. */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("Batch-Verdict abgebrochen"));
      return;
    }
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    function onAbort(): void {
      clearTimeout(timer);
      reject(new Error("Batch-Verdict abgebrochen"));
    }
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}

/**
 * Parses the model output as JSON. When the text as a whole is not valid JSON,
 * the span from the first "{" to the last "}" is tried before giving up.
 */
function parseVerdictJson(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    // fall through to the lenient attempt below
  }
  const start = text.indexOf("{");
  const end = text.lastIndexOf("}");
  if (start >= 0 && end > start) {
    try {
      return JSON.parse(text.slice(start, end + 1));
    } catch {
      // reported by the throw below
    }
  }
  throw new Error(`Kein gültiges JSON von llama.cpp: ${text.slice(0, 300)}`);
}

/**
 * Converts one untrusted verdict entry into a `VerdictItem`.
 * Returns null when claim_id, status or summary are unusable; optional fields get safe defaults.
 */
function normalizeVerdictItem(candidate: unknown): VerdictItem | null {
  if (typeof candidate !== "object" || candidate === null) return null;
  const { claim_id, status, confidence, summary, counter_evidence, notes, supporting_urls } =
    candidate as Record<string, unknown>;

  if (typeof claim_id !== "string" || claim_id === "") return null;
  if (typeof status !== "string" || !ACCEPTED_VERDICT_STATUSES.includes(status)) return null;
  if (typeof summary !== "string") return null;

  return {
    claim_id,
    status: status as VerificationStatus,
    confidence:
      typeof confidence === "string" && ACCEPTED_VERDICT_CONFIDENCES.includes(confidence)
        ? (confidence as Confidence)
        : "low",
    summary,
    counter_evidence: typeof counter_evidence === "string" ? counter_evidence : null,
    notes: typeof notes === "string" ? notes : null,
    supporting_urls: Array.isArray(supporting_urls)
      ? supporting_urls.filter((url): url is string => typeof url === "string")
      : [],
  };
}

/**
 * Requests verdicts for all researched claims from llama.cpp in a single chat completion.
 *
 * @param claims Claims with their Perplexity result; an empty list returns [] without a request.
 * @param model Model name sent to the server.
 * @param userLanguage Language code for the free-text fields of the verdicts.
 * @param signal Optional abort signal; an aborted request is not retried.
 * @param logger Optional logger; defaults to `nullLogger`.
 * @returns The validated verdict entries. Entries the model returned in an unusable shape are dropped.
 * @throws Error when the request fails on every attempt, the server answers with a non-OK status,
 *         the answer is empty, or no JSON can be parsed from it.
 */
async function synthesizeBatchVerdicts(
  claims: Array<{ id: string; text: string; perplexity: PerplexityResult }>,
  model: string,
  userLanguage: string,
  signal?: AbortSignal,
  logger?: Logger
): Promise<VerdictItem[]> {
  if (claims.length === 0) return [];

  const log = logger ?? nullLogger;

  const payload = JSON.stringify({
    model,
    messages: [
      { role: "system", content: buildBatchVerdictSystemPrompt(userLanguage) },
      { role: "user", content: buildBatchVerdictUserPrompt(claims) },
    ],
    stream: false,
    temperature: TEMPERATURE,
    max_tokens: MAX_TOKENS_BATCH,
  });

  let resp: Response | undefined;

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      resp = await fetch(`${LLAMA_HOST}/v1/chat/completions`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: payload,
        signal,
      });
      break;
    } catch (err) {
      // A cancelled request must surface immediately instead of being retried.
      if (signal?.aborted) throw err;

      const message = err instanceof Error ? err.message : String(err);
      log.warn(`llama.cpp Batch-Verdict fetch fehlgeschlagen (Versuch ${attempt}/${MAX_RETRIES})`, {
        error: message,
      });
      if (attempt === MAX_RETRIES) {
        throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: ${message}`);
      }
      await abortableDelay(RETRY_DELAY_MS, signal);
    }
  }

  // Only reachable without a response when MAX_RETRIES is configured below 1.
  if (!resp) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen`);

  if (!resp.ok) {
    const errText = await resp.text().catch(() => "");
    throw new Error(`llama.cpp Batch-Verdict Fehler ${resp.status}: ${errText}`);
  }

  const data = (await resp.json()) as LlamaResponse;
  const choice = data.choices?.[0];
  let raw = choice?.message?.content ?? "";

  // Reasoning fallback: when content is empty, extract the JSON from reasoning_content.
  if (!raw.trim() && choice?.message?.reasoning_content) {
    const rc = choice.message.reasoning_content;
    const allMatches = [...rc.matchAll(/\{[^{}]*"verdicts"\s*:/g)];
    // Prefer the last object opening that contains a "verdicts" key.
    const lastIdx = allMatches.length > 0 ? rc.lastIndexOf(allMatches[allMatches.length - 1][0]) : -1;
    const extracted =
      lastIdx >= 0
        ? rc.slice(lastIdx).match(/\{[\s\S]*\}/)?.[0]
        : rc.match(/\{[\s\S]*"verdicts"[\s\S]*\}/)?.[0];
    if (extracted) {
      raw = extracted;
      log.warn("Batch-Verdict: JSON aus reasoning_content extrahiert", {
        finishReason: choice.finish_reason,
      });
    }
  }

  // Strip a surrounding Markdown code fence, if present.
  const cleanedRaw = raw
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/i, "")
    .trim();

  log.debug("Batch-Verdict erhalten", {
    promptTokens: data.usage?.prompt_tokens,
    outputTokens: data.usage?.completion_tokens,
    finishReason: choice?.finish_reason,
    rawLength: raw.length,
  });

  if (!cleanedRaw) throw new Error("Leere llama.cpp-Antwort für Batch-Verdicts");

  const parsed = parseVerdictJson(cleanedRaw);

  // Accept both the requested { verdicts: [...] } object and a bare array of verdicts.
  const rawVerdicts: unknown = Array.isArray(parsed)
    ? parsed
    : typeof parsed === "object" && parsed !== null
      ? (parsed as { verdicts?: unknown }).verdicts
      : undefined;

  if (!Array.isArray(rawVerdicts)) {
    log.warn("Batch-Verdict: Antwort enthält kein verdicts-Array", { rawLength: raw.length });
    return [];
  }

  const verdicts: VerdictItem[] = [];
  for (const candidate of rawVerdicts) {
    const verdict = normalizeVerdictItem(candidate);
    if (verdict) verdicts.push(verdict);
  }

  if (verdicts.length !== rawVerdicts.length) {
    log.warn("Batch-Verdict: ungültige Einträge verworfen", {
      received: rawVerdicts.length,
      accepted: verdicts.length,
    });
  }

  return verdicts;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Parallel-Limiter für Perplexity
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs the given task factories with at most `limit` of them in flight at once.
 *
 * @param tasks Functions that start one unit of work each.
 * @param limit Maximum number of concurrently running tasks. Values below 1 and NaN are
 *              treated as 1, so every task is still executed; fractions are rounded down.
 * @returns The task results in the order of `tasks`.
 * @throws Rejects with the first task error. No further tasks are started after a failure;
 *         tasks already running are not cancelled.
 */
async function runWithConcurrencyLimit<T>(
  tasks: Array<() => Promise<T>>,
  limit: number
): Promise<T[]> {
  const results: T[] = new Array(tasks.length);
  const requestedWorkers = Number.isNaN(limit) ? 1 : Math.floor(limit);
  const workerCount = Math.min(tasks.length, Math.max(1, requestedWorkers));

  let nextIndex = 0;
  let failed = false;

  async function worker(): Promise<void> {
    while (!failed && nextIndex < tasks.length) {
      // Claiming the index is synchronous, so two workers never pick the same task.
      const current = nextIndex++;
      try {
        results[current] = await tasks[current]();
      } catch (err) {
        failed = true;
        throw err;
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hauptfunktion
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Runs the complete fact-check pipeline for one text:
 * claim extraction, Perplexity research per checkable claim, one batch verdict request
 * to llama.cpp, and assembly of the verification report.
 *
 * @param text The article text to check.
 * @param options Optional settings:
 *   - maxClaims: upper bound for extracted claims (capped at 20; invalid values use the default)
 *   - mode: Perplexity search mode, "fast" by default
 *   - model: llama.cpp model name
 *   - userLanguage: language code for verdict texts
 *   - signal: abort signal passed to all requests
 *   - onProgress: callback receiving progress messages
 *   - logger: logger instance, `nullLogger` by default
 *   - jobDir: job directory; intermediate results are loaded from and saved to it
 *   - noCache: disables the global claim cache
 * @returns The verification report; it is also written to the job directory when one is given.
 * @throws Errors from claim extraction or verdict synthesis, and research errors caused by an abort.
 */
export async function verifyArticle(
  text: string,
  options?: {
    maxClaims?: number;
    mode?: "fast" | "deep";
    model?: string;
    userLanguage?: string;
    signal?: AbortSignal;
    onProgress?: (msg: string) => void;
    logger?: Logger;
    jobDir?: string;
    noCache?: boolean;
  }
): Promise<VerificationReport> {
  const t0 = Date.now();
  const model = options?.model ?? DEFAULT_MODEL;
  const mode = options?.mode ?? "fast";
  const userLanguage = options?.userLanguage ?? DEFAULT_USER_LANGUAGE;
  const log = options?.logger ?? nullLogger;
  const jobDir = options?.jobDir;
  const useCache = !(options?.noCache ?? false);

  // NaN (e.g. from a failed parseInt), zero and negative values fall back to the default.
  const requestedMaxClaims = options?.maxClaims ?? DEFAULT_MAX_CLAIMS;
  const maxClaims = Math.min(
    Number.isFinite(requestedMaxClaims) && requestedMaxClaims >= 1
      ? Math.floor(requestedMaxClaims)
      : DEFAULT_MAX_CLAIMS,
    20
  );

  const progress = (msg: string) => {
    options?.onProgress?.(msg);
    log.info(msg);
  };

  log.info("llama-verify-article gestartet", { textLength: text.length, model, maxClaims, mode, userLanguage, jobDir });

  const countCheckable = (set: ClaimSet): number =>
    set.claims.filter((c) => c.checkability === "checkable").length;

  // Step 1: claim extraction (or load from the job directory)
  let claimSet: ClaimSet;
  const storedClaims = jobDir ? loadJobFile<ClaimSet>(jobDir, "claims.json") : null;

  if (jobDir && storedClaims) {
    claimSet = storedClaims;
    progress(
      `Claims aus Job geladen (${claimSet.total_claims} total, ${countCheckable(claimSet)} prüfbar) — Extraktion übersprungen.`
    );
  } else {
    if (jobDir) updateJobMeta(jobDir, { status: "extracting" });
    progress("Claims extrahieren (llama.cpp)...");
    const { claimSet: extracted, tokensIn, tokensOut, latencyMs: extractLatency } = await callLlamaClaimExtract(
      text, model, maxClaims, options?.signal, log
    );
    claimSet = extracted;

    if (jobDir) {
      saveJobFile(jobDir, "claims.json", claimSet);
      updateJobMeta(jobDir, {
        status: "verifying",
        steps: {
          extract: {
            completedAt: new Date().toISOString(),
            totalClaims: claimSet.total_claims,
            checkableClaims: countCheckable(claimSet),
            latencyMs: extractLatency,
          },
        },
      });
      log.info("Claims extrahiert + gespeichert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
    } else {
      log.info("Claims extrahiert", { total: claimSet.total_claims, tokensIn, tokensOut, latencyMs: extractLatency });
    }
  }

  const checkableClaims = claimSet.claims.filter((c) => c.checkability === "checkable");
  const uncheckedClaims = claimSet.claims.filter((c) => c.checkability !== "checkable");
  progress(
    `${claimSet.total_claims} Claims — ${checkableClaims.length} prüfbar, ` +
      `${uncheckedClaims.length} nicht prüfbar.`
  );

  if (checkableClaims.length === 0) {
    progress("⚠ Keine prüfbaren Claims gefunden — Verifikation nicht möglich.");
  }

  // Step 2: Perplexity research in parallel (limited), using job cache and global cache
  type CheckableClaim = (typeof checkableClaims)[number];
  type ResearchOutcome =
    | { claim: CheckableClaim; result: PerplexityResult; error: null }
    | { claim: CheckableClaim; result: null; error: string };

  let doneCount = 0;
  const total = checkableClaims.length;

  if (jobDir && total > 0) {
    const cachedCount = checkableClaims.filter((c) =>
      jobFileExists(jobDir, `perplexity/${c.claim_id}.json`)
    ).length;
    if (cachedCount > 0) {
      progress(`${cachedCount}/${total} Perplexity-Ergebnisse aus Job-Cache geladen.`);
    }
  }

  const perplexityTasks = checkableClaims.map((claim) => async (): Promise<ResearchOutcome> => {
    const short = claim.text.length > 55 ? claim.text.slice(0, 52) + "..." : claim.text;

    if (jobDir) {
      const cached = loadJobFile<PerplexityResult>(jobDir, `perplexity/${claim.claim_id}.json`);
      if (cached) {
        doneCount++;
        progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cached) "${short}"`);
        return { claim, result: cached, error: null };
      }
    }

    if (useCache) {
      const globalCached = getCached<PerplexityResult>(claim.text);
      if (globalCached) {
        doneCount++;
        progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ (cache) "${short}"`);
        return { claim, result: globalCached, error: null };
      }
    }

    try {
      const result = await searchPerplexity(claim.text, { mode, signal: options?.signal });
      doneCount++;
      if (useCache) setCached(claim.text, result);
      if (jobDir) {
        saveJobFile(jobDir, `perplexity/${claim.claim_id}.json`, result);
      }
      progress(`[${doneCount}/${total}] ${claim.claim_id} ✓ "${short}"`);
      return { claim, result, error: null };
    } catch (err: unknown) {
      // A cancelled run must not be reported as "research failed" for every claim.
      if (options?.signal?.aborted) throw err;
      doneCount++;
      const errMsg = err instanceof Error ? err.message : "Perplexity-Fehler";
      progress(`[${doneCount}/${total}] ${claim.claim_id} ✗ "${short}" — ${errMsg}`);
      return { claim, result: null, error: errMsg };
    }
  });

  if (total > 0) progress(`Recherche läuft (${total} Claims, max. ${MAX_PARALLEL_PERPLEXITY} parallel)...`);
  const perplexityOutcomes = await runWithConcurrencyLimit(perplexityTasks, MAX_PARALLEL_PERPLEXITY);

  const successful = perplexityOutcomes.filter(
    (o): o is Extract<ResearchOutcome, { error: null }> => o.result !== null
  );
  const failed = perplexityOutcomes.filter(
    (o): o is Extract<ResearchOutcome, { result: null }> => o.error !== null
  );
  const totalPerplexityCost = successful.reduce((sum, o) => sum + o.result.estimatedCostUSD, 0);

  log.info("Perplexity abgeschlossen", {
    successful: successful.length,
    failed: failed.length,
    totalCostUSD: totalPerplexityCost.toFixed(4),
  });

  // Step 3: batch verdict synthesis via llama.cpp (skipped when nothing was researched)
  let verdicts: VerdictItem[] = [];
  if (successful.length > 0) {
    progress(`Urteilssynthese (llama.cpp, ${successful.length} Claims, Sprache: ${userLanguage})...`);
    verdicts = await synthesizeBatchVerdicts(
      successful.map((o) => ({ id: o.claim.claim_id, text: o.claim.text, perplexity: o.result })),
      model,
      userLanguage,
      options?.signal,
      log
    );
  }

  // Step 4: assemble the report
  const verdictMap = new Map(verdicts.map((v) => [v.claim_id, v]));

  // Verdicts come from model output; values outside these lists are replaced by the fallbacks.
  const knownStatuses: readonly string[] = [
    "supported",
    "contradicted",
    "mixed",
    "insufficient_evidence",
    "needs_human_review",
    "not_checkable",
  ];
  const knownConfidences: readonly string[] = ["high", "medium", "low"];

  const results: VerificationReport["results"] = [
    ...successful.map((o) => {
      const verdict = verdictMap.get(o.claim.claim_id);
      const listedUrls: unknown = verdict?.supporting_urls;
      const supportingUrls = new Set<unknown>(Array.isArray(listedUrls) ? listedUrls : []);

      const status: VerificationStatus =
        verdict && knownStatuses.includes(verdict.status) ? verdict.status : "insufficient_evidence";
      const confidence: Confidence =
        verdict && knownConfidences.includes(verdict.confidence) ? verdict.confidence : "low";

      return {
        claim_id: o.claim.claim_id,
        claim_text: o.claim.text,
        status,
        confidence,
        summary: verdict?.summary ?? "Keine Urteilssynthese verfügbar.",
        sources: o.result.sources.map((s) => ({
          url: s.url,
          title: s.title ?? null,
          supports_claim: supportingUrls.has(s.url),
        })),
        counter_evidence: verdict?.counter_evidence ?? null,
        notes: verdict?.notes ?? null,
      };
    }),
    ...failed.map((o) => ({
      claim_id: o.claim.claim_id,
      claim_text: o.claim.text,
      status: "insufficient_evidence" as VerificationStatus,
      confidence: "low" as Confidence,
      summary: `Recherche fehlgeschlagen: ${o.error}`,
      sources: [],
      counter_evidence: null,
      notes: null,
    })),
    ...uncheckedClaims.map((c) => ({
      claim_id: c.claim_id,
      claim_text: c.text,
      status: "not_checkable" as VerificationStatus,
      confidence: "high" as Confidence,
      summary: `Nicht empirisch prüfbar (${c.claim_type}).`,
      sources: [],
      counter_evidence: null,
      notes: null,
    })),
  ];

  const stats: Record<string, number> = {
    total: results.length,
    supported: 0,
    contradicted: 0,
    mixed: 0,
    insufficient_evidence: 0,
    needs_human_review: 0,
    not_checkable: 0,
  };
  for (const r of results) stats[r.status] = (stats[r.status] ?? 0) + 1;

  const checkedCount = successful.length;
  // The headline carries no period of its own; joining with ". " and appending one final
  // period avoids the doubled ".." the summary had before.
  const summary =
    [
      `${claimSet.total_claims} Claims extrahiert, ${checkedCount} recherchiert`,
      stats.supported > 0 ? `${stats.supported} bestätigt` : "",
      stats.contradicted > 0 ? `${stats.contradicted} widerlegt` : "",
      stats.mixed > 0 ? `${stats.mixed} gemischt` : "",
      stats.needs_human_review > 0 ? `${stats.needs_human_review} → Menschliche Prüfung nötig` : "",
      stats.insufficient_evidence > 0 ? `${stats.insufficient_evidence} ohne ausreichende Belege` : "",
    ]
      .filter(Boolean)
      .join(". ") + ".";

  const totalLatencyMs = Date.now() - t0;
  log.info("llama-verify-article abgeschlossen", {
    ...stats,
    totalCostUSD: totalPerplexityCost.toFixed(4),
    latencyMs: totalLatencyMs,
  });

  const report: VerificationReport = {
    schema_version: "1.0.0",
    verified_at: new Date().toISOString(),
    source_text_summary: text.slice(0, 200) + (text.length > 200 ? "…" : ""),
    summary,
    results,
    stats,
    totalCostUSD: totalPerplexityCost,
    latencyMs: totalLatencyMs,
  };

  if (jobDir) {
    saveJobFile(jobDir, "report.json", report);
    updateJobMeta(jobDir, {
      status: "completed",
      steps: {
        verify: {
          completedAt: new Date().toISOString(),
          claimsVerified: successful.length,
          totalCostUSD: totalPerplexityCost,
          latencyMs: totalLatencyMs,
        },
      },
    });
    log.info("Report in Job gespeichert", { jobDir });
  }

  return report;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Formatierung
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Heading text (symbol plus German label) shown for each status group in the formatted report. */
const STATUS_ICON: Record<VerificationStatus, string> = {
  "supported": "✓ BESTÄTIGT",
  "contradicted": "✗ WIDERLEGT",
  "mixed": "~ GEMISCHT",
  "insufficient_evidence": "? BELEGE UNZUREICHEND",
  "needs_human_review": "⚠ MENSCHLICHE PRÜFUNG NÖTIG",
  "not_checkable": "— NICHT PRÜFBAR",
};
|
||||
|
||||
/**
 * Renders a verification report as Markdown text: headline, summary, one group per
 * status in a fixed order, and a footer line with cost, model and total latency.
 *
 * @param report The report to render.
 * @param model Model name shown in the footer.
 */
function formatReport(report: VerificationReport, model: string): string {
  const displayOrder: VerificationStatus[] = [
    "supported",
    "contradicted",
    "mixed",
    "needs_human_review",
    "insufficient_evidence",
    "not_checkable",
  ];

  // Lines for a single claim, always terminated by an empty separator line.
  const renderEntry = (entry: VerificationReport["results"][number]): string[] => {
    const block = [`\`${entry.claim_id}\` "${entry.claim_text}"`];

    // Claims that cannot be checked are listed without any detail lines.
    if (entry.status === "not_checkable") {
      block.push("");
      return block;
    }

    block.push(` → ${entry.summary}`);
    if (entry.counter_evidence) block.push(` ✗ Gegenbeleg: ${entry.counter_evidence}`);
    if (entry.notes) block.push(` ℹ ${entry.notes}`);

    const links = entry.sources
      .filter((source) => source.supports_claim)
      .map((source) => `[${source.title ?? source.url}](${source.url})`);
    if (links.length > 0) block.push(` Quellen: ${links.join(", ")}`);

    block.push("");
    return block;
  };

  const out: string[] = [`## Verifikationsbericht (llama.cpp)`, report.summary, ""];

  for (const status of displayOrder) {
    const matching = report.results.filter((entry) => entry.status === status);
    if (matching.length > 0) {
      out.push(`**${STATUS_ICON[status]} (${matching.length}):**`);
      for (const entry of matching) out.push(...renderEntry(entry));
    }
  }

  const seconds = (report.latencyMs / 1000).toFixed(0);
  out.push(`_[Perplexity: ~$${report.totalCostUSD.toFixed(4)} | llama.cpp: ${model} | Gesamt: ${seconds}s]_`);

  return out.join("\n");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pi-Extension: Default Export
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Schema of the tool parameters: the article text plus optional pipeline settings. */
const PARAMS = (() => {
  const text = Type.String({
    description:
      "Der vollständige Artikel- oder Blogtext, der auf Fakten geprüft werden soll. " +
      "Nicht kürzen — der Originaltext wird für die Claim-Extraktion benötigt.",
  });

  const maxClaims = Type.Number({
    description: `Maximale Anzahl zu prüfender Claims. Standard: ${DEFAULT_MAX_CLAIMS}. Max: 20.`,
  });

  const mode = Type.Union([Type.Literal("fast"), Type.Literal("deep")], {
    description:
      "fast (Standard): sonar, kostengünstig. deep: sonar-pro, für investigative Inhalte.",
  });

  const model = Type.String({
    description: `llama.cpp-Modell. Standard: ${DEFAULT_MODEL}.`,
  });

  const userLanguage = Type.String({
    description: `Sprache für Urteilstext (summary, counter_evidence, notes). Standard: ${DEFAULT_USER_LANGUAGE}.`,
  });

  return Type.Object({
    text,
    maxClaims: Type.Optional(maxClaims),
    mode: Type.Optional(mode),
    model: Type.Optional(model),
    userLanguage: Type.Optional(userLanguage),
  });
})();
|
||||
|
||||
/**
 * Pi extension entry point: registers the `verify_article_llama` tool, which runs
 * `verifyArticle` on the given text and returns the formatted report.
 * A failing run is reported as a text result instead of being thrown.
 */
export default function llamaVerifyArticleExtension(pi: ExtensionAPI) {
  const description = [
    "Vollständige Fact-Check-Pipeline via llama.cpp: ",
    "Claims extrahieren → Perplexity-Recherche (parallel) → llama.cpp-Urteil (batch) → Bericht. ",
    "Effizienter als verify_claim_llama für mehrere Claims. ",
    "Typische Kosten: $0.05–0.15 für einen Artikel mit 10–15 Claims (nur Perplexity, llama.cpp lokal).",
  ].join("");

  const promptGuidelines = [
    "Use verify_article_llama when the user wants to fact-check an entire article, blog post, or longer text.",
    "Use verify_claim_llama instead when the user wants to check a single specific claim.",
    "Pass the FULL article text — do not summarize it first.",
    "Use mode=deep for scientific, medical, legal, or politically sensitive content.",
    "Set userLanguage to match the user's preferred language (e.g. 'de' for German, 'en' for English).",
    "Always show the full formatted report including the cost/latency line.",
    "Highlight contradicted claims and claims needing human review prominently.",
    "If needs_human_review claims exist, explain that they require manual fact-checking.",
    "After the report, offer to show full sources for specific claims if the user wants details.",
  ];

  pi.registerTool({
    name: "verify_article_llama",
    label: "Artikel-Verifikation (llama.cpp)",
    description,
    promptGuidelines,
    parameters: PARAMS,
    async execute(_toolCallId, params, signal) {
      const model = params.model ?? DEFAULT_MODEL;
      const { text, maxClaims, mode, userLanguage } = params;

      try {
        const report = await verifyArticle(text, { maxClaims, mode, model, userLanguage, signal });
        const { stats } = report;

        return {
          content: [{ type: "text", text: formatReport(report, model) }],
          details: {
            totalClaims: stats.total,
            supported: stats.supported,
            contradicted: stats.contradicted,
            needsHumanReview: stats.needs_human_review,
            totalCostUSD: report.totalCostUSD,
            latencyMs: report.latencyMs,
          },
        };
      } catch (err) {
        const msg = err instanceof Error ? err.message : "Unbekannter Fehler";
        return { content: [{ type: "text", text: `Artikel-Verifikation (llama.cpp) fehlgeschlagen: ${msg}` }] };
      }
    },
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CLI-Modus
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Command-line entry point: parses the arguments, reads the article text from a file or
 * from the positional arguments, runs `verifyArticle` and prints the report to stdout
 * (Markdown text, or JSON with --json). Progress and errors go to stderr.
 *
 * Exits with code 1 on invalid arguments, unreadable input or a failed verification.
 */
async function runCli() {
  const args = process.argv.slice(2);

  // Help is shown wherever the flag appears, not only as the first argument.
  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    console.log(`
Artikel-Verifikator (llama.cpp) — Vollständige Fact-Check-Pipeline

Verwendung:
npx tsx agenten/llama-verify-article.ts [Optionen] "Artikeltext..."
npx tsx agenten/llama-verify-article.ts --file artikel.txt [Optionen]

Optionen:
--file, -f <pfad> Text aus Datei lesen
--mode fast|deep Perplexity-Modus (Standard: fast)
--model <name> llama.cpp-Modell (Standard: ${DEFAULT_MODEL})
--max-claims <n> Max. Claims (Standard: ${DEFAULT_MAX_CLAIMS})
--user-language <lang> Sprache für Urteilstext, z.B. "de", "en" (Standard: ${DEFAULT_USER_LANGUAGE})
--job-id <slug> Job-Speicher: Zwischenergebnisse nach ~/.pi/agent/jobs/<datum>_<slug>/
--no-cache Globalen Claim-Cache deaktivieren
--json Ausgabe als JSON
--verbose, -v Ausführliche Ausgabe + Log-Datei
--help Diese Hilfe

Umgebungsvariablen:
LLAMA_HOST llama.cpp-Server-URL (Standard: http://localhost:8000)
PERPLEXITY_API_KEY Perplexity API-Key (erforderlich)

Beispiele:
npx tsx agenten/llama-verify-article.ts --file artikel.txt
npx tsx agenten/llama-verify-article.ts --file artikel.txt --mode deep --user-language en
npx tsx agenten/llama-verify-article.ts --file artikel.txt --job-id mein-artikel --verbose
npx tsx agenten/llama-verify-article.ts --json --file artikel.txt > report.json
`);
    process.exit(0);
  }

  /** Prints an error message and terminates the process with exit code 1. */
  function fail(message: string): never {
    console.error(message);
    process.exit(1);
  }

  /** Returns the value following an option, or aborts when it is missing. */
  function requireValue(flag: string, value: string | undefined): string {
    if (!value) return fail(`Fehler: Option ${flag} erwartet einen Wert.`);
    return value;
  }

  let mode: "fast" | "deep" = "fast";
  let model = DEFAULT_MODEL;
  let maxClaims = DEFAULT_MAX_CLAIMS;
  let userLanguage = DEFAULT_USER_LANGUAGE;
  let jobId: string | undefined;
  let jsonOutput = false;
  let verbose = false;
  let noCache = false;
  let file: string | null = null;
  const textParts: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case "--mode": {
        const value = requireValue(arg, args[++i]);
        if (value !== "fast" && value !== "deep") {
          fail(`Fehler: Ungültiger Modus '${value}'. Erlaubt: fast, deep.`);
        } else {
          mode = value;
        }
        break;
      }
      case "--model":
        model = requireValue(arg, args[++i]);
        break;
      case "--max-claims": {
        const parsedMax = Number.parseInt(requireValue(arg, args[++i]), 10);
        // parseInt yields NaN for non-numeric input; reject it instead of passing it on.
        if (!Number.isInteger(parsedMax) || parsedMax < 1) {
          fail("Fehler: --max-claims erwartet eine positive ganze Zahl.");
        }
        maxClaims = parsedMax;
        break;
      }
      case "--user-language":
        userLanguage = requireValue(arg, args[++i]);
        break;
      case "--job-id":
        jobId = requireValue(arg, args[++i]);
        break;
      case "--file":
      case "-f":
        file = requireValue(arg, args[++i]);
        break;
      case "--json":
        jsonOutput = true;
        break;
      case "--verbose":
      case "-v":
        verbose = true;
        break;
      case "--no-cache":
        noCache = true;
        break;
      default:
        if (arg.startsWith("--")) {
          // Unknown long options are still ignored, but no longer silently.
          console.error(`Warnung: Unbekannte Option '${arg}' wird ignoriert.`);
        } else {
          textParts.push(arg);
        }
    }
  }

  let text: string;
  if (file) {
    try {
      text = await readFile(file, "utf-8");
    } catch (err) {
      console.error(`Fehler: Datei '${file}' konnte nicht gelesen werden: ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  } else {
    text = textParts.join(" ").trim();
  }

  if (!text.trim()) {
    console.error("Fehler: Kein Text übergeben. Nutze --file <pfad> oder übergib den Text direkt.");
    process.exit(1);
  }

  if (!jsonOutput) {
    const src = file ? `Datei: ${file}` : "Direkteingabe";
    console.error(`\nModus: ${mode} | Modell: ${model} | Max. Claims: ${maxClaims} | Sprache: ${userLanguage} | ${src}${jobId ? ` | Job: ${jobId}` : ""}\n`);
  }

  const log = createLogger({ verbose, jobId });
  // With --json, stdout carries only the report and progress output is suppressed.
  const onProgress = jsonOutput ? undefined : (msg: string) => process.stderr.write(` ${msg}\n`);

  let jobDir: string | undefined;
  if (jobId) {
    const { jobDir: dir, isNew } = getOrCreateJob(jobId, model);
    jobDir = dir;
    if (isNew) saveJobFile(jobDir, "input.txt", text);
    if (!jsonOutput) {
      process.stderr.write(` Job: ${jobDir} (${isNew ? "neu" : "fortgesetzt"})\n\n`);
    }
  }

  try {
    const report = await verifyArticle(text, { maxClaims, mode, model, userLanguage, onProgress, logger: log, jobDir, noCache });
    if (jsonOutput) {
      console.log(JSON.stringify(report, null, 2));
    } else {
      console.log(formatReport(report, model));
    }
  } catch (err) {
    if (jobDir) updateJobMeta(jobDir, { status: "failed" });
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the CLI only when this file is the script passed to the runtime
// (process.argv[1] equals this module's path).
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
  // runCli is async; report anything it does not handle itself instead of
  // leaving an unhandled promise rejection.
  runCli().catch((err: unknown) => {
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exit(1);
  });
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue