/**
 * ollama-verifier.ts
 * Pi extension + CLI: verify a single claim via Perplexity + Ollama.
 *
 * As a Pi extension: ~/.pi/agent/extensions/fact-checker/ollama-verifier.ts
 *   After changes, run /reload in Pi.
 *
 * As a CLI:
 *   npx tsx agenten/ollama-verifier.ts "Die Inflationsrate betrug 2024 in Deutschland 3,2%."
 *   npx tsx agenten/ollama-verifier.ts --mode deep "Die Erde ist 4,6 Milliarden Jahre alt."
 *   npx tsx agenten/ollama-verifier.ts --model deepseek-r1:32b "..."
 *   npx tsx agenten/ollama-verifier.ts --json "..."   (prints the VerificationResult as JSON)
 *
 * Flow: Perplexity search → Ollama verdict → formatted output
 */

import { fileURLToPath } from "node:url";

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";

import { searchPerplexity, formatSourcesForPrompt, type PerplexitySource } from "../lib/perplexity.js";
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/**
 * Possible outcomes of a claim verification.
 * The same five literals are listed in the JSON schema sent to Ollama and are
 * explained to the model in the system prompt.
 */
type VerificationStatus =
  | "supported"
  | "contradicted"
  | "mixed"
  | "insufficient_evidence"
  | "needs_human_review";

/** Confidence level the model attaches to its verdict. */
type Confidence = "high" | "medium" | "low";

/** Verdict object as returned by the Ollama model (the shape described by the verdict JSON schema). */
type VerdictRaw = {
  /** Overall outcome for the claim. */
  status: VerificationStatus;
  /** How certain the model is about `status`. */
  confidence: Confidence;
  /** Short justification (the prompt asks for 1-3 sentences). */
  summary: string;
  /** Description of counter evidence, or null when there is none. */
  counter_evidence: string | null;
  /** Caveats such as time dependence or regional limits, or null. */
  notes: string | null;
  /** URLs the model considers supportive of the claim; may be empty. */
  supporting_urls: string[];
};

/** Complete result of verifying one claim: the model verdict plus search sources and run metadata. */
export type VerificationResult = {
  /** The claim text that was verified. */
  claim: string;
  /** Overall outcome for the claim. */
  status: VerificationStatus;
  /** How certain the model is about `status`. */
  confidence: Confidence;
  /** Short justification produced by the model. */
  summary: string;
  /** Description of counter evidence, or null when there is none. */
  counter_evidence: string | null;
  /** Caveats reported by the model, or null. */
  notes: string | null;
  /** Sources returned by the Perplexity search. */
  sources: PerplexitySource[];
  /** URLs the model marked as supporting the claim. */
  supporting_urls: string[];
  /** Estimated Perplexity cost in USD, as reported by the search helper. */
  perplexityCostUSD: number;
  /** Wall-clock duration of the whole verification in milliseconds. */
  latencyMs: number;
  /** Name of the Ollama model that produced the verdict. */
  model: string;
};

/**
 * Subset of the Ollama chat response body declared for this module.
 * Only `message.content` is read by the code in this file; the two counters
 * are declared but not used here.
 */
type OllamaResponse = {
  message?: { content?: string };
  eval_count?: number;
  prompt_eval_count?: number;
};

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

/** Ollama model used when the caller does not name one. */
const DEFAULT_MODEL = "qwen3.5:27b";

/** Base URL used when OLLAMA_HOST is unset or blank. */
const DEFAULT_OLLAMA_HOST = "http://localhost:11434";

/**
 * Turns an OLLAMA_HOST value into a base URL that can be prefixed to "/api/...".
 *
 * - unset / blank            → DEFAULT_OLLAMA_HOST
 * - no scheme ("host:port")  → "http://" is prepended, because fetch() rejects scheme-less URLs
 * - bare host without port   → ":11434" is appended (only when no scheme was given)
 * - trailing slashes         → removed, so "${host}/api/chat" never contains "//"
 *
 * @param raw Raw environment value, possibly undefined.
 * @returns Normalized base URL without a trailing slash.
 */
function normalizeOllamaHost(raw: string | undefined): string {
  const value = raw?.trim() ?? "";
  if (value === "") return DEFAULT_OLLAMA_HOST;

  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(value);
  let url = value;
  if (!hasScheme) {
    // A bare hostname (no ":" and no "/") gets the default Ollama port.
    const withPort = /[:/]/.test(value) ? value : `${value}:11434`;
    url = `http://${withPort}`;
  }
  return url.replace(/\/+$/, "");
}

/** Base URL of the Ollama server, taken from the OLLAMA_HOST environment variable. */
const OLLAMA_HOST = normalizeOllamaHost(process.env.OLLAMA_HOST);

// ---------------------------------------------------------------------------
// JSON Schema for the Ollama verdict output
// ---------------------------------------------------------------------------

/**
 * JSON Schema describing the verdict object. It is sent as the `format` field
 * of the Ollama chat request so the model is asked for exactly this structure.
 * All six properties are required; `counter_evidence` and `notes` may be null.
 */
const VERDICT_SCHEMA = {
  type: "object",
  additionalProperties: false,
  properties: {
    status: {
      type: "string",
      enum: ["supported", "contradicted", "mixed", "insufficient_evidence", "needs_human_review"],
    },
    confidence: { type: "string", enum: ["high", "medium", "low"] },
    summary: { type: "string" },
    counter_evidence: { type: ["string", "null"] },
    notes: { type: ["string", "null"] },
    supporting_urls: { type: "array", items: { type: "string" } },
  },
  required: ["status", "confidence", "summary", "counter_evidence", "notes", "supporting_urls"],
};

// ---------------------------------------------------------------------------
// Ollama verdict synthesis
// ---------------------------------------------------------------------------

/**
 * Builds the (German) system prompt for the verdict model.
 *
 * The prompt defines the five status values, the three confidence levels,
 * rules that restrict when "contradicted" may be used, and what each output
 * field should contain. The template content starts at column 0 on purpose:
 * any indentation inside the template literal would become part of the prompt.
 *
 * @returns The system prompt text.
 */
function buildVerdictSystemPrompt(): string {
  return `Du bist ein erfahrener Fact-Checker. Bewerte eine Behauptung anhand bereitgestellter Webquellen.

Bewertungsskala:
- supported: Quellen bestätigen die Behauptung klar und konsistent
- contradicted: Quellen widersprechen der Behauptung klar und substanziell
- mixed: Quellen liefern widersprüchliche Belege ODER die Behauptung ist technisch ungenau aber im Kern korrekt
- insufficient_evidence: Zu wenig oder qualitativ unzureichende Quellen für ein Urteil
- needs_human_review: Komplex, politisch heikel, veraltete Quellen, oder stark kontextabhängig

Confidence:
- high: Quellenlage ist eindeutig und aus Primärquellen
- medium: Quellen vorhanden aber begrenzt oder sekundär
- low: Quellen sehr rar, veraltet oder widersprüchlich

WICHTIGE REGELN für "contradicted":
- Nur bei klaren, substanziellen Fehlern verwenden: falsche Person, falsch zugeordnetes Ereignis, Zahl um mehr als 10% abweichend, grundlegend falsche Kausalität
- Gerundete oder allgemein akzeptierte Näherungswerte sind "supported"
- Zeitzonendifferenzen bei historischen Ereignissen: "supported" wenn im üblichen Kontext korrekt
- Technische Präzisierungen zu im Wesentlichen korrekten Aussagen → "mixed", nicht "contradicted"
- Im Zweifel: "mixed" statt "contradicted"

summary: 1-3 präzise Sätze basierend auf den Quellen. Nicht spekulieren.
counter_evidence: Gegenbelege als Satz beschreiben, falls vorhanden. Sonst null.
notes: Zeitabhängigkeit, regionale Einschränkungen, Vorbehalt. Sonst null.
supporting_urls: URLs aus den Quellen die den Claim stützen (leeres Array wenn keine).

Antworte NUR mit dem JSON-Objekt. Kein Freitext.`;
}

/**
 * Builds the user message for the verdict model: the claim, an optional
 * article excerpt, the Perplexity summary and the formatted source list.
 *
 * @param claim Claim to be judged; inserted verbatim in double quotes.
 * @param perplexitySummary Research summary returned by the Perplexity search.
 * @param sources Sources to list; rendered by formatSourcesForPrompt(sources, 300).
 * @param context Optional article excerpt. It is trimmed and cut to 300
 *   characters (counted in code points so an emoji or other astral character
 *   is never split). Blank context is treated like no context.
 * @returns The prompt text.
 */
function buildVerdictUserPrompt(claim: string, perplexitySummary: string, sources: PerplexitySource[], context?: string): string {
  const maxContextChars = 300;
  const excerpt = Array.from(context?.trim() ?? "").slice(0, maxContextChars).join("");
  const contextBlock = excerpt ? `\nARTIKEL-KONTEXT: "${excerpt}"\n` : "";

  // Template content starts at column 0: indentation would end up in the prompt.
  return `ZU PRÜFENDE BEHAUPTUNG:
"${claim}"
${contextBlock}
RECHERCHE-ERGEBNIS (Perplexity):
${perplexitySummary}

QUELLEN:
${formatSourcesForPrompt(sources, 300)}

Bewerte die Behauptung anhand der Recherche.`;
}

/** Status values accepted from the model (same literals as in the verdict JSON schema). */
const ACCEPTED_STATUSES: readonly string[] = [
  "supported",
  "contradicted",
  "mixed",
  "insufficient_evidence",
  "needs_human_review",
];

/** Confidence values accepted from the model. */
const ACCEPTED_CONFIDENCES: readonly string[] = ["high", "medium", "low"];

/**
 * Checks a parsed model reply at runtime and converts it into a VerdictRaw.
 *
 * `status`, `confidence` and `summary` are mandatory because later formatting
 * relies on them. The remaining fields are normalized instead of rejected:
 * a non-string `counter_evidence` / `notes` becomes null and a missing or
 * malformed `supporting_urls` becomes an array containing only its string entries.
 *
 * @throws Error when the value is not an object or a mandatory field is invalid.
 */
function toVerdict(value: unknown): VerdictRaw {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    throw new Error("Ungültiges Ollama-Urteil: kein JSON-Objekt");
  }
  const fields = value as Record<string, unknown>;
  const { status, confidence, summary } = fields;

  if (typeof status !== "string" || !ACCEPTED_STATUSES.includes(status)) {
    throw new Error(`Ungültiges Ollama-Urteil: status=${JSON.stringify(status)}`);
  }
  if (typeof confidence !== "string" || !ACCEPTED_CONFIDENCES.includes(confidence)) {
    throw new Error(`Ungültiges Ollama-Urteil: confidence=${JSON.stringify(confidence)}`);
  }
  if (typeof summary !== "string") {
    throw new Error("Ungültiges Ollama-Urteil: summary fehlt");
  }

  const textOrNull = (x: unknown): string | null => (typeof x === "string" ? x : null);
  const urls = Array.isArray(fields.supporting_urls)
    ? fields.supporting_urls.filter((u): u is string => typeof u === "string")
    : [];

  return {
    // Safe casts: both values were checked against the accepted literals above.
    status: status as VerificationStatus,
    confidence: confidence as Confidence,
    summary,
    counter_evidence: textOrNull(fields.counter_evidence),
    notes: textOrNull(fields.notes),
    supporting_urls: urls,
  };
}

/**
 * Asks the Ollama chat endpoint to judge a claim against the research material.
 *
 * Sends one non-streaming POST to `${OLLAMA_HOST}/api/chat` with the system and
 * user prompt, the verdict JSON schema as `format`, temperature 0.1 and an
 * 8192-token context window, then parses and validates the reply.
 *
 * @param claim Claim to judge.
 * @param perplexitySummary Research summary from Perplexity.
 * @param sources Sources found by Perplexity.
 * @param model Ollama model name.
 * @param context Optional article excerpt for ambiguous claims.
 * @param signal Optional abort signal forwarded to fetch.
 * @returns The validated verdict.
 * @throws Error on a non-2xx response, an empty reply, invalid JSON, or a
 *   verdict whose mandatory fields are missing or invalid. Errors raised by
 *   fetch itself (network failure, abort) propagate unchanged.
 */
async function synthesizeVerdict(
  claim: string,
  perplexitySummary: string,
  sources: PerplexitySource[],
  model: string,
  context?: string,
  signal?: AbortSignal
): Promise<VerdictRaw> {
  const body = {
    model,
    messages: [
      { role: "system", content: buildVerdictSystemPrompt() },
      { role: "user", content: buildVerdictUserPrompt(claim, perplexitySummary, sources, context) },
    ],
    format: VERDICT_SCHEMA,
    stream: false,
    options: { temperature: 0.1, num_ctx: 8192 },
  };

  const resp = await fetch(`${OLLAMA_HOST}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
    signal,
  });

  if (!resp.ok) {
    // The error body is best-effort context only; never let reading it mask the status.
    const text = await resp.text().catch(() => "");
    throw new Error(`Ollama Fehler ${resp.status}: ${text}`);
  }

  const data = (await resp.json()) as OllamaResponse;
  const raw = data.message?.content ?? "";
  if (!raw.trim()) throw new Error("Leere Ollama-Antwort");

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw new Error(`Kein gültiges JSON von Ollama: ${raw.slice(0, 200)}`);
  }

  return toVerdict(parsed);
}

// ---------------------------------------------------------------------------
// Main function
// ---------------------------------------------------------------------------

/**
 * Verifies a single claim: Perplexity search first, then an Ollama verdict
 * based on the search summary and sources.
 *
 * @param claim Claim text. Must contain non-whitespace characters.
 * @param options Optional settings:
 *   - `context`: article excerpt passed to the verdict prompt
 *   - `mode`: Perplexity search mode, defaults to "fast"
 *   - `model`: Ollama model, defaults to DEFAULT_MODEL
 *   - `signal`: abort signal forwarded to both the search and the Ollama call
 *   - `logger`: progress logger, defaults to nullLogger
 * @returns The verdict combined with sources, estimated cost, latency and model name.
 * @throws Error when the claim is blank (checked before any paid search is
 *   started); errors from the search or the verdict step propagate unchanged.
 */
export async function verifyClaim(
  claim: string,
  options?: {
    context?: string;
    mode?: "fast" | "deep";
    model?: string;
    signal?: AbortSignal;
    logger?: Logger;
  }
): Promise<VerificationResult> {
  if (!claim.trim()) {
    throw new Error("Kein Claim übergeben.");
  }

  const t0 = Date.now();
  const mode = options?.mode ?? "fast";
  const model = options?.model ?? DEFAULT_MODEL;
  const log = options?.logger ?? nullLogger;

  // Only the first 80 characters of the claim are logged.
  log.info("Perplexity-Suche gestartet", { claim: claim.slice(0, 80), mode });
  const perplexityResult = await searchPerplexity(claim, {
    mode,
    signal: options?.signal,
  });
  log.info("Perplexity abgeschlossen", {
    sources: perplexityResult.sources.length,
    costUSD: perplexityResult.estimatedCostUSD.toFixed(4),
  });

  log.info("Ollama-Urteil generieren...", { model });
  const verdict = await synthesizeVerdict(
    claim,
    perplexityResult.summary,
    perplexityResult.sources,
    model,
    options?.context,
    options?.signal
  );
  log.info("Urteil erhalten", { status: verdict.status, confidence: verdict.confidence });

  return {
    claim,
    status: verdict.status,
    confidence: verdict.confidence,
    summary: verdict.summary,
    counter_evidence: verdict.counter_evidence,
    notes: verdict.notes,
    sources: perplexityResult.sources,
    supporting_urls: verdict.supporting_urls,
    perplexityCostUSD: perplexityResult.estimatedCostUSD,
    latencyMs: Date.now() - t0,
    model,
  };
}

// ---------------------------------------------------------------------------
// Formatting
// ---------------------------------------------------------------------------

/** German display label (symbol + text) for each verification status. */
const STATUS_ICON: Record<VerificationStatus, string> = {
  supported: "✓ BESTÄTIGT",
  contradicted: "✗ WIDERLEGT",
  mixed: "~ GEMISCHT",
  insufficient_evidence: "? BELEGE UNZUREICHEND",
  needs_human_review: "⚠ MENSCHLICHE PRÜFUNG NÖTIG",
};

/** German display label for each confidence level. */
const CONF_LABEL: Record<Confidence, string> = {
  high: "hoch",
  medium: "mittel",
  low: "niedrig",
};

/**
 * Renders a verification result as a Markdown report (German labels).
 *
 * Layout: heading, claim, status with confidence, justification, optional
 * counter evidence and notes, numbered source list (sources the model marked
 * as supporting get a " ✓"), and a footer with cost, model and total latency.
 *
 * @param result Result to render.
 * @returns Markdown text with lines joined by "\n".
 */
export function formatVerificationResult(result: VerificationResult): string {
  // Fall back to the raw value so an unexpected status/confidence is shown instead of "undefined".
  const statusLabel = STATUS_ICON[result.status] ?? result.status;
  const confidenceLabel = CONF_LABEL[result.confidence] ?? result.confidence;
  const supportingUrls = new Set(result.supporting_urls);

  const lines: string[] = [
    `## Verifikation`,
    `**Behauptung:** "${result.claim}"`,
    "",
    `**${statusLabel}** (Konfidenz: ${confidenceLabel})`,
    "",
    `**Begründung:** ${result.summary}`,
  ];

  if (result.counter_evidence) {
    lines.push(`\n**Gegenbelege:** ${result.counter_evidence}`);
  }
  if (result.notes) {
    lines.push(`\n**Hinweise:** ${result.notes}`);
  }

  if (result.sources.length > 0) {
    lines.push("\n**Quellen:**");
    result.sources.forEach((source, index) => {
      const marker = supportingUrls.has(source.url) ? " ✓" : "";
      // A blank title falls back to the URL; square brackets are escaped so
      // they cannot terminate the Markdown link text early.
      const title = (source.title?.trim() || source.url).replace(/[\[\]]/g, "\\$&");
      lines.push(`[${index + 1}]${marker} [${title}](${source.url})`);
    });
  } else {
    lines.push("\n_(Keine Quellen gefunden)_");
  }

  const latSec = (result.latencyMs / 1000).toFixed(1);
  lines.push(`\n_[Perplexity: ~$${result.perplexityCostUSD.toFixed(4)} | Ollama: ${result.model} | Gesamt: ${latSec}s]_`);

  return lines.join("\n");
}

// ---------------------------------------------------------------------------
// Pi extension: default export
// ---------------------------------------------------------------------------

/**
 * TypeBox parameter schema of the `verify_claim` tool.
 * `claim` is required; `context`, `mode` ("fast" | "deep") and `model` are
 * optional. The descriptions are shown to the calling agent and are kept in German.
 */
const PARAMS = Type.Object({
  claim: Type.String({
    description:
      "Die zu verifizierende Behauptung als vollständiger, selbstständiger Satz. " +
      "Idealerweise das Ergebnis von extract_claims (claim_id + text).",
  }),
  context: Type.Optional(
    Type.String({
      description:
        "Optionaler Kontext: kurzer Auszug aus dem Artikel, in dem die Behauptung steht. " +
        "Hilft dem Fact-Checker bei mehrdeutigen Claims. Max. 300 Zeichen.",
    })
  ),
  mode: Type.Optional(
    Type.Union([Type.Literal("fast"), Type.Literal("deep")], {
      description:
        "fast (Standard): sonar, für die meisten Behauptungen ausreichend. " +
        "deep: sonar-pro, für komplexe, strittige oder heikle Behauptungen.",
    })
  ),
  model: Type.Optional(
    Type.String({
      description: `Ollama-Modell für die Urteilssynthese. Standard: ${DEFAULT_MODEL}.`,
    })
  ),
});

/**
 * Pi extension entry point: registers the `verify_claim` tool.
 *
 * The tool runs verifyClaim with the given parameters and the tool-call abort
 * signal, and returns the Markdown report as text plus a `details` object
 * (status, confidence, model, source count, cost, latency). Failures are not
 * thrown to the host; they are returned as a text result starting with
 * "Verifikationsfehler:".
 *
 * @param pi Extension API object provided by the host; only `registerTool` is used here.
 */
export default function verifierExtension(pi: ExtensionAPI) {
  pi.registerTool({
    name: "verify_claim",
    label: "Claim-Verifikation",
    description:
      "Verifiziert eine einzelne Behauptung: Perplexity-Recherche → Ollama-Urteil. " +
      "Gibt Status (supported/contradicted/mixed/insufficient_evidence/needs_human_review), " +
      "Konfidenz, Begründung und Quellen zurück. " +
      "Nutze dieses Tool nach extract_claims um spezifische Claims zu prüfen. " +
      "Kosten: ~$0.005-0.015 pro Claim (Perplexity) + lokal (Ollama).",
    promptGuidelines: [
      "Use verify_claim after extract_claims to check specific claims the user wants verified.",
      "Pass the full claim text from extract_claims as the 'claim' parameter.",
      "Use mode=deep for complex, politically sensitive, or scientifically contested claims.",
      "The 'context' parameter helps when the claim is ambiguous without its original article context.",
      "Show the full formatted output including the cost/latency line.",
      "If status is 'needs_human_review' or 'insufficient_evidence', clearly communicate this to the user and suggest manual checking.",
      "If status is 'contradicted', always show the counter_evidence to the user.",
      "For multiple claims from an extract_claims result, use verify_article instead — it is faster and cheaper.",
      "IMPORTANT: Never call verify_claim for multiple claims simultaneously. Ollama processes one request at a time — parallel calls will fail with 'fetch failed'. Always verify claims one by one, sequentially.",
    ],
    parameters: PARAMS,
    async execute(_toolCallId, params, signal) {
      try {
        const result = await verifyClaim(params.claim, {
          context: params.context,
          mode: params.mode,
          model: params.model,
          signal,
        });
        return {
          content: [{ type: "text", text: formatVerificationResult(result) }],
          details: {
            status: result.status,
            confidence: result.confidence,
            model: result.model,
            sourceCount: result.sources.length,
            perplexityCostUSD: result.perplexityCostUSD,
            latencyMs: result.latencyMs,
          },
        };
      } catch (err) {
        // Keep whatever was thrown visible: non-Error values are stringified
        // instead of being replaced by a generic message.
        const msg = err instanceof Error ? err.message : String(err ?? "Unbekannter Fehler");
        return { content: [{ type: "text", text: `Verifikationsfehler: ${msg}` }] };
      }
    },
  });
}

// ---------------------------------------------------------------------------
// CLI mode
// ---------------------------------------------------------------------------

/**
 * Parses the CLI arguments (without the node/script prefix).
 *
 * Recognized flags:
 *   --mode fast|deep   (also --mode=deep); any other value keeps the default "fast"
 *   --model <name>     (also --model=<name>)
 *   --json
 *   --verbose | -v
 *   --                 everything after it belongs to the claim, even if it starts with "-"
 *
 * A value-taking flag never consumes a following argument that itself starts
 * with "-", so `--mode --json "claim"` still enables JSON output. Unknown
 * `--flags` are ignored. All remaining arguments are joined with a space and
 * form the claim.
 *
 * @param args Raw argument list.
 * @returns Parsed options; `claim` is "" when no claim text was given.
 */
function parseCliArgs(args: string[]): { claim: string; mode: "fast" | "deep"; model: string; jsonOutput: boolean; verbose: boolean } {
  let mode: "fast" | "deep" = "fast";
  let model = DEFAULT_MODEL;
  let jsonOutput = false;
  let verbose = false;
  const claimParts: string[] = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === "--") {
      claimParts.push(...args.slice(i + 1));
      break;
    }

    // Split "--flag=value" into its two parts; plain arguments are left alone.
    const eq = arg.startsWith("--") ? arg.indexOf("=") : -1;
    const flag = eq === -1 ? arg : arg.slice(0, eq);

    if (flag === "--mode" || flag === "--model") {
      let value: string | undefined = eq === -1 ? undefined : arg.slice(eq + 1);
      if (value === undefined) {
        const next = args[i + 1];
        if (next !== undefined && !next.startsWith("-")) {
          value = next;
          i++;
        }
      }
      if (flag === "--mode") {
        if (value === "fast" || value === "deep") mode = value;
      } else if (value) {
        model = value;
      }
    } else if (arg === "--json") {
      jsonOutput = true;
    } else if (arg === "--verbose" || arg === "-v") {
      verbose = true;
    } else if (!arg.startsWith("--")) {
      claimParts.push(arg);
    }
  }

  return { claim: claimParts.join(" ").trim(), mode, model, jsonOutput, verbose };
}

/**
 * CLI entry point.
 *
 * Prints the help text and exits with code 0 when no arguments are given or
 * `--help` / `-h` appears anywhere in the argument list. Otherwise it parses
 * the arguments, verifies the claim and prints the result to stdout, either as
 * JSON (`--json`) or as the formatted report. The progress banner and all
 * errors go to stderr so that stdout stays machine-readable. Exits with code 1
 * when no claim was given or the verification fails.
 */
async function runCli(): Promise<void> {
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
    console.log(
      [
        "",
        "Claim-Verifikator (Ollama) — Eine Behauptung mit Perplexity + Ollama prüfen",
        "",
        "Verwendung:",
        '  npx tsx agenten/ollama-verifier.ts [Optionen] "Behauptung..."',
        "",
        "Optionen:",
        "  --mode fast|deep   Perplexity-Modus (Standard: fast)",
        `  --model <name>     Ollama-Modell (Standard: ${DEFAULT_MODEL})`,
        "  --json             Ausgabe als JSON",
        "  --verbose, -v      Ausführliche Protokollausgabe",
        "  --help, -h         Diese Hilfe",
        "",
      ].join("\n")
    );
    process.exit(0);
  }

  const { claim, mode, model, jsonOutput, verbose } = parseCliArgs(args);

  if (!claim) {
    console.error("Fehler: Kein Claim übergeben.");
    process.exit(1);
  }

  if (!jsonOutput) {
    console.error(`\nVerifiziere: "${claim}"\nModus: ${mode} | Modell: ${model}\n`);
  }

  const log = createLogger({ verbose });

  try {
    const result = await verifyClaim(claim, { mode, model, logger: log });
    if (jsonOutput) {
      console.log(JSON.stringify(result, null, 2));
    } else {
      console.log(formatVerificationResult(result));
    }
  } catch (err) {
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exit(1);
  }
}

// Run the CLI only when this file is the script Node was started with. When
// the module is imported (for example as a Pi extension), process.argv[1]
// points elsewhere and nothing happens.
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
  // runCli handles verification errors itself; this handler only covers
  // unexpected failures so they never surface as an unhandled rejection.
  runCli().catch((err: unknown) => {
    console.error("Fehler:", err instanceof Error ? err.message : err);
    process.exitCode = 1;
  });
}