// Text_Agent/agenten/llama-logic-editor.ts

/**
 * llama-logic-editor.ts
 * Pi-Extension + CLI: Argumentationsanalyse via llama.cpp (Qwopus3.6)
 *
 * Analysiert einen Text auf:
 * - Hauptthese und Unterthesen
 * - Explizite Prämissen und Belege
 * - Schlussfolgerungen
 * - Implizite Annahmen
 * - Logische Fehlschlüsse (Ad Hominem, Strohmann, etc.)
 * - Verbesserungsvorschläge
 *
 * Kein Ollama-format-Parameter — Schema steht als JSON-Literal im System-Prompt.
 * /no_think deaktiviert den Thinking-Modus bei Qwen3/Qwopus-Reasoning-Modellen.
 *
 * Als Pi-Extension: ~/.pi/agent/extensions/fact-checker/ (via Symlink)
 * Als CLI:
 *   npx tsx agenten/llama-logic-editor.ts "Artikeltext..."
 *   npx tsx agenten/llama-logic-editor.ts --only-fallacies "$(cat kommentar.txt)"
 *   npx tsx agenten/llama-logic-editor.ts --json "$(cat essay.txt)"
 */

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { fileURLToPath } from "node:url";
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";

// ---------------------------------------------------------------------------
// Typen
// ---------------------------------------------------------------------------

/**
 * Identifiers of the logical fallacies the analysis can report.
 * The same identifiers are listed (with explanations) in the system prompt
 * and are used as keys of the display-label table.
 */
type FallacyType =
	| "ad_hominem" | "straw_man" | "false_dichotomy" | "slippery_slope"
	| "circular_reasoning" | "appeal_to_authority" | "hasty_generalization"
	| "false_causation" | "appeal_to_emotion" | "overgeneralization"
	| "cherry_picking" | "other";

/** How serious a detected fallacy is. */
type Severity = "minor" | "moderate" | "critical";
/** How convincing a piece of evidence is. */
type EvidenceStrength = "strong" | "moderate" | "weak";
/** Overall verdict on the quality of the argumentation. */
type OverallQuality = "strong" | "adequate" | "weak" | "flawed";

/**
 * Structured result of one argument analysis.
 * Apart from `schema_version`, the fields mirror the JSON object the system
 * prompt asks the model to produce.
 */
type ArgumentMap = {
	// Version tag of this structure; not part of the JSON skeleton in the prompt.
	schema_version: "1.0.0";
	// Central claim of the analysed text.
	thesis: string;
	// Subordinate claims supporting the thesis.
	sub_theses: string[];
	// Explicitly stated premises.
	premises: string[];
	// Evidence used in the text, with a flag whether it supports the thesis.
	evidence: Array<{ claim: string; supports_thesis: boolean; strength: EvidenceStrength }>;
	// Explicit conclusions drawn in the text.
	conclusions: string[];
	// Unstated assumptions the argument relies on.
	implicit_assumptions: string[];
	// Detected fallacies; `location` is meant to hold a short quote from the text.
	fallacies: Array<{
		type: FallacyType;
		description: string;
		location: string;
		severity: Severity;
	}>;
	// Suggestions for improving the text.
	revision_suggestions: string[];
	// Overall verdict plus a free-text explanation.
	overall_quality: OverallQuality;
	quality_notes: string;
};

// Response shape of llama.cpp's OpenAI-compatible chat API.
// Only the fields this module reads are declared.
type LlamaResponse = {
	choices: Array<{
		// `content` carries the answer; `reasoning_content` is consulted as a
		// fallback when `content` is empty.
		message?: { content?: string; reasoning_content?: string };
		finish_reason?: string;
	}>;
	// Token counts as reported by the server (may be absent).
	usage?: {
		prompt_tokens?: number;
		completion_tokens?: number;
	};
};

/**
 * Public result of `analyzeLogic`: the argument map plus metadata about the run.
 * `provider` and `costUSD` are fixed literals ("llama" and 0).
 */
export type AnalysisResult = {
	map: ArgumentMap;
	provider: "llama";
	// Model name that was sent with the request.
	model: string;
	costUSD: 0;
	// Elapsed time of the analysis in milliseconds.
	latencyMs: number;
};

// ---------------------------------------------------------------------------
// Konfiguration
// ---------------------------------------------------------------------------

// Model name sent to the server when the caller supplies no override.
const DEFAULT_MODEL = "Qwopus3.6-35B-A3B-v1-Q4_K_M.gguf";
// Base URL of the llama.cpp server; overridable through the LLAMA_HOST environment variable.
const LLAMA_HOST = process.env.LLAMA_HOST ?? "http://localhost:8000";
// Sent as `max_tokens` with every request.
const MAX_TOKENS = 16384;
// Sent as `temperature` with every request.
const TEMPERATURE = 0.1;
// Number of fetch attempts before a network-level failure is reported.
const MAX_RETRIES = 3;
// Pause between two fetch attempts, in milliseconds.
const RETRY_DELAY_MS = 15_000;

// ---------------------------------------------------------------------------
// System-Prompt mit eingebettetem JSON-Schema
// ---------------------------------------------------------------------------

/**
 * System prompt (German) sent with every analysis request.
 * It describes the fields to extract, lists the fallacy identifiers and the
 * allowed enum values, and embeds the expected JSON object as a literal
 * skeleton. The text is runtime data for the model — edit with care.
 */
const ANALYSIS_SYSTEM_PROMPT = `Du bist ein Experte für kritisches Denken, Rhetorik und formale Logik.
Antworte ausschließlich auf Deutsch.
Analysiere den folgenden Text auf seine Argumentationsstruktur.

Extrahiere:
1. thesis: Die zentrale Hauptbehauptung als vollständiger Satz
2. sub_theses: Untergeordnete Thesen die die Hauptthese stützen
3. premises: Ausdrücklich genannte Voraussetzungen und Grundannahmen
4. evidence: Verwendete Belege (Fakten, Statistiken, Zitate, Studien) — beachte ob sie die These wirklich stützen
5. conclusions: Explizite Schlussfolgerungen die aus den Prämissen gezogen werden
6. implicit_assumptions: Nicht ausgesprochene Annahmen die das Argument voraussetzt

Fehlschluss-Typen (für das "type"-Feld):
- ad_hominem: Person statt Argument angegriffen
- straw_man: Gegnerposition verzerrt dargestellt
- false_dichotomy: Falsche Zweiteilung (nur A oder B, obwohl mehr möglich)
- slippery_slope: Kettenreaktion ohne Beleg
- circular_reasoning: These wird durch sich selbst begründet
- appeal_to_authority: Autorität als einziger Beleg
- hasty_generalization: Einzelfall → Allgemeinregel
- false_causation: Korrelation als Kausalität dargestellt
- appeal_to_emotion: Emotionen statt Argumente
- overgeneralization: Zu weit gefasste Verallgemeinerung
- cherry_picking: Nur passende Fakten ausgewählt
- other: Sonstiger Fehlschluss

overall_quality-Werte: "strong" | "adequate" | "weak" | "flawed"
severity-Werte: "minor" | "moderate" | "critical"
strength-Werte: "strong" | "moderate" | "weak"

Antworte AUSSCHLIESSLICH mit einem JSON-Objekt gemäß folgendem Schema:
{
  "thesis": "string",
  "sub_theses": ["string"],
  "premises": ["string"],
  "evidence": [{"claim": "string", "supports_thesis": true, "strength": "strong|moderate|weak"}],
  "conclusions": ["string"],
  "implicit_assumptions": ["string"],
  "fallacies": [{"type": "ad_hominem|...", "description": "string", "location": "wörtliches Zitat max. 120 Zeichen", "severity": "minor|moderate|critical"}],
  "revision_suggestions": ["string"],
  "overall_quality": "strong|adequate|weak|flawed",
  "quality_notes": "string"
}

Kein Freitext vor oder nach dem JSON-Objekt.`;

// ---------------------------------------------------------------------------
// Prompt-Generierung
// ---------------------------------------------------------------------------

/**
 * Builds the user message for one analysis request.
 *
 * The message starts with the `/no_think` directive, followed by the German
 * instruction line and the text to analyse framed by `---` delimiter lines.
 *
 * @param text The text whose argumentation should be analysed.
 * @returns The complete user prompt.
 */
function buildUserPrompt(text: string): string {
	const promptLines = [
		"/no_think",
		"Analysiere die Argumentationsstruktur:",
		"",
		"---",
		text,
		"---",
	];
	return promptLines.join("\n");
}

// ---------------------------------------------------------------------------
// llama.cpp-Aufruf
// ---------------------------------------------------------------------------

/** Entry types derived from ArgumentMap so the sanitizers cannot drift from it. */
type FallacyEntry = ArgumentMap["fallacies"][number];
type EvidenceEntry = ArgumentMap["evidence"][number];

/** Values accepted for the enum-like fields of the model output. */
const KNOWN_FALLACY_TYPES: readonly FallacyEntry["type"][] = [
	"ad_hominem", "straw_man", "false_dichotomy", "slippery_slope",
	"circular_reasoning", "appeal_to_authority", "hasty_generalization",
	"false_causation", "appeal_to_emotion", "overgeneralization",
	"cherry_picking", "other",
];
const KNOWN_SEVERITIES: readonly FallacyEntry["severity"][] = ["minor", "moderate", "critical"];
const KNOWN_STRENGTHS: readonly EvidenceEntry["strength"][] = ["strong", "moderate", "weak"];
const KNOWN_QUALITIES: readonly ArgumentMap["overall_quality"][] = ["strong", "adequate", "weak", "flawed"];

/** Upper bound on parse attempts when searching for an embedded JSON object. */
const MAX_JSON_SCAN_ATTEMPTS = 200;

/** True for non-null, non-array objects (the only JSON values that can be an argument map). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
	return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Removes surrounding whitespace and a leading/trailing Markdown code fence. */
function stripCodeFence(raw: string): string {
	return raw
		.trim()
		.replace(/^```(?:json)?\s*/i, "")
		.replace(/\s*```$/i, "")
		.trim();
}

/** Parses `candidate` as JSON and returns it only if it is a plain object. */
function tryParseRecord(candidate: string): Record<string, unknown> | undefined {
	try {
		const value: unknown = JSON.parse(candidate);
		return isPlainRecord(value) ? value : undefined;
	} catch {
		return undefined;
	}
}

/**
 * Finds a JSON object in model output.
 * Tries the whole (fence-stripped) text first; if that fails, tries every
 * slice that starts at a `{` and ends at the last `}`, so leading prose or
 * reasoning text that itself contains braces does not defeat the extraction.
 */
function findJsonObject(text: string): Record<string, unknown> | undefined {
	const cleaned = stripCodeFence(text);
	if (!cleaned) return undefined;

	const direct = tryParseRecord(cleaned);
	if (direct) return direct;

	const end = cleaned.lastIndexOf("}");
	let start = cleaned.indexOf("{");
	for (let attempts = 0; start !== -1 && start < end && attempts < MAX_JSON_SCAN_ATTEMPTS; attempts++) {
		const hit = tryParseRecord(cleaned.slice(start, end + 1));
		if (hit) return hit;
		start = cleaned.indexOf("{", start + 1);
	}
	return undefined;
}

/** Returns `value` as the matching member of `allowed` (case/whitespace-insensitive), if any. */
function matchEnum<T extends string>(value: unknown, allowed: readonly T[]): T | undefined {
	if (typeof value !== "string") return undefined;
	const needle = value.trim().toLowerCase();
	return allowed.find((candidate) => candidate === needle);
}

/** Coerces a model field to a list of strings: arrays are filtered, a lone string is wrapped. */
function toStringList(value: unknown): string[] {
	if (typeof value === "string") return value.trim() ? [value] : [];
	if (!Array.isArray(value)) return [];
	return value.filter((item): item is string => typeof item === "string");
}

/**
 * Builds a well-formed ArgumentMap from parsed model output.
 * Missing lists become empty, malformed entries are dropped and off-schema
 * enum values are replaced by a fallback, so consumers can rely on the
 * declared shape instead of on an unchecked cast.
 */
function normalizeArgumentMap(source: Record<string, unknown>, thesis: string, log: Logger): ArgumentMap {
	const asText = (value: unknown): string => (typeof value === "string" ? value : "");
	const records = (value: unknown): Record<string, unknown>[] =>
		Array.isArray(value) ? value.filter(isPlainRecord) : [];

	const quality = matchEnum(source.overall_quality, KNOWN_QUALITIES);
	if (!quality) {
		log.warn("overall_quality fehlt oder ist ungültig — Fallback 'adequate'", {
			received: source.overall_quality,
		});
	}

	return {
		schema_version: "1.0.0",
		thesis,
		sub_theses: toStringList(source.sub_theses),
		premises: toStringList(source.premises),
		evidence: records(source.evidence)
			.filter((entry) => typeof entry.claim === "string")
			.map((entry): EvidenceEntry => ({
				claim: asText(entry.claim),
				supports_thesis: Boolean(entry.supports_thesis),
				// "weak" matches how an unrecognised strength has always been displayed.
				strength: matchEnum(entry.strength, KNOWN_STRENGTHS) ?? "weak",
			})),
		conclusions: toStringList(source.conclusions),
		implicit_assumptions: toStringList(source.implicit_assumptions),
		fallacies: records(source.fallacies).map((entry): FallacyEntry => ({
			type: matchEnum(entry.type, KNOWN_FALLACY_TYPES) ?? "other",
			description: asText(entry.description),
			location: asText(entry.location),
			severity: matchEnum(entry.severity, KNOWN_SEVERITIES) ?? "moderate",
		})),
		revision_suggestions: toStringList(source.revision_suggestions),
		overall_quality: quality ?? "adequate",
		quality_notes: asText(source.quality_notes),
	};
}

/** Resolves after `ms` milliseconds, or rejects as soon as `signal` is aborted. */
function waitOrAbort(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise<void>((resolve, reject) => {
		if (signal?.aborted) {
			reject(new Error("llama.cpp-Anfrage abgebrochen"));
			return;
		}
		const onAbort = (): void => {
			clearTimeout(timer);
			reject(new Error("llama.cpp-Anfrage abgebrochen"));
		};
		const timer = setTimeout(() => {
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);
		signal?.addEventListener("abort", onAbort, { once: true });
	});
}

/**
 * Sends `text` to the llama.cpp chat-completions endpoint and returns the
 * parsed, sanitized argument map together with token counts and latency.
 *
 * Network-level fetch failures are retried up to MAX_RETRIES times with
 * RETRY_DELAY_MS between attempts; an abort via `signal` ends the call
 * immediately instead of being retried. If `content` is empty, a JSON object
 * found in `reasoning_content` is used as a fallback.
 *
 * @param text Text to analyse.
 * @param model Model name sent with the request.
 * @param signal Optional abort signal for the HTTP request and the retry pauses.
 * @param logger Optional logger; defaults to `nullLogger`.
 * @returns The argument map, prompt/completion token counts (0 if not reported) and elapsed milliseconds.
 * @throws Error when the request fails, the server answers with a non-OK
 *   status, the answer is empty or not a JSON object, or `thesis` is missing.
 */
async function analyzeWithLlama(
	text: string,
	model: string,
	signal?: AbortSignal,
	logger?: Logger
): Promise<{ map: ArgumentMap; tokensIn: number; tokensOut: number; latencyMs: number }> {
	const log = logger ?? nullLogger;
	const t0 = Date.now();

	const body = {
		model,
		messages: [
			{ role: "system", content: ANALYSIS_SYSTEM_PROMPT },
			{ role: "user", content: buildUserPrompt(text) },
		],
		stream: false,
		temperature: TEMPERATURE,
		max_tokens: MAX_TOKENS,
	};

	log.debug("llama.cpp-LogicEditor gestartet", { model, textLength: text.length });

	let resp: Response | undefined;
	for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
		try {
			resp = await fetch(`${LLAMA_HOST}/v1/chat/completions`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify(body),
				signal,
			});
			break;
		} catch (err) {
			// A caller-initiated abort is final: retrying would only fail again
			// after needlessly waiting out the retry delays.
			if (signal?.aborted) throw err;

			const message = err instanceof Error ? err.message : String(err);
			const isLast = attempt === MAX_RETRIES;
			log.warn(`llama.cpp fetch fehlgeschlagen (Versuch ${attempt}/${MAX_RETRIES})`, {
				error: message,
				retryInMs: isLast ? 0 : RETRY_DELAY_MS,
			});
			if (isLast) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: ${message}`);
			await waitOrAbort(RETRY_DELAY_MS, signal);
		}
	}
	if (!resp) throw new Error("llama.cpp: keine Antwort erhalten");

	if (!resp.ok) {
		const errorText = await resp.text().catch(() => "");
		throw new Error(`llama.cpp API Fehler ${resp.status}: ${errorText}`);
	}

	const data = (await resp.json()) as LlamaResponse;
	const choice = data.choices?.[0];
	const content = stripCodeFence(choice?.message?.content ?? "");
	const reasoning = choice?.message?.reasoning_content ?? "";

	let parsed = content ? findJsonObject(content) : undefined;
	let rawLength = content.length;

	// Reasoning fallback: with an empty `content`, look for the answer object
	// inside `reasoning_content` (thinking mode active despite /no_think).
	if (!content && reasoning) {
		const recovered = findJsonObject(reasoning);
		if (recovered && typeof recovered.thesis === "string") {
			parsed = recovered;
			rawLength = reasoning.length;
			log.warn("content leer — JSON aus reasoning_content extrahiert (Thinking-Modus aktiv trotz /no_think)", {
				finishReason: choice?.finish_reason,
				rawLength,
			});
		}
	}

	log.debug("llama.cpp-LogicEditor Antwort", {
		promptTokens: data.usage?.prompt_tokens,
		outputTokens: data.usage?.completion_tokens,
		finishReason: choice?.finish_reason,
		rawLength,
	});

	if (!parsed) {
		if (!content) throw new Error("Leere Antwort von llama.cpp-LogicEditor");
		// A "length" finish reason means the output hit max_tokens, which is
		// the usual cause of unparseable (cut-off) JSON.
		const hint = choice?.finish_reason === "length" ? " (Antwort wurde bei max_tokens abgeschnitten)" : "";
		throw new Error(`llama.cpp-LogicEditor-Ausgabe ist kein gültiges JSON${hint}: ${content.slice(0, 200)}`);
	}

	const thesis = parsed.thesis;
	if (typeof thesis !== "string") {
		throw new Error(`Ungültige Struktur: 'thesis' fehlt. Keys: ${Object.keys(parsed).join(", ")}`);
	}

	return {
		map: normalizeArgumentMap(parsed, thesis, log),
		tokensIn: data.usage?.prompt_tokens ?? 0,
		tokensOut: data.usage?.completion_tokens ?? 0,
		latencyMs: Date.now() - t0,
	};
}

// ---------------------------------------------------------------------------
// Hauptfunktion
// ---------------------------------------------------------------------------

/**
 * Runs the argument analysis for `text` on the local llama.cpp server.
 *
 * @param text Text to analyse.
 * @param options Optional model override, abort signal and logger; the model
 *   falls back to `DEFAULT_MODEL` when not given.
 * @returns The argument map plus provider, model name, cost (always 0) and latency.
 */
export async function analyzeLogic(
	text: string,
	options?: {
		model?: string;
		signal?: AbortSignal;
		logger?: Logger;
	}
): Promise<AnalysisResult> {
	const model = options?.model ?? DEFAULT_MODEL;
	const analysis = await analyzeWithLlama(text, model, options?.signal, options?.logger);

	const result: AnalysisResult = {
		map: analysis.map,
		provider: "llama",
		model,
		costUSD: 0,
		latencyMs: analysis.latencyMs,
	};
	return result;
}

// ---------------------------------------------------------------------------
// Formatierung
// ---------------------------------------------------------------------------

// German display label for each overall-quality verdict.
const QUALITY_LABEL: Record<OverallQuality, string> = {
	strong: "STARK",
	adequate: "AUSREICHEND",
	weak: "SCHWACH",
	flawed: "FEHLERHAFT",
};

// Symbol shown in front of the overall-quality label.
const QUALITY_ICON: Record<OverallQuality, string> = {
	strong: "✓",
	adequate: "~",
	weak: "⚠",
	flawed: "✗",
};

// German display name for each fallacy identifier.
const FALLACY_LABEL: Record<FallacyType, string> = {
	ad_hominem: "Ad Hominem",
	straw_man: "Strohmann",
	false_dichotomy: "Falsche Dichotomie",
	slippery_slope: "Schiefe Ebene",
	circular_reasoning: "Zirkelschluss",
	appeal_to_authority: "Autoritätsargument",
	hasty_generalization: "Vorschnelle Generalisierung",
	false_causation: "Falsche Kausalität",
	appeal_to_emotion: "Appell an Emotionen",
	overgeneralization: "Überverallgemeinerung",
	cherry_picking: "Rosinenpickerei",
	other: "Sonstiger Fehlschluss",
};

// Symbol shown in front of a fallacy, by severity.
const SEVERITY_ICON: Record<Severity, string> = {
	minor: "·",
	moderate: "⚠",
	critical: "✗",
};

/**
 * Renders one fallacy as three display lines followed by a blank separator.
 * Values outside the known enums fall back to the raw type name / a "?" icon
 * instead of printing "undefined".
 */
function renderFallacy(fallacy: AnalysisResult["map"]["fallacies"][number]): string[] {
	const icon = SEVERITY_ICON[fallacy.severity] ?? "?";
	const label = FALLACY_LABEL[fallacy.type] ?? String(fallacy.type);
	return [
		`${icon} **${label}** (${fallacy.severity})`,
		`  ${fallacy.description}`,
		`  _"${fallacy.location}"_`,
		"",
	];
}

/**
 * Appends a titled list section (heading, one line per item, blank line) to
 * `lines`; does nothing when the list is missing or empty.
 */
function appendList(
	lines: string[],
	title: string,
	items: readonly string[] | undefined,
	showCount: boolean,
	renderItem: (item: string, index: number) => string
): void {
	if (!items || items.length === 0) return;
	lines.push(showCount ? `**${title} (${items.length}):**` : `**${title}:**`);
	items.forEach((item, index) => lines.push(renderItem(item, index)));
	lines.push("");
}

/**
 * Formats an analysis result as a German Markdown report.
 *
 * Lists that are missing from the map are treated as empty, so output that
 * slipped through without full validation cannot crash the formatter.
 *
 * @param result The analysis to render.
 * @param onlyFallacies When true, only the fallacy list (or a "none found"
 *   line) plus the footer is produced instead of the full report.
 * @returns The report text; it always ends with a footer naming model and latency.
 */
export function formatAnalysis(result: AnalysisResult, onlyFallacies = false): string {
	const { map } = result;
	const seconds = (result.latencyMs / 1000).toFixed(1);
	const footer = `_[llama.cpp: ${result.model} · kostenlos (lokal) · ${seconds}s]_`;
	const fallacies = map.fallacies ?? [];

	if (onlyFallacies) {
		if (fallacies.length === 0) return `Keine Fehlschlüsse erkannt.\n\n${footer}`;
		return [`## Fehlschlüsse (${fallacies.length})`, "", ...fallacies.flatMap((f) => renderFallacy(f)), footer].join("\n");
	}

	const quality = map.overall_quality;
	const lines: string[] = [
		`## Argumentationsanalyse`,
		`**Gesamtqualität: ${QUALITY_ICON[quality] ?? "?"} ${QUALITY_LABEL[quality] ?? "UNBEKANNT"}**`,
		map.quality_notes ?? "",
		"",
		`**Hauptthese:**`,
		`> ${map.thesis}`,
		"",
	];

	const bullet = (item: string): string => `- ${item}`;
	appendList(lines, "Unterthesen", map.sub_theses, true, bullet);
	appendList(lines, "Prämissen", map.premises, false, bullet);

	const evidenceLines = (map.evidence ?? []).map((entry) => {
		const icon = entry.supports_thesis ? "✓" : "✗";
		const strength = entry.strength === "strong" ? "stark" : entry.strength === "moderate" ? "mittel" : "schwach";
		return `${icon} [${strength}] ${entry.claim}`;
	});
	appendList(lines, "Belege", evidenceLines, true, (line) => line);

	appendList(lines, "Schlussfolgerungen", map.conclusions, false, bullet);
	appendList(lines, "Implizite Annahmen", map.implicit_assumptions, true, (item) => `- _${item}_`);

	if (fallacies.length > 0) {
		lines.push(`**Fehlschlüsse (${fallacies.length}):**`);
		// Each rendered fallacy already ends with its own blank separator line.
		fallacies.forEach((f) => lines.push(...renderFallacy(f)));
	} else {
		lines.push(`_Keine Fehlschlüsse erkannt._`);
		lines.push("");
	}

	appendList(lines, "Verbesserungsvorschläge", map.revision_suggestions, false, (item, index) => `${index + 1}. ${item}`);

	lines.push(footer);
	return lines.join("\n");
}

// ---------------------------------------------------------------------------
// Pi-Extension
// ---------------------------------------------------------------------------

/**
 * Parameter schema of the `analyze_logic_llama` tool:
 * a required `text` to analyse and an optional `model` override.
 * The `description` strings are part of the schema that is registered with the tool.
 */
const PARAMS = Type.Object({
	text: Type.String({
		description:
			"Der zu analysierende Text: Artikel, Blogpost, Kommentar, Essay oder Nachrichtentext. " +
			"Der Text wird auf logische Struktur, Fehlschlüsse und Argumentationsqualität geprüft.",
	}),
	model: Type.Optional(
		Type.String({ description: "llama.cpp-Modell-Override." })
	),
});

/**
 * Pi extension entry point: registers the `analyze_logic_llama` tool.
 *
 * The tool runs `analyzeLogic` on the given text and returns the formatted
 * report together with a few summary numbers in `details`. Any error raised
 * while analysing or formatting is turned into a plain text message instead
 * of being thrown.
 *
 * @param pi Extension API used to register the tool.
 */
export default function llamaLogicEditorExtension(pi: ExtensionAPI) {
	const toolDescription = [
		"Analysiert die logische Struktur eines Texts: Hauptthese, Prämissen, Belege, ",
		"Schlussfolgerungen, implizite Annahmen und logische Fehlschlüsse. ",
		"Gibt konkrete Verbesserungsvorschläge und eine Qualitätsbewertung. ",
		"Verwendet llama.cpp lokal (kostenlos). BEVORZUGT gegenüber analyze_logic.",
	].join("");

	const guidelines = [
		"PREFERRED: Use analyze_logic_llama for all argument analysis (local, free, unified backend).",
		"Use analyze_logic (Ollama/deepseek-r1) only when explicitly requested by the user.",
		"Use analyze_logic_llama when the user wants to check argumentation quality of an article, comment, or essay.",
		"Use after verify_article_llama to get both factual AND logical quality assessment.",
		"Always show the full formatted output including fallacies and revision suggestions.",
		"If fallacies with severity 'critical' are found, highlight them prominently.",
		"The revision_suggestions are actionable — offer to rewrite specific sections if the user wants.",
		"Combine with verify_article_llama for a complete quality assessment: facts + logic.",
	];

	pi.registerTool({
		name: "analyze_logic_llama",
		label: "Argumentationsanalyse (llama.cpp)",
		description: toolDescription,
		promptGuidelines: guidelines,
		parameters: PARAMS,
		async execute(_toolCallId, params, signal) {
			try {
				const analysis = await analyzeLogic(params.text, { model: params.model, signal });
				const { map } = analysis;

				// Count the fallacies the model rated as critical.
				let criticalCount = 0;
				for (const fallacy of map.fallacies) {
					if (fallacy.severity === "critical") criticalCount += 1;
				}

				return {
					content: [{ type: "text", text: formatAnalysis(analysis) }],
					details: {
						overallQuality: map.overall_quality,
						fallacyCount: map.fallacies.length,
						criticalFallacies: criticalCount,
						provider: analysis.provider,
						model: analysis.model,
						latencyMs: analysis.latencyMs,
					},
				};
			} catch (error) {
				let reason = "Unbekannter Fehler";
				if (error instanceof Error) reason = error.message;
				return { content: [{ type: "text", text: `Argumentationsanalyse fehlgeschlagen: ${reason}` }] };
			}
		},
	});
}

// ---------------------------------------------------------------------------
// CLI
// ---------------------------------------------------------------------------

/**
 * Command-line entry point.
 *
 * Parses `process.argv`, runs the analysis and prints either the formatted
 * report or JSON to stdout. Progress, warnings and errors go to stderr so
 * that `--json` output stays machine-readable.
 *
 * Exits with code 0 after printing the help text, and with code 1 when no
 * text was given, `--model` has no value, or the analysis fails.
 */
async function runCli(): Promise<void> {
	const args = process.argv.slice(2);

	// Show the help for an empty command line or when --help appears anywhere.
	if (args.length === 0 || args.includes("--help")) {
		console.log(`
Argumentationsanalyse via llama.cpp — Logik, Fehlschlüsse und Verbesserungsvorschläge

Verwendung:
  npx tsx agenten/llama-logic-editor.ts [Optionen] "Text..."
  npx tsx agenten/llama-logic-editor.ts "$(cat artikel.txt)"

Optionen:
  --only-fallacies   Nur Fehlschlüsse ausgeben (kein vollständiger Bericht)
  --model <name>     Modell-Override (Standard: ${DEFAULT_MODEL})
  --json             Ausgabe als JSON
  --verbose          Ausführliches Logging
  --help             Diese Hilfe
`);
		process.exit(0);
	}

	let model: string | undefined;
	let jsonOutput = false;
	let onlyFallacies = false;
	let verbose = false;
	const textParts: string[] = [];

	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		switch (arg) {
			case "--model": {
				const value = args[i + 1];
				// A missing value used to be ignored silently, which made the
				// run fall back to the default model without any notice.
				if (!value || value.startsWith("--")) {
					console.error("Fehler: --model benötigt einen Modellnamen.");
					process.exit(1);
				}
				model = value;
				i++;
				break;
			}
			case "--json":
				jsonOutput = true;
				break;
			case "--only-fallacies":
				onlyFallacies = true;
				break;
			case "--verbose":
				verbose = true;
				break;
			default:
				if (arg.startsWith("--")) {
					// Still ignored, but no longer silently: this also covers
					// text arguments that happen to start with "--".
					console.error(`Warnung: Unbekannte Option ignoriert: ${arg.slice(0, 40)}`);
				} else {
					textParts.push(arg);
				}
		}
	}

	const text = textParts.join(" ").trim();
	if (!text) {
		console.error("Fehler: Kein Text.");
		process.exit(1);
	}

	const logger = verbose ? createLogger({ verbose: true }) : nullLogger;
	if (!jsonOutput) console.error(`\nAnalyse via llama.cpp...\n`);

	try {
		const result = await analyzeLogic(text, { model, logger });

		if (jsonOutput) {
			const payload = onlyFallacies ? result.map.fallacies : result.map;
			console.log(JSON.stringify(payload, null, 2));
		} else {
			console.log(formatAnalysis(result, onlyFallacies));
		}
	} catch (err) {
		console.error("Fehler:", err instanceof Error ? err.message : err);
		process.exit(1);
	}
}

// Run the CLI only when this module is the script Node was started with
// (process.argv[1] equals this file's path); a plain import does not trigger it.
// NOTE(review): this is a plain string comparison — if the script is launched
// through a symlink the two paths may differ and the CLI would not start; confirm.
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) runCli();