// Text_Agent/agenten/llama-writer.ts

/**
 * llama-writer.ts
 * Pi-Extension + CLI: Artikel schreiben via llama.cpp (lokales LLM)
 *
 * Schreibt einen Artikel NUR auf Basis von "supported"-Claims aus einem VerificationReport.
 * Widerlegte, gemischte oder unzureichend belegte Claims werden automatisch ausgeschlossen.
 *
 * Kein Ollama-format-Parameter — Schema steht als JSON-Literal im System-Prompt.
 * /no_think deaktiviert den Thinking-Modus bei Qwen3/Qwopus-Reasoning-Modellen.
 *
 * Als Pi-Extension: ~/.pi/agent/extensions/fact-checker/ (via Symlink)
 * Als CLI:
 *   npx tsx agenten/llama-writer.ts --from-job <slug> --style blog
 *   npx tsx agenten/llama-verify-article.ts --json "$(cat artikel.txt)" | npx tsx agenten/llama-writer.ts --from-report
 */

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { fileURLToPath } from "node:url";
import type { VerificationReport } from "./llama-verify-article.js";
import {
	findJobDir,
	loadJobFile,
	saveJobFile,
	updateJobMeta,
} from "../lib/jobs.js";
import { createLogger, nullLogger, type Logger } from "../lib/logger.js";

// ---------------------------------------------------------------------------
// Typen
// ---------------------------------------------------------------------------

/** Writing styles the writer accepts; each one has a matching entry in STYLE_GUIDE. */
type Style = "journalistic" | "blog" | "academic" | "editorial" | "explanatory";

/**
 * Complete article draft as assembled by writeFromReport.
 *
 * The text fields (title, lead, body, conclusion, editorial_notes) originate
 * from the model's JSON answer; everything else is filled in locally.
 */
type ArticleDraft = {
	// Fixed schema marker of this draft format.
	schema_version: "1.0.0";
	title: string;
	// Introductory paragraph.
	lead: string;
	// Main text; the prompt asks the model to cite sources inline as [N].
	body: string;
	// Closing paragraph; the prompt explicitly allows null.
	conclusion: string | null;
	// Style and language that were requested for this draft.
	style: Style;
	language: string;
	// Whitespace-separated word count over lead, body and conclusion.
	word_count: number;
	// IDs of all claims handed to the model (not necessarily all cited in the text).
	claim_ids_used: string[];
	// Numbered source list, one entry per supporting source of each claim.
	sources: Array<{ number: number; url: string; title: string | null; claim_id: string }>;
	// IDs of report claims whose status was not "supported".
	excluded_claims: string[];
	// Model's note on what is missing for a complete article.
	editorial_notes: string;
};

// llama.cpp OpenAI-compatible API format.
// Only the fields this file actually reads are declared.
type LlamaResponse = {
	choices: Array<{
		// content carries the answer; reasoning_content is consulted as a fallback when content is empty.
		message?: { content?: string; reasoning_content?: string };
		finish_reason?: string;
	}>;
	// Token accounting; used for logging and returned as tokensIn/tokensOut.
	usage?: {
		prompt_tokens?: number;
		completion_tokens?: number;
	};
};

/** Result of writeFromReport: the finished draft plus metadata about the run. */
export type WriteResult = {
	draft: ArticleDraft;
	// Always "llama" for this writer.
	provider: "llama";
	// Model name that was sent in the request.
	model: string;
	// Typed as the literal 0; formatDraft renders this as "kostenlos (lokal)".
	costUSD: 0;
	// Wall-clock duration of the llama.cpp call in milliseconds.
	latencyMs: number;
};

// ---------------------------------------------------------------------------
// Konfiguration
// ---------------------------------------------------------------------------

// Model name sent to the server when the caller does not override it.
const DEFAULT_MODEL = "Qwopus3.6-35B-A3B-v1-Q4_K_M.gguf";
// Base URL of the llama.cpp server; overridable via the LLAMA_HOST environment variable.
const LLAMA_HOST = process.env.LLAMA_HOST ?? "http://localhost:8000";
// Sent as max_tokens in the chat-completion request.
const MAX_TOKENS = 16384;
// Sampling temperature sent with the request.
const TEMPERATURE = 0.4;
// Maximum number of attempts when the fetch call itself throws.
const MAX_RETRIES = 3;
// Pause between two attempts, in milliseconds.
const RETRY_DELAY_MS = 15_000;

// ---------------------------------------------------------------------------
// Schema + Prompt-Generierung
// ---------------------------------------------------------------------------

/**
 * Per-style writing instructions (German).
 * The entry for the requested style is inserted after "STIL:" in the system prompt.
 */
const STYLE_GUIDE: Record<Style, string> = {
	journalistic:
		"Journalistisch: präzise, faktenbasiert, W-Fragen im Einleitungssatz, Inverted Pyramid, " +
		"zitierbare Aussagen direkt belegt, keine Meinungen ohne Kennzeichnung.",
	blog:
		"Blog: zugänglich, ansprechend, erste Person erlaubt, direkte Ansprache des Lesers, " +
		"lebendige Sprache, Zwischenüberschriften als Orientierung.",
	academic:
		"Akademisch: präzise Terminologie, passive Formulierungen, klare Abschnittsstruktur " +
		"(Einleitung, Hauptteil, Schluss), Quellenverweise inline.",
	editorial:
		"Leitartikel: klare Haltung, argumentativ, Bezug zur aktuellen Debatte, " +
		"stützt sich auf Fakten aber formuliert Bewertung.",
	explanatory:
		"Erklärstück: vereinfacht komplexe Sachverhalte, Analogien und Beispiele, " +
		"schrittweise Struktur, Leserfragen antizipieren.",
};

/**
 * Builds the German system prompt for the writer model.
 *
 * @param style     Key into STYLE_GUIDE; its text is placed after "STIL:".
 * @param language  Language code; "de" and "en" are spelled out in German,
 *                  any other value is passed through unchanged.
 * @param wordCount Approximate target length mentioned in the prompt.
 * @returns The complete system prompt including the expected JSON schema and rules.
 */
function buildWriterSystemPrompt(style: Style, language: string, wordCount: number): string {
	// Spell out the two known language codes; everything else is used verbatim.
	let langName: string;
	switch (language) {
		case "de":
			langName = "Deutsch";
			break;
		case "en":
			langName = "Englisch";
			break;
		default:
			langName = language;
	}

	const prompt = `Du bist ein erfahrener Autor. Schreibe einen Artikel nach folgenden Vorgaben:

STIL: ${STYLE_GUIDE[style]}
SPRACHE: ${langName}
LÄNGE: ca. ${wordCount} Wörter

Antworte AUSSCHLIESSLICH mit einem JSON-Objekt gemäß folgendem Schema:
{
  "title": "Artikeltitel (string)",
  "lead": "Einleitungsabsatz (string)",
  "body": "Haupttext mit Quellenangaben [N] (string)",
  "conclusion": "Schlussabsatz oder null",
  "editorial_notes": "Was fehlt für einen vollständigen Artikel? (string)"
}

REGELN:
- Alle Felder required: title, lead, body, conclusion, editorial_notes
- conclusion darf null sein
- Verwende NUR die vom Nutzer übergebenen verifizierten Claims als Faktengrundlage
- Kennzeichne jeden Fakt mit Inline-Quellenverweisen [N] aus der Beleg-Liste
- Erfinde keine Fakten, Zahlen oder Zitate
- Kein Freitext vor oder nach dem JSON-Objekt`;
	return prompt;
}

/** Minimal view of a verified claim as it is presented to the writer model. */
type ClaimForWriting = {
	id: string;
	text: string;
	// Sources that support the claim; title may be unknown.
	sources: Array<{ url: string; title: string | null }>;
};

/**
 * Builds the user prompt listing every verified claim together with its numbered sources.
 *
 * Source numbers run sequentially (1, 2, 3, ...) across all claims, in claim
 * order and then source order. This is the same numbering buildSourceIndex
 * produces, so an inline reference [N] written by the model resolves to entry
 * N of the rendered source list.
 *
 * @param claims Verified claims that may be used as the factual basis.
 * @param topic  Article topic inserted into the instruction line.
 * @returns The user prompt, prefixed with "/no_think".
 */
function buildWriterUserPrompt(claims: ClaimForWriting[], topic: string): string {
	// Running counter shared by all claims so every source gets a unique number.
	let sourceNumber = 0;
	const claimBlocks: string[] = [];

	for (const c of claims) {
		const srcLines: string[] = [];
		for (const s of c.sources) {
			sourceNumber += 1;
			srcLines.push(`[${sourceNumber}] ${s.title ?? s.url} (${s.url})`);
		}
		const srcList = srcLines.join("\n  ");
		claimBlocks.push(`Claim ${c.id}: ${c.text}\n  Belege:\n  ${srcList || "(keine URL)"}`);
	}

	const claimsText = claimBlocks.join("\n\n");

	return `/no_think\nSchreibe einen Artikel zum Thema: "${topic}"\n\nVERIFIZIERTE FAKTEN (nur diese dürfen verwendet werden):\n${claimsText}`;
}

// ---------------------------------------------------------------------------
// llama.cpp-Aufruf
// ---------------------------------------------------------------------------

/**
 * Waits for `ms` milliseconds, but resolves early when `signal` is aborted.
 * Never rejects; the caller decides how to react to the abort.
 */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise<void>((resolve) => {
		if (signal?.aborted) {
			resolve();
			return;
		}
		const finish = (): void => {
			clearTimeout(timer);
			signal?.removeEventListener("abort", finish);
			resolve();
		};
		const timer = setTimeout(finish, ms);
		signal?.addEventListener("abort", finish, { once: true });
	});
}

/**
 * Sends the writing request to the llama.cpp chat-completions endpoint and
 * returns the validated text fields of the model's JSON answer.
 *
 * Network-level failures (fetch throwing) are retried up to MAX_RETRIES times
 * with RETRY_DELAY_MS between attempts. A request cancelled through `signal`
 * is never retried. Non-2xx HTTP responses are not retried.
 *
 * @param claims    Verified claims passed to the user prompt.
 * @param style     Writing style for the system prompt.
 * @param topic     Article topic for the user prompt.
 * @param wordCount Approximate target length.
 * @param language  Language code for the system prompt.
 * @param model     Model name sent in the request body.
 * @param signal    Optional abort signal for the HTTP request.
 * @param logger    Optional logger; defaults to nullLogger.
 * @returns The normalised text fields plus token usage and latency.
 *          `lead` and `editorial_notes` default to "" and `conclusion` to
 *          null when the model omits them or returns a non-string.
 * @throws Error when all fetch attempts fail, the server answers with a
 *         non-OK status, the answer is empty, is not valid JSON, or lacks a
 *         string `title` or `body`. Rethrows the fetch error unchanged when
 *         the request was aborted.
 */
async function writeWithLlama(
	claims: ClaimForWriting[],
	style: Style,
	topic: string,
	wordCount: number,
	language: string,
	model: string,
	signal?: AbortSignal,
	logger?: Logger
): Promise<{ raw: Pick<ArticleDraft, "title" | "lead" | "body" | "conclusion" | "editorial_notes">; tokensIn: number; tokensOut: number; latencyMs: number }> {
	const log = logger ?? nullLogger;
	const t0 = Date.now();

	const requestBody = {
		model,
		messages: [
			{ role: "system", content: buildWriterSystemPrompt(style, language, wordCount) },
			{ role: "user", content: buildWriterUserPrompt(claims, topic) },
		],
		stream: false,
		temperature: TEMPERATURE,
		max_tokens: MAX_TOKENS,
	};

	log.debug("llama.cpp-Writer gestartet", { model, claimCount: claims.length, style, language, wordCount });

	let resp: Response | null = null;
	for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
		try {
			resp = await fetch(`${LLAMA_HOST}/v1/chat/completions`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify(requestBody),
				signal,
			});
			break;
		} catch (err) {
			// A deliberately cancelled request must not be retried.
			if (signal?.aborted) throw err;

			const isLast = attempt === MAX_RETRIES;
			log.warn(`llama.cpp fetch fehlgeschlagen (Versuch ${attempt}/${MAX_RETRIES})`, {
				error: err instanceof Error ? err.message : String(err),
				retryInMs: isLast ? 0 : RETRY_DELAY_MS,
			});
			if (isLast) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen: ${err instanceof Error ? err.message : err}`);
			// Ends early on abort; the next fetch then rejects immediately and is rethrown above.
			await abortableDelay(RETRY_DELAY_MS, signal);
		}
	}

	// Only reachable without a response if the loop never ran (MAX_RETRIES < 1).
	if (!resp) throw new Error(`fetch failed nach ${MAX_RETRIES} Versuchen`);

	if (!resp.ok) {
		const errorText = await resp.text().catch(() => "");
		throw new Error(`llama.cpp API Fehler ${resp.status}: ${errorText}`);
	}

	const data = (await resp.json()) as LlamaResponse;
	const choice = data.choices?.[0];
	let raw = choice?.message?.content ?? "";

	// Reasoning fallback: if content is empty, extract the JSON from reasoning_content.
	if (!raw.trim() && choice?.message?.reasoning_content) {
		const rc = choice.message.reasoning_content;
		// Start at the LAST object opening that reaches a "title" key without
		// crossing another brace, so earlier drafts or schema echoes in the
		// reasoning text are skipped. The block ends at the final "}".
		const openings = [...rc.matchAll(/\{[^{}]*"title"\s*:/g)];
		const lastOpening = openings.length > 0 ? openings[openings.length - 1] : undefined;
		const start = lastOpening?.index;
		const end = rc.lastIndexOf("}");
		if (start !== undefined && end > start) {
			raw = rc.slice(start, end + 1);
			log.warn("content leer — JSON aus reasoning_content extrahiert (Thinking-Modus aktiv trotz /no_think)", {
				finishReason: choice.finish_reason,
				rawLength: raw.length,
			});
		}
	}

	// Strip a surrounding Markdown code fence, if any.
	const cleanedRaw = raw
		.replace(/^```(?:json)?\s*/i, "")
		.replace(/\s*```$/i, "")
		.trim();

	log.debug("llama.cpp-Writer Antwort", {
		promptTokens: data.usage?.prompt_tokens,
		outputTokens: data.usage?.completion_tokens,
		finishReason: choice?.finish_reason,
		rawLength: cleanedRaw.length,
	});

	if (!cleanedRaw) throw new Error("Leere Antwort von llama.cpp-Writer");

	let parsed: unknown;
	try {
		parsed = JSON.parse(cleanedRaw);
	} catch {
		throw new Error(`llama.cpp-Writer-Ausgabe ist kein gültiges JSON: ${cleanedRaw.slice(0, 200)}`);
	}

	// JSON.parse may yield null or a primitive; treat those like an object without keys
	// so they fail the structure check below instead of crashing on property access.
	const p: Record<string, unknown> =
		typeof parsed === "object" && parsed !== null ? (parsed as Record<string, unknown>) : {};
	const title = p.title;
	const articleBody = p.body;
	if (typeof title !== "string" || typeof articleBody !== "string") {
		throw new Error(`Ungültige Struktur: 'title' oder 'body' fehlt. Keys: ${Object.keys(p).join(", ")}`);
	}

	return {
		// Return only the known fields, each coerced to its declared type, so a
		// sloppy model answer cannot leak undefined or stray keys into the draft.
		raw: {
			title,
			lead: typeof p.lead === "string" ? p.lead : "",
			body: articleBody,
			conclusion: typeof p.conclusion === "string" ? p.conclusion : null,
			editorial_notes: typeof p.editorial_notes === "string" ? p.editorial_notes : "",
		},
		tokensIn: data.usage?.prompt_tokens ?? 0,
		tokensOut: data.usage?.completion_tokens ?? 0,
		latencyMs: Date.now() - t0,
	};
}

// ---------------------------------------------------------------------------
// Quellenverzeichnis aufbauen
// ---------------------------------------------------------------------------

/**
 * Flattens the sources of all claims into one list numbered from 1.
 *
 * Numbers are assigned in claim order, then in source order within a claim.
 *
 * @param claims Claims whose sources should be indexed.
 * @returns One entry per source, carrying its number and the owning claim's ID.
 */
function buildSourceIndex(claims: ClaimForWriting[]): Array<{ number: number; url: string; title: string | null; claim_id: string }> {
	let counter = 0;
	return claims.flatMap((claim) =>
		claim.sources.map((src) => {
			counter += 1;
			return { number: counter, url: src.url, title: src.title, claim_id: claim.id };
		})
	);
}

// ---------------------------------------------------------------------------
// Hauptfunktion
// ---------------------------------------------------------------------------

/**
 * Writes an article draft from a verification report.
 *
 * Only results with status "supported" are passed to the model, and for each
 * of them only the sources flagged as supporting the claim. The IDs of all
 * other results are recorded in `excluded_claims`.
 *
 * @param report  Verification report to draw the claims from.
 * @param options Optional overrides: style (default "journalistic"), topic
 *                (default: report.source_text_summary, else "Artikel"),
 *                wordCount (default 400), language (default "de"), model
 *                (default DEFAULT_MODEL), abort signal and logger.
 * @returns The assembled draft together with provider, model, cost and latency.
 * @throws Error when the report contains no "supported" claim, or when the
 *         llama.cpp call fails.
 */
export async function writeFromReport(
	report: VerificationReport,
	options?: {
		style?: Style;
		topic?: string;
		wordCount?: number;
		language?: string;
		model?: string;
		signal?: AbortSignal;
		logger?: Logger;
	}
): Promise<WriteResult> {
	const opts = options ?? {};
	const log = opts.logger ?? nullLogger;
	const style = opts.style ?? "journalistic";
	const wordCount = opts.wordCount ?? 400;
	const language = opts.language ?? "de";
	const model = opts.model ?? DEFAULT_MODEL;

	// Partition the report: usable claims versus IDs of everything else.
	const supported = report.results.filter((entry) => entry.status === "supported");
	const excluded = report.results
		.filter((entry) => entry.status !== "supported")
		.map((entry) => entry.claim_id);

	if (supported.length === 0) {
		throw new Error("Keine verifizierten (supported) Claims im Report — kein Artikel möglich.");
	}

	const topic = opts.topic ?? report.source_text_summary ?? "Artikel";

	// Reduce every supported result to what the prompt needs, keeping only supporting sources.
	const claims: ClaimForWriting[] = supported.map((entry) => {
		const supportingSources = entry.sources.filter((src) => src.supports_claim);
		return {
			id: entry.claim_id,
			text: entry.claim_text,
			sources: supportingSources.map((src) => ({ url: src.url, title: src.title })),
		};
	});

	log.info(`llama.cpp-Writer: ${claims.length} Claims, Stil: ${style}, Sprache: ${language}, Ziel: ${wordCount} Wörter`);

	const result = await writeWithLlama(claims, style, topic, wordCount, language, model, opts.signal, log);
	const { raw, latencyMs } = result;

	const sources = buildSourceIndex(claims);

	// Actual length: whitespace-separated tokens of lead, body and conclusion.
	const fullText = `${raw.lead} ${raw.body} ${raw.conclusion ?? ""}`;
	const wordCountActual = fullText.split(/\s+/).filter(Boolean).length;

	const draft: ArticleDraft = {
		...raw,
		schema_version: "1.0.0" as const,
		style,
		language,
		word_count: wordCountActual,
		claim_ids_used: claims.map((claim) => claim.id),
		sources,
		excluded_claims: excluded,
		editorial_notes: raw.editorial_notes ?? "",
	};

	return { draft, provider: "llama", model, costUSD: 0, latencyMs };
}

// ---------------------------------------------------------------------------
// Formatierung
// ---------------------------------------------------------------------------

/**
 * Renders a write result as Markdown.
 *
 * Layout: title heading, italic lead, body, optional conclusion after a rule,
 * optional numbered source list, optional note on excluded claims, optional
 * editorial notes, and a footer with model, word count and latency.
 *
 * @param result Result of writeFromReport.
 * @returns The Markdown text, lines joined with "\n".
 */
export function formatDraft(result: WriteResult): string {
	const { draft, model, latencyMs } = result;
	const { conclusion, sources, excluded_claims: excludedIds, editorial_notes: notes } = draft;

	const out: string[] = [`# ${draft.title}`, "", `_${draft.lead}_`, "", draft.body];

	if (conclusion) {
		out.push("", "---", conclusion);
	}

	if (sources.length > 0) {
		out.push("\n**Quellen:**");
		for (const src of sources) {
			// Fall back to the URL as link text when no title is known.
			const linkText = src.title ?? src.url;
			out.push(`[${src.number}] [${linkText}](${src.url})`);
		}
	}

	if (excludedIds.length > 0) {
		out.push(`\n_${excludedIds.length} Claim(s) ausgeschlossen (nicht verifiziert): ${excludedIds.join(", ")}_`);
	}

	if (notes) {
		out.push(`\n**Redaktionshinweise:** ${notes}`);
	}

	const seconds = (latencyMs / 1000).toFixed(1);
	out.push(`\n_[llama.cpp: ${model} · ${draft.word_count} Wörter · kostenlos (lokal) · ${seconds}s]_`);

	return out.join("\n");
}

// ---------------------------------------------------------------------------
// Pi-Extension
// ---------------------------------------------------------------------------

/**
 * TypeBox schema for the parameters of the write_article_llama tool.
 * Only reportJson is required; all other fields are optional overrides.
 */
const PARAMS = Type.Object({
	// Serialized VerificationReport; parsed with JSON.parse in the tool's execute handler.
	reportJson: Type.String({
		description:
			"JSON-String eines VerificationReport (Ausgabe von verify_article_llama --json oder verify_article_llama). " +
			"Nur 'supported'-Claims werden für den Artikel verwendet.",
	}),
	topic: Type.Optional(
		Type.String({ description: "Artikelthema / Überschrift. Standard: wird aus dem Report abgeleitet." })
	),
	// Must stay in sync with the Style union.
	style: Type.Optional(
		Type.Union(
			[
				Type.Literal("journalistic"),
				Type.Literal("blog"),
				Type.Literal("academic"),
				Type.Literal("editorial"),
				Type.Literal("explanatory"),
			],
			{ description: "Schreibstil. Standard: journalistic." }
		)
	),
	wordCount: Type.Optional(
		Type.Number({ description: "Ziel-Wortanzahl. Standard: 400." })
	),
	language: Type.Optional(
		Type.String({ description: "Sprache (ISO 639-1). Standard: de." })
	),
	model: Type.Optional(
		Type.String({ description: "llama.cpp-Modell-Override." })
	),
});

/**
 * Pi extension entry point: registers the `write_article_llama` tool.
 *
 * The tool parses the supplied report JSON, delegates to writeFromReport and
 * returns the formatted draft. Any failure is reported as a text result
 * instead of being thrown.
 *
 * @param pi Extension API used to register the tool.
 */
export default function llamaWriterExtension(pi: ExtensionAPI) {
	const description =
		"Schreibt einen Artikel ausschließlich auf Basis verifizierter Claims aus einem VerificationReport. " +
		"Verwendet llama.cpp lokal (kostenlos, kein Ollama-Timeout bei Thinking-Modellen). " +
		"BEVORZUGT gegenüber write_article (Ollama). " +
		"Workflow: verify_article_llama → write_article_llama.";

	const promptGuidelines = [
		"PREFERRED: Use write_article_llama for all article generation (local, free, no timeout issues).",
		"Use write_article (Ollama) only when explicitly requested by the user.",
		"Always pass the full JSON output of verify_article or verify_article_llama as 'reportJson'.",
		"Ask the user for the desired style (journalistic, blog, academic, editorial, explanatory) if not specified.",
		"Show the full formatted draft including sources and editorial notes.",
		"Point out excluded claims to the user — these may be important context that was removed.",
		"If editorial_notes mention missing information, suggest running additional research.",
	];

	pi.registerTool({
		name: "write_article_llama",
		label: "Artikel schreiben (llama.cpp)",
		description,
		promptGuidelines,
		parameters: PARAMS,
		async execute(_toolCallId, params, signal) {
			try {
				const { reportJson, style, topic, wordCount, language, model } = params;
				const report = JSON.parse(reportJson) as VerificationReport;
				const result = await writeFromReport(report, { style, topic, wordCount, language, model, signal });
				const { draft } = result;

				return {
					content: [{ type: "text", text: formatDraft(result) }],
					details: {
						wordCount: draft.word_count,
						claimsUsed: draft.claim_ids_used.length,
						claimsExcluded: draft.excluded_claims.length,
						provider: result.provider,
						latencyMs: result.latencyMs,
					},
				};
			} catch (err) {
				// Report the failure as tool output rather than throwing.
				let msg = "Unbekannter Fehler";
				if (err instanceof Error) msg = err.message;
				return { content: [{ type: "text", text: `Artikelgenerierung fehlgeschlagen: ${msg}` }] };
			}
		},
	});
}

// ---------------------------------------------------------------------------
// CLI
// ---------------------------------------------------------------------------

/**
 * Command-line entry point.
 *
 * Reads a VerificationReport either from a stored job (--from-job <slug>) or
 * from stdin (--from-report), writes the article and prints it as Markdown or,
 * with --json, as the raw draft. In job mode the Markdown is also saved as
 * article.md and the job metadata is updated.
 *
 * Exits with code 1 on invalid arguments, missing or unparsable input, or
 * when writing the article fails.
 */
async function runCli() {
	const args = process.argv.slice(2);

	if (args.length === 0 || args[0] === "--help") {
		console.log(`
Llama-Writer — Schreibt Artikel via llama.cpp auf Basis verifizierter Claims

Verwendung:
  # Via Job-Speicher (empfohlen):
  npx tsx agenten/llama-verify-article.ts --job-id umerziehung "$(cat artikel.txt)"
  npx tsx agenten/llama-writer.ts --from-job umerziehung --style blog

  # Via Pipe:
  npx tsx agenten/llama-verify-article.ts --json "..." | npx tsx agenten/llama-writer.ts --from-report

Optionen:
  --from-report       Lese VerificationReport von stdin (JSON)
  --from-job <slug>   Lese report.json aus Job ~/.pi/agent/jobs/<datum>_<slug>/
                      Speichert article.md automatisch zurück in den Job
  --style <s>         journalistic|blog|academic|editorial|explanatory (Standard: journalistic)
  --topic <text>      Artikelthema
  --words <n>         Ziel-Wortanzahl (Standard: 400)
  --lang <code>       Sprache (Standard: de)
  --model <name>      Modell-Override (Standard: ${DEFAULT_MODEL})
  --json              Ausgabe als JSON
  --verbose           Ausführliches Logging
  --help              Diese Hilfe
`);
		process.exit(0);
	}

	let fromReport = false;
	let fromJobSlug: string | undefined;
	let style: Style = "journalistic";
	let topic: string | undefined;
	let wordCount = 400;
	let language = "de";
	let model: string | undefined;
	let jsonOutput = false;
	let verbose = false;

	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		if (arg === "--from-report") fromReport = true;
		else if (arg === "--from-job" && args[i + 1]) fromJobSlug = args[++i];
		else if (arg === "--style" && args[i + 1]) {
			const value = args[++i];
			// Reject unknown styles up front; otherwise the prompt would contain "STIL: undefined".
			if (!Object.prototype.hasOwnProperty.call(STYLE_GUIDE, value)) {
				console.error(`Fehler: Unbekannter Stil "${value}". Erlaubt: ${Object.keys(STYLE_GUIDE).join("|")}`);
				process.exit(1);
			}
			style = value as Style;
		}
		else if (arg === "--topic" && args[i + 1]) topic = args[++i];
		else if (arg === "--words" && args[i + 1]) {
			const value = args[++i];
			const parsedWords = parseInt(value, 10);
			// parseInt yields NaN for non-numeric input; a target length must be a positive number.
			if (!Number.isFinite(parsedWords) || parsedWords <= 0) {
				console.error(`Fehler: Ungültige Wortanzahl "${value}" für --words.`);
				process.exit(1);
			}
			wordCount = parsedWords;
		}
		else if (arg === "--lang" && args[i + 1]) language = args[++i];
		else if (arg === "--model" && args[i + 1]) model = args[++i];
		else if (arg === "--json") jsonOutput = true;
		else if (arg === "--verbose") verbose = true;
	}

	const logger = verbose ? createLogger({ verbose: true }) : nullLogger;

	let report: VerificationReport;
	let jobDir: string | undefined;

	if (fromJobSlug) {
		const dir = findJobDir(fromJobSlug);
		if (!dir) {
			console.error(`Fehler: Kein Job mit Slug "${fromJobSlug}" gefunden in ~/.pi/agent/jobs/`);
			console.error("Tipp: Zuerst llama-verify-article.ts --job-id <slug> ausführen.");
			process.exit(1);
		}
		jobDir = dir;
		const loaded = loadJobFile<VerificationReport>(dir, "report.json");
		if (!loaded) {
			console.error(`Fehler: Kein report.json in Job ${dir}`);
			console.error("Tipp: llama-verify-article.ts --job-id <slug> muss zuerst abgeschlossen werden.");
			process.exit(1);
		}
		report = loaded;
		// Progress messages go to stderr so stdout stays clean for the article.
		if (!jsonOutput) console.error(`\nJob: ${dir}\nSchreibe ${style}-Artikel (${wordCount} Wörter, ${language})...\n`);
	} else if (fromReport) {
		const chunks: Buffer[] = [];
		for await (const chunk of process.stdin) chunks.push(chunk as Buffer);
		const input = Buffer.concat(chunks).toString("utf-8").trim();
		if (!input) { console.error("Fehler: Kein Input von stdin."); process.exit(1); }
		let parsedReport: unknown;
		try {
			parsedReport = JSON.parse(input);
		} catch (err) {
			// Malformed piped input gets a readable message instead of an unhandled rejection.
			console.error("Fehler: Input von stdin ist kein gültiges JSON:", err instanceof Error ? err.message : err);
			process.exit(1);
		}
		report = parsedReport as VerificationReport;
		if (!jsonOutput) console.error(`\nSchreibe ${style}-Artikel (${wordCount} Wörter, ${language})...\n`);
	} else {
		console.error("Fehler: --from-report oder --from-job <slug> erforderlich.");
		process.exit(1);
	}

	try {
		const result = await writeFromReport(report, { style, topic, wordCount, language, model, logger });

		if (jobDir) {
			saveJobFile(jobDir, "article.md", formatDraft(result));
			updateJobMeta(jobDir, {
				status: "completed",
				steps: {
					write: {
						completedAt: new Date().toISOString(),
						style,
						wordCount: result.draft.word_count,
						provider: result.provider,
						costUSD: 0,
					},
				},
			});
			if (!jsonOutput) process.stderr.write(`\n  Artikel in Job gespeichert: ${jobDir}/article.md\n`);
		}

		console.log(jsonOutput ? JSON.stringify(result.draft, null, 2) : formatDraft(result));
	} catch (err) {
		// Record the failure on the job before exiting.
		if (jobDir) updateJobMeta(jobDir, { status: "failed" });
		console.error("Fehler:", err instanceof Error ? err.message : err);
		process.exit(1);
	}
}

// Run the CLI only when this file is the script Node was started with,
// not when it is imported as an extension module.
const __filename = fileURLToPath(import.meta.url);
if (process.argv[1] === __filename) {
	// runCli is async: handle a rejection explicitly instead of leaving a floating promise.
	runCli().catch((err: unknown) => {
		console.error("Fehler:", err instanceof Error ? err.message : err);
		process.exit(1);
	});
}