feat: automatische SemVer-Versionierung nach SHIP + /version-Command

Neuer /version-Command und automatischer Trigger nach SHIP-Verdikt in /optimize:

- getCurrentVersion() liest höchsten vX.Y.Z-Tag (git tag -l | sort -V)
- analyzeBumpType() klassifiziert Commits (feat! → major, feat: → minor, fix: → patch)
- detectVersionFile() findet package.json / Cargo.toml / pyproject.toml / VERSION
- applyVersionBump() schreibt Version in Manifest + chore-Commit
- runVersionBump() zeigt ctx.ui.select()-Dialog mit empfohlenem Bump-Typ

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-22 23:49:53 +02:00 · 2026-05-22 23:49:53 +02:00 · e13e9382ff
commit e13e9382ff
parent a6f7f968b5
4 changed files with 568 additions and 251 deletions
--- a/models.json
+++ b/models.json
@ -83,7 +83,7 @@
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 262144,
-          "maxTokens": 8192,
+          "maxTokens": 16384,
          "cost": {
            "input": 0,
            "output": 0,
--- a/pi-coder-judge-extension.ts
+++ b/pi-coder-judge-extension.ts
@ -378,14 +378,15 @@ async function switchModel(
  ctx: ExtensionCommandContext,
  provider: string,
  modelId: string
-): Promise<void> {
+): Promise<boolean> {
  const model = ctx.modelRegistry.find(provider, modelId);
  if (!model) {
    ctx.ui.notify(`Modell ${provider}/${modelId} nicht gefunden`, "error");
-    return;
+    return false;
  }
  const ok = await pi.setModel(model);
  if (!ok) ctx.ui.notify(`Kein API-Key für ${modelId}`, "warning");
+  return ok !== false;
 }

 // Sendet eine Nachricht und wartet bis der Agent fertig ist.
@ -412,6 +413,47 @@ async function sendAndWait(
  await ctx.waitForIdle();
 }

+// Checks via POST /v1/chat/completions whether the model server is ready to serve.
+// /health and /v1/models already respond while the model is still being loaded — only a
+// real completion request reliably yields HTTP 200 once the model is ready.
+//
+// Polls with a 3 s pause between attempts (each attempt is capped at 5 s by curl) until
+// `maxWaitMs` has elapsed. Resolves to true as soon as the server answers with HTTP 200,
+// and to false when the deadline passes without a 200.
+async function waitUntilModelReady(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext,
+  port: number,
+  modelAlias: string,
+  maxWaitMs = 180_000
+): Promise<boolean> {
+  const deadline = Date.now() + maxWaitMs;
+  const body = JSON.stringify({
+    model: modelAlias,
+    messages: [{ role: "user", content: "ping" }],
+    max_tokens: 1, temperature: 0.0, stream: false,
+  });
+  // The JSON body is handed to bash as positional parameter $1 and never becomes part of
+  // the script text, so special characters in modelAlias cannot inject shell code. This
+  // avoids writing (and later deleting) a temporary file for every readiness check.
+  // --data-raw keeps curl from interpreting a leading "@" as a file reference.
+  const script =
+    `curl -s -o /dev/null -w "%{http_code}" --max-time 5 ` +
+    `-X POST http://localhost:${port}/v1/chat/completions ` +
+    `-H "Content-Type: application/json" ` +
+    `--data-raw "$1"`;
+  // Derive the announced wait time from maxWaitMs instead of hard-coding it
+  // (the default of 180 000 ms still yields "3 min").
+  const waitMinutes = Math.max(1, Math.round(maxWaitMs / 60_000));
+  let notified = false;
+  while (Date.now() < deadline) {
+    const r = await pi.exec("bash", ["-c", script, "_", body], { cwd: ctx.cwd });
+    if (r.stdout?.trim() === "200") return true;
+    // Tell the user once that we are waiting; later attempts stay silent.
+    if (!notified) {
+      ctx.ui.notify(`Modell-Server (Port ${port}) lädt noch — warte bis zu ${waitMinutes} min…`, "info");
+      notified = true;
+    }
+    await new Promise(res => setTimeout(res, 3000));
+  }
+  return false;
+}
+
 // Führt einen Shell-Befehl aus und gibt stdout+stderr zurück (max. 6000 Zeichen).
 // Erkennt Test-Suiten im Projektverzeichnis anhand von Framework-Markern.
 // Alle Checks laufen parallel — konservativ, keine False Positives.
@ -427,8 +469,8 @@ async function detectTestCommands(
    ], { cwd: ctx.cwd }),
    pi.exec("bash", ["-c",
      "test -f package.json && " +
-      "node -e \"const p=require('./package.json');process.exit(" +
-      "p.scripts&&p.scripts.test&&!p.scripts.test.includes('no test')?0:1)\" 2>/dev/null"
+      "grep -q '\"test\"' package.json && " +
+      "! grep -q 'no test' package.json"
    ], { cwd: ctx.cwd }),
    pi.exec("bash", ["-c", "test -f Cargo.toml"], { cwd: ctx.cwd }),
    pi.exec("bash", ["-c",
@ -495,14 +537,18 @@ function getLastAssistantText(ctx: ExtensionCommandContext): string {
 }

 // Extrahiert das Urteil aus einer Judge-Antwort.
+// "UNREADABLE" wenn kein Urteil erkennbar — unterscheidbar von einem expliziten FAIL.
 function parseVerdict(text: string): string {
  const m = text.match(/Urteil:\s*(PASS WITH CONCERNS|PASS|FAIL)/i);
-  return m ? m[1].toUpperCase() : "";
+  return m ? m[1].toUpperCase() : "UNREADABLE";
 }

 // Extrahiert den Blocker-Abschnitt für die Loop-Erkennung.
+// Erkennt Bullet-Listen (- / – / *), Bold (**Blocker**) und Headings (## Blocker).
 function parseBlockers(text: string): string {
-  const m = text.match(/[-–*]\s*Blocker[:\n]([\s\S]*?)(?:\n[-–*]\s*Major|\n[-–*]\s*Minor|$)/i);
+  const m = text.match(
+    /(?:\*\*Blocker\*\*|##\s*Blocker|[-–*]\s*Blocker)[:\n]([\s\S]*?)(?:\n(?:\*\*Major\*\*|##\s*Major|[-–*]\s*Major)|\n(?:\*\*Minor\*\*|##\s*Minor|[-–*]\s*Minor)|$)/i
+  );
  return m ? m[1].trim() : "";
 }

@ -524,6 +570,9 @@ async function getFilesSinceTag(
    { cwd: ctx.cwd }
  );

+  // Bei git-Fehler alles verarbeiten (sicherer als stilles Überspringen)
+  if (diff.code !== 0) return null;
+
  return diff.stdout.trim()
    .split("\n")
    .filter(f =>
@ -539,48 +588,75 @@ async function getFilesSinceTag(
 // Dokumentations-Phase: inkrementell via Git-Tags, nur geänderte Dateien werden verarbeitet.
 // Wird von /update_doku und /optimize --with-doku genutzt.
 async function runUpdateDoku(pi: ExtensionAPI, ctx: ExtensionCommandContext): Promise<void> {
-  await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
+  if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) {
+    ctx.ui.notify("Coder-Modell nicht verfügbar — Dokumentations-Phase abgebrochen", "error");
+    return;
+  }
+
+  // Jede Phase läuft unabhängig — Fehler in Phase 1 blockieren nicht Phase 2/3.
+  // Tag wird nur NACH erfolgreichem sendAndWait gesetzt.

  // Phase 1: Code-Kommentare
+  try {
    const commentFiles = await getFilesSinceTag(pi, ctx, "docs-last-commented");
    if (commentFiles === null) {
      ctx.ui.setStatus("update_doku", "1/3: Code wird kommentiert (alle Dateien)…");
+      currentActivity = "Coder kommentiert Code…";
      await sendAndWait(pi, ctx, commentCodePrompt());
+      await pi.exec("bash", ["-c", "git tag -f docs-last-commented"], { cwd: ctx.cwd });
    } else if (commentFiles.length === 0) {
      ctx.ui.notify("Code-Kommentare: keine Änderungen seit letztem Lauf – übersprungen.", "info");
    } else {
      ctx.ui.setStatus("update_doku", `1/3: Code wird kommentiert (${commentFiles.length} Datei(en))…`);
+      currentActivity = "Coder kommentiert Code…";
      await sendAndWait(pi, ctx, commentCodePromptIncremental(commentFiles));
-  }
      await pi.exec("bash", ["-c", "git tag -f docs-last-commented"], { cwd: ctx.cwd });
+    }
+  } catch (e: any) {
+    ctx.ui.notify(`1/3 Code-Kommentare fehlgeschlagen: ${String(e?.message ?? e)}`, "error");
+  }

  // Phase 2: README.md
+  try {
    const readmeFiles = await getFilesSinceTag(pi, ctx, "docs-last-readme");
    if (readmeFiles === null) {
      ctx.ui.setStatus("update_doku", "2/3: README.md wird geschrieben…");
+      currentActivity = "Coder schreibt README…";
      await sendAndWait(pi, ctx, readmeMdPrompt());
+      await pi.exec("bash", ["-c", "git tag -f docs-last-readme"], { cwd: ctx.cwd });
    } else if (readmeFiles.length === 0) {
      ctx.ui.notify("README.md: keine Änderungen seit letztem Lauf – übersprungen.", "info");
    } else {
      ctx.ui.setStatus("update_doku", `2/3: README.md wird geprüft (${readmeFiles.length} Datei(en) geändert)…`);
+      currentActivity = "Coder schreibt README…";
      await sendAndWait(pi, ctx, readmeMdPromptIncremental(readmeFiles));
-  }
      await pi.exec("bash", ["-c", "git tag -f docs-last-readme"], { cwd: ctx.cwd });
+    }
+  } catch (e: any) {
+    ctx.ui.notify(`2/3 README.md fehlgeschlagen: ${String(e?.message ?? e)}`, "error");
+  }

  // Phase 3: BEDIENUNGSANLEITUNG.md
+  try {
    const bedFiles = await getFilesSinceTag(pi, ctx, "docs-last-bedienungsanleitung");
    if (bedFiles === null) {
      ctx.ui.setStatus("update_doku", "3/3: BEDIENUNGSANLEITUNG.md wird geschrieben…");
+      currentActivity = "Coder schreibt Bedienungsanleitung…";
      await sendAndWait(pi, ctx, bedienungsanleitungPrompt());
+      await pi.exec("bash", ["-c", "git tag -f docs-last-bedienungsanleitung"], { cwd: ctx.cwd });
    } else if (bedFiles.length === 0) {
      ctx.ui.notify("BEDIENUNGSANLEITUNG.md: keine Änderungen seit letztem Lauf – übersprungen.", "info");
    } else {
      ctx.ui.setStatus("update_doku", `3/3: BEDIENUNGSANLEITUNG.md wird geprüft (${bedFiles.length} Datei(en) geändert)…`);
+      currentActivity = "Coder schreibt Bedienungsanleitung…";
      await sendAndWait(pi, ctx, bedienungsanleitungPromptIncremental(bedFiles));
-  }
      await pi.exec("bash", ["-c", "git tag -f docs-last-bedienungsanleitung"], { cwd: ctx.cwd });
+    }
+  } catch (e: any) {
+    ctx.ui.notify(`3/3 BEDIENUNGSANLEITUNG.md fehlgeschlagen: ${String(e?.message ?? e)}`, "error");
+  }

-  // Abschließender Dokumentations-Commit
+  // Abschließender Dokumentations-Commit (immer, auch bei Teilfehlern)
  await pi.exec(
    "bash",
    ["-c", "git add -A && git commit -m 'docs: update comments, README, BEDIENUNGSANLEITUNG' || true"],
@ -594,6 +670,120 @@ async function runUpdateDoku(pi: ExtensionAPI, ctx: ExtensionCommandContext): Pr
  ctx.ui.notify("Dokumentations-Phase abgeschlossen. Commit angelegt.", "info");
 }

+// ── Versions-Verwaltung (SemVer + Git-Tags) ──────────────────────────────────
+
+// Reads the highest vX.Y.Z tag from `git tag -l 'v*'`.
+// Returns the version as a [major, minor, patch] tuple, or null when no tag of exactly
+// that shape exists.
+//
+// Tags that merely start with "v" but are not plain vX.Y.Z (e.g. "v2.0.0-rc1",
+// "vendor-import") are ignored rather than allowed to win the ordering: all tags are
+// parsed here and compared numerically instead of trusting the last line of `sort -V`.
+async function getCurrentVersion(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext
+): Promise<[number, number, number] | null> {
+  const res = await pi.exec("bash", ["-c", "git tag -l 'v*'"], { cwd: ctx.cwd });
+  let best: [number, number, number] | null = null;
+  for (const line of (res.stdout ?? "").split("\n")) {
+    const m = line.trim().match(/^v(\d+)\.(\d+)\.(\d+)$/);
+    if (!m) continue;
+    const cand: [number, number, number] = [+m[1], +m[2], +m[3]];
+    // Lexicographic comparison on (major, minor, patch).
+    const newer =
+      best === null ||
+      cand[0] > best[0] ||
+      (cand[0] === best[0] && (cand[1] > best[1] || (cand[1] === best[1] && cand[2] > best[2])));
+    if (newer) best = cand;
+  }
+  return best;
+}
+
+// Classifies the commits in `since..HEAD` (or all of HEAD's history when `since` is
+// omitted) according to Conventional Commits and returns the implied SemVer bump:
+//   - any type with a "!" before the colon (feat!:, fix(scope)!:, …) or a
+//     "BREAKING CHANGE" / "BREAKING-CHANGE" marker anywhere in the message → "major"
+//   - a "feat:" / "feat(scope):" subject                                     → "minor"
+//   - everything else, including an empty or failed log                      → "patch"
+async function analyzeBumpType(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext,
+  since?: string
+): Promise<"major" | "minor" | "patch"> {
+  const range = since ? `${since}..HEAD` : "HEAD";
+  // %B prints the full message (subject + body) so that BREAKING CHANGE footers are
+  // visible; %x1e (ASCII record separator) marks where each commit starts. The range is
+  // passed as positional parameter $1 instead of being spliced into the script.
+  const res = await pi.exec(
+    "bash",
+    ["-c", 'git log "$1" --format="%x1e%B" 2>/dev/null', "_", range],
+    { cwd: ctx.cwd }
+  );
+  const messages = (res.stdout ?? "")
+    .split("\x1e")
+    .map(msg => msg.trim())
+    .filter(msg => msg.length > 0);
+  // The first line of each message is the commit subject.
+  const subjects = messages.map(msg => msg.split("\n", 1)[0] ?? "");
+
+  const breaking =
+    subjects.some(s => /^[a-z]+(\([^)]+\))?!:/i.test(s)) ||
+    messages.some(msg => /BREAKING[ -]CHANGE/.test(msg));
+  if (breaking) return "major";
+  if (subjects.some(s => /^feat(\([^)]+\))?:/.test(s))) return "minor";
+  return "patch";
+}
+
+// Finds the first version manifest that exists in the working directory.
+// Candidates are probed in priority order: package.json, Cargo.toml, pyproject.toml,
+// VERSION. Returns the file name of the first hit, or null when none exists.
+async function detectVersionFile(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext
+): Promise<"package.json" | "Cargo.toml" | "pyproject.toml" | "VERSION" | null> {
+  const candidates = ["package.json", "Cargo.toml", "pyproject.toml", "VERSION"] as const;
+  for (const file of candidates) {
+    const probe = await pi.exec("bash", ["-c", 'test -f "$1"', "_", file], { cwd: ctx.cwd });
+    // The exit status is read from `.code`, the field the other exec call sites in this
+    // file check; comparing a non-existent field against 0 would never be true and no
+    // manifest would ever be detected.
+    if (probe.code === 0) return file;
+  }
+  return null;
+}
+
+// Writes the new version into the manifest file and creates a chore commit.
+//
+// manifest: "package.json", "Cargo.toml" or "pyproject.toml"; any other value is treated
+//           as a plain VERSION file.
+// version:  SemVer string without the leading "v" (e.g. "1.4.0").
+//
+// Does not throw on failure: an invalid version or a failed manifest write is reported
+// via ctx.ui.notify and no commit is attempted.
+async function applyVersionBump(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext,
+  manifest: string,
+  version: string
+): Promise<void> {
+  // `version` is spliced into shell and sed commands below, so only accept strict SemVer.
+  if (!/^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$/.test(version)) {
+    ctx.ui.notify(`Ungültige Version "${version}" — Manifest nicht geändert`, "error");
+    return;
+  }
+
+  const substitute = `s/^version = ".*"/version = "${version}"/`;
+  let cmd: string;
+  // Files to stage: the manifest first, then optional companions that the write may touch.
+  let files: string[] = [manifest];
+  if (manifest === "package.json") {
+    // --allow-same-version: do not fail when package.json already carries this version.
+    cmd = `npm version --no-git-tag-version --allow-same-version ${version}`;
+    // npm version also rewrites the lock file when one exists.
+    files = ["package.json", "package-lock.json", "npm-shrinkwrap.json"];
+  } else if (manifest === "Cargo.toml") {
+    // Restrict the substitution to the [package] table so that `version = "…"` lines of
+    // dependency tables are left alone.
+    cmd = `sed -i -e '/^\\[package\\]/,/^\\[/ ${substitute}' Cargo.toml`;
+  } else if (manifest === "pyproject.toml") {
+    // Same idea: only the [project] and [tool.poetry] tables carry the project version.
+    cmd =
+      `sed -i -e '/^\\[project\\]/,/^\\[/ ${substitute}' ` +
+      `-e '/^\\[tool\\.poetry\\]/,/^\\[/ ${substitute}' pyproject.toml`;
+  } else {
+    cmd = `printf 'v%s\\n' '${version}' > VERSION`;
+  }
+
+  const write = await pi.exec("bash", ["-c", cmd], { cwd: ctx.cwd });
+  if (write.code !== 0) {
+    ctx.ui.notify(`Version konnte nicht in ${manifest} geschrieben werden — kein Commit erstellt`, "error");
+    return;
+  }
+
+  // Stage the manifest (mandatory), then any existing companion file (best effort), and
+  // commit. File names are passed as positional parameters, not spliced into the script.
+  await pi.exec(
+    "bash",
+    ["-c",
+      'git add -- "$1" || exit 1; shift; ' +
+      'for f in "$@"; do if [ -e "$f" ]; then git add -- "$f" 2>/dev/null || true; fi; done; ' +
+      `git commit -m "chore: bump version to v${version}"`,
+      "_", ...files],
+    { cwd: ctx.cwd }
+  );
+}
+
+// Main entry point of the version workflow: determines the current version from the git
+// tags, derives a recommended bump from the commits since that tag, lets the user pick a
+// bump in a select dialog, updates the manifest file when one is found, and finally
+// creates the new git tag.
+// Returns without doing anything when the working directory is not inside a git work
+// tree or when the user dismisses / skips the dialog.
+async function runVersionBump(pi: ExtensionAPI, ctx: ExtensionCommandContext): Promise<void> {
+  // Early exit when there is no git repository. The exit status is read from `.code`,
+  // the field the other exec call sites in this file check; with a non-existent field
+  // the comparison against 0 would always fail and the function would never proceed.
+  const gitCheck = await pi.exec("bash", ["-c", "git rev-parse --is-inside-work-tree 2>/dev/null"], { cwd: ctx.cwd });
+  if (gitCheck.code !== 0) return;
+
+  const current = await getCurrentVersion(pi, ctx);
+  const tag = current ? `v${current[0]}.${current[1]}.${current[2]}` : undefined;
+  const bump = await analyzeBumpType(pi, ctx, tag);
+
+  const [maj, min, pat] = current ?? [0, 0, 0];
+  const initial = !current;
+  // Candidate versions per bump type; without any existing tag fixed starting points are offered.
+  const versions: Record<"patch" | "minor" | "major", string> = initial
+    ? { patch: "v0.0.1", minor: "v0.1.0", major: "v1.0.0" }
+    : { patch: `v${maj}.${min}.${pat + 1}`, minor: `v${maj}.${min + 1}.0`, major: `v${maj + 1}.0.0` };
+
+  // For the very first tag "minor" (v0.1.0) is recommended regardless of the commit analysis.
+  const recommended: "patch" | "minor" | "major" = initial ? "minor" : bump;
+  const bumpTypes = ["patch", "minor", "major"] as const;
+  const labels = bumpTypes.map(
+    t => `${t} → ${versions[t]}${t === recommended ? " (empfohlen)" : ""}`
+  );
+
+  const choice = await ctx.ui.select({
+    title: "Version",
+    message: current
+      ? `Aktuelle Version: ${tag}. Commits seit letztem Tag: ${bump}-Bump erkannt.`
+      : "Noch kein Versions-Tag vorhanden.",
+    options: [...labels, "Überspringen"],
+  });
+
+  if (!choice || choice.startsWith("Überspringen")) return;
+
+  // Map the chosen label back to its bump type; an unrecognised answer is treated like
+  // a skip instead of crashing on an undefined lookup.
+  const chosen = bumpTypes.find(t => choice.startsWith(t));
+  if (!chosen) return;
+  const newVersion = versions[chosen].replace(/^v/, "");
+  const newTag = `v${newVersion}`;
+
+  const manifest = await detectVersionFile(pi, ctx);
+  if (manifest) {
+    await applyVersionBump(pi, ctx, manifest, newVersion);
+  }
+
+  const tagResult = await pi.exec("bash", ["-c", `git tag ${newTag}`], { cwd: ctx.cwd });
+  if (tagResult.code !== 0) {
+    ctx.ui.notify(`Tag ${newTag} existiert bereits — manuell löschen mit: git tag -d ${newTag}`, "error");
+    return;
+  }
+  ctx.ui.notify(`Version ${newTag} getaggt.`, "info");
+}
+
 // Prominente Abschluss-Notification + Widget-Update mit Uhrzeit und Ergebnis.
 function finalNotify(
  ctx: ExtensionCommandContext,
@ -601,41 +791,88 @@ function finalNotify(
  detail: string
 ): void {
  const timestamp = new Date().toLocaleTimeString("de-DE", { hour: "2-digit", minute: "2-digit" });
-  const level = verdict.includes("SHIP") && !verdict.includes("NO-SHIP") ? "warning"
-              : verdict.includes("NO-SHIP") ? "error"
+  const level = verdict.startsWith("🚀") ? "info"
+              : verdict.includes("NO-SHIP") || verdict.startsWith("⛔") ? "error"
              : verdict.includes("⚠") ? "warning"
              : "info";
  ctx.ui.notify(`${verdict}: ${detail}`, level);
  ctx.ui.setWidget("coder-judge", [
    `Letzter Lauf: ${verdict} — ${detail} (${timestamp})`,
-    "─────────────────────────────────────────",
-    "Workflow:  /coder <auftrag> | /judge | /fix | /shipit",
-    "Auto-Loop: /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"cmd\"]",
-    "Planung:   /plan <auftrag>  →  /coder | /optimize --continue | /discard",
-    "Patch:     /patch <änderung>  →  /quick_check [was]",
-    "Doku:      /update_doku  |  Neues Projekt: /new_project <pfad>",
-    "Abbruch:   Escape (Generation laufend) | /cancel (Loop nach aktuellem Schritt)",
-    "Resume:    /continue  |  Modell: auto (Coder→:8001, Judge→:8002)",
+    "/optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"cmd\"]",
+    "/fix  ·  /judge  ·  /shipit  ·  /cancel  ·  /continue  ·  /help",
  ]);
 }

 // ── Extension ────────────────────────────────────────────────────────────────

 let cancelRequested = false;
+let currentActivity = "";  // Working message for the current command context
+
+// Builds a short status line from a tool name and its arguments.
+// Returns "" for tools without a dedicated label.
+function toolExecutionLabel(toolName: string, args: Record<string, any>): string {
+  switch (toolName) {
+    case "edit":
+      return `Editiere ${args.path ?? "Datei"}…`;
+    case "write":
+      return `Schreibe ${args.path ?? "Datei"} neu…`;
+    case "read":
+      return `Lese ${args.path ?? "Datei"}…`;
+    case "grep":
+      return `Suche in ${args.path ?? args.pattern ?? "Dateien"}…`;
+    case "find":
+      return `Suche Dateien: ${args.pattern ?? ""}…`;
+    case "ls":
+      return `Verzeichnis: ${args.path ?? "."}…`;
+    case "bash": {
+      // Only the first line of the command is classified and displayed.
+      const cmd = String(args.command ?? "").trim().replace(/\n[\s\S]*/, "");
+      if (/git\s+commit/.test(cmd))                          return "Git-Commit…";
+      if (/git\s+add/.test(cmd))                             return "Stage Änderungen…";
+      // Listing tags is a read operation; it must be tested before the generic
+      // `git tag` pattern, which would otherwise label it as setting a tag.
+      if (/git\s+tag\s+(-l|--list)\b/.test(cmd))             return "Git-History lesen…";
+      if (/git\s+tag/.test(cmd))                             return "Git-Tag setzen…";
+      if (/pytest|npm test|cargo test|go test|make test/.test(cmd)) return "Tests laufen…";
+      if (/git\s+(diff|log|show)/.test(cmd))                 return "Git-History lesen…";
+      if (/patch\s+-p1/.test(cmd))                           return "Wende Patch an…";
+      if (/curl/.test(cmd))                                  return "HTTP-Request…";
+      // Fallback: show the command itself, truncated to 55 characters.
+      return `Shell: ${cmd.slice(0, 55)}${cmd.length > 55 ? "…" : ""}`;
+    }
+    case "apply_patch":
+      return "Wende Patch an…";
+    default:
+      return "";
+  }
+}

 export default function (pi: ExtensionAPI) {
  pi.on("session_start", async function (_event, ctx) {
    ctx.ui.setWidget("coder-judge", [
-      "Workflow:  /coder <auftrag> | /judge | /fix | /shipit",
-      "Auto-Loop: /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"cmd\"]",
-      "Planung:   /plan <auftrag>  →  /coder | /optimize --continue | /discard",
-      "Patch:     /patch <änderung>  →  /quick_check [was]",
-      "Doku:      /update_doku  |  Neues Projekt: /new_project <pfad>",
-      "Abbruch:   Escape (Generation laufend) | /cancel (Loop nach aktuellem Schritt)",
-      "Resume:    /continue  |  Modell: auto (Coder→:8001, Judge→:8002)",
+      "/optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"cmd\"]",
+      "/fix  ·  /judge  ·  /shipit  ·  /cancel  ·  /continue  ·  /help",
    ]);
  });

+  // ── Live-Aktivitätsstatus ────────────────────────────────────────────────
+  // turn_start: Working-Text auf aktuellen Command-Kontext setzen
+  pi.on("turn_start", function (_event, ctx) {
+    if (currentActivity) ctx.ui.setWorkingMessage(currentActivity);
+  });
+
+  // tool_execution_start: präzise Statuszeile während Tool-Ausführung
+  pi.on("tool_execution_start", function (event, ctx) {
+    const label = toolExecutionLabel(event.toolName, (event as any).args ?? {});
+    if (label) ctx.ui.setStatus("agent", label);
+  });
+
+  // tool_execution_end: Statuszeile löschen
+  pi.on("tool_execution_end", function (_event, ctx) {
+    ctx.ui.setStatus("agent", undefined);
+  });
+
+  // agent_end: Working-Text und Statuszeile zurücksetzen
+  pi.on("agent_end", function (_event, ctx) {
+    ctx.ui.setWorkingMessage();
+    ctx.ui.setStatus("agent", undefined);
+    currentActivity = "";
+  });
+
  // ── Robustes edit: Bottom-up-Reordering via tool_call-Hook ─────────────
  // Behebt "edits[n] doesn't match": Mehrere Edits auf dieselbe Datei werden
  // von hinten nach vorne sortiert, damit frühere Edits spätere Positionen nicht verschieben.
@ -673,41 +910,61 @@ export default function (pi: ExtensionAPI) {
  // ── Manuelle Kommandos ───────────────────────────────────────────────────

  pi.registerCommand("coder", {
-    description: "Legt TASK.md an, startet Implementierung → qwen3.5-coder (:8001).",
+    description: "Implementiert <auftrag> ohne Review-Loop → qwen3.5-coder (:8001).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const task = (args || "").trim();
      if (!task) {
        ctx.ui.notify("Benutzung: /coder <auftrag>", "error");
        return;
      }
+      if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+        ctx.ui.notify("Coder-Server nicht bereit (Port 8001) — start-coder.sh ausführen", "error");
+        return;
+      }
      await writeTaskMd(pi, ctx, task);
      await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
-      pi.sendUserMessage(coderKickoff(task));
+      currentActivity = "Coder implementiert…";
+      await sendAndWait(pi, ctx, coderKickoff(task));
    }
  });

  pi.registerCommand("judge", {
    description: "Review gegen TASK.md + git show HEAD → qwen3.5-judge (:8002).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
+      if (!await waitUntilModelReady(pi, ctx, 8002, "qwen3.5-judge")) {
+        ctx.ui.notify("Judge-Server nicht bereit (Port 8002) — start-judge.sh ausführen", "error");
+        return;
+      }
      await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
-      pi.sendUserMessage(judgePrompt(args || ""));
+      currentActivity = "Judge reviewt…";
+      await sendAndWait(pi, ctx, judgePrompt(args || ""));
    }
  });

  pi.registerCommand("fix", {
    description: "Fixt Judge-Kritik, committet Ergebnis → qwen3.5-coder (:8001).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
+      if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+        ctx.ui.notify("Coder-Server nicht bereit (Port 8001) — start-coder.sh ausführen", "error");
+        return;
+      }
      await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
-      pi.sendUserMessage(fixPrompt(args || ""));
+      currentActivity = "Coder fixt Judge-Kritik…";
+      await sendAndWait(pi, ctx, fixPrompt(args || ""));
    }
  });

  pi.registerCommand("shipit", {
    description: "Finale Freigabe gegen TASK.md + git log → qwen3.5-judge (:8002).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
+      if (!await waitUntilModelReady(pi, ctx, 8002, "qwen3.5-judge")) {
+        ctx.ui.notify("Judge-Server nicht bereit (Port 8002) — start-judge.sh ausführen", "error");
+        return;
+      }
      await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
      ctx.ui.notify("Judge prüft finale Freigabe — Ergebnis erscheint im Chat (SHIP / NO-SHIP)", "info");
-      pi.sendUserMessage(shipitPrompt(args || ""));
+      currentActivity = "Judge: finale Freigabe…";
+      await sendAndWait(pi, ctx, shipitPrompt(args || ""));
    }
  });

@ -720,10 +977,10 @@ export default function (pi: ExtensionAPI) {
      const maxRounds = roundsMatch ? Math.max(1, parseInt(roundsMatch[1], 10)) : 3;
      const withDoku = /--with-doku/.test(args || "");
      const continueMode = /--continue/.test(args || "");
-      const testCmdMatch = (args || "").match(/--test-cmd\s+"([^"]+)"|--test-cmd\s+(\S+)/);
-      const testCmd: string | null = testCmdMatch ? (testCmdMatch[1] ?? testCmdMatch[2]) : null;
+      const testCmdMatch = (args || "").match(/--test-cmd\s+"([^"]+)"|--test-cmd\s+'([^']+)'|--test-cmd\s+(\S+)/);
+      const testCmd: string | null = testCmdMatch ? (testCmdMatch[1] ?? testCmdMatch[2] ?? testCmdMatch[3]) : null;
      const testTimeoutMatch = (args || "").match(/--test-timeout\s+(\d+)/);
-      const testTimeout = testTimeoutMatch ? parseInt(testTimeoutMatch[1], 10) : 120;
+      const testTimeout = testTimeoutMatch ? Math.max(1, parseInt(testTimeoutMatch[1], 10)) : 120;
      const task = (args || "")
        .replace(/--rounds\s+\d+/, "")
        .replace(/--test-timeout\s+\d+/, "")
@ -738,6 +995,7 @@ export default function (pi: ExtensionAPI) {
        return;
      }

+      try {
        if (continueMode) {
          // --continue: Implementierungsphase überspringen, direkt in Judge→Fix-Schleife
          // Erweiterter Auftrag wird als Zusatzauftrag in TASK.md eingetragen (falls angegeben)
@ -747,31 +1005,36 @@ export default function (pi: ExtensionAPI) {
            ? `--continue: Zusatzauftrag in TASK.md eingetragen, überspringe Implementierung.`
            : `--continue: Überspringe Implementierung, starte direkt mit Judge-Prüfung.`;
          ctx.ui.notify(continueMsg, "info");
+
+          // Im --continue-Modus: Coder-Server jetzt prüfen, da er für die Fix-Phase gebraucht wird
+          // (in normalem Modus wird er beim coderKickoff implizit geprüft)
+          ctx.ui.setStatus("optimize", "Coder-Server wird geprüft…");
+          if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+            finalNotify(ctx, "⛔ Coder nicht erreichbar", "Port 8001 — kein HTTP 200 nach 3 min. start-coder.sh ausführen");
+            return;
+          }
        } else {
          // TASK.md anlegen und Implementierung starten
          await writeTaskMd(pi, ctx, task);
          ctx.ui.setStatus("optimize", `Starte Optimierung (max ${maxRounds} Runden)…`);
          const taskPreview = task.length > 55 ? task.slice(0, 52) + "…" : task;
          ctx.ui.setStatus("optimize", `◉ Coder liest Anforderungen + implementiert: ${taskPreview}`);
-        await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
+          if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) {
+            finalNotify(ctx, "⛔ Modell-Fehler", "Coder-Modell (llama-cpp-coder) nicht verfügbar");
+            return;
+          }
+          currentActivity = "Coder implementiert…";
          await sendAndWait(pi, ctx, coderKickoff(task));
          await tickTaskMdStatus(pi, ctx, "Implementierung");
-        if (cancelRequested) { cancelRequested = false; finalNotify(ctx, "⛔ Abgebrochen", "Nach Implementierung"); return; }
+          if (cancelRequested) { finalNotify(ctx, "⛔ Abgebrochen", "Nach Implementierung"); return; }
        }

-      // Judge-Server-Bereitschaft prüfen — bei 503 (Modell lädt noch) bis zu 60s warten.
+        // Judge-Bereitschaft via Completion-Check — /health antwortet bereits während des
+        // GPU-Ladevorgangs und ist kein verlässliches Signal. Nur HTTP 200 auf einen
+        // echten Completion-Request bedeutet: Modell ist im VRAM und bereit.
        ctx.ui.setStatus("optimize", "Judge-Server wird geprüft…");
-      let serverReady = false;
-      for (let i = 0; i < 20; i++) {
-        const hc = await pi.exec("bash", ["-c",
-          "curl -sf --max-time 3 http://localhost:8002/health || " +
-          "curl -sf --max-time 3 http://localhost:8002/v1/models"
-        ], { cwd: ctx.cwd });
-        if (hc.code === 0) { serverReady = true; break; }
-        await new Promise(r => setTimeout(r, 3000));
-      }
-      if (!serverReady) {
-        finalNotify(ctx, "⛔ Judge nicht erreichbar", "Port 8002 antwortet nicht — start-judge.sh ausführen");
+        if (!await waitUntilModelReady(pi, ctx, 8002, "qwen3.5-judge")) {
+          finalNotify(ctx, "⛔ Judge nicht erreichbar", "Port 8002 — kein HTTP 200 nach 3 min. start-judge.sh ausführen");
          return;
        }

@ -797,7 +1060,10 @@ export default function (pi: ExtensionAPI) {
        // Schleife: Judge → (PASS? fertig : Fix → nächste Runde)
        for (let round = 1; round <= maxRounds; round++) {
          const prog = "●".repeat(round - 1) + "◉" + "○".repeat(maxRounds - round);
-        await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
+          if (!await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge")) {
+            finalNotify(ctx, "⛔ Modell-Fehler", "Judge-Modell (llama-cpp-judge) nicht verfügbar");
+            return;
+          }

          if (autoTestCmds.length > 0) {
            const label = autoTestCmds.length === 1
@ -806,12 +1072,14 @@ export default function (pi: ExtensionAPI) {
            ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${label}, max. ${testTimeout}s)…`);
            const testOutput = await runTestsParallel(pi, ctx, autoTestCmds, testTimeout);
            ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge analysiert Test-Ergebnis…`);
+            currentActivity = `Judge reviewt (Runde ${round}/${maxRounds})…`;
            await sendAndWait(pi, ctx, judgeWithTestsPrompt(testOutput, ""));
          } else {
            ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge — TASK.md + letzter Commit + Tests…`);
+            currentActivity = `Judge reviewt (Runde ${round}/${maxRounds})…`;
            await sendAndWait(pi, ctx, judgePrompt(""));
          }
-        if (cancelRequested) { cancelRequested = false; finalNotify(ctx, "⛔ Abgebrochen", `Nach Judge Runde ${round}`); return; }
+          if (cancelRequested) { finalNotify(ctx, "⛔ Abgebrochen", `Nach Judge Runde ${round}`); return; }

          const judgeText = getLastAssistantText(ctx);
          verdict = parseVerdict(judgeText);
@ -833,7 +1101,11 @@ export default function (pi: ExtensionAPI) {

          if (round === maxRounds) {
            ctx.ui.setStatus("optimize", `${"●".repeat(maxRounds)} ⚠ Max. ${maxRounds} Runden ohne PASS`);
+            if (verdict === "UNREADABLE") {
+              finalNotify(ctx, "⚠ Urteil unklar", `${maxRounds} Runden – Judge-Urteil nicht erkennbar, Antwort im Chat prüfen`);
+            } else {
              finalNotify(ctx, "⚠ Kein PASS", `${maxRounds} Runden ohne PASS – bitte /judge und /fix manuell`);
+            }
            return;
          }

@ -842,15 +1114,23 @@ export default function (pi: ExtensionAPI) {
            ? (currentBlockers.length > 50 ? currentBlockers.slice(0, 47) + "…" : currentBlockers)
            : "Kritikpunkte aus Judge-Bericht";
          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Coder fixt — ${blockerHint}`);
-        await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
+          if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) {
+            finalNotify(ctx, "⛔ Modell-Fehler", "Coder-Modell (llama-cpp-coder) nicht verfügbar");
+            return;
+          }
+          currentActivity = "Coder fixt Blocker…";
          await sendAndWait(pi, ctx, fixPrompt(""));
-        if (cancelRequested) { cancelRequested = false; finalNotify(ctx, "⛔ Abgebrochen", `Nach Fix Runde ${round}`); return; }
+          if (cancelRequested) { finalNotify(ctx, "⛔ Abgebrochen", `Nach Fix Runde ${round}`); return; }
        }

        // Finale ShipIt-Prüfung nur bei PASS
        if (verdict === "PASS" || verdict === "PASS WITH CONCERNS") {
          ctx.ui.setStatus("optimize", `${"●".repeat(maxRounds)}◉ ShipIt — SHIP oder NO-SHIP?…`);
-        await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
+          if (!await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge")) {
+            finalNotify(ctx, "⛔ Modell-Fehler", "Judge-Modell (llama-cpp-judge) nicht verfügbar");
+            return;
+          }
+          currentActivity = "Judge: finale Freigabe…";
          await sendAndWait(pi, ctx, shipitPrompt(""));

          const shipText = getLastAssistantText(ctx);
@ -859,6 +1139,7 @@ export default function (pi: ExtensionAPI) {
          if (shipVerdict === "SHIP") {
            ctx.ui.setStatus("optimize", "🚀 SHIP – produktionsreif");
            finalNotify(ctx, "🚀 SHIP", "Programm ist produktionsreif");
+            await runVersionBump(pi, ctx);
            if (withDoku) {
              await runUpdateDoku(pi, ctx);
            } else {
@ -872,36 +1153,52 @@ export default function (pi: ExtensionAPI) {
            finalNotify(ctx, "ShipIt", "Kein klares Urteil – Antwort im Chat prüfen");
          }
        }
+      } catch (e: any) {
+        finalNotify(ctx, "⛔ Fehler", String(e?.message ?? e));
+      } finally {
+        // Sicherstellen dass cancelRequested nie in einen späteren /optimize-Aufruf leckt
+        cancelRequested = false;
+      }
    }
  });

  // ── Schlanke Kommandos für kleine Änderungen ─────────────────────────────

  pi.registerCommand("patch", {
-    description: "Gezielte Minimaländerung ohne vollständigen Review → qwen3.5-coder (:8001).",
+    description: "Gezielte Minimaländerung ohne Refactoring, committet → qwen3.5-coder (:8001).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const change = (args || "").trim();
      if (!change) {
        ctx.ui.notify("Benutzung: /patch <beschreibung der änderung>", "error");
        return;
      }
+      // Readiness gate: waitUntilModelReady() probes the coder endpoint (port 8001) with a
+      // real completion request before any prompt is sent.
+      if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+        ctx.ui.notify("Coder-Server nicht bereit (Port 8001) — start-coder.sh ausführen", "error");
+        return;
+      }
-      await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
-      pi.sendUserMessage(patchPrompt(change));
+      // switchModel() reports false (after notifying the user itself) when the model is
+      // unknown or cannot be activated. Abort then, otherwise the patch prompt would be
+      // sent to whichever model is still active — same guard as in the /optimize loop.
+      if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) return;
+      currentActivity = "Coder patcht…";
+      await sendAndWait(pi, ctx, patchPrompt(change));
    }
  });

  pi.registerCommand("quick_check", {
-    description: "Schnelle Prüfung der letzten Änderung (OK/PROBLEM) → qwen3.5-judge (:8002).",
+    description: "Schnelle OK/PROBLEM-Prüfung einer kleinen Codeänderung → qwen3.5-judge (:8002).",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
+      // Readiness gate: waitUntilModelReady() probes the judge endpoint (port 8002) with a
+      // real completion request before any prompt is sent.
+      if (!await waitUntilModelReady(pi, ctx, 8002, "qwen3.5-judge")) {
+        ctx.ui.notify("Judge-Server nicht bereit (Port 8002) — start-judge.sh ausführen", "error");
+        return;
+      }
-      await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
-      pi.sendUserMessage(quickCheckPrompt(args || ""));
+      // switchModel() reports false (after notifying the user itself) when the judge model
+      // cannot be selected. Abort then, so the check is never answered by the wrong model.
+      if (!await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge")) return;
+      currentActivity = "Judge: Schnellcheck…";
+      await sendAndWait(pi, ctx, quickCheckPrompt(args || ""));
    }
  });

  // ── Dokumentations-Phase ─────────────────────────────────────────────────

  pi.registerCommand("update_doku", {
-    description: "Code kommentieren + README.md + BEDIENUNGSANLEITUNG.md + git commit → qwen3.5-coder (:8001).",
+    description: "Inkrementelle Code-Kommentare + README.md + BEDIENUNGSANLEITUNG.md via Git-Tags.",
    handler: async function (_args: string, ctx: ExtensionCommandContext) {
      await runUpdateDoku(pi, ctx);
    }
@ -924,7 +1221,7 @@ export default function (pi: ExtensionAPI) {
      }),
    }),
    async execute(_id, params, _signal, _onUpdate, ctx) {
-      const tmpFile = `/tmp/pi_patch_${Date.now()}.diff`;
+      const tmpFile = `/tmp/pi_patch_${Date.now()}_${Math.random().toString(36).slice(2)}.diff`;
      await pi.exec(
        "bash",
        ["-c", `printf "%s" "$1" > "${tmpFile}"`, "_", params.patch],
@ -949,23 +1246,63 @@ export default function (pi: ExtensionAPI) {
  // ── Planungsmodus ────────────────────────────────────────────────────────

  pi.registerCommand("plan", {
-    description: "Analysiert Auftrag, schmiedet Implementierungsplan in PLAN.md — macht keine Dateiänderungen. → qwen3.5-coder (:8001)",
+    description: "Erstellt Implementierungsplan in PLAN.md ohne Dateiänderungen → qwen3.5-coder.",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const task = (args || "").trim();
      if (!task) {
        ctx.ui.notify("Benutzung: /plan <auftrag>", "error");
        return;
      }
+      // Readiness gate: waitUntilModelReady() probes the coder endpoint (port 8001) with a
+      // real completion request before TASK.md is written or any prompt is sent.
+      if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+        ctx.ui.notify("Coder-Server nicht bereit (Port 8001) — start-coder.sh ausführen", "error");
+        return;
+      }
      await writeTaskMd(pi, ctx, task);
-      await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
+      // switchModel() reports false (after notifying the user itself) when the coder model
+      // cannot be selected. Abort before the status line is set, so no "plan" status is
+      // left behind and the planning prompt is not sent to the wrong model.
+      if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) return;
      ctx.ui.setStatus("plan", "Analysiere und plane (keine Dateiänderungen)…");
-      pi.sendUserMessage(planPrompt(task));
-      await ctx.waitForIdle();
+      currentActivity = "Coder plant (kein Code)…";
+      await sendAndWait(pi, ctx, planPrompt(task));
      ctx.ui.setStatus("plan", "");
      finalNotify(ctx, "📋 Plan", "Analyse abgeschlossen — PLAN.md + Chat");
    }
  });

+  // /version — manual entry point for the version bump. It delegates entirely to
+  // runVersionBump(), the same routine the /optimize loop invokes after a SHIP verdict.
+  pi.registerCommand("version", {
+    description: "Versionsnummer des Projekts erhöhen (SemVer + Git-Tag). Analysiert Commits seit letztem Tag.",
+    // The command takes no arguments; anything typed after /version is ignored.
+    async handler(_args: string, ctx: ExtensionCommandContext) {
+      await runVersionBump(pi, ctx);
+    },
+  });
+
+  // /help — prints a static overview of all extension commands as one "info" notification.
+  pi.registerCommand("help", {
+    description: "Zeigt alle Kommandos der pi-coder-judge-Extension.",
+    async handler(_args: string, ctx: ExtensionCommandContext) {
+      // Each section is its own list; sections are separated by one blank line below.
+      const coreWorkflow = [
+        "── Kern-Workflow ─────────────────────────────────────────",
+        "/optimize <auftrag> [--rounds N] [--with-doku] [--continue]",
+        "          [--test-cmd \"cmd\"] [--test-timeout N]",
+        "  Coder→Judge→Fix-Schleife bis PASS (empfohlener Einstieg)",
+        "/fix  [kommentar]   Fixt Judge-Kritik, committet → Coder",
+        "/judge [kommentar]  Review gegen TASK.md + HEAD → Judge",
+        "/shipit [kommentar] Finale Freigabe (SHIP/NO-SHIP) → Judge",
+      ];
+      const control = [
+        "── Steuerung ─────────────────────────────────────────────",
+        "/continue           Unterbrochenen Prozess fortsetzen",
+        "/cancel             Laufenden Loop nach aktuellem Schritt abbrechen",
+      ];
+      const advanced = [
+        "── Erweiterte Kommandos (immer tippbar, nicht im Menü) ───",
+        "/coder <auftrag>    Nur Implementierung ohne Review-Loop → Coder",
+        "/patch <änderung>   Gezielte Minimaländerung → Coder",
+        "/quick_check [was]  Schnelle OK/PROBLEM-Prüfung → Judge",
+        "/plan <auftrag>     Implementierungsplan in PLAN.md → Coder",
+        "/update_doku        Code-Kommentare + README.md + BEDIENUNGSANLEITUNG.md",
+        "/version            Versionsnummer erhöhen (SemVer + Git-Tag)",
+        "/discard            Verwirft PLAN.md",
+        "/new_project <pfad> Projektverzeichnis + git init + .gitignore",
+      ];
+      const helpText = [...coreWorkflow, "", ...control, "", ...advanced].join("\n");
+      ctx.ui.notify(helpText, "info");
+    },
+  });
+
  pi.registerCommand("cancel", {
    description: "Bricht laufenden Optimize-Loop nach dem aktuellen Schritt ab.",
    handler: async function (_args: string, ctx: ExtensionCommandContext) {
@ -975,7 +1312,7 @@ export default function (pi: ExtensionAPI) {
  });

  pi.registerCommand("discard", {
-    description: "Verwirft PLAN.md und setzt den Planungsstatus zurück.",
+    description: "Löscht PLAN.md und verwirft den aktuellen Plan.",
    handler: async function (_args: string, ctx: ExtensionCommandContext) {
      await pi.exec("bash", ["-c", "rm -f PLAN.md"], { cwd: ctx.cwd });
      ctx.ui.notify("PLAN.md gelöscht — Plan verworfen", "info");
@ -986,9 +1323,14 @@ export default function (pi: ExtensionAPI) {
  pi.registerCommand("continue", {
    description: "Nimmt unterbrochenen Prozess wieder auf — liest TASK.md, PLAN.md, git log und entscheidet den nächsten Schritt.",
    handler: async function (_args: string, ctx: ExtensionCommandContext) {
+      // Readiness gate: waitUntilModelReady() probes the coder endpoint (port 8001) with a
+      // real completion request before any prompt is sent.
+      if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+        ctx.ui.notify("Coder-Server nicht bereit (Port 8001) — start-coder.sh ausführen", "error");
+        return;
+      }
-      await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder");
+      // switchModel() reports false (after notifying the user itself) when the coder model
+      // cannot be selected. Abort before the status line is set, so the resume prompt is
+      // not sent to whichever model is still active.
+      if (!await switchModel(pi, ctx, "llama-cpp-coder", "qwen3.5-coder")) return;
      ctx.ui.setStatus("continue", "Analysiere unterbrochenen Prozess…");
-      pi.sendUserMessage([
+      currentActivity = "Coder analysiert Stand…";
+      await sendAndWait(pi, ctx, [
        "Ein Prozess wurde unterbrochen. Analysiere den aktuellen Stand und führe ihn sinnvoll fort:",
        "1. Lies TASK.md für den Auftrag",
        "2. Lies PLAN.md falls vorhanden (war ein Plan in Arbeit?)",
@ -996,7 +1338,6 @@ export default function (pi: ExtensionAPI) {
        "4. Entscheide: Muss noch implementiert werden? Ist ein Review fällig? Müssen Fixes nachgezogen werden?",
        "5. Fahre direkt mit dem nächsten sinnvollen Schritt fort — kein langer Bericht, einfach weitermachen.",
      ].join("\n"));
-      await ctx.waitForIdle();
      ctx.ui.setStatus("continue", "");
    }
  });
@ -1004,7 +1345,7 @@ export default function (pi: ExtensionAPI) {
  // ── Projekt-Scaffolding ──────────────────────────────────────────────────

  pi.registerCommand("new_project", {
-    description: "Legt Projektverzeichnis an + git init + .gitignore. /new_project <pfad>",
+    description: "Legt Projektverzeichnis, git-Repo und .gitignore an.",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const rawPath = (args || "").trim();
      if (!rawPath) {
--- a/start-coder.sh
+++ b/start-coder.sh
@ -34,10 +34,11 @@ docker run -d \
    -c 262144 \
    -n 16384 \
    --jinja \
+    --chat-template-kwargs '{"enable_thinking":true}' \
    --no-context-shift \
-    --temp 0.2 \
-    --top-p 0.95 \
-    --top-k 40 \
+    --temp 0.6 \
+    --top-p 0.80 \
+    --top-k 20 \
    --min-p 0.01 \
    --repeat-penalty 1.05 \
    --main-gpu 0 \
@ -54,37 +55,24 @@ docker run -d \
    --host 0.0.0.0 \
    --port "$CONTAINER_PORT"

-echo "[*] Warte auf HTTP ..."
-HTTP_READY=0
+# Readiness loop: up to 90 probes against /v1/chat/completions; only HTTP 200 counts as ready.
+# Each probe may block for up to 10 s (--max-time) plus the 2 s pause, so the banner states
+# the attempt limit rather than a fixed number of seconds.
+echo "[*] Warte auf Modell-Bereitschaft (Completion-Check, max. 90 Versuche) ..."
+MODEL_READY=0
 for i in {1..90}; do
-  if curl -s "http://localhost:${HOST_PORT}/health" >/dev/null 2>&1 || \
-     curl -s "http://localhost:${HOST_PORT}/v1/models" >/dev/null 2>&1; then
-    HTTP_READY=1
-    break
-  fi
+  # "|| true": curl exits non-zero on connection refused or timeout. Without it the
+  # assignment would abort the script if it runs under `set -e` (not visible in this hunk).
+  # curl still prints 000 via -w in that case.
+  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
+    -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
+    -H "Content-Type: application/json" \
+    -d "{\"model\":\"${MODEL_ALIAS}\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"max_tokens\":1,\"temperature\":0.0,\"stream\":false}" || true)
+  if [ "$HTTP_CODE" = "200" ]; then MODEL_READY=1; break; fi
+  echo "  [${i}/90] HTTP ${HTTP_CODE:-000} — Modell lädt noch, warte 2s ..."
  sleep 2
 done

-if [ "$HTTP_READY" -ne 1 ]; then
-  echo "[!] HTTP-Server wurde nicht rechtzeitig erreichbar." >&2
+if [ "$MODEL_READY" -ne 1 ]; then
+  echo "[!] Modell wurde nicht rechtzeitig bereit (kein HTTP 200 auf Completion)." >&2
  docker logs --tail 200 "$CONTAINER_NAME" || true
  exit 1
 fi

-echo "[*] Teste Chat-Completion ..."
-curl -s -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
-  -H "Content-Type: application/json" \
-  -d "{
-    \"model\": \"${MODEL_ALIAS}\",
-    \"messages\": [
-      { \"role\": \"system\", \"content\": \"Du bist ein präziser Coding-Assistent.\" },
-      { \"role\": \"user\", \"content\": \"Antworte nur mit dem Wort: bereit\" }
-    ],
-    \"max_tokens\": 8,
-    \"temperature\": 0.0,
-    \"stream\": false
-  }"
-
-echo
-echo "[*] Server bereit auf http://0.0.0.0:${HOST_PORT}"
+echo "[*] Modell bereit — erster Completion-Request erfolgreich (HTTP 200)."
+echo "[*] Server läuft auf http://0.0.0.0:${HOST_PORT}"
 echo "[*] Stoppen mit: docker rm -f ${CONTAINER_NAME}"
--- a/start-judge.sh
+++ b/start-judge.sh
@ -32,12 +32,13 @@ docker run -d \
    -m "/hf_home/${MODEL_REL_PATH}" \
    --alias "${MODEL_ALIAS}" \
    -c 262144 \
-    -n 8192 \
+    -n 16384 \
    --jinja \
+    --chat-template-kwargs '{"enable_thinking":true}' \
    --no-context-shift \
-    --temp 0.1 \
-    --top-p 0.9 \
-    --top-k 40 \
+    --temp 0.7 \
+    --top-p 0.80 \
+    --top-k 20 \
    --min-p 0.01 \
    --repeat-penalty 1.05 \
    --main-gpu 0 \
@ -54,37 +55,24 @@ docker run -d \
    --host 0.0.0.0 \
    --port "$CONTAINER_PORT"

-echo "[*] Warte auf HTTP ..."
-HTTP_READY=0
+# Readiness loop: up to 90 probes against /v1/chat/completions; only HTTP 200 counts as ready.
+# Each probe may block for up to 10 s (--max-time) plus the 2 s pause, so the banner states
+# the attempt limit rather than a fixed number of seconds.
+echo "[*] Warte auf Modell-Bereitschaft (Completion-Check, max. 90 Versuche) ..."
+MODEL_READY=0
 for i in {1..90}; do
-  if curl -s "http://localhost:${HOST_PORT}/health" >/dev/null 2>&1 || \
-     curl -s "http://localhost:${HOST_PORT}/v1/models" >/dev/null 2>&1; then
-    HTTP_READY=1
-    break
-  fi
+  # "|| true": curl exits non-zero on connection refused or timeout. Without it the
+  # assignment would abort the script if it runs under `set -e` (not visible in this hunk).
+  # curl still prints 000 via -w in that case.
+  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
+    -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
+    -H "Content-Type: application/json" \
+    -d "{\"model\":\"${MODEL_ALIAS}\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"max_tokens\":1,\"temperature\":0.0,\"stream\":false}" || true)
+  if [ "$HTTP_CODE" = "200" ]; then MODEL_READY=1; break; fi
+  echo "  [${i}/90] HTTP ${HTTP_CODE:-000} — Modell lädt noch, warte 2s ..."
  sleep 2
 done

-if [ "$HTTP_READY" -ne 1 ]; then
-  echo "[!] HTTP-Server wurde nicht rechtzeitig erreichbar." >&2
+if [ "$MODEL_READY" -ne 1 ]; then
+  echo "[!] Modell wurde nicht rechtzeitig bereit (kein HTTP 200 auf Completion)." >&2
  docker logs --tail 200 "$CONTAINER_NAME" || true
  exit 1
 fi

-echo "[*] Teste Judge-Endpoint ..."
-curl -s -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
-  -H "Content-Type: application/json" \
-  -d "{
-    \"model\": \"${MODEL_ALIAS}\",
-    \"messages\": [
-      { \"role\": \"system\", \"content\": \"Du bist ein strenger Code-Reviewer.\" },
-      { \"role\": \"user\", \"content\": \"Antworte nur mit dem Wort: bereit\" }
-    ],
-    \"max_tokens\": 8,
-    \"temperature\": 0.0,
-    \"stream\": false
-  }"
-
-echo
-echo "[*] Server bereit auf http://0.0.0.0:${HOST_PORT}"
+echo "[*] Modell bereit — erster Completion-Request erfolgreich (HTTP 200)."
+echo "[*] Server läuft auf http://0.0.0.0:${HOST_PORT}"
 echo "[*] Stoppen mit: docker rm -f ${CONTAINER_NAME}"