perf: Quick-Judge Runde 1, switchModel-Cache, Blocker-Normalisierung + weitere TAT-Optimierungen

A) quickJudgePrompt()/quickJudgeWithTestsPrompt(): Runde 1 ohne --continue nutzt einen kompakten Prompt ohne TASK.md — spart 15–30 % Inference-Zeit bei direktem PASS
B) switchModel()-Caching via currentModelKey: überspringt setModel(), wenn das Modell bereits korrekt gesetzt ist; currentModelKey wird im finally-Block zurückgesetzt
C) normalizeForComparison() für Loop-Detection: Whitespace-/Satzzeichen-Normalisierung verhindert False-Negatives bei minimalen Formulierungsunterschieden im Judge-Output
D) Parallele Server-Bereitschaftsprüfung im --continue-Modus via Promise.all: spart bis zu 3 min bei Kaltstart beider Server
E) --no-tests Flag: überspringt detectTestCommands() und die autoTestCmds-Befüllung
F) --approve-concerns Flag: behandelt "PASS WITH CONCERNS" wie "PASS" (kein ShipIt-Call)
H) sendAndWait() settle-Delay 400 ms → 150 ms: ca. 1–2 s weniger Wartezeit pro Durchlauf

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 18:03:56 +02:00 · 2026-05-29 18:03:56 +02:00 · 482d98fb63
commit 482d98fb63
parent 11ac46e565
1 changed files with 115 additions and 34 deletions
--- a/pi-coder-judge-extension.ts
+++ b/pi-coder-judge-extension.ts
@ -57,6 +57,53 @@ function judgePrompt(extra: string): string {
  ].join("\n") + suffix;
 }

+// Compact first-impression prompt for round 1 (diff review only, no TASK.md), meant to cut inference
+// time; the caller falls back to the full judgePrompt() otherwise. Non-blank `extra` is appended as user focus.
+function quickJudgePrompt(extra: string): string {
+  const focus = extra?.trim();
+  const prompt = [
+    "Schneller Code-Review — erster Eindruck.",
+    "Du bist ein skeptischer Senior-Reviewer. Sei direkt und knapp.",
+    "",
+    "1. Sieh dir 'git show HEAD' an.",
+    "2. Führe relevante Tests aus, falls vorhanden.",
+    "3. Gibt es offensichtliche Blocker? (Bugs, fehlende Fehlerbehandlung, Sicherheitslücken, kaputte Imports)",
+    "4. Wenn alles offensichtlich in Ordnung ist: PASS.",
+    "5. Bei Zweifeln oder Lücken: FAIL — konkrete Blocker benennen.",
+    "",
+    "Ausgabeformat (kompakt):",
+    "- Urteil: PASS | PASS WITH CONCERNS | FAIL",
+    "- Blocker (falls vorhanden)",
+    "- Konkrete Fix-Aufträge (falls FAIL)",
+  ].join("\n");
+  return focus ? prompt + "\n\nZusätzlicher Fokus des Users:\n" + focus : prompt;
+}
+
+// Quick variant for round 1 when the test suite has already been run externally:
+// embeds `testOutput` in a fenced block and tells the judge not to run tests again.
+// The fence is made longer than any backtick run inside `testOutput`, so output that
+// itself contains "```" cannot close the block early. Without such a run the prompt is
+// unchanged (plain "```" fence). A non-blank `extra` is appended as additional user focus.
+function quickJudgeWithTestsPrompt(testOutput: string, extra: string): string {
+  const focus = extra?.trim();
+  // Longest run of backticks in the output; the floor of 2 yields the usual 3-backtick fence.
+  const backtickRuns: string[] = (testOutput ?? "").match(/`+/g) ?? [];
+  const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 2);
+  const fence = "`".repeat(longestRun + 1);
+  return [
+    "Schneller Code-Review — erster Eindruck.",
+    "Du bist ein skeptischer Senior-Reviewer. Sei direkt und knapp.",
+    "",
+    "Die Test-Suite wurde bereits extern ausgeführt. Führe KEINE weiteren Tests aus.",
+    "",
+    "1. Sieh dir 'git show HEAD' an.",
+    "2. Analysiere das folgende Test-Ergebnis:",
+    fence,
+    testOutput,
+    fence,
+    "3. Gibt es offensichtliche Blocker? (Test-Failures, Bugs, Sicherheitslücken)",
+    "4. Wenn alles offensichtlich in Ordnung ist: PASS.",
+    "5. Bei Zweifeln: FAIL — konkrete Blocker benennen.",
+    "",
+    "Ausgabeformat (kompakt):",
+    "- Urteil: PASS | PASS WITH CONCERNS | FAIL",
+    "- Blocker (falls vorhanden)",
+    "- Konkrete Fix-Aufträge (falls FAIL)",
+  ].join("\n") + (focus ? "\n\nZusätzlicher Fokus des Users:\n" + focus : "");
+}
+
 // Wie judgePrompt, aber Tests werden NICHT vom Judge ausgeführt —
 // die Extension hat sie bereits extern gestartet und übergibt den Output.
 function judgeWithTestsPrompt(testOutput: string, extra: string): string {
@ -379,12 +426,15 @@ async function switchModel(
  provider: string,
  modelId: string
 ): Promise<boolean> {
+  const key = `${provider}/${modelId}`;
+  if (key === currentModelKey) return true;
  const model = ctx.modelRegistry.find(provider, modelId);
  if (!model) {
    ctx.ui.notify(`Modell ${provider}/${modelId} nicht gefunden`, "error");
    return false;
  }
  const ok = await pi.setModel(model);
+  if (ok !== false) currentModelKey = key;
  if (!ok) ctx.ui.notify(`Kein API-Key für ${modelId}`, "warning");
  return ok !== false;
 }
@ -409,7 +459,7 @@ async function sendAndWait(
      await ctx.waitForIdle();
    }
  }
-  await new Promise(r => setTimeout(r, 400));
+  await new Promise(r => setTimeout(r, 150));
  await ctx.waitForIdle();
 }

@ -538,6 +588,12 @@ function getLastAssistantText(ctx: ExtensionCommandContext): string {

 // Extrahiert das Urteil aus einer Judge-Antwort.
 // "UNREADABLE" wenn kein Urteil erkennbar — unterscheidbar von einem expliziten FAIL.
+// Normalisiert Blocker-Text für die Loop-Erkennung — verhindert False-Negatives
+// durch minimale Formulierungsunterschiede im Judge-Output (Whitespace, Satzzeichen).
+function normalizeForComparison(s: string): string {
+  return s.trim().replace(/\s+/g, " ").replace(/[.,;:!?]+$/g, "").toLowerCase();
+}
+
 function parseVerdict(text: string): string {
  const m = text.match(/Urteil:\s*(PASS WITH CONCERNS|PASS|FAIL)/i);
  return m ? m[1].toUpperCase() : "UNREADABLE";
@ -827,6 +883,7 @@ function finalNotify(
 // ── Extension ────────────────────────────────────────────────────────────────

 let cancelRequested = false;
+let currentModelKey = "";  // switchModel() cache: "provider/modelId" of the last accepted setModel(), lets repeat calls skip it. NOTE(review): stale if the model is changed outside switchModel() — confirm
 let interactivePauseActive = false;
 let interactiveContinueRequested = false;
 let interactivePauseTask = "";
@ -1003,13 +1060,15 @@ export default function (pi: ExtensionAPI) {
  // ── Automatische Optimierungsschleife ────────────────────────────────────

  pi.registerCommand("optimize", {
-    description: "Coder→Judge→Fix-Schleife bis PASS (default 2 Runden). Klares PASS → direkt SHIP; PASS WITH CONCERNS → ShipIt-Runde. --interactive pausiert nach PASS für Zusatzaufträge via /continue. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--interactive] [--test-cmd \"override\"] [--test-timeout N]",
+    description: "Coder→Judge→Fix-Schleife bis PASS (default 2 Runden, Runde 1: Quick-Judge). Klares PASS → direkt SHIP; PASS WITH CONCERNS → ShipIt-Runde (oder --approve-concerns zum Überspringen). --interactive: Checkpoint nach PASS. --no-tests: Test-Erkennung überspringen. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--interactive] [--no-tests] [--approve-concerns] [--test-cmd \"override\"] [--test-timeout N]",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const roundsMatch = (args || "").match(/--rounds\s+(\d+)/);
      const maxRounds = roundsMatch ? Math.max(1, parseInt(roundsMatch[1], 10)) : 2;
      const withDoku = /--with-doku/.test(args || "");
      const continueMode = /--continue/.test(args || "");
      const interactive = /--interactive/.test(args || "");
+      const noTests = /--no-tests/.test(args || "");
+      const approveConcerns = /--approve-concerns/.test(args || "");
      const testCmdMatch = (args || "").match(/--test-cmd\s+"([^"]+)"|--test-cmd\s+'([^']+)'|--test-cmd\s+(\S+)/);
      const testCmd: string | null = testCmdMatch ? (testCmdMatch[1] ?? testCmdMatch[2] ?? testCmdMatch[3]) : null;
      const testTimeoutMatch = (args || "").match(/--test-timeout\s+(\d+)/);
@ -1020,6 +1079,8 @@ export default function (pi: ExtensionAPI) {
        .replace(/--with-doku/, "")
        .replace(/--continue/, "")
        .replace(/--interactive/, "")
+        .replace(/--no-tests/, "")
+        .replace(/--approve-concerns/, "")
        .replace(/--test-cmd\s+"[^"]*"/, "")
        .replace(/--test-cmd\s+\S+/, "")
        .trim();
@ -1040,13 +1101,20 @@ export default function (pi: ExtensionAPI) {
            : `--continue: Überspringe Implementierung, starte direkt mit Judge-Prüfung.`;
          ctx.ui.notify(continueMsg, "info");

-          // Im --continue-Modus: Coder-Server jetzt prüfen, da er für die Fix-Phase gebraucht wird
-          // (in normalem Modus wird er beim coderKickoff implizit geprüft)
-          ctx.ui.setStatus("optimize", "Coder-Server wird geprüft…");
-          if (!await waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder")) {
+          // Im --continue-Modus: beide Server parallel prüfen — spart bis zu 3 min bei Kaltstart.
+          ctx.ui.setStatus("optimize", "Coder- und Judge-Server werden geprüft (parallel)…");
+          const [coderReady, judgeReady] = await Promise.all([
+            waitUntilModelReady(pi, ctx, 8001, "qwen3.5-coder"),
+            waitUntilModelReady(pi, ctx, 8002, "qwen3.5-judge"),
+          ]);
+          if (!coderReady) {
            finalNotify(ctx, "⛔ Coder nicht erreichbar", "Port 8001 — kein HTTP 200 nach 3 min. start-coder.sh ausführen");
            return;
          }
+          if (!judgeReady) {
+            finalNotify(ctx, "⛔ Judge nicht erreichbar", "Port 8002 — kein HTTP 200 nach 3 min. start-judge.sh ausführen");
+            return;
+          }
        } else {
          // TASK.md anlegen und Implementierung starten
          await writeTaskMd(pi, ctx, task);
@ -1061,7 +1129,6 @@ export default function (pi: ExtensionAPI) {
          await sendAndWait(pi, ctx, coderKickoff(task));
          await tickTaskMdStatus(pi, ctx, "Implementierung");
          if (cancelRequested) { finalNotify(ctx, "⛔ Abgebrochen", "Nach Implementierung"); return; }
-        }

          // Judge-Bereitschaft via Completion-Check — /health antwortet bereits während des
          // GPU-Ladevorgangs und ist kein verlässliches Signal. Nur HTTP 200 auf einen
@ -1071,13 +1138,16 @@ export default function (pi: ExtensionAPI) {
            finalNotify(ctx, "⛔ Judge nicht erreichbar", "Port 8002 — kein HTTP 200 nach 3 min. start-judge.sh ausführen");
            return;
          }
+        }

-        // Test-Suiten einmalig ermitteln: --test-cmd überschreibt Auto-Erkennung.
+        // Test-Suiten ermitteln: --no-tests überspringt alles, --test-cmd überschreibt Auto-Erkennung.
        // Läuft nach Coder, damit neu angelegte Test-Dateien bereits erkannt werden.
+        let autoTestCmds: string[] = [];
+        if (noTests) {
+          ctx.ui.notify("--no-tests: Test-Erkennung übersprungen.", "info");
+        } else {
          ctx.ui.setStatus("optimize", "Test-Suiten werden erkannt…");
-        const autoTestCmds: string[] = testCmd
-          ? [testCmd]
-          : await detectTestCommands(pi, ctx);
+          autoTestCmds = testCmd ? [testCmd] : await detectTestCommands(pi, ctx);
          if (autoTestCmds.length > 0) {
            const label = autoTestCmds.map(c => c.split(" ")[0]).join(", ");
            ctx.ui.notify(
@ -1087,6 +1157,7 @@ export default function (pi: ExtensionAPI) {
          } else {
            ctx.ui.notify("Keine Test-Suiten erkannt — Judge führt Tests selbst aus.", "info");
          }
+        }

        let lastBlockers = "";
        let verdict = "";
@ -1106,19 +1177,26 @@ export default function (pi: ExtensionAPI) {
              return;
            }

+            // Runde 1 ohne --continue: Quick-Judge (kein TASK.md, kürzerer Prompt).
+            // Bei FAIL folgt Runde 2 mit vollem judgePrompt für detaillierte Analyse.
+            const useQuickJudge = round === 1 && !continueMode;
            if (autoTestCmds.length > 0) {
              const label = autoTestCmds.length === 1
                ? autoTestCmds[0].split(" ")[0]
                : `${autoTestCmds.length} Suiten parallel`;
              ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${label}, max. ${testTimeout}s)…`);
              const testOutput = await runTestsParallel(pi, ctx, autoTestCmds, testTimeout);
-              ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge analysiert Test-Ergebnis…`);
+              const judgeLabel = useQuickJudge ? "Quick-Check" : "Judge analysiert";
+              ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: ${judgeLabel} Test-Ergebnis…`);
              currentActivity = `Judge reviewt (Runde ${round}/${maxRounds})…`;
-              await sendAndWait(pi, ctx, judgeWithTestsPrompt(testOutput, ""));
+              await sendAndWait(pi, ctx, useQuickJudge
+                ? quickJudgeWithTestsPrompt(testOutput, "")
+                : judgeWithTestsPrompt(testOutput, ""));
            } else {
-              ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge — TASK.md + letzter Commit + Tests…`);
+              const judgeLabel = useQuickJudge ? "Quick-Check" : "Judge — TASK.md + letzter Commit + Tests";
+              ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: ${judgeLabel}…`);
              currentActivity = `Judge reviewt (Runde ${round}/${maxRounds})…`;
-              await sendAndWait(pi, ctx, judgePrompt(""));
+              await sendAndWait(pi, ctx, useQuickJudge ? quickJudgePrompt("") : judgePrompt(""));
            }
            if (cancelRequested) { finalNotify(ctx, "⛔ Abgebrochen", `Nach Judge Runde ${round}`); return; }

@ -1132,9 +1210,10 @@ export default function (pi: ExtensionAPI) {
              break;
            }

-            // Loop-Erkennung: gleicher Blocker zweimal → manuell eingreifen
+            // Loop-Erkennung: gleicher Blocker zweimal → manuell eingreifen.
+            // Normalisierung verhindert False-Negatives durch minimale Formulierungsunterschiede.
            const currentBlockers = parseBlockers(judgeText);
-            if (currentBlockers && currentBlockers === lastBlockers) {
+            if (currentBlockers && normalizeForComparison(currentBlockers) === normalizeForComparison(lastBlockers)) {
              ctx.ui.setStatus("optimize", `${prog} ⚠ Gleicher Blocker in Runde ${round} – manuelle Intervention nötig`);
              finalNotify(ctx, "⚠ Schleife", "Gleicher Blocker zweimal – manuelle Intervention nötig");
              return;
@ -1214,8 +1293,9 @@ export default function (pi: ExtensionAPI) {
        }

        // Finaler SHIP-Schritt: klares PASS → direkt SHIP ohne zweiten Inference-Aufruf.
-        // "PASS WITH CONCERNS" → ShipIt-Runde als finale Abwägung.
-        if (verdict === "PASS") {
+        // "PASS WITH CONCERNS" + --approve-concerns → direkt SHIP (ShipIt-Runde überspringen).
+        // "PASS WITH CONCERNS" ohne Flag → ShipIt-Runde als finale Abwägung.
+        if (verdict === "PASS" || (verdict === "PASS WITH CONCERNS" && approveConcerns)) {
          ctx.ui.setStatus("optimize", "🚀 SHIP – produktionsreif");
          await autoCommitIfDirty(pi, ctx);
          notifyShipSuccess(ctx);
@ -1262,6 +1342,7 @@ export default function (pi: ExtensionAPI) {
      } finally {
        // Sicherstellen dass keine Zustandsvariable in späteren /optimize-Aufruf leckt
        cancelRequested = false;
+        currentModelKey = "";
        interactivePauseActive = false;
        interactiveContinueRequested = false;
        interactivePauseTask = "";