feat: automatische Test-Erkennung + parallele Ausführung in /optimize

Tests werden jetzt von der Extension selbst erkannt und als parallele CPU-Prozesse gestartet — Judge bekommt den fertigen Output und führt keine Tests mehr selbst aus.

- detectTestCommands(): erkennt pytest, npm test, cargo, go test, make test anhand von Framework-Markern (alle Checks parallel via Promise.all)
- runTestsParallel(): startet alle erkannten Suiten gleichzeitig, kombiniert Output mit Status-Header pro Suite (max. 6000 Zeichen gesamt)
- /optimize: Auto-Erkennung läuft einmalig nach Coder-Phase, vor dem Loop
- --test-cmd bleibt als Override für Sonderfälle erhalten
- Fallback: kein Framework erkannt → Judge führt Tests wie bisher selbst aus

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 21:39:21 +02:00 · 2026-05-20 21:39:21 +02:00 · 2c07fb9d1c
commit 2c07fb9d1c
parent d5a2c10fa6
1 changed files with 73 additions and 15 deletions
--- a/pi-coder-judge-extension.ts
+++ b/pi-coder-judge-extension.ts
@@ -402,19 +402,59 @@ async function sendAndWait(
 }

 // Führt einen Shell-Befehl aus und gibt stdout+stderr zurück (max. 6000 Zeichen).
-// Wird von /optimize --test-cmd genutzt, damit Judge keine Tests selbst starten muss.
-async function runTests(
+// Detects test suites in the project directory by probing for framework marker files and returns the matching test commands.
+// All probes run in parallel via Promise.all; intended to be conservative, i.e. no false positives.
+async function detectTestCommands( // NOTE(review): the unchanged comment line right above this comment block still describes the removed runTests() helper — reword or delete it
+  pi: ExtensionAPI, // only pi.exec() is used here
+  ctx: ExtensionCommandContext // only ctx.cwd is read; every probe runs in that directory
+): Promise<string[]> { // resolves to the commands of all detected suites, in probe order; empty when nothing matched
+  const [hasPytest, hasNpm, hasCargo, hasGo, hasMake] = await Promise.all([ // destructuring order must mirror the probe order below
+    pi.exec("bash", ["-c", // pytest: pytest.ini or conftest.py exists, pyproject.toml mentions 'pytest', or a test_*.py / *_test.py file lies at most 4 levels deep
+      "test -f pytest.ini || test -f conftest.py || " +
+      "(test -f pyproject.toml && grep -q 'pytest' pyproject.toml) || " +
+      "find . -maxdepth 4 \\( -name 'test_*.py' -o -name '*_test.py' \\) 2>/dev/null | grep -q ." // NOTE(review): find does not prune dependency directories such as node_modules — confirm vendored test files cannot trigger a false positive
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", // npm: package.json exists and its scripts.test is set and does not contain 'no test' (presumably to skip npm's placeholder script — verify)
+      "test -f package.json && " +
+      "node -e \"const p=require('./package.json');process.exit(" +
+      "p.scripts&&p.scripts.test&&!p.scripts.test.includes('no test')?0:1)\" 2>/dev/null"
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", "test -f Cargo.toml"], { cwd: ctx.cwd }), // cargo: Cargo.toml exists
+    pi.exec("bash", ["-c", // go: go.mod exists and a *_test.go file lies at most 4 levels deep
+      "test -f go.mod && find . -maxdepth 4 -name '*_test.go' 2>/dev/null | grep -q ."
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", // make: Makefile contains a line starting with "test", optional whitespace, then ":"
+      "test -f Makefile && grep -qE '^test[[:space:]]*:' Makefile"
+    ], { cwd: ctx.cwd }),
+  ]);
+  return ([ // a probe counts as a hit when bash exited with code 0; each command merges stderr into stdout via 2>&1
+    hasPytest.code === 0 ? "pytest -x -q 2>&1"    : null,
+    hasNpm.code   === 0 ? "npm test 2>&1"          : null,
+    hasCargo.code === 0 ? "cargo test 2>&1"        : null,
+    hasGo.code    === 0 ? "go test ./... 2>&1"     : null,
+    hasMake.code  === 0 ? "make test 2>&1"         : null,
+  ] as (string | null)[]).filter((c): c is string => c !== null); // drop the misses, keeping the order above
+}
+
+// Runs several test commands in parallel as CPU processes (one `bash -c` per command via pi.exec) and returns a
+// combined output block for judgeWithTestsPrompt().
+async function runTestsParallel(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
-  cmd: string
+  cmds: string[] // shell command lines; each one is executed with ctx.cwd as working directory
 ): Promise<string> {
-  const result = await pi.exec("bash", ["-c", cmd], { cwd: ctx.cwd });
-  const output = (result.stdout + (result.stderr ? "\n" + result.stderr : "")).trim();
-  const MAX = 6000;
-  if (output.length > MAX) {
-    return output.slice(0, MAX) + `\n\n[… Ausgabe gekürzt, ${output.length} Zeichen gesamt]`;
-  }
-  return output || "(kein Output)";
+  const results = await Promise.all( // results[i] is the outcome of cmds[i]
+    cmds.map(cmd => pi.exec("bash", ["-c", cmd], { cwd: ctx.cwd }))
+  );
+  const MAX_PER = Math.max(1000, Math.floor(6000 / cmds.length)); // per-suite character budget: 6000 split evenly, never below 1000 — NOTE(review): with 7 or more commands the combined text exceeds 6000 characters (headers and truncation notes are not counted either)
+  return results.map((r, i) => {
+    const raw = (r.stdout + (r.stderr ? "\n" + r.stderr : "")).trim(); // stdout, followed by stderr on a new line when stderr is non-empty
+    const out = raw.length > MAX_PER // over budget: keep only the first MAX_PER characters and append the original length
+      ? raw.slice(0, MAX_PER) + `\n[… gekürzt, ${raw.length} Zeichen]` // NOTE(review): this keeps the head of the output; if a runner prints its failure summary last, that part is cut — consider keeping the tail
+      : raw || "(kein Output)"; // placeholder when the command printed nothing
+    const status = r.code === 0 ? "✓ OK" : `✗ Exit ${r.code}`; // exit code 0 counts as success
+    return `=== ${cmds[i]} [${status}] ===\n${out}`; // header line with command and status, then that suite's output
+  }).join("\n\n"); // suites are separated by one blank line
 }

 // Liest den Text der letzten Assistenten-Antwort aus dem Session-Branch.
@@ -655,7 +695,7 @@ export default function (pi: ExtensionAPI) {
  // ── Automatische Optimierungsschleife ────────────────────────────────────

  pi.registerCommand("optimize", {
-    description: "Coder→Judge→Fix-Schleife bis PASS + optional Doku. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"befehl\"]",
+    description: "Coder→Judge→Fix-Schleife bis PASS. Tests werden automatisch erkannt und parallel ausgeführt. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"override\"]",
    handler: async function (args: string, ctx: ExtensionCommandContext) {
      const roundsMatch = (args || "").match(/--rounds\s+(\d+)/);
      const maxRounds = roundsMatch ? Math.max(1, parseInt(roundsMatch[1], 10)) : 3;
@@ -698,6 +738,22 @@ export default function (pi: ExtensionAPI) {
        if (cancelRequested) { cancelRequested = false; finalNotify(ctx, "⛔ Abgebrochen", "Nach Implementierung"); return; }
      }

+      // Test-Suiten einmalig ermitteln: --test-cmd überschreibt Auto-Erkennung.
+      // Läuft nach Coder, damit neu angelegte Test-Dateien bereits erkannt werden.
+      ctx.ui.setStatus("optimize", "Test-Suiten werden erkannt…");
+      const autoTestCmds: string[] = testCmd
+        ? [testCmd]
+        : await detectTestCommands(pi, ctx);
+      if (autoTestCmds.length > 0) {
+        const label = autoTestCmds.map(c => c.split(" ")[0]).join(", ");
+        ctx.ui.notify(
+          `${autoTestCmds.length} Test-Suite${autoTestCmds.length > 1 ? "n" : ""} erkannt: ${label}`,
+          "info"
+        );
+      } else {
+        ctx.ui.notify("Keine Test-Suiten erkannt — Judge führt Tests selbst aus.", "info");
+      }
+
      let lastBlockers = "";
      let verdict = "";

@@ -706,10 +762,12 @@ export default function (pi: ExtensionAPI) {
        const prog = "●".repeat(round - 1) + "◉" + "○".repeat(maxRounds - round);
        await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");

-        if (testCmd) {
-          // Tests laufen in der Extension — Judge bekommt den Output fertig geliefert
-          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${testCmd})…`);
-          const testOutput = await runTests(pi, ctx, testCmd);
+        if (autoTestCmds.length > 0) {
+          const label = autoTestCmds.length === 1
+            ? autoTestCmds[0].split(" ")[0]
+            : `${autoTestCmds.length} Suiten parallel`;
+          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${label})…`);
+          const testOutput = await runTestsParallel(pi, ctx, autoTestCmds);
          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge analysiert Test-Ergebnis…`);
          await sendAndWait(pi, ctx, judgeWithTestsPrompt(testOutput, ""));
        } else {