From 2c07fb9d1cc47547525fe0caeadd52c0c2abd0e2 Mon Sep 17 00:00:00 2001
From: dschlueter <dschlueter@kitux.de>
Date: Wed, 20 May 2026 21:39:21 +0200
Subject: [PATCH] =?UTF-8?q?feat:=20automatische=20Test-Erkennung=20+=20par?=
 =?UTF-8?q?allele=20Ausf=C3=BChrung=20in=20/optimize?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests werden jetzt von der Extension selbst erkannt und als parallele
CPU-Prozesse gestartet — Judge bekommt den fertigen Output und führt
keine Tests mehr selbst aus.

- detectTestCommands(): erkennt pytest, npm test, cargo, go test, make test
  anhand von Framework-Markern (alle Checks parallel via Promise.all)
- runTestsParallel(): startet alle erkannten Suiten gleichzeitig, kombiniert
  Output mit Status-Header pro Suite (max. 6000 Zeichen gesamt)
- /optimize: Auto-Erkennung läuft einmalig nach Coder-Phase, vor dem Loop
- --test-cmd bleibt als Override für Sonderfälle erhalten
- Fallback: kein Framework erkannt → Judge führt Tests wie bisher selbst aus

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 pi-coder-judge-extension.ts | 88 ++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 15 deletions(-)
diff --git a/pi-coder-judge-extension.ts b/pi-coder-judge-extension.ts
index bcb25a6..aa10824 100644
--- a/pi-coder-judge-extension.ts
+++ b/pi-coder-judge-extension.ts
@@ -402,19 +402,59 @@ async function sendAndWait(
 }
 
 // Führt einen Shell-Befehl aus und gibt stdout+stderr zurück (max. 6000 Zeichen).
-// Wird von /optimize --test-cmd genutzt, damit Judge keine Tests selbst starten muss.
-async function runTests(
+// Detects test suites in the project directory (ctx.cwd) from framework marker files and resolves to the shell commands to run.
+// All probes run in parallel via Promise.all; the checks are meant to be conservative so that no false positives are reported.
+async function detectTestCommands(
+  pi: ExtensionAPI,
+  ctx: ExtensionCommandContext
+): Promise<string[]> {
+  const [hasPytest, hasNpm, hasCargo, hasGo, hasMake] = await Promise.all([
+    pi.exec("bash", ["-c", // pytest probe: succeeds as soon as one of the markers below matches
+      "test -f pytest.ini || test -f conftest.py || " + // pytest config or conftest file in cwd
+      "(test -f pyproject.toml && grep -q 'pytest' pyproject.toml) || " + // pyproject.toml that mentions pytest
+      "find . -maxdepth 4 \\( -name 'test_*.py' -o -name '*_test.py' \\) 2>/dev/null | grep -q ." // any file named test_*.py or *_test.py up to depth 4
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", // npm probe
+      "test -f package.json && " + // package.json must exist in cwd
+      "node -e \"const p=require('./package.json');process.exit(" + // node exits 0 only if scripts.test is set ...
+      "p.scripts&&p.scripts.test&&!p.scripts.test.includes('no test')?0:1)\" 2>/dev/null" // ... and does not contain 'no test' (presumably npm's default placeholder script)
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", "test -f Cargo.toml"], { cwd: ctx.cwd }), // cargo probe: Cargo.toml exists in cwd
+    pi.exec("bash", ["-c", // go probe: go.mod plus at least one *_test.go file up to depth 4
+      "test -f go.mod && find . -maxdepth 4 -name '*_test.go' 2>/dev/null | grep -q ."
+    ], { cwd: ctx.cwd }),
+    pi.exec("bash", ["-c", // make probe: Makefile with a line starting with "test", optional whitespace, then ":"
+      "test -f Makefile && grep -qE '^test[[:space:]]*:' Makefile"
+    ], { cwd: ctx.cwd }),
+  ]);
+  return ([ // fixed order; a probe whose code is not 0 contributes null, which the filter below removes
+    hasPytest.code === 0 ? "pytest -x -q 2>&1"    : null,
+    hasNpm.code   === 0 ? "npm test 2>&1"          : null,
+    hasCargo.code === 0 ? "cargo test 2>&1"        : null,
+    hasGo.code    === 0 ? "go test ./... 2>&1"     : null,
+    hasMake.code  === 0 ? "make test 2>&1"         : null,
+  ] as (string | null)[]).filter((c): c is string => c !== null);
+}
+
+// Runs several test commands in parallel (one pi.exec("bash", ...) call each, cwd = ctx.cwd) and returns a
+// combined output block for judgeWithTestsPrompt(): one "=== cmd [status] ===" section per command.
+async function runTestsParallel(
   pi: ExtensionAPI,
   ctx: ExtensionCommandContext,
-  cmd: string
+  cmds: string[]
 ): Promise<string> {
-  const result = await pi.exec("bash", ["-c", cmd], { cwd: ctx.cwd });
-  const output = (result.stdout + (result.stderr ? "\n" + result.stderr : "")).trim();
-  const MAX = 6000;
-  if (output.length > MAX) {
-    return output.slice(0, MAX) + `\n\n[… Ausgabe gekürzt, ${output.length} Zeichen gesamt]`;
-  }
-  return output || "(kein Output)";
+  const results = await Promise.all( // every pi.exec call is issued before the results are awaited
+    cmds.map(cmd => pi.exec("bash", ["-c", cmd], { cwd: ctx.cwd }))
+  );
+  const MAX_PER = Math.max(1000, Math.floor(6000 / cmds.length)); // per-command budget: equal share of 6000 chars, never below 1000
+  return results.map((r, i) => {
+    const raw = (r.stdout + (r.stderr ? "\n" + r.stderr : "")).trim(); // stdout, then stderr on a new line when non-empty
+    const out = raw.length > MAX_PER
+      ? raw.slice(0, MAX_PER) + `\n[… gekürzt, ${raw.length} Zeichen]` // keep the head and report the untruncated length
+      : raw || "(kein Output)"; // placeholder when the command printed nothing
+    const status = r.code === 0 ? "✓ OK" : `✗ Exit ${r.code}`; // code 0 is shown as OK, anything else with its code
+    return `=== ${cmds[i]} [${status}] ===\n${out}`; // section header carries the command and its status
+  }).join("\n\n"); // NOTE(review): with more than 6 commands the 1000-char floor lets the total exceed 6000 chars
 }
 
 // Liest den Text der letzten Assistenten-Antwort aus dem Session-Branch.
@@ -655,7 +695,7 @@ export default function (pi: ExtensionAPI) {
   // ── Automatische Optimierungsschleife ────────────────────────────────────
 
   pi.registerCommand("optimize", {
-    description: "Coder→Judge→Fix-Schleife bis PASS + optional Doku. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"befehl\"]",
+    description: "Coder→Judge→Fix-Schleife bis PASS. Tests werden automatisch erkannt und parallel ausgeführt. /optimize <auftrag> [--rounds N] [--with-doku] [--continue] [--test-cmd \"override\"]",
     handler: async function (args: string, ctx: ExtensionCommandContext) {
       const roundsMatch = (args || "").match(/--rounds\s+(\d+)/);
       const maxRounds = roundsMatch ? Math.max(1, parseInt(roundsMatch[1], 10)) : 3;
@@ -698,6 +738,22 @@ export default function (pi: ExtensionAPI) {
         if (cancelRequested) { cancelRequested = false; finalNotify(ctx, "⛔ Abgebrochen", "Nach Implementierung"); return; }
       }
 
+      // Determine the test suites once: --test-cmd overrides the auto-detection.
+      // Runs after the coder so that newly created test files are already detected.
+      ctx.ui.setStatus("optimize", "Test-Suiten werden erkannt…");
+      const autoTestCmds: string[] = testCmd // testCmd is defined outside this hunk — presumably the parsed --test-cmd value; verify
+        ? [testCmd]
+        : await detectTestCommands(pi, ctx);
+      if (autoTestCmds.length > 0) {
+        const label = autoTestCmds.map(c => c.split(" ")[0]).join(", "); // first word of each command, e.g. "pytest, npm"
+        ctx.ui.notify(
+          `${autoTestCmds.length} Test-Suite${autoTestCmds.length > 1 ? "n" : ""} erkannt: ${label}`,
+          "info"
+        );
+      } else {
+        ctx.ui.notify("Keine Test-Suiten erkannt — Judge führt Tests selbst aus.", "info"); // empty list: the loop below takes its non-test branch
+      }
+
       let lastBlockers = "";
       let verdict = "";
 
@@ -706,10 +762,12 @@ export default function (pi: ExtensionAPI) {
         const prog = "●".repeat(round - 1) + "◉" + "○".repeat(maxRounds - round);
         await switchModel(pi, ctx, "llama-cpp-judge", "qwen3.5-judge");
 
-        if (testCmd) {
-          // Tests laufen in der Extension — Judge bekommt den Output fertig geliefert
-          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${testCmd})…`);
-          const testOutput = await runTests(pi, ctx, testCmd);
+        if (autoTestCmds.length > 0) {
+          const label = autoTestCmds.length === 1
+            ? autoTestCmds[0].split(" ")[0]
+            : `${autoTestCmds.length} Suiten parallel`;
+          ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Tests laufen (${label})…`);
+          const testOutput = await runTestsParallel(pi, ctx, autoTestCmds);
           ctx.ui.setStatus("optimize", `${prog} Runde ${round}/${maxRounds}: Judge analysiert Test-Ergebnis…`);
           await sendAndWait(pi, ctx, judgeWithTestsPrompt(testOutput, ""));
         } else {