Neuer /version-Command und automatischer Trigger nach SHIP-Verdikt in /optimize:

- getCurrentVersion() liest höchsten vX.Y.Z-Tag (git tag -l | sort -V)
- analyzeBumpType() klassifiziert Commits (feat! → major, feat: → minor, fix: → patch)
- detectVersionFile() findet package.json / Cargo.toml / pyproject.toml / VERSION
- applyVersionBump() schreibt Version in Manifest + chore-Commit
- runVersionBump() zeigt ctx.ui.select()-Dialog mit empfohlenem Bump-Typ

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
78 lines
2.5 KiB
Bash
Executable file
78 lines
2.5 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Starts a llama.cpp server container for the judge model and then polls its
# chat-completion endpoint until the model answers.
#
# Environment:
#   HF_HOME  Host directory containing the model files. It is mounted
#            read-only into the container. When unset or empty, the default
#            path assigned below is used.

# Abort on command errors, on use of unset variables, and on failures in any
# stage of a pipeline.
set -euo pipefail

# Host-side model root; the only setting that can be overridden from outside.
HF_HOME="${HF_HOME:-/home/dschlueter/nvme2n1p7_home/huggingface}"

# Path of the GGUF model file, relative to HF_HOME.
MODEL_REL_PATH="models/qwen3/Qwen3.6-27B-Uncensored-HauhauCS-Aggressive-IQ4_XS.gguf"

# Container image that is started.
IMAGE="ghcr.io/ggml-org/llama.cpp:server-cuda"

# Name given to the container; an existing container with this name is
# removed before a new one is started.
CONTAINER_NAME="qwen36-27b-judge"

# Port published on the host, and the port the server listens on inside the
# container.
HOST_PORT=8002
CONTAINER_PORT=8000

# Model name passed to the server via --alias and sent in the readiness probe.
# NOTE(review): the alias says "qwen3.5" while the model file and container
# name say 3.6 - confirm with the clients before renaming.
MODEL_ALIAS="qwen3.5-judge"

echo "[*] Verwende HF_HOME = $HF_HOME"

# Fail fast when the model file is missing on the host, before any container
# is touched.
if [[ ! -f "$HF_HOME/$MODEL_REL_PATH" ]]; then
  echo "[!] Modell-Datei nicht gefunden: $HF_HOME/$MODEL_REL_PATH" >&2
  exit 1
fi

# Remove a previous container of the same name (running or stopped) so that
# the following `docker run --name` does not hit a name conflict.
#
# `docker container inspect` is used for the existence check instead of
# `docker ps | grep -q`: under `set -o pipefail` the pipeline can report
# failure even on a match, because grep -q exits on the first hit and the
# producer may then be killed by SIGPIPE.
if docker container inspect "$CONTAINER_NAME" >/dev/null 2>&1; then
  echo "[*] Stoppe existierenden Container $CONTAINER_NAME ..."
  # Best effort: if removal fails, `docker run` reports the name conflict.
  docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
fi

echo "[*] Starte llama.cpp-Server für Judge ..."

# Options consumed by `docker run` itself (everything before the image name).
docker_opts=(
  -d
  # The inner double quotes are part of the value handed to docker.
  --gpus '"device=1,2"'
  --name "$CONTAINER_NAME"
  --restart unless-stopped
  -e HF_HOME="/hf_home"
  # Host model directory, mounted read-only at /hf_home.
  -v "$HF_HOME:/hf_home:ro"
  -p "${HOST_PORT}:${CONTAINER_PORT}"
)

# Arguments passed through to the server process inside the container.
server_args=(
  # Model file (path inside the container) and the name it is served under.
  -m "/hf_home/${MODEL_REL_PATH}"
  --alias "${MODEL_ALIAS}"

  # Context / generation limits and chat template handling.
  -c 262144
  -n 16384
  --jinja
  --chat-template-kwargs '{"enable_thinking":true}'
  --no-context-shift

  # Sampling defaults.
  --temp 0.7
  --top-p 0.80
  --top-k 20
  --min-p 0.01
  --repeat-penalty 1.05

  # GPU placement.
  # NOTE(review): presumably these indices refer to the devices as seen
  # inside the container, not to the host IDs in --gpus - confirm.
  --main-gpu 0
  --tensor-split 0.5,0.5
  -ngl 999
  -fa on

  # KV cache and batching.
  --kv-unified
  --cache-type-k q4_0
  --cache-type-v q4_0
  --batch-size 512
  --ubatch-size 256
  --parallel 1
  --cont-batching

  # Listen address inside the container; must match the published port.
  --host 0.0.0.0
  --port "$CONTAINER_PORT"
)

docker run "${docker_opts[@]}" "$IMAGE" "${server_args[@]}"

echo "[*] Warte auf Modell-Bereitschaft (Completion-Check, max. 180 s) ..."

# Poll the chat-completion endpoint until it answers with HTTP 200.
# Up to 90 attempts with a 2 s pause between them (= 180 s of waiting), plus
# whatever each request takes, capped at 10 s by --max-time.
MODEL_READY=0
for i in {1..90}; do
  # curl exits non-zero whenever the request itself fails (connection
  # refused, reset, timeout). The script runs under `set -e`, so without
  # `|| true` the first such failure would abort the script instead of
  # retrying. curl still prints the status code via -w in that case.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
    -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{\"model\":\"${MODEL_ALIAS}\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"max_tokens\":1,\"temperature\":0.0,\"stream\":false}") || true

  if [[ "$HTTP_CODE" == "200" ]]; then
    MODEL_READY=1
    break
  fi

  echo " [${i}/90] HTTP ${HTTP_CODE:-000} — Modell lädt noch, warte 2s ..."
  sleep 2
done

# Give up when no probe returned HTTP 200: print the error and the last 200
# container log lines to stderr, then exit non-zero.
if [[ "$MODEL_READY" -ne 1 ]]; then
  echo "[!] Modell wurde nicht rechtzeitig bereit (kein HTTP 200 auf Completion)." >&2
  # Best effort: a failing `docker logs` must not hide the real error.
  docker logs --tail 200 "$CONTAINER_NAME" >&2 || true
  exit 1
fi

echo "[*] Modell bereit — erster Completion-Request erfolgreich (HTTP 200)."
echo "[*] Server läuft auf http://0.0.0.0:${HOST_PORT}"
echo "[*] Stoppen mit: docker rm -f ${CONTAINER_NAME}"