chore: init pi_coder repository
Pi agent extension, model config, and LLaMA server startup scripts for the coder/judge workflow (ports 8001/8002).
This commit is contained in:
commit
4074e10c1a
6 changed files with 1075 additions and 0 deletions
90
start-coder.sh
Executable file
90
start-coder.sh
Executable file
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
HF_HOME="${HF_HOME:-/home/dschlueter/nvme2n1p7_home/huggingface}"
|
||||
MODEL_REL_PATH="models/qwen3/Qwen3.6-27B-Uncensored-HauhauCS-Aggressive-IQ4_XS.gguf"
|
||||
IMAGE="ghcr.io/ggml-org/llama.cpp:server-cuda"
|
||||
CONTAINER_NAME="qwen36-27b-coder"
|
||||
HOST_PORT=8001
|
||||
CONTAINER_PORT=8000
|
||||
MODEL_ALIAS="qwen3.5-coder"
|
||||
|
||||
echo "[*] Verwende HF_HOME = $HF_HOME"
|
||||
if [ ! -f "$HF_HOME/$MODEL_REL_PATH" ]; then
|
||||
echo "[!] Modell-Datei nicht gefunden: $HF_HOME/$MODEL_REL_PATH" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}\$"; then
|
||||
echo "[*] Stoppe existierenden Container $CONTAINER_NAME ..."
|
||||
docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
echo "[*] Starte llama.cpp-Server für Coder ..."
|
||||
docker run -d \
|
||||
--gpus '"device=1,2"' \
|
||||
--name "$CONTAINER_NAME" \
|
||||
--restart unless-stopped \
|
||||
-e HF_HOME="/hf_home" \
|
||||
-v "$HF_HOME:/hf_home:ro" \
|
||||
-p "${HOST_PORT}:${CONTAINER_PORT}" \
|
||||
"$IMAGE" \
|
||||
-m "/hf_home/${MODEL_REL_PATH}" \
|
||||
--alias "${MODEL_ALIAS}" \
|
||||
-c 131072 \
|
||||
-n 16384 \
|
||||
--jinja \
|
||||
--no-context-shift \
|
||||
--temp 0.2 \
|
||||
--top-p 0.95 \
|
||||
--top-k 40 \
|
||||
--min-p 0.01 \
|
||||
--repeat-penalty 1.05 \
|
||||
--main-gpu 0 \
|
||||
--tensor-split 0.5,0.5 \
|
||||
-ngl 999 \
|
||||
-fa on \
|
||||
--kv-unified \
|
||||
--cache-type-k q8_0 \
|
||||
--cache-type-v q8_0 \
|
||||
--batch-size 1024 \
|
||||
--ubatch-size 512 \
|
||||
--parallel 2 \
|
||||
--cont-batching \
|
||||
--host 0.0.0.0 \
|
||||
--port "$CONTAINER_PORT"
|
||||
|
||||
echo "[*] Warte auf HTTP ..."
|
||||
HTTP_READY=0
|
||||
for i in {1..90}; do
|
||||
if curl -s "http://localhost:${HOST_PORT}/health" >/dev/null 2>&1 || \
|
||||
curl -s "http://localhost:${HOST_PORT}/v1/models" >/dev/null 2>&1; then
|
||||
HTTP_READY=1
|
||||
break
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
|
||||
if [ "$HTTP_READY" -ne 1 ]; then
|
||||
echo "[!] HTTP-Server wurde nicht rechtzeitig erreichbar." >&2
|
||||
docker logs --tail 200 "$CONTAINER_NAME" || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[*] Teste Chat-Completion ..."
|
||||
curl -s -X POST "http://localhost:${HOST_PORT}/v1/chat/completions" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"model\": \"${MODEL_ALIAS}\",
|
||||
\"messages\": [
|
||||
{ \"role\": \"system\", \"content\": \"Du bist ein präziser Coding-Assistent.\" },
|
||||
{ \"role\": \"user\", \"content\": \"Antworte nur mit dem Wort: bereit\" }
|
||||
],
|
||||
\"max_tokens\": 8,
|
||||
\"temperature\": 0.0,
|
||||
\"stream\": false
|
||||
}"
|
||||
|
||||
echo
|
||||
echo "[*] Server bereit auf http://0.0.0.0:${HOST_PORT}"
|
||||
echo "[*] Stoppen mit: docker rm -f ${CONTAINER_NAME}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue