#!/usr/bin/env bash
#
# Starts a llama.cpp embedding server in a Docker container, waits until the
# server reports that it is ready, and sends one test request to
# /v1/embeddings.
#
# Environment:
#   HF_HOME  Host directory that contains the model file at MODEL_REL_PATH.
#            It is bind-mounted read-only into the container as /hf_home.
#
# Exit status:
#   0  server started and the test request was sent
#   1  model file missing, or the server did not become ready in time
set -euo pipefail

# Configuration
HF_HOME="${HF_HOME:-/home/dschlueter/nvme2n1p7_home/huggingface}"
MODEL_REL_PATH="models/embeddings/bge-m3-q8_0.gguf"
IMAGE="ghcr.io/ggml-org/llama.cpp:server-cuda"
CONTAINER_NAME="qwen-embeddings"
HOST_PORT=8001
CONTAINER_PORT=8001

echo "[*] Verwende HF_HOME = $HF_HOME"

if [[ ! -f "$HF_HOME/$MODEL_REL_PATH" ]]; then
  echo "[!] Embedding-Modell-Datei nicht gefunden: $HF_HOME/$MODEL_REL_PATH" >&2
  exit 1
fi

# Remove a leftover container with the same name; otherwise
# `docker run --name` below fails with a name conflict.
# The listing is captured first (instead of piping into `grep -q`) so that
# grep exiting early cannot make the pipeline fail under `pipefail`.
# -x -F compares the whole line literally, so no regex escaping is needed.
existing_containers="$(docker ps -a --format '{{.Names}}')"
if grep -qxF -- "$CONTAINER_NAME" <<<"$existing_containers"; then
  echo "[*] Stoppe existierenden Container $CONTAINER_NAME ..."
  # Best effort: the container may have disappeared in the meantime.
  docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
fi

echo "[*] Starte llama.cpp-Embedding-Server-Container ($IMAGE) ..."

docker run -d --gpus '"device=0"' \
  --name "$CONTAINER_NAME" \
  -e HF_HOME="/hf_home" \
  -v "$HF_HOME:/hf_home:ro" \
  -p "${HOST_PORT}:${CONTAINER_PORT}" \
  "$IMAGE" \
  --embedding \
  -m "/hf_home/${MODEL_REL_PATH}" \
  -c 8192 \
  -ngl 999 \
  -fa on \
  --batch-size 1024 \
  --ubatch-size 512 \
  --host 0.0.0.0 \
  --port "$CONTAINER_PORT"

echo "[*] Container gestartet: $CONTAINER_NAME"
echo "[*] Warte, bis HTTP-Port ${HOST_PORT} antwortet ..."

# Poll the health endpoint for up to 60 * 2 s. `-f` makes curl fail on HTTP
# error statuses, so a "still loading" response does not count as ready.
READY=0
for i in {1..60}; do
  if curl -sf "http://localhost:${HOST_PORT}/health" >/dev/null 2>&1; then
    echo "[*] Server antwortet auf http://localhost:${HOST_PORT}/"
    READY=1
    break
  fi

  # Stop waiting as soon as the container is no longer running (e.g. it
  # crashed during startup); the log dump below then shows the reason.
  container_running="$(docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null || true)"
  if [[ "$container_running" != "true" ]]; then
    echo "[!] Container $CONTAINER_NAME läuft nicht mehr." >&2
    break
  fi

  echo "[*] Warte (${i}/60) ..."
  sleep 2
done

if [[ "$READY" -ne 1 ]]; then
  echo "[!] Embedding-Server wurde nicht rechtzeitig erreichbar." >&2
  echo "[*] Letzte Container-Logs:"
  docker logs --tail 200 "$CONTAINER_NAME" || true
  exit 1
fi

echo "[*] Sende Test-Embedding-Request an /v1/embeddings ..."

# -S keeps curl quiet on success but prints the reason if the request fails
# (the script then aborts because of `set -e`).
RESPONSE="$(curl -sS -X POST "http://localhost:${HOST_PORT}/v1/embeddings" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "bge-m3-q8_0",
    "input": "Dies ist ein kurzer Testtext für den Embedding-Server."
  }')"

echo
echo "[*] Antwort vom Server:"
printf '%s\n' "$RESPONSE"
echo
echo "[*] Zum Stoppen des Servers:"
echo " docker rm -f $CONTAINER_NAME"