- Neue Befehle: /plan (Planungsmodus, nur PLAN.md), /cancel (Loop-Abbruch), /continue (Resume nach Unterbrechung), /discard (PLAN.md verwerfen)
- contextWindow in models.json und llama.cpp-Servern: 131072 → 262144
- KV-Cache: q8_0 → q4_0 (weniger VRAM, passt zu 262k-Kontext auf 2× 3090)
- parallel: 2 → 1 beim Coder (stabiler bei großem Kontext)
- Optimize-Status mit ASCII-Fortschrittsbalken + Blocker-Preview
- cancelRequested-Flag wird nach jedem Loop-Schritt geprüft

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
111 lines · 3.4 KiB · JSON
{
  "providers": {
    "ollama": {
      "baseUrl": "http://localhost:11434/v1",
      "api": "openai-completions",
      "apiKey": "ollama",
      "compat": {
        "supportsDeveloperRole": false,
        "supportsReasoningEffort": false
      },
      "models": [
        { "id": "qwen2.5-coder:7b", "name": "Qwen2.5 Coder 7B (schnell)" },
        { "id": "qwen3-coder-30b-gpu:latest", "name": "Qwen3 Coder 30B GPU (Standard)" },
        { "id": "mistral-small3.2:24b", "name": "Mistral Small 3.2 24B" },
        { "id": "deepseek-r1:32b", "name": "DeepSeek R1 32B (Reasoning)" }
      ]
    },

    "llama-cpp": {
      "baseUrl": "http://127.0.0.1:8000/v1",
      "api": "openai-completions",
      "apiKey": "none",
      "compat": {
        "supportsDeveloperRole": false,
        "supportsReasoningEffort": false,
        "maxTokensField": "max_tokens",
        "thinkingFormat": "qwen-chat-template"
      },
      "models": [
        {
          "id": "qwen35b-uncensored",
          "name": "Qwen3.6 35B Uncensored (llama.cpp :8000)"
        },
        {
          "id": "qwen35b-moe-tools",
          "name": "Qwen3.6 35B MoE Tools (llama.cpp :8000)"
        }
      ]
    },

    "llama-cpp-coder": {
      "baseUrl": "http://127.0.0.1:8001/v1",
      "api": "openai-completions",
      "apiKey": "none",
      "compat": {
        "supportsDeveloperRole": false,
        "supportsReasoningEffort": false,
        "maxTokensField": "max_tokens",
        "thinkingFormat": "qwen-chat-template"
      },
      "models": [
        {
          "id": "qwen3.5-coder",
          "name": "Qwen3.6 27B Coder (llama.cpp :8001)",
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 262144,
          "maxTokens": 16384,
          "cost": {
            "input": 0,
            "output": 0,
            "cacheRead": 0,
            "cacheWrite": 0
          }
        }
      ]
    },

    "llama-cpp-judge": {
      "baseUrl": "http://127.0.0.1:8002/v1",
      "api": "openai-completions",
      "apiKey": "none",
      "compat": {
        "supportsDeveloperRole": false,
        "supportsReasoningEffort": false,
        "maxTokensField": "max_tokens",
        "thinkingFormat": "qwen-chat-template"
      },
      "models": [
        {
          "id": "qwen3.5-judge",
          "name": "Qwen3.6 27B Judge (llama.cpp :8002)",
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 262144,
          "maxTokens": 8192,
          "cost": {
            "input": 0,
            "output": 0,
            "cacheRead": 0,
            "cacheWrite": 0
          }
        }
      ]
    },

    "openrouter": {
      "models": [
        { "id": "qwen/qwen3-235b-a22b:free", "name": "Qwen3 235B (Free)" },
        { "id": "deepseek/deepseek-r1:free", "name": "DeepSeek R1 (Free)" },
        { "id": "google/gemini-2.5-pro-exp-03-25:free", "name": "Gemini 2.5 Pro (Free)" },
        { "id": "meta-llama/llama-4-maverick:free", "name": "Llama 4 Maverick (Free)" },
        { "id": "microsoft/phi-4:free", "name": "Phi-4 (Free)" },
        { "id": "qwen/qwen-2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B (günstig)" },
        { "id": "deepseek/deepseek-r1", "name": "DeepSeek R1 Full (Reasoning)" },
        { "id": "qwen/qwen3-235b-a22b", "name": "Qwen3 235B Full" }
      ]
    }
  }
}