Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
43 lines
1.2 KiB
Go
43 lines
1.2 KiB
Go
package server
|
|
|
|
import (
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/indifferentketchup/llama-sidecar/internal/config"
|
|
"github.com/indifferentketchup/llama-sidecar/internal/pool"
|
|
)
|
|
|
|
func healthHandler(p *pool.Pool, cfg *config.Config, startedAt time.Time) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
sidecars := p.List()
|
|
writeJSON(w, http.StatusOK, map[string]any{
|
|
"status": "ok",
|
|
"sidecars": len(sidecars),
|
|
"max": cfg.MaxSidecars,
|
|
"uptime_seconds": int(time.Since(startedAt).Seconds()),
|
|
})
|
|
}
|
|
}
|
|
|
|
func listSidecarsHandler(p *pool.Pool) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
writeJSON(w, http.StatusOK, p.List())
|
|
}
|
|
}
|
|
|
|
func deleteSidecarHandler(p *pool.Pool) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
hash := r.PathValue("hash")
|
|
if hash == "" {
|
|
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "hash required"})
|
|
return
|
|
}
|
|
if err := p.Remove(hash); err != nil {
|
|
writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()})
|
|
return
|
|
}
|
|
writeJSON(w, http.StatusOK, map[string]string{"status": "removed"})
|
|
}
|
|
}
|