Files
llama-sidecar/internal/server/proxy.go
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

112 lines
2.9 KiB
Go

package server
import (
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"net/http/httputil"
"net/url"
"strings"
"github.com/indifferentketchup/llama-sidecar/internal/pool"
)
// shellUnsafe deletes the characters backtick, dollar, pipe, semicolon,
// ampersand and newline from a string. parseFlags compares its output with
// the input to detect whether any of those characters were present.
var shellUnsafe = strings.NewReplacer(
	"`", "",
	"$", "",
	"|", "",
	";", "",
	"&", "",
	"\n", "",
)
// parseFlags validates a raw flag string and splits it into arguments.
//
// The input is rejected with an error when it contains any character that
// shellUnsafe would remove. Otherwise leading and trailing whitespace is
// trimmed and the remainder is handed to splitArgs.
func parseFlags(raw string) ([]string, error) {
	// If the replacer changed anything, at least one unsafe character was present.
	if sanitized := shellUnsafe.Replace(raw); sanitized != raw {
		return nil, fmt.Errorf("flags contain unsafe characters")
	}
	trimmed := strings.TrimSpace(raw)
	return splitArgs(trimmed), nil
}
// splitArgs breaks s into whitespace-separated fields.
// An empty string yields a nil slice.
func splitArgs(s string) []string {
	if len(s) > 0 {
		return strings.Fields(s)
	}
	return nil
}
// proxyHandler returns an http.HandlerFunc that picks a sidecar from p for
// each request and reverse-proxies the request to it.
//
// For every request the handler:
//   - parses the optional X-Agent-Flags header with parseFlags and answers
//     400 if the flags are rejected;
//   - takes the model ID from the X-Model-Id header or, when that is absent,
//     from the "model" field of the JSON body. The body is buffered (up to
//     1 MiB) and then restored so it can still be forwarded. A larger body
//     is answered with 413, and a request with no model at all with 400;
//   - calls p.Acquire and, on failure, maps the error text to 400, 404 or
//     503, defaulting to 500;
//   - forwards the request to http://127.0.0.1:<sidecar.Port>, answering 502
//     with diagnostic fields if the upstream request fails.
func proxyHandler(p *pool.Pool) http.HandlerFunc {
	// maxSniffBytes caps how much of the body is buffered in memory while
	// looking for body.model.
	const maxSniffBytes = 1 << 20

	return func(w http.ResponseWriter, r *http.Request) {
		var flags []string
		if flagsRaw := r.Header.Get("X-Agent-Flags"); flagsRaw != "" {
			parsed, err := parseFlags(flagsRaw)
			if err != nil {
				writeJSON(w, http.StatusBadRequest, map[string]string{
					"error": err.Error(),
				})
				return
			}
			flags = parsed
		}

		modelID := r.Header.Get("X-Model-Id")
		if modelID == "" {
			// Read one byte past the cap so an oversized body can be told
			// apart from one that fits exactly.
			body, err := io.ReadAll(io.LimitReader(r.Body, maxSniffBytes+1))
			if err != nil {
				writeJSON(w, http.StatusBadRequest, map[string]string{"error": "failed to read body"})
				return
			}
			if len(body) > maxSniffBytes {
				// A truncated body can never parse as JSON, so without this
				// check the client would get a misleading "model not
				// specified" error.
				writeJSON(w, http.StatusRequestEntityTooLarge, map[string]string{
					"error": "request body too large to read model from (limit 1 MiB); set the X-Model-Id header",
				})
				return
			}

			var req struct {
				Model string `json:"model"`
			}
			// A body that is not valid JSON is not an error here; it just
			// leaves modelID empty and is rejected below.
			if err := json.Unmarshal(body, &req); err == nil && req.Model != "" {
				modelID = req.Model
			}

			// Restore the consumed body so the reverse proxy can forward it.
			r.Body = io.NopCloser(strings.NewReader(string(body)))
			r.ContentLength = int64(len(body))
		}
		if modelID == "" {
			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "model not specified (X-Model-Id header or body.model)"})
			return
		}

		sidecar, err := p.Acquire(r.Context(), modelID, flags)
		if err != nil {
			// The status is chosen by matching markers in the error text.
			msg := err.Error()
			status := http.StatusInternalServerError
			switch {
			case strings.Contains(msg, "validation:"):
				status = http.StatusBadRequest
			case strings.Contains(msg, "unknown model:"):
				status = http.StatusNotFound
			case strings.Contains(msg, "port allocation:"):
				status = http.StatusServiceUnavailable
			}
			writeJSON(w, status, map[string]string{"error": msg})
			return
		}

		target := &url.URL{
			Scheme: "http",
			Host:   fmt.Sprintf("127.0.0.1:%d", sidecar.Port),
		}
		proxy := httputil.NewSingleHostReverseProxy(target)
		// Log upstream failures and report them to the client as 502 with
		// enough context to identify the sidecar involved.
		proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, err error) {
			slog.Error("upstream error", "hash", sidecar.Hash, "port", sidecar.Port, "err", err)
			writeJSON(rw, http.StatusBadGateway, map[string]any{
				"error":        "upstream unavailable",
				"error_detail": err.Error(),
				"sidecar_hash": sidecar.Hash,
				"sidecar_port": sidecar.Port,
				"last_stderr":  sidecar.LastStderr(),
			})
		}

		sidecar.TouchLastUsed()
		proxy.ServeHTTP(w, r)
	}
}
// writeJSON sends v as a JSON response with the given HTTP status code.
//
// It sets Content-Type to application/json, writes the status, then streams
// the JSON encoding of v to w. The header has already been written by the
// time encoding starts, so an encode or write failure cannot be reported to
// the client; it is logged instead of being silently dropped.
func writeJSON(w http.ResponseWriter, status int, v any) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if err := json.NewEncoder(w).Encode(v); err != nil {
		slog.Error("writing JSON response", "status", status, "err", err)
	}
}