Files
llama-sidecar/internal/config/config.go
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

140 lines
3.4 KiB
Go

package config
import (
"bytes"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
)
// utf8BOM is the three-byte UTF-8 byte order mark. It is trimmed from the
// front of the model-map file contents and of the BASE_ARGS value before
// either is handed to json.Unmarshal.
var utf8BOM = []byte{0xEF, 0xBB, 0xBF}
// Config holds the settings that Load assembles from environment variables.
type Config struct {
// Bind comes from LLAMA_SIDECAR_BIND; Load defaults it to "127.0.0.1:8402".
Bind string
// LlamaServerBin is the value of LLAMA_SERVER_BIN. Load requires it to be
// set and to pass os.Stat.
LlamaServerBin string
// ModelDirMap is the non-empty string-to-string JSON object decoded from
// the file named by MODEL_DIR_MAP_FILE.
// NOTE(review): the meaning of keys and values is not visible in this
// file - presumably model name to directory; confirm against callers.
ModelDirMap map[string]string
// PortRangeLo and PortRangeHi are the two bounds parsed from PORT_RANGE
// ("lo-hi" form); Load defaults the range to "8500-8599" and requires it
// to contain at least MaxSidecars ports, counting both bounds.
PortRangeLo int
PortRangeHi int
// MaxSidecars comes from MAX_SIDECARS; Load defaults it to 2.
MaxSidecars int
// LogLevel comes from LOG_LEVEL; Load defaults it to "info".
LogLevel string
// BaseArgs is the JSON string array in BASE_ARGS when that variable is
// non-empty, otherwise the result of defaultBaseArgs.
BaseArgs []string
// HealthTimeoutSeconds comes from HEALTH_TIMEOUT_SECONDS; Load defaults it
// to 60.
HealthTimeoutSeconds int
// HealthIntervalSeconds comes from HEALTH_INTERVAL_SECONDS; Load defaults
// it to 30.
HealthIntervalSeconds int
}
// Load builds a Config from the process environment.
//
// LLAMA_SERVER_BIN and MODEL_DIR_MAP_FILE are mandatory: the former must name
// a path that os.Stat accepts, the latter a JSON file that loadModelMap can
// decode. Every other setting is optional and falls back to a built-in
// default (see the individual lookups below).
//
// Checks run in a fixed order and the first failure is returned: server
// binary, model map, port range syntax, port range size versus MAX_SIDECARS,
// and finally BASE_ARGS, which must be a JSON array of strings when set. On
// error the returned *Config is nil.
func Load() (*Config, error) {
	serverBin := os.Getenv("LLAMA_SERVER_BIN")
	if serverBin == "" {
		return nil, fmt.Errorf("LLAMA_SERVER_BIN is required")
	}
	if _, statErr := os.Stat(serverBin); statErr != nil {
		return nil, fmt.Errorf("LLAMA_SERVER_BIN %q: %w", serverBin, statErr)
	}

	mapPath := os.Getenv("MODEL_DIR_MAP_FILE")
	if mapPath == "" {
		return nil, fmt.Errorf("MODEL_DIR_MAP_FILE is required")
	}
	models, err := loadModelMap(mapPath)
	if err != nil {
		return nil, fmt.Errorf("MODEL_DIR_MAP_FILE: %w", err)
	}

	// Optional settings; the port range and any BASE_ARGS override are
	// filled in below once they have been validated.
	cfg := &Config{
		Bind:                  envOr("LLAMA_SIDECAR_BIND", "127.0.0.1:8402"),
		LlamaServerBin:        serverBin,
		ModelDirMap:           models,
		MaxSidecars:           envIntOr("MAX_SIDECARS", 2),
		LogLevel:              envOr("LOG_LEVEL", "info"),
		BaseArgs:              defaultBaseArgs(),
		HealthTimeoutSeconds:  envIntOr("HEALTH_TIMEOUT_SECONDS", 60),
		HealthIntervalSeconds: envIntOr("HEALTH_INTERVAL_SECONDS", 30),
	}

	cfg.PortRangeLo, cfg.PortRangeHi, err = parsePortRange(envOr("PORT_RANGE", "8500-8599"))
	if err != nil {
		return nil, fmt.Errorf("PORT_RANGE: %w", err)
	}
	// Both bounds are counted, hence the +1.
	if portCount := cfg.PortRangeHi - cfg.PortRangeLo + 1; portCount < cfg.MaxSidecars {
		return nil, fmt.Errorf("PORT_RANGE %d-%d has %d ports but MAX_SIDECARS is %d", cfg.PortRangeLo, cfg.PortRangeHi, portCount, cfg.MaxSidecars)
	}

	if raw := os.Getenv("BASE_ARGS"); raw != "" {
		var override []string
		// The value may carry a UTF-8 BOM; drop it before decoding.
		if err := json.Unmarshal(bytes.TrimPrefix([]byte(raw), utf8BOM), &override); err != nil {
			return nil, fmt.Errorf("BASE_ARGS: invalid JSON array: %w", err)
		}
		// The override replaces the defaults wholesale; nothing is merged.
		cfg.BaseArgs = override
	}

	return cfg, nil
}
// defaultBaseArgs returns the argument list Load stores in Config.BaseArgs
// when the BASE_ARGS environment variable is unset or empty. Every call
// builds a new slice, so the caller is free to modify the result.
func defaultBaseArgs() []string {
	args := make([]string, 0, 7)
	args = append(args, "-ngl", "999")
	args = append(args, "-c", "32768")
	args = append(args, "--flash-attn", "on")
	args = append(args, "--no-mmap")
	return args
}
// loadModelMap reads the file at path and decodes it as a JSON object whose
// keys and values are all strings. A leading UTF-8 byte order mark is removed
// before decoding.
//
// A read failure is returned unchanged, a decode failure is wrapped with an
// "invalid JSON" prefix, and a file that decodes to zero entries is rejected.
func loadModelMap(path string) (map[string]string, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, readErr
	}

	var models map[string]string
	if decodeErr := json.Unmarshal(bytes.TrimPrefix(raw, utf8BOM), &models); decodeErr != nil {
		return nil, fmt.Errorf("invalid JSON: %w", decodeErr)
	}
	if len(models) < 1 {
		return nil, fmt.Errorf("model map is empty")
	}
	return models, nil
}
// parsePortRange parses a "lo-hi" TCP port range such as "8500-8599" and
// returns the two bounds.
//
// Whitespace around either bound is ignored. An error is returned when s is
// not in lo-hi form, when a bound is not a decimal integer, when a bound lies
// outside 1-65535, or when hi is not strictly greater than lo. On error both
// returned ints are zero.
func parsePortRange(s string) (int, int, error) {
	const (
		minPort = 1
		maxPort = 65535
	)

	parts := strings.SplitN(s, "-", 2)
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("expected lo-hi format, got %q", s)
	}
	lo, err := strconv.Atoi(strings.TrimSpace(parts[0]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid lo port: %w", err)
	}
	hi, err := strconv.Atoi(strings.TrimSpace(parts[1]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid hi port: %w", err)
	}
	// Reject numbers that are not usable TCP ports here, at configuration
	// time, instead of letting them surface later as a failed listen.
	if lo < minPort || lo > maxPort {
		return 0, 0, fmt.Errorf("lo port %d out of range %d-%d", lo, minPort, maxPort)
	}
	if hi < minPort || hi > maxPort {
		return 0, 0, fmt.Errorf("hi port %d out of range %d-%d", hi, minPort, maxPort)
	}
	if hi <= lo {
		return 0, 0, fmt.Errorf("hi (%d) must be > lo (%d)", hi, lo)
	}
	return lo, hi, nil
}
// envOr returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
// envIntOr returns the environment variable key parsed as a decimal integer.
//
// Leading and trailing whitespace is ignored, matching how parsePortRange
// treats its bounds; without this a value such as "4 " or "4\r" would be
// silently replaced by the default. fallback is returned when the variable is
// unset, empty or whitespace-only, or not a valid integer.
func envIntOr(key string, fallback int) int {
	v := strings.TrimSpace(os.Getenv(key))
	if v == "" {
		return fallback
	}
	n, err := strconv.Atoi(v)
	if err != nil {
		// Best-effort by design: the signature has no error result, so an
		// unparsable value yields the default rather than a failure.
		return fallback
	}
	return n
}