llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
139
internal/config/config.go
Normal file
139
internal/config/config.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// utf8BOM is the three-byte UTF-8 byte order mark (EF BB BF). It is trimmed
// from the front of JSON inputs in this file before they reach json.Unmarshal.
var utf8BOM = []byte("\xEF\xBB\xBF")
|
||||
|
||||
// Config holds the daemon settings that Load assembles from environment
// variables. Field order and types are part of the type's identity and are
// kept exactly as declared originally.
type Config struct {
	// Bind comes from LLAMA_SIDECAR_BIND (default "127.0.0.1:8402").
	Bind string
	// LlamaServerBin is the LLAMA_SERVER_BIN path; Load checks it with os.Stat.
	LlamaServerBin string
	// ModelDirMap is the JSON object decoded from the file named by
	// MODEL_DIR_MAP_FILE; Load rejects an empty map.
	ModelDirMap map[string]string
	// PortRangeLo and PortRangeHi are the two bounds parsed from PORT_RANGE
	// (default "8500-8599"). Load counts the range as hi-lo+1 ports.
	PortRangeLo, PortRangeHi int
	// MaxSidecars comes from MAX_SIDECARS (default 2).
	MaxSidecars int
	// LogLevel comes from LOG_LEVEL (default "info").
	LogLevel string
	// BaseArgs is the BASE_ARGS JSON array when that variable is set, and the
	// result of defaultBaseArgs() otherwise.
	BaseArgs []string
	// HealthTimeoutSeconds and HealthIntervalSeconds come from
	// HEALTH_TIMEOUT_SECONDS (default 60) and HEALTH_INTERVAL_SECONDS
	// (default 30) respectively.
	HealthTimeoutSeconds, HealthIntervalSeconds int
}
|
||||
|
||||
func Load() (*Config, error) {
|
||||
bin := os.Getenv("LLAMA_SERVER_BIN")
|
||||
if bin == "" {
|
||||
return nil, fmt.Errorf("LLAMA_SERVER_BIN is required")
|
||||
}
|
||||
if _, err := os.Stat(bin); err != nil {
|
||||
return nil, fmt.Errorf("LLAMA_SERVER_BIN %q: %w", bin, err)
|
||||
}
|
||||
|
||||
mapFile := os.Getenv("MODEL_DIR_MAP_FILE")
|
||||
if mapFile == "" {
|
||||
return nil, fmt.Errorf("MODEL_DIR_MAP_FILE is required")
|
||||
}
|
||||
modelMap, err := loadModelMap(mapFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("MODEL_DIR_MAP_FILE: %w", err)
|
||||
}
|
||||
|
||||
bind := envOr("LLAMA_SIDECAR_BIND", "127.0.0.1:8402")
|
||||
logLevel := envOr("LOG_LEVEL", "info")
|
||||
maxSidecars := envIntOr("MAX_SIDECARS", 2)
|
||||
healthTimeout := envIntOr("HEALTH_TIMEOUT_SECONDS", 60)
|
||||
healthInterval := envIntOr("HEALTH_INTERVAL_SECONDS", 30)
|
||||
|
||||
lo, hi, err := parsePortRange(envOr("PORT_RANGE", "8500-8599"))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("PORT_RANGE: %w", err)
|
||||
}
|
||||
if hi-lo+1 < maxSidecars {
|
||||
return nil, fmt.Errorf("PORT_RANGE %d-%d has %d ports but MAX_SIDECARS is %d", lo, hi, hi-lo+1, maxSidecars)
|
||||
}
|
||||
|
||||
baseArgs := defaultBaseArgs()
|
||||
if env := os.Getenv("BASE_ARGS"); env != "" {
|
||||
var parsed []string
|
||||
envBytes := bytes.TrimPrefix([]byte(env), utf8BOM)
|
||||
if err := json.Unmarshal(envBytes, &parsed); err != nil {
|
||||
return nil, fmt.Errorf("BASE_ARGS: invalid JSON array: %w", err)
|
||||
}
|
||||
baseArgs = parsed
|
||||
}
|
||||
|
||||
return &Config{
|
||||
Bind: bind,
|
||||
LlamaServerBin: bin,
|
||||
ModelDirMap: modelMap,
|
||||
PortRangeLo: lo,
|
||||
PortRangeHi: hi,
|
||||
MaxSidecars: maxSidecars,
|
||||
LogLevel: logLevel,
|
||||
BaseArgs: baseArgs,
|
||||
HealthTimeoutSeconds: healthTimeout,
|
||||
HealthIntervalSeconds: healthInterval,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// defaultBaseArgs returns the argument list Load uses for Config.BaseArgs
// when BASE_ARGS is not set. Each call builds a new slice, so callers may
// modify the result freely.
func defaultBaseArgs() []string {
	args := []string{
		"-ngl", "999",
		"-c", "32768",
		"--flash-attn", "on",
		"--no-mmap",
	}
	return args
}
|
||||
|
||||
func loadModelMap(path string) (map[string]string, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data = bytes.TrimPrefix(data, utf8BOM)
|
||||
var m map[string]string
|
||||
if err := json.Unmarshal(data, &m); err != nil {
|
||||
return nil, fmt.Errorf("invalid JSON: %w", err)
|
||||
}
|
||||
if len(m) == 0 {
|
||||
return nil, fmt.Errorf("model map is empty")
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// parsePortRange parses a port range written as "lo-hi" (for example
// "8500-8599") and returns the two bounds in that order.
//
// Whitespace around either number is ignored. An error is returned when the
// "-" separator is missing, when either side is not a decimal integer, when
// either bound lies outside the valid TCP port range 1-65535, or when hi is
// not strictly greater than lo.
func parsePortRange(s string) (int, int, error) {
	const minPort, maxPort = 1, 65535

	// Split on the first "-" only; anything after it belongs to the hi side.
	parts := strings.SplitN(s, "-", 2)
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("expected lo-hi format, got %q", s)
	}

	lo, err := strconv.Atoi(strings.TrimSpace(parts[0]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid lo port: %w", err)
	}
	hi, err := strconv.Atoi(strings.TrimSpace(parts[1]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid hi port: %w", err)
	}

	// Reject numbers that can never be bound as a TCP port; without this a
	// range such as "8500-70000" would be accepted and fail later at runtime.
	if lo < minPort || lo > maxPort {
		return 0, 0, fmt.Errorf("lo port %d out of range %d-%d", lo, minPort, maxPort)
	}
	if hi < minPort || hi > maxPort {
		return 0, 0, fmt.Errorf("hi port %d out of range %d-%d", hi, minPort, maxPort)
	}

	if hi <= lo {
		return 0, 0, fmt.Errorf("hi (%d) must be > lo (%d)", hi, lo)
	}
	return lo, hi, nil
}
|
||||
|
||||
// envOr returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
||||
|
||||
// envIntOr returns the environment variable key parsed as a decimal integer.
//
// Surrounding whitespace is ignored, so a value such as "4 " (easy to produce
// with `set VAR=4 && ...` on Windows) is read as 4 rather than being dropped.
// fallback is returned when the variable is unset, empty, whitespace-only, or
// not a valid integer; the fallback on malformed input is deliberate
// best-effort behaviour and no error is reported.
func envIntOr(key string, fallback int) int {
	raw := strings.TrimSpace(os.Getenv(key))
	if raw == "" {
		return fallback
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return fallback
	}
	return n
}
|
||||
Reference in New Issue
Block a user