Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
140 lines
3.4 KiB
Go
140 lines
3.4 KiB
Go
package config
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// utf8BOM is the three-byte UTF-8 byte order mark. It is trimmed from JSON
// inputs before they are handed to json.Unmarshal.
var utf8BOM = []byte("\xEF\xBB\xBF")
|
|
|
|
// Config is the daemon configuration that Load assembles from environment
// variables.
type Config struct {
	// Bind comes from LLAMA_SIDECAR_BIND (default "127.0.0.1:8402").
	Bind string
	// LlamaServerBin is the LLAMA_SERVER_BIN path; Load requires that it
	// can be stat'ed.
	LlamaServerBin string
	// ModelDirMap is the non-empty JSON object decoded from the file named
	// by MODEL_DIR_MAP_FILE.
	ModelDirMap map[string]string

	// PortRangeLo and PortRangeHi are the two bounds parsed from PORT_RANGE
	// (default "8500-8599").
	PortRangeLo, PortRangeHi int
	// MaxSidecars comes from MAX_SIDECARS (default 2); Load rejects a
	// PORT_RANGE that holds fewer ports than this.
	MaxSidecars int

	// LogLevel comes from LOG_LEVEL (default "info").
	LogLevel string
	// BaseArgs is the JSON array from BASE_ARGS when that variable is set,
	// otherwise the built-in default arguments.
	BaseArgs []string

	// HealthTimeoutSeconds comes from HEALTH_TIMEOUT_SECONDS (default 60)
	// and HealthIntervalSeconds from HEALTH_INTERVAL_SECONDS (default 30).
	HealthTimeoutSeconds, HealthIntervalSeconds int
}
|
|
|
|
func Load() (*Config, error) {
|
|
bin := os.Getenv("LLAMA_SERVER_BIN")
|
|
if bin == "" {
|
|
return nil, fmt.Errorf("LLAMA_SERVER_BIN is required")
|
|
}
|
|
if _, err := os.Stat(bin); err != nil {
|
|
return nil, fmt.Errorf("LLAMA_SERVER_BIN %q: %w", bin, err)
|
|
}
|
|
|
|
mapFile := os.Getenv("MODEL_DIR_MAP_FILE")
|
|
if mapFile == "" {
|
|
return nil, fmt.Errorf("MODEL_DIR_MAP_FILE is required")
|
|
}
|
|
modelMap, err := loadModelMap(mapFile)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("MODEL_DIR_MAP_FILE: %w", err)
|
|
}
|
|
|
|
bind := envOr("LLAMA_SIDECAR_BIND", "127.0.0.1:8402")
|
|
logLevel := envOr("LOG_LEVEL", "info")
|
|
maxSidecars := envIntOr("MAX_SIDECARS", 2)
|
|
healthTimeout := envIntOr("HEALTH_TIMEOUT_SECONDS", 60)
|
|
healthInterval := envIntOr("HEALTH_INTERVAL_SECONDS", 30)
|
|
|
|
lo, hi, err := parsePortRange(envOr("PORT_RANGE", "8500-8599"))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("PORT_RANGE: %w", err)
|
|
}
|
|
if hi-lo+1 < maxSidecars {
|
|
return nil, fmt.Errorf("PORT_RANGE %d-%d has %d ports but MAX_SIDECARS is %d", lo, hi, hi-lo+1, maxSidecars)
|
|
}
|
|
|
|
baseArgs := defaultBaseArgs()
|
|
if env := os.Getenv("BASE_ARGS"); env != "" {
|
|
var parsed []string
|
|
envBytes := bytes.TrimPrefix([]byte(env), utf8BOM)
|
|
if err := json.Unmarshal(envBytes, &parsed); err != nil {
|
|
return nil, fmt.Errorf("BASE_ARGS: invalid JSON array: %w", err)
|
|
}
|
|
baseArgs = parsed
|
|
}
|
|
|
|
return &Config{
|
|
Bind: bind,
|
|
LlamaServerBin: bin,
|
|
ModelDirMap: modelMap,
|
|
PortRangeLo: lo,
|
|
PortRangeHi: hi,
|
|
MaxSidecars: maxSidecars,
|
|
LogLevel: logLevel,
|
|
BaseArgs: baseArgs,
|
|
HealthTimeoutSeconds: healthTimeout,
|
|
HealthIntervalSeconds: healthInterval,
|
|
}, nil
|
|
}
|
|
|
|
// defaultBaseArgs returns a freshly allocated slice holding the base
// arguments that Load uses when BASE_ARGS is not set.
func defaultBaseArgs() []string {
	args := []string{
		"-ngl", "999",
		"-c", "32768",
		"--flash-attn", "on",
		"--no-mmap",
	}
	return args
}
|
|
|
|
func loadModelMap(path string) (map[string]string, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
data = bytes.TrimPrefix(data, utf8BOM)
|
|
var m map[string]string
|
|
if err := json.Unmarshal(data, &m); err != nil {
|
|
return nil, fmt.Errorf("invalid JSON: %w", err)
|
|
}
|
|
if len(m) == 0 {
|
|
return nil, fmt.Errorf("model map is empty")
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
// parsePortRange parses a "lo-hi" string into its two integer bounds.
//
// The input is split on the first "-" and whitespace around each half is
// ignored. Both bounds must be valid TCP port numbers (1-65535) and hi must
// be strictly greater than lo. On any failure it returns 0, 0 and an error
// describing which part was rejected.
func parsePortRange(s string) (int, int, error) {
	const minPort, maxPort = 1, 65535

	parts := strings.SplitN(s, "-", 2)
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("expected lo-hi format, got %q", s)
	}
	lo, err := strconv.Atoi(strings.TrimSpace(parts[0]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid lo port: %w", err)
	}
	hi, err := strconv.Atoi(strings.TrimSpace(parts[1]))
	if err != nil {
		return 0, 0, fmt.Errorf("invalid hi port: %w", err)
	}
	// Reject values that cannot be bound as TCP ports (0, negatives, or
	// anything above 65535) instead of failing later at listen time.
	if lo < minPort || lo > maxPort {
		return 0, 0, fmt.Errorf("lo port %d out of range %d-%d", lo, minPort, maxPort)
	}
	if hi < minPort || hi > maxPort {
		return 0, 0, fmt.Errorf("hi port %d out of range %d-%d", hi, minPort, maxPort)
	}
	if hi <= lo {
		return 0, 0, fmt.Errorf("hi (%d) must be > lo (%d)", hi, lo)
	}
	return lo, hi, nil
}
|
|
|
|
// envOr returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
|
|
|
// envIntOr returns the environment variable key parsed as a base-10 integer.
//
// Surrounding whitespace is ignored, so a value such as "4 " (easy to produce
// with `set VAR=4 && ...` in a Windows shell) is honored rather than being
// silently replaced by the default. fallback is returned when the variable
// is unset, empty, or does not parse as an integer; no error is reported in
// that case.
func envIntOr(key string, fallback int) int {
	v := os.Getenv(key)
	if v == "" {
		return fallback
	}
	n, err := strconv.Atoi(strings.TrimSpace(v))
	if err != nil {
		// Best-effort by design: the signature has no way to surface the
		// parse failure, so an unusable value yields the default.
		return fallback
	}
	return n
}
|