llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), and deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and a child lifetime decoupled from the request context. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags; UTF-8 BOM in JSON config; -fa → --flash-attn on default; child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop, using per-GGUF production flags from the llama-swap config with a --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts are resumable at the individual-question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
79
internal/config/config_test.go
Normal file
79
internal/config/config_test.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLoad_MissingRequired(t *testing.T) {
|
||||
os.Unsetenv("LLAMA_SERVER_BIN")
|
||||
os.Unsetenv("MODEL_DIR_MAP_FILE")
|
||||
_, err := Load()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing LLAMA_SERVER_BIN")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePortRange(t *testing.T) {
|
||||
lo, hi, err := parsePortRange("8500-8599")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if lo != 8500 || hi != 8599 {
|
||||
t.Fatalf("got %d-%d", lo, hi)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePortRange_Bad(t *testing.T) {
|
||||
_, _, err := parsePortRange("abc")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
_, _, err = parsePortRange("100-50")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for hi <= lo")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadModelMap_BOM(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "model_map.json")
|
||||
content := append([]byte{0xEF, 0xBB, 0xBF}, []byte(`{"test-model": "/fake/path.gguf"}`)...)
|
||||
if err := os.WriteFile(path, content, 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
m, err := loadModelMap(path)
|
||||
if err != nil {
|
||||
t.Fatalf("BOM-prefixed JSON should parse: %v", err)
|
||||
}
|
||||
if m["test-model"] != "/fake/path.gguf" {
|
||||
t.Fatalf("unexpected map: %v", m)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultBaseArgs_FlashAttn(t *testing.T) {
|
||||
args := defaultBaseArgs()
|
||||
for i, a := range args {
|
||||
if a == "--flash-attn" && i+1 < len(args) && args[i+1] == "on" {
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Fatal("expected --flash-attn on in default args")
|
||||
}
|
||||
|
||||
func TestDefaultBaseArgs(t *testing.T) {
|
||||
args := defaultBaseArgs()
|
||||
if len(args) == 0 {
|
||||
t.Fatal("expected non-empty default args")
|
||||
}
|
||||
found := false
|
||||
for _, a := range args {
|
||||
if a == "--no-mmap" {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Fatal("expected --no-mmap in default args")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user