Files
llama-sidecar/internal/pool/sidecar_test.go
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and child
lifetime decoupled from the request context.

Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

97 lines
2.4 KiB
Go

package pool
import (
"reflect"
"testing"
)
// TestBuildArgs_PreservesNonOverlapping checks that, when the user flags do
// not collide with any base flag, the result of buildArgs still contains the
// base -c pair, the user's --top-k pair, and the --model / --port pairs for
// the model path and port passed in.
func TestBuildArgs_PreservesNonOverlapping(t *testing.T) {
	baseFlags := []string{"-ngl", "999", "-c", "32768", "--flash-attn", "on", "--no-mmap"}
	userFlags := []string{"--top-k", "20"}
	got := buildArgs(baseFlags, "/model.gguf", 8500, userFlags)

	// Each entry is a flag/value pair that must appear adjacently in got,
	// together with the message reported when it does not.
	expectations := []struct {
		flag, value string
		complaint   string
	}{
		// Base -c must survive because the user did not supply -c.
		{"-c", "32768", "-c 32768 missing from args"},
		// The user's own flag must be carried through.
		{"--top-k", "20", "--top-k 20 missing from args"},
		// Model path and port are expected to be injected.
		{"--model", "/model.gguf", "--model missing"},
		{"--port", "8500", "--port missing"},
	}

	for _, e := range expectations {
		if containsSeq(got, e.flag, e.value) {
			continue
		}
		t.Errorf("%s: %v", e.complaint, got)
	}
}
// TestBuildArgs_UserOverridesBase checks that a -c supplied by the user
// replaces the base -c: the result must contain exactly one -c flag, its
// value must not be the base value 32768, and it must be the user's 131072.
func TestBuildArgs_UserOverridesBase(t *testing.T) {
	base := []string{"-ngl", "999", "-c", "32768"}
	user := []string{"-c", "131072"}
	got := buildArgs(base, "/model.gguf", 8500, user)

	count := 0
	userValueSeen := false
	for i, tok := range got {
		// Only consider -c tokens that are followed by a value.
		if tok != "-c" || i+1 >= len(got) {
			continue
		}
		count++
		switch got[i+1] {
		case "32768":
			t.Errorf("base -c 32768 should have been deduped: %v", got)
		case "131072":
			userValueSeen = true
		}
	}
	if count != 1 {
		t.Errorf("expected exactly 1 -c flag, got %d in %v", count, got)
	}
	// Without this check a single -c carrying some unrelated value would
	// pass, even though the user's override was lost.
	if !userValueSeen {
		t.Errorf("user -c 131072 missing from args: %v", got)
	}
}
// TestBuildArgs_NoUserFlags checks that buildArgs called with nil user flags
// still yields the base -c pair and the base --no-mmap switch.
func TestBuildArgs_NoUserFlags(t *testing.T) {
	baseFlags := []string{"-ngl", "999", "-c", "32768", "--no-mmap"}
	got := buildArgs(baseFlags, "/model.gguf", 8500, nil)

	// Token runs that must appear contiguously in got, with the message
	// reported when one is absent.
	required := []struct {
		tokens    []string
		complaint string
	}{
		{[]string{"-c", "32768"}, "-c 32768 missing when no user flags"},
		{[]string{"--no-mmap"}, "--no-mmap missing"},
	}

	for _, r := range required {
		if found := containsSeq(got, r.tokens...); !found {
			t.Errorf("%s: %v", r.complaint, got)
		}
	}
}
// TestDedupFlags_Mixed checks dedupFlags on an auto list that mixes a flag
// the user also supplies (--top-k) with flags the user does not touch: the
// expected result is the auto list without the --top-k pair.
func TestDedupFlags_Mixed(t *testing.T) {
	var (
		autoFlags = []string{"--top-k", "40", "-c", "32768", "--no-mmap"}
		userFlags = []string{"--top-k", "20"}
		expected  = []string{"-c", "32768", "--no-mmap"}
	)

	actual := dedupFlags(autoFlags, userFlags)
	if reflect.DeepEqual(actual, expected) {
		return
	}
	t.Errorf("dedupFlags = %v, want %v", actual, expected)
}
// TestDedupFlags_EqualsForm checks dedupFlags when the auto list spells a
// flag as a single "--flag=value" token and the user supplies the same flag
// as two separate tokens: the expected result keeps only --no-mmap.
func TestDedupFlags_EqualsForm(t *testing.T) {
	expected := []string{"--no-mmap"}

	actual := dedupFlags(
		[]string{"--ctx-size=4096", "--no-mmap"},
		[]string{"--ctx-size", "8192"},
	)

	if same := reflect.DeepEqual(actual, expected); !same {
		t.Errorf("dedupFlags = %v, want %v", actual, expected)
	}
}
// containsSeq reports whether the tokens in seq occur in args as one
// contiguous run, in order. An empty seq matches any args, including an
// empty one; a seq longer than args never matches.
func containsSeq(args []string, seq ...string) bool {
candidates:
	for start := 0; start+len(seq) <= len(args); start++ {
		for offset, wanted := range seq {
			if args[start+offset] != wanted {
				// Mismatch at this start position; try the next one.
				continue candidates
			}
		}
		return true
	}
	return false
}