Files
llama-sidecar/internal/pool/pool_test.go
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

152 lines
3.4 KiB
Go

package pool
import (
"context"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/indifferentketchup/llama-sidecar/internal/config"
)
// fakeSpawner is a test double that counts how many times its Spawn and Kill
// methods are invoked. Both counters are atomic so they can be read and
// updated from multiple goroutines.
type fakeSpawner struct {
	// spawnCount is incremented once per Spawn call.
	spawnCount atomic.Int32
	// killCount is incremented once per Kill call.
	killCount atomic.Int32
}
// Spawn records the call in spawnCount and returns an in-memory Sidecar built
// from the given arguments; no process is started. ctx and cfg are ignored.
// The returned Sidecar has a fixed Pid of 99999, a no-op cancel function, is
// marked healthy, and has LastUsed set to the current time. The error is
// always nil.
func (f *fakeSpawner) Spawn(ctx context.Context, cfg *config.Config, modelID, modelPath string, flags []string, port int, hash string) (*Sidecar, error) {
	f.spawnCount.Add(1)

	sc := new(Sidecar)
	sc.Hash = hash
	sc.ModelID = modelID
	sc.ModelPath = modelPath
	sc.Flags = flags
	sc.Port = port
	sc.Pid = 99999
	sc.StartedAt = time.Now()
	sc.stderr = newRingBuffer(8)
	sc.cancel = func() {}

	sc.healthy.Store(true)
	sc.LastUsed.Store(time.Now().UnixNano())
	return sc, nil
}
// Kill records the call in killCount. The sidecar argument is not touched and
// the returned error is always nil.
func (f *fakeSpawner) Kill(s *Sidecar) error {
	f.killCount.Add(1)
	return nil
}
// testConfig returns a fresh Config for pool tests: two known models
// ("model-a" and "model-b"), ports 8500-8509, and a limit of two sidecars.
// Each call builds a new value, so tests may mutate the result freely.
func testConfig() *config.Config {
	cfg := new(config.Config)

	cfg.Bind = "127.0.0.1:0"
	cfg.LlamaServerBin = "/fake/llama-server"
	cfg.ModelDirMap = map[string]string{
		"model-a": "/fake/model-a.gguf",
		"model-b": "/fake/model-b.gguf",
	}

	cfg.PortRangeLo = 8500
	cfg.PortRangeHi = 8509
	cfg.MaxSidecars = 2

	cfg.BaseArgs = []string{"-ngl", "999"}
	cfg.HealthTimeoutSeconds = 60

	return cfg
}
// TestPool_AcquireSameKey acquires the same model with the same flags twice
// and checks that both results carry the same hash and that the spawner was
// invoked only once.
func TestPool_AcquireSameKey(t *testing.T) {
	spawner := &fakeSpawner{}
	p := New(testConfig(), spawner)
	ctx := context.Background()

	first, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
	if err != nil {
		t.Fatal(err)
	}

	// A fresh flag slice is passed on purpose so the two calls share no backing array.
	second, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
	if err != nil {
		t.Fatal(err)
	}

	if first.Hash != second.Hash {
		t.Fatalf("expected same sidecar, got different hashes: %s vs %s", first.Hash, second.Hash)
	}
	if spawned := spawner.spawnCount.Load(); spawned != 1 {
		t.Fatalf("expected 1 spawn, got %d", spawned)
	}
}
// TestPool_EvictLRU limits the pool to a single sidecar, acquires model-a and
// then model-b, and checks that two spawns and one kill occurred and that
// only model-b remains listed.
func TestPool_EvictLRU(t *testing.T) {
	cfg := testConfig()
	cfg.MaxSidecars = 1
	spawner := &fakeSpawner{}
	p := New(cfg, spawner)
	ctx := context.Background()

	if _, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"}); err != nil {
		t.Fatal(err)
	}
	// With a capacity of one, the second acquire must displace the first sidecar.
	if _, err := p.Acquire(ctx, "model-b", []string{"--top-k", "40"}); err != nil {
		t.Fatal(err)
	}

	if spawned := spawner.spawnCount.Load(); spawned != 2 {
		t.Fatalf("expected 2 spawns, got %d", spawned)
	}
	if killed := spawner.killCount.Load(); killed != 1 {
		t.Fatalf("expected 1 kill (eviction), got %d", killed)
	}

	remaining := p.List()
	if len(remaining) != 1 {
		t.Fatalf("expected 1 sidecar, got %d", len(remaining))
	}
	if survivor := remaining[0].ModelID; survivor != "model-b" {
		t.Fatalf("expected model-b, got %s", survivor)
	}
}
// TestPool_ValidatorReject checks that Acquire returns an error when the
// caller-supplied flags include "--model".
// NOTE(review): presumably rejected because it would override the model path
// chosen by the pool — confirm against the flag validator.
func TestPool_ValidatorReject(t *testing.T) {
	spawner := &fakeSpawner{}
	p := New(testConfig(), spawner)

	badFlags := []string{"--model", "evil.gguf"}
	if _, err := p.Acquire(context.Background(), "model-a", badFlags); err == nil {
		t.Fatal("expected validation error")
	}
}
// TestPool_UnknownModel checks that Acquire returns an error for a model ID
// that is not present in the test config's ModelDirMap.
func TestPool_UnknownModel(t *testing.T) {
	spawner := &fakeSpawner{}
	p := New(testConfig(), spawner)

	if _, err := p.Acquire(context.Background(), "nonexistent", nil); err == nil {
		t.Fatal("expected unknown model error")
	}
}
// TestPool_ConcurrentAcquire hammers Acquire with the same model and flags
// from several goroutines at once and checks that every call succeeds and
// that the pool ends up holding exactly one sidecar for that key.
func TestPool_ConcurrentAcquire(t *testing.T) {
	const (
		workers    = 10 // number of concurrent goroutines
		iterations = 50 // Acquire calls per goroutine
	)

	cfg := testConfig()
	cfg.MaxSidecars = 10
	cfg.PortRangeHi = 8599
	fs := &fakeSpawner{}
	p := New(cfg, fs)
	ctx := context.Background()

	var wg sync.WaitGroup
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for j := 0; j < iterations; j++ {
				if _, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"}); err != nil {
					// Report instead of silently dropping the error. t.Errorf is
					// safe to call from this goroutine, whereas t.Fatal must only
					// be called from the goroutine running the test function.
					t.Errorf("Acquire failed: %v", err)
					return // one report per worker is enough
				}
			}
		}()
	}
	// All workers finish before the test returns, so late t.Errorf calls cannot occur.
	wg.Wait()

	list := p.List()
	if len(list) != 1 {
		t.Fatalf("expected 1 sidecar (same key), got %d", len(list))
	}
}