Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx-decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
152 lines
3.4 KiB
Go
152 lines
3.4 KiB
Go
package pool
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/indifferentketchup/llama-sidecar/internal/config"
|
|
)
|
|
|
|
// fakeSpawner is a test double that stands in for a real sidecar spawner.
// It only counts how many times Spawn and Kill are invoked; the counters are
// atomic so they can be bumped from several goroutines at once.
type fakeSpawner struct {
	// spawnCount is the number of Spawn calls; killCount the number of Kill calls.
	spawnCount, killCount atomic.Int32
}
|
|
|
|
func (f *fakeSpawner) Spawn(ctx context.Context, cfg *config.Config, modelID, modelPath string, flags []string, port int, hash string) (*Sidecar, error) {
|
|
f.spawnCount.Add(1)
|
|
s := &Sidecar{
|
|
Hash: hash,
|
|
ModelID: modelID,
|
|
ModelPath: modelPath,
|
|
Flags: flags,
|
|
Port: port,
|
|
Pid: 99999,
|
|
StartedAt: time.Now(),
|
|
stderr: newRingBuffer(8),
|
|
cancel: func() {},
|
|
}
|
|
s.healthy.Store(true)
|
|
s.LastUsed.Store(time.Now().UnixNano())
|
|
return s, nil
|
|
}
|
|
|
|
func (f *fakeSpawner) Kill(s *Sidecar) error {
|
|
f.killCount.Add(1)
|
|
return nil
|
|
}
|
|
|
|
func testConfig() *config.Config {
|
|
return &config.Config{
|
|
Bind: "127.0.0.1:0",
|
|
LlamaServerBin: "/fake/llama-server",
|
|
ModelDirMap: map[string]string{
|
|
"model-a": "/fake/model-a.gguf",
|
|
"model-b": "/fake/model-b.gguf",
|
|
},
|
|
PortRangeLo: 8500,
|
|
PortRangeHi: 8509,
|
|
MaxSidecars: 2,
|
|
BaseArgs: []string{"-ngl", "999"},
|
|
HealthTimeoutSeconds: 60,
|
|
}
|
|
}
|
|
|
|
func TestPool_AcquireSameKey(t *testing.T) {
|
|
fs := &fakeSpawner{}
|
|
p := New(testConfig(), fs)
|
|
ctx := context.Background()
|
|
|
|
s1, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
s2, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if s1.Hash != s2.Hash {
|
|
t.Fatalf("expected same sidecar, got different hashes: %s vs %s", s1.Hash, s2.Hash)
|
|
}
|
|
if fs.spawnCount.Load() != 1 {
|
|
t.Fatalf("expected 1 spawn, got %d", fs.spawnCount.Load())
|
|
}
|
|
}
|
|
|
|
func TestPool_EvictLRU(t *testing.T) {
|
|
cfg := testConfig()
|
|
cfg.MaxSidecars = 1
|
|
fs := &fakeSpawner{}
|
|
p := New(cfg, fs)
|
|
ctx := context.Background()
|
|
|
|
_, err := p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
_, err = p.Acquire(ctx, "model-b", []string{"--top-k", "40"})
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
if fs.spawnCount.Load() != 2 {
|
|
t.Fatalf("expected 2 spawns, got %d", fs.spawnCount.Load())
|
|
}
|
|
if fs.killCount.Load() != 1 {
|
|
t.Fatalf("expected 1 kill (eviction), got %d", fs.killCount.Load())
|
|
}
|
|
list := p.List()
|
|
if len(list) != 1 {
|
|
t.Fatalf("expected 1 sidecar, got %d", len(list))
|
|
}
|
|
if list[0].ModelID != "model-b" {
|
|
t.Fatalf("expected model-b, got %s", list[0].ModelID)
|
|
}
|
|
}
|
|
|
|
func TestPool_ValidatorReject(t *testing.T) {
|
|
fs := &fakeSpawner{}
|
|
p := New(testConfig(), fs)
|
|
_, err := p.Acquire(context.Background(), "model-a", []string{"--model", "evil.gguf"})
|
|
if err == nil {
|
|
t.Fatal("expected validation error")
|
|
}
|
|
}
|
|
|
|
func TestPool_UnknownModel(t *testing.T) {
|
|
fs := &fakeSpawner{}
|
|
p := New(testConfig(), fs)
|
|
_, err := p.Acquire(context.Background(), "nonexistent", nil)
|
|
if err == nil {
|
|
t.Fatal("expected unknown model error")
|
|
}
|
|
}
|
|
|
|
func TestPool_ConcurrentAcquire(t *testing.T) {
|
|
cfg := testConfig()
|
|
cfg.MaxSidecars = 10
|
|
cfg.PortRangeHi = 8599
|
|
fs := &fakeSpawner{}
|
|
p := New(cfg, fs)
|
|
ctx := context.Background()
|
|
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < 10; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for j := 0; j < 50; j++ {
|
|
_, _ = p.Acquire(ctx, "model-a", []string{"--top-k", "20"})
|
|
}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
list := p.List()
|
|
if len(list) != 1 {
|
|
t.Fatalf("expected 1 sidecar (same key), got %d", len(list))
|
|
}
|
|
}
|