llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
74
internal/pool/ports_test.go
Normal file
74
internal/pool/ports_test.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package pool
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPortAllocator_AllocateRelease(t *testing.T) {
|
||||
pa := NewPortAllocator(8500, 8502)
|
||||
p1, err := pa.Allocate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p2, err := pa.Allocate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
p3, err := pa.Allocate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// All three ports should be distinct
|
||||
if p1 == p2 || p2 == p3 || p1 == p3 {
|
||||
t.Fatalf("expected distinct ports: %d, %d, %d", p1, p2, p3)
|
||||
}
|
||||
|
||||
// Exhausted
|
||||
_, err = pa.Allocate()
|
||||
if err == nil {
|
||||
t.Fatal("expected error when exhausted")
|
||||
}
|
||||
|
||||
// Release and re-allocate
|
||||
pa.Release(p2)
|
||||
p4, err := pa.Allocate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if p4 != p2 {
|
||||
t.Fatalf("expected released port %d, got %d", p2, p4)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPortAllocator_Concurrent(t *testing.T) {
|
||||
pa := NewPortAllocator(8500, 8599)
|
||||
var wg sync.WaitGroup
|
||||
allocated := make(chan int, 100)
|
||||
|
||||
for i := 0; i < 100; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
p, err := pa.Allocate()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
allocated <- p
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
close(allocated)
|
||||
|
||||
seen := make(map[int]bool)
|
||||
for p := range allocated {
|
||||
if seen[p] {
|
||||
t.Fatalf("duplicate port %d", p)
|
||||
}
|
||||
seen[p] = true
|
||||
}
|
||||
if len(seen) != 100 {
|
||||
t.Fatalf("expected 100 ports, got %d", len(seen))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user