llama-sidecar v0.1.0: daemon + benchmarks + eval suite

Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 01:55:13 +00:00
parent babbb4f39b
commit fe7f36ae98
39 changed files with 4228 additions and 0 deletions

53
internal/pool/hash.go Normal file
View File

@@ -0,0 +1,53 @@
package pool
import (
"crypto/sha256"
"fmt"
"sort"
"strings"
"github.com/indifferentketchup/llama-sidecar/internal/validator"
)
// Hash computes a deterministic 16-hex-character digest for a (modelID, flags)
// pair.
//
// flags is parsed into (name, value) pairs from three spellings:
// "--name=value" tokens, "--name value" token pairs, and bare "--name"
// switches. A token that validator.FlagName does not report as a flag, and
// that is not consumed as the value of the preceding flag, is ignored.
//
// The pairs are sorted by name and then by value before hashing, so flag
// order does not affect the result, including when a flag is repeated with
// different values.
func Hash(modelID string, flags []string) string {
	type pair struct {
		key, val string
	}
	pairs := make([]pair, 0, len(flags))
	for i := 0; i < len(flags); {
		tok := flags[i]
		key := validator.FlagName(tok)
		if key == "" {
			i++
			continue
		}
		switch idx := strings.IndexByte(tok, '='); {
		case idx >= 0:
			// Key on the validator's flag name, exactly like the other two
			// forms, so "--flag=v" and "--flag v" cannot diverge on the key.
			pairs = append(pairs, pair{key: key, val: tok[idx+1:]})
			i++
		case i+1 < len(flags) && validator.FlagName(flags[i+1]) == "":
			pairs = append(pairs, pair{key: key, val: flags[i+1]})
			i += 2
		default:
			pairs = append(pairs, pair{key: key, val: ""})
			i++
		}
	}

	// sort.Slice is not stable, so ties on key are broken on val; otherwise
	// repeated flags would be serialized in an input-dependent order.
	sort.Slice(pairs, func(a, b int) bool {
		if pairs[a].key != pairs[b].key {
			return pairs[a].key < pairs[b].key
		}
		return pairs[a].val < pairs[b].val
	})

	parts := make([]string, 0, len(pairs))
	for _, p := range pairs {
		parts = append(parts, p.key+"\x1f"+p.val)
	}
	serialized := strings.Join(parts, "\x1e")
	input := modelID + "\x1d" + serialized
	sum := sha256.Sum256([]byte(input))
	// Only the first 8 bytes are kept, giving a 16-character hex string.
	return fmt.Sprintf("%x", sum[:8])
}

View File

@@ -0,0 +1,53 @@
package pool
import (
"math/rand"
"testing"
)
// TestHash_OrderIndependence verifies that permuting whole "--flag value"
// pairs leaves the hash unchanged.
func TestHash_OrderIndependence(t *testing.T) {
	base := []string{"--a", "1", "--b", "2", "--c", "3"}
	want := Hash("foo", base)

	for round := 0; round < 5; round++ {
		// Group tokens two at a time so each flag travels with its value.
		groups := make([][2]string, 0)
		for k := 0; k < len(base); k += 2 {
			groups = append(groups, [2]string{base[k], base[k+1]})
		}
		rand.Shuffle(len(groups), func(x, y int) {
			groups[x], groups[y] = groups[y], groups[x]
		})

		var reordered []string
		for _, g := range groups {
			reordered = append(reordered, g[0], g[1])
		}

		if got := Hash("foo", reordered); got != want {
			t.Errorf("iteration %d: hash %s != %s for order %v", round, got, want, reordered)
		}
	}
}
// TestHash_SeparatorCollision verifies that a flag token containing the
// record separator byte does not hash the same as the token without it.
func TestHash_SeparatorCollision(t *testing.T) {
	withSeparator := Hash("foo", []string{"--a\x1eb", "1"})
	withoutSeparator := Hash("foo", []string{"--ab", "1"})

	if withSeparator == withoutSeparator {
		t.Error("separator collision: hashes should differ")
	}
}
// TestHash_Length verifies that the digest is rendered as 16 hex characters.
func TestHash_Length(t *testing.T) {
	digest := Hash("model", []string{"--top-k", "20"})
	if n := len(digest); n != 16 {
		t.Errorf("expected 16 hex chars, got %d: %s", n, digest)
	}
}
// TestHash_DifferentModels verifies that the model ID participates in the
// hash: identical flags on two models must not collide.
func TestHash_DifferentModels(t *testing.T) {
	flags := []string{"--top-k", "20"}
	forA := Hash("model-a", flags)
	forB := Hash("model-b", flags)

	if forA == forB {
		t.Error("different models should produce different hashes")
	}
}

188
internal/pool/pool.go Normal file
View File

@@ -0,0 +1,188 @@
package pool
import (
"container/list"
"context"
"fmt"
"log/slog"
"sync"
"time"
"github.com/indifferentketchup/llama-sidecar/internal/config"
"github.com/indifferentketchup/llama-sidecar/internal/validator"
)
// SidecarInfo is the JSON-serialisable snapshot of one pooled sidecar, as
// produced by Pool.List.
type SidecarInfo struct {
	Hash string `json:"hash"` // key under which the sidecar is registered in the pool
	ModelID string `json:"model_id"`
	Flags []string `json:"flags"`
	Port int `json:"port"`
	Pid int `json:"pid"`
	StartedAt time.Time `json:"started_at"`
	LastUsed time.Time `json:"last_used"` // converted from the sidecar's Unix-nanosecond LastUsed counter
	Healthy bool `json:"healthy"` // value of Sidecar.Healthy at the time of the snapshot
}
// Pool tracks running sidecars keyed by their hash and evicts the least
// recently used one when it is full. mu guards sidecars, lru and lruIdx.
type Pool struct {
	mu sync.Mutex
	cfg *config.Config
	sidecars map[string]*Sidecar // hash -> sidecar
	lru *list.List // list of hashes; front is the most recently used, back is evicted first
	lruIdx map[string]*list.Element // hash -> its element in lru
	ports *PortAllocator
	spawner Spawner // starts and kills sidecar processes; replaced by a fake in tests
}
// New returns an empty Pool that spawns sidecars through spawner and draws
// ports from the inclusive range cfg.PortRangeLo..cfg.PortRangeHi.
func New(cfg *config.Config, spawner Spawner) *Pool {
	p := new(Pool)
	p.cfg = cfg
	p.spawner = spawner
	p.sidecars = map[string]*Sidecar{}
	p.lruIdx = map[string]*list.Element{}
	p.lru = list.New()
	p.ports = NewPortAllocator(cfg.PortRangeLo, cfg.PortRangeHi)
	return p
}
// Acquire returns a healthy sidecar for (modelID, flags), spawning one if the
// pool does not already hold one under the same hash.
//
// flags are validated first and modelID must be present in cfg.ModelDirMap;
// either failure returns an error without touching the pool. When the pool
// already holds MaxSidecars entries the least recently used one is evicted
// before spawning.
//
// The pool mutex is released while the spawner runs, so concurrent callers
// may race to spawn the same hash. The first to register wins; later
// spawners kill their own duplicate and return the registered sidecar.
func (p *Pool) Acquire(ctx context.Context, modelID string, flags []string) (*Sidecar, error) {
	if _, err := validator.ValidateExtraArgs(flags); err != nil {
		return nil, fmt.Errorf("validation: %w", err)
	}
	modelPath, ok := p.cfg.ModelDirMap[modelID]
	if !ok {
		return nil, fmt.Errorf("unknown model: %s", modelID)
	}
	hash := Hash(modelID, flags)

	p.mu.Lock()
	defer p.mu.Unlock()

	if s, ok := p.lookupHealthyLocked(hash); ok {
		return s, nil
	}
	if len(p.sidecars) >= p.cfg.MaxSidecars {
		if err := p.evictLRULocked(); err != nil {
			return nil, fmt.Errorf("eviction failed: %w", err)
		}
	}
	port, err := p.ports.Allocate()
	if err != nil {
		return nil, fmt.Errorf("port allocation: %w", err)
	}

	// Spawning blocks until the child passes its health check, so do not
	// hold the pool lock across it. The deferred Unlock pairs with the Lock
	// below.
	p.mu.Unlock()
	s, err := p.spawner.Spawn(ctx, p.cfg, modelID, modelPath, flags, port, hash)
	p.mu.Lock()
	if err != nil {
		p.ports.Release(port)
		return nil, fmt.Errorf("spawn: %w", err)
	}

	// Another caller may have registered the same hash while the lock was
	// released. Overwriting that entry would orphan a running process, its
	// port and an LRU element, so keep the registered sidecar and discard
	// the one just spawned.
	if existing, ok := p.lookupHealthyLocked(hash); ok {
		if kerr := p.spawner.Kill(s); kerr != nil {
			slog.Error("kill failed for duplicate sidecar", "hash", hash, "err", kerr)
		}
		p.ports.Release(port)
		return existing, nil
	}

	// Other hashes may also have been registered in the meantime; re-apply
	// the capacity limit before inserting.
	for len(p.sidecars) >= p.cfg.MaxSidecars && p.lru.Len() > 0 {
		if err := p.evictLRULocked(); err != nil {
			break
		}
	}

	p.sidecars[hash] = s
	p.lruIdx[hash] = p.lru.PushFront(hash)
	return s, nil
}

// lookupHealthyLocked returns the healthy sidecar registered under hash after
// marking it most recently used. An unhealthy entry is removed and reported
// as absent. The caller must hold p.mu.
func (p *Pool) lookupHealthyLocked(hash string) (*Sidecar, bool) {
	s, ok := p.sidecars[hash]
	if !ok {
		return nil, false
	}
	if !s.Healthy() {
		_ = p.removeLocked(hash) // removeLocked logs kill failures itself
		return nil, false
	}
	if el, ok := p.lruIdx[hash]; ok {
		p.lru.MoveToFront(el)
	}
	s.TouchLastUsed()
	return s, true
}
// List returns a snapshot of every sidecar currently registered in the pool.
// The order follows map iteration and is therefore unspecified.
func (p *Pool) List() []SidecarInfo {
	p.mu.Lock()
	defer p.mu.Unlock()

	infos := make([]SidecarInfo, 0, len(p.sidecars))
	for _, sc := range p.sidecars {
		var info SidecarInfo
		info.Hash = sc.Hash
		info.ModelID = sc.ModelID
		info.Flags = sc.Flags
		info.Port = sc.Port
		info.Pid = sc.Pid
		info.StartedAt = sc.StartedAt
		// LastUsed is stored as Unix nanoseconds.
		info.LastUsed = time.Unix(0, sc.LastUsed.Load())
		info.Healthy = sc.Healthy()
		infos = append(infos, info)
	}
	return infos
}
// Remove kills the sidecar registered under hash and drops it from the pool.
// It returns an error when no such sidecar is registered.
func (p *Pool) Remove(hash string) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	_, present := p.sidecars[hash]
	if present {
		return p.removeLocked(hash)
	}
	return fmt.Errorf("sidecar %s not found", hash)
}
// Shutdown kills every sidecar in the pool concurrently and waits for the
// kills to finish or for ctx to be done, whichever comes first.
//
// All sidecars are unregistered from the pool up front, so once Shutdown has
// started the pool no longer lists them and no concurrent Remove or Acquire
// can kill the same sidecar a second time. Each port is released after its
// sidecar has been killed.
//
// If ctx is done first, ctx.Err() is returned and the outstanding kills keep
// running in the background.
func (p *Pool) Shutdown(ctx context.Context) error {
	p.mu.Lock()
	victims := make([]*Sidecar, 0, len(p.sidecars))
	for hash, s := range p.sidecars {
		victims = append(victims, s)
		delete(p.sidecars, hash)
		if el, ok := p.lruIdx[hash]; ok {
			p.lru.Remove(el)
			delete(p.lruIdx, hash)
		}
	}
	p.mu.Unlock()

	var wg sync.WaitGroup
	for _, s := range victims {
		wg.Add(1)
		go func(s *Sidecar) {
			defer wg.Done()
			if err := p.spawner.Kill(s); err != nil {
				slog.Error("shutdown kill failed", "hash", s.Hash, "err", err)
			}
			p.ports.Release(s.Port)
		}(s)
	}

	done := make(chan struct{})
	go func() { wg.Wait(); close(done) }()
	select {
	case <-done:
	case <-ctx.Done():
		return ctx.Err()
	}
	slog.Info("pool shutdown complete", "count", len(victims))
	return nil
}
// removeLocked unregisters the sidecar stored under hash, kills it and
// releases its port. A kill failure is logged, not returned; the function
// always returns nil, including when hash is not registered. The caller must
// hold p.mu.
func (p *Pool) removeLocked(hash string) error {
	victim, found := p.sidecars[hash]
	if found {
		delete(p.sidecars, hash)
		if elem, tracked := p.lruIdx[hash]; tracked {
			p.lru.Remove(elem)
			delete(p.lruIdx, hash)
		}
		killErr := p.spawner.Kill(victim)
		if killErr != nil {
			slog.Error("kill failed during remove", "hash", hash, "err", killErr)
		}
		p.ports.Release(victim.Port)
	}
	return nil
}
// evictLRULocked removes the sidecar at the back of the LRU list, i.e. the
// least recently used one. It fails when the list is empty. The caller must
// hold p.mu.
func (p *Pool) evictLRULocked() error {
	if oldest := p.lru.Back(); oldest != nil {
		victim := oldest.Value.(string)
		slog.Info("evicting LRU sidecar", "hash", victim)
		return p.removeLocked(victim)
	}
	return fmt.Errorf("pool full but LRU empty")
}

151
internal/pool/pool_test.go Normal file
View File

@@ -0,0 +1,151 @@
package pool
import (
"context"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/indifferentketchup/llama-sidecar/internal/config"
)
// fakeSpawner is a Spawner test double that starts no processes and only
// counts how often Spawn and Kill are called.
type fakeSpawner struct {
	spawnCount atomic.Int32
	killCount atomic.Int32
}
// Spawn records the call and returns an in-memory Sidecar that is already
// marked healthy. No process is started; Pid is a fixed placeholder.
func (f *fakeSpawner) Spawn(ctx context.Context, cfg *config.Config, modelID, modelPath string, flags []string, port int, hash string) (*Sidecar, error) {
	f.spawnCount.Add(1)

	fake := new(Sidecar)
	fake.Hash = hash
	fake.ModelID = modelID
	fake.ModelPath = modelPath
	fake.Flags = flags
	fake.Port = port
	fake.Pid = 99999
	fake.StartedAt = time.Now()
	fake.stderr = newRingBuffer(8)
	fake.cancel = func() {}

	fake.healthy.Store(true)
	fake.LastUsed.Store(time.Now().UnixNano())
	return fake, nil
}
// Kill records the call and always succeeds; the sidecar itself is untouched.
func (f *fakeSpawner) Kill(s *Sidecar) error {
	f.killCount.Add(1)
	return nil
}
// testConfig returns a fresh config with two known models, a ten-port range
// and room for two sidecars. Tests mutate the returned value freely.
func testConfig() *config.Config {
	cfg := new(config.Config)
	cfg.Bind = "127.0.0.1:0"
	cfg.LlamaServerBin = "/fake/llama-server"
	cfg.ModelDirMap = map[string]string{
		"model-a": "/fake/model-a.gguf",
		"model-b": "/fake/model-b.gguf",
	}
	cfg.PortRangeLo = 8500
	cfg.PortRangeHi = 8509
	cfg.MaxSidecars = 2
	cfg.BaseArgs = []string{"-ngl", "999"}
	cfg.HealthTimeoutSeconds = 60
	return cfg
}
// TestPool_AcquireSameKey verifies that acquiring the same (model, flags)
// twice reuses the sidecar and spawns only once.
func TestPool_AcquireSameKey(t *testing.T) {
	spawner := &fakeSpawner{}
	pl := New(testConfig(), spawner)
	ctx := context.Background()

	first, err := pl.Acquire(ctx, "model-a", []string{"--top-k", "20"})
	if err != nil {
		t.Fatal(err)
	}
	second, err := pl.Acquire(ctx, "model-a", []string{"--top-k", "20"})
	if err != nil {
		t.Fatal(err)
	}

	if first.Hash != second.Hash {
		t.Fatalf("expected same sidecar, got different hashes: %s vs %s", first.Hash, second.Hash)
	}
	if spawns := spawner.spawnCount.Load(); spawns != 1 {
		t.Fatalf("expected 1 spawn, got %d", spawns)
	}
}
// TestPool_EvictLRU verifies that, with room for a single sidecar, acquiring
// a second key kills the first and leaves only the second registered.
func TestPool_EvictLRU(t *testing.T) {
	cfg := testConfig()
	cfg.MaxSidecars = 1
	spawner := &fakeSpawner{}
	pl := New(cfg, spawner)
	ctx := context.Background()

	if _, err := pl.Acquire(ctx, "model-a", []string{"--top-k", "20"}); err != nil {
		t.Fatal(err)
	}
	if _, err := pl.Acquire(ctx, "model-b", []string{"--top-k", "40"}); err != nil {
		t.Fatal(err)
	}

	if spawns := spawner.spawnCount.Load(); spawns != 2 {
		t.Fatalf("expected 2 spawns, got %d", spawns)
	}
	if kills := spawner.killCount.Load(); kills != 1 {
		t.Fatalf("expected 1 kill (eviction), got %d", kills)
	}

	infos := pl.List()
	if len(infos) != 1 {
		t.Fatalf("expected 1 sidecar, got %d", len(infos))
	}
	if survivor := infos[0].ModelID; survivor != "model-b" {
		t.Fatalf("expected model-b, got %s", survivor)
	}
}
func TestPool_ValidatorReject(t *testing.T) {
fs := &fakeSpawner{}
p := New(testConfig(), fs)
_, err := p.Acquire(context.Background(), "model-a", []string{"--model", "evil.gguf"})
if err == nil {
t.Fatal("expected validation error")
}
}
func TestPool_UnknownModel(t *testing.T) {
fs := &fakeSpawner{}
p := New(testConfig(), fs)
_, err := p.Acquire(context.Background(), "nonexistent", nil)
if err == nil {
t.Fatal("expected unknown model error")
}
}
// TestPool_ConcurrentAcquire hammers Acquire with one key from several
// goroutines and checks that the pool ends up with exactly one sidecar.
func TestPool_ConcurrentAcquire(t *testing.T) {
	const (
		workers = 10
		rounds  = 50
	)
	cfg := testConfig()
	cfg.MaxSidecars = 10
	cfg.PortRangeHi = 8599
	pl := New(cfg, &fakeSpawner{})
	ctx := context.Background()

	var wg sync.WaitGroup
	wg.Add(workers)
	for w := 0; w < workers; w++ {
		go func() {
			defer wg.Done()
			for r := 0; r < rounds; r++ {
				_, _ = pl.Acquire(ctx, "model-a", []string{"--top-k", "20"})
			}
		}()
	}
	wg.Wait()

	if infos := pl.List(); len(infos) != 1 {
		t.Fatalf("expected 1 sidecar (same key), got %d", len(infos))
	}
}

28
internal/pool/ports.go Normal file
View File

@@ -0,0 +1,28 @@
package pool
import "fmt"
// PortAllocator hands out port numbers from a fixed inclusive range. Free
// ports sit in a buffered channel, so Allocate and Release may be called
// from multiple goroutines and ports are reused in first-released,
// first-allocated order.
type PortAllocator struct {
	ports chan int
}

// NewPortAllocator returns an allocator holding every port in [lo, hi].
// An inverted range (hi < lo) yields an allocator with no ports rather than
// panicking on a negative channel capacity.
func NewPortAllocator(lo, hi int) *PortAllocator {
	size := hi - lo + 1
	if size < 0 {
		size = 0
	}
	ch := make(chan int, size)
	for p := lo; p <= hi; p++ {
		ch <- p
	}
	return &PortAllocator{ports: ch}
}

// Allocate returns a free port, or an error when none is available. It never
// blocks.
func (pa *PortAllocator) Allocate() (int, error) {
	select {
	case p := <-pa.ports:
		return p, nil
	default:
		return 0, fmt.Errorf("port allocator exhausted")
	}
}

// Release returns port to the free list. It never blocks: if the free list
// is already full (the port was released twice, or was never allocated from
// this allocator) the port is dropped, because blocking here would stall the
// caller indefinitely.
func (pa *PortAllocator) Release(port int) {
	select {
	case pa.ports <- port:
	default:
	}
}

View File

@@ -0,0 +1,74 @@
package pool
import (
"sync"
"testing"
)
// TestPortAllocator_AllocateRelease drains a three-port allocator, checks
// that it reports exhaustion, and checks that a released port is handed out
// again.
func TestPortAllocator_AllocateRelease(t *testing.T) {
	pa := NewPortAllocator(8500, 8502)

	var got [3]int
	for i := range got {
		port, err := pa.Allocate()
		if err != nil {
			t.Fatal(err)
		}
		got[i] = port
	}
	p1, p2, p3 := got[0], got[1], got[2]

	// All three ports should be distinct
	if p1 == p2 || p2 == p3 || p1 == p3 {
		t.Fatalf("expected distinct ports: %d, %d, %d", p1, p2, p3)
	}

	// Exhausted
	if _, err := pa.Allocate(); err == nil {
		t.Fatal("expected error when exhausted")
	}

	// Release and re-allocate
	pa.Release(p2)
	again, err := pa.Allocate()
	if err != nil {
		t.Fatal(err)
	}
	if again != p2 {
		t.Fatalf("expected released port %d, got %d", p2, again)
	}
}
// TestPortAllocator_Concurrent allocates the whole 100-port range from 100
// goroutines and checks that every port is handed out exactly once.
func TestPortAllocator_Concurrent(t *testing.T) {
	const total = 100
	pa := NewPortAllocator(8500, 8599)
	results := make(chan int, total)

	var wg sync.WaitGroup
	wg.Add(total)
	for g := 0; g < total; g++ {
		go func() {
			defer wg.Done()
			if port, err := pa.Allocate(); err == nil {
				results <- port
			}
		}()
	}
	wg.Wait()
	close(results)

	unique := make(map[int]bool)
	for port := range results {
		if unique[port] {
			t.Fatalf("duplicate port %d", port)
		}
		unique[port] = true
	}
	if len(unique) != 100 {
		t.Fatalf("expected 100 ports, got %d", len(unique))
	}
}

313
internal/pool/sidecar.go Normal file
View File

@@ -0,0 +1,313 @@
package pool
import (
"bytes"
"context"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/indifferentketchup/llama-sidecar/internal/config"
"github.com/indifferentketchup/llama-sidecar/internal/validator"
)
// Sidecar describes one llama-server child process managed by the pool,
// together with the handles needed to monitor it and shut it down.
type Sidecar struct {
	Hash string
	ModelID string
	ModelPath string
	Flags []string
	Port int
	Pid int
	StartedAt time.Time
	LastUsed atomic.Int64 // Unix nanoseconds; updated by TouchLastUsed
	healthy atomic.Bool // set after the first successful health check, cleared when the child exits or the monitor gives up
	cmd *exec.Cmd
	cancel context.CancelFunc // cancels the context the child command was started with
	done chan error // receives the cmd.Wait result, then is closed
	stderr *ringBuffer // most recent stderr lines of the child
	stopMon context.CancelFunc // stops the health monitor; nil until the sidecar became healthy
	stdinFile *os.File // handle on os.DevNull used as the child's stdin
	stdoutR *os.File // read end of the child's stdout pipe (drained in the background)
	stdoutFile *os.File // write end of the child's stdout pipe
}
// Healthy reports the sidecar's current health flag.
func (s *Sidecar) Healthy() bool {
	return s.healthy.Load()
}
// TouchLastUsed sets LastUsed to the current time in Unix nanoseconds.
func (s *Sidecar) TouchLastUsed() {
	s.LastUsed.Store(time.Now().UnixNano())
}
// LastStderr returns the buffered tail of the child's stderr, one line per
// row.
func (s *Sidecar) LastStderr() string {
	return s.stderr.String()
}
// Spawner abstracts sidecar creation for testing.
//
// Spawn starts a sidecar for the given model on port and returns it, or an
// error if it could not be started. Kill stops a sidecar previously returned
// by Spawn.
type Spawner interface {
	Spawn(ctx context.Context, cfg *config.Config, modelID, modelPath string, flags []string, port int, hash string) (*Sidecar, error)
	Kill(s *Sidecar) error
}
// RealSpawner is the Spawner that launches actual llama-server processes.
type RealSpawner struct{}
// Spawn starts cfg.LlamaServerBin for modelPath on port and blocks until the
// child answers GET /health with 200, the child exits, or
// cfg.HealthTimeoutSeconds elapses.
//
// The child's stdin is os.DevNull, its stdout is drained and discarded, and
// its stderr is both kept in a ring buffer and echoed to this process's
// stderr with a "[sidecar:<hash>:<port>]" prefix.
//
// ctx is deliberately not used: the child and its health monitor run under a
// background-derived context, so they outlive the request that triggered the
// spawn and stop only when Kill is called.
//
// On success the returned Sidecar is marked healthy and a health monitor
// goroutine is running. On failure every file handle opened here is closed
// and a started child is killed.
func (rs *RealSpawner) Spawn(ctx context.Context, cfg *config.Config, modelID, modelPath string, flags []string, port int, hash string) (*Sidecar, error) {
	_ = ctx
	args := buildArgs(cfg.BaseArgs, modelPath, port, flags)

	childCtx, cancel := context.WithCancel(context.Background())
	cmd := exec.CommandContext(childCtx, cfg.LlamaServerBin, args...)
	setPlatformAttrs(cmd)

	devNull, err := os.Open(os.DevNull)
	if err != nil {
		cancel()
		return nil, fmt.Errorf("open devnull: %w", err)
	}
	cmd.Stdin = devNull

	stderr := newRingBuffer(64)
	// The log prefix carries at most the first 8 characters of the hash.
	tag := hash
	if len(tag) > 8 {
		tag = tag[:8]
	}
	prefix := fmt.Sprintf("[sidecar:%s:%d] ", tag, port)
	cmd.Stderr = io.MultiWriter(stderr, &prefixWriter{prefix: prefix})

	stdoutR, stdoutW, err := os.Pipe()
	if err != nil {
		cancel()
		devNull.Close()
		return nil, fmt.Errorf("stdout pipe: %w", err)
	}
	// Keep the pipe drained so the child never blocks writing to stdout.
	go io.Copy(io.Discard, stdoutR)
	cmd.Stdout = stdoutW

	slog.Info("spawning sidecar", "hash", hash, "model", modelID, "port", port, "args", strings.Join(args, " "))
	if err := cmd.Start(); err != nil {
		cancel()
		// Closing the write end first lets the drain goroutine see EOF.
		devNull.Close()
		stdoutW.Close()
		stdoutR.Close()
		return nil, fmt.Errorf("spawn failed: %w", err)
	}

	s := &Sidecar{
		Hash:       hash,
		ModelID:    modelID,
		ModelPath:  modelPath,
		Flags:      flags,
		Port:       port,
		Pid:        cmd.Process.Pid,
		StartedAt:  time.Now(),
		cmd:        cmd,
		cancel:     cancel,
		done:       make(chan error, 1),
		stderr:     stderr,
		stdinFile:  devNull,
		stdoutR:    stdoutR,
		stdoutFile: stdoutW,
	}
	s.LastUsed.Store(time.Now().UnixNano())

	// Reaper: waits for the child, marks the sidecar unhealthy and publishes
	// the Wait result on s.done before closing it.
	go func() {
		err := cmd.Wait()
		s.healthy.Store(false)
		exitCode := -1
		if cmd.ProcessState != nil {
			exitCode = cmd.ProcessState.ExitCode()
		}
		slog.Error("sidecar child exited",
			"hash", hash,
			"port", port,
			"pid", s.Pid,
			"exit_code", exitCode,
			"wait_err", fmt.Sprintf("%v", err),
			"uptime", time.Since(s.StartedAt).Round(time.Millisecond),
			"stderr_tail", stderr.String(),
		)
		s.done <- err
		close(s.done)
	}()

	// Wait for health. Each probe has its own timeout so a hung connection
	// cannot stall the loop past the deadline.
	client := &http.Client{Timeout: 2 * time.Second}
	healthURL := fmt.Sprintf("http://127.0.0.1:%d/health", port)
	deadline := time.Now().Add(time.Duration(cfg.HealthTimeoutSeconds) * time.Second)
	for time.Now().Before(deadline) {
		resp, err := client.Get(healthURL)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				s.healthy.Store(true)
				slog.Info("sidecar healthy", "hash", hash, "port", port, "elapsed", time.Since(s.StartedAt).Round(time.Millisecond))
				// Tie the monitor to the child's lifetime, not the
				// request's, so it keeps running after this call returns.
				monCtx, monCancel := context.WithCancel(childCtx)
				s.stopMon = monCancel
				go s.healthMonitor(monCtx, cfg.HealthIntervalSeconds)
				return s, nil
			}
		}
		select {
		case <-s.done:
			// The child exited on its own; fail now instead of polling a
			// dead port until the deadline. Kill releases the handles.
			_ = rs.Kill(s)
			return nil, fmt.Errorf("sidecar process exited during health check, last stderr: %s", s.stderr.LastLine())
		case <-time.After(500 * time.Millisecond):
		}
	}

	_ = rs.Kill(s)
	return nil, fmt.Errorf("health check timed out after %ds, last stderr: %s", cfg.HealthTimeoutSeconds, s.stderr.LastLine())
}
// Kill stops the health monitor, cancels the child's context and waits for
// the child to be reaped. If that takes longer than five seconds the process
// is killed explicitly and the wait resumes. The stdin and stdout handles are
// closed afterwards. Kill always returns nil.
func (rs *RealSpawner) Kill(s *Sidecar) error {
	if stop := s.stopMon; stop != nil {
		stop()
	}
	s.cancel()

	grace := time.After(5 * time.Second)
	select {
	case <-s.done:
	case <-grace:
		if proc := s.cmd.Process; proc != nil {
			_ = proc.Kill()
		}
		<-s.done
	}

	// Same close order as before: stdin, stdout write end, stdout read end.
	for _, f := range []*os.File{s.stdinFile, s.stdoutFile, s.stdoutR} {
		if f != nil {
			f.Close()
		}
	}
	slog.Info("sidecar killed", "hash", s.Hash, "port", s.Port)
	return nil
}
// healthMonitor polls the sidecar's /health endpoint every intervalSec
// seconds until ctx is done. After three consecutive failed probes (request
// error or non-200 status) it marks the sidecar unhealthy and returns; any
// successful probe resets the failure count.
//
// A non-positive intervalSec disables monitoring: time.NewTicker panics on
// such a duration, which would take down the whole process from this
// goroutine, so the monitor logs a warning and returns instead.
func (s *Sidecar) healthMonitor(ctx context.Context, intervalSec int) {
	if intervalSec <= 0 {
		slog.Warn("sidecar health monitor disabled: non-positive interval",
			"hash", s.Hash, "port", s.Port, "interval_sec", intervalSec)
		return
	}
	ticker := time.NewTicker(time.Duration(intervalSec) * time.Second)
	defer ticker.Stop()

	failures := 0
	url := fmt.Sprintf("http://127.0.0.1:%d/health", s.Port)
	client := &http.Client{Timeout: 5 * time.Second}
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		resp, err := client.Get(url)
		if resp != nil {
			resp.Body.Close()
		}
		if err == nil && resp.StatusCode == http.StatusOK {
			failures = 0
			continue
		}
		failures++
		if failures >= 3 {
			slog.Warn("sidecar unhealthy, marking for eviction", "hash", s.Hash, "port", s.Port)
			s.healthy.Store(false)
			return
		}
	}
}
// buildArgs assembles the child's command line: the base args minus any flag
// the user also supplied, then --model and --port, then the user's flags.
func buildArgs(baseArgs []string, modelPath string, port int, userFlags []string) []string {
	kept := dedupFlags(baseArgs, userFlags)

	out := make([]string, 0, len(kept)+len(userFlags)+4)
	out = append(out, kept...)
	out = append(out, "--model", modelPath, "--port", strconv.Itoa(port))
	return append(out, userFlags...)
}
// dedupFlags returns autoArgs without the flags that also appear in userArgs,
// so that only the user's value for such a flag reaches the command line.
// A dropped flag takes its value with it: "--flag=value" is a single token,
// and for "--flag value" the following token is dropped too when
// validator.FlagName does not report it as a flag.
func dedupFlags(autoArgs, userArgs []string) []string {
	overridden := make(map[string]bool)
	for _, arg := range userArgs {
		name := validator.FlagName(arg)
		if name != "" {
			overridden[name] = true
		}
	}

	kept := make([]string, 0, len(autoArgs))
	for pos := 0; pos < len(autoArgs); {
		cur := autoArgs[pos]
		if name := validator.FlagName(cur); name == "" || !overridden[name] {
			kept = append(kept, cur)
			pos++
			continue
		}

		// Overridden flag: skip it, plus its separate value token if any.
		step := 1
		if !strings.Contains(cur, "=") && pos+1 < len(autoArgs) && validator.FlagName(autoArgs[pos+1]) == "" {
			step = 2
		}
		pos += step
	}
	return kept
}
// ringBufferMaxPartial caps how many bytes of an unterminated line are held
// back waiting for its newline; beyond this the fragment is stored as a line
// so a writer that never emits '\n' cannot grow the buffer without bound.
const ringBufferMaxPartial = 64 << 10

// ringBuffer keeps the last max non-empty lines written to it. It is an
// io.Writer and is safe for concurrent use.
//
// Writes may split a line at any byte. The unterminated tail of each write
// is carried over and joined with the next write, so a line is stored once
// and whole however it was chunked.
type ringBuffer struct {
	mu      sync.Mutex
	lines   []string
	max     int
	partial string // bytes written since the last '\n'
}

// newRingBuffer returns a buffer that retains up to max lines. A max below 1
// is treated as 1.
func newRingBuffer(max int) *ringBuffer {
	if max < 1 {
		max = 1
	}
	return &ringBuffer{lines: make([]string, 0, max), max: max}
}

// Write appends the complete lines found in p, joined with any pending
// partial line, and keeps the unterminated remainder for the next call.
// Trailing "\r" is stripped and empty lines are skipped. It always reports
// len(p) bytes written and a nil error.
func (rb *ringBuffer) Write(p []byte) (int, error) {
	rb.mu.Lock()
	defer rb.mu.Unlock()

	data := rb.partial + string(p)
	for {
		line, rest, found := strings.Cut(data, "\n")
		if !found {
			break
		}
		rb.pushLocked(line)
		data = rest
	}
	if len(data) > ringBufferMaxPartial {
		rb.pushLocked(data)
		data = ""
	}
	rb.partial = data
	return len(p), nil
}

// pushLocked stores one line, evicting the oldest when the buffer is full.
// The caller must hold rb.mu.
func (rb *ringBuffer) pushLocked(line string) {
	line = strings.TrimRight(line, "\r\n")
	if line == "" {
		return
	}
	if len(rb.lines) >= rb.max && len(rb.lines) > 0 {
		// Shift in place so the backing array is reused.
		copy(rb.lines, rb.lines[1:])
		rb.lines = rb.lines[:len(rb.lines)-1]
	}
	rb.lines = append(rb.lines, line)
}

// snapshotLocked returns the stored lines followed by the pending partial
// line, if there is one, so output that was never newline-terminated is
// still visible. The caller must hold rb.mu.
func (rb *ringBuffer) snapshotLocked() []string {
	tail := strings.TrimRight(rb.partial, "\r\n")
	if tail == "" {
		return rb.lines
	}
	out := make([]string, 0, len(rb.lines)+1)
	out = append(out, rb.lines...)
	return append(out, tail)
}

// String returns the buffered lines joined by "\n".
func (rb *ringBuffer) String() string {
	rb.mu.Lock()
	defer rb.mu.Unlock()
	return strings.Join(rb.snapshotLocked(), "\n")
}

// LastLine returns the most recent line, or "" when nothing has been written.
func (rb *ringBuffer) LastLine() string {
	rb.mu.Lock()
	defer rb.mu.Unlock()
	all := rb.snapshotLocked()
	if len(all) == 0 {
		return ""
	}
	return all[len(all)-1]
}
// prefixWriter is an io.Writer that echoes every complete line it receives to
// os.Stderr with prefix prepended. Bytes after the last newline stay in buf
// until a later write completes the line.
type prefixWriter struct {
	prefix string
	buf    bytes.Buffer
}

// Write buffers p and emits each newline-terminated line now available. It
// always reports len(p) bytes written and a nil error.
func (pw *prefixWriter) Write(p []byte) (int, error) {
	pw.buf.Write(p)
	for {
		end := bytes.IndexByte(pw.buf.Bytes(), '\n')
		if end < 0 {
			// No complete line left; keep the remainder buffered.
			break
		}
		line := string(pw.buf.Next(end + 1))
		fmt.Fprint(os.Stderr, pw.prefix+line)
	}
	return len(p), nil
}

View File

@@ -0,0 +1,96 @@
package pool
import (
"reflect"
"testing"
)
// TestBuildArgs_PreservesNonOverlapping verifies that base flags the user did
// not supply survive, that the user's flags are present, and that --model and
// --port are injected.
func TestBuildArgs_PreservesNonOverlapping(t *testing.T) {
	base := []string{"-ngl", "999", "-c", "32768", "--flash-attn", "on", "--no-mmap"}
	user := []string{"--top-k", "20"}
	got := buildArgs(base, "/model.gguf", 8500, user)

	checks := []struct {
		seq    []string
		format string
	}{
		// -c 32768 must survive (user didn't supply -c)
		{[]string{"-c", "32768"}, "-c 32768 missing from args: %v"},
		// --top-k 20 must be present (user flag)
		{[]string{"--top-k", "20"}, "--top-k 20 missing from args: %v"},
		// --model and --port injected
		{[]string{"--model", "/model.gguf"}, "--model missing: %v"},
		{[]string{"--port", "8500"}, "--port missing: %v"},
	}
	for _, c := range checks {
		if !containsSeq(got, c.seq...) {
			t.Errorf(c.format, got)
		}
	}
}
// TestBuildArgs_UserOverridesBase verifies that a user-supplied -c replaces
// the base -c, leaving exactly one -c on the command line.
func TestBuildArgs_UserOverridesBase(t *testing.T) {
	base := []string{"-ngl", "999", "-c", "32768"}
	user := []string{"-c", "131072"}
	got := buildArgs(base, "/model.gguf", 8500, user)

	// base -c should be dropped, user -c should be present
	seen := 0
	for idx := 0; idx+1 < len(got); idx++ {
		if got[idx] != "-c" {
			continue
		}
		seen++
		if got[idx+1] == "32768" {
			t.Errorf("base -c 32768 should have been deduped: %v", got)
		}
	}
	if seen != 1 {
		t.Errorf("expected exactly 1 -c flag, got %d in %v", seen, got)
	}
}
// TestBuildArgs_NoUserFlags verifies that every base flag is kept when the
// user supplies no flags at all.
func TestBuildArgs_NoUserFlags(t *testing.T) {
	got := buildArgs([]string{"-ngl", "999", "-c", "32768", "--no-mmap"}, "/model.gguf", 8500, nil)

	hasCtx := containsSeq(got, "-c", "32768")
	if !hasCtx {
		t.Errorf("-c 32768 missing when no user flags: %v", got)
	}
	hasNoMmap := containsSeq(got, "--no-mmap")
	if !hasNoMmap {
		t.Errorf("--no-mmap missing: %v", got)
	}
}
// TestDedupFlags_Mixed verifies that only the flag the user overrides is
// dropped, together with its value, and that the rest keeps its order.
func TestDedupFlags_Mixed(t *testing.T) {
	got := dedupFlags(
		[]string{"--top-k", "40", "-c", "32768", "--no-mmap"},
		[]string{"--top-k", "20"},
	)
	want := []string{"-c", "32768", "--no-mmap"}

	if equal := reflect.DeepEqual(got, want); !equal {
		t.Errorf("dedupFlags = %v, want %v", got, want)
	}
}
// TestDedupFlags_EqualsForm verifies that a base flag written as
// "--flag=value" is dropped when the user supplies the same flag in the
// two-token form.
func TestDedupFlags_EqualsForm(t *testing.T) {
	got := dedupFlags(
		[]string{"--ctx-size=4096", "--no-mmap"},
		[]string{"--ctx-size", "8192"},
	)
	want := []string{"--no-mmap"}

	if equal := reflect.DeepEqual(got, want); !equal {
		t.Errorf("dedupFlags = %v, want %v", got, want)
	}
}
// containsSeq reports whether seq occurs in args as a contiguous run. An
// empty seq is always found.
func containsSeq(args []string, seq ...string) bool {
	lastStart := len(args) - len(seq)
search:
	for start := 0; start <= lastStart; start++ {
		for off := range seq {
			if args[start+off] != seq[off] {
				continue search
			}
		}
		return true
	}
	return false
}

View File

@@ -0,0 +1,7 @@
//go:build !windows
package pool
import "os/exec"
// setPlatformAttrs is a no-op in builds that are not for Windows.
func setPlatformAttrs(_ *exec.Cmd) {}

View File

@@ -0,0 +1,15 @@
//go:build windows
package pool
import (
"os/exec"
"syscall"
)
func setPlatformAttrs(cmd *exec.Cmd) {
cmd.SysProcAttr = &syscall.SysProcAttr{
HideWindow: true,
CreationFlags: 0x00000008 | 0x00000200, // DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP
}
}