Files
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

189 lines
4.0 KiB
Go

package pool
import (
"container/list"
"context"
"fmt"
"log/slog"
"sync"
"time"
"github.com/indifferentketchup/llama-sidecar/internal/config"
"github.com/indifferentketchup/llama-sidecar/internal/validator"
)
// SidecarInfo is a plain-data snapshot of one pooled sidecar. Pool.List fills
// it from the corresponding Sidecar; the struct tags give the JSON field names.
type SidecarInfo struct {
	// Hash is copied from Sidecar.Hash.
	Hash string `json:"hash"`
	// ModelID is copied from Sidecar.ModelID.
	ModelID string `json:"model_id"`
	// Flags is copied from Sidecar.Flags; the slice is shared, not cloned.
	Flags []string `json:"flags"`
	// Port is copied from Sidecar.Port.
	Port int `json:"port"`
	// Pid is copied from Sidecar.Pid.
	Pid int `json:"pid"`
	// StartedAt is copied from Sidecar.StartedAt.
	StartedAt time.Time `json:"started_at"`
	// LastUsed is Sidecar.LastUsed interpreted as Unix nanoseconds.
	LastUsed time.Time `json:"last_used"`
	// Healthy is the result of Sidecar.Healthy() at the time of the snapshot.
	Healthy bool `json:"healthy"`
}
// Pool keeps track of running sidecars keyed by hash and orders them by
// recency of use so the least recently used one can be evicted when the pool
// is full.
type Pool struct {
	// mu is held by every method while it touches the fields below.
	mu sync.Mutex
	// cfg supplies ModelDirMap, MaxSidecars and the port range.
	cfg *config.Config
	// sidecars maps a hash to its sidecar.
	sidecars map[string]*Sidecar
	// lru holds hashes (as string values); the front is the most recently
	// used entry and the back is the eviction candidate.
	lru *list.List
	// lruIdx maps a hash to its element in lru.
	lruIdx map[string]*list.Element
	// ports hands out and takes back port numbers for sidecars.
	ports *PortAllocator
	// spawner starts (Spawn) and stops (Kill) sidecars.
	spawner Spawner
}
// New returns an empty Pool that uses cfg for its limits and spawner to start
// and stop sidecars. The port allocator is created from cfg.PortRangeLo and
// cfg.PortRangeHi.
func New(cfg *config.Config, spawner Spawner) *Pool {
	p := new(Pool)
	p.cfg = cfg
	p.spawner = spawner

	// Bookkeeping starts out empty; entries are added by Acquire.
	p.sidecars = make(map[string]*Sidecar)
	p.lru = list.New()
	p.lruIdx = make(map[string]*list.Element)

	p.ports = NewPortAllocator(cfg.PortRangeLo, cfg.PortRangeHi)
	return p
}
// Acquire returns a sidecar for modelID started with flags, reusing a pooled
// one when a healthy entry exists for Hash(modelID, flags) and spawning a new
// one otherwise.
//
// It fails when flags do not pass validator.ValidateExtraArgs, when modelID is
// not a key of cfg.ModelDirMap, when the pool is full and nothing can be
// evicted, when no port can be allocated, or when the spawner fails.
//
// The pool mutex is released while the spawner runs, so other callers are not
// blocked behind a spawn. Because the pool can change in that window, the hash
// and the capacity are checked again once the lock is re-taken: if another
// caller registered a healthy sidecar for the same hash in the meantime, the
// freshly spawned one is killed, its port is released, and the registered one
// is returned.
func (p *Pool) Acquire(ctx context.Context, modelID string, flags []string) (*Sidecar, error) {
	// Only the error of the validator is used; its first result is discarded.
	if _, err := validator.ValidateExtraArgs(flags); err != nil {
		return nil, fmt.Errorf("validation: %w", err)
	}
	modelPath, ok := p.cfg.ModelDirMap[modelID]
	if !ok {
		return nil, fmt.Errorf("unknown model: %s", modelID)
	}
	hash := Hash(modelID, flags)

	p.mu.Lock()
	defer p.mu.Unlock()

	// Fast path: a healthy sidecar already exists for this hash.
	if s, ok := p.sidecars[hash]; ok {
		if s.Healthy() {
			if el, ok := p.lruIdx[hash]; ok {
				p.lru.MoveToFront(el)
			}
			s.TouchLastUsed()
			return s, nil
		}
		// Unhealthy entry: drop it and fall through to spawn a replacement.
		p.removeLocked(hash)
	}

	if len(p.sidecars) >= p.cfg.MaxSidecars {
		if err := p.evictLRULocked(); err != nil {
			return nil, fmt.Errorf("eviction failed: %w", err)
		}
	}

	port, err := p.ports.Allocate()
	if err != nil {
		return nil, fmt.Errorf("port allocation: %w", err)
	}

	// Run the spawner without holding the lock. The lock is re-taken via
	// defer so that the outer deferred Unlock stays balanced even if Spawn
	// panics; unlocking an unlocked mutex would be a fatal runtime error.
	p.mu.Unlock()
	s, err := func() (*Sidecar, error) {
		defer p.mu.Lock()
		return p.spawner.Spawn(ctx, p.cfg, modelID, modelPath, flags, port, hash)
	}()
	if err != nil {
		p.ports.Release(port)
		return nil, fmt.Errorf("spawn: %w", err)
	}

	// discard kills the sidecar spawned above and returns its port; used when
	// it turns out the new sidecar cannot be registered.
	discard := func() {
		if kerr := p.spawner.Kill(s); kerr != nil {
			slog.Error("kill failed for redundant sidecar", "hash", hash, "err", kerr)
		}
		p.ports.Release(port)
	}

	// Another Acquire for the same hash may have registered a sidecar while
	// the lock was released. Overwriting it would leak its process and port
	// and leave a duplicate hash in the LRU list.
	if cur, ok := p.sidecars[hash]; ok {
		if cur.Healthy() {
			discard()
			if el, ok := p.lruIdx[hash]; ok {
				p.lru.MoveToFront(el)
			}
			cur.TouchLastUsed()
			return cur, nil
		}
		p.removeLocked(hash)
	}

	// Concurrent spawns may have filled the pool since the first capacity
	// check; evict again so MaxSidecars is still respected.
	if len(p.sidecars) >= p.cfg.MaxSidecars {
		if err := p.evictLRULocked(); err != nil {
			discard()
			return nil, fmt.Errorf("eviction failed: %w", err)
		}
	}

	p.sidecars[hash] = s
	p.lruIdx[hash] = p.lru.PushFront(hash)
	return s, nil
}
// List returns one SidecarInfo per pooled sidecar, built while holding the
// pool mutex. The result follows map iteration order, so it is not sorted.
func (p *Pool) List() []SidecarInfo {
	p.mu.Lock()
	defer p.mu.Unlock()

	infos := make([]SidecarInfo, 0, len(p.sidecars))
	for _, sc := range p.sidecars {
		var info SidecarInfo
		info.Hash = sc.Hash
		info.ModelID = sc.ModelID
		info.Flags = sc.Flags
		info.Port = sc.Port
		info.Pid = sc.Pid
		info.StartedAt = sc.StartedAt
		// LastUsed is loaded as an integer and read as Unix nanoseconds.
		lastUsedNanos := sc.LastUsed.Load()
		info.LastUsed = time.Unix(0, lastUsedNanos)
		info.Healthy = sc.Healthy()
		infos = append(infos, info)
	}
	return infos
}
// Remove takes the sidecar registered under hash out of the pool via
// removeLocked. It returns an error when no sidecar is registered for hash.
func (p *Pool) Remove(hash string) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	_, present := p.sidecars[hash]
	if present {
		return p.removeLocked(hash)
	}
	return fmt.Errorf("sidecar %s not found", hash)
}
// Shutdown asks the spawner to kill every sidecar that is registered when the
// call starts, one goroutine per sidecar, and waits for all of them to finish
// or for ctx to be done, whichever happens first. When ctx wins, ctx.Err() is
// returned and the kill goroutines keep running. Kill failures are logged, not
// returned. Entries are not removed from the pool here.
func (p *Pool) Shutdown(ctx context.Context) error {
	// Snapshot the keys so the lock is not held while sidecars are killed.
	p.mu.Lock()
	keys := make([]string, 0, len(p.sidecars))
	for k := range p.sidecars {
		keys = append(keys, k)
	}
	p.mu.Unlock()

	var pending sync.WaitGroup
	pending.Add(len(keys))
	for _, k := range keys {
		k := k // per-iteration copy for the closure below
		go func() {
			defer pending.Done()

			// Look the sidecar up again: it may have been removed since the
			// snapshot was taken.
			p.mu.Lock()
			sc, found := p.sidecars[k]
			p.mu.Unlock()

			if found {
				if err := p.spawner.Kill(sc); err != nil {
					slog.Error("shutdown kill failed", "hash", k, "err", err)
				}
			}
		}()
	}

	// Turn WaitGroup completion into a channel so it can be selected on.
	finished := make(chan struct{})
	go func() {
		pending.Wait()
		close(finished)
	}()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-finished:
	}

	slog.Info("pool shutdown complete", "count", len(keys))
	return nil
}
// removeLocked drops the sidecar registered under hash from the pool maps and
// the LRU list, asks the spawner to kill it, and releases its port. It is
// called with p.mu held (see Acquire and Remove). A missing hash is a no-op.
// A kill failure is only logged, so the returned error is always nil.
func (p *Pool) removeLocked(hash string) error {
	victim, found := p.sidecars[hash]
	if !found {
		return nil
	}

	// Unregister first so the pool no longer refers to the sidecar.
	delete(p.sidecars, hash)
	if node, tracked := p.lruIdx[hash]; tracked {
		delete(p.lruIdx, hash)
		p.lru.Remove(node)
	}

	killErr := p.spawner.Kill(victim)
	if killErr != nil {
		slog.Error("kill failed during remove", "hash", hash, "err", killErr)
	}

	// The port is released even when the kill reported an error.
	p.ports.Release(victim.Port)
	return nil
}
// evictLRULocked removes the sidecar whose hash sits at the back of the LRU
// list, i.e. the least recently used one. It is called with p.mu held (see
// Acquire). An empty list yields an error.
func (p *Pool) evictLRULocked() error {
	if oldest := p.lru.Back(); oldest != nil {
		// The list stores hashes as plain strings.
		key := oldest.Value.(string)
		slog.Info("evicting LRU sidecar", "hash", key)
		return p.removeLocked(key)
	}
	return fmt.Errorf("pool full but LRU empty")
}