llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
188
internal/pool/pool.go
Normal file
188
internal/pool/pool.go
Normal file
@@ -0,0 +1,188 @@
|
||||
package pool
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/config"
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/validator"
|
||||
)
|
||||
|
||||
// SidecarInfo is a JSON-serializable snapshot of a single pooled sidecar, as
// returned by Pool.List.
type SidecarInfo struct {
	// Hash is the key the sidecar is stored under in the pool.
	Hash string `json:"hash"`

	// ModelID is the model identifier copied from the sidecar.
	ModelID string `json:"model_id"`

	// Flags is the flag list copied from the sidecar.
	Flags []string `json:"flags"`

	// Port and Pid are copied from the sidecar's Port and Pid fields.
	Port int `json:"port"`
	Pid  int `json:"pid"`

	// StartedAt is the sidecar's recorded start time.
	StartedAt time.Time `json:"started_at"`

	// LastUsed is the sidecar's last-used timestamp converted from Unix
	// nanoseconds.
	LastUsed time.Time `json:"last_used"`

	// Healthy is the result of the sidecar's health check at snapshot time.
	Healthy bool `json:"healthy"`
}
|
||||
|
||||
type Pool struct {
|
||||
mu sync.Mutex
|
||||
cfg *config.Config
|
||||
sidecars map[string]*Sidecar
|
||||
lru *list.List
|
||||
lruIdx map[string]*list.Element
|
||||
ports *PortAllocator
|
||||
spawner Spawner
|
||||
}
|
||||
|
||||
func New(cfg *config.Config, spawner Spawner) *Pool {
|
||||
return &Pool{
|
||||
cfg: cfg,
|
||||
sidecars: make(map[string]*Sidecar),
|
||||
lru: list.New(),
|
||||
lruIdx: make(map[string]*list.Element),
|
||||
ports: NewPortAllocator(cfg.PortRangeLo, cfg.PortRangeHi),
|
||||
spawner: spawner,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Pool) Acquire(ctx context.Context, modelID string, flags []string) (*Sidecar, error) {
|
||||
if _, err := validator.ValidateExtraArgs(flags); err != nil {
|
||||
return nil, fmt.Errorf("validation: %w", err)
|
||||
}
|
||||
|
||||
modelPath, ok := p.cfg.ModelDirMap[modelID]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown model: %s", modelID)
|
||||
}
|
||||
|
||||
hash := Hash(modelID, flags)
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if s, ok := p.sidecars[hash]; ok {
|
||||
if s.Healthy() {
|
||||
if el, ok := p.lruIdx[hash]; ok {
|
||||
p.lru.MoveToFront(el)
|
||||
}
|
||||
s.TouchLastUsed()
|
||||
return s, nil
|
||||
}
|
||||
p.removeLocked(hash)
|
||||
}
|
||||
|
||||
if len(p.sidecars) >= p.cfg.MaxSidecars {
|
||||
if err := p.evictLRULocked(); err != nil {
|
||||
return nil, fmt.Errorf("eviction failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
port, err := p.ports.Allocate()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("port allocation: %w", err)
|
||||
}
|
||||
|
||||
p.mu.Unlock()
|
||||
s, err := p.spawner.Spawn(ctx, p.cfg, modelID, modelPath, flags, port, hash)
|
||||
p.mu.Lock()
|
||||
|
||||
if err != nil {
|
||||
p.ports.Release(port)
|
||||
return nil, fmt.Errorf("spawn: %w", err)
|
||||
}
|
||||
|
||||
p.sidecars[hash] = s
|
||||
el := p.lru.PushFront(hash)
|
||||
p.lruIdx[hash] = el
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (p *Pool) List() []SidecarInfo {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
out := make([]SidecarInfo, 0, len(p.sidecars))
|
||||
for _, s := range p.sidecars {
|
||||
out = append(out, SidecarInfo{
|
||||
Hash: s.Hash,
|
||||
ModelID: s.ModelID,
|
||||
Flags: s.Flags,
|
||||
Port: s.Port,
|
||||
Pid: s.Pid,
|
||||
StartedAt: s.StartedAt,
|
||||
LastUsed: time.Unix(0, s.LastUsed.Load()),
|
||||
Healthy: s.Healthy(),
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *Pool) Remove(hash string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if _, ok := p.sidecars[hash]; !ok {
|
||||
return fmt.Errorf("sidecar %s not found", hash)
|
||||
}
|
||||
return p.removeLocked(hash)
|
||||
}
|
||||
|
||||
func (p *Pool) Shutdown(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
hashes := make([]string, 0, len(p.sidecars))
|
||||
for h := range p.sidecars {
|
||||
hashes = append(hashes, h)
|
||||
}
|
||||
p.mu.Unlock()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for _, h := range hashes {
|
||||
wg.Add(1)
|
||||
go func(hash string) {
|
||||
defer wg.Done()
|
||||
p.mu.Lock()
|
||||
s, ok := p.sidecars[hash]
|
||||
p.mu.Unlock()
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if err := p.spawner.Kill(s); err != nil {
|
||||
slog.Error("shutdown kill failed", "hash", hash, "err", err)
|
||||
}
|
||||
}(h)
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() { wg.Wait(); close(done) }()
|
||||
select {
|
||||
case <-done:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
slog.Info("pool shutdown complete", "count", len(hashes))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Pool) removeLocked(hash string) error {
|
||||
s, ok := p.sidecars[hash]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
delete(p.sidecars, hash)
|
||||
if el, ok := p.lruIdx[hash]; ok {
|
||||
p.lru.Remove(el)
|
||||
delete(p.lruIdx, hash)
|
||||
}
|
||||
if err := p.spawner.Kill(s); err != nil {
|
||||
slog.Error("kill failed during remove", "hash", hash, "err", err)
|
||||
}
|
||||
p.ports.Release(s.Port)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Pool) evictLRULocked() error {
|
||||
back := p.lru.Back()
|
||||
if back == nil {
|
||||
return fmt.Errorf("pool full but LRU empty")
|
||||
}
|
||||
hash := back.Value.(string)
|
||||
slog.Info("evicting LRU sidecar", "hash", hash)
|
||||
return p.removeLocked(hash)
|
||||
}
|
||||
Reference in New Issue
Block a user