Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
189 lines
4.0 KiB
Go
189 lines
4.0 KiB
Go
package pool
|
|
|
|
import (
|
|
"container/list"
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/indifferentketchup/llama-sidecar/internal/config"
|
|
"github.com/indifferentketchup/llama-sidecar/internal/validator"
|
|
)
|
|
|
|
// SidecarInfo is a JSON-serializable snapshot of one pooled sidecar, as
// produced by Pool.List.
type SidecarInfo struct {
	// Identity of the sidecar: its pool key, the model it serves and the
	// extra flags it was requested with.
	Hash    string   `json:"hash"`
	ModelID string   `json:"model_id"`
	Flags   []string `json:"flags"`

	// Process details.
	Port int `json:"port"`
	Pid  int `json:"pid"`

	// Lifecycle timestamps and the health state observed when the snapshot
	// was taken.
	StartedAt time.Time `json:"started_at"`
	LastUsed  time.Time `json:"last_used"`
	Healthy   bool      `json:"healthy"`
}
|
|
|
|
type Pool struct {
|
|
mu sync.Mutex
|
|
cfg *config.Config
|
|
sidecars map[string]*Sidecar
|
|
lru *list.List
|
|
lruIdx map[string]*list.Element
|
|
ports *PortAllocator
|
|
spawner Spawner
|
|
}
|
|
|
|
func New(cfg *config.Config, spawner Spawner) *Pool {
|
|
return &Pool{
|
|
cfg: cfg,
|
|
sidecars: make(map[string]*Sidecar),
|
|
lru: list.New(),
|
|
lruIdx: make(map[string]*list.Element),
|
|
ports: NewPortAllocator(cfg.PortRangeLo, cfg.PortRangeHi),
|
|
spawner: spawner,
|
|
}
|
|
}
|
|
|
|
func (p *Pool) Acquire(ctx context.Context, modelID string, flags []string) (*Sidecar, error) {
|
|
if _, err := validator.ValidateExtraArgs(flags); err != nil {
|
|
return nil, fmt.Errorf("validation: %w", err)
|
|
}
|
|
|
|
modelPath, ok := p.cfg.ModelDirMap[modelID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("unknown model: %s", modelID)
|
|
}
|
|
|
|
hash := Hash(modelID, flags)
|
|
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
|
|
if s, ok := p.sidecars[hash]; ok {
|
|
if s.Healthy() {
|
|
if el, ok := p.lruIdx[hash]; ok {
|
|
p.lru.MoveToFront(el)
|
|
}
|
|
s.TouchLastUsed()
|
|
return s, nil
|
|
}
|
|
p.removeLocked(hash)
|
|
}
|
|
|
|
if len(p.sidecars) >= p.cfg.MaxSidecars {
|
|
if err := p.evictLRULocked(); err != nil {
|
|
return nil, fmt.Errorf("eviction failed: %w", err)
|
|
}
|
|
}
|
|
|
|
port, err := p.ports.Allocate()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("port allocation: %w", err)
|
|
}
|
|
|
|
p.mu.Unlock()
|
|
s, err := p.spawner.Spawn(ctx, p.cfg, modelID, modelPath, flags, port, hash)
|
|
p.mu.Lock()
|
|
|
|
if err != nil {
|
|
p.ports.Release(port)
|
|
return nil, fmt.Errorf("spawn: %w", err)
|
|
}
|
|
|
|
p.sidecars[hash] = s
|
|
el := p.lru.PushFront(hash)
|
|
p.lruIdx[hash] = el
|
|
return s, nil
|
|
}
|
|
|
|
func (p *Pool) List() []SidecarInfo {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
out := make([]SidecarInfo, 0, len(p.sidecars))
|
|
for _, s := range p.sidecars {
|
|
out = append(out, SidecarInfo{
|
|
Hash: s.Hash,
|
|
ModelID: s.ModelID,
|
|
Flags: s.Flags,
|
|
Port: s.Port,
|
|
Pid: s.Pid,
|
|
StartedAt: s.StartedAt,
|
|
LastUsed: time.Unix(0, s.LastUsed.Load()),
|
|
Healthy: s.Healthy(),
|
|
})
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (p *Pool) Remove(hash string) error {
|
|
p.mu.Lock()
|
|
defer p.mu.Unlock()
|
|
if _, ok := p.sidecars[hash]; !ok {
|
|
return fmt.Errorf("sidecar %s not found", hash)
|
|
}
|
|
return p.removeLocked(hash)
|
|
}
|
|
|
|
func (p *Pool) Shutdown(ctx context.Context) error {
|
|
p.mu.Lock()
|
|
hashes := make([]string, 0, len(p.sidecars))
|
|
for h := range p.sidecars {
|
|
hashes = append(hashes, h)
|
|
}
|
|
p.mu.Unlock()
|
|
|
|
var wg sync.WaitGroup
|
|
for _, h := range hashes {
|
|
wg.Add(1)
|
|
go func(hash string) {
|
|
defer wg.Done()
|
|
p.mu.Lock()
|
|
s, ok := p.sidecars[hash]
|
|
p.mu.Unlock()
|
|
if !ok {
|
|
return
|
|
}
|
|
if err := p.spawner.Kill(s); err != nil {
|
|
slog.Error("shutdown kill failed", "hash", hash, "err", err)
|
|
}
|
|
}(h)
|
|
}
|
|
|
|
done := make(chan struct{})
|
|
go func() { wg.Wait(); close(done) }()
|
|
select {
|
|
case <-done:
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
slog.Info("pool shutdown complete", "count", len(hashes))
|
|
return nil
|
|
}
|
|
|
|
func (p *Pool) removeLocked(hash string) error {
|
|
s, ok := p.sidecars[hash]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
delete(p.sidecars, hash)
|
|
if el, ok := p.lruIdx[hash]; ok {
|
|
p.lru.Remove(el)
|
|
delete(p.lruIdx, hash)
|
|
}
|
|
if err := p.spawner.Kill(s); err != nil {
|
|
slog.Error("kill failed during remove", "hash", hash, "err", err)
|
|
}
|
|
p.ports.Release(s.Port)
|
|
return nil
|
|
}
|
|
|
|
func (p *Pool) evictLRULocked() error {
|
|
back := p.lru.Back()
|
|
if back == nil {
|
|
return fmt.Errorf("pool full but LRU empty")
|
|
}
|
|
hash := back.Value.(string)
|
|
slog.Info("evicting LRU sidecar", "hash", hash)
|
|
return p.removeLocked(hash)
|
|
}
|