llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx-decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
74
cmd/llama-sidecar/main.go
Normal file
74
cmd/llama-sidecar/main.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/config"
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/pool"
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/server"
|
||||
"github.com/indifferentketchup/llama-sidecar/internal/winsvc"
|
||||
)
|
||||
|
||||
func main() {
|
||||
cfg, err := config.Load()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "config error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
initLogger(cfg.LogLevel)
|
||||
slog.Info("starting llama-sidecar",
|
||||
"bind", cfg.Bind,
|
||||
"max_sidecars", cfg.MaxSidecars,
|
||||
"port_range", fmt.Sprintf("%d-%d", cfg.PortRangeLo, cfg.PortRangeHi),
|
||||
"models", len(cfg.ModelDirMap),
|
||||
"base_args", cfg.BaseArgs,
|
||||
)
|
||||
|
||||
startedAt := time.Now()
|
||||
spawner := &pool.RealSpawner{}
|
||||
p := pool.New(cfg, spawner)
|
||||
srv := server.New(cfg, p, startedAt)
|
||||
|
||||
go func() {
|
||||
slog.Info("listening", "addr", cfg.Bind)
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
slog.Error("server error", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}()
|
||||
|
||||
winsvc.RegisterShutdownHandler(context.Background(), func(ctx context.Context) error {
|
||||
slog.Info("draining HTTP server")
|
||||
drainCtx, drainCancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer drainCancel()
|
||||
if err := srv.Shutdown(drainCtx); err != nil {
|
||||
slog.Error("HTTP drain failed", "err", err)
|
||||
}
|
||||
slog.Info("shutting down sidecar pool")
|
||||
poolCtx, poolCancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer poolCancel()
|
||||
return p.Shutdown(poolCtx)
|
||||
})
|
||||
}
|
||||
|
||||
// initLogger installs a JSON slog handler writing to stdout as the process
// default logger, filtered at the given minimum level.
//
// level is parsed case-insensitively using slog's own level syntax ("debug",
// "info", "warn", "error", optionally with an offset such as "warn+2"). An
// empty or unrecognised value selects info. An unrecognised non-empty value
// is additionally reported through the freshly installed logger so that a
// typo in the configuration does not go unnoticed.
func initLogger(level string) {
	lvl := slog.LevelInfo

	// UnmarshalText leaves the receiver untouched on failure, but parse into a
	// scratch value anyway so the info default is explicit.
	var parsed slog.Level
	parseErr := parsed.UnmarshalText([]byte(level))
	if parseErr == nil {
		lvl = parsed
	}

	handler := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: lvl})
	slog.SetDefault(slog.New(handler))

	// Warn only after the default logger exists, and stay silent for an unset
	// (empty) level, which is treated as "use the default".
	if parseErr != nil && level != "" {
		slog.Warn("unknown log level, defaulting to info", "level", level)
	}
}
|
||||
Reference in New Issue
Block a user