llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx-decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
150
internal/validator/validator_test.go
Normal file
150
internal/validator/validator_test.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package validator
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestValidateExtraArgs_DenyList(t *testing.T) {
|
||||
denied := []string{
|
||||
"-m", "--model",
|
||||
"-mu", "--model-url",
|
||||
"-dr", "--docker-repo",
|
||||
"-hf", "-hfr", "--hf-repo",
|
||||
"-hff", "--hf-file",
|
||||
"-hfv", "-hfrv", "--hf-repo-v",
|
||||
"-hffv", "--hf-file-v",
|
||||
"-hft", "--hf-token",
|
||||
"-mm", "--mmproj",
|
||||
"-mmu", "--mmproj-url",
|
||||
"--host", "--port", "--path", "--api-prefix", "--reuse-port",
|
||||
"--api-key", "--api-key-file",
|
||||
"--ssl-key-file", "--ssl-cert-file",
|
||||
"--webui", "--no-webui", "--ui", "--no-ui",
|
||||
"--ui-config", "--ui-config-file",
|
||||
"--ui-mcp-proxy", "--no-ui-mcp-proxy",
|
||||
"--models-dir", "--models-preset", "--models-max",
|
||||
"--models-autoload", "--no-models-autoload",
|
||||
}
|
||||
for _, flag := range denied {
|
||||
t.Run(flag, func(t *testing.T) {
|
||||
_, err := ValidateExtraArgs([]string{flag})
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for %s", flag)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateExtraArgs_SafeFlags(t *testing.T) {
|
||||
safe := []string{
|
||||
"-c", "--ctx-size", "-ngl", "--gpu-layers",
|
||||
"--top-k", "--cache-type-k", "--jinja", "--no-jinja",
|
||||
"--spec-draft-n-max", "-fa", "--flash-attn",
|
||||
"-t", "--threads", "-np", "--parallel", "--no-mmap",
|
||||
}
|
||||
for _, flag := range safe {
|
||||
t.Run(flag, func(t *testing.T) {
|
||||
out, err := ValidateExtraArgs([]string{flag})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for %s: %v", flag, err)
|
||||
}
|
||||
if len(out) != 1 || out[0] != flag {
|
||||
t.Fatalf("expected [%s], got %v", flag, out)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateExtraArgs_FlagEqualsValue(t *testing.T) {
|
||||
_, err := ValidateExtraArgs([]string{"--model=evil.gguf"})
|
||||
if err == nil {
|
||||
t.Fatal("expected error for --model=evil.gguf")
|
||||
}
|
||||
out, err := ValidateExtraArgs([]string{"--ctx-size=4096"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(out) != 1 || out[0] != "--ctx-size=4096" {
|
||||
t.Fatalf("expected [--ctx-size=4096], got %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateExtraArgs_NegativeNumber(t *testing.T) {
|
||||
out, err := ValidateExtraArgs([]string{"--seed", "-1"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(out) != 2 {
|
||||
t.Fatalf("expected 2 tokens, got %d", len(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateExtraArgs_Empty(t *testing.T) {
|
||||
out, err := ValidateExtraArgs(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if out != nil {
|
||||
t.Fatalf("expected nil, got %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsManagedFlag(t *testing.T) {
|
||||
if !IsManagedFlag("--model") {
|
||||
t.Fatal("--model should be managed")
|
||||
}
|
||||
if !IsManagedFlag("-m") {
|
||||
t.Fatal("-m should be managed")
|
||||
}
|
||||
if IsManagedFlag("-c") {
|
||||
t.Fatal("-c should not be managed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFlagName(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, want string
|
||||
}{
|
||||
{"--model=foo", "--model"},
|
||||
{"-c", "-c"},
|
||||
{"--top-k", "--top-k"},
|
||||
{"-1", ""},
|
||||
{"-0.5", ""},
|
||||
{"-", ""},
|
||||
{"--", ""},
|
||||
{"hello", ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := FlagName(tt.in)
|
||||
if got != tt.want {
|
||||
t.Errorf("FlagName(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStripShadowingFlags(t *testing.T) {
|
||||
t.Run("strips context flag with value", func(t *testing.T) {
|
||||
out := StripShadowingFlags([]string{"-c", "4096", "--top-k", "40"})
|
||||
if len(out) != 2 || out[0] != "--top-k" || out[1] != "40" {
|
||||
t.Fatalf("got %v", out)
|
||||
}
|
||||
})
|
||||
t.Run("retains non-shadowing flags", func(t *testing.T) {
|
||||
out := StripShadowingFlags([]string{"--top-k", "40", "--top-p", "0.95"})
|
||||
if len(out) != 4 {
|
||||
t.Fatalf("got %v", out)
|
||||
}
|
||||
})
|
||||
t.Run("strips boolean jinja flag", func(t *testing.T) {
|
||||
out := StripShadowingFlags([]string{"--jinja", "--top-k", "40"})
|
||||
if len(out) != 2 || out[0] != "--top-k" {
|
||||
t.Fatalf("got %v", out)
|
||||
}
|
||||
})
|
||||
t.Run("strips equals form", func(t *testing.T) {
|
||||
out := StripShadowingFlags([]string{"--ctx-size=4096"})
|
||||
if len(out) != 0 {
|
||||
t.Fatalf("got %v", out)
|
||||
}
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user