llama-sidecar v0.1.0: daemon + benchmarks + eval suite

Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00
parent babbb4f39b
commit fe7f36ae98
39 changed files with 4228 additions and 0 deletions
--- a/internal/validator/validator_test.go
+++ b/internal/validator/validator_test.go
@@ -0,0 +1,150 @@
+package validator
+
+import (
+	"testing"
+)
+
+// TestValidateExtraArgs_DenyList runs one subtest per listed flag, passing the
+// flag to ValidateExtraArgs as the only argument and failing unless an error
+// comes back. The subtest name is the flag itself, so a failure identifies
+// which entry was accepted.
+func TestValidateExtraArgs_DenyList(t *testing.T) {
+	rejected := []string{
+		"-m", "--model", "-mu", "--model-url",
+		"-dr", "--docker-repo",
+		"-hf", "-hfr", "--hf-repo", "-hff", "--hf-file",
+		"-hfv", "-hfrv", "--hf-repo-v", "-hffv", "--hf-file-v",
+		"-hft", "--hf-token",
+		"-mm", "--mmproj", "-mmu", "--mmproj-url",
+		"--host", "--port", "--path", "--api-prefix", "--reuse-port",
+		"--api-key", "--api-key-file",
+		"--ssl-key-file", "--ssl-cert-file",
+		"--webui", "--no-webui", "--ui", "--no-ui",
+		"--ui-config", "--ui-config-file",
+		"--ui-mcp-proxy", "--no-ui-mcp-proxy",
+		"--models-dir", "--models-preset", "--models-max",
+		"--models-autoload", "--no-models-autoload",
+	}
+	for _, arg := range rejected {
+		t.Run(arg, func(t *testing.T) {
+			if _, err := ValidateExtraArgs([]string{arg}); err != nil {
+				return
+			}
+			t.Fatalf("expected error for %s", arg)
+		})
+	}
+}
+
+// TestValidateExtraArgs_SafeFlags checks that each listed flag, passed alone,
+// is accepted by ValidateExtraArgs and returned as the sole output element.
+func TestValidateExtraArgs_SafeFlags(t *testing.T) {
+	for _, arg := range []string{
+		"-c", "--ctx-size", "-ngl", "--gpu-layers", "--top-k", "--cache-type-k",
+		"--jinja", "--no-jinja", "--spec-draft-n-max", "-fa", "--flash-attn",
+		"-t", "--threads", "-np", "--parallel", "--no-mmap",
+	} {
+		t.Run(arg, func(t *testing.T) {
+			got, err := ValidateExtraArgs([]string{arg})
+			switch {
+			case err != nil:
+				t.Fatalf("unexpected error for %s: %v", arg, err)
+			case len(got) != 1 || got[0] != arg:
+				t.Fatalf("expected [%s], got %v", arg, got)
+			}
+		})
+	}
+}
+
+// TestValidateExtraArgs_FlagEqualsValue covers arguments written as "--flag=value".
+func TestValidateExtraArgs_FlagEqualsValue(t *testing.T) {
+	if _, err := ValidateExtraArgs([]string{"--model=evil.gguf"}); err == nil {
+		t.Fatal("expected error for --model=evil.gguf")
+	}
+	got, err := ValidateExtraArgs([]string{"--ctx-size=4096"})
+	switch {
+	case err != nil:
+		t.Fatal(err)
+	case len(got) != 1 || got[0] != "--ctx-size=4096":
+		t.Fatalf("expected [--ctx-size=4096], got %v", got)
+	}
+}
+
+// TestValidateExtraArgs_NegativeNumber checks that "--seed", "-1" is accepted and yields two tokens.
+func TestValidateExtraArgs_NegativeNumber(t *testing.T) {
+	switch got, err := ValidateExtraArgs([]string{"--seed", "-1"}); {
+	case err != nil:
+		t.Fatal(err)
+	case len(got) != 2:
+		t.Fatalf("expected 2 tokens, got %d", len(got))
+	}
+}
+
+// TestValidateExtraArgs_Empty checks that a nil argument list yields no error and a nil result.
+func TestValidateExtraArgs_Empty(t *testing.T) {
+	switch got, err := ValidateExtraArgs(nil); {
+	case err != nil:
+		t.Fatal(err)
+	case got != nil:
+		t.Fatalf("expected nil, got %v", got)
+	}
+}
+
+// TestIsManagedFlag checks that --model and -m are reported as managed and that -c is not.
+func TestIsManagedFlag(t *testing.T) {
+	switch {
+	case !IsManagedFlag("--model"):
+		t.Fatal("--model should be managed")
+	case !IsManagedFlag("-m"):
+		t.Fatal("-m should be managed")
+	case IsManagedFlag("-c"):
+		t.Fatal("-c should not be managed")
+	}
+}
+
+// TestFlagName table-tests FlagName; "-1", "-0.5", "-", "--" and "hello" must map to "".
+func TestFlagName(t *testing.T) {
+	cases := []struct {
+		arg, name string
+	}{
+		{arg: "--model=foo", name: "--model"},
+		{arg: "-c", name: "-c"},
+		{arg: "--top-k", name: "--top-k"},
+		{arg: "-1", name: ""},
+		{arg: "-0.5", name: ""},
+		{arg: "-", name: ""},
+		{arg: "--", name: ""},
+		{arg: "hello", name: ""},
+	}
+	for _, tc := range cases {
+		if got := FlagName(tc.arg); got != tc.name {
+			t.Errorf("FlagName(%q) = %q, want %q", tc.arg, got, tc.name)
+		}
+	}
+}
+
+// TestStripShadowingFlags table-tests StripShadowingFlags. Each case gives the
+// subtest name, the arguments passed in, and a predicate that the returned
+// slice must satisfy; a failing case prints the slice it got.
+func TestStripShadowingFlags(t *testing.T) {
+	cases := []struct {
+		name string
+		args []string
+		ok   func(got []string) bool
+	}{
+		{"strips context flag with value", []string{"-c", "4096", "--top-k", "40"},
+			func(got []string) bool { return len(got) == 2 && got[0] == "--top-k" && got[1] == "40" }},
+		{"retains non-shadowing flags", []string{"--top-k", "40", "--top-p", "0.95"},
+			func(got []string) bool { return len(got) == 4 }},
+		{"strips boolean jinja flag", []string{"--jinja", "--top-k", "40"},
+			func(got []string) bool { return len(got) == 2 && got[0] == "--top-k" }},
+		{"strips equals form", []string{"--ctx-size=4096"},
+			func(got []string) bool { return len(got) == 0 }},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := StripShadowingFlags(tc.args); !tc.ok(got) {
+				t.Fatalf("got %v", got)
+			}
+		})
+	}
+}