Files
llama-sidecar/internal/validator/validator.go
indifferentketchup fe7f36ae98 llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with
LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port),
deterministic hash-keyed sidecar reuse. Windows service support via
schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx
decoupled child lifetime.

Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM
in JSON config, -fa → --flash-attn on default, child process exit after
one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED,
context.Background for child lifetime, background reaper goroutine).

bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks
automation to sam-desktop. Per-GGUF production flags from llama-swap
config with --ctx-size 32768 override.

eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) +
A/B model comparison (14 agent-typed prompts × 8 models). All scripts
resumable at individual question level.

94 Go tests, race detector clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-28 01:55:13 +00:00

157 lines
4.0 KiB
Go

// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
// Ported from studio/backend/core/inference/llama_server_args.py.
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py
package validator
import (
"fmt"
"strings"
)
// denylistGroups lists the llama-server flags that are treated as managed:
// ValidateExtraArgs rejects any extra arg whose flag name appears here.
// Each inner slice appears to collect the alternative spellings of a single
// option (short/long forms, or a flag and its --no- counterpart); the
// grouping itself is not used at lookup time, only the flattened set is.
var denylistGroups = [][]string{
// Model identity: which model / projector files or repos get loaded.
{"-m", "--model"},
{"-mu", "--model-url"},
{"-dr", "--docker-repo"},
{"-hf", "-hfr", "--hf-repo"},
{"-hff", "--hf-file"},
{"-hfv", "-hfrv", "--hf-repo-v"},
{"-hffv", "--hf-file-v"},
{"-hft", "--hf-token"},
{"-mm", "--mmproj"},
{"-mmu", "--mmproj-url"},
// Networking: listen address and URL layout.
{"--host"},
{"--port"},
{"--path"},
{"--api-prefix"},
{"--reuse-port"},
// Auth / TLS
{"--api-key"},
{"--api-key-file"},
{"--ssl-key-file"},
{"--ssl-cert-file"},
// Server UI / multi-model
{"--webui", "--no-webui"},
{"--ui", "--no-ui"},
{"--ui-config"},
{"--ui-config-file"},
{"--ui-mcp-proxy", "--no-ui-mcp-proxy"},
{"--models-dir"},
{"--models-preset"},
{"--models-max"},
{"--models-autoload", "--no-models-autoload"},
}
// denylist is the flattened lookup set of every flag spelling listed in
// denylistGroups. It is built once during package initialization; the
// initializer references denylistGroups, so Go initializes that table first.
var denylist = func() map[string]bool {
	flat := make(map[string]bool)
	for g := range denylistGroups {
		for _, name := range denylistGroups[g] {
			flat[name] = true
		}
	}
	return flat
}()
// FlagName extracts the flag name from a single CLI token.
//
// It returns "" when the token is not a flag: anything that does not start
// with '-', the bare "-" and "--" tokens, and negative-number-looking values
// such as -1, -0.5 or -.5 (so the value in "--seed -1" is not mistaken for a
// flag). For the --key=value form only the part before the first '=' is
// returned.
func FlagName(token string) string {
	switch {
	case token == "-", token == "--":
		return ""
	case !strings.HasPrefix(token, "-"):
		return ""
	}

	// A token that starts with '-' and is not exactly "-" has at least two
	// bytes, so indexing the second byte is safe here.
	if lead := token[1]; lead == '.' || ('0' <= lead && lead <= '9') {
		return ""
	}

	name, _, _ := strings.Cut(token, "=")
	return name
}
// ValidateExtraArgs checks user-supplied llama-server args against the
// managed-flag denylist.
//
// On success it returns a fresh copy of args as a flat slice (nil when args
// is empty). If any token resolves, via FlagName, to a managed flag, it
// returns a nil slice and an error naming the first offending flag.
func ValidateExtraArgs(args []string) ([]string, error) {
	if len(args) == 0 {
		return nil, nil
	}

	// Scan first so nothing is allocated when the input is rejected.
	for _, token := range args {
		if name := FlagName(token); name != "" && denylist[name] {
			return nil, fmt.Errorf("llama-server flag '%s' is managed and cannot be passed as an extra arg", name)
		}
	}

	validated := make([]string, len(args))
	copy(validated, args)
	return validated, nil
}
// IsManagedFlag reports whether flag is one of the managed llama-server
// flags. The lookup is an exact match against the denylist, so flag is
// expected to be a bare flag name rather than a --key=value token.
func IsManagedFlag(flag string) bool {
	managed, listed := denylist[flag]
	return listed && managed
}
// Flag sets consulted by StripShadowingFlags. Each set holds the exact flag
// spellings of one group of options that get stripped from an arg list.
var (
	// contextFlags are the context-size flags.
	contextFlags = setOf("-c", "--ctx-size")

	// cacheFlags are the K/V cache type flags.
	cacheFlags = setOf("-ctk", "--cache-type-k", "-ctv", "--cache-type-v")

	// specFlags are the --spec-* and --draft-* flags.
	specFlags = setOf(
		"--spec-default", "--spec-type", "--spec-ngram-size-n", "--spec-ngram-size",
		"--draft-min", "--draft-max",
		"--spec-draft-n-max", "--spec-draft-n-min", "--spec-draft-p-min", "--spec-draft-p-split",
		"--spec-ngram-mod-n-match", "--spec-ngram-mod-n-min", "--spec-ngram-mod-n-max",
	)

	// templateFlags are the chat-template and jinja flags.
	templateFlags = setOf(
		"--chat-template", "--chat-template-file", "--chat-template-kwargs",
		"--jinja", "--no-jinja",
	)

	// booleanShadowingFlags are the shadowing flags treated as taking no
	// value: stripping one of them never consumes the following token.
	booleanShadowingFlags = setOf("--spec-default", "--jinja", "--no-jinja")
)
// setOf builds a membership set from vals: every given string maps to true.
// Duplicates collapse into a single entry, and calling it with no arguments
// yields an empty, non-nil map.
func setOf(vals ...string) map[string]bool {
	set := make(map[string]bool, len(vals))
	for i := range vals {
		set[vals[i]] = true
	}
	return set
}
// StripShadowingFlags returns a copy of args with every flag that shadows a
// first-class setting removed. All shadowing groups (context, cache, spec
// and template flags) are always stripped.
//
// A stripped flag takes its value with it: for the two-token form
// ("--ctx-size 4096") the following token is dropped too, provided it is not
// itself a flag. Flags in booleanShadowingFlags and tokens written as
// --key=value occupy a single token, so only that token is dropped.
// The result is never nil, even for empty input.
func StripShadowingFlags(args []string) []string {
	// Every entry in the four sets is true, so a plain OR over the lookups
	// answers "is this flag in any shadowing group".
	isShadowing := func(name string) bool {
		return contextFlags[name] || cacheFlags[name] || specFlags[name] || templateFlags[name]
	}

	kept := make([]string, 0, len(args))
	for pos := 0; pos < len(args); pos++ {
		token := args[pos]
		name := FlagName(token)
		if name == "" || !isShadowing(name) {
			kept = append(kept, token)
			continue
		}

		// The shadowing token is dropped simply by not appending it. Decide
		// whether the next token is its value and has to go as well.
		singleToken := booleanShadowingFlags[name] || strings.Contains(token, "=")
		if !singleToken && pos+1 < len(args) && FlagName(args[pos+1]) == "" {
			pos++ // skip the separate value token
		}
	}
	return kept
}