llama-sidecar v0.1.0: daemon + benchmarks + eval suite
Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b.5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
156
internal/validator/validator.go
Normal file
156
internal/validator/validator.go
Normal file
@@ -0,0 +1,156 @@
|
||||
// SPDX-License-Identifier: AGPL-3.0-only
|
||||
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
|
||||
// Ported from studio/backend/core/inference/llama_server_args.py.
|
||||
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py
|
||||
|
||||
package validator
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
	// denylistGroups lists the llama-server flags that are managed and
	// therefore rejected when supplied as extra args. Each inner slice holds
	// the spellings of one option (short/long aliases, or the on/off pair).
	denylistGroups = [][]string{
		// Model identity
		{"-m", "--model"},
		{"-mu", "--model-url"},
		{"-dr", "--docker-repo"},
		{"-hf", "-hfr", "--hf-repo"},
		{"-hff", "--hf-file"},
		{"-hfv", "-hfrv", "--hf-repo-v"},
		{"-hffv", "--hf-file-v"},
		{"-hft", "--hf-token"},
		{"-mm", "--mmproj"},
		{"-mmu", "--mmproj-url"},

		// Networking
		{"--host"},
		{"--port"},
		{"--path"},
		{"--api-prefix"},
		{"--reuse-port"},

		// Auth / TLS
		{"--api-key"},
		{"--api-key-file"},
		{"--ssl-key-file"},
		{"--ssl-cert-file"},

		// Server UI / multi-model
		{"--webui", "--no-webui"},
		{"--ui", "--no-ui"},
		{"--ui-config"},
		{"--ui-config-file"},
		{"--ui-mcp-proxy", "--no-ui-mcp-proxy"},
		{"--models-dir"},
		{"--models-preset"},
		{"--models-max"},
		{"--models-autoload", "--no-models-autoload"},
	}

	// denylist is the flattened lookup set built from denylistGroups at
	// package initialisation.
	denylist map[string]bool
)
|
||||
|
||||
func init() {
|
||||
denylist = make(map[string]bool)
|
||||
for _, group := range denylistGroups {
|
||||
for _, flag := range group {
|
||||
denylist[flag] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FlagName extracts the flag name from a single CLI token.
//
// It returns "" when the token is not a flag: anything that does not begin
// with '-', the bare tokens "-" and "--", and negative-number-looking values
// such as -1 or -.5 (as in "--seed -1"). For the --key=value form only the
// part before the first '=' is returned; otherwise the token is returned
// unchanged.
func FlagName(token string) string {
	switch {
	case token == "-", token == "--":
		return ""
	case !strings.HasPrefix(token, "-"):
		return ""
	}

	// The token starts with '-' and is not just "-", so it has a second byte.
	// A digit or '.' there means this is a numeric value, not a flag.
	if second := token[1]; second == '.' || ('0' <= second && second <= '9') {
		return ""
	}

	// Cut yields the whole token when no '=' is present.
	name, _, _ := strings.Cut(token, "=")
	return name
}
|
||||
|
||||
// ValidateExtraArgs validates user-supplied llama-server args. Returns the
|
||||
// args as a flat slice. Returns an error with the offending flag if any
|
||||
// token resolves to a managed flag.
|
||||
func ValidateExtraArgs(args []string) ([]string, error) {
|
||||
if len(args) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
out := make([]string, 0, len(args))
|
||||
for _, raw := range args {
|
||||
flag := FlagName(raw)
|
||||
if flag != "" && denylist[flag] {
|
||||
return nil, fmt.Errorf("llama-server flag '%s' is managed and cannot be passed as an extra arg", flag)
|
||||
}
|
||||
out = append(out, raw)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// IsManagedFlag returns true if flag is a managed llama-server flag.
|
||||
func IsManagedFlag(flag string) bool {
|
||||
return denylist[flag]
|
||||
}
|
||||
|
||||
var contextFlags = setOf("-c", "--ctx-size")
|
||||
var cacheFlags = setOf("-ctk", "--cache-type-k", "-ctv", "--cache-type-v")
|
||||
var specFlags = setOf(
|
||||
"--spec-default", "--spec-type", "--spec-ngram-size-n", "--spec-ngram-size",
|
||||
"--draft-min", "--draft-max",
|
||||
"--spec-draft-n-max", "--spec-draft-n-min", "--spec-draft-p-min", "--spec-draft-p-split",
|
||||
"--spec-ngram-mod-n-match", "--spec-ngram-mod-n-min", "--spec-ngram-mod-n-max",
|
||||
)
|
||||
var templateFlags = setOf(
|
||||
"--chat-template", "--chat-template-file", "--chat-template-kwargs",
|
||||
"--jinja", "--no-jinja",
|
||||
)
|
||||
var booleanShadowingFlags = setOf("--spec-default", "--jinja", "--no-jinja")
|
||||
|
||||
// setOf builds a membership set from vals: every given string maps to true.
// With no arguments it returns an empty, non-nil map.
func setOf(vals ...string) map[string]bool {
	set := make(map[string]bool, len(vals))
	for idx := 0; idx < len(vals); idx++ {
		set[vals[idx]] = true
	}
	return set
}
|
||||
|
||||
// StripShadowingFlags removes flags that shadow first-class settings from
|
||||
// the arg list. By default all shadowing groups are stripped.
|
||||
func StripShadowingFlags(args []string) []string {
|
||||
shadowing := make(map[string]bool)
|
||||
for k, v := range contextFlags {
|
||||
shadowing[k] = v
|
||||
}
|
||||
for k, v := range cacheFlags {
|
||||
shadowing[k] = v
|
||||
}
|
||||
for k, v := range specFlags {
|
||||
shadowing[k] = v
|
||||
}
|
||||
for k, v := range templateFlags {
|
||||
shadowing[k] = v
|
||||
}
|
||||
|
||||
out := make([]string, 0, len(args))
|
||||
i, n := 0, len(args)
|
||||
for i < n {
|
||||
tok := args[i]
|
||||
flag := FlagName(tok)
|
||||
if flag == "" || !shadowing[flag] {
|
||||
out = append(out, tok)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if booleanShadowingFlags[flag] || strings.Contains(tok, "=") {
|
||||
i++
|
||||
} else if i+1 < n && FlagName(args[i+1]) == "" {
|
||||
i += 2
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user