Go daemon (cmd/llama-sidecar): per-agent llama-server process pool with LRU eviction, OpenAI-compatible proxy, flag validation (Unsloth port), deterministic hash-keyed sidecar reuse. Windows service support via schtasks/NSSM with DETACHED_PROCESS, stdout pipe drain, and request-ctx decoupled child lifetime. Bug fixes (3b.1–3b5): -c flag drop from StripShadowingFlags, UTF-8 BOM in JSON config, -fa → --flash-attn on default, child process exit after one request (stdin devnull, stdout pipe, CREATE_NO_WINDOW → DETACHED, context.Background for child lifetime, background reaper goroutine). bench/: MTP on/off throughput sweep across 8 GGUFs via SSH+schtasks automation to sam-desktop. Per-GGUF production flags from llama-swap config with --ctx-size 32768 override. eval/: accuracy benchmarks (MMLU 100q, GSM8K 50q, HumanEval 164) + A/B model comparison (14 agent-typed prompts × 8 models). All scripts resumable at individual question level. 94 Go tests, race detector clean. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
157 lines
4.0 KiB
Go
157 lines
4.0 KiB
Go
// SPDX-License-Identifier: AGPL-3.0-only
|
|
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved.
|
|
// Ported from studio/backend/core/inference/llama_server_args.py.
|
|
// Original: https://github.com/unslothai/unsloth/blob/main/studio/backend/core/inference/llama_server_args.py
|
|
|
|
package validator
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// denylistGroups lists the llama-server flags that may not be passed as
// extra args. Each inner slice holds the spellings of one option (short and
// long aliases, or a flag together with its --no- counterpart). init flattens
// every spelling into the denylist lookup set.
var denylistGroups = [][]string{
	// Model identity
	{"-m", "--model"},
	{"-mu", "--model-url"},
	{"-dr", "--docker-repo"},
	{"-hf", "-hfr", "--hf-repo"},
	{"-hff", "--hf-file"},
	{"-hfv", "-hfrv", "--hf-repo-v"},
	{"-hffv", "--hf-file-v"},
	{"-hft", "--hf-token"},
	{"-mm", "--mmproj"},
	{"-mmu", "--mmproj-url"},

	// Networking
	{"--host"},
	{"--port"},
	{"--path"},
	{"--api-prefix"},
	{"--reuse-port"},

	// Auth / TLS
	{"--api-key"},
	{"--api-key-file"},
	{"--ssl-key-file"},
	{"--ssl-cert-file"},

	// Server UI / multi-model
	{"--webui", "--no-webui"},
	{"--ui", "--no-ui"},
	{"--ui-config"},
	{"--ui-config-file"},
	{"--ui-mcp-proxy", "--no-ui-mcp-proxy"},
	{"--models-dir"},
	{"--models-preset"},
	{"--models-max"},
	{"--models-autoload", "--no-models-autoload"},
}
|
|
|
|
var denylist map[string]bool
|
|
|
|
func init() {
|
|
denylist = make(map[string]bool)
|
|
for _, group := range denylistGroups {
|
|
for _, flag := range group {
|
|
denylist[flag] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// FlagName returns the flag name for a CLI token, or "" if it isn't a flag.
//
// A token is a flag when it starts with "-" and is neither the bare "-" nor
// the bare "--". Tokens whose second byte is a digit or '.' (for example -1
// or -0.5, as in "--seed -1") are treated as numeric values, not flags.
// For --key=value tokens only the part before the first '=' is returned.
func FlagName(token string) string {
	// Anything shorter than two bytes is "", "-", or a single non-dash byte.
	if len(token) < 2 || token[0] != '-' || token == "--" {
		return ""
	}
	// Negative numbers are values for the preceding flag.
	if c := token[1]; c == '.' || (c >= '0' && c <= '9') {
		return ""
	}
	if eq := strings.IndexByte(token, '='); eq >= 0 {
		return token[:eq]
	}
	return token
}
|
|
|
|
// ValidateExtraArgs validates user-supplied llama-server args. Returns the
|
|
// args as a flat slice. Returns an error with the offending flag if any
|
|
// token resolves to a managed flag.
|
|
func ValidateExtraArgs(args []string) ([]string, error) {
|
|
if len(args) == 0 {
|
|
return nil, nil
|
|
}
|
|
out := make([]string, 0, len(args))
|
|
for _, raw := range args {
|
|
flag := FlagName(raw)
|
|
if flag != "" && denylist[flag] {
|
|
return nil, fmt.Errorf("llama-server flag '%s' is managed and cannot be passed as an extra arg", flag)
|
|
}
|
|
out = append(out, raw)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// IsManagedFlag returns true if flag is a managed llama-server flag.
|
|
func IsManagedFlag(flag string) bool {
|
|
return denylist[flag]
|
|
}
|
|
|
|
var contextFlags = setOf("-c", "--ctx-size")
|
|
var cacheFlags = setOf("-ctk", "--cache-type-k", "-ctv", "--cache-type-v")
|
|
var specFlags = setOf(
|
|
"--spec-default", "--spec-type", "--spec-ngram-size-n", "--spec-ngram-size",
|
|
"--draft-min", "--draft-max",
|
|
"--spec-draft-n-max", "--spec-draft-n-min", "--spec-draft-p-min", "--spec-draft-p-split",
|
|
"--spec-ngram-mod-n-match", "--spec-ngram-mod-n-min", "--spec-ngram-mod-n-max",
|
|
)
|
|
var templateFlags = setOf(
|
|
"--chat-template", "--chat-template-file", "--chat-template-kwargs",
|
|
"--jinja", "--no-jinja",
|
|
)
|
|
var booleanShadowingFlags = setOf("--spec-default", "--jinja", "--no-jinja")
|
|
|
|
// setOf returns a set containing vals, represented as a map whose present
// keys all map to true. Duplicate values collapse into a single entry; with
// no arguments the result is an empty, non-nil map.
func setOf(vals ...string) map[string]bool {
	set := make(map[string]bool, len(vals))
	for _, v := range vals {
		set[v] = true
	}
	return set
}
|
|
|
|
// StripShadowingFlags removes flags that shadow first-class settings from
|
|
// the arg list. By default all shadowing groups are stripped.
|
|
func StripShadowingFlags(args []string) []string {
|
|
shadowing := make(map[string]bool)
|
|
for k, v := range contextFlags {
|
|
shadowing[k] = v
|
|
}
|
|
for k, v := range cacheFlags {
|
|
shadowing[k] = v
|
|
}
|
|
for k, v := range specFlags {
|
|
shadowing[k] = v
|
|
}
|
|
for k, v := range templateFlags {
|
|
shadowing[k] = v
|
|
}
|
|
|
|
out := make([]string, 0, len(args))
|
|
i, n := 0, len(args)
|
|
for i < n {
|
|
tok := args[i]
|
|
flag := FlagName(tok)
|
|
if flag == "" || !shadowing[flag] {
|
|
out = append(out, tok)
|
|
i++
|
|
continue
|
|
}
|
|
if booleanShadowingFlags[flag] || strings.Contains(tok, "=") {
|
|
i++
|
|
} else if i+1 < n && FlagName(args[i+1]) == "" {
|
|
i += 2
|
|
} else {
|
|
i++
|
|
}
|
|
}
|
|
return out
|
|
}
|