feat(web,server): inference settings UI with per-session inference overrides

Adds Inference tab to SettingsPane with controls for temperature, top-p,
top-k, min-p, and other inference parameters. Server-side route and
provider config wiring to pass overrides through the inference pipeline.
This commit is contained in:
2026-06-07 22:16:29 +00:00
parent a72f7954b4
commit c132215064
7 changed files with 598 additions and 9 deletions

View File

@@ -57,11 +57,21 @@ interface ConfigLike {
LLAMA_SIDECAR_URL?: string;
}
export function resolveRoute(agent: AgentLike | null): RoutingInfo {
export function resolveRoute(
agent: AgentLike | null,
config?: ConfigLike,
): RoutingInfo {
// When llama_extra_args are explicitly set, route through sidecar with them.
const flags = agent?.llama_extra_args;
if (flags && flags.length > 0) {
return { route: 'sidecar', flags };
}
// When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
// route through sidecar to pick up the default base args (cache quant,
// spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
if (config?.LLAMA_SIDECAR_URL) {
return { route: 'sidecar', flags: [] };
}
return { route: 'swap', flags: null };
}
@@ -70,15 +80,13 @@ export function upstreamModel(
modelId: string,
agent?: AgentLike | null,
): LanguageModel {
const { route, flags } = resolveRoute(agent ?? null);
const { route, flags } = resolveRoute(agent ?? null, config);
if (route === 'sidecar') {
const url = config.LLAMA_SIDECAR_URL;
if (!url) {
throw new Error(
`Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
);
throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
}
return sidecarProvider(url, flags!).chatModel(modelId);
return sidecarProvider(url, (flags ?? [])).chatModel(modelId);
}
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
}