feat(web,server): inference settings UI with per-session inference overrides
Adds an Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Also wires up the server-side route and provider config so that the overrides are passed through the inference pipeline.
This commit is contained in:
@@ -57,11 +57,21 @@ interface ConfigLike {
|
||||
LLAMA_SIDECAR_URL?: string;
|
||||
}
|
||||
|
||||
export function resolveRoute(agent: AgentLike | null): RoutingInfo {
|
||||
/**
 * Chooses the upstream route for an agent.
 *
 * Resolution order:
 * 1. The agent has a non-empty `llama_extra_args` → `{ route: 'sidecar', flags }`
 *    carrying those args.
 * 2. `config.LLAMA_SIDECAR_URL` is truthy → `{ route: 'sidecar', flags: [] }`.
 * 3. Otherwise → `{ route: 'swap', flags: null }`.
 *
 * @param agent - Agent whose `llama_extra_args` are inspected; may be `null`.
 * @param config - Optional config; when omitted, step 2 never matches.
 * @returns The selected route together with the flags to pass along with it.
 */
export function resolveRoute(
|
||||
agent: AgentLike | null,
|
||||
config?: ConfigLike,
|
||||
): RoutingInfo {
|
||||
// When llama_extra_args are explicitly set, route through sidecar with them.
|
||||
const flags = agent?.llama_extra_args;
|
||||
if (flags && flags.length > 0) {
|
||||
return { route: 'sidecar', flags };
|
||||
}
|
||||
// When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
|
||||
// route through sidecar to pick up the default base args (cache quant,
|
||||
// spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
|
||||
if (config?.LLAMA_SIDECAR_URL) {
|
||||
return { route: 'sidecar', flags: [] };
|
||||
}
|
||||
return { route: 'swap', flags: null };
|
||||
}
|
||||
|
||||
@@ -70,15 +80,13 @@ export function upstreamModel(
|
||||
modelId: string,
|
||||
agent?: AgentLike | null,
|
||||
): LanguageModel {
|
||||
const { route, flags } = resolveRoute(agent ?? null);
|
||||
const { route, flags } = resolveRoute(agent ?? null, config);
|
||||
if (route === 'sidecar') {
|
||||
const url = config.LLAMA_SIDECAR_URL;
|
||||
if (!url) {
|
||||
throw new Error(
|
||||
`Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
|
||||
);
|
||||
throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
|
||||
}
|
||||
return sidecarProvider(url, flags!).chatModel(modelId);
|
||||
return sidecarProvider(url, (flags ?? [])).chatModel(modelId);
|
||||
}
|
||||
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user