feat(web,server): inference settings UI with per-session inference overrides

Adds an Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Also adds the server-side route and provider config wiring needed to pass these overrides through the inference pipeline.
2026-06-07 22:16:29 +00:00
parent a72f7954b4
commit c132215064
7 changed files with 598 additions and 9 deletions
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -57,11 +57,21 @@ interface ConfigLike {
  LLAMA_SIDECAR_URL?: string;
 }

-export function resolveRoute(agent: AgentLike | null): RoutingInfo {
+export function resolveRoute(
+  agent: AgentLike | null,
+  config?: ConfigLike,
+): RoutingInfo {
+  // When llama_extra_args are explicitly set, route through sidecar with them.
  const flags = agent?.llama_extra_args;
  if (flags && flags.length > 0) {
    return { route: 'sidecar', flags };
  }
+  // When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
+  // route through sidecar to pick up the default base args (cache quant,
+  // spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
+  if (config?.LLAMA_SIDECAR_URL) {
+    return { route: 'sidecar', flags: [] };
+  }
  return { route: 'swap', flags: null };
 }

@@ -70,15 +80,13 @@ export function upstreamModel(
  modelId: string,
  agent?: AgentLike | null,
 ): LanguageModel {
-  const { route, flags } = resolveRoute(agent ?? null);
+  const { route, flags } = resolveRoute(agent ?? null, config);
  if (route === 'sidecar') {
    const url = config.LLAMA_SIDECAR_URL;
    if (!url) {
-      throw new Error(
-        `Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
-      );
+      throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
    }
-    return sidecarProvider(url, flags!).chatModel(modelId);
+    return sidecarProvider(url, (flags ?? [])).chatModel(modelId);
  }
  return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
 }