Adds an Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Also wires the server-side route and provider config so that these overrides pass through the inference pipeline.
93 lines
2.9 KiB
TypeScript
93 lines
2.9 KiB
TypeScript
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
import type { LanguageModel } from 'ai';
|
|
|
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is unauthenticated
// in our Tailscale topology, and exposure over the public internet is gated by
// Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, or LLAMA_SIDECAR_URL is
// configured, route through llama-sidecar instead. A fresh provider is created
// per call (not cached) because the X-Agent-Flags header varies per agent. The
// llama-swap path stays cached since it has no per-request headers.
|
|
|
|
/**
 * Cache of llama-swap providers, keyed by the resolved `/v1` endpoint URL.
 * Reuse is safe because this path attaches no per-request headers.
 */
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();

/**
 * Returns the llama-swap provider for `baseURL`, creating and caching it on
 * first use.
 *
 * Trailing slashes are stripped before `/v1` is appended, so `http://host`,
 * `http://host/`, `http://host/v1` and `http://host/v1/` all resolve to
 * `http://host/v1` and share a single cached provider (previously a trailing
 * slash produced `//v1` or `/v1//v1`).
 *
 * @param baseURL - llama-swap root URL, with or without a `/v1` suffix.
 * @returns The provider bound to the resolved endpoint.
 */
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  const endpoint = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;

  // Key on the resolved endpoint so equivalent spellings of the same URL do
  // not each get their own provider instance.
  let provider = swapCache.get(endpoint);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: endpoint,
      includeUsage: true,
    });
    swapCache.set(endpoint, provider);
  }
  return provider;
}
|
|
|
|
/**
 * Builds a fresh (uncached) llama-sidecar provider whose requests carry the
 * given flags in the `X-Agent-Flags` header, joined with single spaces.
 *
 * Trailing slashes on `baseURL` are stripped before `/v1` is appended, so
 * `http://host/` and `http://host/v1/` both resolve to `http://host/v1`
 * (previously they produced `//v1` and `/v1//v1`).
 *
 * @param baseURL - llama-sidecar root URL, with or without a `/v1` suffix.
 * @param flags - Extra arguments for this agent; may be empty.
 * @returns A new provider instance on every call.
 */
function sidecarProvider(
  baseURL: string,
  flags: string[],
): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  const endpoint = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;

  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: endpoint,
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}
|
|
|
|
/** Upstream that serves a request: `'swap'` for llama-swap, `'sidecar'` for llama-sidecar. */
export type InferenceRoute = 'swap' | 'sidecar';
|
|
|
|
/** Routing decision produced by `resolveRoute`. */
export interface RoutingInfo {
  /** Upstream the request should be sent to. */
  route: InferenceRoute;
  /**
   * Flags to forward on the sidecar route (an empty array when the agent sets
   * none); `null` on the swap route.
   */
  flags: string[] | null;
}
|
|
|
|
/** Structural view of an agent: only the field that routing reads. */
interface AgentLike {
  /** Per-agent extra arguments; `null` or an empty array means none are set. */
  llama_extra_args: string[] | null;
}
|
|
|
|
/** Structural view of the app config: only the upstream URLs that routing reads. */
interface ConfigLike {
  /** Base URL of the llama-swap upstream. */
  LLAMA_SWAP_URL: string;
  /** Base URL of the llama-sidecar upstream; when set, routing prefers the sidecar. */
  LLAMA_SIDECAR_URL?: string;
}
|
|
|
|
/**
 * Decides which upstream should serve a request.
 *
 * Precedence:
 *  1. The agent has a non-empty `llama_extra_args`: sidecar, forwarding those
 *     flags (the same array instance is returned).
 *  2. `config.LLAMA_SIDECAR_URL` is set: sidecar with no per-agent flags, so
 *     the sidecar's default base args still apply.
 *  3. Otherwise: llama-swap, with `flags` set to `null`.
 *
 * @param agent - Agent whose extra args are inspected, or `null` if there is none.
 * @param config - Optional config; only `LLAMA_SIDECAR_URL` is consulted here.
 * @returns The chosen route and the flags to forward.
 */
export function resolveRoute(
  agent: AgentLike | null,
  config?: ConfigLike,
): RoutingInfo {
  const agentFlags = agent?.llama_extra_args ?? [];
  if (agentFlags.length > 0) {
    return { route: 'sidecar', flags: agentFlags };
  }

  // No per-agent flags: the sidecar is still preferred whenever it is configured.
  return config?.LLAMA_SIDECAR_URL
    ? { route: 'sidecar', flags: [] }
    : { route: 'swap', flags: null };
}
|
|
|
|
/**
 * Resolves the chat model for `modelId` on whichever upstream `resolveRoute`
 * selects for this agent and config.
 *
 * @param config - Supplies the llama-swap URL and, optionally, the sidecar URL.
 * @param modelId - Model identifier passed to the provider's `chatModel`.
 * @param agent - Optional agent; a missing agent is treated as `null`.
 * @returns The language model bound to the chosen provider.
 * @throws Error if the sidecar route is selected but `LLAMA_SIDECAR_URL` is
 *   not set (for example, an agent has extra args but no sidecar is configured).
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const routing = resolveRoute(agent ?? null, config);

  // Swap path: reuse the cached provider for this base URL.
  if (routing.route !== 'sidecar') {
    return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
  }

  const sidecarURL = config.LLAMA_SIDECAR_URL;
  if (!sidecarURL) {
    throw new Error('Sidecar route selected but LLAMA_SIDECAR_URL is not set');
  }

  // Sidecar path: a fresh provider per call, built with this agent's flags.
  const agentFlags = routing.flags ?? [];
  return sidecarProvider(sidecarURL, agentFlags).chatModel(modelId);
}
|