import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import type { LanguageModel } from 'ai';

// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
// Tailscale topology and exposing it over the public internet is gated by
// Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
// llama-sidecar instead. A fresh provider is created per call (not cached)
// because the X-Agent-Flags header varies per agent. The llama-swap path
// stays cached since it has no per-request headers.

/** Provider instance type returned by `createOpenAICompatible`. */
type OpenAICompatibleProvider = ReturnType<typeof createOpenAICompatible>;

/** llama-swap providers, keyed by their normalized (`/v1`-suffixed) base URL. */
const swapCache = new Map<string, OpenAICompatibleProvider>();

/**
 * Normalizes an upstream base URL so that it ends in exactly one `/v1`.
 *
 * Trailing slashes are removed first, so `http://host/` and `http://host/v1/`
 * both resolve to `http://host/v1` instead of producing `//v1` or `/v1//v1`.
 */
function withV1Suffix(baseURL: string): string {
  const trimmed = baseURL.replace(/\/+$/, '');
  return trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;
}

/**
 * Returns the llama-swap provider for `baseURL`, creating it on first use.
 *
 * Providers are cached per normalized URL because this path sends no
 * per-request headers, so one instance can serve every caller.
 */
function getSwapProvider(baseURL: string): OpenAICompatibleProvider {
  const normalizedURL = withV1Suffix(baseURL);
  let provider = swapCache.get(normalizedURL);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: normalizedURL,
      includeUsage: true,
    });
    swapCache.set(normalizedURL, provider);
  }
  return provider;
}

/**
 * Builds a llama-sidecar provider that forwards `flags` in the
 * `X-Agent-Flags` header (space-joined).
 *
 * Deliberately uncached: the header value differs per agent.
 */
function sidecarProvider(
  baseURL: string,
  flags: string[],
): OpenAICompatibleProvider {
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: withV1Suffix(baseURL),
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}

/** Which upstream a request is sent to. */
export type InferenceRoute = 'swap' | 'sidecar';

/** Result of {@link resolveRoute}: the chosen upstream plus the flags to forward. */
export interface RoutingInfo {
  route: InferenceRoute;
  /** The agent's extra args when `route` is `'sidecar'`; `null` for `'swap'`. */
  flags: string[] | null;
}

interface AgentLike {
  llama_extra_args: string[] | null;
}

interface ConfigLike {
  LLAMA_SWAP_URL: string;
  LLAMA_SIDECAR_URL?: string;
}

/**
 * Decides which upstream should serve an agent.
 *
 * @param agent - The agent, or `null` when there is none.
 * @returns `'sidecar'` with the agent's flags when `llama_extra_args` is a
 *   non-empty array; otherwise `'swap'` with `flags: null`.
 */
export function resolveRoute(agent: AgentLike | null): RoutingInfo {
  const flags = agent?.llama_extra_args;
  if (flags && flags.length > 0) {
    return { route: 'sidecar', flags };
  }
  return { route: 'swap', flags: null };
}

/**
 * Returns the chat model for `modelId` on the upstream selected for `agent`.
 *
 * @param config - Supplies the llama-swap URL and the optional sidecar URL.
 * @param modelId - Model identifier passed through to the provider.
 * @param agent - Optional agent; its `llama_extra_args` select the sidecar route.
 * @returns A chat model bound to the selected provider.
 * @throws Error when the sidecar route is selected but
 *   `config.LLAMA_SIDECAR_URL` is missing or empty.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const { route, flags } = resolveRoute(agent ?? null);

  if (route === 'swap') {
    return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
  }

  const url = config.LLAMA_SIDECAR_URL;
  if (!url) {
    throw new Error(
      'Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set',
    );
  }
  // resolveRoute always pairs 'sidecar' with a non-empty flags array; the
  // fallback only satisfies the type checker without a non-null assertion.
  return sidecarProvider(url, flags ?? []).chatModel(modelId);
}