import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { createDeepSeek } from '@ai-sdk/deepseek';
import type { LanguageModel } from 'ai';

// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
// Tailscale topology and exposing it over the public internet is gated by
// Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
// llama-sidecar instead. A fresh provider is created per call (not cached)
// because the X-Agent-Flags header varies per agent. The llama-swap path
// stays cached since it has no per-request headers.
//
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
// is set, route through the official @ai-sdk/deepseek provider (not
// openai-compatible) so DeepSeek-specific features work: providerMetadata
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.

type OpenAICompatibleProvider = ReturnType<typeof createOpenAICompatible>;
type DeepSeekProvider = ReturnType<typeof createDeepSeek>;

/** Base URL used for DeepSeek when DEEPSEEK_BASE_URL is not configured. */
const DEFAULT_DEEPSEEK_BASE_URL = 'https://api.deepseek.com';

/** Remove every trailing '/' from a URL string. */
function stripTrailingSlashes(url: string): string {
  return url.replace(/\/+$/, '');
}

/**
 * Normalize an upstream base URL so it ends in exactly one `/v1` segment.
 *
 * Trailing slashes are dropped first, so `http://host/` becomes
 * `http://host/v1` (not `http://host//v1`) and `http://host/v1/` stays
 * `http://host/v1` (not `http://host/v1//v1`).
 */
function toV1BaseURL(baseURL: string): string {
  const trimmed = stripTrailingSlashes(baseURL);
  return trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;
}

// llama-swap providers, keyed by normalized base URL so that different
// spellings of the same upstream ("http://h", "http://h/") share one entry.
const swapCache = new Map<string, OpenAICompatibleProvider>();

/** Return the cached llama-swap provider for `baseURL`, creating it on first use. */
function getSwapProvider(baseURL: string): OpenAICompatibleProvider {
  const normalized = toV1BaseURL(baseURL);
  let provider = swapCache.get(normalized);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: normalized,
      includeUsage: true,
    });
    swapCache.set(normalized, provider);
  }
  return provider;
}

/**
 * Build a llama-sidecar provider. Never cached: the X-Agent-Flags header is
 * derived from the per-agent `flags`, joined with single spaces.
 */
function sidecarProvider(baseURL: string, flags: string[]): OpenAICompatibleProvider {
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: toV1BaseURL(baseURL),
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}

const DEEPSEEK_MODEL_PREFIX = 'deepseek-';

/** True when `modelId` starts with the `deepseek-` prefix. */
export function isDeepSeekModel(modelId: string): boolean {
  return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
}

// Single-slot cache for the DeepSeek provider. The credentials it was built
// with are stored next to it so a changed key or base URL is detected instead
// of silently reusing a provider bound to the old values.
let deepseekProviderCache: {
  apiKey: string;
  baseURL: string;
  provider: DeepSeekProvider;
} | null = null;

/**
 * Return a DeepSeek provider for the given credentials.
 *
 * The cached provider is reused only while both `apiKey` and `baseURL` match
 * the values it was created with; otherwise a new provider replaces it.
 */
function getDeepSeekProvider(apiKey: string, baseURL: string): DeepSeekProvider {
  const cached = deepseekProviderCache;
  if (cached && cached.apiKey === apiKey && cached.baseURL === baseURL) {
    return cached.provider;
  }
  const provider = createDeepSeek({ apiKey, baseURL });
  deepseekProviderCache = { apiKey, baseURL, provider };
  return provider;
}

/** Upstream a request can be routed to. */
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';

/** Result of routing: the chosen upstream plus the sidecar flags, if any. */
export interface RoutingInfo {
  route: InferenceRoute;
  flags: string[] | null;
}

interface AgentLike {
  llama_extra_args: string[] | null;
}

interface ConfigLike {
  LLAMA_SWAP_URL: string;
  LLAMA_SIDECAR_URL?: string;
  DEEPSEEK_API_KEY?: string;
  DEEPSEEK_BASE_URL?: string;
}

/**
 * Decide which upstream serves a request.
 *
 * Precedence:
 * 1. `deepseek` — the model ID has the DeepSeek prefix and DEEPSEEK_API_KEY is set.
 * 2. `sidecar` with the agent's flags — `llama_extra_args` is non-empty.
 * 3. `sidecar` with no flags — LLAMA_SIDECAR_URL is configured.
 * 4. `swap` — otherwise.
 *
 * @param agent Agent whose `llama_extra_args` may force the sidecar route, or null.
 * @param config Runtime configuration; when omitted only the agent flags are considered.
 * @param modelId Model identifier; when omitted the DeepSeek route is never chosen.
 */
export function resolveRoute(
  agent: AgentLike | null,
  config?: ConfigLike,
  modelId?: string,
): RoutingInfo {
  // vDeepSeek: checked first so DeepSeek models always bypass
  // llama-swap/sidecar even when those are also configured.
  if (modelId !== undefined && isDeepSeekModel(modelId) && config?.DEEPSEEK_API_KEY) {
    return { route: 'deepseek', flags: null };
  }

  // When llama_extra_args are explicitly set, route through sidecar with them.
  const flags = agent?.llama_extra_args;
  if (flags && flags.length > 0) {
    return { route: 'sidecar', flags };
  }

  // When LLAMA_SIDECAR_URL is configured (even without per-agent flags),
  // route through sidecar to pick up the default base args (cache quant,
  // spec decoding, slot save, etc.). Fall back to llama-swap otherwise.
  if (config?.LLAMA_SIDECAR_URL) {
    return { route: 'sidecar', flags: [] };
  }

  return { route: 'swap', flags: null };
}

/**
 * Build the AI SDK language model for `modelId` on the route chosen by
 * {@link resolveRoute}.
 *
 * @param config Runtime configuration supplying upstream URLs and the DeepSeek key.
 * @param modelId Model identifier passed through to the selected provider.
 * @param agent Optional agent whose `llama_extra_args` influence routing.
 * @throws Error when the sidecar route is selected but LLAMA_SIDECAR_URL is
 *   unset, or the DeepSeek route is selected but DEEPSEEK_API_KEY is unset.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const { route, flags } = resolveRoute(agent ?? null, config, modelId);

  switch (route) {
    case 'deepseek': {
      // resolveRoute only returns 'deepseek' when the key is set; the guard
      // replaces a non-null assertion so a future routing change fails loudly.
      const apiKey = config.DEEPSEEK_API_KEY;
      if (!apiKey) {
        throw new Error('DeepSeek route selected but DEEPSEEK_API_KEY is not set');
      }
      return getDeepSeekProvider(
        apiKey,
        config.DEEPSEEK_BASE_URL ?? DEFAULT_DEEPSEEK_BASE_URL,
      ).chat(modelId);
    }
    case 'sidecar': {
      // Reachable with no URL: per-agent flags select this route regardless
      // of whether LLAMA_SIDECAR_URL is configured.
      const url = config.LLAMA_SIDECAR_URL;
      if (!url) {
        throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
      }
      return sidecarProvider(url, flags ?? []).chatModel(modelId);
    }
    case 'swap':
      return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
    default: {
      // Compile-time exhaustiveness check over InferenceRoute.
      const unreachable: never = route;
      throw new Error(`Unknown inference route: ${String(unreachable)}`);
    }
  }
}

/**
 * Resolve the API endpoint for non-streaming calls (compaction, task-model).
 * Returns the URL + model + optional auth header for direct fetch() usage.
 *
 * DeepSeek-prefixed models with DEEPSEEK_API_KEY set resolve to the DeepSeek
 * base URL with a Bearer Authorization header; every other model resolves to
 * LLAMA_SWAP_URL with no auth header. Trailing slashes are stripped from the
 * returned URL in both cases; no path is appended.
 */
export function resolveModelEndpoint(
  config: ConfigLike,
  modelId: string,
): { url: string; model: string; headers: Record<string, string> } {
  const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };

  if (isDeepSeekModel(modelId) && config.DEEPSEEK_API_KEY) {
    return {
      url: stripTrailingSlashes(config.DEEPSEEK_BASE_URL ?? DEFAULT_DEEPSEEK_BASE_URL),
      model: modelId,
      headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
    };
  }

  return {
    url: stripTrailingSlashes(config.LLAMA_SWAP_URL),
    model: modelId,
    headers: baseHeaders,
  };
}

/**
 * Invalidate the cached DeepSeek provider (e.g. when env vars change at
 * runtime). The next DeepSeek-routed call creates a fresh provider.
 */
export function resetDeepSeekProvider(): void {
  deepseekProviderCache = null;
}