import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import type { LanguageModel } from 'ai';

// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
// Tailscale topology and exposing it over the public internet is gated by
// Authelia at the Caddy layer, not by API keys.

/** Provider instance type, derived from the AI SDK factory so it cannot drift. */
type LlamaSwapProvider = ReturnType<typeof createOpenAICompatible>;

/**
 * Providers memoized by *normalized* base URL, so equivalent spellings of the
 * same upstream (`http://h`, `http://h/`, `http://h/v1`, `http://h/v1/`) share
 * a single provider instance.
 */
const cache = new Map<string, LlamaSwapProvider>();

/**
 * Canonicalizes an upstream base URL to end in exactly one `/v1` segment.
 *
 * Trailing slashes are stripped first; otherwise `http://h/` would become
 * `http://h//v1` and `http://h/v1/` would become `http://h/v1//v1`.
 *
 * @param baseURL - Upstream root, with or without a trailing `/v1` and/or `/`.
 * @returns The base URL ending in `/v1` with no trailing slash.
 */
function normalizeBaseURL(baseURL: string): string {
  const trimmed = baseURL.replace(/\/+$/, '');
  return trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;
}

/**
 * Returns the memoized llama-swap provider for `baseURL`, creating it on
 * first use.
 *
 * @param baseURL - Upstream root; normalized before lookup and creation.
 * @returns The provider bound to the normalized base URL.
 */
function getProvider(baseURL: string): LlamaSwapProvider {
  const normalized = normalizeBaseURL(baseURL);

  let provider = cache.get(normalized);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: normalized,
      // v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
      // omits `stream_options.include_usage` from the request body. Without
      // it, llama.cpp / llama-swap never emits the trailing usage block, so
      // `result.usage` resolves with inputTokens=outputTokens=undefined and
      // tokens_used / ctx_used land as NULL in every messages row. Setting
      // true here re-enables the per-stream usage payload across all models
      // served via the llama-swap provider.
      includeUsage: true,
    });
    cache.set(normalized, provider);
  }
  return provider;
}

/**
 * Builds a chat language model served by the llama-swap upstream.
 *
 * @param baseURL - llama-swap root URL, resolved by the caller at call time;
 *   a trailing `/v1` and/or trailing slashes are tolerated.
 * @param modelId - Model identifier passed through to the provider's
 *   `chatModel` factory.
 * @returns The chat model for `modelId` on the provider for `baseURL`.
 */
export function upstreamModel(baseURL: string, modelId: string): LanguageModel {
  return getProvider(baseURL).chatModel(modelId);
}