boocode/apps/server/src/services/inference/provider.ts

import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { createDeepSeek } from '@ai-sdk/deepseek';
import type { LanguageModel } from 'ai';

// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is
// unauthenticated in our Tailscale topology, and exposure over the public
// internet is gated by Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, or LLAMA_SIDECAR_URL
// is configured, route through llama-sidecar instead. A fresh provider is
// created per call (not cached) because the X-Agent-Flags header varies per
// agent. The llama-swap path stays cached since it has no per-request headers.
//
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
// is set, route through the official @ai-sdk/deepseek provider (not
// openai-compatible) so DeepSeek-specific features work: providerMetadata
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.

// llama-swap providers, keyed by the normalized `/v1` base URL so spellings
// that differ only by trailing slashes or the `/v1` suffix share one instance.
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();

/**
 * Returns the OpenAI-compatible provider for a llama-swap base URL, creating
 * and caching it on first use.
 *
 * Trailing slashes are stripped before the `/v1` suffix is checked, so
 * `http://host/`, `http://host/v1` and `http://host/v1/` all resolve to
 * `http://host/v1` rather than `http://host//v1` or `http://host/v1//v1`.
 *
 * @param baseURL - llama-swap base URL, with or without the `/v1` suffix.
 * @returns The provider bound to the normalized URL.
 */
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  const v1URL = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;
  let provider = swapCache.get(v1URL);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: v1URL,
      includeUsage: true,
    });
    swapCache.set(v1URL, provider);
  }
  return provider;
}

/**
 * Builds a fresh OpenAI-compatible provider for llama-sidecar.
 *
 * Nothing is cached here: the `X-Agent-Flags` header is fixed when the
 * provider is created and its value depends on the flags passed in.
 *
 * Trailing slashes are stripped before the `/v1` suffix is checked, so
 * `http://host/` and `http://host/v1/` both resolve to `http://host/v1`.
 *
 * @param baseURL - llama-sidecar base URL, with or without the `/v1` suffix.
 * @param flags - Extra llama flags, sent space-joined in `X-Agent-Flags`
 *   (an empty list yields an empty header value).
 * @returns A provider bound to the normalized URL and the given flags.
 */
function sidecarProvider(
  baseURL: string,
  flags: string[],
): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`,
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}

/** Prefix that marks a model ID as a DeepSeek model. */
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';

/**
 * Tells whether a model ID names a DeepSeek model.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when `modelId` begins with `deepseek-` (case-sensitive).
 */
export function isDeepSeekModel(modelId: string): boolean {
  const head = modelId.slice(0, DEEPSEEK_MODEL_PREFIX.length);
  return head === DEEPSEEK_MODEL_PREFIX;
}

// Single-slot cache for the DeepSeek provider, plus the settings it was built
// from so that a call with different settings rebuilds it instead of silently
// reusing a provider bound to an old key or URL.
let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
let deepseekProviderSettings: { apiKey: string; baseURL: string } | null = null;

/**
 * Returns the DeepSeek provider for the given credentials, reusing the cached
 * instance while `apiKey` and `baseURL` are unchanged.
 *
 * The provider is rebuilt when the cache is empty (first call, or after the
 * cache variable has been set back to `null`) or when either argument differs
 * from the values the cached provider was created with.
 *
 * @param apiKey - DeepSeek API key passed to `createDeepSeek`.
 * @param baseURL - DeepSeek base URL passed to `createDeepSeek`.
 * @returns A provider created with exactly these settings.
 */
function getDeepSeekProvider(
  apiKey: string,
  baseURL: string,
): ReturnType<typeof createDeepSeek> {
  const settingsChanged =
    deepseekProviderSettings === null ||
    deepseekProviderSettings.apiKey !== apiKey ||
    deepseekProviderSettings.baseURL !== baseURL;
  if (!deepseekProviderCache || settingsChanged) {
    deepseekProviderCache = createDeepSeek({
      apiKey,
      baseURL,
    });
    deepseekProviderSettings = { apiKey, baseURL };
  }
  return deepseekProviderCache;
}

/** Upstream a request can be routed to: llama-swap, llama-sidecar, or the DeepSeek provider. */
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';

/** Routing decision returned by `resolveRoute`. */
export interface RoutingInfo {
  /** Upstream selected for the request. */
  route: InferenceRoute;
  /**
   * Extra llama flags for the sidecar route (an empty array when only the
   * sidecar URL triggered the route); `null` for the swap and deepseek routes.
   */
  flags: string[] | null;
}

/** Minimal agent shape the routing helpers read. */
interface AgentLike {
  /** Per-agent extra llama arguments; `null` or an empty array means none are set. */
  llama_extra_args: string[] | null;
}

/** Subset of configuration read by the routing helpers in this file. */
interface ConfigLike {
  /** llama-swap base URL; used when no other route applies. */
  LLAMA_SWAP_URL: string;
  /** llama-sidecar base URL; when set, requests not routed to DeepSeek go through the sidecar. */
  LLAMA_SIDECAR_URL?: string;
  /** DeepSeek API key; `deepseek-` models are only routed to DeepSeek when this is set. */
  DEEPSEEK_API_KEY?: string;
  /** DeepSeek base URL; callers in this file fall back to `https://api.deepseek.com` when omitted. */
  DEEPSEEK_BASE_URL?: string;
}

/**
 * Decides which upstream should serve a request.
 *
 * Precedence: DeepSeek, then sidecar with the agent's own flags, then sidecar
 * with no flags, then llama-swap.
 *
 * @param agent - Agent whose `llama_extra_args` may force the sidecar route, or `null`.
 * @param config - Optional configuration supplying the DeepSeek key and sidecar URL.
 * @param modelId - Optional model identifier; only its `deepseek-` prefix matters here.
 * @returns The chosen route and the flags to send with it.
 */
export function resolveRoute(
  agent: AgentLike | null,
  config?: ConfigLike,
  modelId?: string,
): RoutingInfo {
  // vDeepSeek: a deepseek- model with DEEPSEEK_API_KEY set wins over every
  // other route, so DeepSeek models bypass llama-swap/sidecar even when those
  // are configured too.
  const isDeepSeekId = modelId != null && modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
  if (isDeepSeekId && config?.DEEPSEEK_API_KEY) {
    return { route: 'deepseek', flags: null };
  }

  // Explicit per-agent llama_extra_args go through the sidecar as-is.
  const agentFlags = agent?.llama_extra_args ?? [];
  if (agentFlags.length > 0) {
    return { route: 'sidecar', flags: agentFlags };
  }

  // With LLAMA_SIDECAR_URL configured (even without per-agent flags) the
  // sidecar is still used so its default base args apply (cache quant, spec
  // decoding, slot save, etc.); otherwise fall back to llama-swap.
  return config?.LLAMA_SIDECAR_URL
    ? { route: 'sidecar', flags: [] }
    : { route: 'swap', flags: null };
}

/**
 * Builds the AI SDK language model for a request, choosing the upstream via
 * `resolveRoute`.
 *
 * @param config - Upstream URLs and optional DeepSeek credentials.
 * @param modelId - Model identifier handed to the selected provider.
 * @param agent - Optional agent whose `llama_extra_args` influence routing.
 * @returns A language model bound to the selected upstream.
 * @throws Error when the DeepSeek route is selected without `DEEPSEEK_API_KEY`,
 *   or the sidecar route is selected without `LLAMA_SIDECAR_URL`.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const { route, flags } = resolveRoute(agent ?? null, config, modelId);

  if (route === 'deepseek') {
    // The route is only expected when the key is set; an explicit guard
    // (instead of a non-null assertion) makes any mismatch fail loudly here
    // rather than passing `undefined` into the provider.
    const apiKey = config.DEEPSEEK_API_KEY;
    if (!apiKey) {
      throw new Error('DeepSeek route selected but DEEPSEEK_API_KEY is not set');
    }
    const deepseekURL = config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com';
    return getDeepSeekProvider(apiKey, deepseekURL).chat(modelId);
  }

  if (route === 'sidecar') {
    // Agent flags can select this route even when no sidecar URL is configured.
    const sidecarURL = config.LLAMA_SIDECAR_URL;
    if (!sidecarURL) {
      throw new Error('Sidecar route selected but LLAMA_SIDECAR_URL is not set');
    }
    return sidecarProvider(sidecarURL, flags ?? []).chatModel(modelId);
  }

  return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
}

/**
 * Picks the endpoint for direct fetch() calls (non-streaming use such as
 * compaction and the task model).
 *
 * A `deepseek-` model with `DEEPSEEK_API_KEY` set resolves to the DeepSeek
 * base URL with a bearer `Authorization` header; everything else resolves to
 * `LLAMA_SWAP_URL` with no auth header. Trailing slashes are removed from the
 * returned URL in both cases.
 *
 * @param config - Upstream URLs and optional DeepSeek credentials.
 * @param modelId - Model identifier, returned unchanged as `model`.
 * @returns The base URL, the model ID, and the request headers to send.
 */
export function resolveModelEndpoint(
  config: ConfigLike,
  modelId: string,
): { url: string; model: string; headers: Record<string, string> } {
  const viaDeepSeek =
    modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && Boolean(config.DEEPSEEK_API_KEY);

  const rawBase = viaDeepSeek
    ? (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com')
    : config.LLAMA_SWAP_URL;

  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  if (viaDeepSeek) {
    headers.Authorization = `Bearer ${config.DEEPSEEK_API_KEY}`;
  }

  return {
    url: rawBase.replace(/\/+$/, ''),
    model: modelId,
    headers,
  };
}

/**
 * Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime).
 *
 * Clears the module-level cache variable, so the next `getDeepSeekProvider`
 * call constructs a new provider from the arguments it is given.
 */
export function resetDeepSeekProvider(): void {
  deepseekProviderCache = null;
}