/*
 * DeepSeek API:
 * - @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
 * - Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
 * - Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
 * - V4 models: deepseek-v4-flash, deepseek-v4-pro
 * - Wired for both chat and coder panes
 *
 * Whale lifts:
 * - Tool input repair (schema-based type coercion, markdown link unwrapping)
 * - Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
 * - Per-MCP-server permissions (allow/ask/deny)
 * - Token tracking UI (cache N, think N in message stats line)
 *
 * Infra:
 * - New DB columns: messages.cache_tokens, messages.reasoning_tokens
 * - New WS frame fields: cache_tokens, reasoning_tokens on message_complete
 * - Coder provider snapshot merges DeepSeek models alongside llama-swap
 */
163 lines
5.4 KiB
TypeScript
163 lines
5.4 KiB
TypeScript
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
import { createDeepSeek } from '@ai-sdk/deepseek';
|
|
import type { LanguageModel } from 'ai';
|
|
|
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
// upstream without touching env vars. No apiKey — llama-swap is
// unauthenticated in our Tailscale topology, and exposing it over the public
// internet is gated by Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
// llama-sidecar instead. A fresh provider is created per call (not cached)
// because the X-Agent-Flags header varies per agent. The llama-swap path
// stays cached since it has no per-request headers.
//
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
// is set, route through the official @ai-sdk/deepseek provider (not
// openai-compatible) so DeepSeek-specific features work: providerMetadata
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
|
|
|
|
/**
 * Cache of llama-swap providers, keyed by the normalized base URL (trailing
 * slashes removed, `/v1` suffix applied). Reuse is safe because this path
 * sets no per-request headers.
 */
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();

/**
 * Return the OpenAI-compatible provider for a llama-swap upstream, creating
 * and caching it on first use.
 *
 * @param baseURL llama-swap base URL, with or without a `/v1` suffix and
 *   with or without trailing slashes.
 * @returns The cached provider for the normalized URL.
 */
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
  // Strip trailing slashes before the suffix check; otherwise
  // `http://host/v1/` would become `http://host/v1//v1` and `http://host/`
  // would become `http://host//v1`.
  const trimmed = baseURL.replace(/\/+$/, '');
  const v1URL = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;

  // Key on the normalized URL so equivalent spellings share one provider.
  let provider = swapCache.get(v1URL);
  if (!provider) {
    provider = createOpenAICompatible({
      name: 'llama-swap',
      baseURL: v1URL,
      includeUsage: true,
    });
    swapCache.set(v1URL, provider);
  }
  return provider;
}
|
|
|
|
/**
 * Build a fresh OpenAI-compatible provider that targets llama-sidecar.
 *
 * Not cached: the `X-Agent-Flags` header is derived from the caller's flags,
 * so each call gets its own provider.
 *
 * @param baseURL llama-sidecar base URL, with or without a `/v1` suffix and
 *   with or without trailing slashes.
 * @param flags Flags to forward; sent space-joined in `X-Agent-Flags`.
 * @returns A new provider configured for the sidecar.
 */
function sidecarProvider(
  baseURL: string,
  flags: string[],
): ReturnType<typeof createOpenAICompatible> {
  // Strip trailing slashes before the suffix check; otherwise
  // `http://host/v1/` would become `http://host/v1//v1`.
  const trimmed = baseURL.replace(/\/+$/, '');
  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`,
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}
|
|
|
|
/** Model IDs beginning with this prefix are treated as DeepSeek models. */
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';

/**
 * Report whether a model ID names a DeepSeek model.
 *
 * The check is a case-sensitive prefix match against `deepseek-`.
 *
 * @param modelId Model identifier to inspect.
 * @returns `true` when `modelId` begins with the DeepSeek prefix.
 */
export function isDeepSeekModel(modelId: string): boolean {
  const head = modelId.slice(0, DEEPSEEK_MODEL_PREFIX.length);
  return head === DEEPSEEK_MODEL_PREFIX;
}
|
|
|
|
/** Most recently created DeepSeek provider, or `null` when none exists or it was invalidated. */
let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;

/** API key and base URL the cached provider was created with; used to detect a stale cache. */
let deepseekProviderCacheKey: { apiKey: string; baseURL: string } | null = null;

/**
 * Return a DeepSeek provider for the given credentials and endpoint.
 *
 * The provider is cached and reused while `apiKey` and `baseURL` stay the
 * same. A call with different values replaces the cached provider, so a
 * changed key or endpoint never reuses a provider built with the old ones.
 *
 * @param apiKey DeepSeek API key.
 * @param baseURL DeepSeek API base URL.
 * @returns The cached or newly created provider.
 */
function getDeepSeekProvider(
  apiKey: string,
  baseURL: string,
): ReturnType<typeof createDeepSeek> {
  let provider = deepseekProviderCache;
  const builtWith = deepseekProviderCacheKey;
  if (
    provider === null ||
    builtWith === null ||
    builtWith.apiKey !== apiKey ||
    builtWith.baseURL !== baseURL
  ) {
    provider = createDeepSeek({
      apiKey,
      baseURL,
    });
    deepseekProviderCache = provider;
    deepseekProviderCacheKey = { apiKey, baseURL };
  }
  return provider;
}
|
|
|
|
/** Upstream an inference request can be sent to. */
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';

/** Routing decision produced by `resolveRoute`. */
export interface RoutingInfo {
  /** Selected upstream. */
  route: InferenceRoute;
  /**
   * Flags accompanying the decision. `resolveRoute` returns an array
   * (possibly empty) for `'sidecar'` and `null` for `'swap'` and `'deepseek'`.
   */
  flags: string[] | null;
}
|
|
|
|
/** Minimal agent shape needed for routing. */
interface AgentLike {
  /** Per-agent llama flags; `null` or an empty array means none are set. */
  llama_extra_args: string[] | null;
}

/** Minimal configuration shape needed for routing. */
interface ConfigLike {
  /** Base URL of the llama-swap upstream. */
  LLAMA_SWAP_URL: string;
  /** Base URL of llama-sidecar; when set, non-DeepSeek traffic is routed through it. */
  LLAMA_SIDECAR_URL?: string;
  /** DeepSeek API key; DeepSeek routing is used only when this is set. */
  DEEPSEEK_API_KEY?: string;
  /** DeepSeek base URL override; callers fall back to `https://api.deepseek.com`. */
  DEEPSEEK_BASE_URL?: string;
}
|
|
|
|
/**
 * Decide which upstream should serve a request.
 *
 * Precedence:
 * 1. `'deepseek'` — the model ID is a DeepSeek model and `DEEPSEEK_API_KEY`
 *    is set. Checked first so DeepSeek models bypass llama-swap/sidecar even
 *    when those are also configured.
 * 2. `'sidecar'` with the agent's flags — the agent has a non-empty
 *    `llama_extra_args`.
 * 3. `'sidecar'` with no flags — `LLAMA_SIDECAR_URL` is configured, so the
 *    sidecar's default base args apply.
 * 4. `'swap'` — otherwise.
 *
 * @param agent Agent whose `llama_extra_args` may force the sidecar route, or `null`.
 * @param config Optional configuration; when omitted only agent flags are considered.
 * @param modelId Optional model ID; when omitted the DeepSeek route is never chosen.
 * @returns The selected route and its flags.
 */
export function resolveRoute(
  agent: AgentLike | null,
  config?: ConfigLike,
  modelId?: string,
): RoutingInfo {
  // Use the shared helper so the DeepSeek prefix rule lives in one place.
  if (modelId != null && isDeepSeekModel(modelId) && config?.DEEPSEEK_API_KEY) {
    return { route: 'deepseek', flags: null };
  }

  // Explicit per-agent flags always go through the sidecar.
  const flags = agent?.llama_extra_args;
  if (flags && flags.length > 0) {
    return { route: 'sidecar', flags };
  }

  // A configured sidecar is preferred even without per-agent flags.
  if (config?.LLAMA_SIDECAR_URL) {
    return { route: 'sidecar', flags: [] };
  }

  return { route: 'swap', flags: null };
}
|
|
|
|
/**
 * Build the language model for a request, using the route chosen by
 * `resolveRoute`.
 *
 * @param config Upstream URLs and DeepSeek credentials.
 * @param modelId Model identifier passed through to the selected provider.
 * @param agent Optional agent whose `llama_extra_args` influence routing.
 * @returns A model bound to the DeepSeek, sidecar, or llama-swap provider.
 * @throws Error when the DeepSeek route is selected without
 *   `DEEPSEEK_API_KEY`, or the sidecar route without `LLAMA_SIDECAR_URL`.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const { route, flags } = resolveRoute(agent ?? null, config, modelId);

  if (route === 'deepseek') {
    // Guard instead of a non-null assertion, so a routing/config mismatch
    // fails loudly rather than passing `undefined` as the API key.
    const apiKey = config.DEEPSEEK_API_KEY;
    if (!apiKey) {
      throw new Error('DeepSeek route selected but DEEPSEEK_API_KEY is not set');
    }
    return getDeepSeekProvider(
      apiKey,
      config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
    ).chat(modelId);
  }

  if (route === 'sidecar') {
    // Agent flags can select this route even when no sidecar URL is configured.
    const url = config.LLAMA_SIDECAR_URL;
    if (!url) {
      throw new Error(`Sidecar route selected but LLAMA_SIDECAR_URL is not set`);
    }
    return sidecarProvider(url, flags ?? []).chatModel(modelId);
  }

  return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
}
|
|
|
|
/**
 * Resolve the API endpoint for non-streaming calls (compaction, task-model).
 *
 * DeepSeek models with `DEEPSEEK_API_KEY` set resolve to the DeepSeek base
 * URL plus a bearer `Authorization` header. Everything else resolves to
 * `LLAMA_SWAP_URL` with no auth header. Trailing slashes are removed from
 * the returned URL in both cases.
 *
 * @param config Upstream URLs and DeepSeek credentials.
 * @param modelId Model identifier; returned unchanged as `model`.
 * @returns The base URL, model, and request headers for direct `fetch()` usage.
 */
export function resolveModelEndpoint(
  config: ConfigLike,
  modelId: string,
): { url: string; model: string; headers: Record<string, string> } {
  const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };

  // Use the shared helper so the DeepSeek prefix rule lives in one place.
  const apiKey = config.DEEPSEEK_API_KEY;
  if (apiKey && isDeepSeekModel(modelId)) {
    const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
    return {
      url: baseURL,
      model: modelId,
      headers: { ...baseHeaders, Authorization: `Bearer ${apiKey}` },
    };
  }

  return {
    url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
    model: modelId,
    headers: baseHeaders,
  };
}
|
|
|
|
/**
 * Drop the cached DeepSeek provider so the next lookup builds a new one
 * (e.g. after env vars change at runtime).
 */
export function resetDeepSeekProvider(): void {
  deepseekProviderCache = null;
}
|