feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)

DeepSeek API:
- @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
- Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
- Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
- V4 models: deepseek-v4-flash, deepseek-v4-pro
- Wired for both chat and coder panes

Whale lifts:
- Tool input repair (schema-based type coercion, markdown link unwrapping)
- Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
- Per-MCP-server permissions (allow/ask/deny)
- Token tracking UI (cache N, think N in message stats line)

Infra:
- New DB columns: messages.cache_tokens, messages.reasoning_tokens
- New WS frame fields: cache_tokens, reasoning_tokens on message_complete
- Coder provider snapshot merges DeepSeek models alongside llama-swap
2026-06-08 01:24:23 +00:00
parent c11e26090f
commit 203cfd2fa8
29 changed files with 916 additions and 42 deletions
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -1,4 +1,5 @@
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
+import { createDeepSeek } from '@ai-sdk/deepseek';
 import type { LanguageModel } from 'ai';

 // v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
@@ -11,6 +12,12 @@ import type { LanguageModel } from 'ai';
 // llama-sidecar instead. A fresh provider is created per call (not cached)
 // because the X-Agent-Flags header varies per agent. The llama-swap path
 // stays cached since it has no per-request headers.
+//
+// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
+// is set, route through the official @ai-sdk/deepseek provider (not
+// openai-compatible) so DeepSeek-specific features work: providerMetadata
+// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
+// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.

 const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();

@@ -41,7 +48,28 @@ function sidecarProvider(
  });
 }

-export type InferenceRoute = 'swap' | 'sidecar';
+const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
+
+/**
+ * Tells whether `modelId` is a DeepSeek model ID, i.e. whether it begins
+ * with `DEEPSEEK_MODEL_PREFIX`. The match is case-sensitive.
+ */
+export function isDeepSeekModel(modelId: string): boolean {
+  const head = modelId.slice(0, DEEPSEEK_MODEL_PREFIX.length);
+  return head === DEEPSEEK_MODEL_PREFIX;
+}
+
+let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
+// Credentials the cached provider was built with. Kept alongside the cache so
+// a changed API key or base URL is detected instead of silently reusing a
+// provider that still points at the old values.
+let deepseekProviderCacheKey: string | null = null;
+
+/**
+ * Returns the DeepSeek provider for the given credentials.
+ *
+ * The provider is created on first use and reused by later calls that pass
+ * the same `apiKey` and `baseURL`. When either value differs from the ones
+ * the cached provider was created with, a new provider is built and replaces
+ * the cached one.
+ *
+ * @param apiKey DeepSeek API key handed to `createDeepSeek`.
+ * @param baseURL Base URL handed to `createDeepSeek`.
+ * @returns The cached or newly created provider.
+ */
+function getDeepSeekProvider(
+  apiKey: string,
+  baseURL: string,
+): ReturnType<typeof createDeepSeek> {
+  // JSON-encode the pair so no combination of key/URL characters can collide.
+  const cacheKey = JSON.stringify([apiKey, baseURL]);
+  if (!deepseekProviderCache || deepseekProviderCacheKey !== cacheKey) {
+    deepseekProviderCache = createDeepSeek({
+      apiKey,
+      baseURL,
+    });
+    deepseekProviderCacheKey = cacheKey;
+  }
+  return deepseekProviderCache;
+}
+
+export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';

 export interface RoutingInfo {
  route: InferenceRoute;
@@ -55,12 +83,21 @@ interface AgentLike {
 interface ConfigLike {
  LLAMA_SWAP_URL: string;
  LLAMA_SIDECAR_URL?: string;
+  DEEPSEEK_API_KEY?: string;
+  DEEPSEEK_BASE_URL?: string;
 }

 export function resolveRoute(
  agent: AgentLike | null,
  config?: ConfigLike,
+  modelId?: string,
 ): RoutingInfo {
+  // vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
+  // route through the DeepSeek provider. Checked first so DeepSeek models
+  // always bypass llama-swap/sidecar even when those are also configured.
+  if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
+    return { route: 'deepseek', flags: null };
+  }
  // When llama_extra_args are explicitly set, route through sidecar with them.
  const flags = agent?.llama_extra_args;
  if (flags && flags.length > 0) {
@@ -80,7 +117,13 @@ export function upstreamModel(
  modelId: string,
  agent?: AgentLike | null,
 ): LanguageModel {
-  const { route, flags } = resolveRoute(agent ?? null, config);
+  const { route, flags } = resolveRoute(agent ?? null, config, modelId);
+  if (route === 'deepseek') {
+    return getDeepSeekProvider(
+      config.DEEPSEEK_API_KEY!,
+      config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
+    ).chat(modelId);
+  }
  if (route === 'sidecar') {
    const url = config.LLAMA_SIDECAR_URL;
    if (!url) {
@@ -90,3 +133,30 @@ export function upstreamModel(
  }
  return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
 }
+
+/**
+ * Resolve where a non-streaming request (compaction, task-model) for
+ * `modelId` should be sent when the caller uses fetch() directly.
+ *
+ * A DeepSeek model ID combined with a configured DEEPSEEK_API_KEY yields the
+ * DeepSeek base URL plus a Bearer `Authorization` header; every other case
+ * yields the llama-swap base URL with no auth header. Trailing slashes are
+ * stripped from the returned `url`, and no request path is appended here.
+ *
+ * @returns The base URL, the model ID unchanged, and the request headers.
+ */
+export function resolveModelEndpoint(
+  config: ConfigLike,
+  modelId: string,
+): { url: string; model: string; headers: Record<string, string> } {
+  const stripTrailingSlashes = (value: string): string => value.replace(/\/+$/, '');
+  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
+
+  const apiKey = config.DEEPSEEK_API_KEY;
+  const useDeepSeek = Boolean(apiKey) && modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
+  if (!useDeepSeek) {
+    return {
+      url: stripTrailingSlashes(config.LLAMA_SWAP_URL),
+      model: modelId,
+      headers,
+    };
+  }
+
+  // Only the DeepSeek route carries credentials.
+  headers.Authorization = `Bearer ${apiKey}`;
+  return {
+    url: stripTrailingSlashes(config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com'),
+    model: modelId,
+    headers,
+  };
+}
+
+/** Drop the cached DeepSeek provider so the next getDeepSeekProvider() call builds a fresh one (e.g. when env vars change at runtime). */
+export function resetDeepSeekProvider(): void {
+  deepseekProviderCache = null;
+}