feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)

DeepSeek API: - @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models - Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI - Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter - V4 models: deepseek-v4-flash, deepseek-v4-pro - Wired for both chat and coder panes Whale lifts: - Tool input repair (schema-based type coercion, markdown link unwrapping) - Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract) - Per-MCP-server permissions (allow/ask/deny) - Token tracking UI (cache N, think N in message stats line) Infra: - New DB columns: messages.cache_tokens, messages.reasoning_tokens - New WS frame fields: cache_tokens, reasoning_tokens on message_complete - Coder provider snapshot merges DeepSeek models alongside llama-swap
2026-06-08 01:24:23 +00:00
parent c11e26090f
commit 203cfd2fa8
29 changed files with 916 additions and 42 deletions
--- a/apps/server/src/routes/models.ts
+++ b/apps/server/src/routes/models.ts
@@ -2,26 +2,77 @@ import type { FastifyInstance } from 'fastify';
 import type { Config } from '../config.js';
 import type { ModelInfo } from '../types/api.js';

-interface LlamaSwapModelsResponse {
+/** Body of an OpenAI-style model listing; `data` may be absent. */
+interface ApiModelsResponse {
  data?: ModelInfo[];
 }

+/** DeepSeek models advertised when the live model list cannot be retrieved. */
+const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
+  { id: 'deepseek-v4-flash', object: 'model', created: 0, owned_by: 'deepseek' },
+  { id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
+];
+
+/** Time budget for each upstream model-list request, in milliseconds. */
+const MODELS_FETCH_TIMEOUT_MS = 5_000;
+
+/**
+ * Registers `GET /api/models`, which merges the llama-swap model list with the
+ * DeepSeek list (only when `config.DEEPSEEK_API_KEY` is set) into one array.
+ *
+ * Provider failures are logged and tolerated; the route answers 502 only when
+ * no provider contributed a single model.
+ */
 export function registerModelRoutes(app: FastifyInstance, config: Config): void {
  app.get('/api/models', async (_req, reply) => {
+    const models: ModelInfo[] = [];
+
+    // 1. Fetch llama-swap models
    try {
-      const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
-      if (!res.ok) {
-        reply.code(502);
-        return { error: `llama-swap returned ${res.status}` };
+      const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`, {
+        // Bound the wait so a hung llama-swap cannot stall the whole route
+        signal: AbortSignal.timeout(MODELS_FETCH_TIMEOUT_MS),
+      });
+      if (res.ok) {
+        const parsed = (await res.json()) as ApiModelsResponse;
+        if (Array.isArray(parsed.data)) models.push(...parsed.data);
+      } else {
+        app.log.warn(`llama-swap returned ${res.status} for /v1/models`);
      }
-      const parsed = (await res.json()) as LlamaSwapModelsResponse;
-      return parsed.data ?? [];
-    } catch (err) {
-      reply.code(502);
-      return {
-        error: 'failed to reach llama-swap',
-        details: err instanceof Error ? err.message : String(err),
-      };
+    } catch (err) {
+      // llama-swap unreachable, timed out, or sent an unusable body — proceed with whatever we have
+      app.log.warn({ err }, 'failed to fetch llama-swap models');
    }
+
+    // 2. If DeepSeek is configured, fetch live models from their API
+    if (config.DEEPSEEK_API_KEY) {
+      let live: ModelInfo[] | undefined;
+      try {
+        // Accept a base URL with or without a trailing slash or `/v1` suffix
+        const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com')
+          .replace(/\/+$/, '')
+          .replace(/\/v1$/, '');
+        const res = await fetch(`${baseURL}/v1/models`, {
+          headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
+          signal: AbortSignal.timeout(MODELS_FETCH_TIMEOUT_MS),
+        });
+        if (res.ok) {
+          const parsed = (await res.json()) as ApiModelsResponse;
+          if (Array.isArray(parsed.data)) live = parsed.data;
+        } else {
+          app.log.warn(`DeepSeek returned ${res.status} for /v1/models`);
+        }
+      } catch (err) {
+        app.log.warn({ err }, 'failed to fetch DeepSeek models');
+      }
+      // Unreachable, non-2xx, or malformed body — fall back to static model list
+      models.push(...(live ?? DEEPSEEK_STATIC_MODELS));
+    }
+
+    if (models.length === 0) {
+      reply.code(502);
+      return { error: 'no models available from any provider' };
+    }
+    return models;
  });
 }