feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)

DeepSeek API:
- @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
- Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
- Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
- V4 models: deepseek-v4-flash, deepseek-v4-pro
- Wired for both chat and coder panes

Whale lifts:
- Tool input repair (schema-based type coercion, markdown link unwrapping)
- Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
- Per-MCP-server permissions (allow/ask/deny)
- Token tracking UI (cache N, think N in message stats line)

Infra:
- New DB columns: messages.cache_tokens, messages.reasoning_tokens
- New WS frame fields: cache_tokens, reasoning_tokens on message_complete
- Coder provider snapshot merges DeepSeek models alongside llama-swap
2026-06-08 01:24:23 +00:00
parent 31e5d9d4ab
commit c4079dd85c
29 changed files with 916 additions and 42 deletions
--- a/apps/server/src/services/inference/tool-phase.ts
+++ b/apps/server/src/services/inference/tool-phase.ts
@@ -6,6 +6,7 @@ import type { ToolExecCtx } from '../tools.js';
 import { matchToolGlob } from '../agents.js';
 import { maybeFlagForCompaction } from './payload.js';
 import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
+import { getServerPermission } from '../mcp-client.js';
 // v1.13.16: richer unknown-tool error so the model can self-correct when it
 // drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
 // Applies to all unknown tool names, not just <invoke>-derived ones — at the
@@ -17,6 +18,7 @@ import { formatUnknownToolError } from './tool-suggestions.js';
 // prompted about paths we couldn't grant anyway (e.g. /etc/passwd).
 import { resolveGrantRoot } from '../grant_resolver.js';
 import { stripToolMarkup } from './tool-call-parser.js';
+import { repairToolInput } from './tool-input-repair.js';
 import type { FailureKind } from './mistake-tracker.js';
 import type {
  InferenceContext,
@@ -34,6 +36,8 @@ async function executeToolCall(
  toolCall: ToolCall,
  extraRoots: readonly string[],
  toolCtx?: ToolExecCtx,
+  hooks?: import('../hooks.js').HookRunner,
+  sessionId?: string,
 ): Promise<{ output: unknown; truncated: boolean; error?: string; outcome: FailureKind | 'success' }> {
  // v#12 MistakeTracker: every return path carries an `outcome` so the turn
  // loop can detect a run of heterogeneous failures. The failure taxonomy
@@ -48,7 +52,61 @@ async function executeToolCall(
      outcome: 'tool_not_found',
    };
  }
-  const parsed = tool.inputSchema.safeParse(toolCall.args);
+  // MCP permission gate — block deny/ask before any Zod parsing or execution
+  const mcpPerm = getServerPermission(toolCall.name);
+  if (mcpPerm === 'deny') {
+    return { output: null, truncated: false, error: `blocked: MCP server denied tool '${toolCall.name}'`, outcome: 'permission_denied' };
+  }
+  if (mcpPerm === 'ask') {
+    return { output: null, truncated: false, error: `requires approval: tool '${toolCall.name}' needs user approval`, outcome: 'permission_denied' };
+  }
+  // vWhale: schema-based tool input repair. If the Zod parse fails, attempt
+  // heuristic repairs (type coercion, markdown-link unwrapping, array wrapping)
+  // and retry. Repaired args are adopted only when the retry parse succeeds.
+  let args = toolCall.args;
+  let parsed = tool.inputSchema.safeParse(args);
+  if (!parsed.success) {
+    const schema = tool.jsonSchema?.function?.parameters;
+    if (schema) {
+      const { repaired: repairedArgs, repairs } = repairToolInput(
+        schema as Record<string, unknown>,
+        args as Record<string, unknown>,
+      );
+      if (repairs.length > 0) {
+        const retry = tool.inputSchema.safeParse(repairedArgs);
+        if (retry.success) {
+          args = repairedArgs;
+          parsed = retry;
+        }
+      }
+    }
+  }
+  // vWhale: PreToolUse hook — can block execution.
+  if (hooks && sessionId) {
+    const hookResult = await hooks.run('PreToolUse', {
+      event: 'PreToolUse',
+      session_id: sessionId,
+      tool_name: toolCall.name,
+      tool_args: args as Record<string, unknown>,
+    });
+    if (hookResult.decision === 'block') {
+      return {
+        output: null,
+        truncated: false,
+        error: `blocked by hook: ${hookResult.reason ?? 'PreToolUse denied'}`,
+        outcome: 'permission_denied',
+      };
+    }
+    // Apply updated_input only if the hook rewrote the args AND the rewrite
+    // passes the tool's input schema; otherwise keep the current args.
+    if (hookResult.updated_input && typeof hookResult.updated_input === 'object') {
+      const reParsed = tool.inputSchema.safeParse(hookResult.updated_input);
+      if (reParsed.success) {
+        args = hookResult.updated_input as Record<string, unknown>;
+        parsed = reParsed;
+      }
+    }
+  }
+
  if (!parsed.success) {
    // v1.12 Track B.2: enrich the zod-reject path so the model sees a
    // one-line, tool-named hint ("tool 'search_symbols' rejected — query:
@@ -183,6 +241,8 @@ export async function executeToolPhase(
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
+    cache_tokens: result.cacheReadTokens ?? null,
+    reasoning_tokens: result.reasoningTokens ?? null,
    started_at: startedAt,
    finished_at: updated?.finished_at ?? null,
    model: session.model,
@@ -318,10 +378,22 @@ export async function executeToolPhase(
        });
        return;
      }
-      const tres = await executeToolCall(projectRoot, tc, session.allowed_read_paths, {
-        sql: ctx.sql,
-        sessionId,
-      });
+      const tres = await executeToolCall(
+        projectRoot, tc, session.allowed_read_paths,
+        { sql: ctx.sql, sessionId },
+        ctx.hooks, sessionId,
+      );
+      // vWhale: PostToolUse hook (best-effort, non-blocking).
+      if (ctx.hooks) {
+        ctx.hooks.run('PostToolUse', {
+          event: 'PostToolUse',
+          session_id: sessionId,
+          tool_name: tc.name,
+          tool_args: tc.args as Record<string, unknown>,
+          tool_result: tres.output,
+          tool_error: tres.error,
+        }).catch(() => {});
+      }
      // v#12 MistakeTracker: record the real execution outcome (success or a
      // FailureKind). This is the primary signal for heterogeneous-failure
      // detection.