diff --git a/.env.example b/.env.example
index ff57b77..41158c3 100644
--- a/.env.example
+++ b/.env.example
@@ -20,6 +20,12 @@ SEARXNG_URL=http://100.114.205.53:8888
 # with FAST_MODEL when unset.
 # TASK_MODEL_URL=http://100.90.172.55:7995
 
+# DeepSeek API key. When set, models with IDs starting with 'deepseek-'
+# (e.g. deepseek-chat, deepseek-reasoner, deepseek-v4-flash) route through
+# DeepSeek's API instead of llama-swap. Requires a DeepSeek Platform API key.
+# DEEPSEEK_API_KEY=sk-...
+# DEEPSEEK_BASE_URL=https://api.deepseek.com
+
 # v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
 # Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
 # sessions where the model only needs read-only filesystem access.
diff --git a/apps/coder/src/config.ts b/apps/coder/src/config.ts
index 294e9a5..f588d52 100644
--- a/apps/coder/src/config.ts
+++ b/apps/coder/src/config.ts
@@ -50,6 +50,8 @@ const ConfigSchema = z.object({
   // only reaped after it's been untouched this long (avoids sweeping a dir mid
   // ensureSessionWorktree create). 1h default.
   ORPHAN_WORKTREE_GRACE_MS: z.coerce.number().int().positive().default(3_600_000),
+  DEEPSEEK_API_KEY: z.string().optional(),
+  DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
 });
 
 export type Config = z.infer<typeof ConfigSchema>;
diff --git a/apps/coder/src/services/provider-snapshot.ts b/apps/coder/src/services/provider-snapshot.ts
index f2b36d4..c60d65e 100644
--- a/apps/coder/src/services/provider-snapshot.ts
+++ b/apps/coder/src/services/provider-snapshot.ts
@@ -29,6 +29,22 @@ interface AgentRow {
   last_probed_at: string | Date | null;
 }
 
+export async function fetchDeepSeekModels(config: Config): Promise<ProviderModel[]> {
+  if (!config.DEEPSEEK_API_KEY) return [];
+  try {
+    const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
+    const res = await fetch(`${baseURL}/v1/models`, {
+      headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
+      signal: AbortSignal.timeout(5_000),
+    });
+    if (!res.ok) return [];
+    const parsed = (await res.json()) as { data?: Array<{ id: string }> };
+    return (parsed.data ?? []).map((m) => ({ id: m.id, label: m.id }));
+  } catch {
+    return [];
+  }
+}
+
 export async function fetchLlamaSwapModels(config: Config): Promise<ProviderModel[]> {
   try {
     const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
@@ -256,7 +272,13 @@ export async function getProviderSnapshot(
   }
 
   const build = async (): Promise<ProviderSnapshotEntry[]> => {
-    const llamaModels = await fetchLlamaSwapModels(config);
+    const [llamaModels, deepseekModels] = await Promise.all([
+      fetchLlamaSwapModels(config),
+      fetchDeepSeekModels(config),
+    ]);
+    // Merge DeepSeek models into the llama-swap model pool so the boocode
+    // provider (which sources from llama-swap) also includes DeepSeek models.
+    const mergedModels = mergeModels(llamaModels, deepseekModels);
     const agents = await sql<AgentRow[]>`
       SELECT name, install_path, supports_acp, models, commands, label, transport, last_probed_at FROM available_agents
     `;
@@ -265,7 +287,7 @@ export async function getProviderSnapshot(
 
     const entries = await Promise.all(
       [...getResolvedRegistry().values()].map((resolved) =>
-        buildProviderEntry(resolved, agentMap.get(resolved.id), llamaModels, resolvedCwd, ttlMs, force),
+        buildProviderEntry(resolved, agentMap.get(resolved.id), mergedModels, resolvedCwd, ttlMs, force),
       ),
     );
 
diff --git a/apps/server/package.json b/apps/server/package.json
index 7a6ff25..f345f0a 100644
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -77,8 +77,9 @@
     "test": "vitest run"
   },
   "dependencies": {
-    "@boocode/contracts": "workspace:*",
+    "@ai-sdk/deepseek": "^2.0.35",
     "@ai-sdk/openai-compatible": "^2.0.47",
+    "@boocode/contracts": "workspace:*",
     "@fastify/static": "^7.0.4",
     "@fastify/websocket": "^10.0.1",
     "@modelcontextprotocol/sdk": "^1.29.0",
diff --git a/apps/server/src/config.ts b/apps/server/src/config.ts
index d7b6199..d69f1a0 100644
--- a/apps/server/src/config.ts
+++ b/apps/server/src/config.ts
@@ -26,6 +26,14 @@ const ConfigSchema = z.object({
   FAST_MODEL: z.string().optional(),
   TASK_MODEL_URL: z.string().url().optional(),
   LLAMA_SIDECAR_URL: z.string().url().optional(),
+  // vDeepSeek: DeepSeek API key for direct API access. When set, models
+  // with IDs starting with 'deepseek-' route through DeepSeek's API instead
+  // of llama-swap. Defaults to empty (DeepSeek routing disabled).
+  DEEPSEEK_API_KEY: z.string().optional(),
+  // Optional base URL override for DeepSeek API. Defaults to api.deepseek.com.
+  DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
+  // vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
+  HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
 });
 
 export type Config = z.infer<typeof ConfigSchema>;
diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts
index 913c7b1..19114f6 100644
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -31,6 +31,7 @@ import { loadMcpConfig } from './services/mcp-config.js';
 import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
 import { appendMcpTools } from './services/tools.js';
 import { refreshToolNames, getAgentsForProject } from './services/agents.js';
+import { loadHooksConfig, createHookRunner } from './services/hooks.js';
 
 async function main() {
   const config = loadConfig();
@@ -136,11 +137,17 @@ async function main() {
     app.log.warn({ err }, 'skills boot walk failed');
   }
 
+  // vWhale hooks: load hook config and create runner. Missing file = no hooks.
+  loadHooksConfig(config.HOOKS_CONFIG_PATH);
+  const hookRunner = createHookRunner();
+  const hasHooks = Object.keys(loadHooksConfig(config.HOOKS_CONFIG_PATH).hooks).length > 0;
+
   const inference = createInferenceRunner(
     {
       sql,
       config,
       log: app.log,
+      hooks: hasHooks ? hookRunner : undefined,
       publish: (sessionId, frame) => {
         // v1.13.11-b: route through the typed publishFrame so the broker's
         // Zod gate validates every inference frame before delivery.
@@ -166,7 +173,7 @@ async function main() {
     // bubble up so the route can reply 500 — manual /compact failures
     // should be loud (the user just clicked a button).
     runCompaction: (chatId) =>
-      compaction.process({ sql, config, log: app.log, broker, chatId }),
+      compaction.process({ sql, config, log: app.log, broker, chatId, hooks: hasHooks ? hookRunner : undefined }),
     cancelInference: async (sessionId, chatId) => {
       return inference.cancel(sessionId, chatId);
     },
diff --git a/apps/server/src/routes/models.ts b/apps/server/src/routes/models.ts
index b3266d0..f0bd3a8 100644
--- a/apps/server/src/routes/models.ts
+++ b/apps/server/src/routes/models.ts
@@ -2,26 +2,55 @@ import type { FastifyInstance } from 'fastify';
 import type { Config } from '../config.js';
 import type { ModelInfo } from '../types/api.js';
 
-interface LlamaSwapModelsResponse {
+interface ApiModelsResponse {
   data?: ModelInfo[];
 }
 
+const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
+  { id: 'deepseek-v4-flash', object: 'model', created: 0, owned_by: 'deepseek' },
+  { id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
+];
+
 export function registerModelRoutes(app: FastifyInstance, config: Config): void {
   app.get('/api/models', async (_req, reply) => {
+    const models: ModelInfo[] = [];
+
+    // 1. Fetch llama-swap models
     try {
       const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
-      if (!res.ok) {
-        reply.code(502);
-        return { error: `llama-swap returned ${res.status}` };
+      if (res.ok) {
+        const parsed = (await res.json()) as ApiModelsResponse;
+        if (parsed.data) models.push(...parsed.data);
       }
-      const parsed = (await res.json()) as LlamaSwapModelsResponse;
-      return parsed.data ?? [];
-    } catch (err) {
-      reply.code(502);
-      return {
-        error: 'failed to reach llama-swap',
-        details: err instanceof Error ? err.message : String(err),
-      };
+    } catch {
+      // llama-swap unreachable — proceed with whatever we have
     }
+
+    // 2. If DeepSeek is configured, fetch live models from their API
+    if (config.DEEPSEEK_API_KEY) {
+      try {
+        const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
+        const res = await fetch(`${baseURL}/v1/models`, {
+          headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
+          signal: AbortSignal.timeout(5_000),
+        });
+        if (res.ok) {
+          const parsed = (await res.json()) as ApiModelsResponse;
+          if (parsed.data) models.push(...parsed.data);
+        } else {
+          // API call failed — fall back to static model list
+          models.push(...DEEPSEEK_STATIC_MODELS);
+        }
+      } catch {
+        // Network error — fall back to static model list
+        models.push(...DEEPSEEK_STATIC_MODELS);
+      }
+    }
+
+    if (models.length === 0) {
+      reply.code(502);
+      return { error: 'no models available from any provider' };
+    }
+    return models;
   });
 }
diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql
index d6d3089..dd73ae2 100644
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -126,8 +126,8 @@ SELECT
      FROM message_parts p
     WHERE p.message_id = m.id AND p.kind = 'reasoning' AND p.hidden_at IS NULL) AS reasoning_parts,
   -- NEW columns MUST be appended at the end: CREATE OR REPLACE VIEW can't
-  -- reorder/rename existing columns (42P16). m.model added last.
-  m.model
+  -- reorder/rename existing columns (42P16). cache_tokens and reasoning_tokens added last.
+  m.model, m.cache_tokens, m.reasoning_tokens
 FROM messages m;
 
 -- v1.13.20: drop legacy tool_calls/tool_results columns. Reads have routed
@@ -204,6 +204,8 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS ctx_used INTEGER;
 ALTER TABLE messages ADD COLUMN IF NOT EXISTS ctx_max INTEGER;
 ALTER TABLE messages ADD COLUMN IF NOT EXISTS started_at TIMESTAMPTZ;
 ALTER TABLE messages ADD COLUMN IF NOT EXISTS finished_at TIMESTAMPTZ;
+ALTER TABLE messages ADD COLUMN IF NOT EXISTS cache_tokens INTEGER;
+ALTER TABLE messages ADD COLUMN IF NOT EXISTS reasoning_tokens INTEGER;
 
 ALTER TABLE sessions ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp();
 
diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts
index e4f0697..e4769bc 100644
--- a/apps/server/src/services/agents.ts
+++ b/apps/server/src/services/agents.ts
@@ -106,6 +106,8 @@ interface ParsedFrontmatter {
   // allowed" — the model responds text-only.
   steps?: number;
   llama_extra_args?: string[];
+  // vDeepSeek: thinking effort for DeepSeek V4 models.
+  reasoning_effort?: string;
 }
 
 // P5: table-driven validation for the "soft-range" numeric frontmatter fields.
@@ -386,6 +388,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
     max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
     steps: typeof fm.steps === 'number' ? fm.steps : null,
     llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
+    reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
   };
 }
 
diff --git a/apps/server/src/services/compaction.ts b/apps/server/src/services/compaction.ts
index b283f7b..a0e5e2d 100644
--- a/apps/server/src/services/compaction.ts
+++ b/apps/server/src/services/compaction.ts
@@ -24,6 +24,8 @@ import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
 import * as modelContextLookup from './model-context.js';
 import { SENTINEL_KINDS } from './inference/sentinels.js';
 import type { OpenAiMessage } from './inference/payload.js';
+import { resolveModelEndpoint } from './inference/provider.js';
+import type { HookRunner } from './hooks.js';
 
 // v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
 // (opencode session/overflow.ts pattern). Replaces the v1.11.0-era
@@ -346,20 +348,22 @@ interface CompletionResult {
   completionTokens: number;
 }
 
-async function callLlamaSwap(
+async function callLlm(
   config: Config,
   model: string,
   messages: OpenAiMessage[],
   log: FastifyBaseLogger,
 ): Promise<CompletionResult> {
-  const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/chat/completions`, {
+  const { url, headers, model: resolvedModel } = resolveModelEndpoint(config, model);
+  const res = await fetch(`${url}/v1/chat/completions`, {
     method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({ model, messages, stream: false }),
+    headers,
+    body: JSON.stringify({ model: resolvedModel, messages, stream: false }),
   });
   if (!res.ok) {
     const text = await res.text().catch(() => '');
-    throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
+    const prefix = model.startsWith('deepseek-') ? 'deepseek' : 'llama-swap';
+    throw new Error(`${prefix} returned ${res.status}: ${text.slice(0, 200)}`);
   }
   const json = (await res.json()) as {
     choices?: Array<{ message?: { content?: string } }>;
@@ -383,6 +387,8 @@ export interface ProcessInput {
   log: FastifyBaseLogger;
   broker: Broker;
   chatId: string;
+  /** vWhale: lifecycle hooks runner. Undefined when no hooks configured. */
+  hooks?: HookRunner;
 }
 
 // Runs one round of anchored rolling compaction on `chatId`. No-ops cleanly
@@ -497,6 +503,17 @@ export async function process(input: ProcessInput): Promise<void> {
     at: new Date().toISOString(),
   });
 
+  // vWhale: PreCompact hook (best-effort, non-blocking).
+  const msgBefore = messages.length;
+  if (input.hooks) {
+    input.hooks.run('PreCompact', {
+      event: 'PreCompact',
+      session_id: sessionId,
+      chat_id: chatId,
+      messages_before: msgBefore,
+    }).catch(() => {});
+  }
+
   // try/finally so the dot ALWAYS drops back to idle, even if the LLM call
   // throws or a downstream DB write fails. The succeeded flag gates the
   // 'compacted' frame + final log: we only signal completion to the UI when
@@ -506,7 +523,7 @@ export async function process(input: ProcessInput): Promise<void> {
   let result: CompletionResult | undefined;
   try {
     // 7. Single completion (no tools). Throws on llama-swap failure.
-    result = await callLlamaSwap(config, session.model, payload, log);
+    result = await callLlm(config, session.model, payload, log);
 
     // 7b. v1.11.3: fetch the model's true context window from llama-swap's
     // /upstream/<model>/props (the streaming completion doesn't carry it).
@@ -558,6 +575,18 @@ export async function process(input: ProcessInput): Promise<void> {
     `;
 
     succeeded = true;
+
+    // vWhale: PostCompact hook (best-effort, non-blocking).
+    if (input.hooks) {
+      input.hooks.run('PostCompact', {
+        event: 'PostCompact',
+        session_id: sessionId,
+        chat_id: chatId,
+        messages_before: msgBefore,
+        messages_after: sel.head.length,
+        summary: (result?.content ?? '').slice(0, 500),
+      }).catch(() => {});
+    }
   } finally {
     // Always restore the dot. Status='idle' (not 'error') even on failure —
     // the caller logs/re-surfaces the error separately; the dot doesn't
diff --git a/apps/server/src/services/hooks.ts b/apps/server/src/services/hooks.ts
new file mode 100644
index 0000000..ab60909
--- /dev/null
+++ b/apps/server/src/services/hooks.ts
@@ -0,0 +1,299 @@
+/**
+ * vWhale: lifecycle hook runner. Hooks are shell commands that fire at key
+ * points in the inference pipeline. Each hook receives a JSON payload on
+ * stdin and can return JSON on stdout to influence behavior.
+ *
+ * Inspired by Whale's hook system with 11 lifecycle events. BooCode
+ * implements the most relevant subset: PreToolUse, PostToolUse,
+ * UserPromptSubmit, Stop, PreCompact, PostCompact.
+ *
+ * Config: JSON file at HOOKS_CONFIG_PATH (default /data/hooks.json).
+ * Format:
+ * ```json
+ * {
+ *   "hooks": {
+ *     "PreToolUse": [
+ *       { "match": "shell_run", "command": "python3 /data/hooks/check_shell.py", "timeout": 30 }
+ *     ],
+ *     "Stop": [
+ *       { "command": "node /data/hooks/log_turn.mjs" }
+ *     ]
+ *   }
+ * }
+ * ```
+ */
+
+import { spawn } from 'node:child_process';
+import { readFileSync, existsSync } from 'node:fs';
+import type { FastifyBaseLogger } from 'fastify';
+
+// ─── Events ───────────────────────────────────────────────────────────────
+
+export type HookEvent =
+  | 'PreToolUse'
+  | 'PostToolUse'
+  | 'UserPromptSubmit'
+  | 'Stop'
+  | 'PreCompact'
+  | 'PostCompact';
+
+const ALL_EVENTS: HookEvent[] = [
+  'PreToolUse',
+  'PostToolUse',
+  'UserPromptSubmit',
+  'Stop',
+  'PreCompact',
+  'PostCompact',
+];
+
+// ─── Config ────────────────────────────────────────────────────────────────
+
+export interface HookConfig {
+  /** Glob or exact tool name to match (PreToolUse/PostToolUse only). Omit or '*' for all. */
+  match?: string;
+  /** Shell command to run. Receives JSON payload on stdin. */
+  command: string;
+  /** Timeout in seconds (default 30). */
+  timeout?: number;
+}
+
+export interface HooksConfig {
+  hooks: Partial<Record<HookEvent, HookConfig[]>>;
+}
+
+// ─── Payloads ──────────────────────────────────────────────────────────────
+
+export interface PreToolUsePayload {
+  event: 'PreToolUse';
+  session_id: string;
+  tool_name: string;
+  tool_args: Record<string, unknown>;
+}
+
+export interface PostToolUsePayload {
+  event: 'PostToolUse';
+  session_id: string;
+  tool_name: string;
+  tool_args: Record<string, unknown>;
+  tool_result: unknown;
+  tool_error?: string;
+}
+
+export interface UserPromptSubmitPayload {
+  event: 'UserPromptSubmit';
+  session_id: string;
+  chat_id: string;
+  prompt: string;
+}
+
+export interface StopPayload {
+  event: 'Stop';
+  session_id: string;
+  chat_id: string;
+  last_assistant_text: string;
+  turn: number;
+}
+
+export interface PreCompactPayload {
+  event: 'PreCompact';
+  session_id: string;
+  chat_id: string;
+  messages_before: number;
+}
+
+export interface PostCompactPayload {
+  event: 'PostCompact';
+  session_id: string;
+  chat_id: string;
+  messages_before: number;
+  messages_after: number;
+  summary: string;
+}
+
+export type HookPayload =
+  | PreToolUsePayload
+  | PostToolUsePayload
+  | UserPromptSubmitPayload
+  | StopPayload
+  | PreCompactPayload
+  | PostCompactPayload;
+
+// ─── Response ──────────────────────────────────────────────────────────────
+
+export type HookDecision = 'pass' | 'warn' | 'block';
+
+export interface HookResponse {
+  decision?: HookDecision;
+  reason?: string;
+  /** When present, replaces the original tool args / user prompt. */
+  updated_input?: Record<string, unknown> | string;
+  /** Injected into the model's context for the next turn. */
+  additional_context?: string;
+}
+
+// ─── Runner ────────────────────────────────────────────────────────────────
+
+export interface HookRunner {
+  /** Run all hooks for the given event. Returns the effective response. */
+  run(event: HookEvent, payload: HookPayload, log?: FastifyBaseLogger): Promise<HookResponse>;
+}
+
+let hooksConfig: HooksConfig | null = null;
+let hooksPath: string | null = null;
+
+/** Load hooks config from disk. Missing file = no hooks. Never throws. */
+export function loadHooksConfig(path: string): HooksConfig {
+  hooksPath = path;
+  if (!existsSync(path)) {
+    hooksConfig = { hooks: {} };
+    return hooksConfig;
+  }
+  try {
+    const raw = readFileSync(path, 'utf8');
+    const parsed = JSON.parse(raw) as HooksConfig;
+    hooksConfig = {
+      hooks: { ...parsed.hooks },
+    };
+    // Validate event names
+    for (const event of Object.keys(hooksConfig.hooks)) {
+      if (!ALL_EVENTS.includes(event as HookEvent)) {
+        console.warn(`hooks: unknown event '${event}' in ${path} — ignoring`);
+        delete hooksConfig.hooks[event as HookEvent];
+      }
+    }
+  } catch (err) {
+    console.error(`hooks: failed to load ${path}`, err);
+    hooksConfig = { hooks: {} };
+  }
+  return hooksConfig;
+}
+
+/** Reload the config file (call after a PATCH). */
+export function reloadHooksConfig(): HooksConfig {
+  if (hooksPath) return loadHooksConfig(hooksPath);
+  hooksConfig = { hooks: {} };
+  return hooksConfig;
+}
+
+function getConfig(): HooksConfig {
+  return hooksConfig ?? { hooks: {} };
+}
+
+/** Create a HookRunner for the current config. */
+export function createHookRunner(): HookRunner {
+  return {
+    async run(event, payload, log): Promise<HookResponse> {
+      const configs = getConfig().hooks[event];
+      if (!configs || configs.length === 0) return { decision: 'pass' };
+
+      // Pre-filter by match pattern for tool events
+      const toolName = 'tool_name' in payload ? (payload as PreToolUsePayload).tool_name : undefined;
+
+      let effective: HookResponse = { decision: 'pass' };
+
+      for (const cfg of configs) {
+        // Skip if match doesn't apply
+        if (toolName && cfg.match && cfg.match !== '*' && cfg.match !== toolName) continue;
+
+        const result = await runSingleHook(cfg, payload, log);
+        // Merge decisions: block > warn > pass
+        if (result.decision === 'block') {
+          effective = { ...result, decision: 'block' };
+          break; // block is terminal
+        }
+        if (result.decision === 'warn' && effective.decision !== 'block') {
+          effective = { ...result, decision: 'warn' };
+        }
+        // Merge additional_context and updated_input
+        if (result.additional_context) {
+          effective.additional_context = effective.additional_context
+            ? effective.additional_context + '\n' + result.additional_context
+            : result.additional_context;
+        }
+        if (result.updated_input && !effective.updated_input) {
+          effective.updated_input = result.updated_input;
+        }
+      }
+
+      return effective;
+    },
+  };
+}
+
+async function runSingleHook(
+  cfg: HookConfig,
+  payload: HookPayload,
+  log?: FastifyBaseLogger,
+): Promise<HookResponse> {
+  const timeoutMs = (cfg.timeout ?? 30) * 1000;
+
+  return new Promise((resolve) => {
+    const child = spawn('sh', ['-c', cfg.command], {
+      stdio: ['pipe', 'pipe', 'pipe'],
+      timeout: timeoutMs,
+      env: { ...process.env },
+    });
+
+    const stdout: Buffer[] = [];
+    const stderr: Buffer[] = [];
+
+    child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
+    child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
+
+    let settled = false;
+    const timer = setTimeout(() => {
+      if (!settled) {
+        settled = true;
+        child.kill('SIGTERM');
+        log?.warn({ event: payload.event, command: cfg.command }, 'hooks: timeout');
+        resolve({ decision: 'warn', reason: 'hook timed out' });
+      }
+    }, timeoutMs);
+
+    child.on('error', (err) => {
+      if (!settled) {
+        settled = true;
+        clearTimeout(timer);
+        log?.warn({ err, event: payload.event }, 'hooks: spawn error');
+        resolve({ decision: 'warn', reason: `hook failed: ${err.message}` });
+      }
+    });
+
+    child.on('close', (code) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timer);
+
+      const out = Buffer.concat(stdout).toString('utf8').trim();
+      const errOut = Buffer.concat(stderr).toString('utf8').trim();
+
+      if (code !== 0 && !out) {
+        log?.warn({ event: payload.event, code, stderr: errOut.slice(0, 200) }, 'hooks: non-zero exit');
+        resolve({ decision: 'warn', reason: `hook exited ${code}` });
+        return;
+      }
+
+      // Parse stdout as JSON response
+      if (out) {
+        try {
+          const parsed = JSON.parse(out) as HookResponse;
+          resolve(parsed);
+          return;
+        } catch {
+          // Not JSON — treat as pass with stdout as context
+          if (out.length > 0) {
+            resolve({ decision: 'pass', additional_context: out });
+            return;
+          }
+        }
+      }
+
+      resolve({ decision: 'pass' });
+    });
+
+    // Write payload to stdin
+    const json = JSON.stringify(payload);
+    child.stdin.write(json);
+    child.stdin.end();
+  });
+}
diff --git a/apps/server/src/services/inference/error-handler.ts b/apps/server/src/services/inference/error-handler.ts
index 4bd438d..b0aed4a 100644
--- a/apps/server/src/services/inference/error-handler.ts
+++ b/apps/server/src/services/inference/error-handler.ts
@@ -122,6 +122,8 @@ export async function finalizeStreamedRow(
     completionTokens: number | null;
     promptTokens: number | null;
     startedAt: string | null;
+    cacheTokens?: number | null;
+    reasoningTokens?: number | null;
     beforeComplete?: () => Promise<void>;
   },
 ): Promise<void> {
@@ -137,6 +139,8 @@ export async function finalizeStreamedRow(
         tokens_used = ${opts.completionTokens},
         ctx_used = ${opts.promptTokens},
         ctx_max = ${nCtx},
+        cache_tokens = ${opts.cacheTokens ?? null},
+        reasoning_tokens = ${opts.reasoningTokens ?? null},
         finished_at = clock_timestamp()
     WHERE id = ${opts.messageId}
     RETURNING tokens_used, ctx_used, ctx_max, finished_at
@@ -149,6 +153,8 @@ export async function finalizeStreamedRow(
     tokens_used: updated?.tokens_used ?? null,
     ctx_used: updated?.ctx_used ?? null,
     ctx_max: updated?.ctx_max ?? null,
+    cache_tokens: opts.cacheTokens ?? null,
+    reasoning_tokens: opts.reasoningTokens ?? null,
     started_at: opts.startedAt,
     finished_at: updated?.finished_at ?? null,
     model: opts.model,
@@ -188,7 +194,7 @@ export async function finalizeCompletion(
 ): Promise<void> {
   const { sessionId, chatId, assistantMessageId } = args;
   const content = stripToolMarkup(result.content, { final: true });
-  const { finishReason, promptTokens, completionTokens } = result;
+  const { finishReason, promptTokens, completionTokens, cacheReadTokens, reasoningTokens } = result;
 
   // v1.11.3: see executeToolPhase for the rationale.
   const mctx = await modelContext.getModelContext(session.model);
@@ -203,6 +209,8 @@ export async function finalizeCompletion(
         tokens_used = ${completionTokens},
         ctx_used = ${promptTokens},
         ctx_max = ${nCtx},
+        cache_tokens = ${cacheReadTokens ?? null},
+        reasoning_tokens = ${reasoningTokens ?? null},
         model = ${session.model},
         finished_at = clock_timestamp()
     WHERE id = ${assistantMessageId}
@@ -268,6 +276,8 @@ export async function finalizeCompletion(
     tokens_used: updated?.tokens_used ?? null,
     ctx_used: updated?.ctx_used ?? null,
     ctx_max: updated?.ctx_max ?? null,
+    cache_tokens: cacheReadTokens ?? null,
+    reasoning_tokens: reasoningTokens ?? null,
     started_at: startedAt,
     finished_at: updated?.finished_at ?? null,
     model: session.model,
diff --git a/apps/server/src/services/inference/provider.ts b/apps/server/src/services/inference/provider.ts
index cf10dfe..8191561 100644
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -1,4 +1,5 @@
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
+import { createDeepSeek } from '@ai-sdk/deepseek';
 import type { LanguageModel } from 'ai';
 
 // v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
@@ -11,6 +12,12 @@ import type { LanguageModel } from 'ai';
 // llama-sidecar instead. A fresh provider is created per call (not cached)
 // because the X-Agent-Flags header varies per agent. The llama-swap path
 // stays cached since it has no per-request headers.
+//
+// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
+// is set, route through the official @ai-sdk/deepseek provider (not
+// openai-compatible) so DeepSeek-specific features work: providerMetadata
+// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
+// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
 
 const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
 
@@ -41,7 +48,28 @@ function sidecarProvider(
   });
 }
 
-export type InferenceRoute = 'swap' | 'sidecar';
+const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
+
+export function isDeepSeekModel(modelId: string): boolean {
+  return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
+}
+
+let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
+
+function getDeepSeekProvider(
+  apiKey: string,
+  baseURL: string,
+): ReturnType<typeof createDeepSeek> {
+  if (!deepseekProviderCache) {
+    deepseekProviderCache = createDeepSeek({
+      apiKey,
+      baseURL,
+    });
+  }
+  return deepseekProviderCache;
+}
+
+export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';
 
 export interface RoutingInfo {
   route: InferenceRoute;
@@ -55,12 +83,21 @@ interface AgentLike {
 interface ConfigLike {
   LLAMA_SWAP_URL: string;
   LLAMA_SIDECAR_URL?: string;
+  DEEPSEEK_API_KEY?: string;
+  DEEPSEEK_BASE_URL?: string;
 }
 
 export function resolveRoute(
   agent: AgentLike | null,
   config?: ConfigLike,
+  modelId?: string,
 ): RoutingInfo {
+  // vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
+  // route through the DeepSeek provider. Checked first so DeepSeek models
+  // always bypass llama-swap/sidecar even when those are also configured.
+  if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
+    return { route: 'deepseek', flags: null };
+  }
   // When llama_extra_args are explicitly set, route through sidecar with them.
   const flags = agent?.llama_extra_args;
   if (flags && flags.length > 0) {
@@ -80,7 +117,13 @@ export function upstreamModel(
   modelId: string,
   agent?: AgentLike | null,
 ): LanguageModel {
-  const { route, flags } = resolveRoute(agent ?? null, config);
+  const { route, flags } = resolveRoute(agent ?? null, config, modelId);
+  if (route === 'deepseek') {
+    return getDeepSeekProvider(
+      config.DEEPSEEK_API_KEY!,
+      config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
+    ).chat(modelId);
+  }
   if (route === 'sidecar') {
     const url = config.LLAMA_SIDECAR_URL;
     if (!url) {
@@ -90,3 +133,30 @@ export function upstreamModel(
   }
   return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
 }
+
+/** Resolve the API endpoint for non-streaming calls (compaction, task-model).
+ *  Returns the URL + model + optional auth header for direct fetch() usage. */
+export function resolveModelEndpoint(
+  config: ConfigLike,
+  modelId: string,
+): { url: string; model: string; headers: Record<string, string> } {
+  const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
+  if (modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && config.DEEPSEEK_API_KEY) {
+    const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
+    return {
+      url: baseURL,
+      model: modelId,
+      headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
+    };
+  }
+  return {
+    url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
+    model: modelId,
+    headers: baseHeaders,
+  };
+}
+
+/** Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime). */
+export function resetDeepSeekProvider(): void {
+  deepseekProviderCache = null;
+}
diff --git a/apps/server/src/services/inference/stream-phase-adapter.ts b/apps/server/src/services/inference/stream-phase-adapter.ts
index 7094649..c16262a 100644
--- a/apps/server/src/services/inference/stream-phase-adapter.ts
+++ b/apps/server/src/services/inference/stream-phase-adapter.ts
@@ -13,7 +13,7 @@ import type { OpenAiMessage } from './payload.js';
 import { extractToolCallBlocks } from './tool-call-parser.js';
 import { classifyStreamError } from './stream-error-classifier.js';
 import type { StreamResult } from './types.js';
-import { upstreamModel } from './provider.js';
+import { isDeepSeekModel, upstreamModel } from './provider.js';
 import {
   jsonSchema,
   streamText,
@@ -51,6 +51,9 @@ export interface StreamOptions {
   dry_base?: number | null;
   dry_allowed_length?: number | null;
   dry_penalty_last_n?: number | null;
+  // vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
+  // API param for deepseek-v4-flash / deepseek-v4-pro models.
+  reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
 }
 
 // P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
@@ -74,6 +77,7 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
     dry_base: agent?.dry_base ?? undefined,
     dry_allowed_length: agent?.dry_allowed_length ?? undefined,
     dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
+    reasoning_effort: agent?.reasoning_effort ?? undefined,
   };
 }
 
@@ -272,6 +276,19 @@ export async function streamCompletion(
   // before this. They now go through the same extraBody path as the new params.
   const samplerBody = buildSamplerProviderOptions(opts);
 
+  // vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
+  let deepseekProviderOptions:
+    | { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
+    | undefined;
+  if (isDeepSeekModel(model)) {
+    const dsEffort = opts.reasoning_effort;
+    const thinkingEnabled = dsEffort && dsEffort !== 'off';
+    deepseekProviderOptions = {
+      thinking: { type: thinkingEnabled ? 'enabled' : 'disabled' },
+      ...(thinkingEnabled ? { reasoningEffort: dsEffort } : {}),
+    };
+  }
+
   // F6: per-chunk stall deadline. If the model stops emitting chunks for
   // STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
   // abort check below then throws AbortError → handleAbortOrError writes
@@ -297,7 +314,14 @@ export async function streamCompletion(
     ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
     ...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
     ...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
-    ...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
+    ...(samplerBody || deepseekProviderOptions
+      ? {
+          providerOptions: {
+            ...(samplerBody ? { openaiCompatible: samplerBody } : {}),
+            ...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
+          },
+        }
+      : {}),
     abortSignal: effectiveSignal,
   });
 
@@ -401,12 +425,26 @@ export async function streamCompletion(
 
   // Usage lands as a promise on the result; awaiting after fullStream is
   // drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
+  // Some providers (llama-swap via openai-compatible) return plain numbers;
+  // others (deepseek via @ai-sdk/deepseek) return {total, cacheRead, noCache, ...}.
   let promptTokens: number | null = null;
   let completionTokens: number | null = null;
+  let cacheReadTokens: number | null = null;
+  let reasoningTokens: number | null = null;
   try {
     const usage = await result.usage;
-    if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
-    if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
+    if (typeof usage.inputTokens === 'number') {
+      promptTokens = usage.inputTokens;
+    } else if (usage.inputTokens && typeof usage.inputTokens === 'object') {
+      promptTokens = (usage.inputTokens as Record<string, number | undefined>).total ?? null;
+      cacheReadTokens = (usage.inputTokens as Record<string, number | undefined>).cacheRead ?? null;
+    }
+    if (typeof usage.outputTokens === 'number') {
+      completionTokens = usage.outputTokens;
+    } else if (usage.outputTokens && typeof usage.outputTokens === 'object') {
+      completionTokens = (usage.outputTokens as Record<string, number | undefined>).total ?? null;
+      reasoningTokens = (usage.outputTokens as Record<string, number | undefined>).reasoning ?? null;
+    }
   } catch {
     // Some providers omit usage on partial streams; leave both null.
   }
@@ -422,6 +460,13 @@ export async function streamCompletion(
     );
   }
 
+  if (cacheReadTokens !== null || reasoningTokens !== null) {
+    ctx.log.debug(
+      { promptTokens, completionTokens, cacheReadTokens, reasoningTokens, model },
+      'streamCompletion: deepseek usage breakdown',
+    );
+  }
+
   return {
     finishReason,
     content,
@@ -429,6 +474,10 @@ export async function streamCompletion(
     promptTokens,
     completionTokens,
     reasoning: reasoningAccumulated,
+    // vDeepSeek: optional usage breakdown populated when the provider returns
+    // structured usage (cache hit tokens, reasoning tokens).
+    cacheReadTokens: cacheReadTokens ?? undefined,
+    reasoningTokens: reasoningTokens ?? undefined,
   };
   } finally {
     // Clear the stall timer whether the stream completes normally, throws, or
diff --git a/apps/server/src/services/inference/tool-input-repair.ts b/apps/server/src/services/inference/tool-input-repair.ts
new file mode 100644
index 0000000..52941f0
--- /dev/null
+++ b/apps/server/src/services/inference/tool-input-repair.ts
@@ -0,0 +1,179 @@
+/**
+ * vWhale: schema-based tool input repair. When the model emits tool call args
+ * that don't match the expected types (common with weaker models), apply
+ * heuristic repairs before falling through to the Zod parse.
+ *
+ * Inspired by Whale's RepairToolInputForSpec:
+ *   - Coerce string "true"/"false" → boolean
+ *   - Unwrap markdown autolinks in string fields: <file:///path> → /path
+ *   - Wrap bare values in arrays when schema expects array
+ *   - Convert "42.0" decimal string → "42" for integer fields
+ *   - Recurse into objects to repair nested properties
+ */
+
+export interface ToolInputRepair {
+  field: string;
+  kind: string;
+  detail: string;
+}
+
+const MARKDOWN_AUTOLINK_RE = /^<(?:file|path):\/\/(.+?)>$/;
+
+/**
+ * Attempt to repair tool call args against the tool's JSON Schema.
+ * Returns the (possibly modified) args plus a list of repairs applied.
+ */
+export function repairToolInput(
+  schema: Record<string, unknown> | undefined,
+  args: Record<string, unknown>,
+): { repaired: Record<string, unknown>; repairs: ToolInputRepair[] } {
+  const repairs: ToolInputRepair[] = [];
+  if (!schema || typeof schema !== 'object') {
+    return { repaired: args, repairs };
+  }
+
+  const properties = (schema as Record<string, unknown>).properties as
+    Record<string, unknown> | undefined;
+  if (!properties) {
+    return { repaired: args, repairs };
+  }
+
+  const required = new Set<string>(
+    Array.isArray((schema as Record<string, unknown>).required)
+      ? (schema as Record<string, unknown>).required as string[]
+      : [],
+  );
+
+  const repaired: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(args)) {
+    const propSchema = properties[key] as Record<string, unknown> | undefined;
+    if (propSchema && value !== null && value !== undefined) {
+      repaired[key] = repairValue(key, propSchema, value, repairs, required.has(key));
+    } else {
+      repaired[key] = value;
+    }
+  }
+
+  // Drop keys not in the schema (only for required fields that are missing)
+  // to avoid polluting the model with hallucinated params.
+  for (const key of Object.keys(repaired)) {
+    if (!(key in properties)) {
+      repairs.push({ field: key, kind: 'removed_unknown', detail: `Removed unknown parameter '${key}'` });
+      delete repaired[key];
+    }
+  }
+
+  return { repaired, repairs };
+}
+
+function repairValue(
+  field: string,
+  schema: Record<string, unknown>,
+  value: unknown,
+  repairs: ToolInputRepair[],
+  required: boolean,
+): unknown {
+  const schemaType = schema.type;
+  const isArray = schemaType === 'array' || Array.isArray(schemaType)
+    ? schemaType === 'array' || (Array.isArray(schemaType) && schemaType.includes('array'))
+    : false;
+  const isObject = schemaType === 'object';
+  const isBoolean = schemaType === 'boolean';
+  const isInteger = schemaType === 'integer' || schemaType === 'number';
+  const isString = schemaType === 'string';
+
+  // --- Array repair: wrap bare value or empty object ---
+  if (isArray) {
+    if (!Array.isArray(value)) {
+      if (typeof value === 'string') {
+        // Try parsing as JSON array first
+        try {
+          const parsed = JSON.parse(value);
+          if (Array.isArray(parsed)) {
+            repairs.push({ field, kind: 'parsed_json_array', detail: `Parsed string as JSON array for '${field}'` });
+            return parsed;
+          }
+        } catch { /* not JSON */ }
+      }
+      if (typeof value === 'object' && value !== null && Object.keys(value).length === 0) {
+        if (required) {
+          repairs.push({ field, kind: 'empty_object_to_array', detail: `Converted empty object to empty array for '${field}'` });
+          return [];
+        }
+        repairs.push({ field, kind: 'empty_object_to_undefined', detail: `Removed empty object for optional array '${field}'` });
+        return undefined;
+      }
+      repairs.push({ field, kind: 'wrapped_in_array', detail: `Wrapped bare value in array for '${field}'` });
+      return [value];
+    }
+    // Recurse into array items
+    const itemsSchema = schema.items as Record<string, unknown> | undefined;
+    if (itemsSchema) {
+      return value.map((item, i) => repairValue(`${field}[${i}]`, itemsSchema, item, repairs, required));
+    }
+    return value;
+  }
+
+  // --- Object repair: recurse into properties ---
+  if (isObject && typeof value === 'object' && value !== null && !Array.isArray(value)) {
+    const props = (schema.properties as Record<string, unknown>) ?? {};
+    const repaired: Record<string, unknown> = {};
+    for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
+      const propSchema = props[k] as Record<string, unknown> | undefined;
+      if (propSchema) {
+        repaired[k] = repairValue(`${field}.${k}`, propSchema, v, repairs, required);
+      } else {
+        repaired[k] = v;
+      }
+    }
+    return repaired;
+  }
+
+  // --- String repair: unwrap markdown autolinks ---
+  if (isString && typeof value === 'string') {
+    const match = value.match(MARKDOWN_AUTOLINK_RE);
+    if (match) {
+      repairs.push({ field, kind: 'unwrapped_markdown_link', detail: `Unwrapped markdown autolink for '${field}': ${value}` });
+      return match[1];
+    }
+    return value;
+  }
+
+  // --- Boolean coercion ---
+  if (isBoolean && typeof value === 'string') {
+    const lower = value.toLowerCase();
+    if (lower === 'true') {
+      repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → true for '${field}'` });
+      return true;
+    }
+    if (lower === 'false') {
+      repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → false for '${field}'` });
+      return false;
+    }
+    return value;
+  }
+
+  // --- Integer coercion: "42.0" → 42 ---
+  if (isInteger && typeof value === 'string') {
+    const num = Number(value);
+    if (!Number.isNaN(num)) {
+      repairs.push({ field, kind: 'coerced_to_number', detail: `Coerced string '${value}' → ${num} for '${field}'` });
+      return num;
+    }
+    return value;
+  }
+
+  // --- Integer coercion: boolean → 0/1 ---
+  if (isInteger && typeof value === 'boolean') {
+    repairs.push({ field, kind: 'coerced_boolean_to_integer', detail: `Coerced boolean ${value} → ${value ? 1 : 0} for '${field}'` });
+    return value ? 1 : 0;
+  }
+
+  // --- Empty string to null for optional fields ---
+  if (value === '' && !required) {
+    repairs.push({ field, kind: 'empty_string_to_undefined', detail: `Converted empty string for optional '${field}'` });
+    return undefined;
+  }
+
+  return value;
+}
diff --git a/apps/server/src/services/inference/tool-phase.ts b/apps/server/src/services/inference/tool-phase.ts
index f0e8d44..1adeb2e 100644
--- a/apps/server/src/services/inference/tool-phase.ts
+++ b/apps/server/src/services/inference/tool-phase.ts
@@ -6,6 +6,7 @@ import type { ToolExecCtx } from '../tools.js';
 import { matchToolGlob } from '../agents.js';
 import { maybeFlagForCompaction } from './payload.js';
 import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
+import { getServerPermission } from '../mcp-client.js';
 // v1.13.16: richer unknown-tool error so the model can self-correct when it
 // drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
 // Applies to all unknown tool names, not just <invoke>-derived ones — at the
@@ -17,6 +18,7 @@ import { formatUnknownToolError } from './tool-suggestions.js';
 // prompted about paths we couldn't grant anyway (e.g. /etc/passwd).
 import { resolveGrantRoot } from '../grant_resolver.js';
 import { stripToolMarkup } from './tool-call-parser.js';
+import { repairToolInput } from './tool-input-repair.js';
 import type { FailureKind } from './mistake-tracker.js';
 import type {
   InferenceContext,
@@ -34,6 +36,8 @@ async function executeToolCall(
   toolCall: ToolCall,
   extraRoots: readonly string[],
   toolCtx?: ToolExecCtx,
+  hooks?: import('../hooks.js').HookRunner,
+  sessionId?: string,
 ): Promise<{ output: unknown; truncated: boolean; error?: string; outcome: FailureKind | 'success' }> {
   // v#12 MistakeTracker: every return path carries an `outcome` so the turn
   // loop can detect a run of heterogeneous failures. The failure taxonomy
@@ -48,7 +52,61 @@ async function executeToolCall(
       outcome: 'tool_not_found',
     };
   }
-  const parsed = tool.inputSchema.safeParse(toolCall.args);
+  // MCP permission gate — block deny/ask before any Zod parsing or execution
+  const mcpPerm = getServerPermission(toolCall.name);
+  if (mcpPerm === 'deny') {
+    return { output: null, truncated: false, error: `blocked: MCP server denied tool '${toolCall.name}'`, outcome: 'permission_denied' };
+  }
+  if (mcpPerm === 'ask') {
+    return { output: null, truncated: false, error: `requires approval: tool '${toolCall.name}' needs user approval`, outcome: 'permission_denied' };
+  }
+  // vWhale: schema-based tool input repair. If the Zod parse fails, attempt
+  // heuristic repairs (type coercion, markdown-link unwrapping, array wrapping)
+  // and retry. Logs repairs for debugging.
+  let args = toolCall.args;
+  let parsed = tool.inputSchema.safeParse(args);
+  if (!parsed.success) {
+    const schema = tool.jsonSchema?.function?.parameters;
+    if (schema) {
+      const { repaired: repairedArgs, repairs } = repairToolInput(
+        schema as Record<string, unknown>,
+        args as Record<string, unknown>,
+      );
+      if (repairs.length > 0) {
+        const retry = tool.inputSchema.safeParse(repairedArgs);
+        if (retry.success) {
+          args = repairedArgs;
+          parsed = retry;
+        }
+      }
+    }
+  }
+  // vWhale: PreToolUse hook — can block execution.
+  if (hooks && sessionId) {
+    const hookResult = await hooks.run('PreToolUse', {
+      event: 'PreToolUse',
+      session_id: sessionId,
+      tool_name: toolCall.name,
+      tool_args: args as Record<string, unknown>,
+    });
+    if (hookResult.decision === 'block') {
+      return {
+        output: null,
+        truncated: false,
+        error: `blocked by hook: ${hookResult.reason ?? 'PreToolUse denied'}`,
+        outcome: 'permission_denied',
+      };
+    }
+    // Apply updated_input if the hook rewrote the args
+    if (hookResult.updated_input && typeof hookResult.updated_input === 'object') {
+      const reParsed = tool.inputSchema.safeParse(hookResult.updated_input);
+      if (reParsed.success) {
+        args = hookResult.updated_input as Record<string, unknown>;
+        parsed = reParsed;
+      }
+    }
+  }
+
   if (!parsed.success) {
     // v1.12 Track B.2: enrich the zod-reject path so the model sees a
     // one-line, tool-named hint ("tool 'search_symbols' rejected — query:
@@ -183,6 +241,8 @@ export async function executeToolPhase(
     tokens_used: updated?.tokens_used ?? null,
     ctx_used: updated?.ctx_used ?? null,
     ctx_max: updated?.ctx_max ?? null,
+    cache_tokens: result.cacheReadTokens ?? null,
+    reasoning_tokens: result.reasoningTokens ?? null,
     started_at: startedAt,
     finished_at: updated?.finished_at ?? null,
     model: session.model,
@@ -318,10 +378,22 @@ export async function executeToolPhase(
         });
         return;
       }
-      const tres = await executeToolCall(projectRoot, tc, session.allowed_read_paths, {
-        sql: ctx.sql,
-        sessionId,
-      });
+      const tres = await executeToolCall(
+        projectRoot, tc, session.allowed_read_paths,
+        { sql: ctx.sql, sessionId },
+        ctx.hooks, sessionId,
+      );
+      // vWhale: PostToolUse hook (best-effort, non-blocking).
+      if (ctx.hooks) {
+        ctx.hooks.run('PostToolUse', {
+          event: 'PostToolUse',
+          session_id: sessionId,
+          tool_name: tc.name,
+          tool_args: tc.args as Record<string, unknown>,
+          tool_result: tres.output,
+          tool_error: tres.error,
+        }).catch(() => {});
+      }
       // v#12 MistakeTracker: record the real execution outcome (success or a
       // FailureKind). This is the primary signal for heterogeneous-failure
       // detection.
diff --git a/apps/server/src/services/inference/turn.ts b/apps/server/src/services/inference/turn.ts
index 12c4ff3..f6dacb2 100644
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -144,6 +144,7 @@ export async function runAssistantTurn(
           log: ctx.log,
           broker: ctx.broker,
           chatId,
+          hooks: ctx.hooks,
         });
       } catch (err) {
         ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
@@ -214,6 +215,16 @@ export async function runAssistantTurn(
 
     // ---- non-tool finish → finalize and exit ----
     if (result.toolCalls.length === 0) {
+      // vWhale: Stop hook (best-effort, non-blocking).
+      if (ctx.hooks) {
+        ctx.hooks.run('Stop', {
+          event: 'Stop',
+          session_id: sessionId,
+          chat_id: chatId,
+          last_assistant_text: result.content.slice(0, 500),
+          turn: stepNumber,
+        }).catch(() => {});
+      }
       await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
       break;
     }
@@ -309,6 +320,22 @@ export async function runAssistantTurn(
     assistantMessageId = toolPhaseResult.nextAssistantId!;
   }
 
+  // vWhale: Stop hook at post-loop exit (best-effort, non-blocking).
+  if (ctx.hooks) {
+    const loaded = await loadContext(ctx.sql, sessionId, chatId);
+    const lastAssistant = loaded?.history?.slice().reverse().find(
+      (m: import('../../types/api.js').Message) => m.role === 'assistant',
+    );
+    const content = lastAssistant?.content ?? '';
+    ctx.hooks.run('Stop', {
+      event: 'Stop',
+      session_id: sessionId,
+      chat_id: chatId,
+      last_assistant_text: content.slice(0, 500),
+      turn: stepNumber,
+    }).catch(() => {});
+  }
+
   // ---- post-loop: step-cap sentinel ----
   // When the loop exits because stepNumber reached effectiveCap, the last
   // iteration's tool phase returned 'continue' with a nextAssistantId that
diff --git a/apps/server/src/services/inference/types.ts b/apps/server/src/services/inference/types.ts
index d3f3bd0..94892a3 100644
--- a/apps/server/src/services/inference/types.ts
+++ b/apps/server/src/services/inference/types.ts
@@ -19,6 +19,7 @@ import type {
   UserStreamFrame,
 } from '../../types/api.js';
 import type { Broker } from '../broker.js';
+import type { HookRunner } from '../hooks.js';
 import type { MistakeState } from './mistake-tracker.js';
 
 export interface StreamPhaseState {
@@ -77,6 +78,8 @@ export interface InferenceFrame {
   started_at?: string | null;
   finished_at?: string | null;
   model?: string;
+  cache_tokens?: number | null;
+  reasoning_tokens?: number | null;
   session_id?: string;
   name?: string;
   // orchestrator frames ([D-6])
@@ -117,6 +120,9 @@ export interface InferenceContext {
   // inference goes through `publish`); keeping a separate field avoids
   // tempting other code paths into bypassing the session-id binding.
   broker: Broker;
+  // vWhale: lifecycle hooks runner. Undefined when no hooks configured.
+  // Hook calls are best-effort — a failing hook never blocks inference.
+  hooks?: HookRunner;
 }
 
 export interface StreamResult {
@@ -128,6 +134,12 @@ export interface StreamResult {
   // v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
   // Empty string when the model doesn't emit reasoning (most cases).
   reasoning: string;
+  // vDeepSeek: optional cache-hit token count from DeepSeek's API.
+  // Only populated when using @ai-sdk/deepseek provider (not llama-swap).
+  cacheReadTokens?: number;
+  // vDeepSeek: optional reasoning token count from DeepSeek's API.
+  // Only populated when using @ai-sdk/deepseek provider (not llama-swap).
+  reasoningTokens?: number;
 }
 
 export interface TurnArgs {
diff --git a/apps/server/src/services/mcp-client.ts b/apps/server/src/services/mcp-client.ts
index a2017c2..f588236 100644
--- a/apps/server/src/services/mcp-client.ts
+++ b/apps/server/src/services/mcp-client.ts
@@ -31,11 +31,14 @@ interface McpToolDef {
   annotations?: McpToolAnnotations;
 }
 
+export type McpPermission = 'allow' | 'ask' | 'deny';
+
 interface ServerState {
   client: Client;
   transport: StreamableHTTPClientTransport | StdioClientTransport;
   tools: ToolDef<Record<string, unknown>>[];
   type: 'streamableHttp' | 'stdio';
+  permission: McpPermission;
 }
 
 // ---- Module-level state ----
@@ -137,6 +140,14 @@ export async function callTool(
   }
 }
 
+/** Return the permission level for a given MCP server. Defaults to 'allow' if unknown. */
+export function getServerPermission(prefixedToolName: string): McpPermission {
+  const serverName = toolToServer.get(prefixedToolName);
+  if (!serverName) return 'allow';
+  const state = servers.get(serverName);
+  return state?.permission ?? 'allow';
+}
+
 /** Return all wrapped ToolDefs from all connected servers, flattened. */
 export function getTools(): ToolDef<Record<string, unknown>>[] {
   const all: ToolDef<Record<string, unknown>>[] = [];
@@ -214,7 +225,8 @@ async function connectServer(entry: McpServerEntry): Promise<void> {
     toolToServer.set(wrapped.name, name);
   }
 
-  servers.set(name, { client, transport, tools, type: config.type });
+  const permission = (config as { permission?: McpPermission }).permission ?? 'allow';
+  servers.set(name, { client, transport, tools, type: config.type, permission });
 
   log!.info(
     { server: name, type: config.type, count: tools.length, names: tools.map((t) => t.name) },
diff --git a/apps/server/src/services/mcp-config.ts b/apps/server/src/services/mcp-config.ts
index fcb04ba..e1e1750 100644
--- a/apps/server/src/services/mcp-config.ts
+++ b/apps/server/src/services/mcp-config.ts
@@ -17,12 +17,15 @@ import type { FastifyBaseLogger } from 'fastify';
 
 // ---- Zod schema ----
 
+const McpPermissionSchema = z.enum(['allow', 'ask', 'deny']).default('allow');
+
 const McpServerConfigSchema = z.discriminatedUnion('type', [
   z.object({
     type: z.literal('streamableHttp'),
     url: z.string().url(),
     headers: z.record(z.string()).optional(),
     enabled: z.boolean().default(true),
+    permission: McpPermissionSchema,
   }),
   z.object({
     type: z.literal('stdio'),
@@ -30,6 +33,7 @@ const McpServerConfigSchema = z.discriminatedUnion('type', [
     args: z.array(z.string()).default([]),
     env: z.record(z.string()).optional(),
     enabled: z.boolean().default(true),
+    permission: McpPermissionSchema,
   }),
 ]);
 
diff --git a/apps/server/src/services/message-columns.ts b/apps/server/src/services/message-columns.ts
index 33a4cac..42d1a40 100644
--- a/apps/server/src/services/message-columns.ts
+++ b/apps/server/src/services/message-columns.ts
@@ -7,10 +7,12 @@
 
 export const MESSAGE_COLUMNS =
   'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
-  'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
+  'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
+  'started_at, finished_at, created_at, metadata, ' +
   'summary, tail_start_id, compacted_at, model';
 
 export const INFERENCE_MESSAGE_COLUMNS =
   'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
-  'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
+  'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
+  'started_at, finished_at, created_at, metadata, ' +
   'reasoning_parts, model';
diff --git a/apps/server/src/services/model-context.ts b/apps/server/src/services/model-context.ts
index d390bfe..4ef6710 100644
--- a/apps/server/src/services/model-context.ts
+++ b/apps/server/src/services/model-context.ts
@@ -37,7 +37,18 @@ export function configureModelContext(opts: { llamaSwapUrl: string }): void {
   llamaSwapUrl = opts.llamaSwapUrl;
 }
 
+// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
+// Return a reasonable default context so compaction estimates work.
+const DEEPSEEK_DEFAULT_N_CTX = 131_072;
+const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
+
 export async function getModelContext(model: string): Promise<ModelContext | null> {
+  // vDeepSeek: DeepSeek models have no /upstream/<model>/props. Use a static
+  // default so compaction doesn't fall to the buffer-only path with tiny limits.
+  if (model.startsWith(DEEPSEEK_MODEL_PREFIX)) {
+    return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
+  }
+
   // 1. Positive cache hit — no TTL check, model n_ctx is invariant.
   const pos = positiveCache.get(model);
   if (pos) return pos;
diff --git a/apps/server/src/services/system-prompt.ts b/apps/server/src/services/system-prompt.ts
index a1dde52..533b1dd 100644
--- a/apps/server/src/services/system-prompt.ts
+++ b/apps/server/src/services/system-prompt.ts
@@ -101,7 +101,7 @@ export interface PrefixFingerprint {
   has_agent_system_prompt: boolean;
   has_session_override: boolean;
   has_project_override: boolean;
-  route: 'swap' | 'sidecar';
+  route: 'swap' | 'sidecar' | 'deepseek';
 }
 
 export interface PrefixDrift {
@@ -129,7 +129,7 @@ interface ObservedInputs {
   has_agent_system_prompt: boolean;
   has_session_override: boolean;
   has_project_override: boolean;
-  route: 'swap' | 'sidecar';
+  route: 'swap' | 'sidecar' | 'deepseek';
 }
 
 interface ObserverEntry {
diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts
index 0146343..a1264f7 100644
--- a/apps/server/src/types/api.ts
+++ b/apps/server/src/types/api.ts
@@ -127,6 +127,9 @@ export interface Agent {
   // bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
   steps: number | null;
   llama_extra_args: string[] | null;
+  // vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
+  // Maps to DeepSeek's reasoning_effort API param.
+  reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
 }
 
 // One entry per malformed `## Name` block. Per-block errors don't fail the
@@ -206,6 +209,8 @@ export interface Message {
   tokens_used: number | null;
   ctx_used: number | null;
   ctx_max: number | null;
+  cache_tokens: number | null;
+  reasoning_tokens: number | null;
   started_at: string | null;
   finished_at: string | null;
   created_at: string;
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index fba90dc..ed01ff8 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -152,6 +152,8 @@ export interface Message {
   tokens_used: number | null;
   ctx_used: number | null;
   ctx_max: number | null;
+  cache_tokens: number | null;
+  reasoning_tokens: number | null;
   // model-attribution: which model produced this assistant message (null for
   // user/system rows + pre-attribution messages). Rendered as a chip.
   model: string | null;
diff --git a/apps/web/src/components/MessageBubble.tsx b/apps/web/src/components/MessageBubble.tsx
index 4ecb881..7f50505 100644
--- a/apps/web/src/components/MessageBubble.tsx
+++ b/apps/web/src/components/MessageBubble.tsx
@@ -156,9 +156,16 @@ function StatsLine({ message }: { message: Message }) {
         : `${ctxUsed} ctx`
       : null;
 
+  const cacheHit = message.cache_tokens;
+  const reasoning = message.reasoning_tokens;
+  const cachePart = typeof cacheHit === 'number' && cacheHit > 0 ? `cache ${cacheHit}` : null;
+  const reasoningPart = typeof reasoning === 'number' && reasoning > 0 ? `think ${reasoning}` : null;
+
   const parts: string[] = [`${tokens} tokens`];
   if (tps !== null) parts.push(`${tps.toFixed(1)} tok/s`);
   if (ctxPart) parts.push(ctxPart);
+  if (cachePart) parts.push(cachePart);
+  if (reasoningPart) parts.push(reasoningPart);
 
   return (
     <div className="text-[10px] font-mono text-muted-foreground">
diff --git a/apps/web/src/hooks/useSessionStream.ts b/apps/web/src/hooks/useSessionStream.ts
index 75c4429..32ab9d3 100644
--- a/apps/web/src/hooks/useSessionStream.ts
+++ b/apps/web/src/hooks/useSessionStream.ts
@@ -123,6 +123,8 @@ function applyFrame(state: State, frame: WsFrame): State {
               ...(frame.tokens_used !== undefined ? { tokens_used: frame.tokens_used } : {}),
               ...(frame.ctx_used !== undefined ? { ctx_used: frame.ctx_used } : {}),
               ...(frame.ctx_max !== undefined ? { ctx_max: frame.ctx_max } : {}),
+              ...(frame.cache_tokens !== undefined ? { cache_tokens: frame.cache_tokens } : {}),
+              ...(frame.reasoning_tokens !== undefined ? { reasoning_tokens: frame.reasoning_tokens } : {}),
               ...(frame.started_at !== undefined ? { started_at: frame.started_at } : {}),
               ...(frame.finished_at !== undefined ? { finished_at: frame.finished_at } : {}),
               ...(frame.model !== undefined ? { model: frame.model } : {}),
diff --git a/data/AGENTS.md b/data/AGENTS.md
index 6d4e0dd..f62779c 100644
--- a/data/AGENTS.md
+++ b/data/AGENTS.md
@@ -6,7 +6,7 @@ Operating rules for every agent in this registry. Full procedures live in the `c
 
 **Worktrees** — Isolate work in a worktree when it is parallel to in-progress work, risky/experimental, a hotfix interrupting other work, or splits into independent units — just create when clear, propose in one line when ambiguous, skip quick/small single-stream work. Branch from a stable base (default branch); worktrees persist (never auto-remove or auto-merge); they isolate code state, not runtime (ports/DBs/services still collide). Full heuristic: invoke `using-worktrees`.
 
-**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context).
+**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context). DeepSeek V4 models also accept `reasoning_effort` (low/medium/high/xhigh/max); omit to disable thinking mode. Example: `reasoning_effort: 'high'`.
 
 **Reasoning budget** — To cap a reasoning model's thinking tokens, pass `--reasoning-budget` through `llama_extra_args` (already permitted by the deny-list validator; routes the agent to llama-sidecar). Example frontmatter line: `llama_extra_args: ["--reasoning-budget", "2048"]`. This is a sidecar process flag, not a chat-completion body param — distinct from the sampling knobs above.
 
diff --git a/packages/contracts/src/ws-frames.ts b/packages/contracts/src/ws-frames.ts
index 06b9e59..2a4bf7c 100644
--- a/packages/contracts/src/ws-frames.ts
+++ b/packages/contracts/src/ws-frames.ts
@@ -116,6 +116,8 @@ export const MessageCompleteFrame = z.object({
   tokens_used: z.number().int().nonnegative().nullable().optional(),
   ctx_used: z.number().int().nonnegative().nullable().optional(),
   ctx_max: z.number().int().positive().nullable().optional(),
+  cache_tokens: z.number().int().nonnegative().nullable().optional(),
+  reasoning_tokens: z.number().int().nonnegative().nullable().optional(),
   started_at: IsoTimestamp.nullable().optional(),
   finished_at: IsoTimestamp.nullable().optional(),
   // nullable: external-coder turns carry task.model, which is null when no