chore: snapshot main sync

2026-06-17 20:08:31 +00:00
parent b18de2a331
commit 8bd32537cf
354 changed files with 10208 additions and 9230 deletions
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -68,6 +68,14 @@
    "./skill-invoke": {
      "types": "./dist/services/skill-invoke.d.ts",
      "default": "./dist/services/skill-invoke.js"
+    },
+    "./mcp-config": {
+      "types": "./dist/services/mcp-config.d.ts",
+      "default": "./dist/services/mcp-config.js"
+    },
+    "./mcp-client": {
+      "types": "./dist/services/mcp-client.d.ts",
+      "default": "./dist/services/mcp-client.js"
    }
  },
  "scripts": {
@@ -77,6 +85,7 @@
    "test": "vitest run"
  },
  "dependencies": {
+    "@ai-sdk/anthropic": "^3.0.84",
    "@ai-sdk/deepseek": "^2.0.35",
    "@ai-sdk/openai-compatible": "^2.0.47",
    "@boocode/contracts": "workspace:*",
--- a/apps/server/src/config.ts
+++ b/apps/server/src/config.ts
@@ -8,7 +8,7 @@ const ConfigSchema = z.object({
  LLAMA_SWAP_URL: z.string().url(),
  PROJECT_ROOT_WHITELIST: z.string().default('/opt'),
  BOOTSTRAP_ROOT: z.string().default('/opt/projects'),
-  DEFAULT_MODEL: z.string().default('qwen3.6-35b-a3b-mxfp4'),
+  DEFAULT_MODEL: z.string().default('sam-desktop/qwen3.6-35b-a3b'),
  LOG_LEVEL: z.string().default('info'),
  // v1.11.8: SearXNG JSON endpoint for web_search / web_fetch tools.
  // Defaults to the internal Tailscale Fathom URL (bypasses Authelia).
@@ -31,12 +31,20 @@ const ConfigSchema = z.object({
  DEEPSEEK_API_KEY: z.string().optional(),
  // Optional base URL override for DeepSeek API. Defaults to api.deepseek.com.
  DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
+  // Beta endpoint for experimental features (strict tools, prefix completion, etc.).
+  // Defaults to https://api.deepseek.com/beta, so it is always defined. DeepSeek
+  // calls that use tools or prefix content are routed through this endpoint.
+  DEEPSEEK_BETA_BASE_URL: z.string().url().default('https://api.deepseek.com/beta'),
+  // Hosted Anthropic Claude. When set, models with provider id "anthropic"
+  // (or bare "claude-*" ids) route through the Anthropic Messages API via
+  // @ai-sdk/anthropic instead of llama-swap. Unset = Claude routing disabled.
+  ANTHROPIC_API_KEY: z.string().optional(),
+  ANTHROPIC_BASE_URL: z.string().url().optional(),
  // vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
  HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
  // vMultiProvider: path to the local providers config JSON file. Missing file
  // = legacy synthesis from LLAMA_SWAP_URL.
  LLAMA_PROVIDERS_PATH: z.string().optional(),
-  // BooControl host service origin. Used by /api/control/* proxy routes.
  BOOCONTROL_URL: z.string().url().optional(),
 });

--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -10,6 +10,7 @@ import { registerProjectRoutes } from './routes/projects.js';
 import { registerSessionRoutes } from './routes/sessions.js';
 import { registerSettingsRoutes } from './routes/settings.js';
 import { registerMessageRoutes } from './routes/messages.js';
+import { registerMessageFeedbackRoutes } from './routes/messages-feedback.js';
 import { registerArtifactRoutes } from './routes/artifacts.js';
 import { registerChatRoutes } from './routes/chats.js';
 import { registerSidebarRoutes } from './routes/sidebar.js';
@@ -17,6 +18,7 @@ import { registerWebSocket } from './routes/ws.js';
 import { registerCoderProxy } from './routes/coder-proxy.js';
 import { registerControlProxy } from './routes/control-proxy.js';
 import { registerModelRoutes } from './routes/models.js';
+import { registerProviderRoutes } from './routes/providers.js';
 import { registerAgentRoutes } from './routes/agents.js';
 import { registerSkillsRoutes } from './routes/skills.js';
 import { registerTraceRoutes } from './routes/traces.js';
@@ -35,7 +37,7 @@ import { cleanupTruncations } from './services/truncate.js';
 import { loadMcpConfig } from './services/mcp-config.js';
 import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
 import { appendMcpTools } from './services/tools.js';
-import { refreshToolNames, getAgentsForProject } from './services/agents.js';
+import { refreshToolNames } from './services/agents.js';
 import { loadHooksConfig, createHookRunner } from './services/hooks.js';
 import { loadLlamaProviders } from './services/llama-providers.js';

@@ -119,6 +121,7 @@ async function main() {
  registerSessionRoutes(app, sql, config, broker);
  registerSettingsRoutes(app, sql);
  registerModelRoutes(app, config);
+  registerProviderRoutes(app);
  registerAgentRoutes(app, sql);
  registerSidebarRoutes(app, sql);
  registerChatRoutes(app, sql, broker, config, {
@@ -126,15 +129,17 @@ async function main() {
      // Reuse the inference runner's context pattern for compare mode.
      // Each compare run gets its own AbortController; cancellation keyed by
      // chatId (cancels ALL parallel runs in that compare group).
+      let streamSeq = 0;
      const compareCtx: import('./services/inference/types.js').InferenceContext = {
        sql,
        config,
        log: app.log,
        publish: (sid, frame) => {
-          broker.publishFrame(sid, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
+          frame.stream_seq = streamSeq++;
+          broker.publishFrame(sid, frame as import('@boocode/contracts/ws-frames').WsFrame);
        },
        publishUser: (frame) => {
-          broker.publishUserFrame('default', frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
+          broker.publishUserFrame('default', frame as import('@boocode/contracts/ws-frames').WsFrame);
        },
        broker,
        hooks: hasHooks ? hookRunner : undefined,
@@ -169,6 +174,7 @@ async function main() {
  const hookRunner = createHookRunner();
  const hasHooks = Object.keys(loadHooksConfig(config.HOOKS_CONFIG_PATH).hooks).length > 0;

+  let streamSeq = 0;
  const inference = createInferenceRunner(
    {
      sql,
@@ -176,9 +182,8 @@ async function main() {
      log: app.log,
      hooks: hasHooks ? hookRunner : undefined,
      publish: (sessionId, frame) => {
-        // v1.13.11-b: route through the typed publishFrame so the broker's
-        // Zod gate validates every inference frame before delivery.
-        broker.publishFrame(sessionId, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
+        frame.stream_seq = streamSeq++;
+        broker.publishFrame(sessionId, frame as import('@boocode/contracts/ws-frames').WsFrame);
      },
      // v1.11: broker handle for compaction.process to publish 'compacted'
      // frames on the per-session channel. Inference's regular publish path
@@ -187,7 +192,7 @@ async function main() {
      broker,
    },
    (user, frame) => {
-      broker.publishUserFrame(user, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
+      broker.publishUserFrame(user, frame as import('@boocode/contracts/ws-frames').WsFrame);
    }
  );
  // v2.x: wire the background subagent task system to the inference runner.
@@ -242,6 +247,7 @@ async function main() {
      broker.publishFrame(sessionId, frame as import('@boocode/contracts/ws-frames').WsFrame);
    },
  });
+  registerMessageFeedbackRoutes(app, sql);
  registerArtifactRoutes(app, sql);
  registerSkillsRoutes(app, sql, {
    enqueueInference: (sessionId, chatId, assistantId, user) => {
--- a/apps/server/src/routes/chats.ts
+++ b/apps/server/src/routes/chats.ts
@@ -522,7 +522,6 @@ export function registerChatRoutes(

        const { message, models } = parsed.data;

-        // Check for active inference first.
        if (compareHandlers.hasActiveInference(req.params.id)) {
          reply.code(409);
          return { error: 'chat is currently streaming; stop it first' };
--- a/apps/server/src/routes/messages-feedback.ts
+++ b/apps/server/src/routes/messages-feedback.ts
@@ -0,0 +1,87 @@
+import type { FastifyInstance } from 'fastify';
+import { z } from 'zod';
+import type { Sql } from '../db.js';
+import type { MessageMetadata } from '../types/api.js';
+
+// Request body: a single 'up' or 'down' rating.
+const FeedbackBody = z.object({
+  value: z.enum(['up', 'down']),
+});
+
+/**
+ * Registers POST /api/chats/:id/messages/:message_id/feedback, which records an
+ * 'up'/'down' rating on an assistant message.
+ *
+ * The rating is stored by replacing the row's `metadata` with a
+ * `{ kind: 'feedback', value, chat_id }` object and bumping `updated_at`.
+ * Feedback is write-once: a second submission is rejected.
+ *
+ * Responses:
+ *   - 400 `invalid body` — the body does not match FeedbackBody
+ *   - 404 `message not found` — no such message id within that chat
+ *   - 400 `only assistant messages can receive feedback`
+ *   - 409 `feedback already recorded`
+ *   - otherwise `{ ok: true }`
+ *
+ * @param app Fastify instance the route is attached to.
+ * @param sql Tagged-template SQL client used for the lookup and the update.
+ */
+export function registerMessageFeedbackRoutes(app: FastifyInstance, sql: Sql): void {
+  app.post<{ Params: { id: string; message_id: string } }>(
+    '/api/chats/:id/messages/:message_id/feedback',
+    async (req, reply) => {
+      const parsed = FeedbackBody.safeParse(req.body);
+      if (!parsed.success) {
+        reply.code(400);
+        return { error: 'invalid body', details: parsed.error.flatten() };
+      }
+      const { id: chatId, message_id: messageId } = req.params;
+      const { value } = parsed.data;
+
+      const rows = await sql<{ id: string; role: string; metadata: MessageMetadata | null }[]>`
+        SELECT id, role, metadata FROM messages WHERE id = ${messageId} AND chat_id = ${chatId}
+      `;
+      const message = rows[0];
+      if (!message) {
+        reply.code(404);
+        return { error: 'message not found' };
+      }
+
+      // Only allow feedback on assistant messages.
+      if (message.role !== 'assistant') {
+        reply.code(400);
+        return { error: 'only assistant messages can receive feedback' };
+      }
+
+      // Fast path for the common duplicate case; the UPDATE below re-checks it.
+      if (message.metadata && message.metadata.kind === 'feedback') {
+        reply.code(409);
+        return { error: 'feedback already recorded' };
+      }
+
+      const feedbackMeta: MessageMetadata = {
+        kind: 'feedback',
+        value,
+        chat_id: chatId,
+      };
+
+      // The write-once condition is repeated in the WHERE clause so that two
+      // concurrent requests cannot both pass the read above and both write.
+      // RETURNING reports whether this request actually updated the row.
+      const updated = await sql<{ id: string }[]>`
+        UPDATE messages
+        SET metadata = ${sql.json(feedbackMeta as never)}, updated_at = clock_timestamp()
+        WHERE id = ${messageId}
+          AND chat_id = ${chatId}
+          AND (metadata IS NULL OR metadata->>'kind' IS DISTINCT FROM 'feedback')
+        RETURNING id
+      `;
+      if (updated.length === 0) {
+        reply.code(409);
+        return { error: 'feedback already recorded' };
+      }
+
+      return { ok: true };
+    },
+  );
+}
--- a/apps/server/src/routes/messages.ts
+++ b/apps/server/src/routes/messages.ts
@@ -10,80 +10,7 @@ import type { Chat, Message, MessageMetadata, Session, ToolCall } from '../types
 import { resolveGrantRoot } from '../services/grant_resolver.js';
 import { MESSAGE_COLUMNS } from '../services/message-columns.js';
 import { setServerPermission, getServerName } from '../services/mcp-client.js';
-
-// Shared lookup for the answer_user_input + grant_read_access pause-resume
-// endpoints. Finds the originating assistant tool_call by id in message_parts,
-// validates the tool name, finds the pending tool_result part, and checks the
-// already-answered guard. Returns ok:true+context on success, ok:false+HTTP
-// status+body on any error (caller does reply.code(ctx.code); return ctx.body).
-type PendingToolLookupResult =
-  | {
-      ok: true;
-      foundCall: ToolCall;
-      toolMessageId: string;
-      toolRow: { message_id: string; payload: { tool_call_id: string; output: unknown } };
-    }
-  | { ok: false; code: number; body: Record<string, unknown> };
-
-async function lookupPendingToolCall(
-  sql: Sql,
-  chatId: string,
-  tool_call_id: string,
-  expectedToolName: string,
-  wrongToolError: string,
-): Promise<PendingToolLookupResult> {
-  // Find the assistant's tool_call by id via message_parts.
-  const callerRows = await sql<{
-    message_id: string;
-    payload: { id: string; name: string; args: Record<string, unknown> };
-  }[]>`
-    SELECT p.message_id, p.payload
-    FROM message_parts p
-    JOIN messages m ON m.id = p.message_id
-    WHERE m.chat_id = ${chatId}
-      AND m.role = 'assistant'
-      AND p.kind = 'tool_call'
-      AND p.payload->>'id' = ${tool_call_id}
-    ORDER BY m.created_at DESC
-    LIMIT 1
-  `;
-  const callerRow = callerRows[0];
-  if (!callerRow) return { ok: false, code: 404, body: { error: 'unknown_tool_call_id' } };
-
-  const foundCall: ToolCall = {
-    id: callerRow.payload.id,
-    name: callerRow.payload.name,
-    args: callerRow.payload.args,
-  };
-  if (foundCall.name !== expectedToolName) {
-    return { ok: false, code: 400, body: { error: wrongToolError } };
-  }
-
-  // Find the pending tool_result part by tool_call_id.
-  const toolRows = await sql<{
-    message_id: string;
-    payload: { tool_call_id: string; output: unknown };
-  }[]>`
-    SELECT p.message_id, p.payload
-    FROM message_parts p
-    JOIN messages m ON m.id = p.message_id
-    WHERE m.chat_id = ${chatId}
-      AND m.role = 'tool'
-      AND p.kind = 'tool_result'
-      AND p.payload->>'tool_call_id' = ${tool_call_id}
-    ORDER BY m.created_at DESC
-    LIMIT 1
-  `;
-  const toolRow = toolRows[0];
-  if (!toolRow) {
-    return { ok: false, code: 404, body: { error: 'unknown_tool_call_id', detail: 'tool message not found' } };
-  }
-  if (toolRow.payload && toolRow.payload.output !== null) {
-    return { ok: false, code: 409, body: { error: 'tool_call_already_answered' } };
-  }
-
-  return { ok: true, foundCall, toolMessageId: toolRow.message_id, toolRow };
-}
+import { lookupPendingToolCall } from '../services/pending-tool-lookup.js';

 const SendBody = z.object({
  content: z.string().min(1).max(64_000),
@@ -146,11 +73,6 @@ const RequestReadAccessArgs = z.object({

 interface MessageHandlers {
  enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
-  // v1.11: returns a promise that resolves after compaction.process finishes
-  // (await the LLM call). Throws on failure — the route surfaces a 500.
-  // Replaces the v1.10 enqueueCompact (which fired-and-forgot a kind='compact'
-  // streaming row). The new anchored-rolling strategy inserts a single
-  // summary=true assistant row only after the LLM responds.
  runCompaction: (chatId: string) => Promise<void>;
  publishUserMessage: (
    sessionId: string,
@@ -360,11 +282,6 @@ export function registerMessageRoutes(
    }
  );

-  // v1.11: manual /compact. Was a streaming kind='compact' row inserted by
-  // this handler; now delegates to the anchored-rolling compaction service.
-  // Synchronous (we await the LLM call) — callers either await or rely on
-  // the 'compacted' WS frame to refresh their view. The response carries
-  // no body of interest; the new summary row arrives via the WS frame.
  app.post<{ Params: { id: string } }>(
    '/api/chats/:id/compact',
    async (req, reply) => {
@@ -908,56 +825,4 @@ export function registerMessageRoutes(
      return { ok: true };
    },
  );
-
-  const FeedbackBody = z.object({
-    value: z.enum(['up', 'down']),
-  });
-
-  app.post<{ Params: { id: string; message_id: string } }>(
-    '/api/chats/:id/messages/:message_id/feedback',
-    async (req, reply) => {
-      const parsed = FeedbackBody.safeParse(req.body);
-      if (!parsed.success) {
-        reply.code(400);
-        return { error: 'invalid body', details: parsed.error.flatten() };
-      }
-      const { id: chatId, message_id: messageId } = req.params;
-      const { value } = parsed.data;
-
-      const msg = await sql<{ id: string; role: string; metadata: MessageMetadata | null }[]>`
-        SELECT id, role, metadata FROM messages WHERE id = ${messageId} AND chat_id = ${chatId}
-      `;
-      if (msg.length === 0) {
-        reply.code(404);
-        return { error: 'message not found' };
-      }
-
-      // Only allow feedback on assistant messages.
-      if (msg[0]!.role !== 'assistant') {
-        reply.code(400);
-        return { error: 'only assistant messages can receive feedback' };
-      }
-
-      // Check if feedback already exists
-      const existingMeta = msg[0]!.metadata;
-      if (existingMeta && existingMeta.kind === 'feedback') {
-        reply.code(409);
-        return { error: 'feedback already recorded' };
-      }
-
-      const feedbackMeta: MessageMetadata = {
-        kind: 'feedback',
-        value,
-        chat_id: chatId,
-      };
-
-      await sql`
-        UPDATE messages
-        SET metadata = ${sql.json(feedbackMeta as never)}, updated_at = clock_timestamp()
-        WHERE id = ${messageId}
-      `;
-
-      return { ok: true };
-    },
-  );
 }
--- a/apps/server/src/routes/models.ts
+++ b/apps/server/src/routes/models.ts
@@ -12,6 +12,15 @@ const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
  { id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
 ];

+// Anthropic's /v1/models needs different headers (x-api-key + anthropic-version)
+// and a different response shape, so we surface a curated static list instead.
+const ANTHROPIC_STATIC_MODELS: ModelInfo[] = [
+  { id: 'claude-opus-4-8', object: 'model', created: 0, owned_by: 'anthropic' },
+  { id: 'claude-opus-4-7', object: 'model', created: 0, owned_by: 'anthropic' },
+  { id: 'claude-sonnet-4-6', object: 'model', created: 0, owned_by: 'anthropic' },
+  { id: 'claude-haiku-4-5', object: 'model', created: 0, owned_by: 'anthropic' },
+];
+
 export function registerModelRoutes(app: FastifyInstance, config: Config): void {
  app.get('/api/models', async (_req, reply) => {
    const providers: ModelCatalogProvider[] = [];
@@ -62,6 +71,12 @@ export function registerModelRoutes(app: FastifyInstance, config: Config): void
      providers.push({ id: 'deepseek', label: 'DeepSeek', models: deepseekModels });
    }

+    // 3. If Anthropic is configured, add a synthetic "anthropic" provider group.
+    if (config.ANTHROPIC_API_KEY) {
+      const anthropicModels = ANTHROPIC_STATIC_MODELS.map((m) => ({ ...m, id: `anthropic/${m.id}` }));
+      providers.push({ id: 'anthropic', label: 'Anthropic', models: anthropicModels });
+    }
+
    if (providers.length === 0) {
      reply.code(502);
      return { error: 'no models available from any provider' };
--- a/apps/server/src/routes/projects.ts
+++ b/apps/server/src/routes/projects.ts
@@ -656,7 +656,6 @@ export function registerProjectRoutes(
      try { root = await resolveProjectRoot(projectPath); }
      catch (err) { if (err instanceof PathScopeError) { reply.code(404); return { error: (err as Error).message }; } throw err; }
      const target = body.data.path.startsWith('/') ? body.data.path : resolve(root, body.data.path);
-      // Validate path stays within project root
      const realTarget = await realpath(target).catch(() => target);
      if (!realTarget.startsWith(root + sep) && realTarget !== root) {
        reply.code(403);
@@ -668,14 +667,12 @@ export function registerProjectRoutes(
        await rename(tmp, target);
        return { ok: true };
      } catch (err) {
-        // Clean up tmp on failure
        await access(tmp).then(() => rename(tmp, target + '.bak').catch(() => {})).catch(() => {});
        throw err;
      }
    },
  );

-  // GET /api/projects/:id/files
  app.get<{ Params: { id: string } }>(
    '/api/projects/:id/files',
    async (req, reply) => {
--- a/apps/server/src/routes/providers.ts
+++ b/apps/server/src/routes/providers.ts
@@ -0,0 +1,74 @@
+import type { FastifyInstance } from 'fastify';
+import { getProviderStatus, unloadProvider, unloadModel } from '../services/provider-status.js';
+
+/** Normalizes a caught value into a message string for the `detail` field. */
+function errorDetail(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * Registers the provider status and unload routes.
+ *
+ *   GET  /api/providers/status                       sends the result of getProviderStatus()
+ *   POST /api/providers/:providerId/unload           calls unloadProvider(providerId)
+ *   POST /api/providers/:providerId/unload/:modelId  calls unloadModel(providerId, modelId)
+ *
+ * A falsy result from an unload helper is answered with 404. An exception
+ * thrown by any of the three helpers is answered with 502 and the error
+ * message in `detail`.
+ *
+ * @param app Fastify instance the routes are attached to.
+ */
+export function registerProviderRoutes(app: FastifyInstance): void {
+  app.get('/api/providers/status', async (_req, reply) => {
+    try {
+      const result = await getProviderStatus();
+      return reply.send(result);
+    } catch (err) {
+      return reply.status(502).send({
+        error: 'failed to query provider statuses',
+        detail: errorDetail(err),
+      });
+    }
+  });
+
+  app.post<{ Params: { providerId: string } }>(
+    '/api/providers/:providerId/unload',
+    async (req, reply) => {
+      const { providerId } = req.params;
+      try {
+        const ok = await unloadProvider(providerId);
+        if (!ok) {
+          return reply.status(404).send({ error: `provider ${providerId} not found or unload failed` });
+        }
+        return reply.send({ status: 'ok', providerId });
+      } catch (err) {
+        return reply.status(502).send({
+          error: `failed to unload provider ${providerId}`,
+          detail: errorDetail(err),
+        });
+      }
+    },
+  );
+
+  app.post<{ Params: { providerId: string; modelId: string } }>(
+    '/api/providers/:providerId/unload/:modelId',
+    async (req, reply) => {
+      const { providerId, modelId } = req.params;
+      try {
+        const ok = await unloadModel(providerId, modelId);
+        if (!ok) {
+          return reply.status(404).send({
+            error: `unload failed for provider ${providerId}, model ${modelId}`,
+          });
+        }
+        return reply.send({ status: 'ok', providerId, modelId });
+      } catch (err) {
+        return reply.status(502).send({
+          error: `failed to unload model ${modelId} on provider ${providerId}`,
+          detail: errorDetail(err),
+        });
+      }
+    },
+  );
+}
--- a/apps/server/src/schema.sql
+++ b/apps/server/src/schema.sql
@@ -78,6 +78,19 @@ END $$;
 CREATE INDEX IF NOT EXISTS message_parts_hidden_idx
  ON message_parts (message_id) WHERE hidden_at IS NULL;

+-- v2.x-workflow-sdk: add retry_count for future tool retry observability (plumbing
+-- only, no retry logic yet). Existing rows receive 0 via DEFAULT. Idempotent: the guard
+-- skips on re-run; it is scoped to current_schema() so another schema's table cannot mask it.
+DO $$
+BEGIN
+  IF NOT EXISTS (
+    SELECT 1 FROM information_schema.columns WHERE table_schema = current_schema()
+      AND table_name = 'message_parts' AND column_name = 'retry_count'
+  ) THEN
+    ALTER TABLE message_parts ADD COLUMN retry_count int NOT NULL DEFAULT 0;
+  END IF;
+END $$;
+
 -- v1.13.13: extend message_parts.kind to allow 'synthesis'. Existing DBs were
 -- created with the pre-v1.13.13 CHECK constraint that did NOT include
 -- 'synthesis'; drop + re-add the constraint with the extended enum. Fresh
@@ -219,7 +232,7 @@ CREATE TABLE IF NOT EXISTS settings (
  value JSONB NOT NULL
 );

-INSERT INTO settings (key, value) VALUES ('default_model', '"qwen3.6-35b-a3b-mxfp4"') ON CONFLICT (key) DO NOTHING;
+INSERT INTO settings (key, value) VALUES ('default_model', '"sam-desktop/qwen3.6-35b-a3b"') ON CONFLICT (key) DO NOTHING;

 -- v1.12.1: deprecated session_panes table removed. Workspace pane state now
 -- lives in sessions.workspace_panes (jsonb), see below.
--- a/apps/server/src/services/tests/inference.test.ts
+++ b/apps/server/src/services/tests/inference.test.ts
@@ -262,6 +262,31 @@ describe('buildMessagesPayload', async () => {
    expect(result[4]).toMatchObject({ role: 'assistant', content: 'here it is' });
  });

+  it('preserves every tool result across a multi-step tool turn', async () => {
+    // Regression anchor (dcp-context-corruption-fix): a multi-step tool turn
+    // must deliver every prior step's tool output to the payload. Tool rows
+    // carry content='' with the output in tool_results; no pre-processing step
+    // may drop them.
+    const session = makeSession();
+    const project = makeProject();
+    const history: Message[] = [
+      makeMessage('user', 'read x and y'),
+      makeMessage('assistant', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: {} }] }),
+      makeMessage('tool', '', { tool_results: { tool_call_id: 'c1', output: 'OUT1', truncated: false } }),
+      makeMessage('assistant', '', { tool_calls: [{ id: 'c2', name: 'view_file', args: {} }] }),
+      makeMessage('tool', '', { tool_results: { tool_call_id: 'c2', output: 'OUT2', truncated: false } }),
+    ];
+    const result = await buildMessagesPayload(session, project, history);
+    const toolContents = result.filter((m) => m.role === 'tool').map((m) => m.content);
+    expect(toolContents).toContain('OUT1');
+    expect(toolContents).toContain('OUT2');
+    // Both assistant turns retain their tool_calls (not stripped as orphans).
+    const assistantsWithCalls = result.filter(
+      (m) => m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0
+    );
+    expect(assistantsWithCalls).toHaveLength(2);
+  });
+
  it('strips assistant tool_calls when matching tool results are missing', async () => {
    const session = makeSession();
    const project = makeProject();
--- a/apps/server/src/services/tests/model-context.test.ts
+++ b/apps/server/src/services/tests/model-context.test.ts
@@ -376,7 +376,7 @@ describe('getModelContext — bare-id resolution through default provider (W3)',

    const result = await getModelContext('deepseek-v4-pro');
    expect(result).not.toBeNull();
-    expect(result!.n_ctx).toBe(131_072);
+    expect(result!.n_ctx).toBe(1_000_000);
    expect(fetchSpy).not.toHaveBeenCalled();
  });

@@ -385,7 +385,7 @@ describe('getModelContext — bare-id resolution through default provider (W3)',

    const result = await getModelContext('deepseek/deepseek-v4-pro');
    expect(result).not.toBeNull();
-    expect(result!.n_ctx).toBe(131_072);
+    expect(result!.n_ctx).toBe(1_000_000);
    expect(fetchSpy).not.toHaveBeenCalled();
  });
 });
--- a/apps/server/src/services/tests/parts.test.ts
+++ b/apps/server/src/services/tests/parts.test.ts
@@ -82,6 +82,46 @@ describe('partsFromAssistantMessage', () => {
      [1, 'tool_call'],
    ]);
  });
+
+  it('Phase 2: signed reasoning blocks become one reasoning part each, supersede the joined string', () => {
+    const parts = partsFromAssistantMessage({
+      content: 'done',
+      tool_calls: null,
+      reasoning: 'block1block2', // the joined fallback — must be ignored here
+      reasoningBlocks: [
+        { text: 'block1', signature: 'sig1' },
+        { text: 'block2', signature: 'sig2' },
+      ],
+    });
+    expect(parts.map((p) => [p.sequence, p.kind])).toEqual([
+      [0, 'reasoning'],
+      [1, 'reasoning'],
+      [2, 'text'],
+    ]);
+    expect(parts[0]!.payload).toEqual({ text: 'block1', signature: 'sig1' });
+    expect(parts[1]!.payload).toEqual({ text: 'block2', signature: 'sig2' });
+  });
+
+  it('Phase 2: an empty-text block with a signature is still persisted (display:omitted)', () => {
+    const parts = partsFromAssistantMessage({
+      content: '',
+      tool_calls: null,
+      reasoningBlocks: [{ text: '', signature: 'sig-only' }],
+    });
+    expect(parts.map((p) => [p.kind, p.payload])).toEqual([
+      ['reasoning', { text: '', signature: 'sig-only' }],
+    ]);
+  });
+
+  it('Phase 2: empty reasoningBlocks falls back to the joined reasoning string', () => {
+    const parts = partsFromAssistantMessage({
+      content: 'x',
+      tool_calls: null,
+      reasoning: 'plain reasoning',
+      reasoningBlocks: [],
+    });
+    expect(parts[0]!.payload).toEqual({ text: 'plain reasoning' });
+  });
 });

 describe('partsFromToolMessage', () => {
--- a/apps/server/src/services/tests/provider.test.ts
+++ b/apps/server/src/services/tests/provider.test.ts
@@ -152,6 +152,47 @@ describe('resolveModelProvider — bare id legacy fallback', () => {
  });
 });

+// ---------------------------------------------------------------------------
+// Anthropic route
+// ---------------------------------------------------------------------------
+
+describe('resolveModelProvider — anthropic route', () => {
+  const cfg = { LLAMA_SWAP_URL: 'http://localhost:8080', ANTHROPIC_API_KEY: 'sk-ant' };
+
+  it('routes composite "anthropic/" id to the anthropic wire', () => {
+    const r = resolveModelProvider('anthropic/claude-opus-4-8', cfg);
+    expect(r.route).toBe('anthropic');
+    expect(r.providerId).toBe('anthropic');
+    expect(r.wireModelId).toBe('claude-opus-4-8');
+    expect(r.baseUrl).toBe('https://api.anthropic.com');
+  });
+
+  it('routes bare "claude-*" id to anthropic when configured', () => {
+    const r = resolveModelProvider('claude-sonnet-4-6', cfg);
+    expect(r.route).toBe('anthropic');
+    expect(r.wireModelId).toBe('claude-sonnet-4-6');
+  });
+
+  it('bare "claude-*" stays on swap when ANTHROPIC_API_KEY is unset', () => {
+    const r = resolveModelProvider('claude-opus-4-8', { LLAMA_SWAP_URL: 'http://localhost:8080' });
+    expect(r.route).toBe('swap');
+  });
+
+  it('honors ANTHROPIC_BASE_URL override and strips trailing slash', () => {
+    const r = resolveModelProvider('claude-opus-4-8', {
+      ...cfg,
+      ANTHROPIC_BASE_URL: 'https://proxy.example.com/',
+    });
+    expect(r.baseUrl).toBe('https://proxy.example.com');
+  });
+
+  it('resolveModelEndpoint throws for the anthropic wire (no OpenAI direct-fetch)', () => {
+    expect(() => resolveModelEndpoint(cfg, 'anthropic/claude-opus-4-8')).toThrow(
+      /anthropic wire has no OpenAI-compatible direct-fetch endpoint/,
+    );
+  });
+});
+
 // ---------------------------------------------------------------------------
 // upstreamModel uses the resolver
 // ---------------------------------------------------------------------------
@@ -306,3 +347,65 @@ describe('resolveModelProvider — gateway routing (P7)', () => {
    expect((model as any).modelId).toBe('auto:code');
  });
 });
+
+// ---------------------------------------------------------------------------
+// P7 G3: bare auto:* footgun fix
+// ---------------------------------------------------------------------------
+
+describe('resolveModelProvider — bare auto:* routing (G3)', () => {
+  const config = { LLAMA_SWAP_URL: 'http://localhost:8080' };
+
+  it('bare "auto:code" with a gateway registered routes to gateway (not the default swap host)', () => {
+    mockProvidersList = [
+      ...mockProvidersList,
+      { id: 'auto', label: 'Auto (gateway)', baseUrl: 'http://100.114.205.53:9503', kind: 'boocontrol-gateway' },
+    ];
+    const r = resolveModelProvider('auto:code', config);
+    expect(r.route).toBe('gateway');
+    expect(r.baseUrl).toBe('http://100.114.205.53:9503');
+    expect(r.wireModelId).toBe('auto:code');
+  });
+
+  it('bare "auto:code" with NO gateway resolves to gateway_error, never swap', () => {
+    const r = resolveModelProvider('auto:code', config);
+    expect(r.route).toBe('gateway_error');
+    expect(r.gatewayReason).toBe('offline');
+    expect(r.baseUrl).not.toBe(config.LLAMA_SWAP_URL);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// DeepSeek beta endpoint routing (A5)
+// ---------------------------------------------------------------------------
+
+describe('upstreamModel — DeepSeek beta routing', () => {
+  const dsConfig = {
+    LLAMA_SWAP_URL: 'http://localhost:8080',
+    DEEPSEEK_API_KEY: 'sk-test',
+    DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
+    DEEPSEEK_BETA_BASE_URL: 'https://api.deepseek.com/beta',
+  };
+
+  it('DeepSeek without useBeta returns a model (stable endpoint)', () => {
+    const model = upstreamModel(dsConfig, 'deepseek-v4-pro');
+    expect(model).toBeDefined();
+    expect((model as any).modelId).toBe('deepseek-v4-pro');
+  });
+
+  it('DeepSeek with useBeta returns a model (beta endpoint)', () => {
+    const model = upstreamModel(dsConfig, 'deepseek-v4-pro', null, undefined, true);
+    expect(model).toBeDefined();
+    expect((model as any).modelId).toBe('deepseek-v4-pro');
+  });
+
+  it('DeepSeek composite with useBeta returns a model', () => {
+    const model = upstreamModel(dsConfig, 'deepseek/deepseek-v4-pro', null, undefined, true);
+    expect(model).toBeDefined();
+    expect((model as any).modelId).toBe('deepseek-v4-pro');
+  });
+
+  it('non-DeepSeek with useBeta ignores the flag', () => {
+    const model = upstreamModel(dsConfig, 'qwen3.6', null, undefined, true);
+    expect(model).toBeDefined();
+  });
+});
--- a/apps/server/src/services/agents.ts
+++ b/apps/server/src/services/agents.ts
@@ -27,8 +27,6 @@ export function refreshToolNames(): void {
 }
 const DEFAULT_TEMPERATURE = 0.7;

-// ---- Tool glob matching (v1.15.0-mcp-multi) --------------------------------
-
 /**
 * Simple glob match for tool names. Supports `*` as a wildcard for any
 * characters. No `?` or `**` — tool names are flat (no path separators).
@@ -81,8 +79,6 @@ export function slugify(name: string): string {
    .replace(/^-+|-+$/g, '');
 }

-// ---- AGENTS.md parser ------------------------------------------------------
-
 interface ParsedFrontmatter {
  temperature?: number;
  top_p?: number;
@@ -108,6 +104,10 @@ interface ParsedFrontmatter {

  // vDeepSeek: thinking effort for DeepSeek V4 models.
  reasoning_effort?: string;
+
+  // vDeepSeek: JSON output mode and prefix completion for DeepSeek V4.
+  response_format?: Record<string, unknown>;
+  prefix_content?: string;
 }

 // P5: table-driven validation for the "soft-range" numeric frontmatter fields.
@@ -362,6 +362,12 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
    steps: typeof fm.steps === 'number' ? fm.steps : null,

    reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
+
+    response_format:
+      fm.response_format && typeof fm.response_format === 'object' && (fm.response_format as Record<string, unknown>).type === 'json_object'
+        ? { type: 'json_object' as const }
+        : null,
+    prefix_content: typeof fm.prefix_content === 'string' && fm.prefix_content.length > 0 ? fm.prefix_content : null,
  };
 }

@@ -399,8 +405,6 @@ export function isAgentRegistryMarkdown(content: string): boolean {
  return true;
 }

-// ---- mtime-keyed cache + public API ----------------------------------------
-
 interface CacheEntry {
  globalMtime: number | null;
  projectMtime: number | null;
--- a/apps/server/src/services/artifacts.ts
+++ b/apps/server/src/services/artifacts.ts
@@ -31,8 +31,6 @@ export interface ArtifactWriteResult {

 const ARTIFACT_SUBDIR = '.boocode/artifacts';

-// ---- slug helpers ----
-
 // Lowercase, replace non-alnum runs with '-', trim leading/trailing '-',
 // collapse repeated '-', cap at 60 chars. Empty → 'artifact'.
 function slugify(input: string): string {
@@ -118,8 +116,6 @@ export function deriveHtmlTitle(html: string): string | null {
  return inner.slice(0, 80);
 }

-// ---- HTML detection (B4) ----
-
 // Returns the inner HTML content if `text` is a recognised HTML artifact:
 //   - starts with <!DOCTYPE html> (case-insensitive, whitespace-trimmed), OR
 //   - wrapped entirely in a fenced ```html ... ``` block.
@@ -142,8 +138,6 @@ export function detectHtmlArtifact(text: string): string | null {
  return null;
 }

-// ---- path resolution ----
-
 // Resolve `<projectRoot>/.boocode/artifacts/<filename>` and verify the
 // result stays under projectRoot. Mirrors the v1.13.18 codecontext_client.ts
 // approach: realpath projectRoot first, then prefix-check the candidate.
--- a/apps/server/src/services/audit/session-manager.ts
+++ b/apps/server/src/services/audit/session-manager.ts
@@ -1,4 +1,4 @@
-import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
 import { join } from 'node:path';
 import {
  ensureRunsDir,
--- a/apps/server/src/services/compaction.ts
+++ b/apps/server/src/services/compaction.ts
@@ -24,7 +24,7 @@ import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
 import * as modelContextLookup from './model-context.js';
 import { SENTINEL_KINDS } from './inference/sentinels.js';
 import type { OpenAiMessage } from './inference/payload.js';
-import { resolveModelEndpoint } from './inference/provider.js';
+import { resolveModelEndpoint, resolveModelProvider } from './inference/provider.js';
 import type { HookRunner } from './hooks.js';

 // v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
@@ -58,8 +58,6 @@ export interface CompactionMessage {
  created_at: string;
 }

-// === overflow ===
-
 // Returns the token budget at which overflow fires. Triggers compaction at
 // 85% of contextLimit (opencode session/overflow.ts pattern). Returns 0 when
 // the context limit is unknown — caller treats 0 as "do not trigger overflow",
@@ -83,8 +81,6 @@ export function isOverflow(usage: Usage, contextLimit: number): boolean {
  return (usage.prompt_tokens + usage.completion_tokens) >= budget;
 }

-// === selection ===
-
 interface Turn {
  start: number;
  end: number;
@@ -185,8 +181,6 @@ export function select(
  };
 }

-// === file-provenance ledger (#12, Part B) ===
-
 // Read tools whose path/target arg names a file or directory that was read.
 // BooChat (apps/server) is read-only — there are no write tools, so the ledger
 // only ever has a "Files Read" side (apps/coder can add "Modified" later).
@@ -233,8 +227,6 @@ export function buildFilesReadContext(head: CompactionMessage[]): string | null
  return ['## Files Read', ...paths.map((p) => `- ${p}`)].join('\n');
 }

-// === prompt assembly ===
-
 // Build the final user message that asks the model to (re)produce the
 // anchored summary. `context` is reserved for future plugin injection;
 // callers pass [] today.
@@ -336,8 +328,6 @@ export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
  return out;
 }

-// === llama-swap call ===
-
 // Non-streaming completion. Opencode streams; for a one-shot summary call a
 // single POST is less code and the latency hit is acceptable (the user
 // doesn't see this directly — useSessionStream emits the toast + refetches
@@ -379,8 +369,6 @@ async function callLlm(
  return { content, promptTokens, completionTokens };
 }

-// === entry point ===
-
 export interface ProcessInput {
  sql: Sql;
  config: Config;
@@ -523,7 +511,14 @@ export async function process(input: ProcessInput): Promise<void> {
  let result: CompletionResult | undefined;
  try {
    // 7. Single completion (no tools). Throws on llama-swap failure.
-    result = await callLlm(config, session.model, payload, log);
+    // Anthropic chat models speak /v1/messages, which callLlm's OpenAI-shaped
+    // direct fetch can't target — summarize with FAST_MODEL (else DEFAULT_MODEL)
+    // instead; a fast/default model's summary is acceptable, a hard crash is not.
+    const summaryModel =
+      resolveModelProvider(session.model, config).route === 'anthropic'
+        ? (config.FAST_MODEL ?? config.DEFAULT_MODEL)
+        : session.model;
+    result = await callLlm(config, summaryModel, payload, log);

    // 7b. v1.11.3: fetch the model's true context window from the provider's
    // /upstream/<wireModelId>/props (the streaming completion doesn't carry it).
--- a/apps/server/src/services/hooks.ts
+++ b/apps/server/src/services/hooks.ts
@@ -27,8 +27,6 @@ import { spawn } from 'node:child_process';
 import { readFileSync, existsSync } from 'node:fs';
 import type { FastifyBaseLogger } from 'fastify';

-// ─── Events ───────────────────────────────────────────────────────────────
-
 export type HookEvent =
  | 'PreToolUse'
  | 'PostToolUse'
@@ -46,8 +44,6 @@ const ALL_EVENTS: HookEvent[] = [
  'PostCompact',
 ];

-// ─── Config ────────────────────────────────────────────────────────────────
-
 export interface HookConfig {
  /** Glob or exact tool name to match (PreToolUse/PostToolUse only). Omit or '*' for all. */
  match?: string;
@@ -61,8 +57,6 @@ export interface HooksConfig {
  hooks: Partial<Record<HookEvent, HookConfig[]>>;
 }

-// ─── Payloads ──────────────────────────────────────────────────────────────
-
 export interface PreToolUsePayload {
  event: 'PreToolUse';
  session_id: string;
@@ -118,21 +112,16 @@ export type HookPayload =
  | PreCompactPayload
  | PostCompactPayload;

-// ─── Response ──────────────────────────────────────────────────────────────
-
 export type HookDecision = 'pass' | 'warn' | 'block';

 export interface HookResponse {
  decision?: HookDecision;
  reason?: string;
-  /** When present, replaces the original tool args / user prompt. */
  updated_input?: Record<string, unknown> | string;
  /** Injected into the model's context for the next turn. */
  additional_context?: string;
 }

-// ─── Runner ────────────────────────────────────────────────────────────────
-
 export interface HookRunner {
  /** Run all hooks for the given event. Returns the effective response. */
  run(event: HookEvent, payload: HookPayload, log?: FastifyBaseLogger): Promise<HookResponse>;
@@ -154,7 +143,6 @@ export function loadHooksConfig(path: string): HooksConfig {
    hooksConfig = {
      hooks: { ...parsed.hooks },
    };
-    // Validate event names
    for (const event of Object.keys(hooksConfig.hooks)) {
      if (!ALL_EVENTS.includes(event as HookEvent)) {
        console.warn(`hooks: unknown event '${event}' in ${path} — ignoring`);
@@ -273,7 +261,6 @@ async function runSingleHook(
        return;
      }

-      // Parse stdout as JSON response
      if (out) {
        try {
          const parsed = JSON.parse(out) as HookResponse;
@@ -291,7 +278,6 @@ async function runSingleHook(
      resolve({ decision: 'pass' });
    });

-    // Write payload to stdin
    const json = JSON.stringify(payload);
    child.stdin.write(json);
    child.stdin.end();
--- a/apps/server/src/services/inference/tests/think-splitter.test.ts
+++ b/apps/server/src/services/inference/tests/think-splitter.test.ts
@@ -0,0 +1,63 @@
+import { describe, it, expect } from 'vitest';
+import { ThinkSplitter } from '../think-splitter.js';
+
+/** Push each delta through a fresh ThinkSplitter, flush it, and concatenate the reasoning/text outputs. */
+function run(deltas: string[]): { reasoning: string; text: string } {
+  const s = new ThinkSplitter();
+  let reasoning = '';
+  let text = '';
+  for (const d of deltas) {
+    const r = s.push(d);
+    reasoning += r.reasoning;
+    text += r.text;
+  }
+  const tail = s.flush();
+  reasoning += tail.reasoning;
+  text += tail.text;
+  return { reasoning, text };
+}
+
+describe('ThinkSplitter', () => {
+  it('passes through ordinary content unchanged (no arm)', () => {
+    expect(run(['Hello ', 'world'])).toEqual({ reasoning: '', text: 'Hello world' });
+  });
+
+  it('splits a whole-buffer think block', () => {
+    expect(run(['<think>reasoning here</think>answer'])).toEqual({
+      reasoning: 'reasoning here',
+      text: 'answer',
+    });
+  });
+
+  it('discards whitespace before <think> and after </think>', () => {
+    expect(run(['  <think>r</think>\n\nanswer'])).toEqual({ reasoning: 'r', text: 'answer' });
+  });
+
+  it('handles the open tag split across deltas', () => {
+    expect(run(['<thi', 'nk>cot</think>out'])).toEqual({ reasoning: 'cot', text: 'out' });
+  });
+
+  it('handles the close tag split across deltas (the core reason this exists)', () => {
+    expect(run(['<think>abc</thi', 'nk>tail'])).toEqual({ reasoning: 'abc', text: 'tail' });
+  });
+
+  it('does not hijack content that only mentions the tag mid-stream', () => {
+    expect(run(['use the ', '<think> tag'])).toEqual({ reasoning: '', text: 'use the <think> tag' });
+  });
+
+  it('emits reasoning incrementally while inside, holding partial close tags', () => {
+    const s = new ThinkSplitter();
+    expect(s.push('<think>aaa')).toEqual({ reasoning: 'aaa', text: '' });
+    // a lone "</" could be the start of the closer, so it is held back
+    expect(s.push('bbb</')).toEqual({ reasoning: 'bbb', text: '' });
+    expect(s.push('think>done')).toEqual({ reasoning: '', text: 'done' });
+  });
+
+  it('treats an unterminated think block at stream end as reasoning', () => {
+    expect(run(['<think>never closed'])).toEqual({ reasoning: 'never closed', text: '' });
+  });
+
+  it('passes through a tag-like opener that is not <think>', () => {
+    expect(run(['<div>hello</div>'])).toEqual({ reasoning: '', text: '<div>hello</div>' });
+  });
+});
--- a/apps/server/src/services/inference/compute-diff.ts
+++ b/apps/server/src/services/inference/compute-diff.ts
@@ -6,7 +6,6 @@
 * without pulling in a full diff library.
 */

-// Write-tool names that can produce file diffs.
 export const WRITE_TOOL_NAMES = new Set([
  'edit_file',
  'create_file',
@@ -68,7 +67,6 @@ export function computeDiff(oldStr: string, newStr: string, filePath: string): s
  const start = Math.max(0, firstDiff - contextBefore);
  const end = Math.min(maxLen - 1, lastDiff + contextAfter);

-  // Build the unified diff hunk
  const hunkLines: string[] = [];
  const hunkOldStart = start + 1; // 1-indexed
  const hunkNewStart = start + 1;
--- a/apps/server/src/services/inference/dcp/tests/deduplication.test.ts
+++ b/apps/server/src/services/inference/dcp/tests/deduplication.test.ts
@@ -1,33 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { deduplicate } from '../strategies/deduplication.js';
-import type { DcpMessage } from '../messages.js';
-
-describe('deduplicate', () => {
-  it('removes consecutive identical tool_call+tool_result pairs', () => {
-    const messages: DcpMessage[] = [
-      { role: 'user', content: 'search for x' },
-      { role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result1', tool_call_id: '1' },
-      // Duplicate pair
-      { role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result1', tool_call_id: '2' },
-    ];
-
-    const { messages: result, stats } = deduplicate(messages);
-    expect(result).toHaveLength(3); // user + first pair
-    expect(stats.removedCount).toBe(2);
-  });
-
-  it('preserves non-duplicate content', () => {
-    const messages: DcpMessage[] = [
-      { role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result1', tool_call_id: '1' },
-      { role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result2', tool_call_id: '2' }, // Different result
-    ];
-
-    const { messages: result, stats } = deduplicate(messages);
-    expect(result).toHaveLength(4);
-    expect(stats.removedCount).toBe(0);
-  });
-});
--- a/apps/server/src/services/inference/dcp/tests/messages.test.ts
+++ b/apps/server/src/services/inference/dcp/tests/messages.test.ts
@@ -1,22 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { toDcpMessages, fromDcpMessages } from '../messages.js';
-
-describe('toDcpMessages', () => {
-  it('converts user messages', () => {
-    const result = toDcpMessages([{ role: 'user', content: 'hello' }]);
-    expect(result[0].role).toBe('user');
-    expect(result[0].content).toBe('hello');
-  });
-
-  it('marks Error: content as isError', () => {
-    const result = toDcpMessages([{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' }]);
-    expect(result[0].isError).toBe(true);
-  });
-});
-
-describe('fromDcpMessages', () => {
-  it('round-trips messages', () => {
-    const original = [{ role: 'user', content: 'hello' }];
-    expect(fromDcpMessages(toDcpMessages(original))).toEqual(original);
-  });
-});
--- a/apps/server/src/services/inference/dcp/tests/purge-errors.test.ts
+++ b/apps/server/src/services/inference/dcp/tests/purge-errors.test.ts
@@ -1,33 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { purgeErrors } from '../strategies/purge-errors.js';
-import type { DcpMessage } from '../messages.js';
-
-describe('purgeErrors', () => {
-  it('removes tool results where content starts with Error:', () => {
-    const messages: DcpMessage[] = [
-      { role: 'tool', content: 'Error: file not found', tool_call_id: '1' },
-      { role: 'tool', content: '{"files":[]}', tool_call_id: '2' },
-    ];
-    const { messages: result, stats } = purgeErrors(messages);
-    expect(result).toHaveLength(1);
-    expect(stats.removedCount).toBe(1);
-  });
-
-  it('removes empty tool results', () => {
-    const messages: DcpMessage[] = [
-      { role: 'tool', content: '', tool_call_id: '1' },
-    ];
-    const { messages: result, stats } = purgeErrors(messages);
-    expect(result).toHaveLength(0);
-    expect(stats.removedCount).toBe(1);
-  });
-
-  it('preserves valid tool results', () => {
-    const messages: DcpMessage[] = [
-      { role: 'tool', content: '{"files":["a.ts"]}', tool_call_id: '1' },
-    ];
-    const { messages: result, stats } = purgeErrors(messages);
-    expect(result).toHaveLength(1);
-    expect(stats.removedCount).toBe(0);
-  });
-});
--- a/apps/server/src/services/inference/dcp/tests/transform.test.ts
+++ b/apps/server/src/services/inference/dcp/tests/transform.test.ts
@@ -1,25 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { transformMessages } from '../transform.js';
-import type { DcpMessage } from '../messages.js';
-
-describe('transformMessages', () => {
-  it('applies dedup then purge in order', () => {
-    const input: DcpMessage[] = [
-      { role: 'user', content: 'hello' },
-      { role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result', tool_call_id: '1' },
-      { role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
-      { role: 'tool', content: 'result', tool_call_id: '2' }, // Dup
-    ];
-
-    const { messages, stats } = transformMessages('test-chat', input);
-    expect(stats.removedCount).toBeGreaterThan(0);
-    expect(messages.length).toBeLessThan(input.length);
-  });
-
-  it('handles empty input', () => {
-    const { messages, stats } = transformMessages('empty', []);
-    expect(messages).toHaveLength(0);
-    expect(stats.removedCount).toBe(0);
-  });
-});
--- a/apps/server/src/services/inference/dcp/index.ts
+++ b/apps/server/src/services/inference/dcp/index.ts
@@ -1,4 +0,0 @@
-export { transformMessages } from './transform.js';
-export type { DcpMessage } from './messages.js';
-export { toDcpMessages, fromDcpMessages } from './messages.js';
-export { getDcpState, clearDcpState } from './state.js';
--- a/apps/server/src/services/inference/dcp/messages.ts
+++ b/apps/server/src/services/inference/dcp/messages.ts
@@ -1,34 +0,0 @@
-// DCP message shape adapter.
-// Converts between BooCode MessagePart[] and the DCP internal shape.
-// Clean-room implementation — no AGPL source copied.
-
-export interface DcpMessage {
-  role: 'user' | 'assistant' | 'tool';
-  content: string;
-  tool_call_id?: string;
-  tool_calls?: Array<{ id: string; name: string; arguments: string }>;
-  isError?: boolean;
-}
-
-export function toDcpMessages(parts: any[]): DcpMessage[] {
-  return parts.map((p: any) => {
-    const msg: DcpMessage = { role: p.role, content: p.content ?? '' };
-    if (p.tool_call_id) msg.tool_call_id = p.tool_call_id;
-    if (p.tool_calls) msg.tool_calls = p.tool_calls;
-    if (p.isError) msg.isError = true;
-    if (p.role === 'tool' && p.content && p.content.startsWith('Error:')) {
-      msg.isError = true;
-    }
-    return msg;
-  });
-}
-
-export function fromDcpMessages(msgs: DcpMessage[]): any[] {
-  return msgs.map((m) => ({
-    role: m.role,
-    content: m.content,
-    ...(m.tool_call_id ? { tool_call_id: m.tool_call_id } : {}),
-    ...(m.tool_calls ? { tool_calls: m.tool_calls } : {}),
-    ...(m.isError ? { isError: true } : {}),
-  }));
-}
--- a/apps/server/src/services/inference/dcp/state.ts
+++ b/apps/server/src/services/inference/dcp/state.ts
@@ -1,27 +0,0 @@
-// Per-chat session state for DCP.
-// Tracks last transform timestamp and message count to avoid re-processing.
-
-interface ChatDcpState {
-  lastTransformAt: number;
-  lastMessageCount: number;
-}
-
-const chatStates = new Map<string, ChatDcpState>();
-
-export function getDcpState(chatId: string): ChatDcpState | undefined {
-  return chatStates.get(chatId);
-}
-
-export function setDcpState(chatId: string, messageCount: number): void {
-  chatStates.set(chatId, { lastTransformAt: Date.now(), lastMessageCount: messageCount });
-}
-
-export function clearDcpState(chatId: string): void {
-  chatStates.delete(chatId);
-}
-
-export function shouldTransform(chatId: string, messageCount: number): boolean {
-  const state = chatStates.get(chatId);
-  if (!state) return true;
-  return state.lastMessageCount !== messageCount;
-}
--- a/apps/server/src/services/inference/dcp/strategies/deduplication.ts
+++ b/apps/server/src/services/inference/dcp/strategies/deduplication.ts
@@ -1,50 +0,0 @@
-import type { DcpMessage } from '../messages.js';
-
-export function deduplicate(messages: DcpMessage[]): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
-  const result: DcpMessage[] = [];
-  let removedCount = 0;
-  let freedTokens = 0;
-  let i = 0;
-
-  while (i < messages.length) {
-    const current: DcpMessage = messages[i]!;
-    const next = messages[i + 1];
-
-    if (
-      current.role === 'assistant' &&
-      current.tool_calls &&
-      next &&
-      next.role === 'tool' &&
-      next.tool_call_id === current.tool_calls[0]?.id
-    ) {
-      const nextNext = messages[i + 2];
-      const nextNextNext = messages[i + 3];
-
-      if (
-        nextNext &&
-        nextNext.role === 'assistant' &&
-        nextNext.tool_calls &&
-        nextNextNext &&
-        nextNextNext.role === 'tool' &&
-        nextNextNext.tool_call_id === nextNext.tool_calls[0]?.id &&
-        nextNext.tool_calls[0]?.name === current.tool_calls[0]?.name &&
-        nextNext.tool_calls[0]?.arguments === current.tool_calls[0]?.arguments &&
-        nextNextNext.content === next.content
-      ) {
-        result.push(current, next);
-        i += 4;
-        removedCount += 2;
-        freedTokens += Math.ceil(nextNext.content.length / 4);
-        freedTokens += Math.ceil(current.content.length / 4);
-      } else {
-        result.push(current);
-        i++;
-      }
-    } else {
-      result.push(current);
-      i++;
-    }
-  }
-
-  return { messages: result, stats: { removedCount, freedTokens } };
-}
--- a/apps/server/src/services/inference/dcp/strategies/purge-errors.ts
+++ b/apps/server/src/services/inference/dcp/strategies/purge-errors.ts
@@ -1,34 +0,0 @@
-// Purge-errors strategy — removes failed/empty tool_result entries.
-// Clean-room implementation.
-
-import type { DcpMessage } from '../messages.js';
-
-const ERROR_PREFIXES = ['Error:', 'error:', 'Error: '];
-const DEFAULT_WINDOW = 5;
-
-export function purgeErrors(
-  messages: DcpMessage[],
-  windowSize: number = DEFAULT_WINDOW,
-): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
-  const result: DcpMessage[] = [];
-  let removedCount = 0;
-  let freedTokens = 0;
-
-  for (const msg of messages) {
-    if (msg.role === 'tool') {
-      const shouldRemove =
-        msg.isError ||
-        ERROR_PREFIXES.some((p) => msg.content.startsWith(p)) ||
-        msg.content.trim() === '';
-
-      if (shouldRemove) {
-        removedCount++;
-        freedTokens += Math.ceil(msg.content.length / 4);
-        continue; // Skip this message
-      }
-    }
-    result.push(msg);
-  }
-
-  return { messages: result, stats: { removedCount, freedTokens } };
-}
--- a/apps/server/src/services/inference/dcp/transform.ts
+++ b/apps/server/src/services/inference/dcp/transform.ts
@@ -1,52 +0,0 @@
-// Transform orchestrator — runs DCP strategies in sequence.
-// Clean-room implementation.
-
-import type { DcpMessage } from './messages.js';
-import { deduplicate } from './strategies/deduplication.js';
-import { purgeErrors } from './strategies/purge-errors.js';
-import { getDcpState, setDcpState, shouldTransform } from './state.js';
-
-export interface TransformStats {
-  removedCount: number;
-  freedTokens: number;
-  dedupRemoved: number;
-  purgeRemoved: number;
-}
-
-export interface TransformResult {
-  messages: DcpMessage[];
-  stats: TransformStats;
-}
-
-export function transformMessages(chatId: string, messages: DcpMessage[]): TransformResult {
-  if (!shouldTransform(chatId, messages.length)) {
-    return { messages, stats: { removedCount: 0, freedTokens: 0, dedupRemoved: 0, purgeRemoved: 0 } };
-  }
-
-  let m = messages;
-
-  // Step 1: Deduplicate
-  const dedupResult = deduplicate(m);
-  m = dedupResult.messages;
-  const dedupRemoved = dedupResult.stats.removedCount;
-
-  // Step 2: Purge errors
-  const purgeResult = purgeErrors(m);
-  m = purgeResult.messages;
-  const purgeRemoved = purgeResult.stats.removedCount;
-
-  const totalRemoved = dedupRemoved + purgeRemoved;
-  const totalFreed = dedupResult.stats.freedTokens + purgeResult.stats.freedTokens;
-
-  setDcpState(chatId, messages.length);
-
-  return {
-    messages: m,
-    stats: {
-      removedCount: totalRemoved,
-      freedTokens: totalFreed,
-      dedupRemoved,
-      purgeRemoved,
-    },
-  };
-}
--- a/apps/server/src/services/inference/error-handler.ts
+++ b/apps/server/src/services/inference/error-handler.ts
@@ -7,8 +7,7 @@ import {
 } from '../artifacts.js';
 import * as modelContext from '../model-context.js';
 import { maybeFlagForCompaction } from './payload.js';
-import { insertParts, partsFromAssistantMessage } from './parts.js';
-import type { PartInsert } from './parts.js';
+import { insertParts, partsFromAssistantMessage, type PartInsert } from "./parts.js";
 import { stripToolMarkup } from './tool-call-parser.js';
 import type { InferenceContext, StreamResult, TurnArgs } from './types.js';

@@ -232,6 +231,7 @@ export async function finalizeCompletion(
    content,
    tool_calls: null,
    reasoning: result.reasoning,
+    reasoningBlocks: result.reasoningBlocks,
  }).map((p) => ({
    ...p,
    message_id: assistantMessageId,
--- a/apps/server/src/services/inference/parts.ts
+++ b/apps/server/src/services/inference/parts.ts
@@ -40,11 +40,13 @@ export async function insertParts(sql: Sql, parts: PartInsert[]): Promise<void>
        sequence: p.sequence,
        kind: p.kind,
        payload: sql.json(p.payload as never),
+        retry_count: 0,
      })),
      'message_id',
      'sequence',
      'kind',
      'payload',
+      'retry_count',
    )}
  `;
 }
@@ -62,10 +64,24 @@ export function partsFromAssistantMessage(args: {
  // Most rows have none — only models with separate reasoning channels
  // (qwen3.6 etc.) populate this.
  reasoning?: string;
+  // Phase 2 (anthropic): per-thinking-block reasoning with signatures. When
+  // present (and non-empty) this supersedes `reasoning` — one reasoning part
+  // per block, each carrying its signature for verbatim replay.
+  reasoningBlocks?: Array<{ text: string; signature?: string }>;
 }): Omit<PartInsert, 'message_id'>[] {
  const out: Omit<PartInsert, 'message_id'>[] = [];
  let seq = 0;
-  if (args.reasoning && args.reasoning.length > 0) {
+  const blocks = args.reasoningBlocks?.filter((b) => b.text.length > 0 || b.signature);
+  if (blocks && blocks.length > 0) {
+    for (const b of blocks) {
+      out.push({
+        sequence: seq,
+        kind: 'reasoning',
+        payload: { text: b.text, ...(b.signature ? { signature: b.signature } : {}) },
+      });
+      seq += 1;
+    }
+  } else if (args.reasoning && args.reasoning.length > 0) {
    out.push({ sequence: seq, kind: 'reasoning', payload: { text: args.reasoning } });
    seq += 1;
  }
--- a/apps/server/src/services/inference/payload.ts
+++ b/apps/server/src/services/inference/payload.ts
@@ -28,6 +28,10 @@ export interface OpenAiMessage {
  // this into the AI SDK ReasoningPart when forwarding to the model so
  // reasoning models can resume mid-thought across tool-call boundaries.
  reasoning?: string;
+  // Phase 2 (anthropic): per-thinking-block reasoning with signatures, from the
+  // same reasoning_parts rows. toModelMessages replays each signed block
+  // verbatim (the joined `reasoning` string can't carry per-block signatures).
+  reasoning_blocks?: Array<{ text: string; signature?: string }>;
 }

 // v1.12: buildSystemPrompt lives in services/system-prompt.ts. It awaits the
@@ -185,6 +189,13 @@ export async function buildMessagesPayload(
      // message are rare but concat preserves ordering. Skip when absent.
      if (m.reasoning_parts && m.reasoning_parts.length > 0) {
        msg.reasoning = m.reasoning_parts.map((p) => p.text ?? '').join('');
+        // Carry per-block text+signature for verbatim anthropic replay.
+        if (m.reasoning_parts.some((p) => p.signature)) {
+          msg.reasoning_blocks = m.reasoning_parts.map((p) => ({
+            text: p.text ?? '',
+            ...(p.signature ? { signature: p.signature } : {}),
+          }));
+        }
      }
      const hasPayload =
        (msg.content != null && msg.content.trim().length > 0) ||
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -1,7 +1,9 @@
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
 import { createDeepSeek } from '@ai-sdk/deepseek';
+import { createAnthropic } from '@ai-sdk/anthropic';
 import type { LanguageModel } from 'ai';
 import { getLlamaProviders, parseModelRef } from '../llama-providers.js';
+import { GATEWAY_KIND, isGatewayVirtualModel } from '@boocode/contracts/gateway';

 // v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
 // config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
@@ -53,44 +55,57 @@ export function isDeepSeekModel(modelId: string): boolean {
  return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
 }

-let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
+// Cache keyed by apiKey+baseURL so a runtime env change (and resetDeepSeekProvider)
+// can't hand back a provider built with stale credentials.
+const deepseekProviderCache = new Map<string, ReturnType<typeof createDeepSeek>>();

 function getDeepSeekProvider(
  apiKey: string,
  baseURL: string,
 ): ReturnType<typeof createDeepSeek> {
-  if (!deepseekProviderCache) {
-    deepseekProviderCache = createDeepSeek({
-      apiKey,
-      baseURL,
-    });
+  const key = `${apiKey}||${baseURL}`;
+  let provider = deepseekProviderCache.get(key);
+  if (!provider) {
+    provider = createDeepSeek({ apiKey, baseURL });
+    deepseekProviderCache.set(key, provider);
  }
-  return deepseekProviderCache;
+  return provider;
 }

-// ---------------------------------------------------------------------------
-// Provider-aware resolver (W2, D-2, D-3)
-// ---------------------------------------------------------------------------
-
 // P7: 'gateway' routes to the BooControl auto:* gateway (OpenAI-compatible,
 // does its own policy routing + failover). 'gateway_error' is the
 // present-but-unhealthy / orphaned-session state: the session selected an
 // auto:* model but the gateway provider is missing/disabled, so we surface a
 // clean error instead of silently mis-routing to LLAMA_SWAP_URL.
-export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error';
+export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error' | 'anthropic';

-/** Provider registry `kind` marking the BooControl routing gateway. */
-export const GATEWAY_KIND = 'boocontrol-gateway';
+const ANTHROPIC_MODEL_PREFIX = 'claude-';

-/**
- * Whether a (bare) wire model id is a gateway virtual model. Used to detect an
- * orphaned auto:* session whose gateway registry entry was removed — the id
- * still looks like a gateway model, so resolve to gateway_error, never swap.
- */
-export function isGatewayVirtualModel(wireModelId: string): boolean {
-  return wireModelId === 'auto' || wireModelId.startsWith('auto:');
+/** Legacy prefix check for bare "claude-*" ids, mirroring isDeepSeekModel. */
+export function isAnthropicModel(modelId: string): boolean {
+  return modelId.startsWith(ANTHROPIC_MODEL_PREFIX);
 }

+// Cache keyed by apiKey+baseURL, same rationale as the DeepSeek cache.
+const anthropicProviderCache = new Map<string, ReturnType<typeof createAnthropic>>();
+
+function getAnthropicProvider(apiKey: string, baseURL?: string): ReturnType<typeof createAnthropic> {
+  const key = `${apiKey}||${baseURL ?? ''}`;
+  let provider = anthropicProviderCache.get(key);
+  if (!provider) {
+    provider = createAnthropic({ apiKey, ...(baseURL ? { baseURL } : {}) });
+    anthropicProviderCache.set(key, provider);
+  }
+  return provider;
+}
+
+/** Drop every cached Anthropic provider, whatever its key (e.g. when env vars change at runtime). */
+export function resetAnthropicProvider(): void {
+  anthropicProviderCache.clear();
+}
+
+export { GATEWAY_KIND, isGatewayVirtualModel } from '@boocode/contracts/gateway';
+
 export interface ResolvedModel {
  /** Routing destination. */
  route: InferenceRoute;
@@ -114,6 +129,9 @@ interface ConfigLike {
  LLAMA_SWAP_URL: string;
  DEEPSEEK_API_KEY?: string;
  DEEPSEEK_BASE_URL?: string;
+  DEEPSEEK_BETA_BASE_URL?: string;
+  ANTHROPIC_API_KEY?: string;
+  ANTHROPIC_BASE_URL?: string;
 }

 /**
@@ -137,8 +155,6 @@ export function resolveModelProvider(
  const deepseekConfigured = !!config.DEEPSEEK_API_KEY;
  const deepseekBaseUrl = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');

-  // --- DeepSeek routing ---
-  // Explicit provider id "deepseek" → DeepSeek SDK.
  if (providerId === 'deepseek' && deepseekConfigured) {
    return {
      route: 'deepseek',
@@ -161,13 +177,30 @@ export function resolveModelProvider(
    };
  }

-  // --- Local provider routing ---
+  // Hosted Anthropic Claude (different wire: /v1/messages, x-api-key). Routed
+  // by provider id "anthropic" or, for bare ids, the legacy "claude-*" prefix
+  // when ANTHROPIC_API_KEY is configured. baseUrl is the API base for the
+  // streaming path; resolveModelEndpoint (direct OpenAI-shaped fetch) does NOT
+  // support this wire and throws.
+  const anthropicConfigured = !!config.ANTHROPIC_API_KEY;
+  const anthropicBaseUrl = (config.ANTHROPIC_BASE_URL ?? 'https://api.anthropic.com').replace(/\/+$/, '');
+  if (providerId === 'anthropic' && anthropicConfigured) {
+    return { route: 'anthropic', baseUrl: anthropicBaseUrl, wireModelId, isLegacyBareId, providerId: 'anthropic' };
+  }
+  if (isLegacyBareId && isAnthropicModel(wireModelId) && anthropicConfigured) {
+    return { route: 'anthropic', baseUrl: anthropicBaseUrl, wireModelId, isLegacyBareId: true, providerId: 'anthropic' };
+  }
+
+  if (isLegacyBareId && isGatewayVirtualModel(wireModelId)) {
+    const gw = providers.providers.find((p) => p.kind === GATEWAY_KIND);
+    if (gw) {
+      return { route: 'gateway', baseUrl: gw.baseUrl, wireModelId, isLegacyBareId, providerId: gw.id };
+    }
+    return { route: 'gateway_error', baseUrl: '', wireModelId, isLegacyBareId, providerId, gatewayReason: 'offline' };
+  }
+
  const provider = providers.providers.find((p) => p.id === providerId);

-  // --- Gateway routing (P7) ---
-  // A known gateway-kind provider → route to the gateway as an OpenAI-compatible
-  // upstream (it does its own policy routing). The gateway forwards X-Boo-Source
-  // to the chosen target so attribution survives the extra hop.
  if (provider && provider.kind === GATEWAY_KIND) {
    return {
      route: 'gateway',
@@ -242,15 +275,26 @@ export function upstreamModel(
  modelId: string,
  agent?: AgentLike | null,
  source?: string,
+  useBeta?: boolean,
 ): LanguageModel {
  const resolved = resolveModelProvider(modelId, config);
  if (resolved.route === 'deepseek') {
+    const baseUrl = useBeta
+      ? (config.DEEPSEEK_BETA_BASE_URL ?? 'https://api.deepseek.com/beta')
+      : resolved.baseUrl;
    return getDeepSeekProvider(
      config.DEEPSEEK_API_KEY!,
-      resolved.baseUrl,
+      baseUrl,
    ).chat(resolved.wireModelId);
  }

+  if (resolved.route === 'anthropic') {
+    const baseURL = config.ANTHROPIC_BASE_URL
+      ? `${config.ANTHROPIC_BASE_URL.replace(/\/+$/, '')}/v1`
+      : undefined;
+    return getAnthropicProvider(config.ANTHROPIC_API_KEY!, baseURL)(resolved.wireModelId);
+  }
+
  // P7: gateway is OpenAI-compatible — same adapter as swap, pointed at the
  // gateway baseUrl. The gateway resolves the policy + forwards X-Boo-Source.
  if (resolved.route === 'gateway') {
@@ -285,6 +329,16 @@ export function resolveModelEndpoint(
    };
  }

+  // Anthropic speaks /v1/messages (x-api-key, blocks content) — the OpenAI-shaped
+  // direct fetch these callers use cannot target it. Compaction guards against
+  // this by summarizing with a local model; surface a clear error if anything
+  // else direct-fetches a Claude model.
+  if (resolved.route === 'anthropic') {
+    throw new Error(
+      `anthropic wire has no OpenAI-compatible direct-fetch endpoint (compaction/task-model): ${modelId}`,
+    );
+  }
+
  // P7: orphaned auto:* session with no gateway — fail loud (no swap fallback).
  if (resolved.route === 'gateway_error') {
    throw new Error(
@@ -304,5 +358,5 @@ export function resolveModelEndpoint(

 /** Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime). */
 export function resetDeepSeekProvider(): void {
-  deepseekProviderCache = null;
+  deepseekProviderCache.clear();
 }
--- a/apps/server/src/services/inference/state-graph.ts
+++ b/apps/server/src/services/inference/state-graph.ts
@@ -26,7 +26,6 @@ import {
  buildMessagesPayload,
  loadContext,
 } from './payload.js';
-import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
 import {
  finalizeCompletion,
  finalizeEmpty,
@@ -79,7 +78,7 @@ async function detectAndRunBuild(
  const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
  const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
  try {
-    const out = await new Promise<string>((resolve, reject) => {
+    const out = await new Promise<string>((resolve, _reject) => {
      execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
        (err, stdout, stderr) => {
          if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
@@ -230,7 +229,7 @@ export async function runGraph(
 }

 // -- PLAN node ------------------------------------------------------------
-// Top-of-loop gate → compaction → loadContext → DCP → buildPayload → stream
+// Top-of-loop gate → compaction → loadContext → buildPayload → stream

 async function planNode(
  ctx: InferenceContext,
@@ -311,18 +310,6 @@ async function planNode(
  const projectRoot = await resolveProjectRoot(iterProject.path);
  state.projectRoot = projectRoot;

-  // 4. DCP transform
-  try {
-    const dcpMsgs = toDcpMessages(history);
-    const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
-    if (stats.removedCount > 0) {
-      ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
-      history = fromDcpMessages(pruned) as typeof history;
-    }
-  } catch (err) {
-    ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
-  }
-
  // 5. Log step boundary
  ctx.log.info(
    { sessionId, chatId, step: state.stepNumber, assistantMessageId: state.assistantMessageId },
--- a/apps/server/src/services/inference/stream-phase-adapter.ts
+++ b/apps/server/src/services/inference/stream-phase-adapter.ts
@@ -11,9 +11,10 @@ import type { Agent, ToolCall } from '../../types/api.js';
 import type { ToolJsonSchema } from '../tools.js';
 import type { OpenAiMessage } from './payload.js';
 import { extractToolCallBlocks } from './tool-call-parser.js';
+import { ThinkSplitter } from './think-splitter.js';
 import { classifyStreamError } from './stream-error-classifier.js';
 import type { StreamResult } from './types.js';
-import { isDeepSeekModel, upstreamModel } from './provider.js';
+import { resolveModelProvider, upstreamModel } from './provider.js';
 import {
  jsonSchema,
  streamText,
@@ -54,6 +55,10 @@ export interface StreamOptions {
  // vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
  // API param for deepseek-v4-flash / deepseek-v4-pro models.
  reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+  // vDeepSeek: JSON output mode. When set, model outputs valid JSON.
+  response_format?: { type: 'json_object' };
+  // vDeepSeek: prefix content for chat prefix completion.
+  prefix_content?: string;
 }

 // P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
@@ -78,6 +83,8 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
    dry_allowed_length: agent?.dry_allowed_length ?? undefined,
    dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
    reasoning_effort: agent?.reasoning_effort ?? undefined,
+    response_format: agent?.response_format ?? undefined,
+    prefix_content: agent?.prefix_content ?? undefined,
  };
 }

@@ -125,7 +132,11 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
    }
    if (m.role === 'assistant') {
      const hasTools = m.tool_calls && m.tool_calls.length > 0;
-      const hasReasoning = typeof m.reasoning === 'string' && m.reasoning.length > 0;
+      // Anthropic signed thinking blocks (Phase 2) must be replayed verbatim per
+      // block with their signature; other reasoning uses the joined string.
+      const signedBlocks = (m.reasoning_blocks ?? []).filter((b) => b.signature);
+      const hasReasoning =
+        (typeof m.reasoning === 'string' && m.reasoning.length > 0) || signedBlocks.length > 0;
      if (!hasTools && !hasReasoning) {
        // Bare text assistant (string content). null content + no tool_calls
        // is degenerate but harmless to forward.
@@ -136,12 +147,20 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
      // assistant content array. Reasoning models (qwen3.6) consume their
      // prior reasoning context to resume mid-thought across tool boundaries.
      const parts: Array<
-        | { type: 'reasoning'; text: string }
+        | { type: 'reasoning'; text: string; providerOptions?: Record<string, Record<string, JSONValue>> }
        | { type: 'text'; text: string }
        | { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown }
      > = [];
-      if (hasReasoning) {
-        parts.push({ type: 'reasoning', text: m.reasoning! });
+      if (signedBlocks.length > 0) {
+        for (const b of m.reasoning_blocks!) {
+          parts.push({
+            type: 'reasoning',
+            text: b.text,
+            ...(b.signature ? { providerOptions: { anthropic: { signature: b.signature } } } : {}),
+          });
+        }
+      } else if (typeof m.reasoning === 'string' && m.reasoning.length > 0) {
+        parts.push({ type: 'reasoning', text: m.reasoning });
      }
      if (m.content && m.content.length > 0) {
        parts.push({ type: 'text', text: m.content });
@@ -187,12 +206,13 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
 // No `execute` field: BooCode runs tools itself in tool-phase.ts; streamText
 // surfaces the tool-call parts via fullStream and we capture them for the
 // outer loop to dispatch.
-function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<typeof tool>> {
+function buildAiTools(schemas: ToolJsonSchema[], strict?: boolean): Record<string, ReturnType<typeof tool>> {
  const out: Record<string, ReturnType<typeof tool>> = {};
  for (const s of schemas) {
    out[s.function.name] = tool({
      description: s.function.description,
      inputSchema: jsonSchema(s.function.parameters),
+      ...(strict ? { strict } : {}),
    });
  }
  return out;
@@ -235,7 +255,19 @@ export async function streamCompletion(
 ): Promise<StreamResult> {
  const aiMessages = toModelMessages(messages);
  const hasTools = opts.tools !== null && opts.tools.length > 0;
-  const aiTools = hasTools ? buildAiTools(opts.tools!) : undefined;
+  // DeepSeek detection via providerId (handles both bare "deepseek-*" and
+  // composite "deepseek/model" — JD1 fix).
+  const resolvedModel = resolveModelProvider(model, ctx.config);
+  const isDsModel = resolvedModel.providerId === 'deepseek';
+  // Anthropic's Messages API rejects the llama.cpp sampler extensions and
+  // out-of-range temperature/top_p (Opus 4.x), so we omit all of them for the
+  // anthropic route and let the model's defaults apply.
+  const isAnthropic = resolvedModel.route === 'anthropic';
+  // Extended thinking is opt-in per agent via reasoning_effort (same gate as
+  // DeepSeek). Adaptive lets Claude choose depth; display:'summarized' surfaces
+  // the reasoning to BooChat (Opus 4.7+ default 'omitted' = empty-text blocks).
+  const anthropicThinkingEnabled = isAnthropic && !!opts.reasoning_effort && opts.reasoning_effort !== 'off';
+  const aiTools = hasTools ? buildAiTools(opts.tools!, isDsModel) : undefined;

  const startedAt = Date.now();
  // v1.13.1-C: accumulate reasoning text across reasoning-delta parts.
@@ -244,6 +276,18 @@ export async function streamCompletion(
  // Replaces the v1.13.1-A counter-only diagnostic.
  let reasoningAccumulated = '';

+  // Phase 2 (anthropic): reasoning grouped per thinking block (keyed by the
+  // stream part id = content-block index), each carrying its signature. The
+  // signature arrives as a reasoning-delta with empty text + providerMetadata.
+  // Insertion order is preserved (Map), so replay order matches the model's.
+  const reasoningBlockMap = new Map<string, { text: string; signature?: string }>();
+
+  // Peel inline <think>...</think> reasoning out of the text-delta channel for
+  // local models that don't use a structured reasoning channel. Arms only when
+  // content starts with <think>; otherwise a verbatim pass-through (no-op for
+  // models whose reasoning already arrives via reasoning-delta).
+  const thinkSplitter = new ThinkSplitter();
+
  // v1.13.3: experimental_repairToolCall keeps the stream alive when the
  // model emits a malformed tool call (bad JSON args, unknown name, etc.).
  // Without a repair function streamText throws and the WHOLE stream dies;
@@ -274,13 +318,14 @@ export async function streamCompletion(
  // the openai-compatible provider dropped it with an "unsupported feature: topK"
  // warning and min_p was never wired at all, so both were dead on the wire
  // before this. They now go through the same extraBody path as the new params.
-  const samplerBody = buildSamplerProviderOptions(opts);
+  // Omit llama.cpp sampler extensions on the anthropic route (rejected there).
+  const samplerBody = isAnthropic ? undefined : buildSamplerProviderOptions(opts);

  // vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
  let deepseekProviderOptions:
    | { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
    | undefined;
-  if (isDeepSeekModel(model)) {
+  if (isDsModel) {
    const dsEffort = opts.reasoning_effort;
    const thinkingEnabled = dsEffort && dsEffort !== 'off';
    deepseekProviderOptions = {
@@ -305,20 +350,40 @@ export async function streamCompletion(
    ? AbortSignal.any([signal, stallAc.signal])
    : stallAc.signal;

+  // vDeepSeek: chat prefix completion (B2). When prefix_content is set and
+  // the model is DeepSeek, append an assistant message holding the prefix (no
+  // explicit prefix=true flag is set here). If the last message is already an
+  // assistant, it is replaced wholesale to avoid two in a row (AV5 fix).
+  let prefixMessages = aiMessages;
+  const useBeta = isDsModel && (hasTools || !!opts.prefix_content);
+  if (opts.prefix_content && isDsModel) {
+    const prefixMsg = { role: 'assistant' as const, content: opts.prefix_content };
+    const last = prefixMessages[prefixMessages.length - 1];
+    if (last && last.role === 'assistant') {
+      prefixMessages = [...prefixMessages.slice(0, -1), prefixMsg];
+    } else {
+      prefixMessages = [...prefixMessages, prefixMsg];
+    }
+  }
+
  const result = streamText({
-    model: upstreamModel(ctx.config, model, agent ?? null, 'boochat'),
-    messages: aiMessages,
+    model: upstreamModel(ctx.config, model, agent ?? null, 'boochat', useBeta),
+    messages: prefixMessages,
    ...(aiTools
      ? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
      : {}),
-    ...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
-    ...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
-    ...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
-    ...(samplerBody || deepseekProviderOptions
+    ...(!isAnthropic && typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
+    ...(!isAnthropic && typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
+    ...(!isAnthropic && typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
+    ...(!isAnthropic && opts.response_format ? { responseFormat: { type: 'json_object' } } : {}),
+    ...(samplerBody || deepseekProviderOptions || anthropicThinkingEnabled
      ? {
          providerOptions: {
            ...(samplerBody ? { openaiCompatible: samplerBody } : {}),
            ...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
+            ...(anthropicThinkingEnabled
+              ? { anthropic: { thinking: { type: 'adaptive', display: 'summarized' } } }
+              : {}),
          },
        }
      : {}),
@@ -341,7 +406,12 @@ export async function streamCompletion(
    bumpStallTimer();
    switch (part.type) {
      case 'text-delta': {
-        pendingBuffer += part.text;
+        // Peel any inline <think> reasoning before tool-call extraction; the
+        // reasoning span accumulates exactly like a structured reasoning-delta.
+        const split = thinkSplitter.push(part.text);
+        if (split.reasoning) reasoningAccumulated += split.reasoning;
+        if (!split.text) break;
+        pendingBuffer += split.text;
        // v1.13.16: unified extraction. The helper finds the earliest-opening
        // complete <tool_call> or <invoke> block, flushes prose between/around
        // them, holds any partial opener for the next chunk, and silently
@@ -373,11 +443,18 @@ export async function streamCompletion(
        break;
      }
      case 'reasoning-delta': {
-        // v1.13.1-C: accumulate; finalizeCompletion / executeToolPhase
-        // dual-write the resulting text as a kind='reasoning' part.
+        // v1.13.1-C: accumulate the joined string (compaction prose + non-
+        // anthropic replay). Phase 2: also group per block id and capture the
+        // Anthropic signature (arrives on a delta with empty text).
        if (typeof part.text === 'string') {
          reasoningAccumulated += part.text;
        }
+        const blk = reasoningBlockMap.get(part.id) ?? { text: '' };
+        if (typeof part.text === 'string') blk.text += part.text;
+        const sig = (part.providerMetadata as Record<string, Record<string, unknown> | undefined> | undefined)
+          ?.anthropic?.signature;
+        if (typeof sig === 'string') blk.signature = sig;
+        reasoningBlockMap.set(part.id, blk);
        break;
      }
      case 'finish': {
@@ -403,6 +480,12 @@ export async function streamCompletion(
    }
  }

+  // Resolve any text the splitter was holding (an unterminated <think> block
+  // becomes reasoning; a held partial opener becomes text).
+  const splitTail = thinkSplitter.flush();
+  if (splitTail.reasoning) reasoningAccumulated += splitTail.reasoning;
+  if (splitTail.text) pendingBuffer += splitTail.text;
+
  // v1.13.1-A: drain any buffered partial XML opener as plain text. The
  // pre-AI-SDK path did this on stream end too — better to leak `<tool_c`
  // than vanish the text.
@@ -467,6 +550,13 @@ export async function streamCompletion(
    );
  }

+  const reasoningBlocks = Array.from(reasoningBlockMap.values()).filter(
+    (b) => b.text.length > 0 || b.signature,
+  );
+  // Only signed blocks need verbatim per-block replay; text-only reasoning uses
+  // the joined string, so omit reasoningBlocks unless something was signed.
+  const hasSignedReasoning = reasoningBlocks.some((b) => b.signature);
+
  return {
    finishReason,
    content,
@@ -474,6 +564,7 @@ export async function streamCompletion(
    promptTokens,
    completionTokens,
    reasoning: reasoningAccumulated,
+    ...(hasSignedReasoning ? { reasoningBlocks } : {}),
    // vDeepSeek: optional usage breakdown populated when the provider returns
    // structured usage (cache hit tokens, reasoning tokens).
    cacheReadTokens: cacheReadTokens ?? undefined,
--- a/apps/server/src/services/inference/think-splitter.ts
+++ b/apps/server/src/services/inference/think-splitter.ts
@@ -0,0 +1,100 @@
+// ThinkSplitter — peels inline <think>...</think> reasoning out of streamed text
+// content. Some local models (QwQ, DeepSeek-R1 distills, MiniMax) served raw
+// emit their chain-of-thought inline in the assistant `content` channel rather
+// than on a structured reasoning channel; BooCode's stream adapter otherwise
+// treats that as ordinary prose. This splitter routes the reasoning span to the
+// reasoning accumulator and passes the rest through unchanged.
+//
+// Ported from deepseek-reasonix internal/provider/openai/think.go. Two
+// guarantees make it safe to run on every text delta:
+//   1. It only ARMS if the turn's content begins with <think> (after leading
+//      whitespace), so an answer that merely mentions the tag is never hijacked.
+//   2. For any content that does not start with <think> it degrades to a
+//      verbatim pass-through (a no-op for models on a structured reasoning
+//      channel).
+// It buffers partial closing tags across chunk boundaries so a `</thi` split
+// across two deltas is not mistaken for prose.
+
+const OPEN = '<think>';
+const CLOSE = '</think>';
+const LEADING_WS = /^[ \t\r\n]+/;
+
+type State = 'probe' | 'inside' | 'passthrough';
+
+export interface SplitResult {
+  /** Text classified as reasoning (the inside of a <think> block). */
+  reasoning: string;
+  /** Text classified as ordinary content to pass through. */
+  text: string;
+}
+
+/**
+ * How many trailing characters of `s` form an incomplete leading piece of
+ * `marker` — the part to hold back because a tag may be split across chunks.
+ * Always smaller than `marker.length`; a complete match is the caller's job.
+ */
+function markerSuffixLen(s: string, marker: string): number {
+  // Start from the longest candidate tail and shrink until one fits.
+  let len = Math.min(marker.length - 1, s.length);
+  while (len > 0 && !marker.startsWith(s.slice(-len))) len -= 1;
+  // len can only be negative for an empty marker; report that as "nothing held".
+  return Math.max(len, 0);
+}
+
+/**
+ * Stateful, single-stream splitter; create one per streamed completion. Leading
+ * whitespace after </think> is dropped even when it arrives in a later chunk.
+ */
+export class ThinkSplitter {
+  private state: State = 'probe';
+  private buf = '';
+  private trimLeading = false; // true from </think> until answer text shows up
+
+  /** Feed one text delta; returns the reasoning/text portions ready to emit. */
+  push(s: string): SplitResult {
+    if (this.state === 'passthrough') {
+      const text = this.trimLeading ? s.replace(LEADING_WS, '') : s;
+      this.trimLeading = this.trimLeading && text.length === 0;
+      return { reasoning: '', text };
+    }
+    if (this.state === 'inside') return this.scanClose(s);
+    // probe: hold while the buffer is still a viable (partial) <think> opener.
+    this.buf += s;
+    const trimmed = this.buf.replace(LEADING_WS, '');
+    if (trimmed.length < OPEN.length && OPEN.startsWith(trimmed)) return { reasoning: '', text: '' };
+    if (trimmed.startsWith(OPEN)) {
+      this.state = 'inside';
+      this.buf = '';
+      return this.scanClose(trimmed.slice(OPEN.length));
+    }
+    // Not a <think> turn: release the probe buffer verbatim and stop scanning.
+    this.state = 'passthrough';
+    return this.flush();
+  }
+
+  /** Stream end: an unterminated <think> is reasoning, a held opener is text. */
+  flush(): SplitResult {
+    const r = this.buf;
+    this.buf = '';
+    return this.state === 'inside' ? { reasoning: r, text: '' } : { reasoning: '', text: r };
+  }
+
+  /** Inside <think>: emit reasoning up to </think>, holding back a partial tag. */
+  private scanClose(s: string): SplitResult {
+    this.buf += s;
+    const idx = this.buf.indexOf(CLOSE);
+    if (idx >= 0) {
+      const reasoning = this.buf.slice(0, idx);
+      const text = this.buf.slice(idx + CLOSE.length).replace(LEADING_WS, '');
+      this.buf = '';
+      this.state = 'passthrough';
+      this.trimLeading = text.length === 0; // answer not started: keep trimming
+      return { reasoning, text };
+    }
+    // No full closing tag yet — emit everything except a possible partial tag.
+    const keep = markerSuffixLen(this.buf, CLOSE);
+    const reasoning = this.buf.slice(0, this.buf.length - keep);
+    this.buf = this.buf.slice(this.buf.length - keep);
+    return { reasoning, text: '' };
+  }
+}
--- a/apps/server/src/services/inference/tool-input-repair.ts
+++ b/apps/server/src/services/inference/tool-input-repair.ts
@@ -82,7 +82,6 @@ function repairValue(
  const isInteger = schemaType === 'integer' || schemaType === 'number';
  const isString = schemaType === 'string';

-  // --- Array repair: wrap bare value or empty object ---
  if (isArray) {
    if (!Array.isArray(value)) {
      if (typeof value === 'string') {
@@ -114,7 +113,6 @@ function repairValue(
    return value;
  }

-  // --- Object repair: recurse into properties ---
  if (isObject && typeof value === 'object' && value !== null && !Array.isArray(value)) {
    const props = (schema.properties as Record<string, unknown>) ?? {};
    const repaired: Record<string, unknown> = {};
@@ -129,7 +127,6 @@ function repairValue(
    return repaired;
  }

-  // --- String repair: unwrap markdown autolinks ---
  if (isString && typeof value === 'string') {
    const match = value.match(MARKDOWN_AUTOLINK_RE);
    if (match) {
@@ -139,7 +136,6 @@ function repairValue(
    return value;
  }

-  // --- Boolean coercion ---
  if (isBoolean && typeof value === 'string') {
    const lower = value.toLowerCase();
    if (lower === 'true') {
@@ -153,7 +149,6 @@ function repairValue(
    return value;
  }

-  // --- Integer coercion: "42.0" → 42 ---
  if (isInteger && typeof value === 'string') {
    const num = Number(value);
    if (!Number.isNaN(num)) {
@@ -163,13 +158,11 @@ function repairValue(
    return value;
  }

-  // --- Integer coercion: boolean → 0/1 ---
  if (isInteger && typeof value === 'boolean') {
    repairs.push({ field, kind: 'coerced_boolean_to_integer', detail: `Coerced boolean ${value} → ${value ? 1 : 0} for '${field}'` });
    return value ? 1 : 0;
  }

-  // --- Empty string to null for optional fields ---
  if (value === '' && !required) {
    repairs.push({ field, kind: 'empty_string_to_undefined', detail: `Converted empty string for optional '${field}'` });
    return undefined;
--- a/apps/server/src/services/inference/tool-phase.ts
+++ b/apps/server/src/services/inference/tool-phase.ts
@@ -1,8 +1,7 @@
 import type { Agent, Session, ToolCall } from '../../types/api.js';
 import * as modelContext from '../model-context.js';
 import { PathScopeError } from '../path_guard.js';
-import { TOOLS_BY_NAME } from '../tools.js';
-import type { ToolExecCtx } from '../tools.js';
+import { TOOLS_BY_NAME, type ToolExecCtx } from "../tools.js";
 import { matchToolGlob } from '../agents.js';
 import { maybeFlagForCompaction } from './payload.js';
 import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
@@ -214,6 +213,7 @@ export async function executeToolPhase(
      content,
      tool_calls: toolCalls,
      reasoning: result.reasoning,
+      reasoningBlocks: result.reasoningBlocks,
    }).map((p) => ({
      ...p,
      message_id: assistantMessageId,
@@ -404,7 +404,7 @@ export async function executeToolPhase(
      });
      const tres = await executeToolCall(
        projectRoot, tc, session.allowed_read_paths,
-        { sql: ctx.sql, sessionId },
+        { sql: ctx.sql, sessionId, toolCallId: tc.id },
        ctx.hooks, sessionId,
      );
      // tool_trace instrumentation - finish
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -21,7 +21,6 @@ import {
  buildMessagesPayload,
  loadContext,
 } from './payload.js';
-import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
 import {
  finalizeCompletion,
  finalizeEmpty,
@@ -88,9 +87,8 @@ async function detectAndRunBuild(
  const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
  const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';

-  // Run the build.
  try {
-    const out = await new Promise<string>((resolve, reject) => {
+    const out = await new Promise<string>((resolve, _reject) => {
      execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
        (err, stdout, stderr) => {
          if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
@@ -136,7 +134,6 @@ export { buildMessagesPayload } from './payload.js';
 // turn.ts type-hub-and-leaf near-cycle. They are re-exported from there via
 // inference/index.ts for the public surface.

-
 export async function runAssistantTurn(
  ctx: InferenceContext,
  args: TurnArgs,
@@ -211,7 +208,6 @@ export async function runAssistantTurn(
  let pendingRecoveryNote: string | undefined = args.pendingRecoveryNote;

  if (session.state_graph_enabled) {
-    // ---- optional state graph path ----
    const gProjectRoot = await resolveProjectRoot(project.path);
    const graphResult = await runGraph(ctx, args, { effectiveCap, budget, agent, projectRoot: gProjectRoot });
    stepNumber = graphResult.stepNumber;
@@ -221,7 +217,6 @@ export async function runAssistantTurn(
    // mistakeTracker is the same object reference (mutated in place by the graph).
  } else {
    while (stepNumber < effectiveCap) {
-      // ---- top-of-loop gate: doom-loop, then budget (pure decision) ----
      const decision = decideStep({ recentToolCalls, toolsUsed, budget });
      if (decision.kind === 'doom') {
        // Need fresh history for the summary.
@@ -244,10 +239,6 @@ export async function runAssistantTurn(
      }
      // decision.kind === 'stream' → proceed with compaction + stream + tools.

-      // ---- compaction check ----
-      // v1.11: if the prior turn flagged this chat for compaction, run it
-      // before loadContext so we read post-compaction history. Swallow
-      // failures and proceed with un-compacted history.
      const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
        SELECT needs_compaction FROM chats WHERE id = ${chatId}
      `;
@@ -267,7 +258,6 @@ export async function runAssistantTurn(
        }
      }

-      // ---- load context (must re-load each iteration — new messages since last step) ----
      const loaded = await loadContext(ctx.sql, sessionId, chatId);
      if (!loaded) {
        ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
@@ -279,17 +269,6 @@ export async function runAssistantTurn(
      }
      const projectRoot = await resolveProjectRoot(iterProject.path);

-      try {
-        const dcpMsgs = toDcpMessages(history);
-        const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
-        if (stats.removedCount > 0) {
-          ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
-          history = fromDcpMessages(pruned) as typeof history;
-        }
-      } catch (err) {
-        ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
-      }
-
      // v1.14.0: log step boundary for instrumentation. step_start parts are in
      // the schema CHECK but not emitted here — writing to the assistant message
      // before the stream phase creates a sequence-0 collision with
@@ -297,7 +276,6 @@ export async function runAssistantTurn(
      // since the frontend doesn't render step boundaries in v1.14.
      ctx.log.info({ sessionId, chatId, step: stepNumber, assistantMessageId }, 'step_start');

-      // ---- build messages + stream phase ----
      const messages = await buildMessagesPayload(iterSession, iterProject, history, agent, ctx.log);
      const webToolsEnabled =
        iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
@@ -331,7 +309,6 @@ export async function runAssistantTurn(
        break;
      }

-      // ---- non-tool finish → finalize and exit ----
      if (result.toolCalls.length === 0) {
        // vWhale: Stop hook (best-effort, non-blocking).
        if (ctx.hooks) {
@@ -347,15 +324,6 @@ export async function runAssistantTurn(
        break;
      }

-      // ---- steps: 0 edge case ----
-      // effectiveCap check above guarantees we're inside the loop, but this
-      // guard handles the theoretical case where the model emits tool calls
-      // on step 0 when effectiveCap would have been 0 (impossible since the
-      // while condition prevents entry, but kept for safety). If effectiveCap
-      // is 1 and we're on step 0, tool calls ARE executed — steps counts
-      // iterations, not post-first-stream.
-
-      // ---- tool phase ----
      let toolPhaseResult: ToolPhaseResult;
      try {
        toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
@@ -366,7 +334,6 @@ export async function runAssistantTurn(
        break;
      }

-      // ---- update loop locals ----
      toolsUsed += toolPhaseResult.toolCallCount;
      recentToolCalls = [...recentToolCalls, ...toolPhaseResult.toolCalls];
      stepNumber++;
@@ -466,7 +433,6 @@ export async function runAssistantTurn(
    }).catch(() => {});
  }

-  // ---- persist agent snapshot (best-effort, never blocks inference) ----
  const snapLoaded = await loadContext(ctx.sql, sessionId, chatId).catch(() => null);
  if (snapLoaded) {
    await saveAgentSnapshot(ctx.sql, chatId, {
@@ -479,10 +445,6 @@ export async function runAssistantTurn(
    }).catch(() => {});
  }

-  // ---- post-loop: step-cap sentinel ----
-  // When the loop exits because stepNumber reached effectiveCap, the last
-  // iteration's tool phase returned 'continue' with a nextAssistantId that
-  // is still in 'streaming' status (unfilled). Use it for the wrap-up.
  if (stepNumber >= effectiveCap && effectiveCap < Infinity) {
    const loaded = await loadContext(ctx.sql, sessionId, chatId);
    if (loaded) {
@@ -559,9 +521,6 @@ export async function runInference(
  });
 }

-// v2.8-compare: run inference with a model override and compare group id.
-// Used by the compare endpoint to run the same message through N models in
-// parallel. Each call publishes frames scoped to its compare_group_id.
 export async function runInferenceWithModel(
  ctx: InferenceContext,
  sessionId: string,
@@ -652,4 +611,3 @@ export function createInferenceRunner(
    },
  };
 }
-
--- a/apps/server/src/services/inference/types.ts
+++ b/apps/server/src/services/inference/types.ts
@@ -27,9 +27,6 @@ export interface StreamPhaseState {
  startedAt: string | null;
 }

-// 500ms keeps the DB UPDATE rate bounded under heavy streaming. Used by
-// executeStreamPhase, runCapHitSummary, and runDoomLoopSummary — every site
-// that does a debounced content flush during streaming.
 export const DB_FLUSH_INTERVAL_MS = 500;

 export interface InferenceFrame {
@@ -123,6 +120,7 @@ export interface InferenceFrame {
  analysis_ready?: boolean;
  cross_exam_id?: string;
  delta?: string;
+  stream_seq?: number;
 }

 export type FramePublisher = (sessionId: string, frame: InferenceFrame) => void;
@@ -153,6 +151,12 @@ export interface StreamResult {
  // v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
  // Empty string when the model doesn't emit reasoning (most cases).
  reasoning: string;
+  // Phase 2 (anthropic): reasoning split per thinking block, each with its
+  // Anthropic signature. Adaptive thinking auto-enables interleaved thinking,
+  // so a turn can carry several signed blocks — they MUST be replayed verbatim
+  // per block (a joined string + one signature would 400). Empty/undefined for
+  // models without signed reasoning.
+  reasoningBlocks?: Array<{ text: string; signature?: string }>;
  // vDeepSeek: optional cache-hit token count from DeepSeek's API.
  // Only populated when using @ai-sdk/deepseek provider (not llama-swap).
  cacheReadTokens?: number;
@@ -184,8 +188,6 @@ export interface TurnArgs {
  // Never persisted — mirrors how the cap-hit/doom-loop notes live only inside
  // the summary call's messages array.
  pendingRecoveryNote?: string;
-  // v2.8-compare: when set, overrides the session model for this single turn.
-  // Used by the compare endpoint to run the same message through N models.
  modelOverride?: string;
  // v2.8-compare: opaque group id that rides on every published frame.
  compareGroupId?: string;
--- a/apps/server/src/services/mcp-client.ts
+++ b/apps/server/src/services/mcp-client.ts
@@ -16,8 +16,6 @@ import type { FastifyBaseLogger } from 'fastify';
 import type { McpServerEntry, McpServerConfig } from './mcp-config.js';
 import type { ToolDef } from './tools.js';

-// ---- Types ----
-
 interface McpToolAnnotations {
  readOnlyHint?: boolean;
  destructiveHint?: boolean;
@@ -41,8 +39,6 @@ interface ServerState {
  permission: McpPermission;
 }

-// ---- Module-level state ----
-
 const servers = new Map<string, ServerState>();
 // Reverse map: prefixed tool name → server name (built during discovery)
 const toolToServer = new Map<string, string>();
@@ -50,8 +46,6 @@ let log: FastifyBaseLogger | null = null;

 const MAX_RESULT_BYTES = 5 * 1024 * 1024;

-// ---- Public API ----
-
 /**
 * Connect to all configured MCP servers, discover tools, and wrap them.
 * Per-server graceful degradation: a failing server is logged and skipped.
@@ -148,7 +142,6 @@ export function getServerPermission(prefixedToolName: string): McpPermission {
  return state?.permission ?? 'allow';
 }

-/** Override the permission for a server. Used by the approval flow. */
 export function setServerPermission(serverName: string, permission: McpPermission): void {
  const state = servers.get(serverName);
  if (state) {
@@ -208,8 +201,6 @@ export async function shutdown(): Promise<void> {
  toolToServer.clear();
 }

-// ---- Internal helpers ----
-
 async function connectServer(entry: McpServerEntry): Promise<void> {
  const { name, config } = entry;

--- a/apps/server/src/services/mcp-config.ts
+++ b/apps/server/src/services/mcp-config.ts
@@ -15,8 +15,6 @@ import { readFileSync } from 'node:fs';
 import { z } from 'zod';
 import type { FastifyBaseLogger } from 'fastify';

-// ---- Zod schema ----
-
 const McpPermissionSchema = z.enum(['allow', 'ask', 'deny']).default('allow');

 const McpServerConfigSchema = z.discriminatedUnion('type', [
@@ -48,8 +46,6 @@ export interface McpServerEntry {
  config: McpServerConfig;
 }

-// ---- Env-var substitution ----
-
 const ENV_VAR_PATTERN = /\{env:([A-Za-z_][A-Za-z0-9_]*)\}/g;

 /**
@@ -91,8 +87,6 @@ export function substituteEnvVars(
  return value;
 }

-// ---- Loader ----
-
 /**
 * Read and validate the MCP config file. Returns enabled servers only.
 * File missing → log info, return []. Parse/validation error → log warn, return [].
--- a/apps/server/src/services/memory/scan.ts
+++ b/apps/server/src/services/memory/scan.ts
@@ -1,8 +1,7 @@
 import { homedir } from 'node:os';
 import { join } from 'node:path';
 import { readFile, readdir } from 'node:fs/promises';
-import type { MemoryEntry } from './entries.js';
-import { parseMemoryEntries } from './entries.js';
+import { parseMemoryEntries, type MemoryEntry } from "./entries.js";
 import { getMemoryRoot } from './paths.js';

 export interface MemoryScope {
--- a/apps/server/src/services/memory/store.ts
+++ b/apps/server/src/services/memory/store.ts
@@ -1,7 +1,6 @@
 import { readFile, writeFile, readdir } from 'node:fs/promises';
 import { join } from 'node:path';
-import type { MemoryTopic } from './paths.js';
-import { getTopicDir } from './paths.js';
+import { getTopicDir, type MemoryTopic } from "./paths.js";

 export async function readTopicFiles(root: string, topic: MemoryTopic): Promise<Map<string, string>> {
  const dir = getTopicDir(root, topic);
--- a/apps/server/src/services/model-context.ts
+++ b/apps/server/src/services/model-context.ts
@@ -44,6 +44,8 @@ type ConfigForModelContext = {
  LLAMA_SWAP_URL: string;
  DEEPSEEK_API_KEY?: string;
  DEEPSEEK_BASE_URL?: string;
+  ANTHROPIC_API_KEY?: string;
+  ANTHROPIC_BASE_URL?: string;
 };

 /**
@@ -67,7 +69,21 @@ export function configureModelContext(

 // vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
 // Return a reasonable default context so compaction estimates work.
-const DEEPSEEK_DEFAULT_N_CTX = 131_072;
+const DEEPSEEK_DEFAULT_N_CTX = 1_000_000;
+
+// Anthropic Claude models also have no props endpoint. Static windows: the
+// 4.x Opus/Sonnet family is 1M; Haiku is 200K.
+const ANTHROPIC_HAIKU_N_CTX = 200_000;
+const ANTHROPIC_DEFAULT_N_CTX = 1_000_000;
+
+/** Fixed context window for hosted providers that expose no props endpoint; null for everything else. */
+function staticHostedNCtx(resolved: ReturnType<typeof resolveModelProvider>): number | null {
+  if (resolved.providerId === 'deepseek') return DEEPSEEK_DEFAULT_N_CTX;
+  if (resolved.route !== 'anthropic') return null;
+  // Anthropic route: a wire id containing "haiku" gets the Haiku window, anything else the default.
+  const isHaiku = resolved.wireModelId.includes('haiku');
+  return isHaiku ? ANTHROPIC_HAIKU_N_CTX : ANTHROPIC_DEFAULT_N_CTX;
+}

 export async function getModelContext(model: string): Promise<ModelContext | null> {
  // Resolve the model through the provider-aware resolver. For composite
@@ -84,12 +100,11 @@ export async function getModelContext(model: string): Promise<ModelContext | nul

  const resolved = resolveModelProvider(model, config);

-  // DeepSeek models (by provider id) have no /upstream/<model>/props.
-  // Use a static default so compaction doesn't fall to the buffer-only
-  // path with tiny limits.
-  if (resolved.providerId === 'deepseek') {
-    return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
-  }
+  // Hosted providers (DeepSeek, Anthropic) have no /upstream/<model>/props
+  // endpoint — use a static window so compaction doesn't fall to the
+  // buffer-only path with tiny limits.
+  const staticCtx = staticHostedNCtx(resolved);
+  if (staticCtx !== null) return { n_ctx: staticCtx };

  // P7: orphaned auto:* session with no gateway configured — no props endpoint
  // to query. Negative-cache and return null; compaction degrades gracefully.
--- a/apps/server/src/services/pending-tool-lookup.ts
+++ b/apps/server/src/services/pending-tool-lookup.ts
@@ -0,0 +1,76 @@
+import type { Sql } from '../db.js';
+import type { ToolCall } from '../types/api.js';
+
+// Shared lookup for the answer_user_input + grant_read_access pause-resume
+// endpoints. Finds the originating assistant tool_call by id in message_parts,
+// validates the tool name, finds the pending tool_result part, and checks the
+// already-answered guard. Returns ok:true+context on success, ok:false+HTTP
+// status+body on any error (caller does reply.code(ctx.code); return ctx.body).
+export type PendingToolLookupResult =
+  | {
+      ok: true;
+      foundCall: ToolCall; // rebuilt from the assistant tool_call part's payload (id, name, args)
+      toolMessageId: string; // same value as toolRow.message_id
+      toolRow: { message_id: string; payload: { tool_call_id: string; output: unknown } }; // the still-unanswered tool_result part
+    }
+  | { ok: false; code: number; body: Record<string, unknown> }; // code: 400, 404 or 409; body.error names the failure
+
+export async function lookupPendingToolCall(
+  sql: Sql,
+  chatId: string,
+  tool_call_id: string,
+  expectedToolName: string, // tool name the calling endpoint is allowed to resolve
+  wrongToolError: string, // body.error value returned with HTTP 400 when the names differ
+): Promise<PendingToolLookupResult> {
+  // Step 1: newest assistant tool_call part in this chat whose payload id equals tool_call_id.
+  const callerRows = await sql<{
+    message_id: string;
+    payload: { id: string; name: string; args: Record<string, unknown> };
+  }[]>`
+    SELECT p.message_id, p.payload
+    FROM message_parts p
+    JOIN messages m ON m.id = p.message_id
+    WHERE m.chat_id = ${chatId}
+      AND m.role = 'assistant'
+      AND p.kind = 'tool_call'
+      AND p.payload->>'id' = ${tool_call_id}
+    ORDER BY m.created_at DESC
+    LIMIT 1
+  `;
+  const callerRow = callerRows[0];
+  if (!callerRow) return { ok: false, code: 404, body: { error: 'unknown_tool_call_id' } }; // no such call in this chat
+
+  const foundCall: ToolCall = {
+    id: callerRow.payload.id,
+    name: callerRow.payload.name,
+    args: callerRow.payload.args,
+  };
+  if (foundCall.name !== expectedToolName) { // the id exists but belongs to a different tool
+    return { ok: false, code: 400, body: { error: wrongToolError } };
+  }
+
+  // Step 2: newest tool-role tool_result part in this chat carrying the same tool_call_id.
+  const toolRows = await sql<{
+    message_id: string;
+    payload: { tool_call_id: string; output: unknown };
+  }[]>`
+    SELECT p.message_id, p.payload
+    FROM message_parts p
+    JOIN messages m ON m.id = p.message_id
+    WHERE m.chat_id = ${chatId}
+      AND m.role = 'tool'
+      AND p.kind = 'tool_result'
+      AND p.payload->>'tool_call_id' = ${tool_call_id}
+    ORDER BY m.created_at DESC
+    LIMIT 1
+  `;
+  const toolRow = toolRows[0];
+  if (!toolRow) {
+    return { ok: false, code: 404, body: { error: 'unknown_tool_call_id', detail: 'tool message not found' } };
+  }
+  if (toolRow.payload && toolRow.payload.output !== null) { // any non-null output counts as already answered
+    return { ok: false, code: 409, body: { error: 'tool_call_already_answered' } };
+  }
+
+  return { ok: true, foundCall, toolMessageId: toolRow.message_id, toolRow };
+}
--- a/apps/server/src/services/project_bootstrap.ts
+++ b/apps/server/src/services/project_bootstrap.ts
@@ -102,19 +102,15 @@ export async function bootstrapProject(
  let gitea_pushed = false;
  let gitea_remote_url: string | null = null;

-  // Step 1: mkdir
  await mkdir(fullPath, { recursive: false });
  folder_created = true;
  log.info({ fullPath }, 'project_bootstrap: folder created');

-  // Step 2: write .gitignore
  await writeFile(resolve(fullPath, '.gitignore'), GITIGNORE_TEMPLATE, 'utf8');

-  // Step 3: git init -b main
  await execFileAsync('git', ['init', '-b', 'main'], { cwd: fullPath });
  git_initialized = true;

-  // Step 4: git add + commit (per-command -c, no global config touch)
  await execFileAsync('git', ['add', '.gitignore'], { cwd: fullPath });
  await execFileAsync(
    'git',
@@ -129,7 +125,6 @@ export async function bootstrapProject(
  first_commit = true;
  log.info({ folder }, 'project_bootstrap: initial commit');

-  // Step 5: optional Gitea remote
  if (options.createGiteaRemote) {
    if (!config.GITEA_TOKEN) {
      warnings.push('Gitea remote skipped — token not configured');
@@ -144,7 +139,6 @@ export async function bootstrapProject(
        gitea_remote_url = repo.html_url;
        log.info({ folder, html_url: repo.html_url }, 'project_bootstrap: gitea repo created');

-        // Step 6: git remote add + push
        try {
          const sshUrl = repo.ssh_url.replace('git.indifferentketchup.com', '100.114.205.53');
          await execFileAsync('git', ['remote', 'add', 'origin', sshUrl], { cwd: fullPath });
--- a/apps/server/src/services/provider-status.ts
+++ b/apps/server/src/services/provider-status.ts
@@ -0,0 +1,400 @@
+import { getLlamaProviders, type LlamaProvider } from './llama-providers.js';
+import { GATEWAY_KIND } from '@boocode/contracts/gateway';
+import { loadConfig } from '../config.js';
+
+export interface RunningModel { // one entry of a provider's /running response, plus compositeId
+  model: string;
+  compositeId: string; // `${provider.id}/${model}`, attached locally by fetchRunning
+  state: string;
+  cmd: string;
+  proxy: string;
+  ttl: number; // unit not established in this file — TODO confirm against the provider API
+  name: string;
+  description: string;
+}
+
+export interface CloudModelMeta { // catalogue entry for a hosted (openrouter / deepseek) model
+  id: string; // `${provider.id}/<upstream model id>`
+  name?: string;
+  contextLength?: number;
+  maxOutputTokens?: number;
+  pricing?: { input: number; output: number; cached?: number }; // units are not stated in this file — TODO confirm
+  modalities?: string[];
+  supportsToolCalling?: boolean; // for openrouter entries this is a keyword guess over the description text
+  supportsReasoning?: boolean; // likewise a keyword guess for openrouter entries
+}
+
+export interface RateLimitInfo { // NOTE(review): nothing in this file populates this yet
+  requestsRemaining?: number;
+  requestsLimit?: number;
+  tokensRemaining?: number;
+  tokensLimit?: number;
+  resetAt?: number; // unit/epoch not established here — TODO confirm
+}
+
+export interface GpuInfo { // per-GPU readings parsed from /metrics; only index and name are guaranteed at runtime
+  index: number; // value of the gpu="N" label
+  name: string; // name="…" label of the nvidia_gpu_name sample
+  temperature: number; // nvidia_gpu_temperature_celsius
+  utilizationPct: number; // nvidia_gpu_utilization_ratio × 100
+  memoryUsedMb: number; // nvidia_gpu_memory_used_bytes / 1024²
+  memoryTotalMb: number; // nvidia_gpu_memory_total_bytes / 1024²
+  powerDrawW: number; // nvidia_gpu_power_draw_watts
+}
+
+export interface ProviderStatus { // one provider's row in the status response
+  id: string;
+  label: string;
+  baseUrl: string;
+  kind: string;
+  healthy: boolean; // GET <baseUrl>/health returned 2xx; the gateway kind is always reported true
+  healthyError?: string; // "HTTP <status>" or the fetch error message when the probe failed
+  running: RunningModel[]; // always [] for gateway and cloud kinds
+  cloudModels?: CloudModelMeta[]; // set only for openrouter / deepseek kinds
+  rateLimits?: RateLimitInfo;
+  gpus?: GpuInfo[]; // set only for non-gateway, non-cloud kinds
+  hostLoad?: { cpuSat: number; memSat: number; load1: number }; // memSat = 1 − MemAvailable / MemTotal
+  fetchError?: string; // error from the /running request, if any
+}
+
+export interface ProviderStatusResponse { // return shape of getProviderStatus
+  providers: ProviderStatus[]; // same order as the provider registry
+  timestamp: number; // Date.now() taken after every provider resolved
+}
+
+const HEALTH_CACHE_TTL_MS = 30_000; // max age of a cached /health verdict
+const RUNNING_CACHE_TTL_MS = 5_000; // max age of a cached /running list
+const CLOUD_MODELS_CACHE_TTL_MS = 120_000; // max age of a cached cloud model catalogue
+const METRICS_CACHE_TTL_MS = 10_000; // max age of a cached /metrics scrape
+const FETCH_TIMEOUT_MS = 3_000; // abort deadline for /health; default deadline for fetchJson
+const CLOUD_FETCH_TIMEOUT_MS = 8_000; // abort deadline for the openrouter model listing
+
+const healthCache = new Map<string, { value: boolean; error?: string; at: number }>(); // key `health:<id>/<baseUrl>`; at = Date.now() of the write
+const runningCache = new Map<string, { value: RunningModel[]; error?: string; at: number }>(); // key `running:<id>/<baseUrl>`
+const cloudModelsCache = new Map<string, { value: CloudModelMeta[]; at: number }>(); // key `cloud:<id>/<baseUrl>`
+const metricsCache = new Map<string, { gpus: GpuInfo[]; hostLoad?: ProviderStatus['hostLoad']; at: number }>(); // key `metrics:<id>/<baseUrl>`
+
+async function fetchJson(url: string, timeoutMs = FETCH_TIMEOUT_MS, headers?: Record<string, string>): Promise<unknown> {
+  const controller = new AbortController(); // lets the timer below cancel the in-flight request
+  const timer = setTimeout(() => controller.abort(), timeoutMs);
+  try {
+    const res = await fetch(url, { signal: controller.signal, headers });
+    if (!res.ok) throw new Error(`HTTP ${res.status}: ${res.statusText}`); // non-2xx is surfaced as an Error, body is not parsed
+    return await res.json(); // awaited inside try, so the timer stays armed while the body is read
+  } finally {
+    clearTimeout(timer); // runs on success and on every failure path
+  }
+}
+
+/** Probe `<baseUrl>/health`, caching the verdict (success or failure) for HEALTH_CACHE_TTL_MS. */
+async function fetchHealth(provider: LlamaProvider): Promise<{ healthy: boolean; error?: string }> {
+  const cacheKey = `health:${provider.id}/${provider.baseUrl}`;
+  const hit = healthCache.get(cacheKey);
+  if (hit && Date.now() - hit.at < HEALTH_CACHE_TTL_MS) {
+    return { healthy: hit.value, error: hit.error };
+  }
+
+  // Record the outcome in the cache and hand it back in the caller-facing shape.
+  const remember = (healthy: boolean, error?: string): { healthy: boolean; error?: string } => {
+    healthCache.set(cacheKey, { value: healthy, error, at: Date.now() });
+    return { healthy, error };
+  };
+
+  const aborter = new AbortController();
+  const deadline = setTimeout(() => aborter.abort(), FETCH_TIMEOUT_MS);
+  try {
+    const res = await fetch(`${provider.baseUrl.replace(/\/+$/, '')}/health`, { signal: aborter.signal });
+    return remember(res.ok, res.ok ? undefined : `HTTP ${res.status}`);
+  } catch (err) {
+    // Network failure or timeout: unhealthy, with the reason preserved.
+    return remember(false, err instanceof Error ? err.message : String(err));
+  } finally {
+    clearTimeout(deadline);
+  }
+}
+
+/** Fetch `<baseUrl>/running` (cached for RUNNING_CACHE_TTL_MS); on failure return the last known list plus the error. */
+async function fetchRunning(provider: LlamaProvider): Promise<{ running: RunningModel[]; error?: string }> {
+  const cacheKey = `running:${provider.id}/${provider.baseUrl}`;
+  const hit = runningCache.get(cacheKey);
+  if (hit && Date.now() - hit.at < RUNNING_CACHE_TTL_MS) {
+    return { running: hit.value, error: hit.error };
+  }
+
+  // Shape of one entry as the provider sends it; compositeId is added locally.
+  type RawEntry = Omit<RunningModel, 'compositeId'>;
+  try {
+    const endpoint = `${provider.baseUrl.replace(/\/+$/, '')}/running`;
+    const payload = (await fetchJson(endpoint)) as { running: RawEntry[] };
+    // compositeId namespaces each model under its provider id.
+    const tag = (m: RawEntry): RunningModel => ({ ...m, compositeId: `${provider.id}/${m.model}` });
+    const running = (payload?.running ?? []).map(tag);
+    runningCache.set(cacheKey, { value: running, at: Date.now() });
+    return { running };
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    // Re-stamp the previous list (or an empty one) so the failed fetch is itself cached for one TTL.
+    const running = runningCache.get(cacheKey)?.value ?? [];
+    runningCache.set(cacheKey, { value: running, error, at: Date.now() });
+    return { running, error };
+  }
+}
+
+/**
+ * Model catalogue for a cloud provider, cached for CLOUD_MODELS_CACHE_TTL_MS: openrouter is fetched live,
+ * deepseek is a hard-coded table, any other kind yields []. On failure the last cached list is served.
+ */
+async function fetchCloudModels(provider: LlamaProvider, apiKey?: string): Promise<CloudModelMeta[]> {
+  const cacheKey = `cloud:${provider.id}/${provider.baseUrl}`;
+  const cached = cloudModelsCache.get(cacheKey);
+  if (cached && Date.now() - cached.at < CLOUD_MODELS_CACHE_TTL_MS) return cached.value;
+
+  try {
+    let models: CloudModelMeta[];
+    const cleanBase = provider.baseUrl.replace(/\/+$/, '');
+    if (provider.kind === 'openrouter') {
+      const headers: Record<string, string> = {};
+      if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;
+      const data = (await fetchJson(`${cleanBase}/api/v1/models`, CLOUD_FETCH_TIMEOUT_MS, headers)) as {
+        data?: Array<{
+          id: string; name?: string;
+          context_length?: number; top_provider?: { max_completion_tokens?: number };
+          pricing?: { prompt?: string; completion?: string };
+          architecture?: { modality?: string; tokenizer?: string };
+          description?: string;
+        }>;
+      };
+      models = (data.data ?? []).map((m) => {
+        // Capability flags are keyword guesses over the free-text description, not structured data.
+        const blurb = (m.description ?? '').toLowerCase();
+        return {
+          id: `${provider.id}/${m.id}`,
+          name: m.name || m.id,
+          contextLength: m.context_length,
+          maxOutputTokens: m.top_provider?.max_completion_tokens,
+          pricing: { input: parseFloatPricing(m.pricing?.prompt), output: parseFloatPricing(m.pricing?.completion) },
+          modalities: m.architecture?.modality ? [m.architecture.modality] : ['text'],
+          supportsToolCalling: blurb.includes('tool'),
+          supportsReasoning: blurb.includes('reason') || blurb.includes('think'),
+        };
+      });
+    } else if (provider.kind === 'deepseek') {
+      models = [
+        {
+          id: `${provider.id}/deepseek-v4-flash`,
+          name: 'DeepSeek V4 Flash',
+          contextLength: 1_000_000,
+          maxOutputTokens: 384_000,
+          pricing: { input: 0.14, output: 0.28, cached: 0.0028 },
+          modalities: ['text'],
+          supportsToolCalling: true,
+          supportsReasoning: true,
+        },
+        {
+          id: `${provider.id}/deepseek-v4-pro`,
+          name: 'DeepSeek V4 Pro',
+          contextLength: 1_000_000,
+          maxOutputTokens: 384_000,
+          pricing: { input: 0.435, output: 0.87, cached: 0.003625 },
+          modalities: ['text'],
+          supportsToolCalling: true,
+          supportsReasoning: true,
+        },
+      ];
+    } else {
+      models = [];
+    }
+
+    cloudModelsCache.set(cacheKey, { value: models, at: Date.now() });
+    return models;
+  } catch {
+    // Mirror fetchRunning: an expired-but-known catalogue beats an empty one while the upstream is down.
+    return cached?.value ?? [];
+  }
+}
+
+/** Parse a decimal price string; missing, empty, or non-numeric input yields 0. */
+function parseFloatPricing(val?: string): number {
+  const parsed = val ? parseFloat(val) : NaN;
+  return Number.isNaN(parsed) ? 0 : parsed;
+}
+
+/** Scrape `<baseUrl>/metrics` (cached METRICS_CACHE_TTL_MS, aborted after FETCH_TIMEOUT_MS so a hung endpoint cannot stall callers); any failure yields `{ gpus: [] }`. */
+async function fetchMetrics(provider: LlamaProvider): Promise<{ gpus: GpuInfo[]; hostLoad?: ProviderStatus['hostLoad'] }> {
+  const cacheKey = `metrics:${provider.id}/${provider.baseUrl}`;
+  const cached = metricsCache.get(cacheKey);
+  if (cached && Date.now() - cached.at < METRICS_CACHE_TTL_MS) return cached;
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+  try {
+    const res = await fetch(`${provider.baseUrl.replace(/\/+$/, '')}/metrics`, { signal: controller.signal });
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const text = await res.text();
+    const entry = { gpus: parsePrometheusGpuMetrics(text), hostLoad: parsePrometheusHostMetrics(text), at: Date.now() };
+    metricsCache.set(cacheKey, entry);
+    return entry;
+  } catch {
+    return { gpus: [] };
+  } finally {
+    clearTimeout(timer);
+  }
+}
+
+/** Parse per-GPU samples from Prometheus text; only GPUs that have an `nvidia_gpu_name` sample are returned. */
+function parsePrometheusGpuMetrics(text: string): GpuInfo[] {
+  const gpuMap = new Map<number, Partial<GpuInfo>>();
+  const nameRe = /nvidia_gpu_name\{gpu="(\d+)"[^}]*name="([^"]+)"/g;
+  let nm: RegExpExecArray | null;
+  while ((nm = nameRe.exec(text)) !== null) {
+    if (!nm[1] || !nm[2]) continue;
+    const idx = parseInt(nm[1], 10);
+    if (!gpuMap.has(idx)) gpuMap.set(idx, { index: idx } as GpuInfo);
+    (gpuMap.get(idx) as Record<string, unknown>)['name'] = nm[2];
+  }
+
+  // The `g` flag is required: exec() in a loop restarts at index 0 on a non-global regex and would
+  // never terminate. Captures also admit exponent notation (e.g. 8.5e+09), which byte gauges can use.
+  const patterns: Array<{ regex: RegExp; field: keyof GpuInfo; scale?: number }> = [
+    { regex: /nvidia_gpu_temperature_celsius\{gpu="(\d+)"\}\s+([0-9.eE+-]+)/g, field: 'temperature' },
+    { regex: /nvidia_gpu_utilization_ratio\{gpu="(\d+)"\}\s+([0-9.eE+-]+)/g, field: 'utilizationPct', scale: 100 },
+    { regex: /nvidia_gpu_memory_used_bytes\{gpu="(\d+)"\}\s+([0-9.eE+-]+)/g, field: 'memoryUsedMb', scale: 1 / (1024 * 1024) },
+    { regex: /nvidia_gpu_memory_total_bytes\{gpu="(\d+)"\}\s+([0-9.eE+-]+)/g, field: 'memoryTotalMb', scale: 1 / (1024 * 1024) },
+    { regex: /nvidia_gpu_power_draw_watts\{gpu="(\d+)"\}\s+([0-9.eE+-]+)/g, field: 'powerDrawW' },
+  ];
+  for (const { regex, field, scale } of patterns) {
+    let match: RegExpExecArray | null;
+    const re = new RegExp(regex.source, regex.flags);
+    while ((match = re.exec(text)) !== null) {
+      if (!match[1] || match[2] === undefined) continue;
+      const idx = parseInt(match[1], 10);
+      const raw = parseFloat(match[2]);
+      if (Number.isNaN(raw)) continue;
+      if (!gpuMap.has(idx)) gpuMap.set(idx, { index: idx } as GpuInfo);
+      (gpuMap.get(idx) as Record<string, unknown>)[field] = scale != null ? raw * scale : raw;
+    }
+  }
+  return Array.from(gpuMap.values()).filter((g) => g.name != null) as GpuInfo[];
+}
+
+/** Derive host load from `node_*` samples in Prometheus text; undefined when the memory samples are missing or unusable. */
+function parsePrometheusHostMetrics(text: string): ProviderStatus['hostLoad'] {
+  // Anchor at line start (so a `# HELP node_load1 1m ...` comment is not read as a sample) and accept exponent notation.
+  const sample = (name: string): number | undefined => {
+    const hit = new RegExp(`^${name}\\s+([0-9.eE+-]+)`, 'm').exec(text)?.[1];
+    const n = hit === undefined ? NaN : parseFloat(hit);
+    return Number.isNaN(n) ? undefined : n;
+  };
+  const cpuMatch = /node_cpu_seconds_total/.test(text) ? /cpu_usage_active\{[^}]*\}\s+([0-9.eE+-]+)/.exec(text) : null;
+  const memAvail = sample('node_memory_MemAvailable_bytes');
+  const memTotal = sample('node_memory_MemTotal_bytes');
+  // A zero or absent total would turn memSat into NaN or an infinity.
+  if (memAvail === undefined || memTotal === undefined || memTotal <= 0) return undefined;
+  const cpuPct = cpuMatch?.[1] ? parseFloat(cpuMatch[1]) : NaN;
+  return { cpuSat: Number.isNaN(cpuPct) ? 0 : cpuPct / 100, memSat: 1 - memAvail / memTotal, load1: sample('node_load1') ?? 0 };
+}
+
+/**
+ * Build one status row per registered provider, probing all providers concurrently. The gateway kind is reported
+ * healthy without probing; cloud kinds get health + catalogue; other kinds get health, running models and metrics.
+ */
+export async function getProviderStatus(): Promise<ProviderStatusResponse> {
+  const config = loadConfig();
+  const registry = getLlamaProviders();
+  const statuses = await Promise.all(
+    registry.providers.map(async (provider): Promise<ProviderStatus> => {
+      const base = {
+        id: provider.id,
+        label: provider.label,
+        baseUrl: provider.baseUrl,
+        kind: provider.kind,
+      };
+      if (provider.kind === GATEWAY_KIND) {
+        return { ...base, healthy: true, running: [] };
+      }
+
+      if (provider.kind === 'openrouter' || provider.kind === 'deepseek') {
+        // openrouter's key is read straight from the environment; deepseek's comes from the loaded config.
+        const apiKey =
+          provider.kind === 'openrouter' ? process.env['OPENROUTER_API_KEY'] : config.DEEPSEEK_API_KEY;
+        // The health probe and the catalogue lookup are independent, so run them together.
+        const [health, cloudModels] = await Promise.all([
+          fetchHealth(provider),
+          fetchCloudModels(provider, apiKey),
+        ]);
+        return {
+          ...base,
+          healthy: health.healthy,
+          healthyError: health.error,
+          running: [],
+          cloudModels,
+        };
+      }
+
+      const [health, running, metrics] = await Promise.all([
+        fetchHealth(provider),
+        fetchRunning(provider),
+        fetchMetrics(provider).catch(() => ({ gpus: [] as GpuInfo[], hostLoad: undefined })),
+      ]);
+      return {
+        ...base,
+        healthy: health.healthy,
+        healthyError: health.error,
+        running: running.running,
+        gpus: metrics.gpus,
+        hostLoad: metrics.hostLoad,
+        fetchError: running.error,
+      };
+    }),
+  );
+
+  return { providers: statuses, timestamp: Date.now() };
+}
+
+/** POST the unload-all endpoint of a `*swap` provider; resolves true only on a 2xx reply. */
+export async function unloadProvider(providerId: string): Promise<boolean> {
+  const provider = getLlamaProviders().providers.find((p) => p.id === providerId);
+  // Refuse unknown ids, the gateway, and any kind whose name does not end in "swap".
+  if (!provider || provider.kind === GATEWAY_KIND || !provider.kind.endsWith('swap')) return false;
+
+  const aborter = new AbortController();
+  const deadline = setTimeout(() => aborter.abort(), 10_000);
+  try {
+    const endpoint = `${provider.baseUrl.replace(/\/+$/, '')}/api/models/unload`;
+    const { ok } = await fetch(endpoint, { method: 'POST', signal: aborter.signal });
+    // Drop the cached /running list so the next status poll re-fetches it.
+    if (ok) runningCache.delete(`running:${provider.id}/${provider.baseUrl}`);
+    return ok;
+  } catch {
+    return false;
+  } finally {
+    clearTimeout(deadline);
+  }
+}
+
+/** POST the per-model unload endpoint of a `*swap` provider; resolves true only on a 2xx reply. */
+export async function unloadModel(providerId: string, modelId: string): Promise<boolean> {
+  const provider = getLlamaProviders().providers.find((p) => p.id === providerId);
+  // Refuse unknown ids, the gateway, and any kind whose name does not end in "swap".
+  if (!provider || provider.kind === GATEWAY_KIND || !provider.kind.endsWith('swap')) return false;
+
+  const aborter = new AbortController();
+  const deadline = setTimeout(() => aborter.abort(), 10_000);
+  try {
+    const endpoint = `${provider.baseUrl.replace(/\/+$/, '')}/api/models/unload/${encodeURIComponent(modelId)}`;
+    const { ok } = await fetch(endpoint, { method: 'POST', signal: aborter.signal });
+    // Drop the cached /running list so the next status poll re-fetches it.
+    if (ok) runningCache.delete(`running:${provider.id}/${provider.baseUrl}`);
+    return ok;
+  } catch {
+    return false;
+  } finally {
+    clearTimeout(deadline);
+  }
+}
+
+/** Empty all four module-level caches so the next status request re-fetches everything. */
+export function invalidateProviderCache(): void {
+  // Same order as the original: health, running, cloud models, metrics.
+  const caches = [healthCache, runningCache, cloudModelsCache, metricsCache];
+  for (const cache of caches) cache.clear();
+}
--- a/apps/server/src/services/secret_guard.ts
+++ b/apps/server/src/services/secret_guard.ts
@@ -128,12 +128,6 @@ export const DEFAULT_SECURITY_IGNORE_FILETYPES: ReadonlyArray<string> = [
  ...BOOCODE_ADDITIONS,
 ];

-// === glob compilation ======================================================
-// Tiny glob-to-regex. No new prod dep — the patterns we ship are simple
-// (literal | name* | *.ext | dir/). Covers ~95% of glob spec, which is
-// 100% of what this list uses. If patterns ever grow to need `**`, `[]`,
-// `{a,b}`, or negation, swap in picomatch.
-
 interface CompiledPattern {
  regex: RegExp;
  // 'basename' = test against the trailing path component only.
@@ -158,8 +152,6 @@ function compile(pattern: string): CompiledPattern {

 const COMPILED: ReadonlyArray<CompiledPattern> = DEFAULT_SECURITY_IGNORE_FILETYPES.map(compile);

-// === public API ============================================================
-
 // Returns true when `relPath` matches a known-secret pattern. Case-insensitive
 // (regex 'i' flag). Always normalize path separators to `/` so Windows-origin
 // paths match the same patterns. Empty or root-only paths return false.
--- a/apps/server/src/services/skills.ts
+++ b/apps/server/src/services/skills.ts
@@ -35,11 +35,6 @@ interface CachedSkill extends Skill {
 const cache = new Map<string, CachedSkill>();
 let lastWalkedAt = 0;

-// ---- Frontmatter parser ----------------------------------------------------
-// Minimal `---\n...\n---` extractor. Only `name` and `description` keys are
-// honored; other frontmatter keys are silently ignored for forward-compat
-// with the anthropics/skills upstream spec.
-
 interface Frontmatter {
  name?: string;
  description?: string;
@@ -91,8 +86,6 @@ function parseSkillFile(content: string): ParsedSkillFile {
  return { name: fm.name, description: fm.description, body };
 }

-// ---- Tree walk -------------------------------------------------------------
-
 // Fixed depth-3 scan: /data/skills/<group>/<skill>/SKILL.md. Two layers of
 // readdir, no recursion. Group folders without SKILL.md are skipped silently;
 // LICENSE / ATTRIBUTION.md / other non-SKILL.md files are ignored entirely.
@@ -145,8 +138,6 @@ async function walkSkills(root: string): Promise<CachedSkill[]> {
  return found;
 }

-// ---- Cache ----------------------------------------------------------------
-
 async function ensureCache(): Promise<void> {
  const now = Date.now();
  if (cache.size > 0 && now - lastWalkedAt < LIST_CACHE_TTL_MS) return;
@@ -186,8 +177,6 @@ async function ensureCache(): Promise<void> {
  lastWalkedAt = now;
 }

-// ---- Public API -----------------------------------------------------------
-
 export async function listSkills(): Promise<Skill[]> {
  await ensureCache();
  return Array.from(cache.values()).map((s) => ({
--- a/apps/server/src/services/synthesisPipeline.ts
+++ b/apps/server/src/services/synthesisPipeline.ts
@@ -34,7 +34,7 @@ import type { InferenceContext, TurnArgs } from './inference/types.js';
 export const SYNTHESIS_TOOLS: ReadonlySet<string> = new Set([
  'boocontext_boocontext_overview',
  'boocontext_boocontext_symbols',
-  'boocontext_codesight_get_blast_radius',
+  'boocontext_boocontext_get_blast_radius',
 ]);

 const TOP_N_FILES = 5;
@@ -52,7 +52,7 @@ const SYNTH_TIMEOUT_MS = 90_000;
 // File-extension regex for referenced-file extraction. Limited to source-
 // language extensions so we don't pull in lockfiles, images, etc.
 const FILE_PATH_RE =
-  /(?:^|[`'"<\s\(\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<\)\]\s,;:]|$)/gm;
+  /(?:^|[`'"<\s(\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<)\]\s,;:]|$)/gm;

 export interface SynthesisParams {
  ctx: InferenceContext;
--- a/apps/server/src/services/tools/background-subagent-tools.ts
+++ b/apps/server/src/services/tools/background-subagent-tools.ts
@@ -24,10 +24,6 @@ import {
  getBackgroundTaskResult,
 } from '../background-task.js';

-// ---------------------------------------------------------------------------
-// spawn_subagent
-// ---------------------------------------------------------------------------
-
 export const SpawnSubagentInput = z.object({
  input: z.string().min(1).describe('The task to execute in the background'),
  model: z
@@ -139,10 +135,6 @@ export const spawnSubagent: ToolDef<SpawnSubagentInputT> = {
  },
 };

-// ---------------------------------------------------------------------------
-// subagent_status
-// ---------------------------------------------------------------------------
-
 export const SubagentStatusInput = z.object({
  task_id: z.string().uuid().describe('Task ID from spawn_subagent'),
 });
@@ -218,10 +210,6 @@ export const subagentStatus: ToolDef<SubagentStatusInputT> = {
  },
 };

-// ---------------------------------------------------------------------------
-// subagent_result
-// ---------------------------------------------------------------------------
-
 export const SubagentResultInput = z.object({
  task_id: z.string().uuid().describe('Task ID from spawn_subagent'),
 });
--- a/apps/server/src/services/tools/registry.ts
+++ b/apps/server/src/services/tools/registry.ts
@@ -5,7 +5,7 @@ import { webSearch } from '../web_search.js';
 import { webFetch } from '../web_fetch.js';
 // v2.8.24: All codecontext tools removed. Boocontext MCP tools are appended
 // at startup via appendMcpTools(). Agent tool lists reference the MCP tool
-// names (boocontext_boocontext_*, boocontext_codesight_*) directly.
+// names (boocontext_boocontext_*) directly.
 // v1.13.17-cross-repo-reads: cross-repo read grant request tool. Paired
 // with the pause-on-pending-grant branch in inference/tool-phase.ts and the
 // POST /api/chats/:id/grant_read_access endpoint in routes/messages.ts.
--- a/apps/server/src/services/tools/types.ts
+++ b/apps/server/src/services/tools/types.ts
@@ -18,6 +18,7 @@ export interface ToolJsonSchema {
 export interface ToolExecCtx {
  sql: Sql;
  sessionId: string;
+  toolCallId?: string;
 }

 export interface ToolDef<TInput> {
--- a/apps/server/src/services/workflow/catalog.ts
+++ b/apps/server/src/services/workflow/catalog.ts
@@ -8,10 +8,6 @@

 import { createHash } from 'node:crypto';

-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
 /**
 * A built-in workflow definition shipped with BooCode.
 */
@@ -32,10 +28,6 @@ export interface BuiltinWorkflow {
  generateScript: (args?: Record<string, unknown>) => string;
 }

-// ---------------------------------------------------------------------------
-// Script templates (shared helpers)
-// ---------------------------------------------------------------------------
-
 /**
 * Stable JSON serialisation for generating deterministic cache keys from
 * structured arguments. Keys are sorted so the same data always produces
@@ -52,12 +44,6 @@ function stableJson(value: unknown): string {
  return `{${pairs.join(',')}}`;
 }

-/**
- * Compute a deterministic SHA-256 fingerprint for a combined spec + args
- * payload. Used by the resumability cache to detect unchanged agent tasks.
- *
- * Exported for testing.
- */
 export function fingerprintAgentTask(
  prompt: string,
  spec: Record<string, unknown>,
@@ -68,10 +54,6 @@ export function fingerprintAgentTask(
    .digest('hex');
 }

-// ---------------------------------------------------------------------------
-// Built-in workflow definitions
-// ---------------------------------------------------------------------------
-
 function generateDeepResearchScript(_args?: Record<string, unknown>): string {
  return `
 export const meta = {
@@ -90,7 +72,6 @@ export default async function main(args) {
  const query = args?.query ?? 'No query provided';
  log('deep-research: starting with query: ' + query);

-  // Phase 1: Scope
  phase('Scope');
  const scope = await agent(
    'Analyse this research query and produce a search plan with 3-5 key sub-questions: ' + query,
@@ -98,7 +79,6 @@ export default async function main(args) {
  );
  log('Scope completed');

-  // Phase 2: Search
  phase('Search');
  const searchResults = await agent(
    'Based on the scope, search for authoritative sources. Return a list of 3-5 URLs with brief annotations.',
@@ -106,7 +86,6 @@ export default async function main(args) {
  );
  log('Search completed');

-  // Phase 3: Fetch
  phase('Fetch');
  const fetchedContent = await agent(
    'Extract and summarise the key information from these sources: ' + JSON.stringify(searchResults),
@@ -114,7 +93,6 @@ export default async function main(args) {
  );
  log('Fetch completed');

-  // Phase 4: Verify
  phase('Verify');
  const verified = await agent(
    'Cross-reference the fetched information. Note any contradictions, gaps, or weak sources: ' + JSON.stringify(fetchedContent),
@@ -122,7 +100,6 @@ export default async function main(args) {
  );
  log('Verify completed');

-  // Phase 5: Synthesise
  phase('Synthesise');
  const report = await agent(
    'Synthesise the verified information into a structured report with findings, sources, and confidence levels: ' + JSON.stringify(verified),
@@ -161,28 +138,24 @@ export default async function main(args) {
    { label: 'read-context', phase: 'context' },
  );

-  // Phase 1: Correctness
  phase('Correctness');
  const correctness = await agent(
    'Review this code for correctness. Check logical errors, edge cases, type safety, and concurrency issues:\\n' + JSON.stringify(context),
    { label: 'correctness-review', phase: 'correctness' },
  );

-  // Phase 2: Security
  phase('Security');
  const security = await agent(
    'Review this code for security vulnerabilities. Check for injection, auth bypasses, unsafe deserialisation, secret exposure:\\n' + JSON.stringify(context),
    { label: 'security-review', phase: 'security' },
  );

-  // Phase 3: Performance
  phase('Performance');
  const performance = await agent(
    'Review this code for performance issues. Check algorithmic complexity, unnecessary allocations, I/O patterns, caching opportunities:\\n' + JSON.stringify(context),
    { label: 'performance-review', phase: 'performance' },
  );

-  // Phase 4: Synthesise
  phase('Synthesise');
  const report = await agent(
    'Merge these three review perspectives into one structured report with severity-ranked findings:\\n' +
@@ -271,10 +244,6 @@ export default async function main(args) {
 `.trim();
 }

-// ---------------------------------------------------------------------------
-// Registry
-// ---------------------------------------------------------------------------
-
 /**
 * All built-in workflow definitions shipped with BooCode.
 */
--- a/apps/server/src/services/workflow/discovery.ts
+++ b/apps/server/src/services/workflow/discovery.ts
@@ -109,7 +109,6 @@ export function findWorkflow(
  name: string,
  projectRoot: string,
 ): WorkflowMeta | undefined {
-  // Check built-in catalog first
  const builtin = getBuiltinWorkflow(name);
  if (builtin) {
    return {
--- a/apps/server/src/services/workflow/manager.ts
+++ b/apps/server/src/services/workflow/manager.ts
@@ -45,10 +45,6 @@ const AGENT_TASK_TIMEOUT_MS = 300_000;
 */
 const POLL_INTERVAL_MS = 500;

-/**
- * Maximum time for the entire workflow run (30 minutes).
- */
-const WORKFLOW_TIMEOUT_MS = 1_800_000;

 /**
 * Token budget tracker. Tracks total token spend across agent calls.
@@ -110,8 +106,6 @@ export class WorkflowManager {
    private broker: Broker,
  ) {}

-  // ---- public API ----
-
  /**
   * Discover all available workflow scripts.
   */
@@ -154,7 +148,6 @@ export class WorkflowManager {
    }

    try {
-      // Load meta by executing the script in a throwaway context
      const context = this.#createMinimalContext('meta-loader');
      const code = readFileSync(found.sourceFile, 'utf8');
      const finalCode = isEsmSyntax(code) ? transformEsmToCjs(code) : code;
@@ -209,7 +202,6 @@ export class WorkflowManager {
    this.#runs.set(runId, state);
    this.#emit({ type: 'run_started', runId, name });

-    // Run asynchronously — caller receives the runId immediately.
    void this.#executeRun(state, found.sourceFile, args ?? {});

    return { runId };
@@ -259,8 +251,6 @@ export class WorkflowManager {
    };
  }

-  // ---- internal execution ----
-
  /**
   * Execute the workflow script in the sandbox.
   */
@@ -373,7 +363,6 @@ export class WorkflowManager {
    spec: AgentTaskSpec,
    signal?: AbortSignal,
  ): Promise<unknown> {
-    // ---- 0. Check resumability cache before creating a new task ----
    const cacheKeyStr = cacheKey(spec, '');
    const cached = getCachedResult(cacheKeyStr);
    if (cached) {
@@ -382,7 +371,6 @@ export class WorkflowManager {

    const model = spec.model ?? null;

-    // ---- 1. Create a session for this agent task ----
    const sessionName = `workflow-agent-${spec.label ?? 'task'}`;
    const sessionResult = await this.sql.begin(async (tx) => {
      const [session] = await tx<{ id: string }[]>`
@@ -395,7 +383,6 @@ export class WorkflowManager {
    });
    const sessionId = sessionResult.id;

-    // ---- 2. Create a chat in this session ----
    const chatResult = await this.sql.begin(async (tx) => {
      const [chat] = await tx<{ id: string }[]>`
        INSERT INTO chats (session_id, name)
@@ -407,8 +394,7 @@ export class WorkflowManager {
    });
    const chatId = chatResult.id;

-    // ---- 3. Insert user message + streaming assistant message ----
-    const { userMessageId, assistantMessageId } = await this.sql.begin(async (tx) => {
+    const { userMessageId: _userMessageId, assistantMessageId } = await this.sql.begin(async (tx) => {
      const [userMsg] = await tx<{ id: string }[]>`
        INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
        VALUES (${sessionId}, ${chatId}, 'user', ${prompt}, 'complete', clock_timestamp())
@@ -425,8 +411,6 @@ export class WorkflowManager {
      };
    });

-    // ---- 4. Dispatch inference ----
-    // Create a bounded InferenceContext that won't crash on missing WS
    const ctx: import('../inference/types.js').InferenceContext = {
      sql: this.sql,
      config: this.config,
@@ -451,7 +435,6 @@ export class WorkflowManager {
      signal?.removeEventListener('abort', onAbort);
    });

-    // ---- 5. Poll for completion ----
    try {
      const result = await this.#pollForCompletion(
        chatId,
@@ -607,7 +590,7 @@ export class WorkflowManager {
   * Create a minimal WorkflowContext for non-execution purposes
   * (e.g. loading meta).
   */
-  #createMinimalContext(runId: string): Record<string, unknown> {
+  #createMinimalContext(_runId: string): Record<string, unknown> {
    return {
      agent: () => Promise.reject(new Error('Not available in this context')),
      parallel: () => Promise.reject(new Error('Not available in this context')),
@@ -634,8 +617,6 @@ export class WorkflowManager {
  }
 }

-// ---- internal types ----
-
 /**
 * Metadata returned from listWorkflows / getWorkflow.
 */
--- a/apps/server/src/services/workflow/resumability.ts
+++ b/apps/server/src/services/workflow/resumability.ts
@@ -8,10 +8,6 @@
 import { createHash } from 'node:crypto';
 import type { AgentTaskSpec } from './types.js';

-// ---------------------------------------------------------------------------
-// Types
-// ---------------------------------------------------------------------------
-
 /**
 * Shape of a cached agent task result. Mirrors the successful fields of
 * `AgentTaskResult` without the runtime-only `cached` flag.
@@ -31,10 +27,6 @@ interface CacheEntry {
  insertedAt: number;
 }

-// ---------------------------------------------------------------------------
-// Cache store
-// ---------------------------------------------------------------------------
-
 /**
 * Default TTL for cached entries (30 minutes).
 * After this period entries are considered stale and are evicted on access.
@@ -51,10 +43,6 @@ const MAX_ENTRIES = 500;
 */
 const cache = new Map<string, CacheEntry>();

-// ---------------------------------------------------------------------------
-// Public API
-// ---------------------------------------------------------------------------
-
 /**
 * Build a deterministic SHA-256 hash for an agent task specification.
 *
@@ -168,10 +156,6 @@ export function cacheSize(): number {
  return cache.size;
 }

-// ---------------------------------------------------------------------------
-// Internal helpers
-// ---------------------------------------------------------------------------
-
 /**
 * Stable JSON serialisation that produces the same output string for the same
 * data regardless of JavaScript object property insertion order.
--- a/apps/server/src/services/workflow/sandbox.ts
+++ b/apps/server/src/services/workflow/sandbox.ts
@@ -76,7 +76,6 @@ export function isEsmSyntax(code: string): boolean {
 */
 export function buildSandbox(context: WorkflowContext): Record<string, unknown> {
  return {
-    // --- Workflow API (from context) ---
    agent: context.agent,
    parallel: context.parallel,
    pipeline: context.pipeline,
@@ -86,7 +85,6 @@ export function buildSandbox(context: WorkflowContext): Record<string, unknown>
    args: context.args,
    workflow: context.workflow,

-    // --- Safe built-ins ---
    console: {
      log: context.log,
      warn: context.log,
@@ -122,7 +120,6 @@ export function buildSandbox(context: WorkflowContext): Record<string, unknown>
    true: true,
    false: false,

-    // --- CommonJS interop ---
    module: { exports: {} },
    exports: {},
    require: undefined, // intentionally disabled
--- a/apps/server/src/types/api.ts
+++ b/apps/server/src/types/api.ts
@@ -132,6 +132,11 @@ export interface Agent {
  // vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
  // Maps to DeepSeek's reasoning_effort API param.
  reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
+  // vDeepSeek: JSON output mode. When set, the model outputs a valid JSON object.
+  response_format: { type: 'json_object' } | null;
+  // vDeepSeek: prefix content for chat prefix completion. When set, injects
+  // an assistant prefix message forcing the model to complete from it.
+  prefix_content: string | null;
 }

 // One entry per malformed `## Name` block. Per-block errors don't fail the
@@ -224,7 +229,9 @@ export interface Message {
  // (qwen3.6 etc.). Populated from message_parts via the messages_with_parts
  // view's reasoning_parts column. Optional — most rows have no reasoning
  // and the API may omit the field on legacy responses.
-  reasoning_parts?: Array<{ text: string }> | null;
+  // `signature` (Phase 2) carries the Anthropic thinking-block signature so it
+  // can be replayed verbatim on the next turn. Absent for non-anthropic models.
+  reasoning_parts?: Array<{ text: string; signature?: string }> | null;
  // v1.11: anchored rolling compaction. Optional so consumers that SELECT
  // the pre-v1.11 column set still type-check. See compaction.ts +
  // schema.sql for semantics.