Compare commits
6 Commits
v2.8.17-bo
...
v2.8.18-do
| Author | SHA1 | Date | |
|---|---|---|---|
| 917a229363 | |||
| 39be5ce413 | |||
| 378e29308e | |||
| 8f6a814ab0 | |||
| 3c019a2281 | |||
| 203cfd2fa8 |
@@ -20,6 +20,12 @@ SEARXNG_URL=http://100.114.205.53:8888
|
|||||||
# with FAST_MODEL when unset.
|
# with FAST_MODEL when unset.
|
||||||
# TASK_MODEL_URL=http://100.90.172.55:7995
|
# TASK_MODEL_URL=http://100.90.172.55:7995
|
||||||
|
|
||||||
|
# DeepSeek API key. When set, models with IDs starting with 'deepseek-'
|
||||||
|
# (e.g. deepseek-chat, deepseek-reasoner, deepseek-v4-flash) route through
|
||||||
|
# DeepSeek's API instead of llama-swap. Requires a DeepSeek Platform API key.
|
||||||
|
# DEEPSEEK_API_KEY=sk-...
|
||||||
|
# DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||||
|
|
||||||
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
||||||
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
||||||
# sessions where the model only needs read-only filesystem access.
|
# sessions where the model only needs read-only filesystem access.
|
||||||
|
|||||||
@@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
All notable changes per release tag. Most recent on top, ordered by tag creation date (which matches the git history). Tag names follow `vMAJOR.MINOR.PATCH-slug` — the slug describes what shipped, so the tag name alone is enough to recall the batch.
|
All notable changes per release tag. Most recent on top, ordered by tag creation date (which matches the git history). Tag names follow `vMAJOR.MINOR.PATCH-slug` — the slug describes what shipped, so the tag name alone is enough to recall the batch.
|
||||||
|
|
||||||
|
## v2.8.18-deepseek-whale-lift — 2026-06-08
|
||||||
|
|
||||||
|
Integrates DeepSeek API directly into BooChat and BooCoder via `@ai-sdk/deepseek`, replacing the generic `openai-compatible` wrapper. DeepSeek V4 models (`deepseek-v4-flash`, `deepseek-v4-pro`) with configurable thinking effort levels appear in both chat and coder pane model pickers. Full token tracking — cache hit tokens and reasoning tokens — flow from the API through new DB columns and WS frames into the UI message stats line. Lifts three high-value features from the Whale codebase: a schema-based tool input repair system that coerces types and unwraps markdown autolinks before Zod validation, a shell-based lifecycle hooks system (PreToolUse, PostToolUse, Stop, PreCompact, PostCompact) with JSON stdin/stdout contract, and per-MCP-server permissions (allow/ask/deny) gating tool execution.
|
||||||
|
|
||||||
## v2.8.0-fork-lifts — 2026-06-07
|
## v2.8.0-fork-lifts — 2026-06-07
|
||||||
|
|
||||||
Completes the eight fork-lift integrations from `/opt/forks` into BooCode: boocontext sidecar upgrade, LSP code intelligence, DCP clean-room pruning, institutional memory, subagent protocol enhancements, plugin hook host, inference reliability (tool-shim + loop detectors), and TokenScope token breakdown. Backfills edit safety guards (truncation + dropped imports) and the TokenScope analyzer/persist module. Closes the fork-lifts-mit epic.
|
Completes the eight fork-lift integrations from `/opt/forks` into BooCode: boocontext sidecar upgrade, LSP code intelligence, DCP clean-room pruning, institutional memory, subagent protocol enhancements, plugin hook host, inference reliability (tool-shim + loop detectors), and TokenScope token breakdown. Backfills edit safety guards (truncation + dropped imports) and the TokenScope analyzer/persist module. Closes the fork-lifts-mit epic.
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ const ConfigSchema = z.object({
|
|||||||
// only reaped after it's been untouched this long (avoids sweeping a dir mid
|
// only reaped after it's been untouched this long (avoids sweeping a dir mid
|
||||||
// ensureSessionWorktree create). 1h default.
|
// ensureSessionWorktree create). 1h default.
|
||||||
ORPHAN_WORKTREE_GRACE_MS: z.coerce.number().int().positive().default(3_600_000),
|
ORPHAN_WORKTREE_GRACE_MS: z.coerce.number().int().positive().default(3_600_000),
|
||||||
|
DEEPSEEK_API_KEY: z.string().optional(),
|
||||||
|
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Config = z.infer<typeof ConfigSchema>;
|
export type Config = z.infer<typeof ConfigSchema>;
|
||||||
|
|||||||
@@ -29,6 +29,22 @@ interface AgentRow {
|
|||||||
last_probed_at: string | Date | null;
|
last_probed_at: string | Date | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function fetchDeepSeekModels(config: Config): Promise<ProviderModel[]> {
|
||||||
|
if (!config.DEEPSEEK_API_KEY) return [];
|
||||||
|
try {
|
||||||
|
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||||
|
const res = await fetch(`${baseURL}/v1/models`, {
|
||||||
|
headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||||
|
signal: AbortSignal.timeout(5_000),
|
||||||
|
});
|
||||||
|
if (!res.ok) return [];
|
||||||
|
const parsed = (await res.json()) as { data?: Array<{ id: string }> };
|
||||||
|
return (parsed.data ?? []).map((m) => ({ id: m.id, label: m.id }));
|
||||||
|
} catch {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export async function fetchLlamaSwapModels(config: Config): Promise<ProviderModel[]> {
|
export async function fetchLlamaSwapModels(config: Config): Promise<ProviderModel[]> {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
||||||
@@ -256,7 +272,13 @@ export async function getProviderSnapshot(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const build = async (): Promise<ProviderSnapshotEntry[]> => {
|
const build = async (): Promise<ProviderSnapshotEntry[]> => {
|
||||||
const llamaModels = await fetchLlamaSwapModels(config);
|
const [llamaModels, deepseekModels] = await Promise.all([
|
||||||
|
fetchLlamaSwapModels(config),
|
||||||
|
fetchDeepSeekModels(config),
|
||||||
|
]);
|
||||||
|
// Merge DeepSeek models into the llama-swap model pool so the boocode
|
||||||
|
// provider (which sources from llama-swap) also includes DeepSeek models.
|
||||||
|
const mergedModels = mergeModels(llamaModels, deepseekModels);
|
||||||
const agents = await sql<AgentRow[]>`
|
const agents = await sql<AgentRow[]>`
|
||||||
SELECT name, install_path, supports_acp, models, commands, label, transport, last_probed_at FROM available_agents
|
SELECT name, install_path, supports_acp, models, commands, label, transport, last_probed_at FROM available_agents
|
||||||
`;
|
`;
|
||||||
@@ -265,7 +287,7 @@ export async function getProviderSnapshot(
|
|||||||
|
|
||||||
const entries = await Promise.all(
|
const entries = await Promise.all(
|
||||||
[...getResolvedRegistry().values()].map((resolved) =>
|
[...getResolvedRegistry().values()].map((resolved) =>
|
||||||
buildProviderEntry(resolved, agentMap.get(resolved.id), llamaModels, resolvedCwd, ttlMs, force),
|
buildProviderEntry(resolved, agentMap.get(resolved.id), mergedModels, resolvedCwd, ttlMs, force),
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -77,8 +77,9 @@
|
|||||||
"test": "vitest run"
|
"test": "vitest run"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@boocode/contracts": "workspace:*",
|
"@ai-sdk/deepseek": "^2.0.35",
|
||||||
"@ai-sdk/openai-compatible": "^2.0.47",
|
"@ai-sdk/openai-compatible": "^2.0.47",
|
||||||
|
"@boocode/contracts": "workspace:*",
|
||||||
"@fastify/static": "^7.0.4",
|
"@fastify/static": "^7.0.4",
|
||||||
"@fastify/websocket": "^10.0.1",
|
"@fastify/websocket": "^10.0.1",
|
||||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||||
|
|||||||
@@ -26,6 +26,14 @@ const ConfigSchema = z.object({
|
|||||||
FAST_MODEL: z.string().optional(),
|
FAST_MODEL: z.string().optional(),
|
||||||
TASK_MODEL_URL: z.string().url().optional(),
|
TASK_MODEL_URL: z.string().url().optional(),
|
||||||
LLAMA_SIDECAR_URL: z.string().url().optional(),
|
LLAMA_SIDECAR_URL: z.string().url().optional(),
|
||||||
|
// vDeepSeek: DeepSeek API key for direct API access. When set, models
|
||||||
|
// with IDs starting with 'deepseek-' route through DeepSeek's API instead
|
||||||
|
// of llama-swap. Defaults to empty (DeepSeek routing disabled).
|
||||||
|
DEEPSEEK_API_KEY: z.string().optional(),
|
||||||
|
// Optional base URL override for DeepSeek API. Defaults to api.deepseek.com.
|
||||||
|
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||||
|
// vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
|
||||||
|
HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Config = z.infer<typeof ConfigSchema>;
|
export type Config = z.infer<typeof ConfigSchema>;
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ import { loadMcpConfig } from './services/mcp-config.js';
|
|||||||
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
||||||
import { appendMcpTools } from './services/tools.js';
|
import { appendMcpTools } from './services/tools.js';
|
||||||
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
||||||
|
import { loadHooksConfig, createHookRunner } from './services/hooks.js';
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const config = loadConfig();
|
const config = loadConfig();
|
||||||
@@ -136,11 +137,17 @@ async function main() {
|
|||||||
app.log.warn({ err }, 'skills boot walk failed');
|
app.log.warn({ err }, 'skills boot walk failed');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vWhale hooks: load hook config and create runner. Missing file = no hooks.
|
||||||
|
loadHooksConfig(config.HOOKS_CONFIG_PATH);
|
||||||
|
const hookRunner = createHookRunner();
|
||||||
|
const hasHooks = Object.keys(loadHooksConfig(config.HOOKS_CONFIG_PATH).hooks).length > 0;
|
||||||
|
|
||||||
const inference = createInferenceRunner(
|
const inference = createInferenceRunner(
|
||||||
{
|
{
|
||||||
sql,
|
sql,
|
||||||
config,
|
config,
|
||||||
log: app.log,
|
log: app.log,
|
||||||
|
hooks: hasHooks ? hookRunner : undefined,
|
||||||
publish: (sessionId, frame) => {
|
publish: (sessionId, frame) => {
|
||||||
// v1.13.11-b: route through the typed publishFrame so the broker's
|
// v1.13.11-b: route through the typed publishFrame so the broker's
|
||||||
// Zod gate validates every inference frame before delivery.
|
// Zod gate validates every inference frame before delivery.
|
||||||
@@ -166,7 +173,7 @@ async function main() {
|
|||||||
// bubble up so the route can reply 500 — manual /compact failures
|
// bubble up so the route can reply 500 — manual /compact failures
|
||||||
// should be loud (the user just clicked a button).
|
// should be loud (the user just clicked a button).
|
||||||
runCompaction: (chatId) =>
|
runCompaction: (chatId) =>
|
||||||
compaction.process({ sql, config, log: app.log, broker, chatId }),
|
compaction.process({ sql, config, log: app.log, broker, chatId, hooks: hasHooks ? hookRunner : undefined }),
|
||||||
cancelInference: async (sessionId, chatId) => {
|
cancelInference: async (sessionId, chatId) => {
|
||||||
return inference.cancel(sessionId, chatId);
|
return inference.cancel(sessionId, chatId);
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -2,26 +2,55 @@ import type { FastifyInstance } from 'fastify';
|
|||||||
import type { Config } from '../config.js';
|
import type { Config } from '../config.js';
|
||||||
import type { ModelInfo } from '../types/api.js';
|
import type { ModelInfo } from '../types/api.js';
|
||||||
|
|
||||||
interface LlamaSwapModelsResponse {
|
interface ApiModelsResponse {
|
||||||
data?: ModelInfo[];
|
data?: ModelInfo[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
|
||||||
|
{ id: 'deepseek-v4-flash', object: 'model', created: 0, owned_by: 'deepseek' },
|
||||||
|
{ id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
|
||||||
|
];
|
||||||
|
|
||||||
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
|
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
|
||||||
app.get('/api/models', async (_req, reply) => {
|
app.get('/api/models', async (_req, reply) => {
|
||||||
|
const models: ModelInfo[] = [];
|
||||||
|
|
||||||
|
// 1. Fetch llama-swap models
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
||||||
if (!res.ok) {
|
if (res.ok) {
|
||||||
reply.code(502);
|
const parsed = (await res.json()) as ApiModelsResponse;
|
||||||
return { error: `llama-swap returned ${res.status}` };
|
if (parsed.data) models.push(...parsed.data);
|
||||||
}
|
}
|
||||||
const parsed = (await res.json()) as LlamaSwapModelsResponse;
|
} catch {
|
||||||
return parsed.data ?? [];
|
// llama-swap unreachable — proceed with whatever we have
|
||||||
} catch (err) {
|
|
||||||
reply.code(502);
|
|
||||||
return {
|
|
||||||
error: 'failed to reach llama-swap',
|
|
||||||
details: err instanceof Error ? err.message : String(err),
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 2. If DeepSeek is configured, fetch live models from their API
|
||||||
|
if (config.DEEPSEEK_API_KEY) {
|
||||||
|
try {
|
||||||
|
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||||
|
const res = await fetch(`${baseURL}/v1/models`, {
|
||||||
|
headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||||
|
signal: AbortSignal.timeout(5_000),
|
||||||
|
});
|
||||||
|
if (res.ok) {
|
||||||
|
const parsed = (await res.json()) as ApiModelsResponse;
|
||||||
|
if (parsed.data) models.push(...parsed.data);
|
||||||
|
} else {
|
||||||
|
// API call failed — fall back to static model list
|
||||||
|
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Network error — fall back to static model list
|
||||||
|
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (models.length === 0) {
|
||||||
|
reply.code(502);
|
||||||
|
return { error: 'no models available from any provider' };
|
||||||
|
}
|
||||||
|
return models;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,11 +32,18 @@ CREATE TABLE IF NOT EXISTS messages (
|
|||||||
content TEXT NOT NULL DEFAULT '',
|
content TEXT NOT NULL DEFAULT '',
|
||||||
status TEXT NOT NULL DEFAULT 'complete',
|
status TEXT NOT NULL DEFAULT 'complete',
|
||||||
last_seq INT NOT NULL DEFAULT 0,
|
last_seq INT NOT NULL DEFAULT 0,
|
||||||
|
cache_tokens INTEGER,
|
||||||
|
reasoning_tokens INTEGER,
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, created_at);
|
CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, created_at);
|
||||||
|
|
||||||
|
-- vDeepSeek: add cache/reasoning token columns early so messages_with_parts
|
||||||
|
-- view (defined below) can reference them. IF NOT EXISTS guards re-runs.
|
||||||
|
ALTER TABLE messages ADD COLUMN IF NOT EXISTS cache_tokens INTEGER;
|
||||||
|
ALTER TABLE messages ADD COLUMN IF NOT EXISTS reasoning_tokens INTEGER;
|
||||||
|
|
||||||
-- v1.13.0: granular message parts table. v1.13.20: legacy tool_calls/
|
-- v1.13.0: granular message parts table. v1.13.20: legacy tool_calls/
|
||||||
-- tool_results columns dropped; message_parts is now the sole source of
|
-- tool_results columns dropped; message_parts is now the sole source of
|
||||||
-- truth for tool calls, tool results, and reasoning. ON DELETE CASCADE
|
-- truth for tool calls, tool results, and reasoning. ON DELETE CASCADE
|
||||||
@@ -126,8 +133,8 @@ SELECT
|
|||||||
FROM message_parts p
|
FROM message_parts p
|
||||||
WHERE p.message_id = m.id AND p.kind = 'reasoning' AND p.hidden_at IS NULL) AS reasoning_parts,
|
WHERE p.message_id = m.id AND p.kind = 'reasoning' AND p.hidden_at IS NULL) AS reasoning_parts,
|
||||||
-- NEW columns MUST be appended at the end: CREATE OR REPLACE VIEW can't
|
-- NEW columns MUST be appended at the end: CREATE OR REPLACE VIEW can't
|
||||||
-- reorder/rename existing columns (42P16). m.model added last.
|
-- reorder/rename existing columns (42P16). cache_tokens and reasoning_tokens added last.
|
||||||
m.model
|
m.model, m.cache_tokens, m.reasoning_tokens
|
||||||
FROM messages m;
|
FROM messages m;
|
||||||
|
|
||||||
-- v1.13.20: drop legacy tool_calls/tool_results columns. Reads have routed
|
-- v1.13.20: drop legacy tool_calls/tool_results columns. Reads have routed
|
||||||
|
|||||||
@@ -106,6 +106,8 @@ interface ParsedFrontmatter {
|
|||||||
// allowed" — the model responds text-only.
|
// allowed" — the model responds text-only.
|
||||||
steps?: number;
|
steps?: number;
|
||||||
llama_extra_args?: string[];
|
llama_extra_args?: string[];
|
||||||
|
// vDeepSeek: thinking effort for DeepSeek V4 models.
|
||||||
|
reasoning_effort?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// P5: table-driven validation for the "soft-range" numeric frontmatter fields.
|
// P5: table-driven validation for the "soft-range" numeric frontmatter fields.
|
||||||
@@ -386,6 +388,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
|
|||||||
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
|
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
|
||||||
steps: typeof fm.steps === 'number' ? fm.steps : null,
|
steps: typeof fm.steps === 'number' ? fm.steps : null,
|
||||||
llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
|
llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
|
||||||
|
reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,10 @@
|
|||||||
|
// DEPRECATED (Phase 4, Domain 2, v2.8.14): This HTTP client routes through
|
||||||
|
// the Go codecontext sidecar (http://codecontext:8080). Superseded by the
|
||||||
|
// boocontext MCP server. New callers should use boocontext MCP tool wrappers
|
||||||
|
// directly. Keep this file for backward compatibility — the 16 existing
|
||||||
|
// codecontext tool wrappers (under tools/codecontext/) still call through
|
||||||
|
// callCodecontext(). Remove after full migration.
|
||||||
|
//
|
||||||
// v1.12 Track B.2: shared HTTP client for the codecontext sidecar. The 8
|
// v1.12 Track B.2: shared HTTP client for the codecontext sidecar. The 8
|
||||||
// per-tool wrappers under tools/codecontext/ all funnel through callCodecontext
|
// per-tool wrappers under tools/codecontext/ all funnel through callCodecontext
|
||||||
// — they're thin adapters that supply toolName + args + projectPath. The
|
// — they're thin adapters that supply toolName + args + projectPath. The
|
||||||
@@ -112,6 +119,11 @@ export async function callCodecontext(
|
|||||||
req: CodecontextRequest,
|
req: CodecontextRequest,
|
||||||
fetcher: typeof fetch = fetch,
|
fetcher: typeof fetch = fetch,
|
||||||
): Promise<CodecontextResponse> {
|
): Promise<CodecontextResponse> {
|
||||||
|
// DEPRECATED: This function routes through the Go codecontext sidecar at
|
||||||
|
// http://codecontext:8080. New callers should use boocontext MCP instead.
|
||||||
|
console.warn(
|
||||||
|
`[deprecated] callCodecontext("${req.toolName}") — route through boocontext MCP instead`,
|
||||||
|
);
|
||||||
// Step 1: realpath the project root, then realpath the requested target_dir
|
// Step 1: realpath the project root, then realpath the requested target_dir
|
||||||
// (defaulting to projectPath when the caller didn't pass one — the 12 wrappers
|
// (defaulting to projectPath when the caller didn't pass one — the 12 wrappers
|
||||||
// never pass target_dir; tests can override). A non-existent target_dir
|
// never pass target_dir; tests can override). A non-existent target_dir
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
|
|||||||
import * as modelContextLookup from './model-context.js';
|
import * as modelContextLookup from './model-context.js';
|
||||||
import { SENTINEL_KINDS } from './inference/sentinels.js';
|
import { SENTINEL_KINDS } from './inference/sentinels.js';
|
||||||
import type { OpenAiMessage } from './inference/payload.js';
|
import type { OpenAiMessage } from './inference/payload.js';
|
||||||
|
import { resolveModelEndpoint } from './inference/provider.js';
|
||||||
|
import type { HookRunner } from './hooks.js';
|
||||||
|
|
||||||
// v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
|
// v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
|
||||||
// (opencode session/overflow.ts pattern). Replaces the v1.11.0-era
|
// (opencode session/overflow.ts pattern). Replaces the v1.11.0-era
|
||||||
@@ -346,20 +348,22 @@ interface CompletionResult {
|
|||||||
completionTokens: number;
|
completionTokens: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function callLlamaSwap(
|
async function callLlm(
|
||||||
config: Config,
|
config: Config,
|
||||||
model: string,
|
model: string,
|
||||||
messages: OpenAiMessage[],
|
messages: OpenAiMessage[],
|
||||||
log: FastifyBaseLogger,
|
log: FastifyBaseLogger,
|
||||||
): Promise<CompletionResult> {
|
): Promise<CompletionResult> {
|
||||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
const { url, headers, model: resolvedModel } = resolveModelEndpoint(config, model);
|
||||||
|
const res = await fetch(`${url}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers,
|
||||||
body: JSON.stringify({ model, messages, stream: false }),
|
body: JSON.stringify({ model: resolvedModel, messages, stream: false }),
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
const text = await res.text().catch(() => '');
|
const text = await res.text().catch(() => '');
|
||||||
throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
|
const prefix = model.startsWith('deepseek-') ? 'deepseek' : 'llama-swap';
|
||||||
|
throw new Error(`${prefix} returned ${res.status}: ${text.slice(0, 200)}`);
|
||||||
}
|
}
|
||||||
const json = (await res.json()) as {
|
const json = (await res.json()) as {
|
||||||
choices?: Array<{ message?: { content?: string } }>;
|
choices?: Array<{ message?: { content?: string } }>;
|
||||||
@@ -383,6 +387,8 @@ export interface ProcessInput {
|
|||||||
log: FastifyBaseLogger;
|
log: FastifyBaseLogger;
|
||||||
broker: Broker;
|
broker: Broker;
|
||||||
chatId: string;
|
chatId: string;
|
||||||
|
/** vWhale: lifecycle hooks runner. Undefined when no hooks configured. */
|
||||||
|
hooks?: HookRunner;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs one round of anchored rolling compaction on `chatId`. No-ops cleanly
|
// Runs one round of anchored rolling compaction on `chatId`. No-ops cleanly
|
||||||
@@ -497,6 +503,17 @@ export async function process(input: ProcessInput): Promise<void> {
|
|||||||
at: new Date().toISOString(),
|
at: new Date().toISOString(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// vWhale: PreCompact hook (best-effort, non-blocking).
|
||||||
|
const msgBefore = messages.length;
|
||||||
|
if (input.hooks) {
|
||||||
|
input.hooks.run('PreCompact', {
|
||||||
|
event: 'PreCompact',
|
||||||
|
session_id: sessionId,
|
||||||
|
chat_id: chatId,
|
||||||
|
messages_before: msgBefore,
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
// try/finally so the dot ALWAYS drops back to idle, even if the LLM call
|
// try/finally so the dot ALWAYS drops back to idle, even if the LLM call
|
||||||
// throws or a downstream DB write fails. The succeeded flag gates the
|
// throws or a downstream DB write fails. The succeeded flag gates the
|
||||||
// 'compacted' frame + final log: we only signal completion to the UI when
|
// 'compacted' frame + final log: we only signal completion to the UI when
|
||||||
@@ -506,7 +523,7 @@ export async function process(input: ProcessInput): Promise<void> {
|
|||||||
let result: CompletionResult | undefined;
|
let result: CompletionResult | undefined;
|
||||||
try {
|
try {
|
||||||
// 7. Single completion (no tools). Throws on llama-swap failure.
|
// 7. Single completion (no tools). Throws on llama-swap failure.
|
||||||
result = await callLlamaSwap(config, session.model, payload, log);
|
result = await callLlm(config, session.model, payload, log);
|
||||||
|
|
||||||
// 7b. v1.11.3: fetch the model's true context window from llama-swap's
|
// 7b. v1.11.3: fetch the model's true context window from llama-swap's
|
||||||
// /upstream/<model>/props (the streaming completion doesn't carry it).
|
// /upstream/<model>/props (the streaming completion doesn't carry it).
|
||||||
@@ -558,6 +575,18 @@ export async function process(input: ProcessInput): Promise<void> {
|
|||||||
`;
|
`;
|
||||||
|
|
||||||
succeeded = true;
|
succeeded = true;
|
||||||
|
|
||||||
|
// vWhale: PostCompact hook (best-effort, non-blocking).
|
||||||
|
if (input.hooks) {
|
||||||
|
input.hooks.run('PostCompact', {
|
||||||
|
event: 'PostCompact',
|
||||||
|
session_id: sessionId,
|
||||||
|
chat_id: chatId,
|
||||||
|
messages_before: msgBefore,
|
||||||
|
messages_after: sel.head.length,
|
||||||
|
summary: (result?.content ?? '').slice(0, 500),
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
// Always restore the dot. Status='idle' (not 'error') even on failure —
|
// Always restore the dot. Status='idle' (not 'error') even on failure —
|
||||||
// the caller logs/re-surfaces the error separately; the dot doesn't
|
// the caller logs/re-surfaces the error separately; the dot doesn't
|
||||||
|
|||||||
299
apps/server/src/services/hooks.ts
Normal file
299
apps/server/src/services/hooks.ts
Normal file
@@ -0,0 +1,299 @@
|
|||||||
|
/**
|
||||||
|
* vWhale: lifecycle hook runner. Hooks are shell commands that fire at key
|
||||||
|
* points in the inference pipeline. Each hook receives a JSON payload on
|
||||||
|
* stdin and can return JSON on stdout to influence behavior.
|
||||||
|
*
|
||||||
|
* Inspired by Whale's hook system with 11 lifecycle events. BooCode
|
||||||
|
* implements the most relevant subset: PreToolUse, PostToolUse,
|
||||||
|
* UserPromptSubmit, Stop, PreCompact, PostCompact.
|
||||||
|
*
|
||||||
|
* Config: JSON file at HOOKS_CONFIG_PATH (default /data/hooks.json).
|
||||||
|
* Format:
|
||||||
|
* ```json
|
||||||
|
* {
|
||||||
|
* "hooks": {
|
||||||
|
* "PreToolUse": [
|
||||||
|
* { "match": "shell_run", "command": "python3 /data/hooks/check_shell.py", "timeout": 30 }
|
||||||
|
* ],
|
||||||
|
* "Stop": [
|
||||||
|
* { "command": "node /data/hooks/log_turn.mjs" }
|
||||||
|
* ]
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { spawn } from 'node:child_process';
|
||||||
|
import { readFileSync, existsSync } from 'node:fs';
|
||||||
|
import type { FastifyBaseLogger } from 'fastify';
|
||||||
|
|
||||||
|
// ─── Events ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export type HookEvent =
|
||||||
|
| 'PreToolUse'
|
||||||
|
| 'PostToolUse'
|
||||||
|
| 'UserPromptSubmit'
|
||||||
|
| 'Stop'
|
||||||
|
| 'PreCompact'
|
||||||
|
| 'PostCompact';
|
||||||
|
|
||||||
|
const ALL_EVENTS: HookEvent[] = [
|
||||||
|
'PreToolUse',
|
||||||
|
'PostToolUse',
|
||||||
|
'UserPromptSubmit',
|
||||||
|
'Stop',
|
||||||
|
'PreCompact',
|
||||||
|
'PostCompact',
|
||||||
|
];
|
||||||
|
|
||||||
|
// ─── Config ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface HookConfig {
|
||||||
|
/** Glob or exact tool name to match (PreToolUse/PostToolUse only). Omit or '*' for all. */
|
||||||
|
match?: string;
|
||||||
|
/** Shell command to run. Receives JSON payload on stdin. */
|
||||||
|
command: string;
|
||||||
|
/** Timeout in seconds (default 30). */
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface HooksConfig {
|
||||||
|
hooks: Partial<Record<HookEvent, HookConfig[]>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Payloads ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface PreToolUsePayload {
|
||||||
|
event: 'PreToolUse';
|
||||||
|
session_id: string;
|
||||||
|
tool_name: string;
|
||||||
|
tool_args: Record<string, unknown>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PostToolUsePayload {
|
||||||
|
event: 'PostToolUse';
|
||||||
|
session_id: string;
|
||||||
|
tool_name: string;
|
||||||
|
tool_args: Record<string, unknown>;
|
||||||
|
tool_result: unknown;
|
||||||
|
tool_error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface UserPromptSubmitPayload {
|
||||||
|
event: 'UserPromptSubmit';
|
||||||
|
session_id: string;
|
||||||
|
chat_id: string;
|
||||||
|
prompt: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface StopPayload {
|
||||||
|
event: 'Stop';
|
||||||
|
session_id: string;
|
||||||
|
chat_id: string;
|
||||||
|
last_assistant_text: string;
|
||||||
|
turn: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PreCompactPayload {
|
||||||
|
event: 'PreCompact';
|
||||||
|
session_id: string;
|
||||||
|
chat_id: string;
|
||||||
|
messages_before: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PostCompactPayload {
|
||||||
|
event: 'PostCompact';
|
||||||
|
session_id: string;
|
||||||
|
chat_id: string;
|
||||||
|
messages_before: number;
|
||||||
|
messages_after: number;
|
||||||
|
summary: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type HookPayload =
|
||||||
|
| PreToolUsePayload
|
||||||
|
| PostToolUsePayload
|
||||||
|
| UserPromptSubmitPayload
|
||||||
|
| StopPayload
|
||||||
|
| PreCompactPayload
|
||||||
|
| PostCompactPayload;
|
||||||
|
|
||||||
|
// ─── Response ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export type HookDecision = 'pass' | 'warn' | 'block';
|
||||||
|
|
||||||
|
export interface HookResponse {
|
||||||
|
decision?: HookDecision;
|
||||||
|
reason?: string;
|
||||||
|
/** When present, replaces the original tool args / user prompt. */
|
||||||
|
updated_input?: Record<string, unknown> | string;
|
||||||
|
/** Injected into the model's context for the next turn. */
|
||||||
|
additional_context?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Runner ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface HookRunner {
|
||||||
|
/** Run all hooks for the given event. Returns the effective response. */
|
||||||
|
run(event: HookEvent, payload: HookPayload, log?: FastifyBaseLogger): Promise<HookResponse>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let hooksConfig: HooksConfig | null = null;
|
||||||
|
let hooksPath: string | null = null;
|
||||||
|
|
||||||
|
/** Load hooks config from disk. Missing file = no hooks. Never throws. */
|
||||||
|
export function loadHooksConfig(path: string): HooksConfig {
|
||||||
|
hooksPath = path;
|
||||||
|
if (!existsSync(path)) {
|
||||||
|
hooksConfig = { hooks: {} };
|
||||||
|
return hooksConfig;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const raw = readFileSync(path, 'utf8');
|
||||||
|
const parsed = JSON.parse(raw) as HooksConfig;
|
||||||
|
hooksConfig = {
|
||||||
|
hooks: { ...parsed.hooks },
|
||||||
|
};
|
||||||
|
// Validate event names
|
||||||
|
for (const event of Object.keys(hooksConfig.hooks)) {
|
||||||
|
if (!ALL_EVENTS.includes(event as HookEvent)) {
|
||||||
|
console.warn(`hooks: unknown event '${event}' in ${path} — ignoring`);
|
||||||
|
delete hooksConfig.hooks[event as HookEvent];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error(`hooks: failed to load ${path}`, err);
|
||||||
|
hooksConfig = { hooks: {} };
|
||||||
|
}
|
||||||
|
return hooksConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reload the config file (call after a PATCH). */
|
||||||
|
export function reloadHooksConfig(): HooksConfig {
|
||||||
|
if (hooksPath) return loadHooksConfig(hooksPath);
|
||||||
|
hooksConfig = { hooks: {} };
|
||||||
|
return hooksConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getConfig(): HooksConfig {
|
||||||
|
return hooksConfig ?? { hooks: {} };
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a HookRunner for the current config. */
|
||||||
|
export function createHookRunner(): HookRunner {
|
||||||
|
return {
|
||||||
|
async run(event, payload, log): Promise<HookResponse> {
|
||||||
|
const configs = getConfig().hooks[event];
|
||||||
|
if (!configs || configs.length === 0) return { decision: 'pass' };
|
||||||
|
|
||||||
|
// Pre-filter by match pattern for tool events
|
||||||
|
const toolName = 'tool_name' in payload ? (payload as PreToolUsePayload).tool_name : undefined;
|
||||||
|
|
||||||
|
let effective: HookResponse = { decision: 'pass' };
|
||||||
|
|
||||||
|
for (const cfg of configs) {
|
||||||
|
// Skip if match doesn't apply
|
||||||
|
if (toolName && cfg.match && cfg.match !== '*' && cfg.match !== toolName) continue;
|
||||||
|
|
||||||
|
const result = await runSingleHook(cfg, payload, log);
|
||||||
|
// Merge decisions: block > warn > pass
|
||||||
|
if (result.decision === 'block') {
|
||||||
|
effective = { ...result, decision: 'block' };
|
||||||
|
break; // block is terminal
|
||||||
|
}
|
||||||
|
if (result.decision === 'warn' && effective.decision !== 'block') {
|
||||||
|
effective = { ...result, decision: 'warn' };
|
||||||
|
}
|
||||||
|
// Merge additional_context and updated_input
|
||||||
|
if (result.additional_context) {
|
||||||
|
effective.additional_context = effective.additional_context
|
||||||
|
? effective.additional_context + '\n' + result.additional_context
|
||||||
|
: result.additional_context;
|
||||||
|
}
|
||||||
|
if (result.updated_input && !effective.updated_input) {
|
||||||
|
effective.updated_input = result.updated_input;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return effective;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function runSingleHook(
|
||||||
|
cfg: HookConfig,
|
||||||
|
payload: HookPayload,
|
||||||
|
log?: FastifyBaseLogger,
|
||||||
|
): Promise<HookResponse> {
|
||||||
|
const timeoutMs = (cfg.timeout ?? 30) * 1000;
|
||||||
|
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const child = spawn('sh', ['-c', cfg.command], {
|
||||||
|
stdio: ['pipe', 'pipe', 'pipe'],
|
||||||
|
timeout: timeoutMs,
|
||||||
|
env: { ...process.env },
|
||||||
|
});
|
||||||
|
|
||||||
|
const stdout: Buffer[] = [];
|
||||||
|
const stderr: Buffer[] = [];
|
||||||
|
|
||||||
|
child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
|
||||||
|
child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
|
||||||
|
|
||||||
|
let settled = false;
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
if (!settled) {
|
||||||
|
settled = true;
|
||||||
|
child.kill('SIGTERM');
|
||||||
|
log?.warn({ event: payload.event, command: cfg.command }, 'hooks: timeout');
|
||||||
|
resolve({ decision: 'warn', reason: 'hook timed out' });
|
||||||
|
}
|
||||||
|
}, timeoutMs);
|
||||||
|
|
||||||
|
child.on('error', (err) => {
|
||||||
|
if (!settled) {
|
||||||
|
settled = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
log?.warn({ err, event: payload.event }, 'hooks: spawn error');
|
||||||
|
resolve({ decision: 'warn', reason: `hook failed: ${err.message}` });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
child.on('close', (code) => {
|
||||||
|
if (settled) return;
|
||||||
|
settled = true;
|
||||||
|
clearTimeout(timer);
|
||||||
|
|
||||||
|
const out = Buffer.concat(stdout).toString('utf8').trim();
|
||||||
|
const errOut = Buffer.concat(stderr).toString('utf8').trim();
|
||||||
|
|
||||||
|
if (code !== 0 && !out) {
|
||||||
|
log?.warn({ event: payload.event, code, stderr: errOut.slice(0, 200) }, 'hooks: non-zero exit');
|
||||||
|
resolve({ decision: 'warn', reason: `hook exited ${code}` });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse stdout as JSON response
|
||||||
|
if (out) {
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(out) as HookResponse;
|
||||||
|
resolve(parsed);
|
||||||
|
return;
|
||||||
|
} catch {
|
||||||
|
// Not JSON — treat as pass with stdout as context
|
||||||
|
if (out.length > 0) {
|
||||||
|
resolve({ decision: 'pass', additional_context: out });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve({ decision: 'pass' });
|
||||||
|
});
|
||||||
|
|
||||||
|
// Write payload to stdin
|
||||||
|
const json = JSON.stringify(payload);
|
||||||
|
child.stdin.write(json);
|
||||||
|
child.stdin.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
@@ -122,6 +122,8 @@ export async function finalizeStreamedRow(
|
|||||||
completionTokens: number | null;
|
completionTokens: number | null;
|
||||||
promptTokens: number | null;
|
promptTokens: number | null;
|
||||||
startedAt: string | null;
|
startedAt: string | null;
|
||||||
|
cacheTokens?: number | null;
|
||||||
|
reasoningTokens?: number | null;
|
||||||
beforeComplete?: () => Promise<void>;
|
beforeComplete?: () => Promise<void>;
|
||||||
},
|
},
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
@@ -137,6 +139,8 @@ export async function finalizeStreamedRow(
|
|||||||
tokens_used = ${opts.completionTokens},
|
tokens_used = ${opts.completionTokens},
|
||||||
ctx_used = ${opts.promptTokens},
|
ctx_used = ${opts.promptTokens},
|
||||||
ctx_max = ${nCtx},
|
ctx_max = ${nCtx},
|
||||||
|
cache_tokens = ${opts.cacheTokens ?? null},
|
||||||
|
reasoning_tokens = ${opts.reasoningTokens ?? null},
|
||||||
finished_at = clock_timestamp()
|
finished_at = clock_timestamp()
|
||||||
WHERE id = ${opts.messageId}
|
WHERE id = ${opts.messageId}
|
||||||
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
||||||
@@ -149,6 +153,8 @@ export async function finalizeStreamedRow(
|
|||||||
tokens_used: updated?.tokens_used ?? null,
|
tokens_used: updated?.tokens_used ?? null,
|
||||||
ctx_used: updated?.ctx_used ?? null,
|
ctx_used: updated?.ctx_used ?? null,
|
||||||
ctx_max: updated?.ctx_max ?? null,
|
ctx_max: updated?.ctx_max ?? null,
|
||||||
|
cache_tokens: opts.cacheTokens ?? null,
|
||||||
|
reasoning_tokens: opts.reasoningTokens ?? null,
|
||||||
started_at: opts.startedAt,
|
started_at: opts.startedAt,
|
||||||
finished_at: updated?.finished_at ?? null,
|
finished_at: updated?.finished_at ?? null,
|
||||||
model: opts.model,
|
model: opts.model,
|
||||||
@@ -188,7 +194,7 @@ export async function finalizeCompletion(
|
|||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const { sessionId, chatId, assistantMessageId } = args;
|
const { sessionId, chatId, assistantMessageId } = args;
|
||||||
const content = stripToolMarkup(result.content, { final: true });
|
const content = stripToolMarkup(result.content, { final: true });
|
||||||
const { finishReason, promptTokens, completionTokens } = result;
|
const { finishReason, promptTokens, completionTokens, cacheReadTokens, reasoningTokens } = result;
|
||||||
|
|
||||||
// v1.11.3: see executeToolPhase for the rationale.
|
// v1.11.3: see executeToolPhase for the rationale.
|
||||||
const mctx = await modelContext.getModelContext(session.model);
|
const mctx = await modelContext.getModelContext(session.model);
|
||||||
@@ -203,6 +209,8 @@ export async function finalizeCompletion(
|
|||||||
tokens_used = ${completionTokens},
|
tokens_used = ${completionTokens},
|
||||||
ctx_used = ${promptTokens},
|
ctx_used = ${promptTokens},
|
||||||
ctx_max = ${nCtx},
|
ctx_max = ${nCtx},
|
||||||
|
cache_tokens = ${cacheReadTokens ?? null},
|
||||||
|
reasoning_tokens = ${reasoningTokens ?? null},
|
||||||
model = ${session.model},
|
model = ${session.model},
|
||||||
finished_at = clock_timestamp()
|
finished_at = clock_timestamp()
|
||||||
WHERE id = ${assistantMessageId}
|
WHERE id = ${assistantMessageId}
|
||||||
@@ -268,6 +276,8 @@ export async function finalizeCompletion(
|
|||||||
tokens_used: updated?.tokens_used ?? null,
|
tokens_used: updated?.tokens_used ?? null,
|
||||||
ctx_used: updated?.ctx_used ?? null,
|
ctx_used: updated?.ctx_used ?? null,
|
||||||
ctx_max: updated?.ctx_max ?? null,
|
ctx_max: updated?.ctx_max ?? null,
|
||||||
|
cache_tokens: cacheReadTokens ?? null,
|
||||||
|
reasoning_tokens: reasoningTokens ?? null,
|
||||||
started_at: startedAt,
|
started_at: startedAt,
|
||||||
finished_at: updated?.finished_at ?? null,
|
finished_at: updated?.finished_at ?? null,
|
||||||
model: session.model,
|
model: session.model,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||||
|
import { createDeepSeek } from '@ai-sdk/deepseek';
|
||||||
import type { LanguageModel } from 'ai';
|
import type { LanguageModel } from 'ai';
|
||||||
|
|
||||||
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||||
@@ -11,6 +12,12 @@ import type { LanguageModel } from 'ai';
|
|||||||
// llama-sidecar instead. A fresh provider is created per call (not cached)
|
// llama-sidecar instead. A fresh provider is created per call (not cached)
|
||||||
// because the X-Agent-Flags header varies per agent. The llama-swap path
|
// because the X-Agent-Flags header varies per agent. The llama-swap path
|
||||||
// stays cached since it has no per-request headers.
|
// stays cached since it has no per-request headers.
|
||||||
|
//
|
||||||
|
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
|
||||||
|
// is set, route through the official @ai-sdk/deepseek provider (not
|
||||||
|
// openai-compatible) so DeepSeek-specific features work: providerMetadata
|
||||||
|
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
|
||||||
|
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
|
||||||
|
|
||||||
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
||||||
|
|
||||||
@@ -41,7 +48,28 @@ function sidecarProvider(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
export type InferenceRoute = 'swap' | 'sidecar';
|
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
|
||||||
|
|
||||||
|
export function isDeepSeekModel(modelId: string): boolean {
|
||||||
|
return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
|
||||||
|
}
|
||||||
|
|
||||||
|
let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
|
||||||
|
|
||||||
|
function getDeepSeekProvider(
|
||||||
|
apiKey: string,
|
||||||
|
baseURL: string,
|
||||||
|
): ReturnType<typeof createDeepSeek> {
|
||||||
|
if (!deepseekProviderCache) {
|
||||||
|
deepseekProviderCache = createDeepSeek({
|
||||||
|
apiKey,
|
||||||
|
baseURL,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return deepseekProviderCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';
|
||||||
|
|
||||||
export interface RoutingInfo {
|
export interface RoutingInfo {
|
||||||
route: InferenceRoute;
|
route: InferenceRoute;
|
||||||
@@ -55,12 +83,21 @@ interface AgentLike {
|
|||||||
interface ConfigLike {
|
interface ConfigLike {
|
||||||
LLAMA_SWAP_URL: string;
|
LLAMA_SWAP_URL: string;
|
||||||
LLAMA_SIDECAR_URL?: string;
|
LLAMA_SIDECAR_URL?: string;
|
||||||
|
DEEPSEEK_API_KEY?: string;
|
||||||
|
DEEPSEEK_BASE_URL?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function resolveRoute(
|
export function resolveRoute(
|
||||||
agent: AgentLike | null,
|
agent: AgentLike | null,
|
||||||
config?: ConfigLike,
|
config?: ConfigLike,
|
||||||
|
modelId?: string,
|
||||||
): RoutingInfo {
|
): RoutingInfo {
|
||||||
|
// vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
|
||||||
|
// route through the DeepSeek provider. Checked first so DeepSeek models
|
||||||
|
// always bypass llama-swap/sidecar even when those are also configured.
|
||||||
|
if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
|
||||||
|
return { route: 'deepseek', flags: null };
|
||||||
|
}
|
||||||
// When llama_extra_args are explicitly set, route through sidecar with them.
|
// When llama_extra_args are explicitly set, route through sidecar with them.
|
||||||
const flags = agent?.llama_extra_args;
|
const flags = agent?.llama_extra_args;
|
||||||
if (flags && flags.length > 0) {
|
if (flags && flags.length > 0) {
|
||||||
@@ -80,7 +117,13 @@ export function upstreamModel(
|
|||||||
modelId: string,
|
modelId: string,
|
||||||
agent?: AgentLike | null,
|
agent?: AgentLike | null,
|
||||||
): LanguageModel {
|
): LanguageModel {
|
||||||
const { route, flags } = resolveRoute(agent ?? null, config);
|
const { route, flags } = resolveRoute(agent ?? null, config, modelId);
|
||||||
|
if (route === 'deepseek') {
|
||||||
|
return getDeepSeekProvider(
|
||||||
|
config.DEEPSEEK_API_KEY!,
|
||||||
|
config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
|
||||||
|
).chat(modelId);
|
||||||
|
}
|
||||||
if (route === 'sidecar') {
|
if (route === 'sidecar') {
|
||||||
const url = config.LLAMA_SIDECAR_URL;
|
const url = config.LLAMA_SIDECAR_URL;
|
||||||
if (!url) {
|
if (!url) {
|
||||||
@@ -90,3 +133,30 @@ export function upstreamModel(
|
|||||||
}
|
}
|
||||||
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
|
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Resolve the API endpoint for non-streaming calls (compaction, task-model).
|
||||||
|
* Returns the URL + model + optional auth header for direct fetch() usage. */
|
||||||
|
export function resolveModelEndpoint(
|
||||||
|
config: ConfigLike,
|
||||||
|
modelId: string,
|
||||||
|
): { url: string; model: string; headers: Record<string, string> } {
|
||||||
|
const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
|
||||||
|
if (modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && config.DEEPSEEK_API_KEY) {
|
||||||
|
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||||
|
return {
|
||||||
|
url: baseURL,
|
||||||
|
model: modelId,
|
||||||
|
headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
|
||||||
|
model: modelId,
|
||||||
|
headers: baseHeaders,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime). */
|
||||||
|
export function resetDeepSeekProvider(): void {
|
||||||
|
deepseekProviderCache = null;
|
||||||
|
}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import type { OpenAiMessage } from './payload.js';
|
|||||||
import { extractToolCallBlocks } from './tool-call-parser.js';
|
import { extractToolCallBlocks } from './tool-call-parser.js';
|
||||||
import { classifyStreamError } from './stream-error-classifier.js';
|
import { classifyStreamError } from './stream-error-classifier.js';
|
||||||
import type { StreamResult } from './types.js';
|
import type { StreamResult } from './types.js';
|
||||||
import { upstreamModel } from './provider.js';
|
import { isDeepSeekModel, upstreamModel } from './provider.js';
|
||||||
import {
|
import {
|
||||||
jsonSchema,
|
jsonSchema,
|
||||||
streamText,
|
streamText,
|
||||||
@@ -51,6 +51,9 @@ export interface StreamOptions {
|
|||||||
dry_base?: number | null;
|
dry_base?: number | null;
|
||||||
dry_allowed_length?: number | null;
|
dry_allowed_length?: number | null;
|
||||||
dry_penalty_last_n?: number | null;
|
dry_penalty_last_n?: number | null;
|
||||||
|
// vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
|
||||||
|
// API param for deepseek-v4-flash / deepseek-v4-pro models.
|
||||||
|
reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
|
||||||
}
|
}
|
||||||
|
|
||||||
// P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
|
// P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
|
||||||
@@ -74,6 +77,7 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
|
|||||||
dry_base: agent?.dry_base ?? undefined,
|
dry_base: agent?.dry_base ?? undefined,
|
||||||
dry_allowed_length: agent?.dry_allowed_length ?? undefined,
|
dry_allowed_length: agent?.dry_allowed_length ?? undefined,
|
||||||
dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
|
dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
|
||||||
|
reasoning_effort: agent?.reasoning_effort ?? undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -272,6 +276,19 @@ export async function streamCompletion(
|
|||||||
// before this. They now go through the same extraBody path as the new params.
|
// before this. They now go through the same extraBody path as the new params.
|
||||||
const samplerBody = buildSamplerProviderOptions(opts);
|
const samplerBody = buildSamplerProviderOptions(opts);
|
||||||
|
|
||||||
|
// vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
|
||||||
|
let deepseekProviderOptions:
|
||||||
|
| { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
|
||||||
|
| undefined;
|
||||||
|
if (isDeepSeekModel(model)) {
|
||||||
|
const dsEffort = opts.reasoning_effort;
|
||||||
|
const thinkingEnabled = dsEffort && dsEffort !== 'off';
|
||||||
|
deepseekProviderOptions = {
|
||||||
|
thinking: { type: thinkingEnabled ? 'enabled' : 'disabled' },
|
||||||
|
...(thinkingEnabled ? { reasoningEffort: dsEffort } : {}),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// F6: per-chunk stall deadline. If the model stops emitting chunks for
|
// F6: per-chunk stall deadline. If the model stops emitting chunks for
|
||||||
// STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
|
// STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
|
||||||
// abort check below then throws AbortError → handleAbortOrError writes
|
// abort check below then throws AbortError → handleAbortOrError writes
|
||||||
@@ -297,7 +314,14 @@ export async function streamCompletion(
|
|||||||
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||||
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
||||||
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
||||||
...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
|
...(samplerBody || deepseekProviderOptions
|
||||||
|
? {
|
||||||
|
providerOptions: {
|
||||||
|
...(samplerBody ? { openaiCompatible: samplerBody } : {}),
|
||||||
|
...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
abortSignal: effectiveSignal,
|
abortSignal: effectiveSignal,
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -401,12 +425,26 @@ export async function streamCompletion(
|
|||||||
|
|
||||||
// Usage lands as a promise on the result; awaiting after fullStream is
|
// Usage lands as a promise on the result; awaiting after fullStream is
|
||||||
// drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
|
// drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
|
||||||
|
// Some providers (llama-swap via openai-compatible) return plain numbers;
|
||||||
|
// others (deepseek via @ai-sdk/deepseek) return {total, cacheRead, noCache, ...}.
|
||||||
let promptTokens: number | null = null;
|
let promptTokens: number | null = null;
|
||||||
let completionTokens: number | null = null;
|
let completionTokens: number | null = null;
|
||||||
|
let cacheReadTokens: number | null = null;
|
||||||
|
let reasoningTokens: number | null = null;
|
||||||
try {
|
try {
|
||||||
const usage = await result.usage;
|
const usage = await result.usage;
|
||||||
if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
|
if (typeof usage.inputTokens === 'number') {
|
||||||
if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
|
promptTokens = usage.inputTokens;
|
||||||
|
} else if (usage.inputTokens && typeof usage.inputTokens === 'object') {
|
||||||
|
promptTokens = (usage.inputTokens as Record<string, number | undefined>).total ?? null;
|
||||||
|
cacheReadTokens = (usage.inputTokens as Record<string, number | undefined>).cacheRead ?? null;
|
||||||
|
}
|
||||||
|
if (typeof usage.outputTokens === 'number') {
|
||||||
|
completionTokens = usage.outputTokens;
|
||||||
|
} else if (usage.outputTokens && typeof usage.outputTokens === 'object') {
|
||||||
|
completionTokens = (usage.outputTokens as Record<string, number | undefined>).total ?? null;
|
||||||
|
reasoningTokens = (usage.outputTokens as Record<string, number | undefined>).reasoning ?? null;
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Some providers omit usage on partial streams; leave both null.
|
// Some providers omit usage on partial streams; leave both null.
|
||||||
}
|
}
|
||||||
@@ -422,6 +460,13 @@ export async function streamCompletion(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cacheReadTokens !== null || reasoningTokens !== null) {
|
||||||
|
ctx.log.debug(
|
||||||
|
{ promptTokens, completionTokens, cacheReadTokens, reasoningTokens, model },
|
||||||
|
'streamCompletion: deepseek usage breakdown',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
finishReason,
|
finishReason,
|
||||||
content,
|
content,
|
||||||
@@ -429,6 +474,10 @@ export async function streamCompletion(
|
|||||||
promptTokens,
|
promptTokens,
|
||||||
completionTokens,
|
completionTokens,
|
||||||
reasoning: reasoningAccumulated,
|
reasoning: reasoningAccumulated,
|
||||||
|
// vDeepSeek: optional usage breakdown populated when the provider returns
|
||||||
|
// structured usage (cache hit tokens, reasoning tokens).
|
||||||
|
cacheReadTokens: cacheReadTokens ?? undefined,
|
||||||
|
reasoningTokens: reasoningTokens ?? undefined,
|
||||||
};
|
};
|
||||||
} finally {
|
} finally {
|
||||||
// Clear the stall timer whether the stream completes normally, throws, or
|
// Clear the stall timer whether the stream completes normally, throws, or
|
||||||
|
|||||||
179
apps/server/src/services/inference/tool-input-repair.ts
Normal file
179
apps/server/src/services/inference/tool-input-repair.ts
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
/**
|
||||||
|
* vWhale: schema-based tool input repair. When the model emits tool call args
|
||||||
|
* that don't match the expected types (common with weaker models), apply
|
||||||
|
* heuristic repairs before falling through to the Zod parse.
|
||||||
|
*
|
||||||
|
* Inspired by Whale's RepairToolInputForSpec:
|
||||||
|
* - Coerce string "true"/"false" → boolean
|
||||||
|
* - Unwrap markdown autolinks in string fields: <file:///path> → /path
|
||||||
|
* - Wrap bare values in arrays when schema expects array
|
||||||
|
* - Convert "42.0" decimal string → "42" for integer fields
|
||||||
|
* - Recurse into objects to repair nested properties
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface ToolInputRepair {
|
||||||
|
field: string;
|
||||||
|
kind: string;
|
||||||
|
detail: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const MARKDOWN_AUTOLINK_RE = /^<(?:file|path):\/\/(.+?)>$/;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempt to repair tool call args against the tool's JSON Schema.
|
||||||
|
* Returns the (possibly modified) args plus a list of repairs applied.
|
||||||
|
*/
|
||||||
|
export function repairToolInput(
|
||||||
|
schema: Record<string, unknown> | undefined,
|
||||||
|
args: Record<string, unknown>,
|
||||||
|
): { repaired: Record<string, unknown>; repairs: ToolInputRepair[] } {
|
||||||
|
const repairs: ToolInputRepair[] = [];
|
||||||
|
if (!schema || typeof schema !== 'object') {
|
||||||
|
return { repaired: args, repairs };
|
||||||
|
}
|
||||||
|
|
||||||
|
const properties = (schema as Record<string, unknown>).properties as
|
||||||
|
Record<string, unknown> | undefined;
|
||||||
|
if (!properties) {
|
||||||
|
return { repaired: args, repairs };
|
||||||
|
}
|
||||||
|
|
||||||
|
const required = new Set<string>(
|
||||||
|
Array.isArray((schema as Record<string, unknown>).required)
|
||||||
|
? (schema as Record<string, unknown>).required as string[]
|
||||||
|
: [],
|
||||||
|
);
|
||||||
|
|
||||||
|
const repaired: Record<string, unknown> = {};
|
||||||
|
for (const [key, value] of Object.entries(args)) {
|
||||||
|
const propSchema = properties[key] as Record<string, unknown> | undefined;
|
||||||
|
if (propSchema && value !== null && value !== undefined) {
|
||||||
|
repaired[key] = repairValue(key, propSchema, value, repairs, required.has(key));
|
||||||
|
} else {
|
||||||
|
repaired[key] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop keys not in the schema (only for required fields that are missing)
|
||||||
|
// to avoid polluting the model with hallucinated params.
|
||||||
|
for (const key of Object.keys(repaired)) {
|
||||||
|
if (!(key in properties)) {
|
||||||
|
repairs.push({ field: key, kind: 'removed_unknown', detail: `Removed unknown parameter '${key}'` });
|
||||||
|
delete repaired[key];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { repaired, repairs };
|
||||||
|
}
|
||||||
|
|
||||||
|
function repairValue(
|
||||||
|
field: string,
|
||||||
|
schema: Record<string, unknown>,
|
||||||
|
value: unknown,
|
||||||
|
repairs: ToolInputRepair[],
|
||||||
|
required: boolean,
|
||||||
|
): unknown {
|
||||||
|
const schemaType = schema.type;
|
||||||
|
const isArray = schemaType === 'array' || Array.isArray(schemaType)
|
||||||
|
? schemaType === 'array' || (Array.isArray(schemaType) && schemaType.includes('array'))
|
||||||
|
: false;
|
||||||
|
const isObject = schemaType === 'object';
|
||||||
|
const isBoolean = schemaType === 'boolean';
|
||||||
|
const isInteger = schemaType === 'integer' || schemaType === 'number';
|
||||||
|
const isString = schemaType === 'string';
|
||||||
|
|
||||||
|
// --- Array repair: wrap bare value or empty object ---
|
||||||
|
if (isArray) {
|
||||||
|
if (!Array.isArray(value)) {
|
||||||
|
if (typeof value === 'string') {
|
||||||
|
// Try parsing as JSON array first
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(value);
|
||||||
|
if (Array.isArray(parsed)) {
|
||||||
|
repairs.push({ field, kind: 'parsed_json_array', detail: `Parsed string as JSON array for '${field}'` });
|
||||||
|
return parsed;
|
||||||
|
}
|
||||||
|
} catch { /* not JSON */ }
|
||||||
|
}
|
||||||
|
if (typeof value === 'object' && value !== null && Object.keys(value).length === 0) {
|
||||||
|
if (required) {
|
||||||
|
repairs.push({ field, kind: 'empty_object_to_array', detail: `Converted empty object to empty array for '${field}'` });
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
repairs.push({ field, kind: 'empty_object_to_undefined', detail: `Removed empty object for optional array '${field}'` });
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
repairs.push({ field, kind: 'wrapped_in_array', detail: `Wrapped bare value in array for '${field}'` });
|
||||||
|
return [value];
|
||||||
|
}
|
||||||
|
// Recurse into array items
|
||||||
|
const itemsSchema = schema.items as Record<string, unknown> | undefined;
|
||||||
|
if (itemsSchema) {
|
||||||
|
return value.map((item, i) => repairValue(`${field}[${i}]`, itemsSchema, item, repairs, required));
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Object repair: recurse into properties ---
|
||||||
|
if (isObject && typeof value === 'object' && value !== null && !Array.isArray(value)) {
|
||||||
|
const props = (schema.properties as Record<string, unknown>) ?? {};
|
||||||
|
const repaired: Record<string, unknown> = {};
|
||||||
|
for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
|
||||||
|
const propSchema = props[k] as Record<string, unknown> | undefined;
|
||||||
|
if (propSchema) {
|
||||||
|
repaired[k] = repairValue(`${field}.${k}`, propSchema, v, repairs, required);
|
||||||
|
} else {
|
||||||
|
repaired[k] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return repaired;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- String repair: unwrap markdown autolinks ---
|
||||||
|
if (isString && typeof value === 'string') {
|
||||||
|
const match = value.match(MARKDOWN_AUTOLINK_RE);
|
||||||
|
if (match) {
|
||||||
|
repairs.push({ field, kind: 'unwrapped_markdown_link', detail: `Unwrapped markdown autolink for '${field}': ${value}` });
|
||||||
|
return match[1];
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Boolean coercion ---
|
||||||
|
if (isBoolean && typeof value === 'string') {
|
||||||
|
const lower = value.toLowerCase();
|
||||||
|
if (lower === 'true') {
|
||||||
|
repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → true for '${field}'` });
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (lower === 'false') {
|
||||||
|
repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → false for '${field}'` });
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Integer coercion: "42.0" → 42 ---
|
||||||
|
if (isInteger && typeof value === 'string') {
|
||||||
|
const num = Number(value);
|
||||||
|
if (!Number.isNaN(num)) {
|
||||||
|
repairs.push({ field, kind: 'coerced_to_number', detail: `Coerced string '${value}' → ${num} for '${field}'` });
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Integer coercion: boolean → 0/1 ---
|
||||||
|
if (isInteger && typeof value === 'boolean') {
|
||||||
|
repairs.push({ field, kind: 'coerced_boolean_to_integer', detail: `Coerced boolean ${value} → ${value ? 1 : 0} for '${field}'` });
|
||||||
|
return value ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Empty string to null for optional fields ---
|
||||||
|
if (value === '' && !required) {
|
||||||
|
repairs.push({ field, kind: 'empty_string_to_undefined', detail: `Converted empty string for optional '${field}'` });
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
@@ -6,6 +6,7 @@ import type { ToolExecCtx } from '../tools.js';
|
|||||||
import { matchToolGlob } from '../agents.js';
|
import { matchToolGlob } from '../agents.js';
|
||||||
import { maybeFlagForCompaction } from './payload.js';
|
import { maybeFlagForCompaction } from './payload.js';
|
||||||
import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
|
import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
|
||||||
|
import { getServerPermission } from '../mcp-client.js';
|
||||||
// v1.13.16: richer unknown-tool error so the model can self-correct when it
|
// v1.13.16: richer unknown-tool error so the model can self-correct when it
|
||||||
// drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
|
// drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
|
||||||
// Applies to all unknown tool names, not just <invoke>-derived ones — at the
|
// Applies to all unknown tool names, not just <invoke>-derived ones — at the
|
||||||
@@ -17,6 +18,7 @@ import { formatUnknownToolError } from './tool-suggestions.js';
|
|||||||
// prompted about paths we couldn't grant anyway (e.g. /etc/passwd).
|
// prompted about paths we couldn't grant anyway (e.g. /etc/passwd).
|
||||||
import { resolveGrantRoot } from '../grant_resolver.js';
|
import { resolveGrantRoot } from '../grant_resolver.js';
|
||||||
import { stripToolMarkup } from './tool-call-parser.js';
|
import { stripToolMarkup } from './tool-call-parser.js';
|
||||||
|
import { repairToolInput } from './tool-input-repair.js';
|
||||||
import type { FailureKind } from './mistake-tracker.js';
|
import type { FailureKind } from './mistake-tracker.js';
|
||||||
import type {
|
import type {
|
||||||
InferenceContext,
|
InferenceContext,
|
||||||
@@ -34,6 +36,8 @@ async function executeToolCall(
|
|||||||
toolCall: ToolCall,
|
toolCall: ToolCall,
|
||||||
extraRoots: readonly string[],
|
extraRoots: readonly string[],
|
||||||
toolCtx?: ToolExecCtx,
|
toolCtx?: ToolExecCtx,
|
||||||
|
hooks?: import('../hooks.js').HookRunner,
|
||||||
|
sessionId?: string,
|
||||||
): Promise<{ output: unknown; truncated: boolean; error?: string; outcome: FailureKind | 'success' }> {
|
): Promise<{ output: unknown; truncated: boolean; error?: string; outcome: FailureKind | 'success' }> {
|
||||||
// v#12 MistakeTracker: every return path carries an `outcome` so the turn
|
// v#12 MistakeTracker: every return path carries an `outcome` so the turn
|
||||||
// loop can detect a run of heterogeneous failures. The failure taxonomy
|
// loop can detect a run of heterogeneous failures. The failure taxonomy
|
||||||
@@ -48,7 +52,61 @@ async function executeToolCall(
|
|||||||
outcome: 'tool_not_found',
|
outcome: 'tool_not_found',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
const parsed = tool.inputSchema.safeParse(toolCall.args);
|
// MCP permission gate — block deny/ask before any Zod parsing or execution
|
||||||
|
const mcpPerm = getServerPermission(toolCall.name);
|
||||||
|
if (mcpPerm === 'deny') {
|
||||||
|
return { output: null, truncated: false, error: `blocked: MCP server denied tool '${toolCall.name}'`, outcome: 'permission_denied' };
|
||||||
|
}
|
||||||
|
if (mcpPerm === 'ask') {
|
||||||
|
return { output: null, truncated: false, error: `requires approval: tool '${toolCall.name}' needs user approval`, outcome: 'permission_denied' };
|
||||||
|
}
|
||||||
|
// vWhale: schema-based tool input repair. If the Zod parse fails, attempt
|
||||||
|
// heuristic repairs (type coercion, markdown-link unwrapping, array wrapping)
|
||||||
|
// and retry. Logs repairs for debugging.
|
||||||
|
let args = toolCall.args;
|
||||||
|
let parsed = tool.inputSchema.safeParse(args);
|
||||||
|
if (!parsed.success) {
|
||||||
|
const schema = tool.jsonSchema?.function?.parameters;
|
||||||
|
if (schema) {
|
||||||
|
const { repaired: repairedArgs, repairs } = repairToolInput(
|
||||||
|
schema as Record<string, unknown>,
|
||||||
|
args as Record<string, unknown>,
|
||||||
|
);
|
||||||
|
if (repairs.length > 0) {
|
||||||
|
const retry = tool.inputSchema.safeParse(repairedArgs);
|
||||||
|
if (retry.success) {
|
||||||
|
args = repairedArgs;
|
||||||
|
parsed = retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// vWhale: PreToolUse hook — can block execution.
|
||||||
|
if (hooks && sessionId) {
|
||||||
|
const hookResult = await hooks.run('PreToolUse', {
|
||||||
|
event: 'PreToolUse',
|
||||||
|
session_id: sessionId,
|
||||||
|
tool_name: toolCall.name,
|
||||||
|
tool_args: args as Record<string, unknown>,
|
||||||
|
});
|
||||||
|
if (hookResult.decision === 'block') {
|
||||||
|
return {
|
||||||
|
output: null,
|
||||||
|
truncated: false,
|
||||||
|
error: `blocked by hook: ${hookResult.reason ?? 'PreToolUse denied'}`,
|
||||||
|
outcome: 'permission_denied',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// Apply updated_input if the hook rewrote the args
|
||||||
|
if (hookResult.updated_input && typeof hookResult.updated_input === 'object') {
|
||||||
|
const reParsed = tool.inputSchema.safeParse(hookResult.updated_input);
|
||||||
|
if (reParsed.success) {
|
||||||
|
args = hookResult.updated_input as Record<string, unknown>;
|
||||||
|
parsed = reParsed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!parsed.success) {
|
if (!parsed.success) {
|
||||||
// v1.12 Track B.2: enrich the zod-reject path so the model sees a
|
// v1.12 Track B.2: enrich the zod-reject path so the model sees a
|
||||||
// one-line, tool-named hint ("tool 'search_symbols' rejected — query:
|
// one-line, tool-named hint ("tool 'search_symbols' rejected — query:
|
||||||
@@ -183,6 +241,8 @@ export async function executeToolPhase(
|
|||||||
tokens_used: updated?.tokens_used ?? null,
|
tokens_used: updated?.tokens_used ?? null,
|
||||||
ctx_used: updated?.ctx_used ?? null,
|
ctx_used: updated?.ctx_used ?? null,
|
||||||
ctx_max: updated?.ctx_max ?? null,
|
ctx_max: updated?.ctx_max ?? null,
|
||||||
|
cache_tokens: result.cacheReadTokens ?? null,
|
||||||
|
reasoning_tokens: result.reasoningTokens ?? null,
|
||||||
started_at: startedAt,
|
started_at: startedAt,
|
||||||
finished_at: updated?.finished_at ?? null,
|
finished_at: updated?.finished_at ?? null,
|
||||||
model: session.model,
|
model: session.model,
|
||||||
@@ -318,10 +378,22 @@ export async function executeToolPhase(
|
|||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const tres = await executeToolCall(projectRoot, tc, session.allowed_read_paths, {
|
const tres = await executeToolCall(
|
||||||
sql: ctx.sql,
|
projectRoot, tc, session.allowed_read_paths,
|
||||||
sessionId,
|
{ sql: ctx.sql, sessionId },
|
||||||
});
|
ctx.hooks, sessionId,
|
||||||
|
);
|
||||||
|
// vWhale: PostToolUse hook (best-effort, non-blocking).
|
||||||
|
if (ctx.hooks) {
|
||||||
|
ctx.hooks.run('PostToolUse', {
|
||||||
|
event: 'PostToolUse',
|
||||||
|
session_id: sessionId,
|
||||||
|
tool_name: tc.name,
|
||||||
|
tool_args: tc.args as Record<string, unknown>,
|
||||||
|
tool_result: tres.output,
|
||||||
|
tool_error: tres.error,
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
// v#12 MistakeTracker: record the real execution outcome (success or a
|
// v#12 MistakeTracker: record the real execution outcome (success or a
|
||||||
// FailureKind). This is the primary signal for heterogeneous-failure
|
// FailureKind). This is the primary signal for heterogeneous-failure
|
||||||
// detection.
|
// detection.
|
||||||
|
|||||||
@@ -144,6 +144,7 @@ export async function runAssistantTurn(
|
|||||||
log: ctx.log,
|
log: ctx.log,
|
||||||
broker: ctx.broker,
|
broker: ctx.broker,
|
||||||
chatId,
|
chatId,
|
||||||
|
hooks: ctx.hooks,
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
|
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
|
||||||
@@ -214,6 +215,16 @@ export async function runAssistantTurn(
|
|||||||
|
|
||||||
// ---- non-tool finish → finalize and exit ----
|
// ---- non-tool finish → finalize and exit ----
|
||||||
if (result.toolCalls.length === 0) {
|
if (result.toolCalls.length === 0) {
|
||||||
|
// vWhale: Stop hook (best-effort, non-blocking).
|
||||||
|
if (ctx.hooks) {
|
||||||
|
ctx.hooks.run('Stop', {
|
||||||
|
event: 'Stop',
|
||||||
|
session_id: sessionId,
|
||||||
|
chat_id: chatId,
|
||||||
|
last_assistant_text: result.content.slice(0, 500),
|
||||||
|
turn: stepNumber,
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
|
await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -309,6 +320,22 @@ export async function runAssistantTurn(
|
|||||||
assistantMessageId = toolPhaseResult.nextAssistantId!;
|
assistantMessageId = toolPhaseResult.nextAssistantId!;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vWhale: Stop hook at post-loop exit (best-effort, non-blocking).
|
||||||
|
if (ctx.hooks) {
|
||||||
|
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||||
|
const lastAssistant = loaded?.history?.slice().reverse().find(
|
||||||
|
(m: import('../../types/api.js').Message) => m.role === 'assistant',
|
||||||
|
);
|
||||||
|
const content = lastAssistant?.content ?? '';
|
||||||
|
ctx.hooks.run('Stop', {
|
||||||
|
event: 'Stop',
|
||||||
|
session_id: sessionId,
|
||||||
|
chat_id: chatId,
|
||||||
|
last_assistant_text: content.slice(0, 500),
|
||||||
|
turn: stepNumber,
|
||||||
|
}).catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
// ---- post-loop: step-cap sentinel ----
|
// ---- post-loop: step-cap sentinel ----
|
||||||
// When the loop exits because stepNumber reached effectiveCap, the last
|
// When the loop exits because stepNumber reached effectiveCap, the last
|
||||||
// iteration's tool phase returned 'continue' with a nextAssistantId that
|
// iteration's tool phase returned 'continue' with a nextAssistantId that
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import type {
|
|||||||
UserStreamFrame,
|
UserStreamFrame,
|
||||||
} from '../../types/api.js';
|
} from '../../types/api.js';
|
||||||
import type { Broker } from '../broker.js';
|
import type { Broker } from '../broker.js';
|
||||||
|
import type { HookRunner } from '../hooks.js';
|
||||||
import type { MistakeState } from './mistake-tracker.js';
|
import type { MistakeState } from './mistake-tracker.js';
|
||||||
|
|
||||||
export interface StreamPhaseState {
|
export interface StreamPhaseState {
|
||||||
@@ -77,6 +78,8 @@ export interface InferenceFrame {
|
|||||||
started_at?: string | null;
|
started_at?: string | null;
|
||||||
finished_at?: string | null;
|
finished_at?: string | null;
|
||||||
model?: string;
|
model?: string;
|
||||||
|
cache_tokens?: number | null;
|
||||||
|
reasoning_tokens?: number | null;
|
||||||
session_id?: string;
|
session_id?: string;
|
||||||
name?: string;
|
name?: string;
|
||||||
// orchestrator frames ([D-6])
|
// orchestrator frames ([D-6])
|
||||||
@@ -117,6 +120,9 @@ export interface InferenceContext {
|
|||||||
// inference goes through `publish`); keeping a separate field avoids
|
// inference goes through `publish`); keeping a separate field avoids
|
||||||
// tempting other code paths into bypassing the session-id binding.
|
// tempting other code paths into bypassing the session-id binding.
|
||||||
broker: Broker;
|
broker: Broker;
|
||||||
|
// vWhale: lifecycle hooks runner. Undefined when no hooks configured.
|
||||||
|
// Hook calls are best-effort — a failing hook never blocks inference.
|
||||||
|
hooks?: HookRunner;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface StreamResult {
|
export interface StreamResult {
|
||||||
@@ -128,6 +134,12 @@ export interface StreamResult {
|
|||||||
// v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
|
// v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
|
||||||
// Empty string when the model doesn't emit reasoning (most cases).
|
// Empty string when the model doesn't emit reasoning (most cases).
|
||||||
reasoning: string;
|
reasoning: string;
|
||||||
|
// vDeepSeek: optional cache-hit token count from DeepSeek's API.
|
||||||
|
// Only populated when using @ai-sdk/deepseek provider (not llama-swap).
|
||||||
|
cacheReadTokens?: number;
|
||||||
|
// vDeepSeek: optional reasoning token count from DeepSeek's API.
|
||||||
|
// Only populated when using @ai-sdk/deepseek provider (not llama-swap).
|
||||||
|
reasoningTokens?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface TurnArgs {
|
export interface TurnArgs {
|
||||||
|
|||||||
@@ -31,11 +31,14 @@ interface McpToolDef {
|
|||||||
annotations?: McpToolAnnotations;
|
annotations?: McpToolAnnotations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type McpPermission = 'allow' | 'ask' | 'deny';
|
||||||
|
|
||||||
interface ServerState {
|
interface ServerState {
|
||||||
client: Client;
|
client: Client;
|
||||||
transport: StreamableHTTPClientTransport | StdioClientTransport;
|
transport: StreamableHTTPClientTransport | StdioClientTransport;
|
||||||
tools: ToolDef<Record<string, unknown>>[];
|
tools: ToolDef<Record<string, unknown>>[];
|
||||||
type: 'streamableHttp' | 'stdio';
|
type: 'streamableHttp' | 'stdio';
|
||||||
|
permission: McpPermission;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- Module-level state ----
|
// ---- Module-level state ----
|
||||||
@@ -137,6 +140,14 @@ export async function callTool(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return the permission level for a given MCP server. Defaults to 'allow' if unknown. */
|
||||||
|
export function getServerPermission(prefixedToolName: string): McpPermission {
|
||||||
|
const serverName = toolToServer.get(prefixedToolName);
|
||||||
|
if (!serverName) return 'allow';
|
||||||
|
const state = servers.get(serverName);
|
||||||
|
return state?.permission ?? 'allow';
|
||||||
|
}
|
||||||
|
|
||||||
/** Return all wrapped ToolDefs from all connected servers, flattened. */
|
/** Return all wrapped ToolDefs from all connected servers, flattened. */
|
||||||
export function getTools(): ToolDef<Record<string, unknown>>[] {
|
export function getTools(): ToolDef<Record<string, unknown>>[] {
|
||||||
const all: ToolDef<Record<string, unknown>>[] = [];
|
const all: ToolDef<Record<string, unknown>>[] = [];
|
||||||
@@ -214,7 +225,8 @@ async function connectServer(entry: McpServerEntry): Promise<void> {
|
|||||||
toolToServer.set(wrapped.name, name);
|
toolToServer.set(wrapped.name, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
servers.set(name, { client, transport, tools, type: config.type });
|
const permission = (config as { permission?: McpPermission }).permission ?? 'allow';
|
||||||
|
servers.set(name, { client, transport, tools, type: config.type, permission });
|
||||||
|
|
||||||
log!.info(
|
log!.info(
|
||||||
{ server: name, type: config.type, count: tools.length, names: tools.map((t) => t.name) },
|
{ server: name, type: config.type, count: tools.length, names: tools.map((t) => t.name) },
|
||||||
|
|||||||
@@ -17,12 +17,15 @@ import type { FastifyBaseLogger } from 'fastify';
|
|||||||
|
|
||||||
// ---- Zod schema ----
|
// ---- Zod schema ----
|
||||||
|
|
||||||
|
const McpPermissionSchema = z.enum(['allow', 'ask', 'deny']).default('allow');
|
||||||
|
|
||||||
const McpServerConfigSchema = z.discriminatedUnion('type', [
|
const McpServerConfigSchema = z.discriminatedUnion('type', [
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal('streamableHttp'),
|
type: z.literal('streamableHttp'),
|
||||||
url: z.string().url(),
|
url: z.string().url(),
|
||||||
headers: z.record(z.string()).optional(),
|
headers: z.record(z.string()).optional(),
|
||||||
enabled: z.boolean().default(true),
|
enabled: z.boolean().default(true),
|
||||||
|
permission: McpPermissionSchema,
|
||||||
}),
|
}),
|
||||||
z.object({
|
z.object({
|
||||||
type: z.literal('stdio'),
|
type: z.literal('stdio'),
|
||||||
@@ -30,6 +33,7 @@ const McpServerConfigSchema = z.discriminatedUnion('type', [
|
|||||||
args: z.array(z.string()).default([]),
|
args: z.array(z.string()).default([]),
|
||||||
env: z.record(z.string()).optional(),
|
env: z.record(z.string()).optional(),
|
||||||
enabled: z.boolean().default(true),
|
enabled: z.boolean().default(true),
|
||||||
|
permission: McpPermissionSchema,
|
||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|||||||
@@ -7,10 +7,12 @@
|
|||||||
|
|
||||||
export const MESSAGE_COLUMNS =
|
export const MESSAGE_COLUMNS =
|
||||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||||
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
|
'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
|
||||||
|
'started_at, finished_at, created_at, metadata, ' +
|
||||||
'summary, tail_start_id, compacted_at, model';
|
'summary, tail_start_id, compacted_at, model';
|
||||||
|
|
||||||
export const INFERENCE_MESSAGE_COLUMNS =
|
export const INFERENCE_MESSAGE_COLUMNS =
|
||||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||||
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
|
'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
|
||||||
|
'started_at, finished_at, created_at, metadata, ' +
|
||||||
'reasoning_parts, model';
|
'reasoning_parts, model';
|
||||||
|
|||||||
@@ -37,7 +37,18 @@ export function configureModelContext(opts: { llamaSwapUrl: string }): void {
|
|||||||
llamaSwapUrl = opts.llamaSwapUrl;
|
llamaSwapUrl = opts.llamaSwapUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
|
||||||
|
// Return a reasonable default context so compaction estimates work.
|
||||||
|
const DEEPSEEK_DEFAULT_N_CTX = 131_072;
|
||||||
|
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
|
||||||
|
|
||||||
export async function getModelContext(model: string): Promise<ModelContext | null> {
|
export async function getModelContext(model: string): Promise<ModelContext | null> {
|
||||||
|
// vDeepSeek: DeepSeek models have no /upstream/<model>/props. Use a static
|
||||||
|
// default so compaction doesn't fall to the buffer-only path with tiny limits.
|
||||||
|
if (model.startsWith(DEEPSEEK_MODEL_PREFIX)) {
|
||||||
|
return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
|
||||||
|
}
|
||||||
|
|
||||||
// 1. Positive cache hit — no TTL check, model n_ctx is invariant.
|
// 1. Positive cache hit — no TTL check, model n_ctx is invariant.
|
||||||
const pos = positiveCache.get(model);
|
const pos = positiveCache.get(model);
|
||||||
if (pos) return pos;
|
if (pos) return pos;
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ export interface PrefixFingerprint {
|
|||||||
has_agent_system_prompt: boolean;
|
has_agent_system_prompt: boolean;
|
||||||
has_session_override: boolean;
|
has_session_override: boolean;
|
||||||
has_project_override: boolean;
|
has_project_override: boolean;
|
||||||
route: 'swap' | 'sidecar';
|
route: 'swap' | 'sidecar' | 'deepseek';
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PrefixDrift {
|
export interface PrefixDrift {
|
||||||
@@ -129,7 +129,7 @@ interface ObservedInputs {
|
|||||||
has_agent_system_prompt: boolean;
|
has_agent_system_prompt: boolean;
|
||||||
has_session_override: boolean;
|
has_session_override: boolean;
|
||||||
has_project_override: boolean;
|
has_project_override: boolean;
|
||||||
route: 'swap' | 'sidecar';
|
route: 'swap' | 'sidecar' | 'deepseek';
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ObserverEntry {
|
interface ObserverEntry {
|
||||||
|
|||||||
@@ -2,6 +2,12 @@ import { z } from 'zod';
|
|||||||
import type { ToolDef } from '../types.js';
|
import type { ToolDef } from '../types.js';
|
||||||
import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js';
|
import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js';
|
||||||
|
|
||||||
|
// DEPRECATED (Phase 4, Domain 2, v2.8.14): This factory builds ToolDefs that
|
||||||
|
// route through the Go codecontext sidecar via callCodecontext(). Superseded
|
||||||
|
// by direct boocontext MCP tool wrappers. Keep functional for backward
|
||||||
|
// compatibility — old codecontext tools still use HTTP. New tools should use
|
||||||
|
// the boocontext MCP server instead of adding entries here.
|
||||||
|
//
|
||||||
// Shared factory for the 12 codecontext shim ToolDefs.
|
// Shared factory for the 12 codecontext shim ToolDefs.
|
||||||
// Each shim provides name/schema/description/jsonParameters/mapArgs; the
|
// Each shim provides name/schema/description/jsonParameters/mapArgs; the
|
||||||
// factory builds the ToolDef and returns both the ToolDef and the standalone
|
// factory builds the ToolDef and returns both the ToolDef and the standalone
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { makeCodecontextTool } from './factory.js';
|
|||||||
|
|
||||||
export const GetCodebaseOverviewInput = z.object({
|
export const GetCodebaseOverviewInput = z.object({
|
||||||
include_stats: z.boolean().optional(),
|
include_stats: z.boolean().optional(),
|
||||||
|
compress: z.boolean().optional().describe('Apply DCP compression for large projects (>50 files)'),
|
||||||
});
|
});
|
||||||
export type GetCodebaseOverviewInputT = z.infer<typeof GetCodebaseOverviewInput>;
|
export type GetCodebaseOverviewInputT = z.infer<typeof GetCodebaseOverviewInput>;
|
||||||
|
|
||||||
@@ -24,10 +25,18 @@ const { toolDef: getCodebaseOverview, execute: executeGetCodebaseOverview } =
|
|||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
description: 'Include file count, symbol count, language stats. Defaults to true.',
|
description: 'Include file count, symbol count, language stats. Defaults to true.',
|
||||||
},
|
},
|
||||||
|
compress: {
|
||||||
|
type: 'boolean',
|
||||||
|
description: 'Apply DCP compression for large projects (>50 files)',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
additionalProperties: false,
|
additionalProperties: false,
|
||||||
},
|
},
|
||||||
mapArgs: (input) => ({ include_stats: input.include_stats ?? true }),
|
mapArgs: (input) => {
|
||||||
|
const args: Record<string, unknown> = { include_stats: input.include_stats ?? true };
|
||||||
|
if (input.compress) args['compress'] = true;
|
||||||
|
return args;
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
export { getCodebaseOverview, executeGetCodebaseOverview };
|
export { getCodebaseOverview, executeGetCodebaseOverview };
|
||||||
|
|||||||
@@ -18,3 +18,4 @@ export { getCodeHealth } from './get_code_health.js';
|
|||||||
export { getCodeImpact } from './get_code_impact.js';
|
export { getCodeImpact } from './get_code_impact.js';
|
||||||
export { getTypeInfo } from './get_type_info.js';
|
export { getTypeInfo } from './get_type_info.js';
|
||||||
export { getCodeMap } from './get_code_map.js';
|
export { getCodeMap } from './get_code_map.js';
|
||||||
|
export { getWikiArticle } from './get_wiki_article.js';
|
||||||
|
|||||||
@@ -127,6 +127,9 @@ export interface Agent {
|
|||||||
// bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
|
// bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
|
||||||
steps: number | null;
|
steps: number | null;
|
||||||
llama_extra_args: string[] | null;
|
llama_extra_args: string[] | null;
|
||||||
|
// vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
|
||||||
|
// Maps to DeepSeek's reasoning_effort API param.
|
||||||
|
reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// One entry per malformed `## Name` block. Per-block errors don't fail the
|
// One entry per malformed `## Name` block. Per-block errors don't fail the
|
||||||
@@ -206,6 +209,8 @@ export interface Message {
|
|||||||
tokens_used: number | null;
|
tokens_used: number | null;
|
||||||
ctx_used: number | null;
|
ctx_used: number | null;
|
||||||
ctx_max: number | null;
|
ctx_max: number | null;
|
||||||
|
cache_tokens: number | null;
|
||||||
|
reasoning_tokens: number | null;
|
||||||
started_at: string | null;
|
started_at: string | null;
|
||||||
finished_at: string | null;
|
finished_at: string | null;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
|
|||||||
@@ -152,6 +152,8 @@ export interface Message {
|
|||||||
tokens_used: number | null;
|
tokens_used: number | null;
|
||||||
ctx_used: number | null;
|
ctx_used: number | null;
|
||||||
ctx_max: number | null;
|
ctx_max: number | null;
|
||||||
|
cache_tokens: number | null;
|
||||||
|
reasoning_tokens: number | null;
|
||||||
// model-attribution: which model produced this assistant message (null for
|
// model-attribution: which model produced this assistant message (null for
|
||||||
// user/system rows + pre-attribution messages). Rendered as a chip.
|
// user/system rows + pre-attribution messages). Rendered as a chip.
|
||||||
model: string | null;
|
model: string | null;
|
||||||
@@ -530,6 +532,8 @@ export type WsFrame =
|
|||||||
tokens_used?: number | null;
|
tokens_used?: number | null;
|
||||||
ctx_used?: number | null;
|
ctx_used?: number | null;
|
||||||
ctx_max?: number | null;
|
ctx_max?: number | null;
|
||||||
|
cache_tokens?: number | null;
|
||||||
|
reasoning_tokens?: number | null;
|
||||||
started_at?: string | null;
|
started_at?: string | null;
|
||||||
finished_at?: string | null;
|
finished_at?: string | null;
|
||||||
// model-attribution: the model that produced this assistant message.
|
// model-attribution: the model that produced this assistant message.
|
||||||
|
|||||||
@@ -156,9 +156,16 @@ function StatsLine({ message }: { message: Message }) {
|
|||||||
: `${ctxUsed} ctx`
|
: `${ctxUsed} ctx`
|
||||||
: null;
|
: null;
|
||||||
|
|
||||||
|
const cacheHit = message.cache_tokens;
|
||||||
|
const reasoning = message.reasoning_tokens;
|
||||||
|
const cachePart = typeof cacheHit === 'number' && cacheHit > 0 ? `cache ${cacheHit}` : null;
|
||||||
|
const reasoningPart = typeof reasoning === 'number' && reasoning > 0 ? `think ${reasoning}` : null;
|
||||||
|
|
||||||
const parts: string[] = [`${tokens} tokens`];
|
const parts: string[] = [`${tokens} tokens`];
|
||||||
if (tps !== null) parts.push(`${tps.toFixed(1)} tok/s`);
|
if (tps !== null) parts.push(`${tps.toFixed(1)} tok/s`);
|
||||||
if (ctxPart) parts.push(ctxPart);
|
if (ctxPart) parts.push(ctxPart);
|
||||||
|
if (cachePart) parts.push(cachePart);
|
||||||
|
if (reasoningPart) parts.push(reasoningPart);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="text-[10px] font-mono text-muted-foreground">
|
<div className="text-[10px] font-mono text-muted-foreground">
|
||||||
|
|||||||
@@ -40,6 +40,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
|||||||
tokens_used: null,
|
tokens_used: null,
|
||||||
ctx_used: null,
|
ctx_used: null,
|
||||||
ctx_max: null,
|
ctx_max: null,
|
||||||
|
cache_tokens: null,
|
||||||
|
reasoning_tokens: null,
|
||||||
model: null,
|
model: null,
|
||||||
started_at: null,
|
started_at: null,
|
||||||
finished_at: null,
|
finished_at: null,
|
||||||
@@ -106,6 +108,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
|||||||
tokens_used: null,
|
tokens_used: null,
|
||||||
ctx_used: null,
|
ctx_used: null,
|
||||||
ctx_max: null,
|
ctx_max: null,
|
||||||
|
cache_tokens: null,
|
||||||
|
reasoning_tokens: null,
|
||||||
model: null,
|
model: null,
|
||||||
started_at: null,
|
started_at: null,
|
||||||
finished_at: null,
|
finished_at: null,
|
||||||
@@ -123,6 +127,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
|||||||
...(frame.tokens_used !== undefined ? { tokens_used: frame.tokens_used } : {}),
|
...(frame.tokens_used !== undefined ? { tokens_used: frame.tokens_used } : {}),
|
||||||
...(frame.ctx_used !== undefined ? { ctx_used: frame.ctx_used } : {}),
|
...(frame.ctx_used !== undefined ? { ctx_used: frame.ctx_used } : {}),
|
||||||
...(frame.ctx_max !== undefined ? { ctx_max: frame.ctx_max } : {}),
|
...(frame.ctx_max !== undefined ? { ctx_max: frame.ctx_max } : {}),
|
||||||
|
...(frame.cache_tokens !== undefined ? { cache_tokens: frame.cache_tokens } : {}),
|
||||||
|
...(frame.reasoning_tokens !== undefined ? { reasoning_tokens: frame.reasoning_tokens } : {}),
|
||||||
...(frame.started_at !== undefined ? { started_at: frame.started_at } : {}),
|
...(frame.started_at !== undefined ? { started_at: frame.started_at } : {}),
|
||||||
...(frame.finished_at !== undefined ? { finished_at: frame.finished_at } : {}),
|
...(frame.finished_at !== undefined ? { finished_at: frame.finished_at } : {}),
|
||||||
...(frame.model !== undefined ? { model: frame.model } : {}),
|
...(frame.model !== undefined ? { model: frame.model } : {}),
|
||||||
|
|||||||
31
codecontext/README.md
Normal file
31
codecontext/README.md
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# codecontext — Go sidecar (DEPRECATED)
|
||||||
|
|
||||||
|
> **Deprecated** (Phase 4, Domain 2, v2.8.14).
|
||||||
|
>
|
||||||
|
> Superseded by the **boocontext MCP server** (`apps/coder`). Do not add new
|
||||||
|
> callers. The 16 codecontext tool wrappers still use this sidecar via HTTP at
|
||||||
|
> `http://codecontext:8080/v1/{toolName}` for backward compatibility.
|
||||||
|
|
||||||
|
## Migration path
|
||||||
|
|
||||||
|
1. Existing tool wrappers in `apps/server/src/services/tools/codecontext/` route
|
||||||
|
through `callCodecontext()` in `codecontext_client.ts`, which calls this
|
||||||
|
Go sidecar over HTTP.
|
||||||
|
2. New callers should use the boocontext MCP server instead (reachable via the
|
||||||
|
`boocontext` tool wrappers).
|
||||||
|
3. After all callers have migrated, remove this directory, the `codecontext`
|
||||||
|
service block from `docker-compose.yml`, and the
|
||||||
|
`codecontext_client.ts`/`factory.ts` files.
|
||||||
|
|
||||||
|
## What it does
|
||||||
|
|
||||||
|
A Go HTTP shim wrapping the boocontext MCP server's stdio interface. Provides
|
||||||
|
code-graph analysis (symbols, callers, callees, file overview, etc.) over a
|
||||||
|
REST API at `/v1/{toolName}`.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
- `shim.go` — HTTP server that wraps the boocontext MCP stdio process
|
||||||
|
- `Dockerfile` — container build
|
||||||
|
- `fork.tar.gz` — vendored boocontext source (gitignored)
|
||||||
|
- `.codecontextignore.template` — default ignore patterns deployed per project
|
||||||
@@ -6,7 +6,7 @@ Operating rules for every agent in this registry. Full procedures live in the `c
|
|||||||
|
|
||||||
**Worktrees** — Isolate work in a worktree when it is parallel to in-progress work, risky/experimental, a hotfix interrupting other work, or splits into independent units — just create when clear, propose in one line when ambiguous, skip quick/small single-stream work. Branch from a stable base (default branch); worktrees persist (never auto-remove or auto-merge); they isolate code state, not runtime (ports/DBs/services still collide). Full heuristic: invoke `using-worktrees`.
|
**Worktrees** — Isolate work in a worktree when it is parallel to in-progress work, risky/experimental, a hotfix interrupting other work, or splits into independent units — just create when clear, propose in one line when ambiguous, skip quick/small single-stream work. Branch from a stable base (default branch); worktrees persist (never auto-remove or auto-merge); they isolate code state, not runtime (ports/DBs/services still collide). Full heuristic: invoke `using-worktrees`.
|
||||||
|
|
||||||
**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context).
|
**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context). DeepSeek V4 models also accept `reasoning_effort` (low/medium/high/xhigh/max); omit to disable thinking mode. Example: `reasoning_effort: 'high'`.
|
||||||
|
|
||||||
**Reasoning budget** — To cap a reasoning model's thinking tokens, pass `--reasoning-budget` through `llama_extra_args` (already permitted by the deny-list validator; routes the agent to llama-sidecar). Example frontmatter line: `llama_extra_args: ["--reasoning-budget", "2048"]`. This is a sidecar process flag, not a chat-completion body param — distinct from the sampling knobs above.
|
**Reasoning budget** — To cap a reasoning model's thinking tokens, pass `--reasoning-budget` through `llama_extra_args` (already permitted by the deny-list validator; routes the agent to llama-sidecar). Example frontmatter line: `llama_extra_args: ["--reasoning-budget", "2048"]`. This is a sidecar process flag, not a chat-completion body param — distinct from the sampling knobs above.
|
||||||
|
|
||||||
|
|||||||
@@ -95,6 +95,13 @@ services:
|
|||||||
# HTTP shim (see ./codecontext/). No host port — reached from boocode at
|
# HTTP shim (see ./codecontext/). No host port — reached from boocode at
|
||||||
# http://codecontext:8080 over the boocode_net bridge.
|
# http://codecontext:8080 over the boocode_net bridge.
|
||||||
#
|
#
|
||||||
|
# DEPRECATED (Phase 4, Domain 2, v2.8.14): Superseded by the boocontext
|
||||||
|
# MCP server. The 16 codecontext tool wrappers still use this sidecar via
|
||||||
|
# HTTP but should route through the boocontext MCP instead. Keep the
|
||||||
|
# service running for backward compatibility until all callers migrate.
|
||||||
|
# Remove this block after full migration — see codecontext_client.ts and
|
||||||
|
# factory.ts for deprecation markers.
|
||||||
|
#
|
||||||
# Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects live
|
# Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects live
|
||||||
# at /opt/<slug> on the host, not exclusively under /opt/projects. The
|
# at /opt/<slug> on the host, not exclusively under /opt/projects. The
|
||||||
# mount must cover anywhere a project.path could resolve to. Read-only
|
# mount must cover anywhere a project.path could resolve to. Read-only
|
||||||
|
|||||||
@@ -116,6 +116,8 @@ export const MessageCompleteFrame = z.object({
|
|||||||
tokens_used: z.number().int().nonnegative().nullable().optional(),
|
tokens_used: z.number().int().nonnegative().nullable().optional(),
|
||||||
ctx_used: z.number().int().nonnegative().nullable().optional(),
|
ctx_used: z.number().int().nonnegative().nullable().optional(),
|
||||||
ctx_max: z.number().int().positive().nullable().optional(),
|
ctx_max: z.number().int().positive().nullable().optional(),
|
||||||
|
cache_tokens: z.number().int().nonnegative().nullable().optional(),
|
||||||
|
reasoning_tokens: z.number().int().nonnegative().nullable().optional(),
|
||||||
started_at: IsoTimestamp.nullable().optional(),
|
started_at: IsoTimestamp.nullable().optional(),
|
||||||
finished_at: IsoTimestamp.nullable().optional(),
|
finished_at: IsoTimestamp.nullable().optional(),
|
||||||
// nullable: external-coder turns carry task.model, which is null when no
|
// nullable: external-coder turns carry task.model, which is null when no
|
||||||
|
|||||||
Reference in New Issue
Block a user