chore: snapshot main sync
This commit is contained in:
@@ -68,6 +68,14 @@
|
||||
"./skill-invoke": {
|
||||
"types": "./dist/services/skill-invoke.d.ts",
|
||||
"default": "./dist/services/skill-invoke.js"
|
||||
},
|
||||
"./mcp-config": {
|
||||
"types": "./dist/services/mcp-config.d.ts",
|
||||
"default": "./dist/services/mcp-config.js"
|
||||
},
|
||||
"./mcp-client": {
|
||||
"types": "./dist/services/mcp-client.d.ts",
|
||||
"default": "./dist/services/mcp-client.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
@@ -77,6 +85,7 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/anthropic": "^3.0.84",
|
||||
"@ai-sdk/deepseek": "^2.0.35",
|
||||
"@ai-sdk/openai-compatible": "^2.0.47",
|
||||
"@boocode/contracts": "workspace:*",
|
||||
|
||||
@@ -8,7 +8,7 @@ const ConfigSchema = z.object({
|
||||
LLAMA_SWAP_URL: z.string().url(),
|
||||
PROJECT_ROOT_WHITELIST: z.string().default('/opt'),
|
||||
BOOTSTRAP_ROOT: z.string().default('/opt/projects'),
|
||||
DEFAULT_MODEL: z.string().default('qwen3.6-35b-a3b-mxfp4'),
|
||||
DEFAULT_MODEL: z.string().default('sam-desktop/qwen3.6-35b-a3b'),
|
||||
LOG_LEVEL: z.string().default('info'),
|
||||
// v1.11.8: SearXNG JSON endpoint for web_search / web_fetch tools.
|
||||
// Defaults to the internal Tailscale Fathom URL (bypasses Authelia).
|
||||
@@ -31,12 +31,20 @@ const ConfigSchema = z.object({
|
||||
DEEPSEEK_API_KEY: z.string().optional(),
|
||||
// Optional base URL override for DeepSeek API. Defaults to api.deepseek.com.
|
||||
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||
// Beta endpoint for experimental features (strict tools, prefix completion, etc.).
|
||||
// Defaults to api.deepseek.com/beta. When set, deepseek calls with tools or
|
||||
// prefix content route through this endpoint.
|
||||
DEEPSEEK_BETA_BASE_URL: z.string().url().default('https://api.deepseek.com/beta'),
|
||||
// Hosted Anthropic Claude. When set, models with provider id "anthropic"
|
||||
// (or bare "claude-*" ids) route through the Anthropic Messages API via
|
||||
// @ai-sdk/anthropic instead of llama-swap. Unset = Claude routing disabled.
|
||||
ANTHROPIC_API_KEY: z.string().optional(),
|
||||
ANTHROPIC_BASE_URL: z.string().url().optional(),
|
||||
// vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
|
||||
HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
|
||||
// vMultiProvider: path to the local providers config JSON file. Missing file
|
||||
// = legacy synthesis from LLAMA_SWAP_URL.
|
||||
LLAMA_PROVIDERS_PATH: z.string().optional(),
|
||||
// BooControl host service origin. Used by /api/control/* proxy routes.
|
||||
BOOCONTROL_URL: z.string().url().optional(),
|
||||
});
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import { registerProjectRoutes } from './routes/projects.js';
|
||||
import { registerSessionRoutes } from './routes/sessions.js';
|
||||
import { registerSettingsRoutes } from './routes/settings.js';
|
||||
import { registerMessageRoutes } from './routes/messages.js';
|
||||
import { registerMessageFeedbackRoutes } from './routes/messages-feedback.js';
|
||||
import { registerArtifactRoutes } from './routes/artifacts.js';
|
||||
import { registerChatRoutes } from './routes/chats.js';
|
||||
import { registerSidebarRoutes } from './routes/sidebar.js';
|
||||
@@ -17,6 +18,7 @@ import { registerWebSocket } from './routes/ws.js';
|
||||
import { registerCoderProxy } from './routes/coder-proxy.js';
|
||||
import { registerControlProxy } from './routes/control-proxy.js';
|
||||
import { registerModelRoutes } from './routes/models.js';
|
||||
import { registerProviderRoutes } from './routes/providers.js';
|
||||
import { registerAgentRoutes } from './routes/agents.js';
|
||||
import { registerSkillsRoutes } from './routes/skills.js';
|
||||
import { registerTraceRoutes } from './routes/traces.js';
|
||||
@@ -35,7 +37,7 @@ import { cleanupTruncations } from './services/truncate.js';
|
||||
import { loadMcpConfig } from './services/mcp-config.js';
|
||||
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
||||
import { appendMcpTools } from './services/tools.js';
|
||||
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
||||
import { refreshToolNames } from "./services/agents.js";
|
||||
import { loadHooksConfig, createHookRunner } from './services/hooks.js';
|
||||
import { loadLlamaProviders } from './services/llama-providers.js';
|
||||
|
||||
@@ -119,6 +121,7 @@ async function main() {
|
||||
registerSessionRoutes(app, sql, config, broker);
|
||||
registerSettingsRoutes(app, sql);
|
||||
registerModelRoutes(app, config);
|
||||
registerProviderRoutes(app);
|
||||
registerAgentRoutes(app, sql);
|
||||
registerSidebarRoutes(app, sql);
|
||||
registerChatRoutes(app, sql, broker, config, {
|
||||
@@ -126,15 +129,17 @@ async function main() {
|
||||
// Reuse the inference runner's context pattern for compare mode.
|
||||
// Each compare run gets its own AbortController; cancellation keyed by
|
||||
// chatId (cancels ALL parallel runs in that compare group).
|
||||
let streamSeq = 0;
|
||||
const compareCtx: import('./services/inference/types.js').InferenceContext = {
|
||||
sql,
|
||||
config,
|
||||
log: app.log,
|
||||
publish: (sid, frame) => {
|
||||
broker.publishFrame(sid, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
frame.stream_seq = streamSeq++;
|
||||
broker.publishFrame(sid, frame as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
},
|
||||
publishUser: (frame) => {
|
||||
broker.publishUserFrame('default', frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
broker.publishUserFrame('default', frame as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
},
|
||||
broker,
|
||||
hooks: hasHooks ? hookRunner : undefined,
|
||||
@@ -169,6 +174,7 @@ async function main() {
|
||||
const hookRunner = createHookRunner();
|
||||
const hasHooks = Object.keys(loadHooksConfig(config.HOOKS_CONFIG_PATH).hooks).length > 0;
|
||||
|
||||
let streamSeq = 0;
|
||||
const inference = createInferenceRunner(
|
||||
{
|
||||
sql,
|
||||
@@ -176,9 +182,8 @@ async function main() {
|
||||
log: app.log,
|
||||
hooks: hasHooks ? hookRunner : undefined,
|
||||
publish: (sessionId, frame) => {
|
||||
// v1.13.11-b: route through the typed publishFrame so the broker's
|
||||
// Zod gate validates every inference frame before delivery.
|
||||
broker.publishFrame(sessionId, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
frame.stream_seq = streamSeq++;
|
||||
broker.publishFrame(sessionId, frame as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
},
|
||||
// v1.11: broker handle for compaction.process to publish 'compacted'
|
||||
// frames on the per-session channel. Inference's regular publish path
|
||||
@@ -187,7 +192,7 @@ async function main() {
|
||||
broker,
|
||||
},
|
||||
(user, frame) => {
|
||||
broker.publishUserFrame(user, frame as unknown as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
broker.publishUserFrame(user, frame as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
}
|
||||
);
|
||||
// v2.x: wire the background subagent task system to the inference runner.
|
||||
@@ -242,6 +247,7 @@ async function main() {
|
||||
broker.publishFrame(sessionId, frame as import('@boocode/contracts/ws-frames').WsFrame);
|
||||
},
|
||||
});
|
||||
registerMessageFeedbackRoutes(app, sql);
|
||||
registerArtifactRoutes(app, sql);
|
||||
registerSkillsRoutes(app, sql, {
|
||||
enqueueInference: (sessionId, chatId, assistantId, user) => {
|
||||
|
||||
@@ -522,7 +522,6 @@ export function registerChatRoutes(
|
||||
|
||||
const { message, models } = parsed.data;
|
||||
|
||||
// Check for active inference first.
|
||||
if (compareHandlers.hasActiveInference(req.params.id)) {
|
||||
reply.code(409);
|
||||
return { error: 'chat is currently streaming; stop it first' };
|
||||
|
||||
58
apps/server/src/routes/messages-feedback.ts
Normal file
58
apps/server/src/routes/messages-feedback.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { z } from 'zod';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { MessageMetadata } from '../types/api.js';
|
||||
|
||||
/** Request body accepted by the feedback endpoint: a single 'up' or 'down' value. */
const FeedbackBody = z.object({
  value: z.enum(['up', 'down']),
});

/**
 * Registers `POST /api/chats/:id/messages/:message_id/feedback`.
 *
 * Responses produced by the handler:
 * - 400 `invalid body` when the body does not match {@link FeedbackBody}
 * - 404 `message not found` when no message with that id exists in the chat
 * - 400 when the target message's role is not `assistant`
 * - 409 `feedback already recorded` when the message metadata is already of
 *   kind `feedback`
 * - `{ ok: true }` once the feedback metadata has been written
 *
 * @param app Fastify instance the route is attached to.
 * @param sql Tagged-template SQL client used for the lookup and the update.
 */
export function registerMessageFeedbackRoutes(app: FastifyInstance, sql: Sql): void {
  app.post<{ Params: { id: string; message_id: string } }>(
    '/api/chats/:id/messages/:message_id/feedback',
    async (req, reply) => {
      const parsed = FeedbackBody.safeParse(req.body);
      if (!parsed.success) {
        reply.code(400);
        return { error: 'invalid body', details: parsed.error.flatten() };
      }
      const { id: chatId, message_id: messageId } = req.params;
      const { value } = parsed.data;

      const rows = await sql<{ id: string; role: string; metadata: MessageMetadata | null }[]>`
        SELECT id, role, metadata FROM messages WHERE id = ${messageId} AND chat_id = ${chatId}
      `;
      const [message] = rows;
      if (!message) {
        reply.code(404);
        return { error: 'message not found' };
      }

      // Only allow feedback on assistant messages.
      if (message.role !== 'assistant') {
        reply.code(400);
        return { error: 'only assistant messages can receive feedback' };
      }

      // Fast path: feedback is already present on the row we just read.
      if (message.metadata && message.metadata.kind === 'feedback') {
        reply.code(409);
        return { error: 'feedback already recorded' };
      }

      const feedbackMeta: MessageMetadata = {
        kind: 'feedback',
        value,
        chat_id: chatId,
      };

      // The "no feedback yet" guard is repeated inside the UPDATE so that two
      // concurrent requests cannot both pass the check above and both write;
      // only the first one matches a row. The statement is also scoped by
      // chat_id, mirroring the lookup.
      const updated = await sql<{ id: string }[]>`
        UPDATE messages
        SET metadata = ${sql.json(feedbackMeta as never)}, updated_at = clock_timestamp()
        WHERE id = ${messageId}
          AND chat_id = ${chatId}
          AND metadata->>'kind' IS DISTINCT FROM 'feedback'
        RETURNING id
      `;
      if (updated.length === 0) {
        // No row matched: a concurrent request recorded feedback first (or the
        // row changed between the lookup and the write).
        reply.code(409);
        return { error: 'feedback already recorded' };
      }

      return { ok: true };
    },
  );
}
|
||||
@@ -10,80 +10,7 @@ import type { Chat, Message, MessageMetadata, Session, ToolCall } from '../types
|
||||
import { resolveGrantRoot } from '../services/grant_resolver.js';
|
||||
import { MESSAGE_COLUMNS } from '../services/message-columns.js';
|
||||
import { setServerPermission, getServerName } from '../services/mcp-client.js';
|
||||
|
||||
// Shared lookup for the answer_user_input + grant_read_access pause-resume
|
||||
// endpoints. Finds the originating assistant tool_call by id in message_parts,
|
||||
// validates the tool name, finds the pending tool_result part, and checks the
|
||||
// already-answered guard. Returns ok:true+context on success, ok:false+HTTP
|
||||
// status+body on any error (caller does reply.code(ctx.code); return ctx.body).
|
||||
type PendingToolLookupResult =
|
||||
| {
|
||||
ok: true;
|
||||
foundCall: ToolCall;
|
||||
toolMessageId: string;
|
||||
toolRow: { message_id: string; payload: { tool_call_id: string; output: unknown } };
|
||||
}
|
||||
| { ok: false; code: number; body: Record<string, unknown> };
|
||||
|
||||
async function lookupPendingToolCall(
|
||||
sql: Sql,
|
||||
chatId: string,
|
||||
tool_call_id: string,
|
||||
expectedToolName: string,
|
||||
wrongToolError: string,
|
||||
): Promise<PendingToolLookupResult> {
|
||||
// Find the assistant's tool_call by id via message_parts.
|
||||
const callerRows = await sql<{
|
||||
message_id: string;
|
||||
payload: { id: string; name: string; args: Record<string, unknown> };
|
||||
}[]>`
|
||||
SELECT p.message_id, p.payload
|
||||
FROM message_parts p
|
||||
JOIN messages m ON m.id = p.message_id
|
||||
WHERE m.chat_id = ${chatId}
|
||||
AND m.role = 'assistant'
|
||||
AND p.kind = 'tool_call'
|
||||
AND p.payload->>'id' = ${tool_call_id}
|
||||
ORDER BY m.created_at DESC
|
||||
LIMIT 1
|
||||
`;
|
||||
const callerRow = callerRows[0];
|
||||
if (!callerRow) return { ok: false, code: 404, body: { error: 'unknown_tool_call_id' } };
|
||||
|
||||
const foundCall: ToolCall = {
|
||||
id: callerRow.payload.id,
|
||||
name: callerRow.payload.name,
|
||||
args: callerRow.payload.args,
|
||||
};
|
||||
if (foundCall.name !== expectedToolName) {
|
||||
return { ok: false, code: 400, body: { error: wrongToolError } };
|
||||
}
|
||||
|
||||
// Find the pending tool_result part by tool_call_id.
|
||||
const toolRows = await sql<{
|
||||
message_id: string;
|
||||
payload: { tool_call_id: string; output: unknown };
|
||||
}[]>`
|
||||
SELECT p.message_id, p.payload
|
||||
FROM message_parts p
|
||||
JOIN messages m ON m.id = p.message_id
|
||||
WHERE m.chat_id = ${chatId}
|
||||
AND m.role = 'tool'
|
||||
AND p.kind = 'tool_result'
|
||||
AND p.payload->>'tool_call_id' = ${tool_call_id}
|
||||
ORDER BY m.created_at DESC
|
||||
LIMIT 1
|
||||
`;
|
||||
const toolRow = toolRows[0];
|
||||
if (!toolRow) {
|
||||
return { ok: false, code: 404, body: { error: 'unknown_tool_call_id', detail: 'tool message not found' } };
|
||||
}
|
||||
if (toolRow.payload && toolRow.payload.output !== null) {
|
||||
return { ok: false, code: 409, body: { error: 'tool_call_already_answered' } };
|
||||
}
|
||||
|
||||
return { ok: true, foundCall, toolMessageId: toolRow.message_id, toolRow };
|
||||
}
|
||||
import { lookupPendingToolCall } from '../services/pending-tool-lookup.js';
|
||||
|
||||
const SendBody = z.object({
|
||||
content: z.string().min(1).max(64_000),
|
||||
@@ -146,11 +73,6 @@ const RequestReadAccessArgs = z.object({
|
||||
|
||||
interface MessageHandlers {
|
||||
enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
|
||||
// v1.11: returns a promise that resolves after compaction.process finishes
|
||||
// (await the LLM call). Throws on failure — the route surfaces a 500.
|
||||
// Replaces the v1.10 enqueueCompact (which fired-and-forgot a kind='compact'
|
||||
// streaming row). The new anchored-rolling strategy inserts a single
|
||||
// summary=true assistant row only after the LLM responds.
|
||||
runCompaction: (chatId: string) => Promise<void>;
|
||||
publishUserMessage: (
|
||||
sessionId: string,
|
||||
@@ -360,11 +282,6 @@ export function registerMessageRoutes(
|
||||
}
|
||||
);
|
||||
|
||||
// v1.11: manual /compact. Was a streaming kind='compact' row inserted by
|
||||
// this handler; now delegates to the anchored-rolling compaction service.
|
||||
// Synchronous (we await the LLM call) — callers either await or rely on
|
||||
// the 'compacted' WS frame to refresh their view. The response carries
|
||||
// no body of interest; the new summary row arrives via the WS frame.
|
||||
app.post<{ Params: { id: string } }>(
|
||||
'/api/chats/:id/compact',
|
||||
async (req, reply) => {
|
||||
@@ -908,56 +825,4 @@ export function registerMessageRoutes(
|
||||
return { ok: true };
|
||||
},
|
||||
);
|
||||
|
||||
const FeedbackBody = z.object({
|
||||
value: z.enum(['up', 'down']),
|
||||
});
|
||||
|
||||
app.post<{ Params: { id: string; message_id: string } }>(
|
||||
'/api/chats/:id/messages/:message_id/feedback',
|
||||
async (req, reply) => {
|
||||
const parsed = FeedbackBody.safeParse(req.body);
|
||||
if (!parsed.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid body', details: parsed.error.flatten() };
|
||||
}
|
||||
const { id: chatId, message_id: messageId } = req.params;
|
||||
const { value } = parsed.data;
|
||||
|
||||
const msg = await sql<{ id: string; role: string; metadata: MessageMetadata | null }[]>`
|
||||
SELECT id, role, metadata FROM messages WHERE id = ${messageId} AND chat_id = ${chatId}
|
||||
`;
|
||||
if (msg.length === 0) {
|
||||
reply.code(404);
|
||||
return { error: 'message not found' };
|
||||
}
|
||||
|
||||
// Only allow feedback on assistant messages.
|
||||
if (msg[0]!.role !== 'assistant') {
|
||||
reply.code(400);
|
||||
return { error: 'only assistant messages can receive feedback' };
|
||||
}
|
||||
|
||||
// Check if feedback already exists
|
||||
const existingMeta = msg[0]!.metadata;
|
||||
if (existingMeta && existingMeta.kind === 'feedback') {
|
||||
reply.code(409);
|
||||
return { error: 'feedback already recorded' };
|
||||
}
|
||||
|
||||
const feedbackMeta: MessageMetadata = {
|
||||
kind: 'feedback',
|
||||
value,
|
||||
chat_id: chatId,
|
||||
};
|
||||
|
||||
await sql`
|
||||
UPDATE messages
|
||||
SET metadata = ${sql.json(feedbackMeta as never)}, updated_at = clock_timestamp()
|
||||
WHERE id = ${messageId}
|
||||
`;
|
||||
|
||||
return { ok: true };
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -12,6 +12,15 @@ const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
|
||||
{ id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
|
||||
];
|
||||
|
||||
// Anthropic's /v1/models needs different headers (x-api-key + anthropic-version)
|
||||
// and a different response shape, so we surface a curated static list instead.
|
||||
const ANTHROPIC_STATIC_MODELS: ModelInfo[] = [
|
||||
{ id: 'claude-opus-4-8', object: 'model', created: 0, owned_by: 'anthropic' },
|
||||
{ id: 'claude-opus-4-7', object: 'model', created: 0, owned_by: 'anthropic' },
|
||||
{ id: 'claude-sonnet-4-6', object: 'model', created: 0, owned_by: 'anthropic' },
|
||||
{ id: 'claude-haiku-4-5', object: 'model', created: 0, owned_by: 'anthropic' },
|
||||
];
|
||||
|
||||
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
|
||||
app.get('/api/models', async (_req, reply) => {
|
||||
const providers: ModelCatalogProvider[] = [];
|
||||
@@ -62,6 +71,12 @@ export function registerModelRoutes(app: FastifyInstance, config: Config): void
|
||||
providers.push({ id: 'deepseek', label: 'DeepSeek', models: deepseekModels });
|
||||
}
|
||||
|
||||
// 3. If Anthropic is configured, add a synthetic "anthropic" provider group.
|
||||
if (config.ANTHROPIC_API_KEY) {
|
||||
const anthropicModels = ANTHROPIC_STATIC_MODELS.map((m) => ({ ...m, id: `anthropic/${m.id}` }));
|
||||
providers.push({ id: 'anthropic', label: 'Anthropic', models: anthropicModels });
|
||||
}
|
||||
|
||||
if (providers.length === 0) {
|
||||
reply.code(502);
|
||||
return { error: 'no models available from any provider' };
|
||||
|
||||
@@ -656,7 +656,6 @@ export function registerProjectRoutes(
|
||||
try { root = await resolveProjectRoot(projectPath); }
|
||||
catch (err) { if (err instanceof PathScopeError) { reply.code(404); return { error: (err as Error).message }; } throw err; }
|
||||
const target = body.data.path.startsWith('/') ? body.data.path : resolve(root, body.data.path);
|
||||
// Validate path stays within project root
|
||||
const realTarget = await realpath(target).catch(() => target);
|
||||
if (!realTarget.startsWith(root + sep) && realTarget !== root) {
|
||||
reply.code(403);
|
||||
@@ -668,14 +667,12 @@ export function registerProjectRoutes(
|
||||
await rename(tmp, target);
|
||||
return { ok: true };
|
||||
} catch (err) {
|
||||
// Clean up tmp on failure
|
||||
await access(tmp).then(() => rename(tmp, target + '.bak').catch(() => {})).catch(() => {});
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// GET /api/projects/:id/files
|
||||
app.get<{ Params: { id: string } }>(
|
||||
'/api/projects/:id/files',
|
||||
async (req, reply) => {
|
||||
|
||||
36
apps/server/src/routes/providers.ts
Normal file
36
apps/server/src/routes/providers.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { getProviderStatus, unloadProvider, unloadModel } from '../services/provider-status.js';
|
||||
|
||||
/**
 * Registers the provider status and unload HTTP routes.
 *
 * - `GET  /api/providers/status` sends the result of `getProviderStatus()`;
 *   any thrown error is reported as 502 with the error message in `detail`.
 * - `POST /api/providers/:providerId/unload` calls `unloadProvider`; a falsy
 *   result is reported as 404.
 * - `POST /api/providers/:providerId/unload/:modelId` calls `unloadModel`; a
 *   falsy result is reported as 404.
 *
 * Route params are typed through Fastify's route generics rather than
 * `as` assertions on `req.params`.
 *
 * @param app Fastify instance the routes are attached to.
 */
export function registerProviderRoutes(app: FastifyInstance): void {
  app.get('/api/providers/status', async (_req, reply) => {
    try {
      const result = await getProviderStatus();
      return reply.send(result);
    } catch (err) {
      return reply.status(502).send({
        error: 'failed to query provider statuses',
        detail: err instanceof Error ? err.message : String(err),
      });
    }
  });

  app.post<{ Params: { providerId: string } }>(
    '/api/providers/:providerId/unload',
    async (req, reply) => {
      const { providerId } = req.params;
      const ok = await unloadProvider(providerId);
      if (!ok) {
        return reply.status(404).send({ error: `provider ${providerId} not found or unload failed` });
      }
      return reply.send({ status: 'ok', providerId });
    },
  );

  app.post<{ Params: { providerId: string; modelId: string } }>(
    '/api/providers/:providerId/unload/:modelId',
    async (req, reply) => {
      const { providerId, modelId } = req.params;
      const ok = await unloadModel(providerId, modelId);
      if (!ok) {
        return reply.status(404).send({
          error: `unload failed for provider ${providerId}, model ${modelId}`,
        });
      }
      return reply.send({ status: 'ok', providerId, modelId });
    },
  );
}
|
||||
@@ -78,6 +78,19 @@ END $$;
|
||||
CREATE INDEX IF NOT EXISTS message_parts_hidden_idx
|
||||
ON message_parts (message_id) WHERE hidden_at IS NULL;
|
||||
|
||||
-- v2.x-workflow-sdk: add retry_count for future tool retry observability.
-- Idempotent: ADD COLUMN IF NOT EXISTS is a no-op (with a NOTICE) on re-run,
-- and checks the very table being altered rather than matching any
-- same-named table/column in another schema. Existing rows receive 0 via
-- DEFAULT; no existing retry logic — column is plumbing only.
ALTER TABLE message_parts
  ADD COLUMN IF NOT EXISTS retry_count int NOT NULL DEFAULT 0;
|
||||
|
||||
-- v1.13.13: extend message_parts.kind to allow 'synthesis'. Existing DBs were
|
||||
-- created with the pre-v1.13.13 CHECK constraint that did NOT include
|
||||
-- 'synthesis'; drop + re-add the constraint with the extended enum. Fresh
|
||||
@@ -219,7 +232,7 @@ CREATE TABLE IF NOT EXISTS settings (
|
||||
value JSONB NOT NULL
|
||||
);
|
||||
|
||||
INSERT INTO settings (key, value) VALUES ('default_model', '"qwen3.6-35b-a3b-mxfp4"') ON CONFLICT (key) DO NOTHING;
|
||||
INSERT INTO settings (key, value) VALUES ('default_model', '"sam-desktop/qwen3.6-35b-a3b"') ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- v1.12.1: deprecated session_panes table removed. Workspace pane state now
|
||||
-- lives in sessions.workspace_panes (jsonb), see below.
|
||||
|
||||
@@ -262,6 +262,31 @@ describe('buildMessagesPayload', async () => {
|
||||
expect(result[4]).toMatchObject({ role: 'assistant', content: 'here it is' });
|
||||
});
|
||||
|
||||
it('preserves every tool result across a multi-step tool turn', async () => {
|
||||
// Regression anchor (dcp-context-corruption-fix): a multi-step tool turn
|
||||
// must deliver every prior step's tool output to the payload. Tool rows
|
||||
// carry content='' with the output in tool_results; no pre-processing step
|
||||
// may drop them.
|
||||
const session = makeSession();
|
||||
const project = makeProject();
|
||||
const history: Message[] = [
|
||||
makeMessage('user', 'read x and y'),
|
||||
makeMessage('assistant', '', { tool_calls: [{ id: 'c1', name: 'view_file', args: {} }] }),
|
||||
makeMessage('tool', '', { tool_results: { tool_call_id: 'c1', output: 'OUT1', truncated: false } }),
|
||||
makeMessage('assistant', '', { tool_calls: [{ id: 'c2', name: 'view_file', args: {} }] }),
|
||||
makeMessage('tool', '', { tool_results: { tool_call_id: 'c2', output: 'OUT2', truncated: false } }),
|
||||
];
|
||||
const result = await buildMessagesPayload(session, project, history);
|
||||
const toolContents = result.filter((m) => m.role === 'tool').map((m) => m.content);
|
||||
expect(toolContents).toContain('OUT1');
|
||||
expect(toolContents).toContain('OUT2');
|
||||
// Both assistant turns retain their tool_calls (not stripped as orphans).
|
||||
const assistantsWithCalls = result.filter(
|
||||
(m) => m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0
|
||||
);
|
||||
expect(assistantsWithCalls).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('strips assistant tool_calls when matching tool results are missing', async () => {
|
||||
const session = makeSession();
|
||||
const project = makeProject();
|
||||
|
||||
@@ -376,7 +376,7 @@ describe('getModelContext — bare-id resolution through default provider (W3)',
|
||||
|
||||
const result = await getModelContext('deepseek-v4-pro');
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.n_ctx).toBe(131_072);
|
||||
expect(result!.n_ctx).toBe(1_000_000);
|
||||
expect(fetchSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -385,7 +385,7 @@ describe('getModelContext — bare-id resolution through default provider (W3)',
|
||||
|
||||
const result = await getModelContext('deepseek/deepseek-v4-pro');
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.n_ctx).toBe(131_072);
|
||||
expect(result!.n_ctx).toBe(1_000_000);
|
||||
expect(fetchSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -82,6 +82,46 @@ describe('partsFromAssistantMessage', () => {
|
||||
[1, 'tool_call'],
|
||||
]);
|
||||
});
|
||||
|
||||
it('Phase 2: signed reasoning blocks become one reasoning part each, supersede the joined string', () => {
|
||||
const parts = partsFromAssistantMessage({
|
||||
content: 'done',
|
||||
tool_calls: null,
|
||||
reasoning: 'block1block2', // the joined fallback — must be ignored here
|
||||
reasoningBlocks: [
|
||||
{ text: 'block1', signature: 'sig1' },
|
||||
{ text: 'block2', signature: 'sig2' },
|
||||
],
|
||||
});
|
||||
expect(parts.map((p) => [p.sequence, p.kind])).toEqual([
|
||||
[0, 'reasoning'],
|
||||
[1, 'reasoning'],
|
||||
[2, 'text'],
|
||||
]);
|
||||
expect(parts[0]!.payload).toEqual({ text: 'block1', signature: 'sig1' });
|
||||
expect(parts[1]!.payload).toEqual({ text: 'block2', signature: 'sig2' });
|
||||
});
|
||||
|
||||
it('Phase 2: an empty-text block with a signature is still persisted (display:omitted)', () => {
|
||||
const parts = partsFromAssistantMessage({
|
||||
content: '',
|
||||
tool_calls: null,
|
||||
reasoningBlocks: [{ text: '', signature: 'sig-only' }],
|
||||
});
|
||||
expect(parts.map((p) => [p.kind, p.payload])).toEqual([
|
||||
['reasoning', { text: '', signature: 'sig-only' }],
|
||||
]);
|
||||
});
|
||||
|
||||
it('Phase 2: empty reasoningBlocks falls back to the joined reasoning string', () => {
|
||||
const parts = partsFromAssistantMessage({
|
||||
content: 'x',
|
||||
tool_calls: null,
|
||||
reasoning: 'plain reasoning',
|
||||
reasoningBlocks: [],
|
||||
});
|
||||
expect(parts[0]!.payload).toEqual({ text: 'plain reasoning' });
|
||||
});
|
||||
});
|
||||
|
||||
describe('partsFromToolMessage', () => {
|
||||
|
||||
@@ -152,6 +152,47 @@ describe('resolveModelProvider — bare id legacy fallback', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Anthropic route
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('resolveModelProvider — anthropic route', () => {
|
||||
const cfg = { LLAMA_SWAP_URL: 'http://localhost:8080', ANTHROPIC_API_KEY: 'sk-ant' };
|
||||
|
||||
it('routes composite "anthropic/" id to the anthropic wire', () => {
|
||||
const r = resolveModelProvider('anthropic/claude-opus-4-8', cfg);
|
||||
expect(r.route).toBe('anthropic');
|
||||
expect(r.providerId).toBe('anthropic');
|
||||
expect(r.wireModelId).toBe('claude-opus-4-8');
|
||||
expect(r.baseUrl).toBe('https://api.anthropic.com');
|
||||
});
|
||||
|
||||
it('routes bare "claude-*" id to anthropic when configured', () => {
|
||||
const r = resolveModelProvider('claude-sonnet-4-6', cfg);
|
||||
expect(r.route).toBe('anthropic');
|
||||
expect(r.wireModelId).toBe('claude-sonnet-4-6');
|
||||
});
|
||||
|
||||
it('bare "claude-*" stays on swap when ANTHROPIC_API_KEY is unset', () => {
|
||||
const r = resolveModelProvider('claude-opus-4-8', { LLAMA_SWAP_URL: 'http://localhost:8080' });
|
||||
expect(r.route).toBe('swap');
|
||||
});
|
||||
|
||||
it('honors ANTHROPIC_BASE_URL override and strips trailing slash', () => {
|
||||
const r = resolveModelProvider('claude-opus-4-8', {
|
||||
...cfg,
|
||||
ANTHROPIC_BASE_URL: 'https://proxy.example.com/',
|
||||
});
|
||||
expect(r.baseUrl).toBe('https://proxy.example.com');
|
||||
});
|
||||
|
||||
it('resolveModelEndpoint throws for the anthropic wire (no OpenAI direct-fetch)', () => {
|
||||
expect(() => resolveModelEndpoint(cfg, 'anthropic/claude-opus-4-8')).toThrow(
|
||||
/anthropic wire has no OpenAI-compatible direct-fetch endpoint/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// upstreamModel uses the resolver
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -306,3 +347,65 @@ describe('resolveModelProvider — gateway routing (P7)', () => {
|
||||
expect((model as any).modelId).toBe('auto:code');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// P7 G3: bare auto:* footgun fix
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('resolveModelProvider — bare auto:* routing (G3)', () => {
|
||||
const config = { LLAMA_SWAP_URL: 'http://localhost:8080' };
|
||||
|
||||
it('bare "auto:code" with a gateway registered routes to gateway (not the default swap host)', () => {
|
||||
mockProvidersList = [
|
||||
...mockProvidersList,
|
||||
{ id: 'auto', label: 'Auto (gateway)', baseUrl: 'http://100.114.205.53:9503', kind: 'boocontrol-gateway' },
|
||||
];
|
||||
const r = resolveModelProvider('auto:code', config);
|
||||
expect(r.route).toBe('gateway');
|
||||
expect(r.baseUrl).toBe('http://100.114.205.53:9503');
|
||||
expect(r.wireModelId).toBe('auto:code');
|
||||
});
|
||||
|
||||
it('bare "auto:code" with NO gateway resolves to gateway_error, never swap', () => {
|
||||
const r = resolveModelProvider('auto:code', config);
|
||||
expect(r.route).toBe('gateway_error');
|
||||
expect(r.gatewayReason).toBe('offline');
|
||||
expect(r.baseUrl).not.toBe(config.LLAMA_SWAP_URL);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DeepSeek beta endpoint routing (A5)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('upstreamModel — DeepSeek beta routing', () => {
|
||||
const dsConfig = {
|
||||
LLAMA_SWAP_URL: 'http://localhost:8080',
|
||||
DEEPSEEK_API_KEY: 'sk-test',
|
||||
DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
|
||||
DEEPSEEK_BETA_BASE_URL: 'https://api.deepseek.com/beta',
|
||||
};
|
||||
|
||||
it('DeepSeek without useBeta returns a model (stable endpoint)', () => {
|
||||
const model = upstreamModel(dsConfig, 'deepseek-v4-pro');
|
||||
expect(model).toBeDefined();
|
||||
expect((model as any).modelId).toBe('deepseek-v4-pro');
|
||||
});
|
||||
|
||||
it('DeepSeek with useBeta returns a model (beta endpoint)', () => {
|
||||
const model = upstreamModel(dsConfig, 'deepseek-v4-pro', null, undefined, true);
|
||||
expect(model).toBeDefined();
|
||||
expect((model as any).modelId).toBe('deepseek-v4-pro');
|
||||
});
|
||||
|
||||
it('DeepSeek composite with useBeta returns a model', () => {
|
||||
const model = upstreamModel(dsConfig, 'deepseek/deepseek-v4-pro', null, undefined, true);
|
||||
expect(model).toBeDefined();
|
||||
expect((model as any).modelId).toBe('deepseek-v4-pro');
|
||||
});
|
||||
|
||||
it('non-DeepSeek with useBeta ignores the flag', () => {
|
||||
const model = upstreamModel(dsConfig, 'qwen3.6', null, undefined, true);
|
||||
expect(model).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -27,8 +27,6 @@ export function refreshToolNames(): void {
|
||||
}
|
||||
const DEFAULT_TEMPERATURE = 0.7;
|
||||
|
||||
// ---- Tool glob matching (v1.15.0-mcp-multi) --------------------------------
|
||||
|
||||
/**
|
||||
* Simple glob match for tool names. Supports `*` as a wildcard for any
|
||||
* characters. No `?` or `**` — tool names are flat (no path separators).
|
||||
@@ -81,8 +79,6 @@ export function slugify(name: string): string {
|
||||
.replace(/^-+|-+$/g, '');
|
||||
}
|
||||
|
||||
// ---- AGENTS.md parser ------------------------------------------------------
|
||||
|
||||
interface ParsedFrontmatter {
|
||||
temperature?: number;
|
||||
top_p?: number;
|
||||
@@ -108,6 +104,10 @@ interface ParsedFrontmatter {
|
||||
|
||||
// vDeepSeek: thinking effort for DeepSeek V4 models.
|
||||
reasoning_effort?: string;
|
||||
|
||||
// vDeepSeek: JSON output mode and prefix completion for DeepSeek V4.
|
||||
response_format?: Record<string, unknown>;
|
||||
prefix_content?: string;
|
||||
}
|
||||
|
||||
// P5: table-driven validation for the "soft-range" numeric frontmatter fields.
|
||||
@@ -362,6 +362,12 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
|
||||
steps: typeof fm.steps === 'number' ? fm.steps : null,
|
||||
|
||||
reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
|
||||
|
||||
response_format:
|
||||
fm.response_format && typeof fm.response_format === 'object' && (fm.response_format as Record<string, unknown>).type === 'json_object'
|
||||
? { type: 'json_object' as const }
|
||||
: null,
|
||||
prefix_content: typeof fm.prefix_content === 'string' && fm.prefix_content.length > 0 ? fm.prefix_content : null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -399,8 +405,6 @@ export function isAgentRegistryMarkdown(content: string): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ---- mtime-keyed cache + public API ----------------------------------------
|
||||
|
||||
interface CacheEntry {
|
||||
globalMtime: number | null;
|
||||
projectMtime: number | null;
|
||||
|
||||
@@ -31,8 +31,6 @@ export interface ArtifactWriteResult {
|
||||
|
||||
const ARTIFACT_SUBDIR = '.boocode/artifacts';
|
||||
|
||||
// ---- slug helpers ----
|
||||
|
||||
// Lowercase, replace non-alnum runs with '-', trim leading/trailing '-',
|
||||
// collapse repeated '-', cap at 60 chars. Empty → 'artifact'.
|
||||
function slugify(input: string): string {
|
||||
@@ -118,8 +116,6 @@ export function deriveHtmlTitle(html: string): string | null {
|
||||
return inner.slice(0, 80);
|
||||
}
|
||||
|
||||
// ---- HTML detection (B4) ----
|
||||
|
||||
// Returns the inner HTML content if `text` is a recognised HTML artifact:
|
||||
// - starts with <!DOCTYPE html> (case-insensitive, whitespace-trimmed), OR
|
||||
// - wrapped entirely in a fenced ```html ... ``` block.
|
||||
@@ -142,8 +138,6 @@ export function detectHtmlArtifact(text: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
// ---- path resolution ----
|
||||
|
||||
// Resolve `<projectRoot>/.boocode/artifacts/<filename>` and verify the
|
||||
// result stays under projectRoot. Mirrors the v1.13.18 codecontext_client.ts
|
||||
// approach: realpath projectRoot first, then prefix-check the candidate.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
ensureRunsDir,
|
||||
|
||||
@@ -24,7 +24,7 @@ import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
|
||||
import * as modelContextLookup from './model-context.js';
|
||||
import { SENTINEL_KINDS } from './inference/sentinels.js';
|
||||
import type { OpenAiMessage } from './inference/payload.js';
|
||||
import { resolveModelEndpoint } from './inference/provider.js';
|
||||
import { resolveModelEndpoint, resolveModelProvider } from './inference/provider.js';
|
||||
import type { HookRunner } from './hooks.js';
|
||||
|
||||
// v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
|
||||
@@ -58,8 +58,6 @@ export interface CompactionMessage {
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
// === overflow ===
|
||||
|
||||
// Returns the token budget at which overflow fires. Triggers compaction at
|
||||
// 85% of contextLimit (opencode session/overflow.ts pattern). Returns 0 when
|
||||
// the context limit is unknown — caller treats 0 as "do not trigger overflow",
|
||||
@@ -83,8 +81,6 @@ export function isOverflow(usage: Usage, contextLimit: number): boolean {
|
||||
return (usage.prompt_tokens + usage.completion_tokens) >= budget;
|
||||
}
|
||||
|
||||
// === selection ===
|
||||
|
||||
interface Turn {
|
||||
start: number;
|
||||
end: number;
|
||||
@@ -185,8 +181,6 @@ export function select(
|
||||
};
|
||||
}
|
||||
|
||||
// === file-provenance ledger (#12, Part B) ===
|
||||
|
||||
// Read tools whose path/target arg names a file or directory that was read.
|
||||
// BooChat (apps/server) is read-only — there are no write tools, so the ledger
|
||||
// only ever has a "Files Read" side (apps/coder can add "Modified" later).
|
||||
@@ -233,8 +227,6 @@ export function buildFilesReadContext(head: CompactionMessage[]): string | null
|
||||
return ['## Files Read', ...paths.map((p) => `- ${p}`)].join('\n');
|
||||
}
|
||||
|
||||
// === prompt assembly ===
|
||||
|
||||
// Build the final user message that asks the model to (re)produce the
|
||||
// anchored summary. `context` is reserved for future plugin injection;
|
||||
// callers pass [] today.
|
||||
@@ -336,8 +328,6 @@ export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
|
||||
return out;
|
||||
}
|
||||
|
||||
// === llama-swap call ===
|
||||
|
||||
// Non-streaming completion. Opencode streams; for a one-shot summary call a
|
||||
// single POST is less code and the latency hit is acceptable (the user
|
||||
// doesn't see this directly — useSessionStream emits the toast + refetches
|
||||
@@ -379,8 +369,6 @@ async function callLlm(
|
||||
return { content, promptTokens, completionTokens };
|
||||
}
|
||||
|
||||
// === entry point ===
|
||||
|
||||
export interface ProcessInput {
|
||||
sql: Sql;
|
||||
config: Config;
|
||||
@@ -523,7 +511,14 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
let result: CompletionResult | undefined;
|
||||
try {
|
||||
// 7. Single completion (no tools). Throws on llama-swap failure.
|
||||
result = await callLlm(config, session.model, payload, log);
|
||||
// Anthropic chat models speak /v1/messages, which callLlm's OpenAI-shaped
|
||||
// direct fetch can't target — summarize with a local model instead (a fast/
|
||||
// default model's summary is acceptable; the alternative is a hard crash).
|
||||
const summaryModel =
|
||||
resolveModelProvider(session.model, config).route === 'anthropic'
|
||||
? (config.FAST_MODEL ?? config.DEFAULT_MODEL)
|
||||
: session.model;
|
||||
result = await callLlm(config, summaryModel, payload, log);
|
||||
|
||||
// 7b. v1.11.3: fetch the model's true context window from the provider's
|
||||
// /upstream/<wireModelId>/props (the streaming completion doesn't carry it).
|
||||
|
||||
@@ -27,8 +27,6 @@ import { spawn } from 'node:child_process';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
// ─── Events ───────────────────────────────────────────────────────────────
|
||||
|
||||
export type HookEvent =
|
||||
| 'PreToolUse'
|
||||
| 'PostToolUse'
|
||||
@@ -46,8 +44,6 @@ const ALL_EVENTS: HookEvent[] = [
|
||||
'PostCompact',
|
||||
];
|
||||
|
||||
// ─── Config ────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface HookConfig {
|
||||
/** Glob or exact tool name to match (PreToolUse/PostToolUse only). Omit or '*' for all. */
|
||||
match?: string;
|
||||
@@ -61,8 +57,6 @@ export interface HooksConfig {
|
||||
hooks: Partial<Record<HookEvent, HookConfig[]>>;
|
||||
}
|
||||
|
||||
// ─── Payloads ──────────────────────────────────────────────────────────────
|
||||
|
||||
export interface PreToolUsePayload {
|
||||
event: 'PreToolUse';
|
||||
session_id: string;
|
||||
@@ -118,21 +112,16 @@ export type HookPayload =
|
||||
| PreCompactPayload
|
||||
| PostCompactPayload;
|
||||
|
||||
// ─── Response ──────────────────────────────────────────────────────────────
|
||||
|
||||
export type HookDecision = 'pass' | 'warn' | 'block';
|
||||
|
||||
export interface HookResponse {
|
||||
decision?: HookDecision;
|
||||
reason?: string;
|
||||
/** When present, replaces the original tool args / user prompt. */
|
||||
updated_input?: Record<string, unknown> | string;
|
||||
/** Injected into the model's context for the next turn. */
|
||||
additional_context?: string;
|
||||
}
|
||||
|
||||
// ─── Runner ────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface HookRunner {
|
||||
/** Run all hooks for the given event. Returns the effective response. */
|
||||
run(event: HookEvent, payload: HookPayload, log?: FastifyBaseLogger): Promise<HookResponse>;
|
||||
@@ -154,7 +143,6 @@ export function loadHooksConfig(path: string): HooksConfig {
|
||||
hooksConfig = {
|
||||
hooks: { ...parsed.hooks },
|
||||
};
|
||||
// Validate event names
|
||||
for (const event of Object.keys(hooksConfig.hooks)) {
|
||||
if (!ALL_EVENTS.includes(event as HookEvent)) {
|
||||
console.warn(`hooks: unknown event '${event}' in ${path} — ignoring`);
|
||||
@@ -273,7 +261,6 @@ async function runSingleHook(
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse stdout as JSON response
|
||||
if (out) {
|
||||
try {
|
||||
const parsed = JSON.parse(out) as HookResponse;
|
||||
@@ -291,7 +278,6 @@ async function runSingleHook(
|
||||
resolve({ decision: 'pass' });
|
||||
});
|
||||
|
||||
// Write payload to stdin
|
||||
const json = JSON.stringify(payload);
|
||||
child.stdin.write(json);
|
||||
child.stdin.end();
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { ThinkSplitter } from '../think-splitter.js';
|
||||
|
||||
/**
 * Test helper: pushes every delta, in order, through a fresh ThinkSplitter,
 * flushes it once at the end, and returns the concatenation of everything the
 * splitter emitted on the `reasoning` and `text` channels.
 */
function run(deltas: string[]): { reasoning: string; text: string } {
  const splitter = new ThinkSplitter();

  // One emitted chunk per pushed delta, followed by whatever flush() releases.
  const emitted = deltas.map((delta) => splitter.push(delta));
  emitted.push(splitter.flush());

  return {
    reasoning: emitted.reduce((acc, chunk) => acc + chunk.reasoning, ''),
    text: emitted.reduce((acc, chunk) => acc + chunk.text, ''),
  };
}
|
||||
|
||||
describe('ThinkSplitter', () => {
|
||||
it('passes through ordinary content unchanged (no arm)', () => {
|
||||
expect(run(['Hello ', 'world'])).toEqual({ reasoning: '', text: 'Hello world' });
|
||||
});
|
||||
|
||||
it('splits a whole-buffer think block', () => {
|
||||
expect(run(['<think>reasoning here</think>answer'])).toEqual({
|
||||
reasoning: 'reasoning here',
|
||||
text: 'answer',
|
||||
});
|
||||
});
|
||||
|
||||
it('discards whitespace before <think> and after </think>', () => {
|
||||
expect(run([' <think>r</think>\n\nanswer'])).toEqual({ reasoning: 'r', text: 'answer' });
|
||||
});
|
||||
|
||||
it('handles the open tag split across deltas', () => {
|
||||
expect(run(['<thi', 'nk>cot</think>out'])).toEqual({ reasoning: 'cot', text: 'out' });
|
||||
});
|
||||
|
||||
it('handles the close tag split across deltas (the core reason this exists)', () => {
|
||||
expect(run(['<think>abc</thi', 'nk>tail'])).toEqual({ reasoning: 'abc', text: 'tail' });
|
||||
});
|
||||
|
||||
it('does not hijack content that only mentions the tag mid-stream', () => {
|
||||
expect(run(['use the ', '<think> tag'])).toEqual({ reasoning: '', text: 'use the <think> tag' });
|
||||
});
|
||||
|
||||
it('emits reasoning incrementally while inside, holding partial close tags', () => {
|
||||
const s = new ThinkSplitter();
|
||||
expect(s.push('<think>aaa')).toEqual({ reasoning: 'aaa', text: '' });
|
||||
// a lone "</" could be the start of the closer, so it is held back
|
||||
expect(s.push('bbb</')).toEqual({ reasoning: 'bbb', text: '' });
|
||||
expect(s.push('think>done')).toEqual({ reasoning: '', text: 'done' });
|
||||
});
|
||||
|
||||
it('treats an unterminated think block at stream end as reasoning', () => {
|
||||
expect(run(['<think>never closed'])).toEqual({ reasoning: 'never closed', text: '' });
|
||||
});
|
||||
|
||||
it('passes through a tag-like opener that is not <think>', () => {
|
||||
expect(run(['<div>hello</div>'])).toEqual({ reasoning: '', text: '<div>hello</div>' });
|
||||
});
|
||||
});
|
||||
@@ -6,7 +6,6 @@
|
||||
* without pulling in a full diff library.
|
||||
*/
|
||||
|
||||
// Write-tool names that can produce file diffs.
|
||||
export const WRITE_TOOL_NAMES = new Set([
|
||||
'edit_file',
|
||||
'create_file',
|
||||
@@ -68,7 +67,6 @@ export function computeDiff(oldStr: string, newStr: string, filePath: string): s
|
||||
const start = Math.max(0, firstDiff - contextBefore);
|
||||
const end = Math.min(maxLen - 1, lastDiff + contextAfter);
|
||||
|
||||
// Build the unified diff hunk
|
||||
const hunkLines: string[] = [];
|
||||
const hunkOldStart = start + 1; // 1-indexed
|
||||
const hunkNewStart = start + 1;
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { deduplicate } from '../strategies/deduplication.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('deduplicate', () => {
|
||||
it('removes consecutive identical tool_call+tool_result pairs', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'user', content: 'search for x' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||
// Duplicate pair
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '2' },
|
||||
];
|
||||
|
||||
const { messages: result, stats } = deduplicate(messages);
|
||||
expect(result).toHaveLength(3); // user + first pair
|
||||
expect(stats.removedCount).toBe(2);
|
||||
});
|
||||
|
||||
it('preserves non-duplicate content', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result2', tool_call_id: '2' }, // Different result
|
||||
];
|
||||
|
||||
const { messages: result, stats } = deduplicate(messages);
|
||||
expect(result).toHaveLength(4);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -1,22 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { toDcpMessages, fromDcpMessages } from '../messages.js';
|
||||
|
||||
describe('toDcpMessages', () => {
|
||||
it('converts user messages', () => {
|
||||
const result = toDcpMessages([{ role: 'user', content: 'hello' }]);
|
||||
expect(result[0].role).toBe('user');
|
||||
expect(result[0].content).toBe('hello');
|
||||
});
|
||||
|
||||
it('marks Error: content as isError', () => {
|
||||
const result = toDcpMessages([{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' }]);
|
||||
expect(result[0].isError).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('fromDcpMessages', () => {
|
||||
it('round-trips messages', () => {
|
||||
const original = [{ role: 'user', content: 'hello' }];
|
||||
expect(fromDcpMessages(toDcpMessages(original))).toEqual(original);
|
||||
});
|
||||
});
|
||||
@@ -1,33 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { purgeErrors } from '../strategies/purge-errors.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('purgeErrors', () => {
|
||||
it('removes tool results where content starts with Error:', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' },
|
||||
{ role: 'tool', content: '{"files":[]}', tool_call_id: '2' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(stats.removedCount).toBe(1);
|
||||
});
|
||||
|
||||
it('removes empty tool results', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: '', tool_call_id: '1' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(0);
|
||||
expect(stats.removedCount).toBe(1);
|
||||
});
|
||||
|
||||
it('preserves valid tool results', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: '{"files":["a.ts"]}', tool_call_id: '1' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -1,25 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { transformMessages } from '../transform.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('transformMessages', () => {
|
||||
it('applies dedup then purge in order', () => {
|
||||
const input: DcpMessage[] = [
|
||||
{ role: 'user', content: 'hello' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result', tool_call_id: '1' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result', tool_call_id: '2' }, // Dup
|
||||
];
|
||||
|
||||
const { messages, stats } = transformMessages('test-chat', input);
|
||||
expect(stats.removedCount).toBeGreaterThan(0);
|
||||
expect(messages.length).toBeLessThan(input.length);
|
||||
});
|
||||
|
||||
it('handles empty input', () => {
|
||||
const { messages, stats } = transformMessages('empty', []);
|
||||
expect(messages).toHaveLength(0);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -1,4 +0,0 @@
|
||||
export { transformMessages } from './transform.js';
|
||||
export type { DcpMessage } from './messages.js';
|
||||
export { toDcpMessages, fromDcpMessages } from './messages.js';
|
||||
export { getDcpState, clearDcpState } from './state.js';
|
||||
@@ -1,34 +0,0 @@
|
||||
// DCP message shape adapter.
|
||||
// Converts between BooCode MessagePart[] and the DCP internal shape.
|
||||
// Clean-room implementation — no AGPL source copied.
|
||||
|
||||
export interface DcpMessage {
|
||||
role: 'user' | 'assistant' | 'tool';
|
||||
content: string;
|
||||
tool_call_id?: string;
|
||||
tool_calls?: Array<{ id: string; name: string; arguments: string }>;
|
||||
isError?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Convert untyped message parts into the DCP internal shape.
 *
 * Copies `role` and `content` (a null/undefined content becomes ''), carries
 * over `tool_call_id` and `tool_calls` when truthy, and sets `isError` when the
 * part is already flagged or when it is a tool message whose string content
 * starts with "Error:".
 *
 * @param parts Raw message parts; only the fields named above are read.
 * @returns One DcpMessage per input part, in the same order.
 */
export function toDcpMessages(parts: any[]): DcpMessage[] {
  return parts.map((part) => {
    const msg: DcpMessage = { role: part.role, content: part.content ?? '' };
    if (part.tool_call_id) msg.tool_call_id = part.tool_call_id;
    if (part.tool_calls) msg.tool_calls = part.tool_calls;

    // `parts` is untyped, so content is not guaranteed to be a string. Only
    // sniff the "Error:" prefix on real strings — calling startsWith on e.g.
    // an array of content parts would throw a TypeError.
    const hasErrorPrefix =
      part.role === 'tool' &&
      typeof part.content === 'string' &&
      part.content.startsWith('Error:');

    if (part.isError || hasErrorPrefix) msg.isError = true;
    return msg;
  });
}
|
||||
|
||||
/**
 * Convert DCP messages back into plain message objects.
 *
 * Every output object carries `role` and `content`; `tool_call_id`,
 * `tool_calls` and `isError` are only emitted when truthy on the input, so
 * absent optional fields stay absent.
 *
 * @param msgs DCP messages to convert.
 * @returns One plain object per message, in the same order.
 */
export function fromDcpMessages(msgs: DcpMessage[]): any[] {
  const plain: any[] = [];
  for (const { role, content, tool_call_id, tool_calls, isError } of msgs) {
    const part: Record<string, unknown> = { role, content };
    if (tool_call_id) part.tool_call_id = tool_call_id;
    if (tool_calls) part.tool_calls = tool_calls;
    if (isError) part.isError = true;
    plain.push(part);
  }
  return plain;
}
|
||||
@@ -1,27 +0,0 @@
|
||||
// Per-chat session state for DCP.
|
||||
// Tracks last transform timestamp and message count to avoid re-processing.
|
||||
|
||||
/** What is remembered about the most recent transform of one chat. */
interface ChatDcpState {
  /** `Date.now()` at the moment the state was recorded. */
  lastTransformAt: number;
  /** Message count passed to `setDcpState` for that run. */
  lastMessageCount: number;
}

// In-memory registry keyed by chat id; entries are removed only by clearDcpState.
const chatStates = new Map<string, ChatDcpState>();

/** Look up the recorded state for `chatId`, or `undefined` when none exists. */
export function getDcpState(chatId: string): ChatDcpState | undefined {
  const entry = chatStates.get(chatId);
  return entry;
}

/** Record (or overwrite) the state for `chatId` with the current time and `messageCount`. */
export function setDcpState(chatId: string, messageCount: number): void {
  const entry: ChatDcpState = {
    lastTransformAt: Date.now(),
    lastMessageCount: messageCount,
  };
  chatStates.set(chatId, entry);
}

/** Drop any recorded state for `chatId`. */
export function clearDcpState(chatId: string): void {
  chatStates.delete(chatId);
}

/**
 * Report whether `messageCount` differs from the count recorded for `chatId`.
 * A chat with no recorded state always yields `true`.
 */
export function shouldTransform(chatId: string, messageCount: number): boolean {
  const previous = chatStates.get(chatId);
  return previous === undefined || previous.lastMessageCount !== messageCount;
}
|
||||
@@ -1,50 +0,0 @@
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
/**
 * Collapse runs of back-to-back identical tool invocations.
 *
 * A "pair" is an assistant message with tool calls immediately followed by the
 * tool message answering its first call. When a pair is followed by one or
 * more pairs that repeat the same tool name, the same arguments and the same
 * result content, only the first pair is kept.
 *
 * A repeat is only dropped when its assistant message carries exactly one tool
 * call; dropping an assistant message with further calls would leave the tool
 * results of those other calls without a matching call.
 *
 * The input array and its messages are not mutated.
 *
 * @param messages Conversation in order.
 * @returns The pruned list plus stats: `removedCount` is the number of
 *   messages dropped, `freedTokens` a rough estimate (ceil(chars / 4)) computed
 *   from the content of the messages that were actually dropped.
 */
export function deduplicate(messages: DcpMessage[]): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
  const result: DcpMessage[] = [];
  let removedCount = 0;
  let freedTokens = 0;
  let i = 0;

  while (i < messages.length) {
    const call = messages[i]!;
    result.push(call);
    i += 1;

    // Is this the start of a call/result pair?
    const keptCall = call.role === 'assistant' ? call.tool_calls?.[0] : undefined;
    const keptResult = messages[i];
    if (
      !keptCall ||
      !keptResult ||
      keptResult.role !== 'tool' ||
      keptResult.tool_call_id !== keptCall.id
    ) {
      continue;
    }
    result.push(keptResult);
    i += 1;

    // Swallow every immediately following pair that repeats the kept one, so
    // a run of three or more identical pairs collapses to a single pair.
    for (;;) {
      const dupCall = messages[i];
      const dupResult = messages[i + 1];
      if (!dupCall || !dupResult) break;
      if (dupCall.role !== 'assistant' || dupResult.role !== 'tool') break;

      const repeated = dupCall.tool_calls?.[0];
      if (!repeated || dupCall.tool_calls?.length !== 1) break;

      const isRepeat =
        dupResult.tool_call_id === repeated.id &&
        repeated.name === keptCall.name &&
        repeated.arguments === keptCall.arguments &&
        dupResult.content === keptResult.content;
      if (!isRepeat) break;

      removedCount += 2;
      // Count the two messages that are dropped, not the ones that are kept.
      freedTokens += Math.ceil(dupCall.content.length / 4);
      freedTokens += Math.ceil(dupResult.content.length / 4);
      i += 2;
    }
  }

  return { messages: result, stats: { removedCount, freedTokens } };
}
|
||||
@@ -1,34 +0,0 @@
|
||||
// Purge-errors strategy — removes failed/empty tool_result entries.
|
||||
// Clean-room implementation.
|
||||
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
// Content prefixes that mark a tool result as a failure. Matching is
// case-sensitive; anything starting with "Error: " already starts with
// "Error:", so no separate entry with the trailing space is needed.
const ERROR_PREFIXES = ['Error:', 'error:'];
const DEFAULT_WINDOW = 5;

/**
 * Drop tool results that carry no useful information.
 *
 * A message is removed when its role is 'tool' and any of these holds:
 * it is flagged `isError`, its content starts with one of ERROR_PREFIXES, or
 * its content is empty / whitespace-only. Messages of every other role are
 * kept untouched. The input array is not mutated.
 *
 * NOTE(review): only the tool message is removed; the assistant `tool_calls`
 * entry that requested it is left in place — confirm downstream consumers
 * tolerate a call without a result.
 *
 * @param messages Conversation in order.
 * @param windowSize Currently has no effect — the parameter is accepted but
 *   never read, so every message is considered regardless of its position.
 * @returns The surviving messages plus stats: `removedCount` is the number of
 *   dropped tool results, `freedTokens` a rough estimate (ceil(chars / 4)).
 */
export function purgeErrors(
  messages: DcpMessage[],
  windowSize: number = DEFAULT_WINDOW,
): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
  const result: DcpMessage[] = [];
  let removedCount = 0;
  let freedTokens = 0;

  for (const msg of messages) {
    const isUselessToolResult =
      msg.role === 'tool' &&
      (msg.isError ||
        ERROR_PREFIXES.some((prefix) => msg.content.startsWith(prefix)) ||
        msg.content.trim() === '');

    if (isUselessToolResult) {
      removedCount++;
      freedTokens += Math.ceil(msg.content.length / 4);
      continue;
    }
    result.push(msg);
  }

  return { messages: result, stats: { removedCount, freedTokens } };
}
|
||||
@@ -1,52 +0,0 @@
|
||||
// Transform orchestrator — runs DCP strategies in sequence.
|
||||
// Clean-room implementation.
|
||||
|
||||
import type { DcpMessage } from './messages.js';
|
||||
import { deduplicate } from './strategies/deduplication.js';
|
||||
import { purgeErrors } from './strategies/purge-errors.js';
|
||||
import { getDcpState, setDcpState, shouldTransform } from './state.js';
|
||||
|
||||
export interface TransformStats {
|
||||
removedCount: number;
|
||||
freedTokens: number;
|
||||
dedupRemoved: number;
|
||||
purgeRemoved: number;
|
||||
}
|
||||
|
||||
export interface TransformResult {
|
||||
messages: DcpMessage[];
|
||||
stats: TransformStats;
|
||||
}
|
||||
|
||||
/**
 * Run the DCP strategies over `messages` in a fixed order — deduplicate first,
 * then purgeErrors on the deduplicated list — and report combined stats.
 *
 * The strategies always run. The pruned list is not cached anywhere, so
 * skipping the work when the message count matched the previous call could
 * only hand back the raw, unpruned input (with all-zero stats): a retry of the
 * same turn then saw the duplicates and error results the first attempt had
 * dropped. Both strategies are single-pass and do not mutate their input, so
 * re-running them is cheap and yields the same result for the same input.
 *
 * @param chatId Chat whose state entry is refreshed via setDcpState.
 * @param messages Conversation in order; not mutated.
 * @returns The pruned messages plus per-strategy and total removal stats.
 */
export function transformMessages(chatId: string, messages: DcpMessage[]): TransformResult {
  // Step 1: collapse repeated tool call/result pairs.
  const dedupResult = deduplicate(messages);

  // Step 2: drop failed / empty tool results from what is left.
  const purgeResult = purgeErrors(dedupResult.messages);

  const dedupRemoved = dedupResult.stats.removedCount;
  const purgeRemoved = purgeResult.stats.removedCount;

  // Record the run against the *input* length, as before, so getDcpState()
  // keeps reporting when this chat was last transformed.
  setDcpState(chatId, messages.length);

  return {
    messages: purgeResult.messages,
    stats: {
      removedCount: dedupRemoved + purgeRemoved,
      freedTokens: dedupResult.stats.freedTokens + purgeResult.stats.freedTokens,
      dedupRemoved,
      purgeRemoved,
    },
  };
}
|
||||
@@ -7,8 +7,7 @@ import {
|
||||
} from '../artifacts.js';
|
||||
import * as modelContext from '../model-context.js';
|
||||
import { maybeFlagForCompaction } from './payload.js';
|
||||
import { insertParts, partsFromAssistantMessage } from './parts.js';
|
||||
import type { PartInsert } from './parts.js';
|
||||
import { insertParts, partsFromAssistantMessage, type PartInsert } from "./parts.js";
|
||||
import { stripToolMarkup } from './tool-call-parser.js';
|
||||
import type { InferenceContext, StreamResult, TurnArgs } from './types.js';
|
||||
|
||||
@@ -232,6 +231,7 @@ export async function finalizeCompletion(
|
||||
content,
|
||||
tool_calls: null,
|
||||
reasoning: result.reasoning,
|
||||
reasoningBlocks: result.reasoningBlocks,
|
||||
}).map((p) => ({
|
||||
...p,
|
||||
message_id: assistantMessageId,
|
||||
|
||||
@@ -40,11 +40,13 @@ export async function insertParts(sql: Sql, parts: PartInsert[]): Promise<void>
|
||||
sequence: p.sequence,
|
||||
kind: p.kind,
|
||||
payload: sql.json(p.payload as never),
|
||||
retry_count: 0,
|
||||
})),
|
||||
'message_id',
|
||||
'sequence',
|
||||
'kind',
|
||||
'payload',
|
||||
'retry_count',
|
||||
)}
|
||||
`;
|
||||
}
|
||||
@@ -62,10 +64,24 @@ export function partsFromAssistantMessage(args: {
|
||||
// Most rows have none — only models with separate reasoning channels
|
||||
// (qwen3.6 etc.) populate this.
|
||||
reasoning?: string;
|
||||
// Phase 2 (anthropic): per-thinking-block reasoning with signatures. When
|
||||
// present (and non-empty) this supersedes `reasoning` — one reasoning part
|
||||
// per block, each carrying its signature for verbatim replay.
|
||||
reasoningBlocks?: Array<{ text: string; signature?: string }>;
|
||||
}): Omit<PartInsert, 'message_id'>[] {
|
||||
const out: Omit<PartInsert, 'message_id'>[] = [];
|
||||
let seq = 0;
|
||||
if (args.reasoning && args.reasoning.length > 0) {
|
||||
const blocks = args.reasoningBlocks?.filter((b) => b.text.length > 0 || b.signature);
|
||||
if (blocks && blocks.length > 0) {
|
||||
for (const b of blocks) {
|
||||
out.push({
|
||||
sequence: seq,
|
||||
kind: 'reasoning',
|
||||
payload: { text: b.text, ...(b.signature ? { signature: b.signature } : {}) },
|
||||
});
|
||||
seq += 1;
|
||||
}
|
||||
} else if (args.reasoning && args.reasoning.length > 0) {
|
||||
out.push({ sequence: seq, kind: 'reasoning', payload: { text: args.reasoning } });
|
||||
seq += 1;
|
||||
}
|
||||
|
||||
@@ -28,6 +28,10 @@ export interface OpenAiMessage {
|
||||
// this into the AI SDK ReasoningPart when forwarding to the model so
|
||||
// reasoning models can resume mid-thought across tool-call boundaries.
|
||||
reasoning?: string;
|
||||
// Phase 2 (anthropic): per-thinking-block reasoning with signatures, from the
|
||||
// same reasoning_parts rows. toModelMessages replays each signed block
|
||||
// verbatim (the joined `reasoning` string can't carry per-block signatures).
|
||||
reasoning_blocks?: Array<{ text: string; signature?: string }>;
|
||||
}
|
||||
|
||||
// v1.12: buildSystemPrompt lives in services/system-prompt.ts. It awaits the
|
||||
@@ -185,6 +189,13 @@ export async function buildMessagesPayload(
|
||||
// message are rare but concat preserves ordering. Skip when absent.
|
||||
if (m.reasoning_parts && m.reasoning_parts.length > 0) {
|
||||
msg.reasoning = m.reasoning_parts.map((p) => p.text ?? '').join('');
|
||||
// Carry per-block text+signature for verbatim anthropic replay.
|
||||
if (m.reasoning_parts.some((p) => p.signature)) {
|
||||
msg.reasoning_blocks = m.reasoning_parts.map((p) => ({
|
||||
text: p.text ?? '',
|
||||
...(p.signature ? { signature: p.signature } : {}),
|
||||
}));
|
||||
}
|
||||
}
|
||||
const hasPayload =
|
||||
(msg.content != null && msg.content.trim().length > 0) ||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||
import { createDeepSeek } from '@ai-sdk/deepseek';
|
||||
import { createAnthropic } from '@ai-sdk/anthropic';
|
||||
import type { LanguageModel } from 'ai';
|
||||
import { getLlamaProviders, parseModelRef } from '../llama-providers.js';
|
||||
import { GATEWAY_KIND, isGatewayVirtualModel } from '@boocode/contracts/gateway';
|
||||
|
||||
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
|
||||
@@ -53,44 +55,57 @@ export function isDeepSeekModel(modelId: string): boolean {
|
||||
return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
|
||||
}
|
||||
|
||||
let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
|
||||
// Cache keyed by apiKey+baseURL so a runtime env change (and resetDeepSeekProvider)
|
||||
// can't hand back a provider built with stale credentials.
|
||||
const deepseekProviderCache = new Map<string, ReturnType<typeof createDeepSeek>>();
|
||||
|
||||
function getDeepSeekProvider(
|
||||
apiKey: string,
|
||||
baseURL: string,
|
||||
): ReturnType<typeof createDeepSeek> {
|
||||
if (!deepseekProviderCache) {
|
||||
deepseekProviderCache = createDeepSeek({
|
||||
apiKey,
|
||||
baseURL,
|
||||
});
|
||||
const key = `${apiKey}||${baseURL}`;
|
||||
let provider = deepseekProviderCache.get(key);
|
||||
if (!provider) {
|
||||
provider = createDeepSeek({ apiKey, baseURL });
|
||||
deepseekProviderCache.set(key, provider);
|
||||
}
|
||||
return deepseekProviderCache;
|
||||
return provider;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider-aware resolver (W2, D-2, D-3)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// P7: 'gateway' routes to the BooControl auto:* gateway (OpenAI-compatible,
|
||||
// does its own policy routing + failover). 'gateway_error' is the
|
||||
// present-but-unhealthy / orphaned-session state: the session selected an
|
||||
// auto:* model but the gateway provider is missing/disabled, so we surface a
|
||||
// clean error instead of silently mis-routing to LLAMA_SWAP_URL.
|
||||
export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error';
|
||||
export type InferenceRoute = 'swap' | 'deepseek' | 'gateway' | 'gateway_error' | 'anthropic';
|
||||
|
||||
/** Provider registry `kind` marking the BooControl routing gateway. */
|
||||
export const GATEWAY_KIND = 'boocontrol-gateway';
|
||||
const ANTHROPIC_MODEL_PREFIX = 'claude-';
|
||||
|
||||
/**
|
||||
* Whether a (bare) wire model id is a gateway virtual model. Used to detect an
|
||||
* orphaned auto:* session whose gateway registry entry was removed — the id
|
||||
* still looks like a gateway model, so resolve to gateway_error, never swap.
|
||||
*/
|
||||
export function isGatewayVirtualModel(wireModelId: string): boolean {
|
||||
return wireModelId === 'auto' || wireModelId.startsWith('auto:');
|
||||
/**
 * Legacy bare-id detection for Anthropic models: true when the id begins with
 * the "claude-" prefix. Counterpart of isDeepSeekModel for the anthropic route.
 */
export function isAnthropicModel(modelId: string): boolean {
  const head = modelId.slice(0, ANTHROPIC_MODEL_PREFIX.length);
  return head === ANTHROPIC_MODEL_PREFIX;
}
|
||||
|
||||
// Cache keyed by apiKey+baseURL, same rationale as the DeepSeek cache.
|
||||
const anthropicProviderCache = new Map<string, ReturnType<typeof createAnthropic>>();
|
||||
|
||||
/**
 * Look up (or lazily build) the Anthropic provider for an apiKey/baseURL pair.
 *
 * Providers are memoized in anthropicProviderCache under `${apiKey}||${baseURL}`
 * (empty string when no baseURL is given), so a changed key or URL always gets
 * a freshly constructed provider. baseURL is only forwarded to createAnthropic
 * when it is a non-empty string.
 */
function getAnthropicProvider(apiKey: string, baseURL?: string): ReturnType<typeof createAnthropic> {
  const cacheKey = `${apiKey}||${baseURL ?? ''}`;

  const cached = anthropicProviderCache.get(cacheKey);
  if (cached) return cached;

  const settings = baseURL ? { apiKey, baseURL } : { apiKey };
  const created = createAnthropic(settings);
  anthropicProviderCache.set(cacheKey, created);
  return created;
}
|
||||
|
||||
/**
 * Drop every memoized Anthropic provider so the next lookup rebuilds one from
 * the current configuration (e.g. after env vars change at runtime).
 */
export function resetAnthropicProvider(): void {
  anthropicProviderCache.clear();
}
|
||||
|
||||
export { GATEWAY_KIND, isGatewayVirtualModel } from '@boocode/contracts/gateway';
|
||||
|
||||
export interface ResolvedModel {
|
||||
/** Routing destination. */
|
||||
route: InferenceRoute;
|
||||
@@ -114,6 +129,9 @@ interface ConfigLike {
|
||||
LLAMA_SWAP_URL: string;
|
||||
DEEPSEEK_API_KEY?: string;
|
||||
DEEPSEEK_BASE_URL?: string;
|
||||
DEEPSEEK_BETA_BASE_URL?: string;
|
||||
ANTHROPIC_API_KEY?: string;
|
||||
ANTHROPIC_BASE_URL?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -137,8 +155,6 @@ export function resolveModelProvider(
|
||||
const deepseekConfigured = !!config.DEEPSEEK_API_KEY;
|
||||
const deepseekBaseUrl = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
|
||||
// --- DeepSeek routing ---
|
||||
// Explicit provider id "deepseek" → DeepSeek SDK.
|
||||
if (providerId === 'deepseek' && deepseekConfigured) {
|
||||
return {
|
||||
route: 'deepseek',
|
||||
@@ -161,13 +177,30 @@ export function resolveModelProvider(
|
||||
};
|
||||
}
|
||||
|
||||
// --- Local provider routing ---
|
||||
// Hosted Anthropic Claude (different wire: /v1/messages, x-api-key). Routed
|
||||
// by provider id "anthropic" or, for bare ids, the legacy "claude-*" prefix
|
||||
// when ANTHROPIC_API_KEY is configured. baseUrl is the API base for the
|
||||
// streaming path; resolveModelEndpoint (direct OpenAI-shaped fetch) does NOT
|
||||
// support this wire and throws.
|
||||
const anthropicConfigured = !!config.ANTHROPIC_API_KEY;
|
||||
const anthropicBaseUrl = (config.ANTHROPIC_BASE_URL ?? 'https://api.anthropic.com').replace(/\/+$/, '');
|
||||
if (providerId === 'anthropic' && anthropicConfigured) {
|
||||
return { route: 'anthropic', baseUrl: anthropicBaseUrl, wireModelId, isLegacyBareId, providerId: 'anthropic' };
|
||||
}
|
||||
if (isLegacyBareId && isAnthropicModel(wireModelId) && anthropicConfigured) {
|
||||
return { route: 'anthropic', baseUrl: anthropicBaseUrl, wireModelId, isLegacyBareId: true, providerId: 'anthropic' };
|
||||
}
|
||||
|
||||
if (isLegacyBareId && isGatewayVirtualModel(wireModelId)) {
|
||||
const gw = providers.providers.find((p) => p.kind === GATEWAY_KIND);
|
||||
if (gw) {
|
||||
return { route: 'gateway', baseUrl: gw.baseUrl, wireModelId, isLegacyBareId, providerId: gw.id };
|
||||
}
|
||||
return { route: 'gateway_error', baseUrl: '', wireModelId, isLegacyBareId, providerId, gatewayReason: 'offline' };
|
||||
}
|
||||
|
||||
const provider = providers.providers.find((p) => p.id === providerId);
|
||||
|
||||
// --- Gateway routing (P7) ---
|
||||
// A known gateway-kind provider → route to the gateway as an OpenAI-compatible
|
||||
// upstream (it does its own policy routing). The gateway forwards X-Boo-Source
|
||||
// to the chosen target so attribution survives the extra hop.
|
||||
if (provider && provider.kind === GATEWAY_KIND) {
|
||||
return {
|
||||
route: 'gateway',
|
||||
@@ -242,15 +275,26 @@ export function upstreamModel(
|
||||
modelId: string,
|
||||
agent?: AgentLike | null,
|
||||
source?: string,
|
||||
useBeta?: boolean,
|
||||
): LanguageModel {
|
||||
const resolved = resolveModelProvider(modelId, config);
|
||||
if (resolved.route === 'deepseek') {
|
||||
const baseUrl = useBeta
|
||||
? (config.DEEPSEEK_BETA_BASE_URL ?? 'https://api.deepseek.com/beta')
|
||||
: resolved.baseUrl;
|
||||
return getDeepSeekProvider(
|
||||
config.DEEPSEEK_API_KEY!,
|
||||
resolved.baseUrl,
|
||||
baseUrl,
|
||||
).chat(resolved.wireModelId);
|
||||
}
|
||||
|
||||
if (resolved.route === 'anthropic') {
|
||||
const baseURL = config.ANTHROPIC_BASE_URL
|
||||
? `${config.ANTHROPIC_BASE_URL.replace(/\/+$/, '')}/v1`
|
||||
: undefined;
|
||||
return getAnthropicProvider(config.ANTHROPIC_API_KEY!, baseURL)(resolved.wireModelId);
|
||||
}
|
||||
|
||||
// P7: gateway is OpenAI-compatible — same adapter as swap, pointed at the
|
||||
// gateway baseUrl. The gateway resolves the policy + forwards X-Boo-Source.
|
||||
if (resolved.route === 'gateway') {
|
||||
@@ -285,6 +329,16 @@ export function resolveModelEndpoint(
|
||||
};
|
||||
}
|
||||
|
||||
// Anthropic speaks /v1/messages (x-api-key, blocks content) — the OpenAI-shaped
|
||||
// direct fetch these callers use cannot target it. Compaction guards against
|
||||
// this by summarizing with a local model; surface a clear error if anything
|
||||
// else direct-fetches a Claude model.
|
||||
if (resolved.route === 'anthropic') {
|
||||
throw new Error(
|
||||
`anthropic wire has no OpenAI-compatible direct-fetch endpoint (compaction/task-model): ${modelId}`,
|
||||
);
|
||||
}
|
||||
|
||||
// P7: orphaned auto:* session with no gateway — fail loud (no swap fallback).
|
||||
if (resolved.route === 'gateway_error') {
|
||||
throw new Error(
|
||||
@@ -304,5 +358,5 @@ export function resolveModelEndpoint(
|
||||
|
||||
/** Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime). */
|
||||
export function resetDeepSeekProvider(): void {
|
||||
deepseekProviderCache = null;
|
||||
deepseekProviderCache.clear();
|
||||
}
|
||||
|
||||
@@ -26,7 +26,6 @@ import {
|
||||
buildMessagesPayload,
|
||||
loadContext,
|
||||
} from './payload.js';
|
||||
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
|
||||
import {
|
||||
finalizeCompletion,
|
||||
finalizeEmpty,
|
||||
@@ -79,7 +78,7 @@ async function detectAndRunBuild(
|
||||
const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
|
||||
const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
|
||||
try {
|
||||
const out = await new Promise<string>((resolve, reject) => {
|
||||
const out = await new Promise<string>((resolve, _reject) => {
|
||||
execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
|
||||
(err, stdout, stderr) => {
|
||||
if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
@@ -230,7 +229,7 @@ export async function runGraph(
|
||||
}
|
||||
|
||||
// -- PLAN node ------------------------------------------------------------
|
||||
// Top-of-loop gate → compaction → loadContext → DCP → buildPayload → stream
|
||||
// Top-of-loop gate → compaction → loadContext → buildPayload → stream
|
||||
|
||||
async function planNode(
|
||||
ctx: InferenceContext,
|
||||
@@ -311,18 +310,6 @@ async function planNode(
|
||||
const projectRoot = await resolveProjectRoot(iterProject.path);
|
||||
state.projectRoot = projectRoot;
|
||||
|
||||
// 4. DCP transform
|
||||
try {
|
||||
const dcpMsgs = toDcpMessages(history);
|
||||
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
|
||||
if (stats.removedCount > 0) {
|
||||
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
|
||||
history = fromDcpMessages(pruned) as typeof history;
|
||||
}
|
||||
} catch (err) {
|
||||
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
|
||||
}
|
||||
|
||||
// 5. Log step boundary
|
||||
ctx.log.info(
|
||||
{ sessionId, chatId, step: state.stepNumber, assistantMessageId: state.assistantMessageId },
|
||||
|
||||
@@ -11,9 +11,10 @@ import type { Agent, ToolCall } from '../../types/api.js';
|
||||
import type { ToolJsonSchema } from '../tools.js';
|
||||
import type { OpenAiMessage } from './payload.js';
|
||||
import { extractToolCallBlocks } from './tool-call-parser.js';
|
||||
import { ThinkSplitter } from './think-splitter.js';
|
||||
import { classifyStreamError } from './stream-error-classifier.js';
|
||||
import type { StreamResult } from './types.js';
|
||||
import { isDeepSeekModel, upstreamModel } from './provider.js';
|
||||
import { resolveModelProvider, upstreamModel } from './provider.js';
|
||||
import {
|
||||
jsonSchema,
|
||||
streamText,
|
||||
@@ -54,6 +55,10 @@ export interface StreamOptions {
|
||||
// vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
|
||||
// API param for deepseek-v4-flash / deepseek-v4-pro models.
|
||||
reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
|
||||
// vDeepSeek: JSON output mode. When set, model outputs valid JSON.
|
||||
response_format?: { type: 'json_object' };
|
||||
// vDeepSeek: prefix content for chat prefix completion.
|
||||
prefix_content?: string;
|
||||
}
|
||||
|
||||
// P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
|
||||
@@ -78,6 +83,8 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
|
||||
dry_allowed_length: agent?.dry_allowed_length ?? undefined,
|
||||
dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
|
||||
reasoning_effort: agent?.reasoning_effort ?? undefined,
|
||||
response_format: agent?.response_format ?? undefined,
|
||||
prefix_content: agent?.prefix_content ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -125,7 +132,11 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
|
||||
}
|
||||
if (m.role === 'assistant') {
|
||||
const hasTools = m.tool_calls && m.tool_calls.length > 0;
|
||||
const hasReasoning = typeof m.reasoning === 'string' && m.reasoning.length > 0;
|
||||
// Anthropic signed thinking blocks (Phase 2) must be replayed verbatim per
|
||||
// block with their signature; other reasoning uses the joined string.
|
||||
const signedBlocks = (m.reasoning_blocks ?? []).filter((b) => b.signature);
|
||||
const hasReasoning =
|
||||
(typeof m.reasoning === 'string' && m.reasoning.length > 0) || signedBlocks.length > 0;
|
||||
if (!hasTools && !hasReasoning) {
|
||||
// Bare text assistant (string content). null content + no tool_calls
|
||||
// is degenerate but harmless to forward.
|
||||
@@ -136,12 +147,20 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
|
||||
// assistant content array. Reasoning models (qwen3.6) consume their
|
||||
// prior reasoning context to resume mid-thought across tool boundaries.
|
||||
const parts: Array<
|
||||
| { type: 'reasoning'; text: string }
|
||||
| { type: 'reasoning'; text: string; providerOptions?: Record<string, Record<string, JSONValue>> }
|
||||
| { type: 'text'; text: string }
|
||||
| { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown }
|
||||
> = [];
|
||||
if (hasReasoning) {
|
||||
parts.push({ type: 'reasoning', text: m.reasoning! });
|
||||
if (signedBlocks.length > 0) {
|
||||
for (const b of m.reasoning_blocks!) {
|
||||
parts.push({
|
||||
type: 'reasoning',
|
||||
text: b.text,
|
||||
...(b.signature ? { providerOptions: { anthropic: { signature: b.signature } } } : {}),
|
||||
});
|
||||
}
|
||||
} else if (typeof m.reasoning === 'string' && m.reasoning.length > 0) {
|
||||
parts.push({ type: 'reasoning', text: m.reasoning });
|
||||
}
|
||||
if (m.content && m.content.length > 0) {
|
||||
parts.push({ type: 'text', text: m.content });
|
||||
@@ -187,12 +206,13 @@ function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
|
||||
// No `execute` field: BooCode runs tools itself in tool-phase.ts; streamText
|
||||
// surfaces the tool-call parts via fullStream and we capture them for the
|
||||
// outer loop to dispatch.
|
||||
function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<typeof tool>> {
|
||||
function buildAiTools(schemas: ToolJsonSchema[], strict?: boolean): Record<string, ReturnType<typeof tool>> {
|
||||
const out: Record<string, ReturnType<typeof tool>> = {};
|
||||
for (const s of schemas) {
|
||||
out[s.function.name] = tool({
|
||||
description: s.function.description,
|
||||
inputSchema: jsonSchema(s.function.parameters),
|
||||
...(strict ? { strict } : {}),
|
||||
});
|
||||
}
|
||||
return out;
|
||||
@@ -235,7 +255,19 @@ export async function streamCompletion(
|
||||
): Promise<StreamResult> {
|
||||
const aiMessages = toModelMessages(messages);
|
||||
const hasTools = opts.tools !== null && opts.tools.length > 0;
|
||||
const aiTools = hasTools ? buildAiTools(opts.tools!) : undefined;
|
||||
// DeepSeek detection via providerId (handles both bare "deepseek-*" and
|
||||
// composite "deepseek/model" — JD1 fix).
|
||||
const resolvedModel = resolveModelProvider(model, ctx.config);
|
||||
const isDsModel = resolvedModel.providerId === 'deepseek';
|
||||
// Anthropic's Messages API rejects the llama.cpp sampler extensions and
|
||||
// out-of-range temperature/top_p (Opus 4.x), so we omit all of them for the
|
||||
// anthropic route and let the model's defaults apply.
|
||||
const isAnthropic = resolvedModel.route === 'anthropic';
|
||||
// Extended thinking is opt-in per agent via reasoning_effort (same gate as
|
||||
// DeepSeek). Adaptive lets Claude choose depth; display:'summarized' surfaces
|
||||
// the reasoning to BooChat (Opus 4.7+ default 'omitted' = empty-text blocks).
|
||||
const anthropicThinkingEnabled = isAnthropic && !!opts.reasoning_effort && opts.reasoning_effort !== 'off';
|
||||
const aiTools = hasTools ? buildAiTools(opts.tools!, isDsModel) : undefined;
|
||||
|
||||
const startedAt = Date.now();
|
||||
// v1.13.1-C: accumulate reasoning text across reasoning-delta parts.
|
||||
@@ -244,6 +276,18 @@ export async function streamCompletion(
|
||||
// Replaces the v1.13.1-A counter-only diagnostic.
|
||||
let reasoningAccumulated = '';
|
||||
|
||||
// Phase 2 (anthropic): reasoning grouped per thinking block (keyed by the
|
||||
// stream part id = content-block index), each carrying its signature. The
|
||||
// signature arrives as a reasoning-delta with empty text + providerMetadata.
|
||||
// Insertion order is preserved (Map), so replay order matches the model's.
|
||||
const reasoningBlockMap = new Map<string, { text: string; signature?: string }>();
|
||||
|
||||
// Peel inline <think>...</think> reasoning out of the text-delta channel for
|
||||
// local models that don't use a structured reasoning channel. Arms only when
|
||||
// content starts with <think>; otherwise a verbatim pass-through (no-op for
|
||||
// models whose reasoning already arrives via reasoning-delta).
|
||||
const thinkSplitter = new ThinkSplitter();
|
||||
|
||||
// v1.13.3: experimental_repairToolCall keeps the stream alive when the
|
||||
// model emits a malformed tool call (bad JSON args, unknown name, etc.).
|
||||
// Without a repair function streamText throws and the WHOLE stream dies;
|
||||
@@ -274,13 +318,14 @@ export async function streamCompletion(
|
||||
// the openai-compatible provider dropped it with an "unsupported feature: topK"
|
||||
// warning and min_p was never wired at all, so both were dead on the wire
|
||||
// before this. They now go through the same extraBody path as the new params.
|
||||
const samplerBody = buildSamplerProviderOptions(opts);
|
||||
// Omit llama.cpp sampler extensions on the anthropic route (rejected there).
|
||||
const samplerBody = isAnthropic ? undefined : buildSamplerProviderOptions(opts);
|
||||
|
||||
// vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
|
||||
let deepseekProviderOptions:
|
||||
| { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
|
||||
| undefined;
|
||||
if (isDeepSeekModel(model)) {
|
||||
if (isDsModel) {
|
||||
const dsEffort = opts.reasoning_effort;
|
||||
const thinkingEnabled = dsEffort && dsEffort !== 'off';
|
||||
deepseekProviderOptions = {
|
||||
@@ -305,20 +350,40 @@ export async function streamCompletion(
|
||||
? AbortSignal.any([signal, stallAc.signal])
|
||||
: stallAc.signal;
|
||||
|
||||
// vDeepSeek: chat prefix completion (B2). When prefix_content is set and
|
||||
// the model is DeepSeek, inject an assistant message with prefix=true.
|
||||
// If the last message is already an assistant, replace its content to
|
||||
// avoid two consecutive assistant messages (AV5 fix).
|
||||
let prefixMessages = aiMessages;
|
||||
const useBeta = isDsModel && (hasTools || !!opts.prefix_content);
|
||||
if (opts.prefix_content && isDsModel) {
|
||||
const prefixMsg = { role: 'assistant' as const, content: opts.prefix_content };
|
||||
const last = prefixMessages[prefixMessages.length - 1];
|
||||
if (last && last.role === 'assistant') {
|
||||
prefixMessages = [...prefixMessages.slice(0, -1), prefixMsg];
|
||||
} else {
|
||||
prefixMessages = [...prefixMessages, prefixMsg];
|
||||
}
|
||||
}
|
||||
|
||||
const result = streamText({
|
||||
model: upstreamModel(ctx.config, model, agent ?? null, 'boochat'),
|
||||
messages: aiMessages,
|
||||
model: upstreamModel(ctx.config, model, agent ?? null, 'boochat', useBeta),
|
||||
messages: prefixMessages,
|
||||
...(aiTools
|
||||
? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
|
||||
: {}),
|
||||
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
||||
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
||||
...(samplerBody || deepseekProviderOptions
|
||||
...(!isAnthropic && typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||
...(!isAnthropic && typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
||||
...(!isAnthropic && typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
||||
...(!isAnthropic && opts.response_format ? { responseFormat: { type: 'json_object' } } : {}),
|
||||
...(samplerBody || deepseekProviderOptions || anthropicThinkingEnabled
|
||||
? {
|
||||
providerOptions: {
|
||||
...(samplerBody ? { openaiCompatible: samplerBody } : {}),
|
||||
...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
|
||||
...(anthropicThinkingEnabled
|
||||
? { anthropic: { thinking: { type: 'adaptive', display: 'summarized' } } }
|
||||
: {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
@@ -341,7 +406,12 @@ export async function streamCompletion(
|
||||
bumpStallTimer();
|
||||
switch (part.type) {
|
||||
case 'text-delta': {
|
||||
pendingBuffer += part.text;
|
||||
// Peel any inline <think> reasoning before tool-call extraction; the
|
||||
// reasoning span accumulates exactly like a structured reasoning-delta.
|
||||
const split = thinkSplitter.push(part.text);
|
||||
if (split.reasoning) reasoningAccumulated += split.reasoning;
|
||||
if (!split.text) break;
|
||||
pendingBuffer += split.text;
|
||||
// v1.13.16: unified extraction. The helper finds the earliest-opening
|
||||
// complete <tool_call> or <invoke> block, flushes prose between/around
|
||||
// them, holds any partial opener for the next chunk, and silently
|
||||
@@ -373,11 +443,18 @@ export async function streamCompletion(
|
||||
break;
|
||||
}
|
||||
case 'reasoning-delta': {
|
||||
// v1.13.1-C: accumulate; finalizeCompletion / executeToolPhase
|
||||
// dual-write the resulting text as a kind='reasoning' part.
|
||||
// v1.13.1-C: accumulate the joined string (compaction prose + non-
|
||||
// anthropic replay). Phase 2: also group per block id and capture the
|
||||
// Anthropic signature (arrives on a delta with empty text).
|
||||
if (typeof part.text === 'string') {
|
||||
reasoningAccumulated += part.text;
|
||||
}
|
||||
const blk = reasoningBlockMap.get(part.id) ?? { text: '' };
|
||||
if (typeof part.text === 'string') blk.text += part.text;
|
||||
const sig = (part.providerMetadata as Record<string, Record<string, unknown> | undefined> | undefined)
|
||||
?.anthropic?.signature;
|
||||
if (typeof sig === 'string') blk.signature = sig;
|
||||
reasoningBlockMap.set(part.id, blk);
|
||||
break;
|
||||
}
|
||||
case 'finish': {
|
||||
@@ -403,6 +480,12 @@ export async function streamCompletion(
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve any text the splitter was holding (an unterminated <think> block
|
||||
// becomes reasoning; a held partial opener becomes text).
|
||||
const splitTail = thinkSplitter.flush();
|
||||
if (splitTail.reasoning) reasoningAccumulated += splitTail.reasoning;
|
||||
if (splitTail.text) pendingBuffer += splitTail.text;
|
||||
|
||||
// v1.13.1-A: drain any buffered partial XML opener as plain text. The
|
||||
// pre-AI-SDK path did this on stream end too — better to leak `<tool_c`
|
||||
// than vanish the text.
|
||||
@@ -467,6 +550,13 @@ export async function streamCompletion(
|
||||
);
|
||||
}
|
||||
|
||||
const reasoningBlocks = Array.from(reasoningBlockMap.values()).filter(
|
||||
(b) => b.text.length > 0 || b.signature,
|
||||
);
|
||||
// Only signed blocks need verbatim per-block replay; text-only reasoning uses
|
||||
// the joined string, so omit reasoningBlocks unless something was signed.
|
||||
const hasSignedReasoning = reasoningBlocks.some((b) => b.signature);
|
||||
|
||||
return {
|
||||
finishReason,
|
||||
content,
|
||||
@@ -474,6 +564,7 @@ export async function streamCompletion(
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
reasoning: reasoningAccumulated,
|
||||
...(hasSignedReasoning ? { reasoningBlocks } : {}),
|
||||
// vDeepSeek: optional usage breakdown populated when the provider returns
|
||||
// structured usage (cache hit tokens, reasoning tokens).
|
||||
cacheReadTokens: cacheReadTokens ?? undefined,
|
||||
|
||||
100
apps/server/src/services/inference/think-splitter.ts
Normal file
100
apps/server/src/services/inference/think-splitter.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
// ThinkSplitter — peels inline <think>...</think> reasoning out of streamed text
|
||||
// content. Some local models (QwQ, DeepSeek-R1 distills, MiniMax) served raw
|
||||
// emit their chain-of-thought inline in the assistant `content` channel rather
|
||||
// than on a structured reasoning channel; BooCode's stream adapter otherwise
|
||||
// treats that as ordinary prose. This splitter routes the reasoning span to the
|
||||
// reasoning accumulator and passes the rest through unchanged.
|
||||
//
|
||||
// Ported from deepseek-reasonix internal/provider/openai/think.go. Two
|
||||
// guarantees make it safe to run on every text delta:
|
||||
// 1. It only ARMS if the turn's content begins with <think> (after leading
|
||||
// whitespace), so an answer that merely mentions the tag is never hijacked.
|
||||
// 2. For any content that does not start with <think> it degrades to a
|
||||
// verbatim pass-through (a no-op for models on a structured reasoning
|
||||
// channel).
|
||||
// It buffers partial closing tags across chunk boundaries so a `</thi` split
|
||||
// across two deltas is not mistaken for prose.
|
||||
|
||||
const OPEN = '<think>';
const CLOSE = '</think>';
const LEADING_WS = /^[ \t\r\n]+/;

/**
 * Splitter phases:
 *  - probe:       still deciding whether the content opens with <think>.
 *  - inside:      within the reasoning block, scanning for </think>.
 *  - afterClose:  </think> was seen; whitespace separating the reasoning from
 *                 the answer is still being discarded.
 *  - passthrough: every subsequent chunk is forwarded verbatim.
 */
type State = 'probe' | 'inside' | 'afterClose' | 'passthrough';

export interface SplitResult {
  /** Text classified as reasoning (the inside of a <think> block). */
  reasoning: string;
  /** Text classified as ordinary content to pass through. */
  text: string;
}

/**
 * Longest proper suffix of `s` that is a prefix of `marker`. Used to hold back
 * the bytes that might be the start of a closing tag split across chunks. Never
 * returns the full marker length (that is a complete match, handled separately).
 */
function markerSuffixLen(s: string, marker: string): number {
  const max = Math.min(marker.length - 1, s.length);
  for (let n = max; n > 0; n--) {
    if (marker.startsWith(s.slice(s.length - n))) return n;
  }
  return 0;
}

/**
 * Stateful, single-stream splitter. Create one per streamed completion.
 *
 * Feed every text delta to push(); call flush() once when the stream ends.
 * The classification is independent of how the stream is chunked: the opening
 * tag, the closing tag, and the whitespace that follows the closing tag may
 * each be split across any number of deltas.
 */
export class ThinkSplitter {
  private state: State = 'probe';
  private buf = '';

  /**
   * Classify one streamed chunk.
   *
   * @param s The next text delta, in stream order.
   * @returns The portion of the input (plus any previously held bytes) that
   *   can already be classified. Both fields may be empty while the splitter
   *   is holding a possible partial tag.
   */
  push(s: string): SplitResult {
    if (this.state === 'passthrough') return { reasoning: '', text: s };
    if (this.state === 'afterClose') return { reasoning: '', text: this.skipLeadingWs(s) };
    if (this.state === 'inside') return this.scanClose(s);
    return this.probe(s);
  }

  /**
   * Resolve any buffered remainder at stream end: an unterminated <think>
   * block's held bytes are reasoning; a held partial opener is plain text.
   */
  flush(): SplitResult {
    const rest = this.buf;
    this.buf = '';
    if (this.state === 'inside') return { reasoning: rest, text: '' };
    return { reasoning: '', text: rest };
  }

  /** Decide whether the turn opens with <think> (leading whitespace ignored). */
  private probe(s: string): SplitResult {
    this.buf += s;
    const trimmed = this.buf.replace(LEADING_WS, '');
    if (trimmed.startsWith(OPEN)) {
      this.state = 'inside';
      this.buf = '';
      return this.scanClose(trimmed.slice(OPEN.length));
    }
    // Not enough yet to decide. Hold only if still a viable <think> prefix
    // (this includes whitespace-only input so far).
    if (trimmed.length < OPEN.length && OPEN.startsWith(trimmed)) {
      return { reasoning: '', text: '' };
    }
    return this.drainPassthrough();
  }

  /** Accumulate reasoning until the closing tag is seen. */
  private scanClose(s: string): SplitResult {
    this.buf += s;
    const idx = this.buf.indexOf(CLOSE);
    if (idx >= 0) {
      const reasoning = this.buf.slice(0, idx);
      const rest = this.buf.slice(idx + CLOSE.length);
      this.buf = '';
      // Keep trimming across chunk boundaries: the whitespace after </think>
      // usually arrives in a later delta than the tag itself.
      this.state = 'afterClose';
      return { reasoning, text: this.skipLeadingWs(rest) };
    }
    // No full closing tag yet — emit everything except a possible partial tag.
    const keep = markerSuffixLen(this.buf, CLOSE);
    const reasoning = this.buf.slice(0, this.buf.length - keep);
    this.buf = this.buf.slice(this.buf.length - keep);
    return { reasoning, text: '' };
  }

  /**
   * Drop leading whitespace following the closing tag. Switches to verbatim
   * pass-through as soon as the first visible character has been emitted.
   */
  private skipLeadingWs(s: string): string {
    const text = s.replace(LEADING_WS, '');
    if (text.length > 0) this.state = 'passthrough';
    return text;
  }

  /** Give up probing: release the held bytes untouched and stop inspecting. */
  private drainPassthrough(): SplitResult {
    const text = this.buf;
    this.buf = '';
    this.state = 'passthrough';
    return { reasoning: '', text };
  }
}
|
||||
@@ -82,7 +82,6 @@ function repairValue(
|
||||
const isInteger = schemaType === 'integer' || schemaType === 'number';
|
||||
const isString = schemaType === 'string';
|
||||
|
||||
// --- Array repair: wrap bare value or empty object ---
|
||||
if (isArray) {
|
||||
if (!Array.isArray(value)) {
|
||||
if (typeof value === 'string') {
|
||||
@@ -114,7 +113,6 @@ function repairValue(
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Object repair: recurse into properties ---
|
||||
if (isObject && typeof value === 'object' && value !== null && !Array.isArray(value)) {
|
||||
const props = (schema.properties as Record<string, unknown>) ?? {};
|
||||
const repaired: Record<string, unknown> = {};
|
||||
@@ -129,7 +127,6 @@ function repairValue(
|
||||
return repaired;
|
||||
}
|
||||
|
||||
// --- String repair: unwrap markdown autolinks ---
|
||||
if (isString && typeof value === 'string') {
|
||||
const match = value.match(MARKDOWN_AUTOLINK_RE);
|
||||
if (match) {
|
||||
@@ -139,7 +136,6 @@ function repairValue(
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Boolean coercion ---
|
||||
if (isBoolean && typeof value === 'string') {
|
||||
const lower = value.toLowerCase();
|
||||
if (lower === 'true') {
|
||||
@@ -153,7 +149,6 @@ function repairValue(
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Integer coercion: "42.0" → 42 ---
|
||||
if (isInteger && typeof value === 'string') {
|
||||
const num = Number(value);
|
||||
if (!Number.isNaN(num)) {
|
||||
@@ -163,13 +158,11 @@ function repairValue(
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Integer coercion: boolean → 0/1 ---
|
||||
if (isInteger && typeof value === 'boolean') {
|
||||
repairs.push({ field, kind: 'coerced_boolean_to_integer', detail: `Coerced boolean ${value} → ${value ? 1 : 0} for '${field}'` });
|
||||
return value ? 1 : 0;
|
||||
}
|
||||
|
||||
// --- Empty string to null for optional fields ---
|
||||
if (value === '' && !required) {
|
||||
repairs.push({ field, kind: 'empty_string_to_undefined', detail: `Converted empty string for optional '${field}'` });
|
||||
return undefined;
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import type { Agent, Session, ToolCall } from '../../types/api.js';
|
||||
import * as modelContext from '../model-context.js';
|
||||
import { PathScopeError } from '../path_guard.js';
|
||||
import { TOOLS_BY_NAME } from '../tools.js';
|
||||
import type { ToolExecCtx } from '../tools.js';
|
||||
import { TOOLS_BY_NAME, type ToolExecCtx } from "../tools.js";
|
||||
import { matchToolGlob } from '../agents.js';
|
||||
import { maybeFlagForCompaction } from './payload.js';
|
||||
import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
|
||||
@@ -214,6 +213,7 @@ export async function executeToolPhase(
|
||||
content,
|
||||
tool_calls: toolCalls,
|
||||
reasoning: result.reasoning,
|
||||
reasoningBlocks: result.reasoningBlocks,
|
||||
}).map((p) => ({
|
||||
...p,
|
||||
message_id: assistantMessageId,
|
||||
@@ -404,7 +404,7 @@ export async function executeToolPhase(
|
||||
});
|
||||
const tres = await executeToolCall(
|
||||
projectRoot, tc, session.allowed_read_paths,
|
||||
{ sql: ctx.sql, sessionId },
|
||||
{ sql: ctx.sql, sessionId, toolCallId: tc.id },
|
||||
ctx.hooks, sessionId,
|
||||
);
|
||||
// tool_trace instrumentation - finish
|
||||
|
||||
@@ -21,7 +21,6 @@ import {
|
||||
buildMessagesPayload,
|
||||
loadContext,
|
||||
} from './payload.js';
|
||||
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
|
||||
import {
|
||||
finalizeCompletion,
|
||||
finalizeEmpty,
|
||||
@@ -88,9 +87,8 @@ async function detectAndRunBuild(
|
||||
const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
|
||||
const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
|
||||
|
||||
// Run the build.
|
||||
try {
|
||||
const out = await new Promise<string>((resolve, reject) => {
|
||||
const out = await new Promise<string>((resolve, _reject) => {
|
||||
execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
|
||||
(err, stdout, stderr) => {
|
||||
if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
@@ -136,7 +134,6 @@ export { buildMessagesPayload } from './payload.js';
|
||||
// turn.ts type-hub-and-leaf near-cycle. They are re-exported from there via
|
||||
// inference/index.ts for the public surface.
|
||||
|
||||
|
||||
export async function runAssistantTurn(
|
||||
ctx: InferenceContext,
|
||||
args: TurnArgs,
|
||||
@@ -211,7 +208,6 @@ export async function runAssistantTurn(
|
||||
let pendingRecoveryNote: string | undefined = args.pendingRecoveryNote;
|
||||
|
||||
if (session.state_graph_enabled) {
|
||||
// ---- optional state graph path ----
|
||||
const gProjectRoot = await resolveProjectRoot(project.path);
|
||||
const graphResult = await runGraph(ctx, args, { effectiveCap, budget, agent, projectRoot: gProjectRoot });
|
||||
stepNumber = graphResult.stepNumber;
|
||||
@@ -221,7 +217,6 @@ export async function runAssistantTurn(
|
||||
// mistakeTracker is the same object reference (mutated in place by the graph).
|
||||
} else {
|
||||
while (stepNumber < effectiveCap) {
|
||||
// ---- top-of-loop gate: doom-loop, then budget (pure decision) ----
|
||||
const decision = decideStep({ recentToolCalls, toolsUsed, budget });
|
||||
if (decision.kind === 'doom') {
|
||||
// Need fresh history for the summary.
|
||||
@@ -244,10 +239,6 @@ export async function runAssistantTurn(
|
||||
}
|
||||
// decision.kind === 'stream' → proceed with compaction + stream + tools.
|
||||
|
||||
// ---- compaction check ----
|
||||
// v1.11: if the prior turn flagged this chat for compaction, run it
|
||||
// before loadContext so we read post-compaction history. Swallow
|
||||
// failures and proceed with un-compacted history.
|
||||
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
|
||||
SELECT needs_compaction FROM chats WHERE id = ${chatId}
|
||||
`;
|
||||
@@ -267,7 +258,6 @@ export async function runAssistantTurn(
|
||||
}
|
||||
}
|
||||
|
||||
// ---- load context (must re-load each iteration — new messages since last step) ----
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
if (!loaded) {
|
||||
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
|
||||
@@ -279,17 +269,6 @@ export async function runAssistantTurn(
|
||||
}
|
||||
const projectRoot = await resolveProjectRoot(iterProject.path);
|
||||
|
||||
try {
|
||||
const dcpMsgs = toDcpMessages(history);
|
||||
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
|
||||
if (stats.removedCount > 0) {
|
||||
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
|
||||
history = fromDcpMessages(pruned) as typeof history;
|
||||
}
|
||||
} catch (err) {
|
||||
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
|
||||
}
|
||||
|
||||
// v1.14.0: log step boundary for instrumentation. step_start parts are in
|
||||
// the schema CHECK but not emitted here — writing to the assistant message
|
||||
// before the stream phase creates a sequence-0 collision with
|
||||
@@ -297,7 +276,6 @@ export async function runAssistantTurn(
|
||||
// since the frontend doesn't render step boundaries in v1.14.
|
||||
ctx.log.info({ sessionId, chatId, step: stepNumber, assistantMessageId }, 'step_start');
|
||||
|
||||
// ---- build messages + stream phase ----
|
||||
const messages = await buildMessagesPayload(iterSession, iterProject, history, agent, ctx.log);
|
||||
const webToolsEnabled =
|
||||
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
|
||||
@@ -331,7 +309,6 @@ export async function runAssistantTurn(
|
||||
break;
|
||||
}
|
||||
|
||||
// ---- non-tool finish → finalize and exit ----
|
||||
if (result.toolCalls.length === 0) {
|
||||
// vWhale: Stop hook (best-effort, non-blocking).
|
||||
if (ctx.hooks) {
|
||||
@@ -347,15 +324,6 @@ export async function runAssistantTurn(
|
||||
break;
|
||||
}
|
||||
|
||||
// ---- steps: 0 edge case ----
|
||||
// effectiveCap check above guarantees we're inside the loop, but this
|
||||
// guard handles the theoretical case where the model emits tool calls
|
||||
// on step 0 when effectiveCap would have been 0 (impossible since the
|
||||
// while condition prevents entry, but kept for safety). If effectiveCap
|
||||
// is 1 and we're on step 0, tool calls ARE executed — steps counts
|
||||
// iterations, not post-first-stream.
|
||||
|
||||
// ---- tool phase ----
|
||||
let toolPhaseResult: ToolPhaseResult;
|
||||
try {
|
||||
toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
|
||||
@@ -366,7 +334,6 @@ export async function runAssistantTurn(
|
||||
break;
|
||||
}
|
||||
|
||||
// ---- update loop locals ----
|
||||
toolsUsed += toolPhaseResult.toolCallCount;
|
||||
recentToolCalls = [...recentToolCalls, ...toolPhaseResult.toolCalls];
|
||||
stepNumber++;
|
||||
@@ -466,7 +433,6 @@ export async function runAssistantTurn(
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// ---- persist agent snapshot (best-effort, never blocks inference) ----
|
||||
const snapLoaded = await loadContext(ctx.sql, sessionId, chatId).catch(() => null);
|
||||
if (snapLoaded) {
|
||||
await saveAgentSnapshot(ctx.sql, chatId, {
|
||||
@@ -479,10 +445,6 @@ export async function runAssistantTurn(
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// ---- post-loop: step-cap sentinel ----
|
||||
// When the loop exits because stepNumber reached effectiveCap, the last
|
||||
// iteration's tool phase returned 'continue' with a nextAssistantId that
|
||||
// is still in 'streaming' status (unfilled). Use it for the wrap-up.
|
||||
if (stepNumber >= effectiveCap && effectiveCap < Infinity) {
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
if (loaded) {
|
||||
@@ -559,9 +521,6 @@ export async function runInference(
|
||||
});
|
||||
}
|
||||
|
||||
// v2.8-compare: run inference with a model override and compare group id.
|
||||
// Used by the compare endpoint to run the same message through N models in
|
||||
// parallel. Each call publishes frames scoped to its compare_group_id.
|
||||
export async function runInferenceWithModel(
|
||||
ctx: InferenceContext,
|
||||
sessionId: string,
|
||||
@@ -652,4 +611,3 @@ export function createInferenceRunner(
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -27,9 +27,6 @@ export interface StreamPhaseState {
|
||||
startedAt: string | null;
|
||||
}
|
||||
|
||||
// 500ms keeps the DB UPDATE rate bounded under heavy streaming. Used by
|
||||
// executeStreamPhase, runCapHitSummary, and runDoomLoopSummary — every site
|
||||
// that does a debounced content flush during streaming.
|
||||
export const DB_FLUSH_INTERVAL_MS = 500;
|
||||
|
||||
export interface InferenceFrame {
|
||||
@@ -123,6 +120,7 @@ export interface InferenceFrame {
|
||||
analysis_ready?: boolean;
|
||||
cross_exam_id?: string;
|
||||
delta?: string;
|
||||
stream_seq?: number;
|
||||
}
|
||||
|
||||
export type FramePublisher = (sessionId: string, frame: InferenceFrame) => void;
|
||||
@@ -153,6 +151,12 @@ export interface StreamResult {
|
||||
// v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
|
||||
// Empty string when the model doesn't emit reasoning (most cases).
|
||||
reasoning: string;
|
||||
// Phase 2 (anthropic): reasoning split per thinking block, each with its
|
||||
// Anthropic signature. Adaptive thinking auto-enables interleaved thinking,
|
||||
// so a turn can carry several signed blocks — they MUST be replayed verbatim
|
||||
// per block (a joined string + one signature would 400). Empty/undefined for
|
||||
// models without signed reasoning.
|
||||
reasoningBlocks?: Array<{ text: string; signature?: string }>;
|
||||
// vDeepSeek: optional cache-hit token count from DeepSeek's API.
|
||||
// Only populated when using @ai-sdk/deepseek provider (not llama-swap).
|
||||
cacheReadTokens?: number;
|
||||
@@ -184,8 +188,6 @@ export interface TurnArgs {
|
||||
// Never persisted — mirrors how the cap-hit/doom-loop notes live only inside
|
||||
// the summary call's messages array.
|
||||
pendingRecoveryNote?: string;
|
||||
// v2.8-compare: when set, overrides the session model for this single turn.
|
||||
// Used by the compare endpoint to run the same message through N models.
|
||||
modelOverride?: string;
|
||||
// v2.8-compare: opaque group id that rides on every published frame.
|
||||
compareGroupId?: string;
|
||||
|
||||
@@ -16,8 +16,6 @@ import type { FastifyBaseLogger } from 'fastify';
|
||||
import type { McpServerEntry, McpServerConfig } from './mcp-config.js';
|
||||
import type { ToolDef } from './tools.js';
|
||||
|
||||
// ---- Types ----
|
||||
|
||||
interface McpToolAnnotations {
|
||||
readOnlyHint?: boolean;
|
||||
destructiveHint?: boolean;
|
||||
@@ -41,8 +39,6 @@ interface ServerState {
|
||||
permission: McpPermission;
|
||||
}
|
||||
|
||||
// ---- Module-level state ----
|
||||
|
||||
const servers = new Map<string, ServerState>();
|
||||
// Reverse map: prefixed tool name → server name (built during discovery)
|
||||
const toolToServer = new Map<string, string>();
|
||||
@@ -50,8 +46,6 @@ let log: FastifyBaseLogger | null = null;
|
||||
|
||||
const MAX_RESULT_BYTES = 5 * 1024 * 1024;
|
||||
|
||||
// ---- Public API ----
|
||||
|
||||
/**
|
||||
* Connect to all configured MCP servers, discover tools, and wrap them.
|
||||
* Per-server graceful degradation: a failing server is logged and skipped.
|
||||
@@ -148,7 +142,6 @@ export function getServerPermission(prefixedToolName: string): McpPermission {
|
||||
return state?.permission ?? 'allow';
|
||||
}
|
||||
|
||||
/** Override the permission for a server. Used by the approval flow. */
|
||||
export function setServerPermission(serverName: string, permission: McpPermission): void {
|
||||
const state = servers.get(serverName);
|
||||
if (state) {
|
||||
@@ -208,8 +201,6 @@ export async function shutdown(): Promise<void> {
|
||||
toolToServer.clear();
|
||||
}
|
||||
|
||||
// ---- Internal helpers ----
|
||||
|
||||
async function connectServer(entry: McpServerEntry): Promise<void> {
|
||||
const { name, config } = entry;
|
||||
|
||||
|
||||
@@ -15,8 +15,6 @@ import { readFileSync } from 'node:fs';
|
||||
import { z } from 'zod';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
// ---- Zod schema ----
|
||||
|
||||
const McpPermissionSchema = z.enum(['allow', 'ask', 'deny']).default('allow');
|
||||
|
||||
const McpServerConfigSchema = z.discriminatedUnion('type', [
|
||||
@@ -48,8 +46,6 @@ export interface McpServerEntry {
|
||||
config: McpServerConfig;
|
||||
}
|
||||
|
||||
// ---- Env-var substitution ----
|
||||
|
||||
const ENV_VAR_PATTERN = /\{env:([A-Za-z_][A-Za-z0-9_]*)\}/g;
|
||||
|
||||
/**
|
||||
@@ -91,8 +87,6 @@ export function substituteEnvVars(
|
||||
return value;
|
||||
}
|
||||
|
||||
// ---- Loader ----
|
||||
|
||||
/**
|
||||
* Read and validate the MCP config file. Returns enabled servers only.
|
||||
* File missing → log info, return []. Parse/validation error → log warn, return [].
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { homedir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { readFile, readdir } from 'node:fs/promises';
|
||||
import type { MemoryEntry } from './entries.js';
|
||||
import { parseMemoryEntries } from './entries.js';
|
||||
import { parseMemoryEntries, type MemoryEntry } from "./entries.js";
|
||||
import { getMemoryRoot } from './paths.js';
|
||||
|
||||
export interface MemoryScope {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { readFile, writeFile, readdir } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type { MemoryTopic } from './paths.js';
|
||||
import { getTopicDir } from './paths.js';
|
||||
import { getTopicDir, type MemoryTopic } from "./paths.js";
|
||||
|
||||
export async function readTopicFiles(root: string, topic: MemoryTopic): Promise<Map<string, string>> {
|
||||
const dir = getTopicDir(root, topic);
|
||||
|
||||
@@ -44,6 +44,8 @@ type ConfigForModelContext = {
|
||||
LLAMA_SWAP_URL: string;
|
||||
DEEPSEEK_API_KEY?: string;
|
||||
DEEPSEEK_BASE_URL?: string;
|
||||
ANTHROPIC_API_KEY?: string;
|
||||
ANTHROPIC_BASE_URL?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -67,7 +69,21 @@ export function configureModelContext(
|
||||
|
||||
// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
|
||||
// Return a reasonable default context so compaction estimates work.
|
||||
const DEEPSEEK_DEFAULT_N_CTX = 131_072;
|
||||
const DEEPSEEK_DEFAULT_N_CTX = 1_000_000;
|
||||
|
||||
// Anthropic Claude models also have no props endpoint. Static windows: the
|
||||
// 4.x Opus/Sonnet family is 1M; Haiku is 200K.
|
||||
const ANTHROPIC_HAIKU_N_CTX = 200_000;
|
||||
const ANTHROPIC_DEFAULT_N_CTX = 1_000_000;
|
||||
|
||||
/**
 * Fixed context-window size for hosted providers that expose no props
 * endpoint to query.
 *
 * @param resolved - Result of `resolveModelProvider` for the model.
 * @returns The DeepSeek default for provider id `deepseek`; for the
 *   `anthropic` route, the Haiku window when the wire model id contains
 *   `haiku` and the Anthropic default otherwise; `null` for everything else.
 */
function staticHostedNCtx(resolved: ReturnType<typeof resolveModelProvider>): number | null {
  if (resolved.providerId === 'deepseek') {
    return DEEPSEEK_DEFAULT_N_CTX;
  }
  if (resolved.route !== 'anthropic') {
    return null;
  }
  const isHaiku = resolved.wireModelId.includes('haiku');
  return isHaiku ? ANTHROPIC_HAIKU_N_CTX : ANTHROPIC_DEFAULT_N_CTX;
}
|
||||
|
||||
export async function getModelContext(model: string): Promise<ModelContext | null> {
|
||||
// Resolve the model through the provider-aware resolver. For composite
|
||||
@@ -84,12 +100,11 @@ export async function getModelContext(model: string): Promise<ModelContext | nul
|
||||
|
||||
const resolved = resolveModelProvider(model, config);
|
||||
|
||||
// DeepSeek models (by provider id) have no /upstream/<model>/props.
|
||||
// Use a static default so compaction doesn't fall to the buffer-only
|
||||
// path with tiny limits.
|
||||
if (resolved.providerId === 'deepseek') {
|
||||
return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
|
||||
}
|
||||
// Hosted providers (DeepSeek, Anthropic) have no /upstream/<model>/props
|
||||
// endpoint — use a static window so compaction doesn't fall to the
|
||||
// buffer-only path with tiny limits.
|
||||
const staticCtx = staticHostedNCtx(resolved);
|
||||
if (staticCtx !== null) return { n_ctx: staticCtx };
|
||||
|
||||
// P7: orphaned auto:* session with no gateway configured — no props endpoint
|
||||
// to query. Negative-cache and return null; compaction degrades gracefully.
|
||||
|
||||
76
apps/server/src/services/pending-tool-lookup.ts
Normal file
76
apps/server/src/services/pending-tool-lookup.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import type { Sql } from '../db.js';
|
||||
import type { ToolCall } from '../types/api.js';
|
||||
|
||||
/**
 * Result of the shared lookup used by the answer_user_input and
 * grant_read_access pause-resume endpoints.
 *
 * The lookup finds the originating assistant tool_call by id in
 * message_parts, validates the tool name, finds the pending tool_result part
 * and applies the already-answered guard.
 *
 * - `ok: false` carries the HTTP status and response body for the failure;
 *   the caller does `reply.code(ctx.code); return ctx.body`.
 * - `ok: true` carries the matched call, the id of the tool message holding
 *   the pending result, and that result row itself.
 */
export type PendingToolLookupResult =
  | { ok: false; code: number; body: Record<string, unknown> }
  | {
      ok: true;
      foundCall: ToolCall;
      toolMessageId: string;
      toolRow: { message_id: string; payload: { tool_call_id: string; output: unknown } };
    };
|
||||
|
||||
/**
 * Resolve a paused tool call so a pause-resume endpoint can answer it.
 *
 * Runs two lookups against `message_parts` (joined to `messages` and scoped
 * to `chatId`): first the assistant `tool_call` part whose payload id equals
 * `tool_call_id`, then the `tool`-role `tool_result` part for the same id.
 * In both queries the most recently created matching message wins.
 *
 * @param sql - Tagged-template SQL client.
 * @param chatId - Chat the tool call must belong to.
 * @param tool_call_id - Id of the tool call being answered.
 * @param expectedToolName - Tool name the calling endpoint handles.
 * @param wrongToolError - Error code placed in the body when the call belongs
 *   to a different tool.
 * @returns `ok: true` with the call and its pending result row, or `ok: false`
 *   with an HTTP status and body: 404 when either row is missing, 400 on a
 *   tool-name mismatch, 409 when the result's `output` is not `null`.
 */
export async function lookupPendingToolCall(
  sql: Sql,
  chatId: string,
  tool_call_id: string,
  expectedToolName: string,
  wrongToolError: string,
): Promise<PendingToolLookupResult> {
  type ToolCallPartRow = {
    message_id: string;
    payload: { id: string; name: string; args: Record<string, unknown> };
  };
  type ToolResultPartRow = {
    message_id: string;
    payload: { tool_call_id: string; output: unknown };
  };

  // Step 1: the assistant-side tool_call part carrying this id.
  const callParts = await sql<ToolCallPartRow[]>`
    SELECT p.message_id, p.payload
    FROM message_parts p
    JOIN messages m ON m.id = p.message_id
    WHERE m.chat_id = ${chatId}
    AND m.role = 'assistant'
    AND p.kind = 'tool_call'
    AND p.payload->>'id' = ${tool_call_id}
    ORDER BY m.created_at DESC
    LIMIT 1
  `;
  const callPart = callParts[0];
  if (!callPart) {
    return { ok: false, code: 404, body: { error: 'unknown_tool_call_id' } };
  }

  const { id, name, args } = callPart.payload;
  const foundCall: ToolCall = { id, name, args };
  if (name !== expectedToolName) {
    return { ok: false, code: 400, body: { error: wrongToolError } };
  }

  // Step 2: the tool-side tool_result part that is waiting for an answer.
  const resultParts = await sql<ToolResultPartRow[]>`
    SELECT p.message_id, p.payload
    FROM message_parts p
    JOIN messages m ON m.id = p.message_id
    WHERE m.chat_id = ${chatId}
    AND m.role = 'tool'
    AND p.kind = 'tool_result'
    AND p.payload->>'tool_call_id' = ${tool_call_id}
    ORDER BY m.created_at DESC
    LIMIT 1
  `;
  const toolRow = resultParts[0];
  if (!toolRow) {
    return { ok: false, code: 404, body: { error: 'unknown_tool_call_id', detail: 'tool message not found' } };
  }

  // Only a strictly-null output counts as "still pending".
  const alreadyAnswered = Boolean(toolRow.payload) && toolRow.payload.output !== null;
  if (alreadyAnswered) {
    return { ok: false, code: 409, body: { error: 'tool_call_already_answered' } };
  }

  return { ok: true, foundCall, toolMessageId: toolRow.message_id, toolRow };
}
|
||||
@@ -102,19 +102,15 @@ export async function bootstrapProject(
|
||||
let gitea_pushed = false;
|
||||
let gitea_remote_url: string | null = null;
|
||||
|
||||
// Step 1: mkdir
|
||||
await mkdir(fullPath, { recursive: false });
|
||||
folder_created = true;
|
||||
log.info({ fullPath }, 'project_bootstrap: folder created');
|
||||
|
||||
// Step 2: write .gitignore
|
||||
await writeFile(resolve(fullPath, '.gitignore'), GITIGNORE_TEMPLATE, 'utf8');
|
||||
|
||||
// Step 3: git init -b main
|
||||
await execFileAsync('git', ['init', '-b', 'main'], { cwd: fullPath });
|
||||
git_initialized = true;
|
||||
|
||||
// Step 4: git add + commit (per-command -c, no global config touch)
|
||||
await execFileAsync('git', ['add', '.gitignore'], { cwd: fullPath });
|
||||
await execFileAsync(
|
||||
'git',
|
||||
@@ -129,7 +125,6 @@ export async function bootstrapProject(
|
||||
first_commit = true;
|
||||
log.info({ folder }, 'project_bootstrap: initial commit');
|
||||
|
||||
// Step 5: optional Gitea remote
|
||||
if (options.createGiteaRemote) {
|
||||
if (!config.GITEA_TOKEN) {
|
||||
warnings.push('Gitea remote skipped — token not configured');
|
||||
@@ -144,7 +139,6 @@ export async function bootstrapProject(
|
||||
gitea_remote_url = repo.html_url;
|
||||
log.info({ folder, html_url: repo.html_url }, 'project_bootstrap: gitea repo created');
|
||||
|
||||
// Step 6: git remote add + push
|
||||
try {
|
||||
const sshUrl = repo.ssh_url.replace('git.indifferentketchup.com', '100.114.205.53');
|
||||
await execFileAsync('git', ['remote', 'add', 'origin', sshUrl], { cwd: fullPath });
|
||||
|
||||
400
apps/server/src/services/provider-status.ts
Normal file
400
apps/server/src/services/provider-status.ts
Normal file
@@ -0,0 +1,400 @@
|
||||
import { getLlamaProviders, type LlamaProvider } from './llama-providers.js';
|
||||
import { GATEWAY_KIND } from '@boocode/contracts/gateway';
|
||||
import { loadConfig } from '../config.js';
|
||||
|
||||
/**
 * One loaded model as reported by a provider's `/running` endpoint, extended
 * with a locally derived composite id.
 *
 * Every field except `compositeId` is passed through from the endpoint
 * response unmodified.
 */
export interface RunningModel {
  /** Model id as reported by the provider. */
  model: string;
  /** `<provider id>/<model>`, built when the endpoint response is mapped. */
  compositeId: string;
  state: string;
  cmd: string;
  proxy: string;
  // NOTE(review): unit is not visible in this module; confirm against the provider API.
  ttl: number;
  name: string;
  description: string;
}
|
||||
|
||||
/**
 * Catalog metadata for one model offered by a hosted (cloud) provider.
 */
export interface CloudModelMeta {
  /** `<provider id>/<upstream model id>`. */
  id: string;
  /** Display name. */
  name?: string;
  /** Context window size. */
  contextLength?: number;
  /** Upper bound on generated tokens. */
  maxOutputTokens?: number;
  // NOTE(review): units are not normalised in this module. OpenRouter prices
  // are parsed verbatim from the API's price strings, DeepSeek prices are
  // hard-coded; confirm both sources use the same unit.
  pricing?: { input: number; output: number; cached?: number };
  modalities?: string[];
  supportsToolCalling?: boolean;
  supportsReasoning?: boolean;
}
|
||||
|
||||
/**
 * Optional rate-limit snapshot attached to a provider status.
 *
 * NOTE(review): no producer for this shape is visible in the reviewed part of
 * the module, so the exact meaning and units of the fields (in particular
 * `resetAt`) should be confirmed against whatever populates it.
 */
export interface RateLimitInfo {
  requestsRemaining?: number;
  requestsLimit?: number;
  tokensRemaining?: number;
  tokensLimit?: number;
  resetAt?: number;
}
|
||||
|
||||
/**
 * Per-GPU reading assembled from a provider's Prometheus `/metrics` text.
 */
export interface GpuInfo {
  /** Value of the `gpu` label on the `nvidia_gpu_*` series. */
  index: number;
  /** `name` label of the `nvidia_gpu_name` series. */
  name: string;
  /** Value of `nvidia_gpu_temperature_celsius`. */
  temperature: number;
  /** `nvidia_gpu_utilization_ratio` multiplied by 100. */
  utilizationPct: number;
  /** `nvidia_gpu_memory_used_bytes` divided by 1024 * 1024. */
  memoryUsedMb: number;
  /** `nvidia_gpu_memory_total_bytes` divided by 1024 * 1024. */
  memoryTotalMb: number;
  /** Value of `nvidia_gpu_power_draw_watts`. */
  powerDrawW: number;
}
|
||||
|
||||
/**
 * Status snapshot for one registered provider, as returned by
 * `getProviderStatus`.
 */
export interface ProviderStatus {
  id: string;
  label: string;
  baseUrl: string;
  kind: string;
  /** Whether the `<baseUrl>/health` probe succeeded; gateway providers are reported healthy without probing. */
  healthy: boolean;
  /** Failure reason from the health probe, when it did not succeed. */
  healthyError?: string;
  /** Models reported by `/running`; empty for gateway and cloud providers. */
  running: RunningModel[];
  /** Model catalog; only set for cloud provider kinds. */
  cloudModels?: CloudModelMeta[];
  rateLimits?: RateLimitInfo;
  /** GPU readings parsed from `/metrics`; only set for non-cloud, non-gateway providers. */
  gpus?: GpuInfo[];
  /**
   * Host saturation parsed from `/metrics`: `cpuSat` is `cpu_usage_active`
   * divided by 100, `memSat` is `1 - MemAvailable / MemTotal`, `load1` is
   * `node_load1`.
   */
  hostLoad?: { cpuSat: number; memSat: number; load1: number };
  /** Error from the `/running` fetch, when it failed. */
  fetchError?: string;
}
|
||||
|
||||
/**
 * Envelope returned by `getProviderStatus`.
 */
export interface ProviderStatusResponse {
  /** One entry per provider in the registry, in registry order. */
  providers: ProviderStatus[];
  /** `Date.now()` taken when the response was assembled. */
  timestamp: number;
}
|
||||
|
||||
// ---- Cache lifetimes (milliseconds) ----

/** How long a health probe result is reused. */
const HEALTH_CACHE_TTL_MS = 30_000;
/** How long a `/running` result is reused. */
const RUNNING_CACHE_TTL_MS = 5_000;
/** How long a cloud model catalog is reused. */
const CLOUD_MODELS_CACHE_TTL_MS = 120_000;
/** How long parsed `/metrics` readings are reused. */
const METRICS_CACHE_TTL_MS = 10_000;

// ---- Request timeouts (milliseconds) ----

/** Default timeout for provider requests. */
const FETCH_TIMEOUT_MS = 3_000;
/** Timeout for cloud model catalog requests. */
const CLOUD_FETCH_TIMEOUT_MS = 8_000;

// ---- Module-level caches ----
// Each entry records `at`, the `Date.now()` value when it was stored; readers
// compare it against the matching TTL above.

const healthCache = new Map<string, { value: boolean; error?: string; at: number }>();
const runningCache = new Map<string, { value: RunningModel[]; error?: string; at: number }>();
const cloudModelsCache = new Map<string, { value: CloudModelMeta[]; at: number }>();
const metricsCache = new Map<string, { gpus: GpuInfo[]; hostLoad?: ProviderStatus['hostLoad']; at: number }>();
|
||||
|
||||
/**
 * GET `url` and parse the response body as JSON, aborting after `timeoutMs`.
 *
 * @param url - Absolute URL to request.
 * @param timeoutMs - Abort deadline in milliseconds; defaults to `FETCH_TIMEOUT_MS`.
 * @param headers - Optional request headers.
 * @returns The parsed JSON body, untyped.
 * @throws Error `HTTP <status>: <statusText>` when the response is not ok;
 *   also rejects when the request is aborted or the body is not valid JSON.
 */
async function fetchJson(url: string, timeoutMs = FETCH_TIMEOUT_MS, headers?: Record<string, string>): Promise<unknown> {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), timeoutMs);
  try {
    const response = await fetch(url, { signal: abort.signal, headers });
    if (response.ok) {
      return await response.json();
    }
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  } finally {
    // Always disarm the timer so it cannot fire after the request settled.
    clearTimeout(deadline);
  }
}
|
||||
|
||||
/**
 * Probe `<baseUrl>/health` for a provider, reusing a cached answer while it
 * is younger than `HEALTH_CACHE_TTL_MS`.
 *
 * Never rejects: a non-ok response is reported as `HTTP <status>`, and any
 * thrown error (including the abort after `FETCH_TIMEOUT_MS`) is reported via
 * its message. Both successes and failures are cached.
 *
 * @param provider - Provider to probe.
 * @returns Health flag plus the failure reason, if any.
 */
async function fetchHealth(provider: LlamaProvider): Promise<{ healthy: boolean; error?: string }> {
  const cacheKey = `health:${provider.id}/${provider.baseUrl}`;
  const hit = healthCache.get(cacheKey);
  if (hit !== undefined && Date.now() - hit.at < HEALTH_CACHE_TTL_MS) {
    return { healthy: hit.value, error: hit.error };
  }

  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    const endpoint = `${provider.baseUrl.replace(/\/+$/, '')}/health`;
    const abort = new AbortController();
    timer = setTimeout(() => abort.abort(), FETCH_TIMEOUT_MS);

    const response = await fetch(endpoint, { signal: abort.signal });
    const healthy = response.ok;
    const error = healthy ? undefined : `HTTP ${response.status}`;
    healthCache.set(cacheKey, { value: healthy, error, at: Date.now() });
    return { healthy, error };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    healthCache.set(cacheKey, { value: false, error: reason, at: Date.now() });
    return { healthy: false, error: reason };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * List the models a provider currently has loaded, via `<baseUrl>/running`.
 *
 * A cached answer is reused while younger than `RUNNING_CACHE_TTL_MS`. On a
 * failed fetch the last cached list (or an empty list) is returned together
 * with the error message, and that fallback is re-cached with a fresh
 * timestamp.
 *
 * @param provider - Provider to query.
 * @returns The running models, each tagged with `<provider id>/<model>` as
 *   `compositeId`, plus `error` when the fetch failed.
 */
async function fetchRunning(provider: LlamaProvider): Promise<{ running: RunningModel[]; error?: string }> {
  const cacheKey = `running:${provider.id}/${provider.baseUrl}`;
  const hit = runningCache.get(cacheKey);
  if (hit !== undefined && Date.now() - hit.at < RUNNING_CACHE_TTL_MS) {
    return { running: hit.value, error: hit.error };
  }

  try {
    const endpoint = `${provider.baseUrl.replace(/\/+$/, '')}/running`;
    const payload = (await fetchJson(endpoint)) as {
      running: Omit<RunningModel, 'compositeId'>[];
    };
    const reported = payload?.running ?? [];
    const running: RunningModel[] = reported.map((entry) => ({
      ...entry,
      compositeId: `${provider.id}/${entry.model}`,
    }));
    runningCache.set(cacheKey, { value: running, at: Date.now() });
    return { running };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    // Re-read the cache: another call may have refreshed it while we awaited.
    const lastKnown = runningCache.get(cacheKey)?.value ?? [];
    runningCache.set(cacheKey, { value: lastKnown, error: reason, at: Date.now() });
    return { running: lastKnown, error: reason };
  }
}
|
||||
|
||||
/**
 * Fetch and map the OpenRouter model listing from `<baseUrl>/api/v1/models`.
 *
 * Tool-calling and reasoning support are guessed from the free-text model
 * description (substring match on "tool", "reason", "think").
 *
 * NOTE(review): prices are parsed verbatim from the API's price strings; their
 * unit may differ from the hard-coded DeepSeek prices — confirm.
 */
async function openRouterCloudModels(provider: LlamaProvider, apiKey?: string): Promise<CloudModelMeta[]> {
  const cleanBase = provider.baseUrl.replace(/\/+$/, '');
  const headers: Record<string, string> = {};
  if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`;

  const listing = (await fetchJson(`${cleanBase}/api/v1/models`, CLOUD_FETCH_TIMEOUT_MS, headers)) as {
    data?: Array<{
      id: string;
      name?: string;
      context_length?: number;
      top_provider?: { max_completion_tokens?: number };
      pricing?: { prompt?: string; completion?: string };
      architecture?: { modality?: string; tokenizer?: string };
      description?: string;
    }>;
  };

  return (listing.data ?? []).map((m) => {
    const description = (m.description ?? '').toLowerCase();
    return {
      id: `${provider.id}/${m.id}`,
      name: m.name || m.id,
      contextLength: m.context_length,
      maxOutputTokens: m.top_provider?.max_completion_tokens,
      pricing: {
        input: parseFloatPricing(m.pricing?.prompt),
        output: parseFloatPricing(m.pricing?.completion),
      },
      modalities: m.architecture?.modality ? [m.architecture.modality] : ['text'],
      supportsToolCalling: description.includes('tool'),
      supportsReasoning: description.includes('reason') || description.includes('think'),
    };
  });
}

/** Static DeepSeek catalog; model ids are prefixed with the provider id. */
function deepseekCloudModels(providerId: string): CloudModelMeta[] {
  return [
    {
      id: `${providerId}/deepseek-v4-flash`,
      name: 'DeepSeek V4 Flash',
      contextLength: 1_000_000,
      maxOutputTokens: 384_000,
      pricing: { input: 0.14, output: 0.28, cached: 0.0028 },
      modalities: ['text'],
      supportsToolCalling: true,
      supportsReasoning: true,
    },
    {
      id: `${providerId}/deepseek-v4-pro`,
      name: 'DeepSeek V4 Pro',
      contextLength: 1_000_000,
      maxOutputTokens: 384_000,
      pricing: { input: 0.435, output: 0.87, cached: 0.003625 },
      modalities: ['text'],
      supportsToolCalling: true,
      supportsReasoning: true,
    },
  ];
}

/**
 * Return the model catalog for a cloud provider, cached for
 * `CLOUD_MODELS_CACHE_TTL_MS`.
 *
 * `openrouter` providers are queried over HTTP, `deepseek` providers use a
 * static list, and any other kind yields an empty catalog. Never rejects: if
 * a refresh fails, the previously cached catalog is served (even though it is
 * past its TTL) so a transient outage does not blank the model list; only
 * when nothing was ever cached is an empty list returned.
 *
 * @param provider - Cloud provider to describe.
 * @param apiKey - Optional bearer token, sent to OpenRouter only.
 * @returns The provider's model catalog.
 */
async function fetchCloudModels(
  provider: LlamaProvider,
  apiKey?: string,
): Promise<CloudModelMeta[]> {
  const cacheKey = `cloud:${provider.id}/${provider.baseUrl}`;
  const cached = cloudModelsCache.get(cacheKey);
  if (cached && Date.now() - cached.at < CLOUD_MODELS_CACHE_TTL_MS) return cached.value;

  try {
    let models: CloudModelMeta[];
    if (provider.kind === 'openrouter') {
      models = await openRouterCloudModels(provider, apiKey);
    } else if (provider.kind === 'deepseek') {
      models = deepseekCloudModels(provider.id);
    } else {
      models = [];
    }

    cloudModelsCache.set(cacheKey, { value: models, at: Date.now() });
    return models;
  } catch {
    // Best-effort: prefer stale data over no data. The stale entry keeps its
    // old timestamp, so the next call retries the refresh.
    return cloudModelsCache.get(cacheKey)?.value ?? [];
  }
}
|
||||
|
||||
/**
 * Convert an optional price string to a number.
 *
 * @param val - Price as a decimal string; may be missing or empty.
 * @returns The parsed number, or 0 when `val` is missing, empty, or does not
 *   start with a parseable number.
 */
function parseFloatPricing(val?: string): number {
  const parsed = val ? parseFloat(val) : 0;
  return Number.isNaN(parsed) ? 0 : parsed;
}
|
||||
|
||||
/**
 * Fetch a provider's Prometheus `/metrics` text and extract GPU and host-load
 * readings, cached for `METRICS_CACHE_TTL_MS`.
 *
 * The request is aborted after `FETCH_TIMEOUT_MS`, like the other provider
 * probes in this module, so an unresponsive metrics endpoint cannot stall a
 * status poll. Never rejects: on any failure (non-ok status, abort, network
 * error) an empty reading is returned and nothing is cached.
 *
 * @param provider - Provider whose metrics endpoint is scraped.
 * @returns GPU readings and, when the memory gauges are present, host load.
 */
async function fetchMetrics(provider: LlamaProvider): Promise<{
  gpus: GpuInfo[];
  hostLoad?: ProviderStatus['hostLoad'];
}> {
  const cacheKey = `metrics:${provider.id}/${provider.baseUrl}`;
  const cached = metricsCache.get(cacheKey);
  if (cached && Date.now() - cached.at < METRICS_CACHE_TTL_MS) return cached;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  try {
    const res = await fetch(`${provider.baseUrl.replace(/\/+$/, '')}/metrics`, { signal: controller.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    // The timer stays armed while the body is read; the signal covers that too.
    const text = await res.text();

    const entry = {
      gpus: parsePrometheusGpuMetrics(text),
      hostLoad: parsePrometheusHostMetrics(text),
      at: Date.now(),
    };
    metricsCache.set(cacheKey, entry);
    return entry;
  } catch {
    // Metrics are optional decoration on the status response.
    return { gpus: [] };
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Extract per-GPU readings from Prometheus exposition text.
 *
 * GPU names come from the `name` label of `nvidia_gpu_name{gpu="N",...}`
 * series. Numeric fields come from the `nvidia_gpu_*{gpu="N"}` series listed
 * below, with utilisation scaled from a ratio to a percentage and memory
 * scaled from bytes to units of 1024 * 1024 bytes.
 *
 * Every series is scanned with a global regex, so all GPUs are collected and
 * the scan always terminates. Sample values may use exponent notation
 * (e.g. `2.5769803776e+10`). Samples that do not parse to a finite number are
 * ignored.
 *
 * @param text - Raw body of a `/metrics` response.
 * @returns One entry per GPU that has a name. GPUs that only appear in
 *   numeric series are dropped; a named GPU may lack numeric fields whose
 *   series were absent from `text`.
 */
function parsePrometheusGpuMetrics(text: string): GpuInfo[] {
  type NumericGpuField = 'temperature' | 'utilizationPct' | 'memoryUsedMb' | 'memoryTotalMb' | 'powerDrawW';

  const byIndex = new Map<number, Partial<GpuInfo>>();
  const gpuAt = (index: number): Partial<GpuInfo> => {
    let gpu = byIndex.get(index);
    if (gpu === undefined) {
      gpu = { index };
      byIndex.set(index, gpu);
    }
    return gpu;
  };

  for (const match of text.matchAll(/nvidia_gpu_name\{gpu="(\d+)"[^}]*name="([^"]+)"/g)) {
    const [, gpuLabel, name] = match;
    if (!gpuLabel || !name) continue;
    gpuAt(parseInt(gpuLabel, 10)).name = name;
  }

  // Each regex captures (1) the gpu label and (2) the sample value. The `g`
  // flag is required: matchAll rejects non-global regexes, and a non-global
  // exec() loop would re-match the first sample forever.
  const series: Array<{ regex: RegExp; field: NumericGpuField; scale?: number }> = [
    { regex: /nvidia_gpu_temperature_celsius\{gpu="(\d+)"\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/g, field: 'temperature' },
    { regex: /nvidia_gpu_utilization_ratio\{gpu="(\d+)"\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/g, field: 'utilizationPct', scale: 100 },
    { regex: /nvidia_gpu_memory_used_bytes\{gpu="(\d+)"\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/g, field: 'memoryUsedMb', scale: 1 / (1024 * 1024) },
    { regex: /nvidia_gpu_memory_total_bytes\{gpu="(\d+)"\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/g, field: 'memoryTotalMb', scale: 1 / (1024 * 1024) },
    { regex: /nvidia_gpu_power_draw_watts\{gpu="(\d+)"\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/g, field: 'powerDrawW' },
  ];

  for (const { regex, field, scale } of series) {
    for (const match of text.matchAll(regex)) {
      const [, gpuLabel, sample] = match;
      if (!gpuLabel || sample === undefined) continue;
      const raw = parseFloat(sample);
      if (!Number.isFinite(raw)) continue;
      gpuAt(parseInt(gpuLabel, 10))[field] = scale != null ? raw * scale : raw;
    }
  }

  // Callers only want GPUs they can label.
  return Array.from(byIndex.values()).filter((gpu) => gpu.name != null) as GpuInfo[];
}
|
||||
|
||||
/**
 * Extract host load figures from Prometheus exposition text.
 *
 * Reads `node_memory_MemAvailable_bytes`, `node_memory_MemTotal_bytes`,
 * `node_load1` and, only when a `node_cpu_seconds_total` series is present
 * somewhere in the text, `cpu_usage_active{...}`.
 *
 * Patterns are anchored to the start of a line so that `# HELP` / `# TYPE`
 * comment lines (for example `# HELP node_load1 1m load average.`) are never
 * mistaken for samples, and sample values may use exponent notation
 * (e.g. `1.6729247744e+10`).
 *
 * @param text - Raw body of a `/metrics` response.
 * @returns `undefined` when either memory gauge is missing or the total is
 *   not positive; otherwise `cpuSat` (`cpu_usage_active / 100`, 0 when
 *   unavailable), `memSat` (`1 - available / total`) and `load1` (0 when
 *   unavailable).
 */
function parsePrometheusHostMetrics(text: string): ProviderStatus['hostLoad'] {
  // First sample matched by `pattern`, whose capture group 1 is the value.
  const sample = (pattern: RegExp): number | undefined => {
    const captured = pattern.exec(text)?.[1];
    if (captured === undefined) return undefined;
    const value = parseFloat(captured);
    return Number.isFinite(value) ? value : undefined;
  };

  const memAvail = sample(/^node_memory_MemAvailable_bytes\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/m);
  const memTotal = sample(/^node_memory_MemTotal_bytes\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/m);
  // Without both gauges (or with a zero total) memSat cannot be computed.
  if (memAvail === undefined || memTotal === undefined || memTotal <= 0) return undefined;

  // NOTE(review): the CPU reading is gated on node_cpu_seconds_total being
  // present but is taken from a differently named series; confirm the
  // exporter really emits cpu_usage_active.
  const cpuPct = /node_cpu_seconds_total/.test(text)
    ? sample(/^cpu_usage_active\{[^}]*\}\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/m)
    : undefined;
  const load1 = sample(/^node_load1\s+([-+]?[\d.]+(?:[eE][-+]?\d+)?)/m);

  return {
    cpuSat: cpuPct ? cpuPct / 100 : 0,
    memSat: 1 - memAvail / memTotal,
    load1: load1 ?? 0,
  };
}
|
||||
|
||||
/**
 * Build a status snapshot for every provider in the registry.
 *
 * Providers are processed concurrently, and the independent probes for a
 * single provider also run concurrently:
 * - gateway providers are reported healthy with no running models and are
 *   not probed;
 * - cloud providers (`openrouter`, `deepseek`) get a health probe plus their
 *   model catalog; OpenRouter's key is read from `OPENROUTER_API_KEY`,
 *   DeepSeek's from the loaded config;
 * - all other providers get health, running models and `/metrics` readings.
 *
 * @returns One status per provider, in registry order, stamped with
 *   `Date.now()`.
 */
export async function getProviderStatus(): Promise<ProviderStatusResponse> {
  const config = loadConfig();
  const registry = getLlamaProviders();
  const cloudKinds = new Set<string>(['openrouter', 'deepseek']);

  const statuses = await Promise.all(
    registry.providers.map(async (provider): Promise<ProviderStatus> => {
      const identity = {
        id: provider.id,
        label: provider.label,
        baseUrl: provider.baseUrl,
        kind: provider.kind,
      };

      if (provider.kind === GATEWAY_KIND) {
        return { ...identity, healthy: true, running: [] };
      }

      if (cloudKinds.has(provider.kind)) {
        let apiKey: string | undefined;
        if (provider.kind === 'openrouter') {
          apiKey = process.env['OPENROUTER_API_KEY'];
        } else if (provider.kind === 'deepseek') {
          apiKey = config.DEEPSEEK_API_KEY;
        }

        // The health probe and the catalog fetch do not depend on each other.
        const [health, cloudModels] = await Promise.all([
          fetchHealth(provider),
          fetchCloudModels(provider, apiKey),
        ]);
        return {
          ...identity,
          healthy: health.healthy,
          healthyError: health.error,
          running: [],
          cloudModels,
        };
      }

      const [health, running, metrics] = await Promise.all([
        fetchHealth(provider),
        fetchRunning(provider),
        // Metrics are optional; never let them fail the whole status.
        fetchMetrics(provider).catch(() => ({ gpus: [] as GpuInfo[], hostLoad: undefined })),
      ]);

      return {
        ...identity,
        healthy: health.healthy,
        healthyError: health.error,
        running: running.running,
        gpus: metrics.gpus,
        hostLoad: metrics.hostLoad,
        fetchError: running.error,
      };
    }),
  );

  return { providers: statuses, timestamp: Date.now() };
}
|
||||
|
||||
/**
 * Ask a swap-style provider to unload all of its models.
 *
 * Sends `POST {baseUrl}/api/models/unload` with a 10 second abort timeout and,
 * on an OK response, drops the provider's entry from the running-models cache.
 *
 * @param providerId Registry id of the provider to unload.
 * @returns `true` when the provider answered with an OK status; `false` when
 *   the provider is unknown, is a gateway, has a kind not ending in `swap`, or
 *   the request failed or timed out.
 */
export async function unloadProvider(providerId: string): Promise<boolean> {
  const target = getLlamaProviders().providers.find((candidate) => candidate.id === providerId);
  if (!target) return false;
  if (target.kind === GATEWAY_KIND) return false;
  if (!target.kind.endsWith('swap')) return false;

  let abortTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Trim trailing slashes so the joined URL never contains `//`.
    const endpoint = `${target.baseUrl.replace(/\/+$/, '')}/api/models/unload`;
    const aborter = new AbortController();
    abortTimer = setTimeout(() => aborter.abort(), 10_000);

    const response = await fetch(endpoint, { method: 'POST', signal: aborter.signal });
    if (!response.ok) return false;

    // The cached running list is stale once the unload succeeded.
    runningCache.delete(`running:${target.id}/${target.baseUrl}`);
    return true;
  } catch {
    // Network errors and aborts are reported as a plain failure.
    return false;
  } finally {
    clearTimeout(abortTimer);
  }
}
|
||||
|
||||
/**
 * Ask a swap-style provider to unload one specific model.
 *
 * Sends `POST {baseUrl}/api/models/unload/{modelId}` (model id URL-encoded)
 * with a 10 second abort timeout and, on an OK response, drops the provider's
 * entry from the running-models cache.
 *
 * @param providerId Registry id of the provider hosting the model.
 * @param modelId Identifier of the model to unload.
 * @returns `true` when the provider answered with an OK status; `false` when
 *   the provider is unknown, is a gateway, has a kind not ending in `swap`, or
 *   the request failed or timed out.
 */
export async function unloadModel(providerId: string, modelId: string): Promise<boolean> {
  const { providers } = getLlamaProviders();
  const host = providers.find((entry) => entry.id === providerId);
  const eligible = host ? host.kind !== GATEWAY_KIND && host.kind.endsWith('swap') : false;
  if (!host || !eligible) return false;

  let deadline: ReturnType<typeof setTimeout> | undefined;
  try {
    // Trim trailing slashes so the joined URL never contains `//`.
    const root = host.baseUrl.replace(/\/+$/, '');
    const target = `${root}/api/models/unload/${encodeURIComponent(modelId)}`;
    const abort = new AbortController();
    deadline = setTimeout(() => abort.abort(), 10_000);

    const reply = await fetch(target, { method: 'POST', signal: abort.signal });
    if (reply.ok) {
      // The cached running list is stale once the unload succeeded.
      runningCache.delete(`running:${host.id}/${host.baseUrl}`);
    }
    return reply.ok;
  } catch {
    // Network errors and aborts are reported as a plain failure.
    return false;
  } finally {
    clearTimeout(deadline);
  }
}
|
||||
|
||||
/**
 * Empty every provider-related cache (health, running models, cloud models,
 * metrics) so the next status request re-fetches everything.
 */
export function invalidateProviderCache(): void {
  for (const store of [healthCache, runningCache, cloudModelsCache, metricsCache]) {
    store.clear();
  }
}
|
||||
@@ -128,12 +128,6 @@ export const DEFAULT_SECURITY_IGNORE_FILETYPES: ReadonlyArray<string> = [
|
||||
...BOOCODE_ADDITIONS,
|
||||
];
|
||||
|
||||
// === glob compilation ======================================================
|
||||
// Tiny glob-to-regex. No new prod dep — the patterns we ship are simple
|
||||
// (literal | name* | *.ext | dir/). Covers ~95% of glob spec, which is
|
||||
// 100% of what this list uses. If patterns ever grow to need `**`, `[]`,
|
||||
// `{a,b}`, or negation, swap in picomatch.
|
||||
|
||||
interface CompiledPattern {
|
||||
regex: RegExp;
|
||||
// 'basename' = test against the trailing path component only.
|
||||
@@ -158,8 +152,6 @@ function compile(pattern: string): CompiledPattern {
|
||||
|
||||
const COMPILED: ReadonlyArray<CompiledPattern> = DEFAULT_SECURITY_IGNORE_FILETYPES.map(compile);
|
||||
|
||||
// === public API ============================================================
|
||||
|
||||
// Returns true when `relPath` matches a known-secret pattern. Case-insensitive
|
||||
// (regex 'i' flag). Always normalize path separators to `/` so Windows-origin
|
||||
// paths match the same patterns. Empty or root-only paths return false.
|
||||
|
||||
@@ -35,11 +35,6 @@ interface CachedSkill extends Skill {
|
||||
const cache = new Map<string, CachedSkill>();
|
||||
let lastWalkedAt = 0;
|
||||
|
||||
// ---- Frontmatter parser ----------------------------------------------------
|
||||
// Minimal `---\n...\n---` extractor. Only `name` and `description` keys are
|
||||
// honored; other frontmatter keys are silently ignored for forward-compat
|
||||
// with the anthropics/skills upstream spec.
|
||||
|
||||
interface Frontmatter {
|
||||
name?: string;
|
||||
description?: string;
|
||||
@@ -91,8 +86,6 @@ function parseSkillFile(content: string): ParsedSkillFile {
|
||||
return { name: fm.name, description: fm.description, body };
|
||||
}
|
||||
|
||||
// ---- Tree walk -------------------------------------------------------------
|
||||
|
||||
// Fixed depth-3 scan: /data/skills/<group>/<skill>/SKILL.md. Two layers of
|
||||
// readdir, no recursion. Group folders without SKILL.md are skipped silently;
|
||||
// LICENSE / ATTRIBUTION.md / other non-SKILL.md files are ignored entirely.
|
||||
@@ -145,8 +138,6 @@ async function walkSkills(root: string): Promise<CachedSkill[]> {
|
||||
return found;
|
||||
}
|
||||
|
||||
// ---- Cache ----------------------------------------------------------------
|
||||
|
||||
async function ensureCache(): Promise<void> {
|
||||
const now = Date.now();
|
||||
if (cache.size > 0 && now - lastWalkedAt < LIST_CACHE_TTL_MS) return;
|
||||
@@ -186,8 +177,6 @@ async function ensureCache(): Promise<void> {
|
||||
lastWalkedAt = now;
|
||||
}
|
||||
|
||||
// ---- Public API -----------------------------------------------------------
|
||||
|
||||
export async function listSkills(): Promise<Skill[]> {
|
||||
await ensureCache();
|
||||
return Array.from(cache.values()).map((s) => ({
|
||||
|
||||
@@ -34,7 +34,7 @@ import type { InferenceContext, TurnArgs } from './inference/types.js';
|
||||
export const SYNTHESIS_TOOLS: ReadonlySet<string> = new Set([
|
||||
'boocontext_boocontext_overview',
|
||||
'boocontext_boocontext_symbols',
|
||||
'boocontext_codesight_get_blast_radius',
|
||||
'boocontext_boocontext_get_blast_radius',
|
||||
]);
|
||||
|
||||
const TOP_N_FILES = 5;
|
||||
@@ -52,7 +52,7 @@ const SYNTH_TIMEOUT_MS = 90_000;
|
||||
// File-extension regex for referenced-file extraction. Limited to source-
|
||||
// language extensions so we don't pull in lockfiles, images, etc.
|
||||
const FILE_PATH_RE =
|
||||
/(?:^|[`'"<\s\(\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<\)\]\s,;:]|$)/gm;
|
||||
/(?:^|[`'"<\s(\[])([A-Za-z0-9_./@-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|kt|c|cpp|h|hpp|md|json|yaml|yml|sql|sh|html|css))(?=[`'"<)\]\s,;:]|$)/gm;
|
||||
|
||||
export interface SynthesisParams {
|
||||
ctx: InferenceContext;
|
||||
|
||||
@@ -24,10 +24,6 @@ import {
|
||||
getBackgroundTaskResult,
|
||||
} from '../background-task.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// spawn_subagent
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const SpawnSubagentInput = z.object({
|
||||
input: z.string().min(1).describe('The task to execute in the background'),
|
||||
model: z
|
||||
@@ -139,10 +135,6 @@ export const spawnSubagent: ToolDef<SpawnSubagentInputT> = {
|
||||
},
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// subagent_status
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const SubagentStatusInput = z.object({
|
||||
task_id: z.string().uuid().describe('Task ID from spawn_subagent'),
|
||||
});
|
||||
@@ -218,10 +210,6 @@ export const subagentStatus: ToolDef<SubagentStatusInputT> = {
|
||||
},
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// subagent_result
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const SubagentResultInput = z.object({
|
||||
task_id: z.string().uuid().describe('Task ID from spawn_subagent'),
|
||||
});
|
||||
|
||||
@@ -5,7 +5,7 @@ import { webSearch } from '../web_search.js';
|
||||
import { webFetch } from '../web_fetch.js';
|
||||
// v2.8.24: All codecontext tools removed. Boocontext MCP tools are appended
|
||||
// at startup via appendMcpTools(). Agent tool lists reference the MCP tool
|
||||
// names (boocontext_boocontext_*, boocontext_codesight_*) directly.
|
||||
// names (boocontext_boocontext_*) directly.
|
||||
// v1.13.17-cross-repo-reads: cross-repo read grant request tool. Paired
|
||||
// with the pause-on-pending-grant branch in inference/tool-phase.ts and the
|
||||
// POST /api/chats/:id/grant_read_access endpoint in routes/messages.ts.
|
||||
|
||||
@@ -18,6 +18,7 @@ export interface ToolJsonSchema {
|
||||
export interface ToolExecCtx {
|
||||
sql: Sql;
|
||||
sessionId: string;
|
||||
toolCallId?: string;
|
||||
}
|
||||
|
||||
export interface ToolDef<TInput> {
|
||||
|
||||
@@ -8,10 +8,6 @@
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* A built-in workflow definition shipped with BooCode.
|
||||
*/
|
||||
@@ -32,10 +28,6 @@ export interface BuiltinWorkflow {
|
||||
generateScript: (args?: Record<string, unknown>) => string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Script templates (shared helpers)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Stable JSON serialisation for generating deterministic cache keys from
|
||||
* structured arguments. Keys are sorted so the same data always produces
|
||||
@@ -52,12 +44,6 @@ function stableJson(value: unknown): string {
|
||||
return `{${pairs.join(',')}}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a deterministic SHA-256 fingerprint for a combined spec + args
|
||||
* payload. Used by the resumability cache to detect unchanged agent tasks.
|
||||
*
|
||||
* Exported for testing.
|
||||
*/
|
||||
export function fingerprintAgentTask(
|
||||
prompt: string,
|
||||
spec: Record<string, unknown>,
|
||||
@@ -68,10 +54,6 @@ export function fingerprintAgentTask(
|
||||
.digest('hex');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Built-in workflow definitions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function generateDeepResearchScript(_args?: Record<string, unknown>): string {
|
||||
return `
|
||||
export const meta = {
|
||||
@@ -90,7 +72,6 @@ export default async function main(args) {
|
||||
const query = args?.query ?? 'No query provided';
|
||||
log('deep-research: starting with query: ' + query);
|
||||
|
||||
// Phase 1: Scope
|
||||
phase('Scope');
|
||||
const scope = await agent(
|
||||
'Analyse this research query and produce a search plan with 3-5 key sub-questions: ' + query,
|
||||
@@ -98,7 +79,6 @@ export default async function main(args) {
|
||||
);
|
||||
log('Scope completed');
|
||||
|
||||
// Phase 2: Search
|
||||
phase('Search');
|
||||
const searchResults = await agent(
|
||||
'Based on the scope, search for authoritative sources. Return a list of 3-5 URLs with brief annotations.',
|
||||
@@ -106,7 +86,6 @@ export default async function main(args) {
|
||||
);
|
||||
log('Search completed');
|
||||
|
||||
// Phase 3: Fetch
|
||||
phase('Fetch');
|
||||
const fetchedContent = await agent(
|
||||
'Extract and summarise the key information from these sources: ' + JSON.stringify(searchResults),
|
||||
@@ -114,7 +93,6 @@ export default async function main(args) {
|
||||
);
|
||||
log('Fetch completed');
|
||||
|
||||
// Phase 4: Verify
|
||||
phase('Verify');
|
||||
const verified = await agent(
|
||||
'Cross-reference the fetched information. Note any contradictions, gaps, or weak sources: ' + JSON.stringify(fetchedContent),
|
||||
@@ -122,7 +100,6 @@ export default async function main(args) {
|
||||
);
|
||||
log('Verify completed');
|
||||
|
||||
// Phase 5: Synthesise
|
||||
phase('Synthesise');
|
||||
const report = await agent(
|
||||
'Synthesise the verified information into a structured report with findings, sources, and confidence levels: ' + JSON.stringify(verified),
|
||||
@@ -161,28 +138,24 @@ export default async function main(args) {
|
||||
{ label: 'read-context', phase: 'context' },
|
||||
);
|
||||
|
||||
// Phase 1: Correctness
|
||||
phase('Correctness');
|
||||
const correctness = await agent(
|
||||
'Review this code for correctness. Check logical errors, edge cases, type safety, and concurrency issues:\\n' + JSON.stringify(context),
|
||||
{ label: 'correctness-review', phase: 'correctness' },
|
||||
);
|
||||
|
||||
// Phase 2: Security
|
||||
phase('Security');
|
||||
const security = await agent(
|
||||
'Review this code for security vulnerabilities. Check for injection, auth bypasses, unsafe deserialisation, secret exposure:\\n' + JSON.stringify(context),
|
||||
{ label: 'security-review', phase: 'security' },
|
||||
);
|
||||
|
||||
// Phase 3: Performance
|
||||
phase('Performance');
|
||||
const performance = await agent(
|
||||
'Review this code for performance issues. Check algorithmic complexity, unnecessary allocations, I/O patterns, caching opportunities:\\n' + JSON.stringify(context),
|
||||
{ label: 'performance-review', phase: 'performance' },
|
||||
);
|
||||
|
||||
// Phase 4: Synthesise
|
||||
phase('Synthesise');
|
||||
const report = await agent(
|
||||
'Merge these three review perspectives into one structured report with severity-ranked findings:\\n' +
|
||||
@@ -271,10 +244,6 @@ export default async function main(args) {
|
||||
`.trim();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Registry
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* All built-in workflow definitions shipped with BooCode.
|
||||
*/
|
||||
|
||||
@@ -109,7 +109,6 @@ export function findWorkflow(
|
||||
name: string,
|
||||
projectRoot: string,
|
||||
): WorkflowMeta | undefined {
|
||||
// Check built-in catalog first
|
||||
const builtin = getBuiltinWorkflow(name);
|
||||
if (builtin) {
|
||||
return {
|
||||
|
||||
@@ -45,10 +45,6 @@ const AGENT_TASK_TIMEOUT_MS = 300_000;
|
||||
*/
|
||||
const POLL_INTERVAL_MS = 500;
|
||||
|
||||
/**
|
||||
* Maximum time for the entire workflow run (30 minutes).
|
||||
*/
|
||||
const WORKFLOW_TIMEOUT_MS = 1_800_000;
|
||||
|
||||
/**
|
||||
* Token budget tracker. Tracks total token spend across agent calls.
|
||||
@@ -110,8 +106,6 @@ export class WorkflowManager {
|
||||
private broker: Broker,
|
||||
) {}
|
||||
|
||||
// ---- public API ----
|
||||
|
||||
/**
|
||||
* Discover all available workflow scripts.
|
||||
*/
|
||||
@@ -154,7 +148,6 @@ export class WorkflowManager {
|
||||
}
|
||||
|
||||
try {
|
||||
// Load meta by executing the script in a throwaway context
|
||||
const context = this.#createMinimalContext('meta-loader');
|
||||
const code = readFileSync(found.sourceFile, 'utf8');
|
||||
const finalCode = isEsmSyntax(code) ? transformEsmToCjs(code) : code;
|
||||
@@ -209,7 +202,6 @@ export class WorkflowManager {
|
||||
this.#runs.set(runId, state);
|
||||
this.#emit({ type: 'run_started', runId, name });
|
||||
|
||||
// Run asynchronously — caller receives the runId immediately.
|
||||
void this.#executeRun(state, found.sourceFile, args ?? {});
|
||||
|
||||
return { runId };
|
||||
@@ -259,8 +251,6 @@ export class WorkflowManager {
|
||||
};
|
||||
}
|
||||
|
||||
// ---- internal execution ----
|
||||
|
||||
/**
|
||||
* Execute the workflow script in the sandbox.
|
||||
*/
|
||||
@@ -373,7 +363,6 @@ export class WorkflowManager {
|
||||
spec: AgentTaskSpec,
|
||||
signal?: AbortSignal,
|
||||
): Promise<unknown> {
|
||||
// ---- 0. Check resumability cache before creating a new task ----
|
||||
const cacheKeyStr = cacheKey(spec, '');
|
||||
const cached = getCachedResult(cacheKeyStr);
|
||||
if (cached) {
|
||||
@@ -382,7 +371,6 @@ export class WorkflowManager {
|
||||
|
||||
const model = spec.model ?? null;
|
||||
|
||||
// ---- 1. Create a session for this agent task ----
|
||||
const sessionName = `workflow-agent-${spec.label ?? 'task'}`;
|
||||
const sessionResult = await this.sql.begin(async (tx) => {
|
||||
const [session] = await tx<{ id: string }[]>`
|
||||
@@ -395,7 +383,6 @@ export class WorkflowManager {
|
||||
});
|
||||
const sessionId = sessionResult.id;
|
||||
|
||||
// ---- 2. Create a chat in this session ----
|
||||
const chatResult = await this.sql.begin(async (tx) => {
|
||||
const [chat] = await tx<{ id: string }[]>`
|
||||
INSERT INTO chats (session_id, name)
|
||||
@@ -407,8 +394,7 @@ export class WorkflowManager {
|
||||
});
|
||||
const chatId = chatResult.id;
|
||||
|
||||
// ---- 3. Insert user message + streaming assistant message ----
|
||||
const { userMessageId, assistantMessageId } = await this.sql.begin(async (tx) => {
|
||||
const { userMessageId: _userMessageId, assistantMessageId } = await this.sql.begin(async (tx) => {
|
||||
const [userMsg] = await tx<{ id: string }[]>`
|
||||
INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
|
||||
VALUES (${sessionId}, ${chatId}, 'user', ${prompt}, 'complete', clock_timestamp())
|
||||
@@ -425,8 +411,6 @@ export class WorkflowManager {
|
||||
};
|
||||
});
|
||||
|
||||
// ---- 4. Dispatch inference ----
|
||||
// Create a bounded InferenceContext that won't crash on missing WS
|
||||
const ctx: import('../inference/types.js').InferenceContext = {
|
||||
sql: this.sql,
|
||||
config: this.config,
|
||||
@@ -451,7 +435,6 @@ export class WorkflowManager {
|
||||
signal?.removeEventListener('abort', onAbort);
|
||||
});
|
||||
|
||||
// ---- 5. Poll for completion ----
|
||||
try {
|
||||
const result = await this.#pollForCompletion(
|
||||
chatId,
|
||||
@@ -607,7 +590,7 @@ export class WorkflowManager {
|
||||
* Create a minimal WorkflowContext for non-execution purposes
|
||||
* (e.g. loading meta).
|
||||
*/
|
||||
#createMinimalContext(runId: string): Record<string, unknown> {
|
||||
#createMinimalContext(_runId: string): Record<string, unknown> {
|
||||
return {
|
||||
agent: () => Promise.reject(new Error('Not available in this context')),
|
||||
parallel: () => Promise.reject(new Error('Not available in this context')),
|
||||
@@ -634,8 +617,6 @@ export class WorkflowManager {
|
||||
}
|
||||
}
|
||||
|
||||
// ---- internal types ----
|
||||
|
||||
/**
|
||||
* Metadata returned from listWorkflows / getWorkflow.
|
||||
*/
|
||||
|
||||
@@ -8,10 +8,6 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import type { AgentTaskSpec } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Shape of a cached agent task result. Mirrors the successful fields of
|
||||
* `AgentTaskResult` without the runtime-only `cached` flag.
|
||||
@@ -31,10 +27,6 @@ interface CacheEntry {
|
||||
insertedAt: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cache store
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Default TTL for cached entries (30 minutes).
|
||||
* After this period entries are considered stale and are evicted on access.
|
||||
@@ -51,10 +43,6 @@ const MAX_ENTRIES = 500;
|
||||
*/
|
||||
const cache = new Map<string, CacheEntry>();
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Build a deterministic SHA-256 hash for an agent task specification.
|
||||
*
|
||||
@@ -168,10 +156,6 @@ export function cacheSize(): number {
|
||||
return cache.size;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Stable JSON serialisation that produces the same output string for the same
|
||||
* data regardless of JavaScript object property insertion order.
|
||||
|
||||
@@ -76,7 +76,6 @@ export function isEsmSyntax(code: string): boolean {
|
||||
*/
|
||||
export function buildSandbox(context: WorkflowContext): Record<string, unknown> {
|
||||
return {
|
||||
// --- Workflow API (from context) ---
|
||||
agent: context.agent,
|
||||
parallel: context.parallel,
|
||||
pipeline: context.pipeline,
|
||||
@@ -86,7 +85,6 @@ export function buildSandbox(context: WorkflowContext): Record<string, unknown>
|
||||
args: context.args,
|
||||
workflow: context.workflow,
|
||||
|
||||
// --- Safe built-ins ---
|
||||
console: {
|
||||
log: context.log,
|
||||
warn: context.log,
|
||||
@@ -122,7 +120,6 @@ export function buildSandbox(context: WorkflowContext): Record<string, unknown>
|
||||
true: true,
|
||||
false: false,
|
||||
|
||||
// --- CommonJS interop ---
|
||||
module: { exports: {} },
|
||||
exports: {},
|
||||
require: undefined, // intentionally disabled
|
||||
|
||||
@@ -132,6 +132,11 @@ export interface Agent {
|
||||
// vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
|
||||
// Maps to DeepSeek's reasoning_effort API param.
|
||||
reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
|
||||
// vDeepSeek: JSON output mode. When set, model outputs valid JSON object.
|
||||
response_format: { type: 'json_object' } | null;
|
||||
// vDeepSeek: prefix content for chat prefix completion. When set, injects
|
||||
// an assistant prefix message forcing the model to complete from it.
|
||||
prefix_content: string | null;
|
||||
}
|
||||
|
||||
// One entry per malformed `## Name` block. Per-block errors don't fail the
|
||||
@@ -224,7 +229,9 @@ export interface Message {
|
||||
// (qwen3.6 etc.). Populated from message_parts via the messages_with_parts
|
||||
// view's reasoning_parts column. Optional — most rows have no reasoning
|
||||
// and the API may omit the field on legacy responses.
|
||||
reasoning_parts?: Array<{ text: string }> | null;
|
||||
// `signature` (Phase 2) carries the Anthropic thinking-block signature so it
|
||||
// can be replayed verbatim on the next turn. Absent for non-anthropic models.
|
||||
reasoning_parts?: Array<{ text: string; signature?: string }> | null;
|
||||
// v1.11: anchored rolling compaction. Optional so consumers that SELECT
|
||||
// the pre-v1.11 column set still type-check. See compaction.ts +
|
||||
// schema.sql for semantics.
|
||||
|
||||
Reference in New Issue
Block a user