Compare commits
7 Commits
v2.8.16-om
...
v2.8.19-do
| Author | SHA1 | Date | |
|---|---|---|---|
| 917a229363 | |||
| 39be5ce413 | |||
| 378e29308e | |||
| 8f6a814ab0 | |||
| 3c019a2281 | |||
| 203cfd2fa8 | |||
| c11e26090f |
@@ -20,6 +20,12 @@ SEARXNG_URL=http://100.114.205.53:8888
|
||||
# with FAST_MODEL when unset.
|
||||
# TASK_MODEL_URL=http://100.90.172.55:7995
|
||||
|
||||
# DeepSeek API key. When set, models with IDs starting with 'deepseek-'
|
||||
# (e.g. deepseek-chat, deepseek-reasoner, deepseek-v4-flash) route through
|
||||
# DeepSeek's API instead of llama-swap. Requires a DeepSeek Platform API key.
|
||||
# DEEPSEEK_API_KEY=sk-...
|
||||
# DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||
|
||||
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
||||
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
||||
# sessions where the model only needs read-only filesystem access.
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
|
||||
All notable changes per release tag. Most recent on top, ordered by tag creation date (which matches the git history). Tag names follow `vMAJOR.MINOR.PATCH-slug` — the slug describes what shipped, so the tag name alone is enough to recall the batch.
|
||||
|
||||
## v2.8.18-deepseek-whale-lift — 2026-06-08
|
||||
|
||||
Integrates DeepSeek API directly into BooChat and BooCoder via `@ai-sdk/deepseek`, replacing the generic `openai-compatible` wrapper. DeepSeek V4 models (`deepseek-v4-flash`, `deepseek-v4-pro`) with configurable thinking effort levels appear in both chat and coder pane model pickers. Full token tracking — cache hit tokens and reasoning tokens — flow from the API through new DB columns and WS frames into the UI message stats line. Lifts three high-value features from the Whale codebase: a schema-based tool input repair system that coerces types and unwraps markdown autolinks before Zod validation, a shell-based lifecycle hooks system (PreToolUse, PostToolUse, Stop, PreCompact, PostCompact) with JSON stdin/stdout contract, and per-MCP-server permissions (allow/ask/deny) gating tool execution.
|
||||
|
||||
## v2.8.0-fork-lifts — 2026-06-07
|
||||
|
||||
Completes the eight fork-lift integrations from `/opt/forks` into BooCode: boocontext sidecar upgrade, LSP code intelligence, DCP clean-room pruning, institutional memory, subagent protocol enhancements, plugin hook host, inference reliability (tool-shim + loop detectors), and TokenScope token breakdown. Backfills edit safety guards (truncation + dropped imports) and the TokenScope analyzer/persist module. Closes the fork-lifts-mit epic.
|
||||
|
||||
@@ -50,6 +50,8 @@ const ConfigSchema = z.object({
|
||||
// only reaped after it's been untouched this long (avoids sweeping a dir mid
|
||||
// ensureSessionWorktree create). 1h default.
|
||||
ORPHAN_WORKTREE_GRACE_MS: z.coerce.number().int().positive().default(3_600_000),
|
||||
DEEPSEEK_API_KEY: z.string().optional(),
|
||||
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||
});
|
||||
|
||||
export type Config = z.infer<typeof ConfigSchema>;
|
||||
|
||||
@@ -29,7 +29,9 @@ import { registerProviderRoutes } from './routes/providers.js';
|
||||
import { registerWorktreeSafetyRoutes } from './routes/worktree-safety.js';
|
||||
import { registerLifecycleRoutes } from './routes/lifecycle.js';
|
||||
import { registerAnalyticsRoutes } from './routes/analytics.js';
|
||||
import { registerPlanRoutes } from './routes/plans.js';
|
||||
import { registerWebSocket } from './routes/ws.js';
|
||||
import { updatePlanFromRun } from './services/plan-store.js';
|
||||
// Phase 4: dispatcher + agent probe
|
||||
import { createDispatcher } from './services/dispatcher.js';
|
||||
// Orchestrator (Phase 2): DB-backed flow-runner; advances on the dispatcher's
|
||||
@@ -229,8 +231,16 @@ async function main() {
|
||||
|
||||
// Orchestrator (Phase 2): the flow-runner reacts to the dispatcher's
|
||||
// onTaskTerminal hook to advance flow_runs. Created before the dispatcher so its
|
||||
// terminal callback can be wired in.
|
||||
const flowRunner = createFlowRunner({ sql, broker, log: app.log, config });
|
||||
// terminal callback can be wired in. onRunTerminal updates linked plans.
|
||||
const flowRunner = createFlowRunner({
|
||||
sql, broker, log: app.log, config,
|
||||
onRunTerminal: (runId, status) => {
|
||||
updatePlanFromRun(sql, runId, status).catch((err) => {
|
||||
app.log.error({ err: err instanceof Error ? err.message : String(err), runId },
|
||||
'plans: updatePlanFromRun failed');
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
// Arena SEAM (a): build the local-model set from the live llama-swap model list.
|
||||
// Both bare IDs ('qwen3.6-35b') and prefixed IDs ('llama-swap/qwen3.6-35b') are
|
||||
@@ -384,6 +394,7 @@ async function main() {
|
||||
registerWorktreeSafetyRoutes(app, sql);
|
||||
registerLifecycleRoutes(app, sql);
|
||||
registerAnalyticsRoutes(app, sql);
|
||||
registerPlanRoutes(app, sql);
|
||||
registerWebSocket(app, sql, broker);
|
||||
|
||||
// Graceful shutdown
|
||||
|
||||
134
apps/coder/src/routes/plans.ts
Normal file
134
apps/coder/src/routes/plans.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
/**
|
||||
* Boulder state — plan routes.
|
||||
*
|
||||
* GET /api/plans?project_id= — list plans for a project
|
||||
* GET /api/plans/active?project_id= — list active (in-flight) plans
|
||||
* POST /api/plans — create a new plan
|
||||
* PATCH /api/plans/:id — update plan progress / status
|
||||
*/
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import { z } from 'zod';
|
||||
import type { Sql } from '../db.js';
|
||||
import {
|
||||
createPlan,
|
||||
getPlan,
|
||||
listPlans,
|
||||
listActivePlans,
|
||||
updatePlan,
|
||||
} from '../services/plan-store.js';
|
||||
|
||||
const CreatePlanBody = z.object({
|
||||
project_id: z.string().uuid(),
|
||||
title: z.string().min(1).max(500),
|
||||
description: z.string().max(10_000).optional(),
|
||||
flow_run_id: z.string().uuid().optional(),
|
||||
metadata: z.record(z.unknown()).optional(),
|
||||
});
|
||||
|
||||
const ListPlansQuery = z.object({
|
||||
project_id: z.string().uuid(),
|
||||
});
|
||||
|
||||
const UpdatePlanBody = z.object({
|
||||
title: z.string().min(1).max(500).optional(),
|
||||
description: z.string().max(10_000).nullable().optional(),
|
||||
status: z.enum(['active', 'completed', 'cancelled', 'failed']).optional(),
|
||||
progress_pct: z.number().int().min(0).max(100).optional(),
|
||||
items_total: z.number().int().min(0).optional(),
|
||||
items_completed: z.number().int().min(0).optional(),
|
||||
metadata: z.record(z.unknown()).nullable().optional(),
|
||||
});
|
||||
|
||||
const PlanIdParam = z.string().uuid();
|
||||
|
||||
export function registerPlanRoutes(app: FastifyInstance, sql: Sql): void {
|
||||
// GET /api/plans?project_id= — all plans for a project
|
||||
app.get('/api/plans', async (req, reply) => {
|
||||
const parsed = ListPlansQuery.safeParse(req.query);
|
||||
if (!parsed.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid query', details: parsed.error.flatten() };
|
||||
}
|
||||
const plans = await listPlans(sql, parsed.data.project_id);
|
||||
return { plans };
|
||||
});
|
||||
|
||||
// GET /api/plans/active?project_id= — active plans only
|
||||
app.get('/api/plans/active', async (req, reply) => {
|
||||
const parsed = ListPlansQuery.safeParse(req.query);
|
||||
if (!parsed.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid query', details: parsed.error.flatten() };
|
||||
}
|
||||
const plans = await listActivePlans(sql, parsed.data.project_id);
|
||||
return { plans };
|
||||
});
|
||||
|
||||
// POST /api/plans — create a new plan
|
||||
app.post('/api/plans', async (req, reply) => {
|
||||
const parsed = CreatePlanBody.safeParse(req.body);
|
||||
if (!parsed.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid body', details: parsed.error.flatten() };
|
||||
}
|
||||
|
||||
const { project_id, title, description, flow_run_id, metadata } = parsed.data;
|
||||
const plan = await createPlan(sql, {
|
||||
projectId: project_id,
|
||||
title,
|
||||
description,
|
||||
flowRunId: flow_run_id,
|
||||
metadata,
|
||||
});
|
||||
|
||||
reply.code(201);
|
||||
return { plan };
|
||||
});
|
||||
|
||||
// GET /api/plans/:id — single plan
|
||||
app.get<{ Params: { id: string } }>('/api/plans/:id', async (req, reply) => {
|
||||
const parsedId = PlanIdParam.safeParse(req.params.id);
|
||||
if (!parsedId.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid id' };
|
||||
}
|
||||
const plan = await getPlan(sql, parsedId.data);
|
||||
if (!plan) {
|
||||
reply.code(404);
|
||||
return { error: 'plan not found' };
|
||||
}
|
||||
return { plan };
|
||||
});
|
||||
|
||||
// PATCH /api/plans/:id — update plan
|
||||
app.patch<{ Params: { id: string } }>('/api/plans/:id', async (req, reply) => {
|
||||
const parsedId = PlanIdParam.safeParse(req.params.id);
|
||||
if (!parsedId.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid id' };
|
||||
}
|
||||
|
||||
const parsed = UpdatePlanBody.safeParse(req.body);
|
||||
if (!parsed.success) {
|
||||
reply.code(400);
|
||||
return { error: 'invalid body', details: parsed.error.flatten() };
|
||||
}
|
||||
|
||||
const { title, description, status, progress_pct, items_total, items_completed, metadata } = parsed.data;
|
||||
const plan = await updatePlan(sql, parsedId.data, {
|
||||
title,
|
||||
description: description === null ? null : description,
|
||||
status,
|
||||
progressPct: progress_pct,
|
||||
itemsTotal: items_total,
|
||||
itemsCompleted: items_completed,
|
||||
metadata: metadata === null ? null : metadata,
|
||||
});
|
||||
|
||||
if (!plan) {
|
||||
reply.code(404);
|
||||
return { error: 'plan not found' };
|
||||
}
|
||||
return { plan };
|
||||
});
|
||||
}
|
||||
@@ -438,3 +438,31 @@ CREATE TABLE IF NOT EXISTS flow_step_events (
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS flow_step_events_run_idx ON flow_step_events(run_id);
|
||||
|
||||
-- v2.9.0: Boulder state — cross-session plan persistence with auto-resumption.
|
||||
-- project_id carries no FK (matches tasks/fow_runs convention).
|
||||
-- flow_run_id links the plan to an in-flight orchestrator run for auto-tracking.
|
||||
CREATE TABLE IF NOT EXISTS plans (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
project_id UUID NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
flow_run_id UUID REFERENCES flow_runs(id) ON DELETE SET NULL,
|
||||
progress_pct INTEGER NOT NULL DEFAULT 0,
|
||||
items_total INTEGER NOT NULL DEFAULT 0,
|
||||
items_completed INTEGER NOT NULL DEFAULT 0,
|
||||
metadata JSONB,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
|
||||
CONSTRAINT plans_status_chk CHECK (status IN ('active', 'completed', 'cancelled', 'failed')),
|
||||
CONSTRAINT plans_progress_chk CHECK (progress_pct >= 0 AND progress_pct <= 100),
|
||||
CONSTRAINT plans_items_chk CHECK (items_total >= 0 AND items_completed >= 0 AND items_completed <= items_total)
|
||||
);
|
||||
|
||||
-- Plan queries by project and status.
|
||||
CREATE INDEX IF NOT EXISTS plans_project_status_idx ON plans(project_id, status);
|
||||
-- Fast lookup of the plan owning a flow run (for onRunTerminal updates).
|
||||
CREATE INDEX IF NOT EXISTS plans_flow_run_id_idx ON plans(flow_run_id);
|
||||
-- Plans sorted by recency (for "resume from last" surface).
|
||||
CREATE INDEX IF NOT EXISTS plans_project_created_idx ON plans(project_id, created_at DESC);
|
||||
|
||||
16
apps/coder/src/services/__tests__/plan-store.test.ts
Normal file
16
apps/coder/src/services/__tests__/plan-store.test.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { planStatusFromRun } from '../plan-store.js';
|
||||
|
||||
describe('planStatusFromRun', () => {
|
||||
it('maps completed to completed', () => {
|
||||
expect(planStatusFromRun('completed')).toBe('completed');
|
||||
});
|
||||
|
||||
it('maps failed to failed', () => {
|
||||
expect(planStatusFromRun('failed')).toBe('failed');
|
||||
});
|
||||
|
||||
it('maps cancelled to cancelled', () => {
|
||||
expect(planStatusFromRun('cancelled')).toBe('cancelled');
|
||||
});
|
||||
});
|
||||
@@ -89,6 +89,8 @@ interface Deps {
|
||||
broker: Broker;
|
||||
log: FastifyBaseLogger;
|
||||
config: Config;
|
||||
/** Fired when a flow run reaches a terminal state (for plan-store integration). */
|
||||
onRunTerminal?: (runId: string, status: 'completed' | 'failed' | 'cancelled') => void;
|
||||
}
|
||||
|
||||
interface FlowStepRow {
|
||||
@@ -479,6 +481,7 @@ export function createFlowRunner(deps: Deps): FlowRunner {
|
||||
WHERE id = ${runId} AND status = 'running'
|
||||
`;
|
||||
if (updated.count === 0) return; // already terminal (e.g. cancelled) — don't publish
|
||||
deps.onRunTerminal?.(runId, 'completed');
|
||||
publishStep(runId, lastAgentStepId(flow, input, model), 'completed', {
|
||||
run_status: 'completed',
|
||||
report,
|
||||
@@ -498,6 +501,7 @@ export function createFlowRunner(deps: Deps): FlowRunner {
|
||||
WHERE id = ${runId} AND status = 'running'
|
||||
`;
|
||||
if (updated.count === 0) return;
|
||||
deps.onRunTerminal?.(runId, 'failed');
|
||||
const stepId = failedStepId ?? (flow ? lastAgentStepId(flow, input, model) : 'run');
|
||||
log.warn({ runId, error }, 'flow-runner: run failed');
|
||||
await appendStepEvent(sql, runId, stepId, 'failed', { error });
|
||||
@@ -512,6 +516,7 @@ export function createFlowRunner(deps: Deps): FlowRunner {
|
||||
WHERE id = ${runId} AND status = 'running'
|
||||
`;
|
||||
if (updated.count === 0) return; // idempotent — already terminal
|
||||
deps.onRunTerminal?.(runId, 'cancelled');
|
||||
// Any remaining pending steps are unreachable; mark + publish them so the
|
||||
// pane can show them as cancelled rather than stuck in pending.
|
||||
const pending = await sql<{ step_id: string; kind: string }[]>`
|
||||
@@ -742,6 +747,7 @@ export function createFlowRunner(deps: Deps): FlowRunner {
|
||||
WHERE id = ${runId} AND status = 'running'
|
||||
`;
|
||||
if (updated.count === 0) return { cancelled: false, taskIds: [] };
|
||||
deps.onRunTerminal?.(runId, 'cancelled');
|
||||
|
||||
// Mark all non-terminal steps cancelled and collect in-flight task_ids.
|
||||
const steps = await sql<{ step_id: string; task_id: string | null; kind: string }[]>`
|
||||
|
||||
184
apps/coder/src/services/plan-store.ts
Normal file
184
apps/coder/src/services/plan-store.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
/**
|
||||
* Boulder state — cross-session plan persistence for BooCode.
|
||||
*
|
||||
* Plans live above flow_runs: a plan tracks a user's work goal and can link to
|
||||
* a flow run for automatic progress tracking. When the linked flow run reaches
|
||||
* a terminal state (completed/failed/cancelled), the plan is auto-updated.
|
||||
*
|
||||
* Auto-resumption: on startup, plans with a linked in-flight flow_run are
|
||||
* surfaced via the GET endpoint so the UI can show a resume prompt. The
|
||||
* flow-runner's initResume() re-advances the actual run; this store surfaces
|
||||
* the plan-level view.
|
||||
*/
|
||||
import type { Sql } from '../db.js';
|
||||
|
||||
export interface Plan {
|
||||
id: string;
|
||||
project_id: string;
|
||||
title: string;
|
||||
description: string | null;
|
||||
status: string;
|
||||
flow_run_id: string | null;
|
||||
progress_pct: number;
|
||||
items_total: number;
|
||||
items_completed: number;
|
||||
metadata: Record<string, unknown> | null;
|
||||
created_at: Date;
|
||||
updated_at: Date;
|
||||
}
|
||||
|
||||
export interface CreatePlanOpts {
|
||||
projectId: string;
|
||||
title: string;
|
||||
description?: string;
|
||||
flowRunId?: string;
|
||||
metadata?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface UpdatePlanOpts {
|
||||
title?: string;
|
||||
description?: string | null;
|
||||
status?: 'active' | 'completed' | 'cancelled' | 'failed';
|
||||
progressPct?: number;
|
||||
itemsTotal?: number;
|
||||
itemsCompleted?: number;
|
||||
metadata?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
export function createPlan(sql: Sql, opts: CreatePlanOpts): Promise<Plan> {
|
||||
return sql`
|
||||
INSERT INTO plans (project_id, title, description, flow_run_id, metadata)
|
||||
VALUES (
|
||||
${opts.projectId},
|
||||
${opts.title},
|
||||
${opts.description ?? null},
|
||||
${opts.flowRunId ?? null},
|
||||
${opts.metadata ? sql.json(opts.metadata as never) : null}
|
||||
)
|
||||
RETURNING *
|
||||
`.then((rows) => rows[0] as unknown as Plan);
|
||||
}
|
||||
|
||||
export function getPlan(sql: Sql, planId: string): Promise<Plan | null> {
|
||||
return sql`
|
||||
SELECT * FROM plans WHERE id = ${planId}
|
||||
`.then((rows) => (rows[0] as unknown as Plan) ?? null);
|
||||
}
|
||||
|
||||
export function listPlans(sql: Sql, projectId: string): Promise<Plan[]> {
|
||||
return sql`
|
||||
SELECT * FROM plans
|
||||
WHERE project_id = ${projectId}
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 100
|
||||
` as Promise<Plan[]>;
|
||||
}
|
||||
|
||||
export function listActivePlans(sql: Sql, projectId: string): Promise<Plan[]> {
|
||||
return sql`
|
||||
SELECT * FROM plans
|
||||
WHERE project_id = ${projectId} AND status = 'active'
|
||||
ORDER BY created_at DESC
|
||||
` as Promise<Plan[]>;
|
||||
}
|
||||
|
||||
export async function updatePlan(
|
||||
sql: Sql,
|
||||
planId: string,
|
||||
opts: UpdatePlanOpts,
|
||||
): Promise<Plan | null> {
|
||||
const sets: string[] = [];
|
||||
const values: unknown[] = [];
|
||||
|
||||
if (opts.title !== undefined) {
|
||||
sets.push(`title = $${values.length + 1}`);
|
||||
values.push(opts.title);
|
||||
}
|
||||
if (opts.description !== undefined) {
|
||||
sets.push(`description = $${values.length + 1}`);
|
||||
values.push(opts.description);
|
||||
}
|
||||
if (opts.status !== undefined) {
|
||||
sets.push(`status = $${values.length + 1}`);
|
||||
values.push(opts.status);
|
||||
}
|
||||
if (opts.progressPct !== undefined) {
|
||||
sets.push(`progress_pct = $${values.length + 1}`);
|
||||
values.push(opts.progressPct);
|
||||
}
|
||||
if (opts.itemsTotal !== undefined) {
|
||||
sets.push(`items_total = $${values.length + 1}`);
|
||||
values.push(opts.itemsTotal);
|
||||
}
|
||||
if (opts.itemsCompleted !== undefined) {
|
||||
sets.push(`items_completed = $${values.length + 1}`);
|
||||
values.push(opts.itemsCompleted);
|
||||
}
|
||||
if (opts.metadata !== undefined) {
|
||||
sets.push(`metadata = $${values.length + 1}::jsonb`);
|
||||
values.push(opts.metadata !== null ? JSON.stringify(opts.metadata) : null);
|
||||
}
|
||||
|
||||
if (sets.length === 0) return getPlan(sql, planId);
|
||||
|
||||
sets.push(`updated_at = clock_timestamp()`);
|
||||
|
||||
const query = `
|
||||
UPDATE plans SET ${sets.join(', ')}
|
||||
WHERE id = $${values.length + 1}
|
||||
RETURNING *
|
||||
`;
|
||||
values.push(planId);
|
||||
|
||||
const result = await sql.unsafe(query, values as never[]);
|
||||
return (result[0] as unknown as Plan) ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a flow run reaches a terminal state. Updates the linked plan's
|
||||
* status based on the run outcome:
|
||||
* - completed → plan completed
|
||||
* - failed → plan failed
|
||||
* - cancelled → plan cancelled
|
||||
* Returns true when a plan was updated, false when no plan is linked to the run.
|
||||
*/
|
||||
export async function updatePlanFromRun(
|
||||
sql: Sql,
|
||||
runId: string,
|
||||
runStatus: 'completed' | 'failed' | 'cancelled',
|
||||
): Promise<boolean> {
|
||||
const planStatus = planStatusFromRun(runStatus);
|
||||
const updated = await sql`
|
||||
UPDATE plans
|
||||
SET status = ${planStatus}, progress_pct = 100,
|
||||
items_completed = items_total, updated_at = clock_timestamp()
|
||||
WHERE flow_run_id = ${runId} AND status = 'active'
|
||||
`;
|
||||
return updated.count > 0;
|
||||
}
|
||||
|
||||
/** Map a flow-run terminal status to its corresponding plan status. Pure. */
|
||||
export function planStatusFromRun(runStatus: 'completed' | 'failed' | 'cancelled'): string {
|
||||
return runStatus === 'completed' ? 'completed' : runStatus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find any active plan linked to a running flow run — used by the startup
|
||||
* resume path to surface plans that have in-flight orchestrator runs.
|
||||
*/
|
||||
export async function findPlanWithRunningRun(
|
||||
sql: Sql,
|
||||
projectId: string,
|
||||
): Promise<(Plan & { run_status: string }) | null> {
|
||||
const [row] = await sql`
|
||||
SELECT p.*, fr.status AS run_status
|
||||
FROM plans p
|
||||
JOIN flow_runs fr ON fr.id = p.flow_run_id
|
||||
WHERE p.project_id = ${projectId}
|
||||
AND p.status = 'active'
|
||||
AND fr.status = 'running'
|
||||
ORDER BY p.created_at DESC
|
||||
LIMIT 1
|
||||
`;
|
||||
return (row as unknown as Plan & { run_status: string }) ?? null;
|
||||
}
|
||||
@@ -29,6 +29,22 @@ interface AgentRow {
|
||||
last_probed_at: string | Date | null;
|
||||
}
|
||||
|
||||
export async function fetchDeepSeekModels(config: Config): Promise<ProviderModel[]> {
|
||||
if (!config.DEEPSEEK_API_KEY) return [];
|
||||
try {
|
||||
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
const res = await fetch(`${baseURL}/v1/models`, {
|
||||
headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||
signal: AbortSignal.timeout(5_000),
|
||||
});
|
||||
if (!res.ok) return [];
|
||||
const parsed = (await res.json()) as { data?: Array<{ id: string }> };
|
||||
return (parsed.data ?? []).map((m) => ({ id: m.id, label: m.id }));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
export async function fetchLlamaSwapModels(config: Config): Promise<ProviderModel[]> {
|
||||
try {
|
||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
||||
@@ -256,7 +272,13 @@ export async function getProviderSnapshot(
|
||||
}
|
||||
|
||||
const build = async (): Promise<ProviderSnapshotEntry[]> => {
|
||||
const llamaModels = await fetchLlamaSwapModels(config);
|
||||
const [llamaModels, deepseekModels] = await Promise.all([
|
||||
fetchLlamaSwapModels(config),
|
||||
fetchDeepSeekModels(config),
|
||||
]);
|
||||
// Merge DeepSeek models into the llama-swap model pool so the boocode
|
||||
// provider (which sources from llama-swap) also includes DeepSeek models.
|
||||
const mergedModels = mergeModels(llamaModels, deepseekModels);
|
||||
const agents = await sql<AgentRow[]>`
|
||||
SELECT name, install_path, supports_acp, models, commands, label, transport, last_probed_at FROM available_agents
|
||||
`;
|
||||
@@ -265,7 +287,7 @@ export async function getProviderSnapshot(
|
||||
|
||||
const entries = await Promise.all(
|
||||
[...getResolvedRegistry().values()].map((resolved) =>
|
||||
buildProviderEntry(resolved, agentMap.get(resolved.id), llamaModels, resolvedCwd, ttlMs, force),
|
||||
buildProviderEntry(resolved, agentMap.get(resolved.id), mergedModels, resolvedCwd, ttlMs, force),
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
@@ -77,8 +77,9 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"@boocode/contracts": "workspace:*",
|
||||
"@ai-sdk/deepseek": "^2.0.35",
|
||||
"@ai-sdk/openai-compatible": "^2.0.47",
|
||||
"@boocode/contracts": "workspace:*",
|
||||
"@fastify/static": "^7.0.4",
|
||||
"@fastify/websocket": "^10.0.1",
|
||||
"@modelcontextprotocol/sdk": "^1.29.0",
|
||||
|
||||
@@ -26,6 +26,14 @@ const ConfigSchema = z.object({
|
||||
FAST_MODEL: z.string().optional(),
|
||||
TASK_MODEL_URL: z.string().url().optional(),
|
||||
LLAMA_SIDECAR_URL: z.string().url().optional(),
|
||||
// vDeepSeek: DeepSeek API key for direct API access. When set, models
|
||||
// with IDs starting with 'deepseek-' route through DeepSeek's API instead
|
||||
// of llama-swap. Defaults to empty (DeepSeek routing disabled).
|
||||
DEEPSEEK_API_KEY: z.string().optional(),
|
||||
// Optional base URL override for DeepSeek API. Defaults to api.deepseek.com.
|
||||
DEEPSEEK_BASE_URL: z.string().url().default('https://api.deepseek.com'),
|
||||
// vWhale hooks: path to hooks JSON config file. Missing file = no hooks.
|
||||
HOOKS_CONFIG_PATH: z.string().default('/data/hooks.json'),
|
||||
});
|
||||
|
||||
export type Config = z.infer<typeof ConfigSchema>;
|
||||
|
||||
@@ -31,6 +31,7 @@ import { loadMcpConfig } from './services/mcp-config.js';
|
||||
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
||||
import { appendMcpTools } from './services/tools.js';
|
||||
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
||||
import { loadHooksConfig, createHookRunner } from './services/hooks.js';
|
||||
|
||||
async function main() {
|
||||
const config = loadConfig();
|
||||
@@ -136,11 +137,17 @@ async function main() {
|
||||
app.log.warn({ err }, 'skills boot walk failed');
|
||||
}
|
||||
|
||||
// vWhale hooks: load hook config and create runner. Missing file = no hooks.
|
||||
loadHooksConfig(config.HOOKS_CONFIG_PATH);
|
||||
const hookRunner = createHookRunner();
|
||||
const hasHooks = Object.keys(loadHooksConfig(config.HOOKS_CONFIG_PATH).hooks).length > 0;
|
||||
|
||||
const inference = createInferenceRunner(
|
||||
{
|
||||
sql,
|
||||
config,
|
||||
log: app.log,
|
||||
hooks: hasHooks ? hookRunner : undefined,
|
||||
publish: (sessionId, frame) => {
|
||||
// v1.13.11-b: route through the typed publishFrame so the broker's
|
||||
// Zod gate validates every inference frame before delivery.
|
||||
@@ -166,7 +173,7 @@ async function main() {
|
||||
// bubble up so the route can reply 500 — manual /compact failures
|
||||
// should be loud (the user just clicked a button).
|
||||
runCompaction: (chatId) =>
|
||||
compaction.process({ sql, config, log: app.log, broker, chatId }),
|
||||
compaction.process({ sql, config, log: app.log, broker, chatId, hooks: hasHooks ? hookRunner : undefined }),
|
||||
cancelInference: async (sessionId, chatId) => {
|
||||
return inference.cancel(sessionId, chatId);
|
||||
},
|
||||
|
||||
@@ -2,26 +2,55 @@ import type { FastifyInstance } from 'fastify';
|
||||
import type { Config } from '../config.js';
|
||||
import type { ModelInfo } from '../types/api.js';
|
||||
|
||||
interface LlamaSwapModelsResponse {
|
||||
interface ApiModelsResponse {
|
||||
data?: ModelInfo[];
|
||||
}
|
||||
|
||||
const DEEPSEEK_STATIC_MODELS: ModelInfo[] = [
|
||||
{ id: 'deepseek-v4-flash', object: 'model', created: 0, owned_by: 'deepseek' },
|
||||
{ id: 'deepseek-v4-pro', object: 'model', created: 0, owned_by: 'deepseek' },
|
||||
];
|
||||
|
||||
export function registerModelRoutes(app: FastifyInstance, config: Config): void {
|
||||
app.get('/api/models', async (_req, reply) => {
|
||||
const models: ModelInfo[] = [];
|
||||
|
||||
// 1. Fetch llama-swap models
|
||||
try {
|
||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/models`);
|
||||
if (!res.ok) {
|
||||
reply.code(502);
|
||||
return { error: `llama-swap returned ${res.status}` };
|
||||
if (res.ok) {
|
||||
const parsed = (await res.json()) as ApiModelsResponse;
|
||||
if (parsed.data) models.push(...parsed.data);
|
||||
}
|
||||
const parsed = (await res.json()) as LlamaSwapModelsResponse;
|
||||
return parsed.data ?? [];
|
||||
} catch (err) {
|
||||
reply.code(502);
|
||||
return {
|
||||
error: 'failed to reach llama-swap',
|
||||
details: err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
} catch {
|
||||
// llama-swap unreachable — proceed with whatever we have
|
||||
}
|
||||
|
||||
// 2. If DeepSeek is configured, fetch live models from their API
|
||||
if (config.DEEPSEEK_API_KEY) {
|
||||
try {
|
||||
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
const res = await fetch(`${baseURL}/v1/models`, {
|
||||
headers: { Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||
signal: AbortSignal.timeout(5_000),
|
||||
});
|
||||
if (res.ok) {
|
||||
const parsed = (await res.json()) as ApiModelsResponse;
|
||||
if (parsed.data) models.push(...parsed.data);
|
||||
} else {
|
||||
// API call failed — fall back to static model list
|
||||
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||
}
|
||||
} catch {
|
||||
// Network error — fall back to static model list
|
||||
models.push(...DEEPSEEK_STATIC_MODELS);
|
||||
}
|
||||
}
|
||||
|
||||
if (models.length === 0) {
|
||||
reply.code(502);
|
||||
return { error: 'no models available from any provider' };
|
||||
}
|
||||
return models;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -32,11 +32,18 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
content TEXT NOT NULL DEFAULT '',
|
||||
status TEXT NOT NULL DEFAULT 'complete',
|
||||
last_seq INT NOT NULL DEFAULT 0,
|
||||
cache_tokens INTEGER,
|
||||
reasoning_tokens INTEGER,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, created_at);
|
||||
|
||||
-- vDeepSeek: add cache/reasoning token columns early so messages_with_parts
|
||||
-- view (defined below) can reference them. IF NOT EXISTS guards re-runs.
|
||||
ALTER TABLE messages ADD COLUMN IF NOT EXISTS cache_tokens INTEGER;
|
||||
ALTER TABLE messages ADD COLUMN IF NOT EXISTS reasoning_tokens INTEGER;
|
||||
|
||||
-- v1.13.0: granular message parts table. v1.13.20: legacy tool_calls/
|
||||
-- tool_results columns dropped; message_parts is now the sole source of
|
||||
-- truth for tool calls, tool results, and reasoning. ON DELETE CASCADE
|
||||
@@ -126,8 +133,8 @@ SELECT
|
||||
FROM message_parts p
|
||||
WHERE p.message_id = m.id AND p.kind = 'reasoning' AND p.hidden_at IS NULL) AS reasoning_parts,
|
||||
-- NEW columns MUST be appended at the end: CREATE OR REPLACE VIEW can't
|
||||
-- reorder/rename existing columns (42P16). m.model added last.
|
||||
m.model
|
||||
-- reorder/rename existing columns (42P16). cache_tokens and reasoning_tokens added last.
|
||||
m.model, m.cache_tokens, m.reasoning_tokens
|
||||
FROM messages m;
|
||||
|
||||
-- v1.13.20: drop legacy tool_calls/tool_results columns. Reads have routed
|
||||
|
||||
@@ -106,6 +106,8 @@ interface ParsedFrontmatter {
|
||||
// allowed" — the model responds text-only.
|
||||
steps?: number;
|
||||
llama_extra_args?: string[];
|
||||
// vDeepSeek: thinking effort for DeepSeek V4 models.
|
||||
reasoning_effort?: string;
|
||||
}
|
||||
|
||||
// P5: table-driven validation for the "soft-range" numeric frontmatter fields.
|
||||
@@ -386,6 +388,7 @@ function parseAgentSection(section: RawSection): Omit<Agent, 'source'> {
|
||||
max_tool_calls: typeof fm.max_tool_calls === 'number' ? fm.max_tool_calls : null,
|
||||
steps: typeof fm.steps === 'number' ? fm.steps : null,
|
||||
llama_extra_args: Array.isArray(fm.llama_extra_args) ? fm.llama_extra_args : null,
|
||||
reasoning_effort: typeof fm.reasoning_effort === 'string' ? (fm.reasoning_effort as Agent['reasoning_effort']) : null,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
// DEPRECATED (Phase 4, Domain 2, v2.8.14): This HTTP client routes through
|
||||
// the Go codecontext sidecar (http://codecontext:8080). Superseded by the
|
||||
// boocontext MCP server. New callers should use boocontext MCP tool wrappers
|
||||
// directly. Keep this file for backward compatibility — the 16 existing
|
||||
// codecontext tool wrappers (under tools/codecontext/) still call through
|
||||
// callCodecontext(). Remove after full migration.
|
||||
//
|
||||
// v1.12 Track B.2: shared HTTP client for the codecontext sidecar. The 8
|
||||
// per-tool wrappers under tools/codecontext/ all funnel through callCodecontext
|
||||
// — they're thin adapters that supply toolName + args + projectPath. The
|
||||
@@ -112,6 +119,11 @@ export async function callCodecontext(
|
||||
req: CodecontextRequest,
|
||||
fetcher: typeof fetch = fetch,
|
||||
): Promise<CodecontextResponse> {
|
||||
// DEPRECATED: This function routes through the Go codecontext sidecar at
|
||||
// http://codecontext:8080. New callers should use boocontext MCP instead.
|
||||
console.warn(
|
||||
`[deprecated] callCodecontext("${req.toolName}") — route through boocontext MCP instead`,
|
||||
);
|
||||
// Step 1: realpath the project root, then realpath the requested target_dir
|
||||
// (defaulting to projectPath when the caller didn't pass one — the 12 wrappers
|
||||
// never pass target_dir; tests can override). A non-existent target_dir
|
||||
|
||||
@@ -24,6 +24,8 @@ import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
|
||||
import * as modelContextLookup from './model-context.js';
|
||||
import { SENTINEL_KINDS } from './inference/sentinels.js';
|
||||
import type { OpenAiMessage } from './inference/payload.js';
|
||||
import { resolveModelEndpoint } from './inference/provider.js';
|
||||
import type { HookRunner } from './hooks.js';
|
||||
|
||||
// v1.13.9: ratio-only overflow trigger. Fires compaction at 85% of ctx_max
|
||||
// (opencode session/overflow.ts pattern). Replaces the v1.11.0-era
|
||||
@@ -346,20 +348,22 @@ interface CompletionResult {
|
||||
completionTokens: number;
|
||||
}
|
||||
|
||||
async function callLlamaSwap(
|
||||
async function callLlm(
|
||||
config: Config,
|
||||
model: string,
|
||||
messages: OpenAiMessage[],
|
||||
log: FastifyBaseLogger,
|
||||
): Promise<CompletionResult> {
|
||||
const res = await fetch(`${config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
||||
const { url, headers, model: resolvedModel } = resolveModelEndpoint(config, model);
|
||||
const res = await fetch(`${url}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model, messages, stream: false }),
|
||||
headers,
|
||||
body: JSON.stringify({ model: resolvedModel, messages, stream: false }),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const text = await res.text().catch(() => '');
|
||||
throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
|
||||
const prefix = model.startsWith('deepseek-') ? 'deepseek' : 'llama-swap';
|
||||
throw new Error(`${prefix} returned ${res.status}: ${text.slice(0, 200)}`);
|
||||
}
|
||||
const json = (await res.json()) as {
|
||||
choices?: Array<{ message?: { content?: string } }>;
|
||||
@@ -383,6 +387,8 @@ export interface ProcessInput {
|
||||
log: FastifyBaseLogger;
|
||||
broker: Broker;
|
||||
chatId: string;
|
||||
/** vWhale: lifecycle hooks runner. Undefined when no hooks configured. */
|
||||
hooks?: HookRunner;
|
||||
}
|
||||
|
||||
// Runs one round of anchored rolling compaction on `chatId`. No-ops cleanly
|
||||
@@ -497,6 +503,17 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
at: new Date().toISOString(),
|
||||
});
|
||||
|
||||
// vWhale: PreCompact hook (best-effort, non-blocking).
|
||||
const msgBefore = messages.length;
|
||||
if (input.hooks) {
|
||||
input.hooks.run('PreCompact', {
|
||||
event: 'PreCompact',
|
||||
session_id: sessionId,
|
||||
chat_id: chatId,
|
||||
messages_before: msgBefore,
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// try/finally so the dot ALWAYS drops back to idle, even if the LLM call
|
||||
// throws or a downstream DB write fails. The succeeded flag gates the
|
||||
// 'compacted' frame + final log: we only signal completion to the UI when
|
||||
@@ -506,7 +523,7 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
let result: CompletionResult | undefined;
|
||||
try {
|
||||
// 7. Single completion (no tools). Throws on llama-swap failure.
|
||||
result = await callLlamaSwap(config, session.model, payload, log);
|
||||
result = await callLlm(config, session.model, payload, log);
|
||||
|
||||
// 7b. v1.11.3: fetch the model's true context window from llama-swap's
|
||||
// /upstream/<model>/props (the streaming completion doesn't carry it).
|
||||
@@ -558,6 +575,18 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
`;
|
||||
|
||||
succeeded = true;
|
||||
|
||||
// vWhale: PostCompact hook (best-effort, non-blocking).
|
||||
if (input.hooks) {
|
||||
input.hooks.run('PostCompact', {
|
||||
event: 'PostCompact',
|
||||
session_id: sessionId,
|
||||
chat_id: chatId,
|
||||
messages_before: msgBefore,
|
||||
messages_after: sel.head.length,
|
||||
summary: (result?.content ?? '').slice(0, 500),
|
||||
}).catch(() => {});
|
||||
}
|
||||
} finally {
|
||||
// Always restore the dot. Status='idle' (not 'error') even on failure —
|
||||
// the caller logs/re-surfaces the error separately; the dot doesn't
|
||||
|
||||
299
apps/server/src/services/hooks.ts
Normal file
299
apps/server/src/services/hooks.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* vWhale: lifecycle hook runner. Hooks are shell commands that fire at key
|
||||
* points in the inference pipeline. Each hook receives a JSON payload on
|
||||
* stdin and can return JSON on stdout to influence behavior.
|
||||
*
|
||||
* Inspired by Whale's hook system with 11 lifecycle events. BooCode
|
||||
* implements the most relevant subset: PreToolUse, PostToolUse,
|
||||
* UserPromptSubmit, Stop, PreCompact, PostCompact.
|
||||
*
|
||||
* Config: JSON file at HOOKS_CONFIG_PATH (default /data/hooks.json).
|
||||
* Format:
|
||||
* ```json
|
||||
* {
|
||||
* "hooks": {
|
||||
* "PreToolUse": [
|
||||
* { "match": "shell_run", "command": "python3 /data/hooks/check_shell.py", "timeout": 30 }
|
||||
* ],
|
||||
* "Stop": [
|
||||
* { "command": "node /data/hooks/log_turn.mjs" }
|
||||
* ]
|
||||
* }
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
import { spawn } from 'node:child_process';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
// ─── Events ───────────────────────────────────────────────────────────────
|
||||
|
||||
export type HookEvent =
|
||||
| 'PreToolUse'
|
||||
| 'PostToolUse'
|
||||
| 'UserPromptSubmit'
|
||||
| 'Stop'
|
||||
| 'PreCompact'
|
||||
| 'PostCompact';
|
||||
|
||||
const ALL_EVENTS: HookEvent[] = [
|
||||
'PreToolUse',
|
||||
'PostToolUse',
|
||||
'UserPromptSubmit',
|
||||
'Stop',
|
||||
'PreCompact',
|
||||
'PostCompact',
|
||||
];
|
||||
|
||||
// ─── Config ────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface HookConfig {
|
||||
/** Glob or exact tool name to match (PreToolUse/PostToolUse only). Omit or '*' for all. */
|
||||
match?: string;
|
||||
/** Shell command to run. Receives JSON payload on stdin. */
|
||||
command: string;
|
||||
/** Timeout in seconds (default 30). */
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export interface HooksConfig {
|
||||
hooks: Partial<Record<HookEvent, HookConfig[]>>;
|
||||
}
|
||||
|
||||
// ─── Payloads ──────────────────────────────────────────────────────────────
|
||||
|
||||
export interface PreToolUsePayload {
|
||||
event: 'PreToolUse';
|
||||
session_id: string;
|
||||
tool_name: string;
|
||||
tool_args: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface PostToolUsePayload {
|
||||
event: 'PostToolUse';
|
||||
session_id: string;
|
||||
tool_name: string;
|
||||
tool_args: Record<string, unknown>;
|
||||
tool_result: unknown;
|
||||
tool_error?: string;
|
||||
}
|
||||
|
||||
export interface UserPromptSubmitPayload {
|
||||
event: 'UserPromptSubmit';
|
||||
session_id: string;
|
||||
chat_id: string;
|
||||
prompt: string;
|
||||
}
|
||||
|
||||
export interface StopPayload {
|
||||
event: 'Stop';
|
||||
session_id: string;
|
||||
chat_id: string;
|
||||
last_assistant_text: string;
|
||||
turn: number;
|
||||
}
|
||||
|
||||
export interface PreCompactPayload {
|
||||
event: 'PreCompact';
|
||||
session_id: string;
|
||||
chat_id: string;
|
||||
messages_before: number;
|
||||
}
|
||||
|
||||
export interface PostCompactPayload {
|
||||
event: 'PostCompact';
|
||||
session_id: string;
|
||||
chat_id: string;
|
||||
messages_before: number;
|
||||
messages_after: number;
|
||||
summary: string;
|
||||
}
|
||||
|
||||
export type HookPayload =
|
||||
| PreToolUsePayload
|
||||
| PostToolUsePayload
|
||||
| UserPromptSubmitPayload
|
||||
| StopPayload
|
||||
| PreCompactPayload
|
||||
| PostCompactPayload;
|
||||
|
||||
// ─── Response ──────────────────────────────────────────────────────────────
|
||||
|
||||
export type HookDecision = 'pass' | 'warn' | 'block';
|
||||
|
||||
export interface HookResponse {
|
||||
decision?: HookDecision;
|
||||
reason?: string;
|
||||
/** When present, replaces the original tool args / user prompt. */
|
||||
updated_input?: Record<string, unknown> | string;
|
||||
/** Injected into the model's context for the next turn. */
|
||||
additional_context?: string;
|
||||
}
|
||||
|
||||
// ─── Runner ────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface HookRunner {
|
||||
/** Run all hooks for the given event. Returns the effective response. */
|
||||
run(event: HookEvent, payload: HookPayload, log?: FastifyBaseLogger): Promise<HookResponse>;
|
||||
}
|
||||
|
||||
let hooksConfig: HooksConfig | null = null;
|
||||
let hooksPath: string | null = null;
|
||||
|
||||
/** Load hooks config from disk. Missing file = no hooks. Never throws. */
|
||||
export function loadHooksConfig(path: string): HooksConfig {
|
||||
hooksPath = path;
|
||||
if (!existsSync(path)) {
|
||||
hooksConfig = { hooks: {} };
|
||||
return hooksConfig;
|
||||
}
|
||||
try {
|
||||
const raw = readFileSync(path, 'utf8');
|
||||
const parsed = JSON.parse(raw) as HooksConfig;
|
||||
hooksConfig = {
|
||||
hooks: { ...parsed.hooks },
|
||||
};
|
||||
// Validate event names
|
||||
for (const event of Object.keys(hooksConfig.hooks)) {
|
||||
if (!ALL_EVENTS.includes(event as HookEvent)) {
|
||||
console.warn(`hooks: unknown event '${event}' in ${path} — ignoring`);
|
||||
delete hooksConfig.hooks[event as HookEvent];
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`hooks: failed to load ${path}`, err);
|
||||
hooksConfig = { hooks: {} };
|
||||
}
|
||||
return hooksConfig;
|
||||
}
|
||||
|
||||
/** Reload the config file (call after a PATCH). */
|
||||
export function reloadHooksConfig(): HooksConfig {
|
||||
if (hooksPath) return loadHooksConfig(hooksPath);
|
||||
hooksConfig = { hooks: {} };
|
||||
return hooksConfig;
|
||||
}
|
||||
|
||||
function getConfig(): HooksConfig {
|
||||
return hooksConfig ?? { hooks: {} };
|
||||
}
|
||||
|
||||
/** Create a HookRunner for the current config. */
|
||||
export function createHookRunner(): HookRunner {
|
||||
return {
|
||||
async run(event, payload, log): Promise<HookResponse> {
|
||||
const configs = getConfig().hooks[event];
|
||||
if (!configs || configs.length === 0) return { decision: 'pass' };
|
||||
|
||||
// Pre-filter by match pattern for tool events
|
||||
const toolName = 'tool_name' in payload ? (payload as PreToolUsePayload).tool_name : undefined;
|
||||
|
||||
let effective: HookResponse = { decision: 'pass' };
|
||||
|
||||
for (const cfg of configs) {
|
||||
// Skip if match doesn't apply
|
||||
if (toolName && cfg.match && cfg.match !== '*' && cfg.match !== toolName) continue;
|
||||
|
||||
const result = await runSingleHook(cfg, payload, log);
|
||||
// Merge decisions: block > warn > pass
|
||||
if (result.decision === 'block') {
|
||||
effective = { ...result, decision: 'block' };
|
||||
break; // block is terminal
|
||||
}
|
||||
if (result.decision === 'warn' && effective.decision !== 'block') {
|
||||
effective = { ...result, decision: 'warn' };
|
||||
}
|
||||
// Merge additional_context and updated_input
|
||||
if (result.additional_context) {
|
||||
effective.additional_context = effective.additional_context
|
||||
? effective.additional_context + '\n' + result.additional_context
|
||||
: result.additional_context;
|
||||
}
|
||||
if (result.updated_input && !effective.updated_input) {
|
||||
effective.updated_input = result.updated_input;
|
||||
}
|
||||
}
|
||||
|
||||
return effective;
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
async function runSingleHook(
|
||||
cfg: HookConfig,
|
||||
payload: HookPayload,
|
||||
log?: FastifyBaseLogger,
|
||||
): Promise<HookResponse> {
|
||||
const timeoutMs = (cfg.timeout ?? 30) * 1000;
|
||||
|
||||
return new Promise((resolve) => {
|
||||
const child = spawn('sh', ['-c', cfg.command], {
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
timeout: timeoutMs,
|
||||
env: { ...process.env },
|
||||
});
|
||||
|
||||
const stdout: Buffer[] = [];
|
||||
const stderr: Buffer[] = [];
|
||||
|
||||
child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
|
||||
child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
|
||||
|
||||
let settled = false;
|
||||
const timer = setTimeout(() => {
|
||||
if (!settled) {
|
||||
settled = true;
|
||||
child.kill('SIGTERM');
|
||||
log?.warn({ event: payload.event, command: cfg.command }, 'hooks: timeout');
|
||||
resolve({ decision: 'warn', reason: 'hook timed out' });
|
||||
}
|
||||
}, timeoutMs);
|
||||
|
||||
child.on('error', (err) => {
|
||||
if (!settled) {
|
||||
settled = true;
|
||||
clearTimeout(timer);
|
||||
log?.warn({ err, event: payload.event }, 'hooks: spawn error');
|
||||
resolve({ decision: 'warn', reason: `hook failed: ${err.message}` });
|
||||
}
|
||||
});
|
||||
|
||||
child.on('close', (code) => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
clearTimeout(timer);
|
||||
|
||||
const out = Buffer.concat(stdout).toString('utf8').trim();
|
||||
const errOut = Buffer.concat(stderr).toString('utf8').trim();
|
||||
|
||||
if (code !== 0 && !out) {
|
||||
log?.warn({ event: payload.event, code, stderr: errOut.slice(0, 200) }, 'hooks: non-zero exit');
|
||||
resolve({ decision: 'warn', reason: `hook exited ${code}` });
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse stdout as JSON response
|
||||
if (out) {
|
||||
try {
|
||||
const parsed = JSON.parse(out) as HookResponse;
|
||||
resolve(parsed);
|
||||
return;
|
||||
} catch {
|
||||
// Not JSON — treat as pass with stdout as context
|
||||
if (out.length > 0) {
|
||||
resolve({ decision: 'pass', additional_context: out });
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resolve({ decision: 'pass' });
|
||||
});
|
||||
|
||||
// Write payload to stdin
|
||||
const json = JSON.stringify(payload);
|
||||
child.stdin.write(json);
|
||||
child.stdin.end();
|
||||
});
|
||||
}
|
||||
@@ -122,6 +122,8 @@ export async function finalizeStreamedRow(
|
||||
completionTokens: number | null;
|
||||
promptTokens: number | null;
|
||||
startedAt: string | null;
|
||||
cacheTokens?: number | null;
|
||||
reasoningTokens?: number | null;
|
||||
beforeComplete?: () => Promise<void>;
|
||||
},
|
||||
): Promise<void> {
|
||||
@@ -137,6 +139,8 @@ export async function finalizeStreamedRow(
|
||||
tokens_used = ${opts.completionTokens},
|
||||
ctx_used = ${opts.promptTokens},
|
||||
ctx_max = ${nCtx},
|
||||
cache_tokens = ${opts.cacheTokens ?? null},
|
||||
reasoning_tokens = ${opts.reasoningTokens ?? null},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${opts.messageId}
|
||||
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
||||
@@ -149,6 +153,8 @@ export async function finalizeStreamedRow(
|
||||
tokens_used: updated?.tokens_used ?? null,
|
||||
ctx_used: updated?.ctx_used ?? null,
|
||||
ctx_max: updated?.ctx_max ?? null,
|
||||
cache_tokens: opts.cacheTokens ?? null,
|
||||
reasoning_tokens: opts.reasoningTokens ?? null,
|
||||
started_at: opts.startedAt,
|
||||
finished_at: updated?.finished_at ?? null,
|
||||
model: opts.model,
|
||||
@@ -188,7 +194,7 @@ export async function finalizeCompletion(
|
||||
): Promise<void> {
|
||||
const { sessionId, chatId, assistantMessageId } = args;
|
||||
const content = stripToolMarkup(result.content, { final: true });
|
||||
const { finishReason, promptTokens, completionTokens } = result;
|
||||
const { finishReason, promptTokens, completionTokens, cacheReadTokens, reasoningTokens } = result;
|
||||
|
||||
// v1.11.3: see executeToolPhase for the rationale.
|
||||
const mctx = await modelContext.getModelContext(session.model);
|
||||
@@ -203,6 +209,8 @@ export async function finalizeCompletion(
|
||||
tokens_used = ${completionTokens},
|
||||
ctx_used = ${promptTokens},
|
||||
ctx_max = ${nCtx},
|
||||
cache_tokens = ${cacheReadTokens ?? null},
|
||||
reasoning_tokens = ${reasoningTokens ?? null},
|
||||
model = ${session.model},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${assistantMessageId}
|
||||
@@ -268,6 +276,8 @@ export async function finalizeCompletion(
|
||||
tokens_used: updated?.tokens_used ?? null,
|
||||
ctx_used: updated?.ctx_used ?? null,
|
||||
ctx_max: updated?.ctx_max ?? null,
|
||||
cache_tokens: cacheReadTokens ?? null,
|
||||
reasoning_tokens: reasoningTokens ?? null,
|
||||
started_at: startedAt,
|
||||
finished_at: updated?.finished_at ?? null,
|
||||
model: session.model,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||
import { createDeepSeek } from '@ai-sdk/deepseek';
|
||||
import type { LanguageModel } from 'ai';
|
||||
|
||||
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||
@@ -11,6 +12,12 @@ import type { LanguageModel } from 'ai';
|
||||
// llama-sidecar instead. A fresh provider is created per call (not cached)
|
||||
// because the X-Agent-Flags header varies per agent. The llama-swap path
|
||||
// stays cached since it has no per-request headers.
|
||||
//
|
||||
// vDeepSeek: when the model ID starts with 'deepseek-' and DEEPSEEK_API_KEY
|
||||
// is set, route through the official @ai-sdk/deepseek provider (not
|
||||
// openai-compatible) so DeepSeek-specific features work: providerMetadata
|
||||
// with promptCacheHitTokens/promptCacheMissTokens, reasoning via
|
||||
// LanguageModelV4Usage.outputTokens.reasoning, and thinking-mode options.
|
||||
|
||||
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
||||
|
||||
@@ -41,7 +48,28 @@ function sidecarProvider(
|
||||
});
|
||||
}
|
||||
|
||||
export type InferenceRoute = 'swap' | 'sidecar';
|
||||
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
|
||||
|
||||
export function isDeepSeekModel(modelId: string): boolean {
|
||||
return modelId.startsWith(DEEPSEEK_MODEL_PREFIX);
|
||||
}
|
||||
|
||||
let deepseekProviderCache: ReturnType<typeof createDeepSeek> | null = null;
|
||||
|
||||
function getDeepSeekProvider(
|
||||
apiKey: string,
|
||||
baseURL: string,
|
||||
): ReturnType<typeof createDeepSeek> {
|
||||
if (!deepseekProviderCache) {
|
||||
deepseekProviderCache = createDeepSeek({
|
||||
apiKey,
|
||||
baseURL,
|
||||
});
|
||||
}
|
||||
return deepseekProviderCache;
|
||||
}
|
||||
|
||||
export type InferenceRoute = 'swap' | 'sidecar' | 'deepseek';
|
||||
|
||||
export interface RoutingInfo {
|
||||
route: InferenceRoute;
|
||||
@@ -55,12 +83,21 @@ interface AgentLike {
|
||||
interface ConfigLike {
|
||||
LLAMA_SWAP_URL: string;
|
||||
LLAMA_SIDECAR_URL?: string;
|
||||
DEEPSEEK_API_KEY?: string;
|
||||
DEEPSEEK_BASE_URL?: string;
|
||||
}
|
||||
|
||||
export function resolveRoute(
|
||||
agent: AgentLike | null,
|
||||
config?: ConfigLike,
|
||||
modelId?: string,
|
||||
): RoutingInfo {
|
||||
// vDeepSeek: if the model starts with deepseek- and DEEPSEEK_API_KEY is set,
|
||||
// route through the DeepSeek provider. Checked first so DeepSeek models
|
||||
// always bypass llama-swap/sidecar even when those are also configured.
|
||||
if (modelId?.startsWith(DEEPSEEK_MODEL_PREFIX) && config?.DEEPSEEK_API_KEY) {
|
||||
return { route: 'deepseek', flags: null };
|
||||
}
|
||||
// When llama_extra_args are explicitly set, route through sidecar with them.
|
||||
const flags = agent?.llama_extra_args;
|
||||
if (flags && flags.length > 0) {
|
||||
@@ -80,7 +117,13 @@ export function upstreamModel(
|
||||
modelId: string,
|
||||
agent?: AgentLike | null,
|
||||
): LanguageModel {
|
||||
const { route, flags } = resolveRoute(agent ?? null, config);
|
||||
const { route, flags } = resolveRoute(agent ?? null, config, modelId);
|
||||
if (route === 'deepseek') {
|
||||
return getDeepSeekProvider(
|
||||
config.DEEPSEEK_API_KEY!,
|
||||
config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com',
|
||||
).chat(modelId);
|
||||
}
|
||||
if (route === 'sidecar') {
|
||||
const url = config.LLAMA_SIDECAR_URL;
|
||||
if (!url) {
|
||||
@@ -90,3 +133,30 @@ export function upstreamModel(
|
||||
}
|
||||
return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
|
||||
}
|
||||
|
||||
/** Resolve the API endpoint for non-streaming calls (compaction, task-model).
|
||||
* Returns the URL + model + optional auth header for direct fetch() usage. */
|
||||
export function resolveModelEndpoint(
|
||||
config: ConfigLike,
|
||||
modelId: string,
|
||||
): { url: string; model: string; headers: Record<string, string> } {
|
||||
const baseHeaders: Record<string, string> = { 'Content-Type': 'application/json' };
|
||||
if (modelId.startsWith(DEEPSEEK_MODEL_PREFIX) && config.DEEPSEEK_API_KEY) {
|
||||
const baseURL = (config.DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com').replace(/\/+$/, '');
|
||||
return {
|
||||
url: baseURL,
|
||||
model: modelId,
|
||||
headers: { ...baseHeaders, Authorization: `Bearer ${config.DEEPSEEK_API_KEY}` },
|
||||
};
|
||||
}
|
||||
return {
|
||||
url: config.LLAMA_SWAP_URL.replace(/\/+$/, ''),
|
||||
model: modelId,
|
||||
headers: baseHeaders,
|
||||
};
|
||||
}
|
||||
|
||||
/** Invalidate the cached DeepSeek provider (e.g. when env vars change at runtime). */
|
||||
export function resetDeepSeekProvider(): void {
|
||||
deepseekProviderCache = null;
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import type { OpenAiMessage } from './payload.js';
|
||||
import { extractToolCallBlocks } from './tool-call-parser.js';
|
||||
import { classifyStreamError } from './stream-error-classifier.js';
|
||||
import type { StreamResult } from './types.js';
|
||||
import { upstreamModel } from './provider.js';
|
||||
import { isDeepSeekModel, upstreamModel } from './provider.js';
|
||||
import {
|
||||
jsonSchema,
|
||||
streamText,
|
||||
@@ -51,6 +51,9 @@ export interface StreamOptions {
|
||||
dry_base?: number | null;
|
||||
dry_allowed_length?: number | null;
|
||||
dry_penalty_last_n?: number | null;
|
||||
// vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
|
||||
// API param for deepseek-v4-flash / deepseek-v4-pro models.
|
||||
reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
|
||||
}
|
||||
|
||||
// P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
|
||||
@@ -74,6 +77,7 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
|
||||
dry_base: agent?.dry_base ?? undefined,
|
||||
dry_allowed_length: agent?.dry_allowed_length ?? undefined,
|
||||
dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
|
||||
reasoning_effort: agent?.reasoning_effort ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -272,6 +276,19 @@ export async function streamCompletion(
|
||||
// before this. They now go through the same extraBody path as the new params.
|
||||
const samplerBody = buildSamplerProviderOptions(opts);
|
||||
|
||||
// vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
|
||||
let deepseekProviderOptions:
|
||||
| { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
|
||||
| undefined;
|
||||
if (isDeepSeekModel(model)) {
|
||||
const dsEffort = opts.reasoning_effort;
|
||||
const thinkingEnabled = dsEffort && dsEffort !== 'off';
|
||||
deepseekProviderOptions = {
|
||||
thinking: { type: thinkingEnabled ? 'enabled' : 'disabled' },
|
||||
...(thinkingEnabled ? { reasoningEffort: dsEffort } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
// F6: per-chunk stall deadline. If the model stops emitting chunks for
|
||||
// STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
|
||||
// abort check below then throws AbortError → handleAbortOrError writes
|
||||
@@ -297,7 +314,14 @@ export async function streamCompletion(
|
||||
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
||||
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
||||
...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
|
||||
...(samplerBody || deepseekProviderOptions
|
||||
? {
|
||||
providerOptions: {
|
||||
...(samplerBody ? { openaiCompatible: samplerBody } : {}),
|
||||
...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
abortSignal: effectiveSignal,
|
||||
});
|
||||
|
||||
@@ -401,12 +425,26 @@ export async function streamCompletion(
|
||||
|
||||
// Usage lands as a promise on the result; awaiting after fullStream is
|
||||
// drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
|
||||
// Some providers (llama-swap via openai-compatible) return plain numbers;
|
||||
// others (deepseek via @ai-sdk/deepseek) return {total, cacheRead, noCache, ...}.
|
||||
let promptTokens: number | null = null;
|
||||
let completionTokens: number | null = null;
|
||||
let cacheReadTokens: number | null = null;
|
||||
let reasoningTokens: number | null = null;
|
||||
try {
|
||||
const usage = await result.usage;
|
||||
if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
|
||||
if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
|
||||
if (typeof usage.inputTokens === 'number') {
|
||||
promptTokens = usage.inputTokens;
|
||||
} else if (usage.inputTokens && typeof usage.inputTokens === 'object') {
|
||||
promptTokens = (usage.inputTokens as Record<string, number | undefined>).total ?? null;
|
||||
cacheReadTokens = (usage.inputTokens as Record<string, number | undefined>).cacheRead ?? null;
|
||||
}
|
||||
if (typeof usage.outputTokens === 'number') {
|
||||
completionTokens = usage.outputTokens;
|
||||
} else if (usage.outputTokens && typeof usage.outputTokens === 'object') {
|
||||
completionTokens = (usage.outputTokens as Record<string, number | undefined>).total ?? null;
|
||||
reasoningTokens = (usage.outputTokens as Record<string, number | undefined>).reasoning ?? null;
|
||||
}
|
||||
} catch {
|
||||
// Some providers omit usage on partial streams; leave both null.
|
||||
}
|
||||
@@ -422,6 +460,13 @@ export async function streamCompletion(
|
||||
);
|
||||
}
|
||||
|
||||
if (cacheReadTokens !== null || reasoningTokens !== null) {
|
||||
ctx.log.debug(
|
||||
{ promptTokens, completionTokens, cacheReadTokens, reasoningTokens, model },
|
||||
'streamCompletion: deepseek usage breakdown',
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
finishReason,
|
||||
content,
|
||||
@@ -429,6 +474,10 @@ export async function streamCompletion(
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
reasoning: reasoningAccumulated,
|
||||
// vDeepSeek: optional usage breakdown populated when the provider returns
|
||||
// structured usage (cache hit tokens, reasoning tokens).
|
||||
cacheReadTokens: cacheReadTokens ?? undefined,
|
||||
reasoningTokens: reasoningTokens ?? undefined,
|
||||
};
|
||||
} finally {
|
||||
// Clear the stall timer whether the stream completes normally, throws, or
|
||||
|
||||
179
apps/server/src/services/inference/tool-input-repair.ts
Normal file
179
apps/server/src/services/inference/tool-input-repair.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
|
||||
* vWhale: schema-based tool input repair. When the model emits tool call args
|
||||
* that don't match the expected types (common with weaker models), apply
|
||||
* heuristic repairs before falling through to the Zod parse.
|
||||
*
|
||||
* Inspired by Whale's RepairToolInputForSpec:
|
||||
* - Coerce string "true"/"false" → boolean
|
||||
* - Unwrap markdown autolinks in string fields: <file:///path> → /path
|
||||
* - Wrap bare values in arrays when schema expects array
|
||||
* - Convert "42.0" decimal string → "42" for integer fields
|
||||
* - Recurse into objects to repair nested properties
|
||||
*/
|
||||
|
||||
export interface ToolInputRepair {
|
||||
field: string;
|
||||
kind: string;
|
||||
detail: string;
|
||||
}
|
||||
|
||||
const MARKDOWN_AUTOLINK_RE = /^<(?:file|path):\/\/(.+?)>$/;
|
||||
|
||||
/**
|
||||
* Attempt to repair tool call args against the tool's JSON Schema.
|
||||
* Returns the (possibly modified) args plus a list of repairs applied.
|
||||
*/
|
||||
export function repairToolInput(
|
||||
schema: Record<string, unknown> | undefined,
|
||||
args: Record<string, unknown>,
|
||||
): { repaired: Record<string, unknown>; repairs: ToolInputRepair[] } {
|
||||
const repairs: ToolInputRepair[] = [];
|
||||
if (!schema || typeof schema !== 'object') {
|
||||
return { repaired: args, repairs };
|
||||
}
|
||||
|
||||
const properties = (schema as Record<string, unknown>).properties as
|
||||
Record<string, unknown> | undefined;
|
||||
if (!properties) {
|
||||
return { repaired: args, repairs };
|
||||
}
|
||||
|
||||
const required = new Set<string>(
|
||||
Array.isArray((schema as Record<string, unknown>).required)
|
||||
? (schema as Record<string, unknown>).required as string[]
|
||||
: [],
|
||||
);
|
||||
|
||||
const repaired: Record<string, unknown> = {};
|
||||
for (const [key, value] of Object.entries(args)) {
|
||||
const propSchema = properties[key] as Record<string, unknown> | undefined;
|
||||
if (propSchema && value !== null && value !== undefined) {
|
||||
repaired[key] = repairValue(key, propSchema, value, repairs, required.has(key));
|
||||
} else {
|
||||
repaired[key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
// Drop keys not in the schema (only for required fields that are missing)
|
||||
// to avoid polluting the model with hallucinated params.
|
||||
for (const key of Object.keys(repaired)) {
|
||||
if (!(key in properties)) {
|
||||
repairs.push({ field: key, kind: 'removed_unknown', detail: `Removed unknown parameter '${key}'` });
|
||||
delete repaired[key];
|
||||
}
|
||||
}
|
||||
|
||||
return { repaired, repairs };
|
||||
}
|
||||
|
||||
function repairValue(
|
||||
field: string,
|
||||
schema: Record<string, unknown>,
|
||||
value: unknown,
|
||||
repairs: ToolInputRepair[],
|
||||
required: boolean,
|
||||
): unknown {
|
||||
const schemaType = schema.type;
|
||||
const isArray = schemaType === 'array' || Array.isArray(schemaType)
|
||||
? schemaType === 'array' || (Array.isArray(schemaType) && schemaType.includes('array'))
|
||||
: false;
|
||||
const isObject = schemaType === 'object';
|
||||
const isBoolean = schemaType === 'boolean';
|
||||
const isInteger = schemaType === 'integer' || schemaType === 'number';
|
||||
const isString = schemaType === 'string';
|
||||
|
||||
// --- Array repair: wrap bare value or empty object ---
|
||||
if (isArray) {
|
||||
if (!Array.isArray(value)) {
|
||||
if (typeof value === 'string') {
|
||||
// Try parsing as JSON array first
|
||||
try {
|
||||
const parsed = JSON.parse(value);
|
||||
if (Array.isArray(parsed)) {
|
||||
repairs.push({ field, kind: 'parsed_json_array', detail: `Parsed string as JSON array for '${field}'` });
|
||||
return parsed;
|
||||
}
|
||||
} catch { /* not JSON */ }
|
||||
}
|
||||
if (typeof value === 'object' && value !== null && Object.keys(value).length === 0) {
|
||||
if (required) {
|
||||
repairs.push({ field, kind: 'empty_object_to_array', detail: `Converted empty object to empty array for '${field}'` });
|
||||
return [];
|
||||
}
|
||||
repairs.push({ field, kind: 'empty_object_to_undefined', detail: `Removed empty object for optional array '${field}'` });
|
||||
return undefined;
|
||||
}
|
||||
repairs.push({ field, kind: 'wrapped_in_array', detail: `Wrapped bare value in array for '${field}'` });
|
||||
return [value];
|
||||
}
|
||||
// Recurse into array items
|
||||
const itemsSchema = schema.items as Record<string, unknown> | undefined;
|
||||
if (itemsSchema) {
|
||||
return value.map((item, i) => repairValue(`${field}[${i}]`, itemsSchema, item, repairs, required));
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Object repair: recurse into properties ---
|
||||
if (isObject && typeof value === 'object' && value !== null && !Array.isArray(value)) {
|
||||
const props = (schema.properties as Record<string, unknown>) ?? {};
|
||||
const repaired: Record<string, unknown> = {};
|
||||
for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
|
||||
const propSchema = props[k] as Record<string, unknown> | undefined;
|
||||
if (propSchema) {
|
||||
repaired[k] = repairValue(`${field}.${k}`, propSchema, v, repairs, required);
|
||||
} else {
|
||||
repaired[k] = v;
|
||||
}
|
||||
}
|
||||
return repaired;
|
||||
}
|
||||
|
||||
// --- String repair: unwrap markdown autolinks ---
|
||||
if (isString && typeof value === 'string') {
|
||||
const match = value.match(MARKDOWN_AUTOLINK_RE);
|
||||
if (match) {
|
||||
repairs.push({ field, kind: 'unwrapped_markdown_link', detail: `Unwrapped markdown autolink for '${field}': ${value}` });
|
||||
return match[1];
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Boolean coercion ---
|
||||
if (isBoolean && typeof value === 'string') {
|
||||
const lower = value.toLowerCase();
|
||||
if (lower === 'true') {
|
||||
repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → true for '${field}'` });
|
||||
return true;
|
||||
}
|
||||
if (lower === 'false') {
|
||||
repairs.push({ field, kind: 'coerced_to_boolean', detail: `Coerced string '${value}' → false for '${field}'` });
|
||||
return false;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Integer coercion: "42.0" → 42 ---
|
||||
if (isInteger && typeof value === 'string') {
|
||||
const num = Number(value);
|
||||
if (!Number.isNaN(num)) {
|
||||
repairs.push({ field, kind: 'coerced_to_number', detail: `Coerced string '${value}' → ${num} for '${field}'` });
|
||||
return num;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// --- Integer coercion: boolean → 0/1 ---
|
||||
if (isInteger && typeof value === 'boolean') {
|
||||
repairs.push({ field, kind: 'coerced_boolean_to_integer', detail: `Coerced boolean ${value} → ${value ? 1 : 0} for '${field}'` });
|
||||
return value ? 1 : 0;
|
||||
}
|
||||
|
||||
// --- Empty string to null for optional fields ---
|
||||
if (value === '' && !required) {
|
||||
repairs.push({ field, kind: 'empty_string_to_undefined', detail: `Converted empty string for optional '${field}'` });
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import type { ToolExecCtx } from '../tools.js';
|
||||
import { matchToolGlob } from '../agents.js';
|
||||
import { maybeFlagForCompaction } from './payload.js';
|
||||
import { insertParts, partsFromAssistantMessage, partsFromToolMessage } from './parts.js';
|
||||
import { getServerPermission } from '../mcp-client.js';
|
||||
// v1.13.16: richer unknown-tool error so the model can self-correct when it
|
||||
// drifts to a Claude Code tool name (e.g. read_file → suggest view_file).
|
||||
// Applies to all unknown tool names, not just <invoke>-derived ones — at the
|
||||
@@ -17,6 +18,7 @@ import { formatUnknownToolError } from './tool-suggestions.js';
|
||||
// prompted about paths we couldn't grant anyway (e.g. /etc/passwd).
|
||||
import { resolveGrantRoot } from '../grant_resolver.js';
|
||||
import { stripToolMarkup } from './tool-call-parser.js';
|
||||
import { repairToolInput } from './tool-input-repair.js';
|
||||
import type { FailureKind } from './mistake-tracker.js';
|
||||
import type {
|
||||
InferenceContext,
|
||||
@@ -34,6 +36,8 @@ async function executeToolCall(
|
||||
toolCall: ToolCall,
|
||||
extraRoots: readonly string[],
|
||||
toolCtx?: ToolExecCtx,
|
||||
hooks?: import('../hooks.js').HookRunner,
|
||||
sessionId?: string,
|
||||
): Promise<{ output: unknown; truncated: boolean; error?: string; outcome: FailureKind | 'success' }> {
|
||||
// v#12 MistakeTracker: every return path carries an `outcome` so the turn
|
||||
// loop can detect a run of heterogeneous failures. The failure taxonomy
|
||||
@@ -48,7 +52,61 @@ async function executeToolCall(
|
||||
outcome: 'tool_not_found',
|
||||
};
|
||||
}
|
||||
const parsed = tool.inputSchema.safeParse(toolCall.args);
|
||||
// MCP permission gate — block deny/ask before any Zod parsing or execution
|
||||
const mcpPerm = getServerPermission(toolCall.name);
|
||||
if (mcpPerm === 'deny') {
|
||||
return { output: null, truncated: false, error: `blocked: MCP server denied tool '${toolCall.name}'`, outcome: 'permission_denied' };
|
||||
}
|
||||
if (mcpPerm === 'ask') {
|
||||
return { output: null, truncated: false, error: `requires approval: tool '${toolCall.name}' needs user approval`, outcome: 'permission_denied' };
|
||||
}
|
||||
// vWhale: schema-based tool input repair. If the Zod parse fails, attempt
|
||||
// heuristic repairs (type coercion, markdown-link unwrapping, array wrapping)
|
||||
// and retry. Logs repairs for debugging.
|
||||
let args = toolCall.args;
|
||||
let parsed = tool.inputSchema.safeParse(args);
|
||||
if (!parsed.success) {
|
||||
const schema = tool.jsonSchema?.function?.parameters;
|
||||
if (schema) {
|
||||
const { repaired: repairedArgs, repairs } = repairToolInput(
|
||||
schema as Record<string, unknown>,
|
||||
args as Record<string, unknown>,
|
||||
);
|
||||
if (repairs.length > 0) {
|
||||
const retry = tool.inputSchema.safeParse(repairedArgs);
|
||||
if (retry.success) {
|
||||
args = repairedArgs;
|
||||
parsed = retry;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// vWhale: PreToolUse hook — can block execution.
|
||||
if (hooks && sessionId) {
|
||||
const hookResult = await hooks.run('PreToolUse', {
|
||||
event: 'PreToolUse',
|
||||
session_id: sessionId,
|
||||
tool_name: toolCall.name,
|
||||
tool_args: args as Record<string, unknown>,
|
||||
});
|
||||
if (hookResult.decision === 'block') {
|
||||
return {
|
||||
output: null,
|
||||
truncated: false,
|
||||
error: `blocked by hook: ${hookResult.reason ?? 'PreToolUse denied'}`,
|
||||
outcome: 'permission_denied',
|
||||
};
|
||||
}
|
||||
// Apply updated_input if the hook rewrote the args
|
||||
if (hookResult.updated_input && typeof hookResult.updated_input === 'object') {
|
||||
const reParsed = tool.inputSchema.safeParse(hookResult.updated_input);
|
||||
if (reParsed.success) {
|
||||
args = hookResult.updated_input as Record<string, unknown>;
|
||||
parsed = reParsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!parsed.success) {
|
||||
// v1.12 Track B.2: enrich the zod-reject path so the model sees a
|
||||
// one-line, tool-named hint ("tool 'search_symbols' rejected — query:
|
||||
@@ -183,6 +241,8 @@ export async function executeToolPhase(
|
||||
tokens_used: updated?.tokens_used ?? null,
|
||||
ctx_used: updated?.ctx_used ?? null,
|
||||
ctx_max: updated?.ctx_max ?? null,
|
||||
cache_tokens: result.cacheReadTokens ?? null,
|
||||
reasoning_tokens: result.reasoningTokens ?? null,
|
||||
started_at: startedAt,
|
||||
finished_at: updated?.finished_at ?? null,
|
||||
model: session.model,
|
||||
@@ -318,10 +378,22 @@ export async function executeToolPhase(
|
||||
});
|
||||
return;
|
||||
}
|
||||
const tres = await executeToolCall(projectRoot, tc, session.allowed_read_paths, {
|
||||
sql: ctx.sql,
|
||||
sessionId,
|
||||
});
|
||||
const tres = await executeToolCall(
|
||||
projectRoot, tc, session.allowed_read_paths,
|
||||
{ sql: ctx.sql, sessionId },
|
||||
ctx.hooks, sessionId,
|
||||
);
|
||||
// vWhale: PostToolUse hook (best-effort, non-blocking).
|
||||
if (ctx.hooks) {
|
||||
ctx.hooks.run('PostToolUse', {
|
||||
event: 'PostToolUse',
|
||||
session_id: sessionId,
|
||||
tool_name: tc.name,
|
||||
tool_args: tc.args as Record<string, unknown>,
|
||||
tool_result: tres.output,
|
||||
tool_error: tres.error,
|
||||
}).catch(() => {});
|
||||
}
|
||||
// v#12 MistakeTracker: record the real execution outcome (success or a
|
||||
// FailureKind). This is the primary signal for heterogeneous-failure
|
||||
// detection.
|
||||
|
||||
@@ -144,6 +144,7 @@ export async function runAssistantTurn(
|
||||
log: ctx.log,
|
||||
broker: ctx.broker,
|
||||
chatId,
|
||||
hooks: ctx.hooks,
|
||||
});
|
||||
} catch (err) {
|
||||
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
|
||||
@@ -214,6 +215,16 @@ export async function runAssistantTurn(
|
||||
|
||||
// ---- non-tool finish → finalize and exit ----
|
||||
if (result.toolCalls.length === 0) {
|
||||
// vWhale: Stop hook (best-effort, non-blocking).
|
||||
if (ctx.hooks) {
|
||||
ctx.hooks.run('Stop', {
|
||||
event: 'Stop',
|
||||
session_id: sessionId,
|
||||
chat_id: chatId,
|
||||
last_assistant_text: result.content.slice(0, 500),
|
||||
turn: stepNumber,
|
||||
}).catch(() => {});
|
||||
}
|
||||
await finalizeCompletion(ctx, iterArgs, result, state.startedAt, iterSession);
|
||||
break;
|
||||
}
|
||||
@@ -309,6 +320,22 @@ export async function runAssistantTurn(
|
||||
assistantMessageId = toolPhaseResult.nextAssistantId!;
|
||||
}
|
||||
|
||||
// vWhale: Stop hook at post-loop exit (best-effort, non-blocking).
|
||||
if (ctx.hooks) {
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
const lastAssistant = loaded?.history?.slice().reverse().find(
|
||||
(m: import('../../types/api.js').Message) => m.role === 'assistant',
|
||||
);
|
||||
const content = lastAssistant?.content ?? '';
|
||||
ctx.hooks.run('Stop', {
|
||||
event: 'Stop',
|
||||
session_id: sessionId,
|
||||
chat_id: chatId,
|
||||
last_assistant_text: content.slice(0, 500),
|
||||
turn: stepNumber,
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
// ---- post-loop: step-cap sentinel ----
|
||||
// When the loop exits because stepNumber reached effectiveCap, the last
|
||||
// iteration's tool phase returned 'continue' with a nextAssistantId that
|
||||
|
||||
@@ -19,6 +19,7 @@ import type {
|
||||
UserStreamFrame,
|
||||
} from '../../types/api.js';
|
||||
import type { Broker } from '../broker.js';
|
||||
import type { HookRunner } from '../hooks.js';
|
||||
import type { MistakeState } from './mistake-tracker.js';
|
||||
|
||||
export interface StreamPhaseState {
|
||||
@@ -77,6 +78,8 @@ export interface InferenceFrame {
|
||||
started_at?: string | null;
|
||||
finished_at?: string | null;
|
||||
model?: string;
|
||||
cache_tokens?: number | null;
|
||||
reasoning_tokens?: number | null;
|
||||
session_id?: string;
|
||||
name?: string;
|
||||
// orchestrator frames ([D-6])
|
||||
@@ -117,6 +120,9 @@ export interface InferenceContext {
|
||||
// inference goes through `publish`); keeping a separate field avoids
|
||||
// tempting other code paths into bypassing the session-id binding.
|
||||
broker: Broker;
|
||||
// vWhale: lifecycle hooks runner. Undefined when no hooks configured.
|
||||
// Hook calls are best-effort — a failing hook never blocks inference.
|
||||
hooks?: HookRunner;
|
||||
}
|
||||
|
||||
export interface StreamResult {
|
||||
@@ -128,6 +134,12 @@ export interface StreamResult {
|
||||
// v1.13.1-C: reasoning text accumulated across reasoning-delta parts.
|
||||
// Empty string when the model doesn't emit reasoning (most cases).
|
||||
reasoning: string;
|
||||
// vDeepSeek: optional cache-hit token count from DeepSeek's API.
|
||||
// Only populated when using @ai-sdk/deepseek provider (not llama-swap).
|
||||
cacheReadTokens?: number;
|
||||
// vDeepSeek: optional reasoning token count from DeepSeek's API.
|
||||
// Only populated when using @ai-sdk/deepseek provider (not llama-swap).
|
||||
reasoningTokens?: number;
|
||||
}
|
||||
|
||||
export interface TurnArgs {
|
||||
|
||||
@@ -31,11 +31,14 @@ interface McpToolDef {
|
||||
annotations?: McpToolAnnotations;
|
||||
}
|
||||
|
||||
export type McpPermission = 'allow' | 'ask' | 'deny';
|
||||
|
||||
interface ServerState {
|
||||
client: Client;
|
||||
transport: StreamableHTTPClientTransport | StdioClientTransport;
|
||||
tools: ToolDef<Record<string, unknown>>[];
|
||||
type: 'streamableHttp' | 'stdio';
|
||||
permission: McpPermission;
|
||||
}
|
||||
|
||||
// ---- Module-level state ----
|
||||
@@ -137,6 +140,14 @@ export async function callTool(
|
||||
}
|
||||
}
|
||||
|
||||
/** Return the permission level for a given MCP server. Defaults to 'allow' if unknown. */
|
||||
export function getServerPermission(prefixedToolName: string): McpPermission {
|
||||
const serverName = toolToServer.get(prefixedToolName);
|
||||
if (!serverName) return 'allow';
|
||||
const state = servers.get(serverName);
|
||||
return state?.permission ?? 'allow';
|
||||
}
|
||||
|
||||
/** Return all wrapped ToolDefs from all connected servers, flattened. */
|
||||
export function getTools(): ToolDef<Record<string, unknown>>[] {
|
||||
const all: ToolDef<Record<string, unknown>>[] = [];
|
||||
@@ -214,7 +225,8 @@ async function connectServer(entry: McpServerEntry): Promise<void> {
|
||||
toolToServer.set(wrapped.name, name);
|
||||
}
|
||||
|
||||
servers.set(name, { client, transport, tools, type: config.type });
|
||||
const permission = (config as { permission?: McpPermission }).permission ?? 'allow';
|
||||
servers.set(name, { client, transport, tools, type: config.type, permission });
|
||||
|
||||
log!.info(
|
||||
{ server: name, type: config.type, count: tools.length, names: tools.map((t) => t.name) },
|
||||
|
||||
@@ -17,12 +17,15 @@ import type { FastifyBaseLogger } from 'fastify';
|
||||
|
||||
// ---- Zod schema ----
|
||||
|
||||
const McpPermissionSchema = z.enum(['allow', 'ask', 'deny']).default('allow');
|
||||
|
||||
const McpServerConfigSchema = z.discriminatedUnion('type', [
|
||||
z.object({
|
||||
type: z.literal('streamableHttp'),
|
||||
url: z.string().url(),
|
||||
headers: z.record(z.string()).optional(),
|
||||
enabled: z.boolean().default(true),
|
||||
permission: McpPermissionSchema,
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal('stdio'),
|
||||
@@ -30,6 +33,7 @@ const McpServerConfigSchema = z.discriminatedUnion('type', [
|
||||
args: z.array(z.string()).default([]),
|
||||
env: z.record(z.string()).optional(),
|
||||
enabled: z.boolean().default(true),
|
||||
permission: McpPermissionSchema,
|
||||
}),
|
||||
]);
|
||||
|
||||
|
||||
@@ -7,10 +7,12 @@
|
||||
|
||||
export const MESSAGE_COLUMNS =
|
||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
|
||||
'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
|
||||
'started_at, finished_at, created_at, metadata, ' +
|
||||
'summary, tail_start_id, compacted_at, model';
|
||||
|
||||
export const INFERENCE_MESSAGE_COLUMNS =
|
||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
|
||||
'tokens_used, ctx_used, ctx_max, cache_tokens, reasoning_tokens, ' +
|
||||
'started_at, finished_at, created_at, metadata, ' +
|
||||
'reasoning_parts, model';
|
||||
|
||||
@@ -37,7 +37,18 @@ export function configureModelContext(opts: { llamaSwapUrl: string }): void {
|
||||
llamaSwapUrl = opts.llamaSwapUrl;
|
||||
}
|
||||
|
||||
// vDeepSeek: DeepSeek models don't have a /upstream/<model>/props endpoint.
|
||||
// Return a reasonable default context so compaction estimates work.
|
||||
const DEEPSEEK_DEFAULT_N_CTX = 131_072;
|
||||
const DEEPSEEK_MODEL_PREFIX = 'deepseek-';
|
||||
|
||||
export async function getModelContext(model: string): Promise<ModelContext | null> {
|
||||
// vDeepSeek: DeepSeek models have no /upstream/<model>/props. Use a static
|
||||
// default so compaction doesn't fall to the buffer-only path with tiny limits.
|
||||
if (model.startsWith(DEEPSEEK_MODEL_PREFIX)) {
|
||||
return { n_ctx: DEEPSEEK_DEFAULT_N_CTX };
|
||||
}
|
||||
|
||||
// 1. Positive cache hit — no TTL check, model n_ctx is invariant.
|
||||
const pos = positiveCache.get(model);
|
||||
if (pos) return pos;
|
||||
|
||||
@@ -101,7 +101,7 @@ export interface PrefixFingerprint {
|
||||
has_agent_system_prompt: boolean;
|
||||
has_session_override: boolean;
|
||||
has_project_override: boolean;
|
||||
route: 'swap' | 'sidecar';
|
||||
route: 'swap' | 'sidecar' | 'deepseek';
|
||||
}
|
||||
|
||||
export interface PrefixDrift {
|
||||
@@ -129,7 +129,7 @@ interface ObservedInputs {
|
||||
has_agent_system_prompt: boolean;
|
||||
has_session_override: boolean;
|
||||
has_project_override: boolean;
|
||||
route: 'swap' | 'sidecar';
|
||||
route: 'swap' | 'sidecar' | 'deepseek';
|
||||
}
|
||||
|
||||
interface ObserverEntry {
|
||||
|
||||
@@ -2,6 +2,12 @@ import { z } from 'zod';
|
||||
import type { ToolDef } from '../types.js';
|
||||
import { callCodecontext, type CodecontextResponse } from '../../codecontext_client.js';
|
||||
|
||||
// DEPRECATED (Phase 4, Domain 2, v2.8.14): This factory builds ToolDefs that
|
||||
// route through the Go codecontext sidecar via callCodecontext(). Superseded
|
||||
// by direct boocontext MCP tool wrappers. Keep functional for backward
|
||||
// compatibility — old codecontext tools still use HTTP. New tools should use
|
||||
// the boocontext MCP server instead of adding entries here.
|
||||
//
|
||||
// Shared factory for the 12 codecontext shim ToolDefs.
|
||||
// Each shim provides name/schema/description/jsonParameters/mapArgs; the
|
||||
// factory builds the ToolDef and returns both the ToolDef and the standalone
|
||||
|
||||
@@ -3,6 +3,7 @@ import { makeCodecontextTool } from './factory.js';
|
||||
|
||||
export const GetCodebaseOverviewInput = z.object({
|
||||
include_stats: z.boolean().optional(),
|
||||
compress: z.boolean().optional().describe('Apply DCP compression for large projects (>50 files)'),
|
||||
});
|
||||
export type GetCodebaseOverviewInputT = z.infer<typeof GetCodebaseOverviewInput>;
|
||||
|
||||
@@ -24,10 +25,18 @@ const { toolDef: getCodebaseOverview, execute: executeGetCodebaseOverview } =
|
||||
type: 'boolean',
|
||||
description: 'Include file count, symbol count, language stats. Defaults to true.',
|
||||
},
|
||||
compress: {
|
||||
type: 'boolean',
|
||||
description: 'Apply DCP compression for large projects (>50 files)',
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
mapArgs: (input) => ({ include_stats: input.include_stats ?? true }),
|
||||
mapArgs: (input) => {
|
||||
const args: Record<string, unknown> = { include_stats: input.include_stats ?? true };
|
||||
if (input.compress) args['compress'] = true;
|
||||
return args;
|
||||
},
|
||||
});
|
||||
|
||||
export { getCodebaseOverview, executeGetCodebaseOverview };
|
||||
|
||||
@@ -18,3 +18,4 @@ export { getCodeHealth } from './get_code_health.js';
|
||||
export { getCodeImpact } from './get_code_impact.js';
|
||||
export { getTypeInfo } from './get_type_info.js';
|
||||
export { getCodeMap } from './get_code_map.js';
|
||||
export { getWikiArticle } from './get_wiki_article.js';
|
||||
|
||||
@@ -127,6 +127,9 @@ export interface Agent {
|
||||
// bounded only by MAX_STEPS (200). 0 means "no tool calls allowed."
|
||||
steps: number | null;
|
||||
llama_extra_args: string[] | null;
|
||||
// vDeepSeek: thinking/reasoning effort for DeepSeek V4 models.
|
||||
// Maps to DeepSeek's reasoning_effort API param.
|
||||
reasoning_effort: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | null;
|
||||
}
|
||||
|
||||
// One entry per malformed `## Name` block. Per-block errors don't fail the
|
||||
@@ -206,6 +209,8 @@ export interface Message {
|
||||
tokens_used: number | null;
|
||||
ctx_used: number | null;
|
||||
ctx_max: number | null;
|
||||
cache_tokens: number | null;
|
||||
reasoning_tokens: number | null;
|
||||
started_at: string | null;
|
||||
finished_at: string | null;
|
||||
created_at: string;
|
||||
|
||||
@@ -152,6 +152,8 @@ export interface Message {
|
||||
tokens_used: number | null;
|
||||
ctx_used: number | null;
|
||||
ctx_max: number | null;
|
||||
cache_tokens: number | null;
|
||||
reasoning_tokens: number | null;
|
||||
// model-attribution: which model produced this assistant message (null for
|
||||
// user/system rows + pre-attribution messages). Rendered as a chip.
|
||||
model: string | null;
|
||||
@@ -530,6 +532,8 @@ export type WsFrame =
|
||||
tokens_used?: number | null;
|
||||
ctx_used?: number | null;
|
||||
ctx_max?: number | null;
|
||||
cache_tokens?: number | null;
|
||||
reasoning_tokens?: number | null;
|
||||
started_at?: string | null;
|
||||
finished_at?: string | null;
|
||||
// model-attribution: the model that produced this assistant message.
|
||||
|
||||
@@ -156,9 +156,16 @@ function StatsLine({ message }: { message: Message }) {
|
||||
: `${ctxUsed} ctx`
|
||||
: null;
|
||||
|
||||
const cacheHit = message.cache_tokens;
|
||||
const reasoning = message.reasoning_tokens;
|
||||
const cachePart = typeof cacheHit === 'number' && cacheHit > 0 ? `cache ${cacheHit}` : null;
|
||||
const reasoningPart = typeof reasoning === 'number' && reasoning > 0 ? `think ${reasoning}` : null;
|
||||
|
||||
const parts: string[] = [`${tokens} tokens`];
|
||||
if (tps !== null) parts.push(`${tps.toFixed(1)} tok/s`);
|
||||
if (ctxPart) parts.push(ctxPart);
|
||||
if (cachePart) parts.push(cachePart);
|
||||
if (reasoningPart) parts.push(reasoningPart);
|
||||
|
||||
return (
|
||||
<div className="text-[10px] font-mono text-muted-foreground">
|
||||
|
||||
@@ -40,6 +40,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
||||
tokens_used: null,
|
||||
ctx_used: null,
|
||||
ctx_max: null,
|
||||
cache_tokens: null,
|
||||
reasoning_tokens: null,
|
||||
model: null,
|
||||
started_at: null,
|
||||
finished_at: null,
|
||||
@@ -106,6 +108,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
||||
tokens_used: null,
|
||||
ctx_used: null,
|
||||
ctx_max: null,
|
||||
cache_tokens: null,
|
||||
reasoning_tokens: null,
|
||||
model: null,
|
||||
started_at: null,
|
||||
finished_at: null,
|
||||
@@ -123,6 +127,8 @@ function applyFrame(state: State, frame: WsFrame): State {
|
||||
...(frame.tokens_used !== undefined ? { tokens_used: frame.tokens_used } : {}),
|
||||
...(frame.ctx_used !== undefined ? { ctx_used: frame.ctx_used } : {}),
|
||||
...(frame.ctx_max !== undefined ? { ctx_max: frame.ctx_max } : {}),
|
||||
...(frame.cache_tokens !== undefined ? { cache_tokens: frame.cache_tokens } : {}),
|
||||
...(frame.reasoning_tokens !== undefined ? { reasoning_tokens: frame.reasoning_tokens } : {}),
|
||||
...(frame.started_at !== undefined ? { started_at: frame.started_at } : {}),
|
||||
...(frame.finished_at !== undefined ? { finished_at: frame.finished_at } : {}),
|
||||
...(frame.model !== undefined ? { model: frame.model } : {}),
|
||||
|
||||
31
codecontext/README.md
Normal file
31
codecontext/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# codecontext — Go sidecar (DEPRECATED)
|
||||
|
||||
> **Deprecated** (Phase 4, Domain 2, v2.8.14).
|
||||
>
|
||||
> Superseded by the **boocontext MCP server** (`apps/coder`). Do not add new
|
||||
> callers. The 16 codecontext tool wrappers still use this sidecar via HTTP at
|
||||
> `http://codecontext:8080/v1/{toolName}` for backward compatibility.
|
||||
|
||||
## Migration path
|
||||
|
||||
1. Existing tool wrappers in `apps/server/src/services/tools/codecontext/` route
|
||||
through `callCodecontext()` in `codecontext_client.ts`, which calls this
|
||||
Go sidecar over HTTP.
|
||||
2. New callers should use the boocontext MCP server instead (reachable via the
|
||||
`boocontext` tool wrappers).
|
||||
3. After all callers have migrated, remove this directory, the `codecontext`
|
||||
service block from `docker-compose.yml`, and the
|
||||
`codecontext_client.ts`/`factory.ts` files.
|
||||
|
||||
## What it does
|
||||
|
||||
A Go HTTP shim wrapping the boocontext MCP server's stdio interface. Provides
|
||||
code-graph analysis (symbols, callers, callees, file overview, etc.) over a
|
||||
REST API at `/v1/{toolName}`.
|
||||
|
||||
## Files
|
||||
|
||||
- `shim.go` — HTTP server that wraps the boocontext MCP stdio process
|
||||
- `Dockerfile` — container build
|
||||
- `fork.tar.gz` — vendored boocontext source (gitignored)
|
||||
- `.codecontextignore.template` — default ignore patterns deployed per project
|
||||
@@ -6,7 +6,7 @@ Operating rules for every agent in this registry. Full procedures live in the `c
|
||||
|
||||
**Worktrees** — Isolate work in a worktree when it is parallel to in-progress work, risky/experimental, a hotfix interrupting other work, or splits into independent units — just create when clear, propose in one line when ambiguous, skip quick/small single-stream work. Branch from a stable base (default branch); worktrees persist (never auto-remove or auto-merge); they isolate code state, not runtime (ports/DBs/services still collide). Full heuristic: invoke `using-worktrees`.
|
||||
|
||||
**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context).
|
||||
**Sampling knobs** — Each `## Name` frontmatter block accepts these per-agent sampler fields, threaded into the llama-swap chat-completion request: `temperature`, `top_p`, `top_k`, `min_p`, `presence_penalty`, and (v2.6) `top_n_sigma`, `dry_multiplier`, `dry_base`, `dry_allowed_length`, `dry_penalty_last_n`. The `top_n_sigma` + `dry_*` repetition family curb the doom-loop-prone local model. Omit a field to leave it at the server default. Example: `top_n_sigma: 1.0`, `dry_multiplier: 0.8`, `dry_base: 1.75`, `dry_allowed_length: 2`, `dry_penalty_last_n: -1` (-1 = whole context). DeepSeek V4 models also accept `reasoning_effort` (low/medium/high/xhigh/max); omit to disable thinking mode. Example: `reasoning_effort: 'high'`.
|
||||
|
||||
**Reasoning budget** — To cap a reasoning model's thinking tokens, pass `--reasoning-budget` through `llama_extra_args` (already permitted by the deny-list validator; routes the agent to llama-sidecar). Example frontmatter line: `llama_extra_args: ["--reasoning-budget", "2048"]`. This is a sidecar process flag, not a chat-completion body param — distinct from the sampling knobs above.
|
||||
|
||||
|
||||
@@ -95,6 +95,13 @@ services:
|
||||
# HTTP shim (see ./codecontext/). No host port — reached from boocode at
|
||||
# http://codecontext:8080 over the boocode_net bridge.
|
||||
#
|
||||
# DEPRECATED (Phase 4, Domain 2, v2.8.14): Superseded by the boocontext
|
||||
# MCP server. The 16 codecontext tool wrappers still use this sidecar via
|
||||
# HTTP but should route through the boocontext MCP instead. Keep the
|
||||
# service running for backward compatibility until all callers migrate.
|
||||
# Remove this block after full migration — see codecontext_client.ts and
|
||||
# factory.ts for deprecation markers.
|
||||
#
|
||||
# Mounts /opt:/opt:ro (not just /opt/projects:ro): BooCode projects live
|
||||
# at /opt/<slug> on the host, not exclusively under /opt/projects. The
|
||||
# mount must cover anywhere a project.path could resolve to. Read-only
|
||||
|
||||
@@ -116,6 +116,8 @@ export const MessageCompleteFrame = z.object({
|
||||
tokens_used: z.number().int().nonnegative().nullable().optional(),
|
||||
ctx_used: z.number().int().nonnegative().nullable().optional(),
|
||||
ctx_max: z.number().int().positive().nullable().optional(),
|
||||
cache_tokens: z.number().int().nonnegative().nullable().optional(),
|
||||
reasoning_tokens: z.number().int().nonnegative().nullable().optional(),
|
||||
started_at: IsoTimestamp.nullable().optional(),
|
||||
finished_at: IsoTimestamp.nullable().optional(),
|
||||
// nullable: external-coder turns carry task.model, which is null when no
|
||||
|
||||
Reference in New Issue
Block a user