v1.11.6: doom-loop guard (3 identical tool calls in a row abort the recursion)
This commit is contained in:
130
apps/server/src/services/__tests__/doom-loop.test.ts
Normal file
130
apps/server/src/services/__tests__/doom-loop.test.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { DOOM_LOOP_THRESHOLD, detectDoomLoop } from '../inference.js';
|
||||
import type { ToolCall } from '../../types/api.js';
|
||||
|
||||
// ---- fixture ----------------------------------------------------------------
|
||||
// Tiny helper. `id` is required on ToolCall but irrelevant to detection —
|
||||
// detectDoomLoop compares name + JSON.stringify(args). Counter-based id keeps
|
||||
// each call unique so we don't accidentally test id-based equality.
|
||||
|
||||
let counter = 0;

/**
 * Fixture factory for ToolCall values.
 *
 * Every call receives a fresh id (`c1`, `c2`, …) taken from the module-level
 * counter, so two fixtures never share an id even when name and args match.
 *
 * @param name tool name to put on the call
 * @param args tool arguments; defaults to an empty object
 * @returns a ToolCall carrying the generated id, the name and the args
 */
function mkCall(name: string, args: Record<string, unknown> = {}): ToolCall {
  const id = `c${++counter}`;
  return { id, name, args };
}
|
||||
|
||||
// ---- below-threshold -------------------------------------------------------
|
||||
|
||||
describe('detectDoomLoop — below threshold', () => {
  it('returns null for an empty array', () => {
    const noCalls: ToolCall[] = [];
    expect(detectDoomLoop(noCalls)).toBeNull();
  });

  it('returns null when fewer than DOOM_LOOP_THRESHOLD calls exist', () => {
    // Two identical calls are one short of the 3-call window, so the
    // detector has nothing to compare yet.
    const pair = ['a.ts', 'a.ts'].map((path) => mkCall('view_file', { path }));
    expect(detectDoomLoop(pair)).toBeNull();
  });
});
|
||||
|
||||
// ---- positive detection ----------------------------------------------------
|
||||
|
||||
describe('detectDoomLoop — positive matches', () => {
  // Three separate-but-equal calls: each gets its own shallow copy of `args`
  // so the detector cannot succeed through reference equality.
  const repeated = (name: string, args: Record<string, unknown>): ToolCall[] =>
    Array.from({ length: 3 }, () => mkCall(name, { ...args }));

  it('returns name + args when exactly DOOM_LOOP_THRESHOLD identical calls land', () => {
    const result = detectDoomLoop(repeated('grep', { pattern: 'TODO', path: 'src' }));
    expect(result).not.toBeNull();
    expect(result!.name).toBe('grep');
    expect(result!.args).toEqual({ pattern: 'TODO', path: 'src' });
  });

  it('matches sliding window — last DOOM_LOOP_THRESHOLD match even with earlier non-matching calls', () => {
    // One unrelated call followed by three identical ones → the guard fires.
    const calls = [mkCall('list_dir', { path: '/' }), ...repeated('view_file', { path: 'a.ts' })];
    const result = detectDoomLoop(calls);
    expect(result).not.toBeNull();
    expect(result!.name).toBe('view_file');
  });

  it('matches identical empty-args calls (defense against {} !== {} reference bug)', () => {
    // Distinct `{}` objects must compare equal by value, not by identity.
    expect(detectDoomLoop(repeated('ping', {}))).not.toBeNull();
  });

  it('matches calls with nested args of equal shape', () => {
    // The same nested payload emitted three times is still a loop.
    const nested = { filter: { glob: '*.ts', case: 'sensitive' }, limit: 50 };
    expect(detectDoomLoop(repeated('find_files', nested))).not.toBeNull();
  });
});
|
||||
|
||||
// ---- negative detection ----------------------------------------------------
|
||||
|
||||
describe('detectDoomLoop — negative cases', () => {
  it('returns null when 3 calls share name but differ in args', () => {
    const calls = ['a.ts', 'b.ts', 'c.ts'].map((path) => mkCall('view_file', { path }));
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when 3 calls share args but differ in name', () => {
    const calls = ['view_file', 'grep', 'list_dir'].map((tool) => mkCall(tool, { path: 'a.ts' }));
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when the FIRST three of four match but the latest differs', () => {
    // Only the trailing DOOM_LOOP_THRESHOLD entries count: an earlier run of
    // identical calls is irrelevant once the model has moved on.
    const staleRun = Array.from({ length: 3 }, () => mkCall('grep', { pattern: 'X' }));
    const calls = [...staleRun, mkCall('view_file', { path: 'a.ts' })];
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when args have same keys but different values', () => {
    const calls = ['src', 'src', 'apps'].map((path) => mkCall('grep', { pattern: 'TODO', path }));
    expect(detectDoomLoop(calls)).toBeNull();
  });
});
|
||||
|
||||
// ---- threshold contract ----------------------------------------------------
|
||||
|
||||
describe('DOOM_LOOP_THRESHOLD', () => {
  it('is a positive integer (the public contract — tests assume 3)', () => {
    // Guards the shape of the constant only; the fixtures above hard-code
    // three calls, so they rely on the value staying at 3.
    expect(Number.isInteger(DOOM_LOOP_THRESHOLD)).toBe(true);
    expect(DOOM_LOOP_THRESHOLD).toBeGreaterThan(0);
  });
});
|
||||
@@ -54,6 +54,36 @@ function resolveToolBudget(agent: Agent | null): number {
|
||||
/**
 * Builds the instruction text used when the tool budget is exhausted.
 *
 * @param limit the tool-call budget that was reached
 * @returns the note telling the model to answer without further tool calls
 */
const CAP_HIT_SUMMARY_NOTE = (limit: number): string => {
  return `You've reached the tool budget (${limit} calls). Produce the best answer you can with what you have. Do not call more tools.`;
};
|
||||
|
||||
// v1.11.6: doom-loop guard. When the model calls the same tool with the
// same arguments DOOM_LOOP_THRESHOLD times in a row within one user-message
// turn, abort the recursion and run the same wrap-up summary path as the
// cap-hit case. Ported from opencode (DOOM_LOOP_THRESHOLD in
// session/processor.ts). Threshold of 3 is the smallest value that doesn't
// false-positive on a model that retries once after a transient error.
export const DOOM_LOOP_THRESHOLD = 3;

// Instruction text for the wrap-up call; names the tool that was looping.
const DOOM_LOOP_NOTE = (name: string) =>
  `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`;

// Serializes tool-call args with object keys sorted at every nesting level,
// so two argument objects that differ only in key order produce the same
// string. Plain JSON.stringify follows insertion order, which would let
// `{a, b}` and `{b, a}` slip past the guard. Array element order is left
// untouched because it is significant.
function canonicalArgs(args: unknown): string {
  return JSON.stringify(args, (_key, value: unknown) => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value;
    }
    const source = value as Record<string, unknown>;
    // Null-prototype target: a literal "__proto__" key stays an ordinary
    // own property instead of hitting the prototype setter.
    const sorted: Record<string, unknown> = Object.create(null);
    for (const key of Object.keys(source).sort()) {
      sorted[key] = source[key];
    }
    return sorted;
  });
}

/**
 * Detects a doom loop at the tail of the per-turn tool-call history.
 *
 * Only the LAST DOOM_LOOP_THRESHOLD entries are inspected; an earlier run of
 * identical calls does not count once a different call has followed it.
 * Calls are identical when they share a name and their args serialize to the
 * same key-order-insensitive JSON. The `id` field is ignored.
 *
 * Pure; exported for unit-test access.
 *
 * @param recentToolCalls ordered tool calls, oldest first
 * @returns the name and args of the looping tool (taken from the first call
 *   in the window), or null when there are fewer than DOOM_LOOP_THRESHOLD
 *   calls or the trailing window is not uniform
 */
export function detectDoomLoop(
  recentToolCalls: ToolCall[],
): { name: string; args: Record<string, unknown> } | null {
  if (recentToolCalls.length < DOOM_LOOP_THRESHOLD) return null;

  const tail = recentToolCalls.slice(-DOOM_LOOP_THRESHOLD);
  const ref = tail[0]!;
  const refArgs = canonicalArgs(ref.args);

  const uniform = tail
    .slice(1)
    .every((tc) => tc.name === ref.name && canonicalArgs(tc.args) === refArgs);

  return uniform ? { name: ref.name, args: ref.args } : null;
}
|
||||
|
||||
function isCapHitSentinel(m: Message): boolean {
|
||||
return (
|
||||
m.role === 'system' &&
|
||||
@@ -63,6 +93,22 @@ function isCapHitSentinel(m: Message): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
// v1.11.6: doom-loop counterpart of the cap-hit predicate. A doom-loop
// sentinel is a system message whose metadata is an object with
// kind === 'doom_loop'. These rows are UI-only; buildMessagesPayload skips
// them via isAnySentinel so they are never sent to the LLM.
function isDoomLoopSentinel(m: Message): boolean {
  if (m.role !== 'system') return false;

  const meta = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;

  return (meta as { kind?: unknown }).kind === 'doom_loop';
}
|
||||
|
||||
// True when the message is either kind of sentinel (cap-hit or doom-loop).
// Predicates run in that order and evaluation stops at the first match.
function isAnySentinel(m: Message): boolean {
  const sentinelChecks = [isCapHitSentinel, isDoomLoopSentinel];
  return sentinelChecks.some((matches) => matches(m));
}
|
||||
|
||||
export interface InferenceFrame {
|
||||
type:
|
||||
| 'message_started'
|
||||
@@ -203,11 +249,11 @@ export function buildMessagesPayload(
|
||||
out.push({ role: 'system', content: m.content });
|
||||
continue;
|
||||
}
|
||||
// v1.8.2: cap-hit sentinels are UI-only — never send them to the LLM. The
|
||||
// synthetic "you've reached the tool budget" note lives only inside the
|
||||
// summary call's messages array and is never persisted, so on Continue
|
||||
// the model resumes with a clean context.
|
||||
if (isCapHitSentinel(m)) continue;
|
||||
// v1.8.2 / v1.11.6: cap-hit and doom-loop sentinels are UI-only — never
|
||||
// send them to the LLM. The synthetic instruction note lives only inside
|
||||
// the summary call's messages array and is never persisted, so on a
|
||||
// follow-up turn the model resumes with a clean context.
|
||||
if (isAnySentinel(m)) continue;
|
||||
if (m.role === 'assistant' && m.status === 'streaming') continue;
|
||||
if (m.role === 'assistant' && m.status === 'cancelled') continue;
|
||||
if (m.role === 'tool') {
|
||||
@@ -608,6 +654,11 @@ interface TurnArgs {
|
||||
// resolved budget at the top of each turn. Replaces the older `depth`
|
||||
// counter (which counted iterations, not invocations).
|
||||
toolsUsed: number;
|
||||
// v1.11.6: ordered tool calls executed in this user-message turn (across
|
||||
// recursive runAssistantTurn invocations). Reset to [] at user-message
|
||||
// boundaries by runInference, same as toolsUsed. Doom-loop check at the
|
||||
// top of runAssistantTurn slices the last DOOM_LOOP_THRESHOLD entries.
|
||||
recentToolCalls: ToolCall[];
|
||||
signal: AbortSignal | undefined;
|
||||
}
|
||||
|
||||
@@ -910,6 +961,11 @@ async function executeToolPhase(
|
||||
// One assistant message can emit multiple tool_calls, so we add the run
|
||||
// count, not 1. The next turn's budget check sees the cumulative total.
|
||||
toolsUsed: toolsUsed + result.toolCalls.length,
|
||||
// v1.11.6: append the just-executed tool calls to the per-turn history
|
||||
// so the next runAssistantTurn's doom-loop check can see them. We don't
|
||||
// cap the array length here — per-turn budgets keep it bounded
|
||||
// (typically <30 entries), and slicing happens inside detectDoomLoop.
|
||||
recentToolCalls: [...args.recentToolCalls, ...result.toolCalls],
|
||||
signal,
|
||||
});
|
||||
}
|
||||
@@ -1029,6 +1085,17 @@ async function runAssistantTurn(
|
||||
return;
|
||||
}
|
||||
|
||||
// v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can
|
||||
// burn through 3 identical calls long before the 15-call budget fires).
|
||||
// Same in-flight-slot-reuse pattern as runCapHitSummary — wrap-up reply
|
||||
// lands in args.assistantMessageId, then a doom_loop sentinel is inserted
|
||||
// to make the abort visible in the chat history.
|
||||
const loop = detectDoomLoop(args.recentToolCalls);
|
||||
if (loop) {
|
||||
await runDoomLoopSummary(ctx, args, session, project, history, agent, loop);
|
||||
return;
|
||||
}
|
||||
|
||||
const messages = buildMessagesPayload(session, project, history, agent);
|
||||
|
||||
const state: StreamPhaseState = { accumulated: '', startedAt: null };
|
||||
@@ -1059,7 +1126,16 @@ export async function runInference(
|
||||
// continue) starts with a clean budget. Tool-call accumulation across
|
||||
// Continue invocations is what the hard ceiling guards against, not the
|
||||
// per-call budget.
|
||||
return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, toolsUsed: 0, signal });
|
||||
// v1.11.6: recentToolCalls also resets — doom-loop detection is scoped
|
||||
// to a single user-message turn, so a Continue starts with no history.
|
||||
return runAssistantTurn(ctx, {
|
||||
sessionId,
|
||||
chatId,
|
||||
assistantMessageId,
|
||||
toolsUsed: 0,
|
||||
recentToolCalls: [],
|
||||
signal,
|
||||
});
|
||||
}
|
||||
|
||||
// v1.8.2: cap-hit summary flow. Called instead of erroring when the loop
|
||||
@@ -1318,6 +1394,250 @@ async function insertCapHitSentinel(
|
||||
});
|
||||
}
|
||||
|
||||
// v1.11.6: doom-loop wrap-up, invoked by runAssistantTurn once detectDoomLoop
// reports a loop. Per the original design notes this is a structural clone of
// runCapHitSummary: it reuses the in-flight assistant slot
// (args.assistantMessageId), streams a single tools-disabled summary call,
// then inserts a sentinel row and publishes the final chat_status.
// What differs from the cap-hit path:
//   - the synthetic instruction comes from DOOM_LOOP_NOTE (names the tool)
//   - the sentinel metadata is { kind: 'doom_loop', tool_name, args, threshold }
//     and has no Continue affordance (a manual retry would just re-loop)
//   - failures reuse the existing 'summary_after_cap_failed' reason; no
//     dedicated doom-loop reason code exists
// Kept as a clone rather than a shared helper because the two paths still
// differ in sentinel shape; a third sentinel would justify factoring out
// runWrapUpSummary(opts).
async function runDoomLoopSummary(
  ctx: InferenceContext,
  args: TurnArgs,
  session: Session,
  project: Project,
  history: Message[],
  agent: Agent | null,
  loop: { name: string; args: Record<string, unknown> },
): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

  // Shared by the message metadata, the error frame and the chat_status
  // frame on the failure path.
  const failureReason = 'summary_after_cap_failed';
  const fallbackFailureText = 'doom-loop summary failed';

  // Regular payload plus one synthetic system note that exists only in this
  // request's message array.
  const payload = buildMessagesPayload(session, project, history, agent);
  payload.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });

  const startedRows = await ctx.sql<{ started_at: string }[]>`
    UPDATE messages
    SET started_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING started_at
  `;
  const startedAt = startedRows[0]?.started_at ?? null;

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  // ---- throttled persistence of the partial text --------------------------
  // Deltas are appended to `partial`; DB writes are chained on `flushChain`
  // so they run one after another, and at most one timer is pending.
  let partial = '';
  let flushTimer: ReturnType<typeof setTimeout> | null = null;
  let flushChain: Promise<unknown> = Promise.resolve();

  const cancelPendingFlush = () => {
    if (flushTimer) {
      clearTimeout(flushTimer);
      flushTimer = null;
    }
  };
  const flushNow = () => {
    cancelPendingFlush();
    const snapshot = partial;
    flushChain = flushChain.then(() =>
      ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`
    );
  };
  const scheduleFlush = () => {
    if (flushTimer) return;
    flushTimer = setTimeout(() => {
      flushTimer = null;
      flushNow();
    }, DB_FLUSH_INTERVAL_MS);
  };

  // ---- streaming summary call ---------------------------------------------
  let completed = false;
  let cancelled = false;
  let failureText: string | null = null;
  let result: StreamResult | null = null;
  try {
    result = await streamCompletion(
      ctx,
      session.model,
      payload,
      { tools: null, temperature: agent?.temperature },
      (delta) => {
        partial += delta;
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
          chat_id: chatId,
          content: delta,
        });
        scheduleFlush();
      },
      signal,
    );
    completed = true;
  } catch (err) {
    const isError = err instanceof Error;
    if (isError && err.name === 'AbortError') {
      // The abort signal fired: keep what was streamed, mark as cancelled.
      cancelled = true;
    } else {
      failureText = isError ? err.message : String(err);
    }
  } finally {
    // Drop any scheduled flush and wait for queued writes, so the final
    // UPDATE below is the last write to the row.
    cancelPendingFlush();
    await flushChain;
  }

  // ---- finalize the assistant row -----------------------------------------
  if (completed && result) {
    const modelInfo = await modelContext.getModelContext(session.model);
    const contextWindow = modelInfo?.n_ctx ?? null;
    const [finalRow] = await ctx.sql<
      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
    >`
      UPDATE messages
      SET content = ${result.content},
          status = 'complete',
          tokens_used = ${result.completionTokens},
          ctx_used = ${result.promptTokens},
          ctx_max = ${contextWindow},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
      RETURNING tokens_used, ctx_used, ctx_max, finished_at
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
      tokens_used: finalRow?.tokens_used ?? null,
      ctx_used: finalRow?.ctx_used ?? null,
      ctx_max: finalRow?.ctx_max ?? null,
      started_at: startedAt,
      finished_at: finalRow?.finished_at ?? null,
      model: session.model,
    });
  } else if (cancelled) {
    await ctx.sql`
      UPDATE messages
      SET content = ${partial},
          status = 'cancelled',
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
  } else {
    // The summary itself failed. The shared ErrorReason value is reused on
    // purpose (the original note says the UI shows a generic "summary
    // failed" line for both sentinel paths). The sentinel is still inserted
    // further down.
    const shownError = failureText ?? fallbackFailureText;
    const errMeta: MessageMetadata = {
      kind: 'error',
      error_reason: failureReason,
      error_text: shownError,
    };
    await ctx.sql`
      UPDATE messages
      SET content = ${partial},
          status = 'failed',
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errMeta as never)}
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: shownError,
      reason: failureReason,
    });
  }

  // ---- session bookkeeping, sentinel, chat status -------------------------
  const [touchedSession] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  ctx.publishUser({
    type: 'session_updated',
    session_id: sessionId,
    project_id: touchedSession!.project_id,
    name: touchedSession!.name,
    updated_at: touchedSession!.updated_at,
  });

  await insertDoomLoopSentinel(ctx, sessionId, chatId, loop);

  if (completed || cancelled) {
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: failureReason,
    });
  }

  ctx.log.info(
    { sessionId, chatId, assistantMessageId, loopedTool: loop.name, summaryOk: completed, summaryCancelled: cancelled },
    'inference doom-loop summary finished',
  );
}
|
||||
|
||||
// Persists the doom-loop sentinel (a completed system message with
// kind: 'doom_loop' metadata) and announces it on the session stream.
//
// Unlike the cap-hit sentinel there is no hard-ceiling / can-continue logic:
// continuing would re-trigger the loop with the same tools available, so the
// user has to restate the question or switch agents instead.
async function insertDoomLoopSentinel(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  loop: { name: string; args: Record<string, unknown> },
): Promise<void> {
  const content = `Detected ${DOOM_LOOP_THRESHOLD} identical calls to ${loop.name}. Stopping the tool-call loop. Produce the best answer you can with what you have.`;
  const metadata: MessageMetadata = {
    kind: 'doom_loop',
    tool_name: loop.name,
    args: loop.args,
    threshold: DOOM_LOOP_THRESHOLD,
  };

  const [inserted] = await ctx.sql<{ id: string }[]>`
    INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
    VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)})
    RETURNING id
  `;
  const sentinelId = inserted!.id;

  // Same started → delta → complete frame sequence as the cap-hit sentinel,
  // so useSessionStream's reducer appends the row through its existing path.
  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: sentinelId,
    chat_id: chatId,
    role: 'system',
  });
  ctx.publish(sessionId, {
    type: 'delta',
    message_id: sentinelId,
    chat_id: chatId,
    content,
  });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: sentinelId,
    chat_id: chatId,
    metadata,
  });
}
|
||||
|
||||
interface InferenceRegistration {
|
||||
controller: AbortController;
|
||||
completed: Promise<void>;
|
||||
|
||||
@@ -128,9 +128,11 @@ export type ErrorReason =
|
||||
| 'tool_execution_failed'
|
||||
| 'summary_after_cap_failed';
|
||||
|
||||
// v1.8.2: shapes stored in messages.metadata. Discriminated on `kind`.
|
||||
// cap_hit — system sentinel emitted when tool budget is exhausted
|
||||
// error — attached to a failed assistant message so UI can show reason
|
||||
// v1.8.2 / v1.11.6: shapes stored in messages.metadata. Discriminated on `kind`.
|
||||
// cap_hit — system sentinel emitted when tool budget is exhausted
|
||||
// doom_loop — system sentinel emitted when the model called the same
|
||||
// tool with the same args DOOM_LOOP_THRESHOLD times in a row
|
||||
// error — attached to a failed assistant message so UI can show reason
|
||||
export type MessageMetadata =
|
||||
| {
|
||||
kind: 'cap_hit';
|
||||
@@ -139,6 +141,12 @@ export type MessageMetadata =
|
||||
agent_name: string | null;
|
||||
can_continue: boolean;
|
||||
}
|
||||
| {
|
||||
kind: 'doom_loop';
|
||||
tool_name: string;
|
||||
args: Record<string, unknown>;
|
||||
threshold: number;
|
||||
}
|
||||
| {
|
||||
kind: 'error';
|
||||
error_reason: ErrorReason;
|
||||
|
||||
@@ -112,11 +112,13 @@ export type ErrorReason =
|
||||
| 'tool_execution_failed'
|
||||
| 'summary_after_cap_failed';
|
||||
|
||||
// v1.8.2: shapes stored in Message.metadata. Discriminated on `kind`.
|
||||
// cap_hit — sentinel emitted when the tool budget is hit; carries the
|
||||
// budget + agent name + whether Continue is still allowed.
|
||||
// error — attached to a failed assistant message so the bubble can show
|
||||
// a specific reason on reload (WS error frame is one-shot).
|
||||
// v1.8.2 / v1.11.6: shapes stored in Message.metadata. Discriminated on `kind`.
|
||||
// cap_hit — sentinel emitted when the tool budget is hit; carries the
|
||||
// budget + agent name + whether Continue is still allowed.
|
||||
// doom_loop — sentinel emitted when the model called the same tool with
|
||||
// the same arguments threshold times in a row.
|
||||
// error — attached to a failed assistant message so the bubble can show
|
||||
// a specific reason on reload (WS error frame is one-shot).
|
||||
export type MessageMetadata =
|
||||
| {
|
||||
kind: 'cap_hit';
|
||||
@@ -125,6 +127,12 @@ export type MessageMetadata =
|
||||
agent_name: string | null;
|
||||
can_continue: boolean;
|
||||
}
|
||||
| {
|
||||
kind: 'doom_loop';
|
||||
tool_name: string;
|
||||
args: Record<string, unknown>;
|
||||
threshold: number;
|
||||
}
|
||||
| {
|
||||
kind: 'error';
|
||||
error_reason: ErrorReason;
|
||||
|
||||
43
apps/web/src/components/DoomLoopSentinel.tsx
Normal file
43
apps/web/src/components/DoomLoopSentinel.tsx
Normal file
@@ -0,0 +1,43 @@
|
||||
import { AlertCircle } from 'lucide-react';
|
||||
import type { Message } from '@/api/types';
|
||||
|
||||
interface Props {
|
||||
message: Message;
|
||||
}
|
||||
|
||||
// v1.11.6: card for the doom-loop sentinel, i.e. the system row that
// services/inference.ts insertDoomLoopSentinel writes when the model called
// the same tool with the same arguments `threshold` times in a row.
//
// Styled like CapHitSentinel (amber card + alert icon) so "amber alert" reads
// as "a guard rail stopped the loop" whichever guard fired. There is
// deliberately NO Continue button: retrying with the same tools would just
// re-loop, so the card points the user at rephrasing or switching agents.
// When the metadata is not a doom_loop payload the raw message content is
// shown instead of the generated sentence.
export function DoomLoopSentinel({ message }: Props) {
  const { metadata } = message;
  const loopMeta =
    metadata !== null && typeof metadata === 'object' && metadata.kind === 'doom_loop'
      ? metadata
      : null;
  const toolName = loopMeta === null ? null : loopMeta.tool_name;
  const threshold = loopMeta === null ? null : loopMeta.threshold;

  const explanation =
    toolName !== null && threshold !== null
      ? `Stopped after ${threshold} identical calls to ${toolName}. The model was looping.`
      : message.content;

  return (
    <div className="rounded-md border border-amber-500/40 bg-amber-500/10 text-sm">
      <div className="px-3 py-2 flex items-start gap-2">
        <AlertCircle className="size-4 text-amber-500 shrink-0 mt-0.5" />
        <div className="flex-1 min-w-0 space-y-1">
          <div className="text-xs font-medium text-amber-700 dark:text-amber-300">
            Doom loop detected
          </div>
          <div className="text-xs text-muted-foreground">
            {explanation}
          </div>
          <div className="text-[11px] text-muted-foreground/80">
            Send a new message with a different angle, or switch agents.
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -9,6 +9,7 @@ import { api } from '@/api/client';
|
||||
import { sessionEvents } from '@/hooks/sessionEvents';
|
||||
import { sendToTerminal, terminalsRegistry, type TerminalRegistration } from '@/lib/events';
|
||||
import { CapHitSentinel } from './CapHitSentinel';
|
||||
import { DoomLoopSentinel } from './DoomLoopSentinel';
|
||||
import { CodeBlock } from './CodeBlock';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import {
|
||||
@@ -622,6 +623,13 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) {
|
||||
);
|
||||
}
|
||||
|
||||
// v1.11.6: doom-loop sentinel. No Continue affordance — retrying with the
|
||||
// same tools would just re-loop. The card explains what tripped and
|
||||
// suggests next steps (new message angle / switch agents).
|
||||
if (message.role === 'system' && message.metadata?.kind === 'doom_loop') {
|
||||
return <DoomLoopSentinel message={message} />;
|
||||
}
|
||||
|
||||
// v1.8.2: tool messages and assistant tool_calls are now rendered by
|
||||
// MessageList via ToolCallLine / ToolCallGroup. Tool-role messages reach
|
||||
// this point only if MessageList didn't consume them (shouldn't happen,
|
||||
|
||||
244
boocode_code_review.md
Normal file
244
boocode_code_review.md
Normal file
@@ -0,0 +1,244 @@
|
||||
# BooCode — External Code Review & Lift Inventory
|
||||
|
||||
Last updated: 2026-05-20
|
||||
|
||||
This document tracks every open-source repo that BooCode references or lifts code from. Keep it pinned so we don't lose attribution or re-evaluate the same projects twice.
|
||||
|
||||
BooCode is personal/single-user — license compatibility is non-blocking, but the License column is recorded so we don't accidentally inherit an obligation if BooCode ever goes public.
|
||||
|
||||
-----
|
||||
|
||||
## Reference repos
|
||||
|
||||
### Tier A — actively lifting from / running as sidecar
|
||||
|
||||
#### 1. sst/opencode (NEW Tier A as of 2026-05-20)
|
||||
|
||||
- **URL:** https://github.com/sst/opencode
|
||||
- **License:** MIT
|
||||
- **Language:** TypeScript (Effect-TS service-oriented)
|
||||
- **What it is:** The coding agent Sam uses via Termius/Paseo. Also the source of every algorithm BooCode is porting through v1.15.
|
||||
- **Why it matters:** opencode's `packages/opencode/src/session/` is the canonical reference implementation for every part of the inference layer BooCode is rebuilding. We lift the algorithms, not the Effect-TS plumbing.
|
||||
- **Algorithms lifted so far:**
|
||||
- `session/compaction.ts` → v1.11.0 (shipped). `usable`, `isOverflow`, `select`, `buildPrompt` ported to plain TS. SUMMARY_TEMPLATE markdown skeleton verbatim.
|
||||
- `session/overflow.ts` → v1.11.0 (shipped). 20k `COMPACTION_BUFFER` constant.
|
||||
- **Algorithms lifted (queued):**
|
||||
- `session/processor.ts` `DOOM_LOOP_THRESHOLD=3` → v1.11.6
|
||||
- `session/llm.ts` `experimental_repairToolCall` → v1.12 (hand-rolled), then v1.13 (via AI SDK)
|
||||
- `tool/truncate.ts` truncation + outputPath pattern → v1.12 (adapted: opaque id, not filesystem path)
|
||||
- `session/prompt.ts` `runLoop()` outer agent loop → v1.14
|
||||
- `permission/evaluate.ts` wildcard ruleset → v1.15
|
||||
- MCP client (transport, tools/list discovery, tools/call) → v1.15
|
||||
- **What NOT to use:** Effect-TS service plumbing. Snapshot/patch system (for tool-edit revert; BooCoder territory if needed). The `experimental_native_runtime` (AI SDK fallback path). opencode's prompts.
|
||||
- **Source branch:** `dev` on `sst/opencode`. Note: `anomalyco/opencode` is a rebranded mirror; use `sst/opencode` as canonical.
|
||||
|
||||
#### 2. nmakod/codecontext
|
||||
|
||||
- **URL:** https://github.com/nmakod/codecontext
|
||||
- **License:** MIT
|
||||
- **Language:** Go (single binary)
|
||||
- **What it is:** AI-oriented codebase context map generator. Tree-sitter parsing across TS/JS/Go/C++/Swift/Python/Java/Rust/Dart/JSON/YAML. Generates `CLAUDE.md`-style structured overview. Bundled MCP server with 8 tools.
|
||||
- **MCP tools exposed:** `get_codebase_overview`, `get_file_analysis`, `get_symbol_info`, `search_symbols`, `get_dependencies`, `watch_changes`, `get_semantic_neighborhoods` (git co-change patterns — no embeddings), `get_framework_analysis`.
|
||||
- **Why it matters:** Solves the "architect needs a map" problem without embeddings.
|
||||
- **How we use it:** Run as sidecar container in v1.12. Wire its MCP tools into BooCode's `inference/tools.ts` as static wrappers in v1.12, then re-wire via real MCP client when v1.15 ships.
|
||||
- **What NOT to use:** Nothing. Clean fit.
|
||||
|
||||
#### 3. aimasteracc/tree-sitter-analyzer
|
||||
|
||||
- **URL:** https://github.com/aimasteracc/tree-sitter-analyzer
|
||||
- **License:** MIT
|
||||
- **Language:** Python, MCP server + CLI
|
||||
- **What it is:** Local-first code context engine. Outline-first navigation, ripgrep-based impact trace, no embeddings. 17 languages. Claims 54-56% token reduction via TOON format.
|
||||
- **MCP tools exposed:** `get_code_outline`, `trace_impact`, plus structural search/extract tools.
|
||||
- **Why it matters:** Backup analyzer with a different response shape — outline-first scales better than codecontext's full dump on huge files. Impact trace is useful for "what calls this function" without a full graph build.
|
||||
- **How we use it:** Lift the AST query patterns (`.scm` files) and the outline-first response shape. Can also run as a second MCP sidecar alongside codecontext.
|
||||
- **What NOT to use:** Don't lift the TOON format if it conflicts with shadcn rendering — markdown stays.
|
||||
|
||||
#### 4. spirituslab/codesight
|
||||
|
||||
- **URL:** https://github.com/spirituslab/codesight
|
||||
- **License:** check repo — assumed MIT-ish
|
||||
- **Language:** TypeScript/Node
|
||||
- **What it is:** Static code structure visualization. Symbol extraction, import resolution, call graphs. Detects circular dependencies and dead code (with documented false-positive caveats for `customElements.define()`, framework entry points, dynamic imports).
|
||||
- **Why it matters:** Gives BooCode a `repo_health` tool — different from codecontext's "what is this" map. This is "what's wrong with this."
|
||||
- **How we use it:** v1.16. Port the analyzer core (`analyze.mjs`) — the call-graph builder plus the circular-dep and dead-code detectors — into BooCode's `tools/repo_health.ts`. Drop the VS Code extension shell entirely.
|
||||
- **What NOT to use:** The VS Code wrapper, the "idea layer" feature (requires Copilot or Claude Code wiring we don't want).
|
||||
|
||||
#### 5. Aider-AI/aider
|
||||
|
||||
- **URL:** https://github.com/Aider-AI/aider
|
||||
- **License:** Apache-2.0
|
||||
- **Language:** Python
|
||||
- **What it is:** Git-native AI pair programmer CLI. Pioneered the tree-sitter repo-map + personalized PageRank approach.
|
||||
- **Why it matters:** Authoritative source of per-language `tags.scm` query files. 60+ languages curated and battle-tested.
|
||||
- **How we use it:** **Lift directly:** `aider/queries/tree-sitter-*.scm` — drop into BooCode's analyzer for any language codecontext or codesight don't cover natively.
|
||||
- **What NOT to use:** Don't port `repomap.py` itself — codecontext supersedes it.
|
||||
|
||||
-----
|
||||
|
||||
### Tier B — patterns / partial lift
|
||||
|
||||
#### 6. continuedev/continue
|
||||
|
||||
- **URL:** https://github.com/continuedev/continue
|
||||
- **License:** Apache-2.0
|
||||
- **Language:** TypeScript
|
||||
- **What it is:** IDE assistant framework. Full RAG pipeline, AST chunking, multi-provider LLM abstraction.
|
||||
- **Why it matters:** One specific drop-in lift:
|
||||
1. `core/indexing/ignore.ts` — `DEFAULT_SECURITY_IGNORE_FILETYPES`. Three-tier matcher (basenames, extensions, prefixes). Going into BooCode's `pathGuard` to block analyzing `.env`, `.pem`, `id_rsa`, etc.
|
||||
- **How we use it:** v1.11.7. Lift the ignore list, adapt to a `path.basename` + extension + prefix matcher.
|
||||
- **What NOT to use:** `core/indexing/CodebaseIndexer.ts` and `LanceDbIndex.ts` — embedding-based, the path we walked away from.
|
||||
|
||||
#### 7. cline/cline
|
||||
|
||||
- **URL:** https://github.com/cline/cline
|
||||
- **License:** Apache-2.0
|
||||
- **Language:** TypeScript (VS Code extension)
|
||||
- **What it is:** Autonomous coding agent. Pioneered plan/act mode and granular per-tool auto-approve.
|
||||
- **Why it matters:** Pattern source for v1.15 (absorbed into the broader permissions work). Plan/act invariant: in plan mode, write tools hidden from the model's tool registry; in act mode, available but each individual tool can be approval-gated.
|
||||
- **How we use it:** Lift the *pattern*, not the code. opencode's `permission/evaluate.ts` wildcard ruleset supersedes cline's mode-enum; cline contributes the conceptual framing (read-only invariant in BooCode v1.x).
|
||||
- **What NOT to use:** Cline's VS Code-specific UI plumbing. The shape is wrong for our stack.
|
||||
|
||||
#### 8. plandex-ai/plandex
|
||||
|
||||
- **URL:** https://github.com/plandex-ai/plandex
|
||||
- **License:** MIT
|
||||
- **Language:** Go
|
||||
- **What it is:** Terminal agent with a pending-changes sandbox. Edits never touch the filesystem until `/apply`. 2M token context.
|
||||
- **Why it matters:** Reference architecture for BooCoder (v2.0). The "edits queue in a virtual layer, applied atomically" model is the right safety story for write tools.
|
||||
- **How we use it:** Lift the data model: `pending_changes` table keyed by `(project_id, session_id, file_path)`, with diff content and apply/reject state. Lift the `diff` / `apply` / `rewind` UX vocabulary.
|
||||
- **What NOT to use:** Plandex's 2M-context-window engineering. Our context is bounded by llama-swap.
|
||||
|
||||
#### 9. OpenHands/OpenHands
|
||||
|
||||
- **URL:** https://github.com/OpenHands/OpenHands
|
||||
- **License:** MIT
|
||||
- **Language:** Python
|
||||
- **What it is:** Autonomous coding agent platform. V1 architecture is built on an append-only typed event log + Docker sandbox runtime.
|
||||
- **Why it matters:** Two distinct patterns:
|
||||
1. Event-log architecture — superseded by v1.13's parts-table approach (which derives from opencode's part-message model). The OpenHands event log is conceptually similar but has a different shape.
|
||||
2. Sandbox runtime — per-session Docker container for write tools. Closes the `/opt:ro` mount risk.
|
||||
- **How we use it:** v2.1. Lift the runtime container pattern (HTTP API inside container, BooCoder calls in). Don't port the Python implementation directly.
|
||||
- **What NOT to use:** OpenHands' agent prompts, the full microagent system, the cloud deployment path. Event-log shape (use opencode-derived parts table instead).
|
||||
|
||||
-----
|
||||
|
||||
### Tier C — reference only / partial use / skip
|
||||
|
||||
#### 10. cortexkit/aft (actual repo path: ualtinok/aft)
|
||||
|
||||
- **URL:** https://github.com/ualtinok/aft
|
||||
- **License:** check repo
|
||||
- **Language:** Rust binary + TypeScript plugin
|
||||
- **What it is:** Tree-sitter analysis tools delivered as a Rust binary, communicating with an OpenCode plugin via JSON-over-stdio. Warm-process pattern: one binary per project keeps parse trees in memory.
|
||||
- **Why it matters:** The BridgePool transport model. If our `codecontext` tool calls get hot (agent loops calling it dozens of times per session), the warm-process pattern is faster than fork-per-call.
|
||||
- **How we use it:** **Defer.** Profile first. Codecontext sidecar might be fast enough on its own. Revisit if tool-call latency becomes the bottleneck.
|
||||
- **What NOT to use:** The opencode-plugin wrapper. Wrong integration surface.
|
||||
|
||||
#### 11. codeprysm/codeprysm
|
||||
|
||||
- **URL:** https://github.com/codeprysm/codeprysm
|
||||
- **License:** check repo
|
||||
- **Language:** Rust
|
||||
- **What it is:** Graph-based code intelligence: tree-sitter parsing → node/edge graph in Qdrant, embeddings layered on top, MCP server exposes semantic search.
|
||||
- **Why it matters:** Clean node/edge taxonomy: nodes = Container/Callable/Data; edges = CONTAINS/USES/DEFINES.
|
||||
- **How we use it:** Lift the taxonomy *only* if we end up building our own graph instead of relying on codecontext. The embedding half is the trap we walked away from.
|
||||
- **What NOT to use:** The Qdrant + embedding pipeline. Same anti-pattern as continue's indexer.
|
||||
|
||||
#### 12. DeepSourceCorp/globstar
|
||||
|
||||
- **URL:** https://github.com/DeepSourceCorp/globstar
|
||||
- **License:** MIT
|
||||
- **Language:** Go
|
||||
- **What it is:** Static analysis toolkit for writing code checkers using tree-sitter S-expression queries. YAML interface for simple checkers, Go interface for complex multi-file checkers.
|
||||
- **Why it matters:** Not for the architect tool. **Future use only.** If BooCoder ever grows a "verify before commit" lane, globstar checkers could be the verification engine: drop YAML checkers into `.globstar/`, run as a pre-apply gate.
|
||||
- **How we use it:** Park. Not in any current version.
|
||||
- **What NOT to use:** Don't try to use it as a codebase analyzer — it's a linter framework, wrong tool for the architect role.
|
||||
|
||||
#### 13. getpaseo/paseo
|
||||
|
||||
- **URL:** https://github.com/getpaseo/paseo
|
||||
- **License:** AGPL-3.0
|
||||
- **What it is:** WebSocket daemon ↔ client protocol for agent coordination. Already running in our stack (paseo dispatches Claude Code/opencode).
|
||||
- **Why it matters:** Patterns for agent lifecycle, `--worktree` flag pattern, ECDH/NaCl security model.
|
||||
- **How we use it:** Reference for BooCoder isolation (v2.0/v2.1). Note AGPL — fine for personal, blocks public distribution.
|
||||
- **What NOT to use:** Don't vendor the source. Treat as a peer service.
|
||||
|
||||
#### 14. earendil-works/pi
|
||||
|
||||
- **URL:** https://github.com/earendil-works/pi
|
||||
- **License:** MIT
|
||||
- **What it is:** `@mariozechner/pi-agent-core` (tool loop + state machine) and `@mariozechner/pi-ai` (provider abstraction).
|
||||
- **Why it matters:** If we ever want non-llama-swap inference (Anthropic, OpenAI, Mistral direct), pi-ai is the cleanest TypeScript provider abstraction available.
|
||||
- **How we use it:** Defer. v2.x optional batch only.
|
||||
|
||||
#### 15. microsoft/agent-framework
|
||||
|
||||
- **URL:** https://github.com/microsoft/agent-framework
|
||||
- **License:** MIT
|
||||
- **What it is:** Workflow graphs for multi-agent coordination.
|
||||
- **Why it matters:** Conceptual reference for far-future multi-agent orchestration.
|
||||
- **How we use it:** Read the ADRs in `docs/decisions/`. Don't port code — implementation is Azure/Python/.NET-heavy.
|
||||
|
||||
#### 16. microsoft/autogen
|
||||
|
||||
- **URL:** https://github.com/microsoft/autogen
|
||||
- **License:** MIT
|
||||
- **What it is:** Earlier Microsoft multi-agent framework.
|
||||
- **Why it matters:** Effectively sunsetting in favor of agent-framework.
|
||||
- **How we use it:** Skip. Don't invest in evaluating further.
|
||||
|
||||
#### 17. open-webui/open-webui
|
||||
|
||||
- **URL:** https://github.com/open-webui/open-webui
|
||||
- **License:** BSD-3
|
||||
- **What it is:** Self-hosted LLM frontend.
|
||||
- **Why it matters:** Python/Svelte, wrong stack. RAG pipeline only worth a read if BooLab needs improvement — unrelated to BooCode.
|
||||
- **How we use it:** Skip for BooCode.
|
||||
|
||||
-----
|
||||
|
||||
## Lift catalog — what lands where
|
||||
|
||||
| Source repo | Specific artifact | License | BooCode destination | Version |
|
||||
|---|---|---|---|---|
|
||||
| `sst/opencode` | `session/compaction.ts` + `session/overflow.ts` algorithms | MIT | `services/compaction.ts` | **v1.11.0 ✅** |
|
||||
| `sst/opencode` | `session/processor.ts` DOOM_LOOP_THRESHOLD pattern | MIT | `services/inference.ts` doom-loop guard | v1.11.6 |
|
||||
| `continuedev/continue` | `core/indexing/ignore.ts` DEFAULT_SECURITY_IGNORE_FILETYPES | Apache-2.0 | Extend `path_guard.ts` exclusion list | v1.11.7 |
|
||||
| `nmakod/codecontext` | Whole binary (sidecar) | MIT | New `codecontext` container, 8 MCP tools wired via static wrappers | v1.12 |
|
||||
| `sst/opencode` | `session/llm.ts` experimental_repairToolCall pattern | MIT | `services/inference.ts` synthetic invalid-tool result | v1.12 |
|
||||
| `sst/opencode` | `tool/truncate.ts` truncation + outputPath pattern (adapted: opaque id) | MIT | `services/truncate.ts` + `view_truncated_output` tool | v1.12 |
|
||||
| `Aider-AI/aider` | `aider/queries/tree-sitter-*.scm` (60+ files) | Apache-2.0 | Fallback grammars for languages not covered by sidecars | v1.12 (fallback) |
|
||||
| `sst/opencode` | `session/llm.ts` AI SDK adoption + alpha tool ordering | MIT | `services/inference.ts` rewrite | v1.13 |
|
||||
| `sst/opencode` | Parts-message taxonomy (text, tool_call, tool_result, reasoning, step_start) | MIT | new `message_parts` table | v1.13 |
|
||||
| `sst/opencode` | `session/prompt.ts` runLoop() outer agent loop | MIT | `services/inference.ts` step-based loop | v1.14 |
|
||||
| `sst/opencode` | `agent.steps` per-agent step cap | MIT | AGENTS.md + agents.ts | v1.14 |
|
||||
| `sst/opencode` | `permission/evaluate.ts` wildcard ruleset | MIT | new `permissions` table + matcher | v1.15 |
|
||||
| `sst/opencode` | `mcp/index.ts` MCP client (SSE transport + tools/list + tools/call) | MIT | new `services/mcp/` module; codecontext re-wired through it | v1.15 |
|
||||
| `cline/cline` | Plan/Act invariant (read-only mode pattern) | Apache-2.0 | absorbed into v1.15 permissions work | v1.15 |
|
||||
| `spirituslab/codesight` | `analyze.mjs` — call graph, circular-dep, dead-code | MIT-ish | `apps/server/src/tools/repo_health.ts` | v1.16 |
|
||||
| `plandex-ai/plandex` | `pending_changes` data model, diff/apply/rewind UX | MIT | New `pending_changes` table, BooCoder write-tool gating | v2.0 |
|
||||
| `OpenHands/OpenHands` | Sandbox runtime pattern | MIT | New `boocoder` container, per-session Docker | v2.1 |
|
||||
| `cortexkit/aft` (ualtinok/aft) | BridgePool warm-process JSON-stdio pattern | check | Optimization if profile shows fork overhead | Deferred |
|
||||
| `codeprysm/codeprysm` | Node/edge taxonomy (Container/Callable/Data, CONTAINS/USES/DEFINES) | check | Reference only if we ever build our own graph | None |
|
||||
| `DeepSourceCorp/globstar` | Whole toolkit | MIT | Future verify-before-commit gate for BooCoder | Parked |
|
||||
| `earendil-works/pi` | `pi-ai` provider abstraction | MIT | Multi-provider LLM if pursued | v2.x optional |
|
||||
| `microsoft/agent-framework` | Workflow graph concepts | MIT | Conceptual only | v3.x |
|
||||
|
||||
-----
|
||||
|
||||
## Decisions log
|
||||
|
||||
- **Embeddings dropped from BooCode** (May 2026). Replaced RAG with file-view tools + sidecar analyzers.
|
||||
- **opencode promoted to Tier A** (2026-05-20). The compaction port (v1.11.0) made it clear opencode is not just "the agent Sam uses" — it's the canonical reference implementation for everything BooCode is rebuilding through v1.15. Five algorithms identified for lift (compaction, doom-loop, repairToolCall, runLoop, permission evaluate) plus truncate.ts and MCP client.
|
||||
- **Source is `sst/opencode` `dev` branch.** `anomalyco/opencode` is a rebranded mirror; do not source from there.
|
||||
- **Original Batch 11 (aider PageRank port) replaced** by codecontext sidecar approach.
|
||||
- **Original Batch 12 (codebase indexer w/ Harrier) removed.** No embedding infrastructure.
|
||||
- **Original Batch 13 (OpenHands event log) replaced** by v1.13 parts table (opencode pattern). Same outcome, different shape.
|
||||
- **Original Batch 12 (cline plan/act mode) absorbed into v1.15** (opencode permission ruleset). Same outcome, wildcard rules instead of mode enum.
|
||||
- **Aider's `repomap.py` port dropped.** Codecontext supersedes it. Aider contribution narrows to the `.scm` query files only.
|
||||
- **Globstar role re-scoped.** Not an architect tool — parked for future verify-before-commit gate.
|
||||
- **codeprysm role re-scoped.** Taxonomy reference only. Embedding half rejected.
|
||||
- **AI SDK adoption deferred to v1.13.** Hand-roll opencode's repairToolCall pattern in v1.12 first.
|
||||
- **`tool_choice='required'` confirmed supported** by llama-swap (qwen3.6-35b-a3b-mxfp4, 2026-05-20). Repair tool call is viable.
|
||||
- **`anomalyco/sst` is a mirror, not a fork.** Same applies to `anomalyco/opencode`. Use canonical `sst/sst` and `sst/opencode` sources.
|
||||
@@ -1,204 +1,317 @@
|
||||
# BooCode — Roadmap
|
||||
# BooCode v1.x — Roadmap
|
||||
|
||||
Last updated: 2026-05-17
|
||||
Last updated: 2026-05-20
|
||||
|
||||
## Overview
|
||||
|
||||
BooCode is a standalone code-chat tool at `/opt/boocode/`. Read-only by design in v1.x — pick a project, chat with a local LLM that has file-inspection tools, get streaming responses over WebSocket.
|
||||
BooCode is a standalone code-chat tool at `/opt/boocode/`. Read-only by design — pick a project, chat with a local LLM that has file-inspection tools, get streaming responses over WebSocket.
|
||||
|
||||
Live at `https://code.indifferentketchup.com` (Caddy → Authelia → Tailscale → `100.114.205.53:9500`).
|
||||
|
||||
**Architectural commitments:**
|
||||
|
||||
- No embeddings. File-view tools + sidecar analyzers replace RAG.
|
||||
- No embeddings. The model uses file-view tools (`view_file`, `list_dir`, `grep`, `find_files`) + sidecar analyzers (codecontext, codesight). Walked away from the RAG pipeline May 2026.
|
||||
- Read-only in v1.x. Write tools land in BooCoder (separate container, post-v1.x).
|
||||
- One Postgres (`boocode_db`), one frontend SPA, container-per-service for new capabilities.
|
||||
|
||||
## Current state
|
||||
For the full inventory of external code that BooCode lifts from or references, see `boocode_code_review.md`.
|
||||
|
||||
- **main:** v1.8.1 (`b09d0ff` was last known tip prior to v1.8.2).
|
||||
- **Just merged / committed to main:** v1.8.2 — tool-loop fixes (read-only loop cap raised, "tool loop depth exceeded" error surfaced with continue button, `max_tool_calls` AGENTS.md frontmatter, `messages.metadata` column).
|
||||
- **In flight RIGHT NOW:** **v1.x-themes** branch — Claude Code implementing 18-theme system. See "Active work" below.
|
||||
-----
|
||||
|
||||
## Active work
|
||||
## Shipped (status as of 2026-05-20)
|
||||
|
||||
### v1.x-themes — Theme system (in flight)
|
||||
| Version | Theme | Notes |
|
||||
|---|---|---|
|
||||
| v1.0 | Initial scaffold | live |
|
||||
| Batches 1–4.4 | Markdown, sidebar, panes, chats-inside-sessions, archive, fork/delete, header polish, settings drawer | merged |
|
||||
| v1.5 | resolveProjectPath, BOOTSTRAP_ROOT, vitest pin | merged |
|
||||
| v1.6, v1.6.1, v1.6.2 | Mobile pass + RightRail mobile drawer | merged |
|
||||
| v1.7 | Drag-drop file + paste-as-attachment | merged |
|
||||
| v1.8, v1.8.1, v1.8.2 | Settings drawer, git_status tool, WS reconnect, **per-turn budget reset + Continue affordance + CapHitSentinel** | merged |
|
||||
| v1.9.1 | Skills system (`/opt/skills/` + `skill_find`/`skill_use`/`skill_resource` tools + `/skill` slash command) | merged |
|
||||
| v1.9.7 | `ask_user_input` elicitation tool | merged |
|
||||
| **Batch 9 (Agents Tier 2)** | `AGENTS.md` + 6 builtin agents + AgentPicker in ChatInput toolbar + `sessions.agent_id` | **merged in `92bd3b1`**, included in v1.9.1/v1.9.7/v1.10.x tags |
|
||||
| v1.10.0 | BooTerm: separate container, xterm.js + node-pty + tmux | merged |
|
||||
| v1.10.1 | BooTerm-user (spawn as samkintop, login bash, Claude Code/opencode PATH) | merged |
|
||||
| v1.10.4, v1.10.5 | Mobile terminal + XML tool-call fallback parser | merged |
|
||||
| **v1.11.0** | **opencode-style compaction port** (auto-overflow, anchored summary, tail preservation) | merged |
|
||||
| v1.11.1 | Compaction follow-up (working indicator during compaction, unit tests, .bak cleanup) | merged |
|
||||
| v1.11.2 | ContextBar (persistent context-usage indicator) | merged |
|
||||
| v1.11.3 | `ctx_max` capture via `/upstream/<model>/props` (replaces dead `timings.n_ctx` read) | merged |
|
||||
|
||||
**Spec source:** locked in this session. Anchors below derived from `/mnt/user-data/uploads/boocode-theme-previews.html` (16 themes extracted) + spec §3 family rules for the two missing (`fuchsia-noir`, `midnight-sapphire`).
|
||||
-----
|
||||
|
||||
**18 themes, grouped:**
|
||||
|
||||
| Family | IDs |
|
||||
|---|---|
|
||||
| Neutral dark | obsidian (default), gunmetal |
|
||||
| Brown / warm | espresso, volcanic-brown |
|
||||
| Orange / amber | copper, gold |
|
||||
| Red | oxblood, crimson |
|
||||
| Purple | elderflower, plum |
|
||||
| Pink / magenta | steel-pink, fuchsia-noir |
|
||||
| Green | matrix, sage |
|
||||
| Blue | cobalt, midnight-sapphire |
|
||||
| Light-only | ivory, chalk |
|
||||
|
||||
**Dark anchors (bg, card, border, muted-fg, accent):**
|
||||
|
||||
```
|
||||
obsidian #0c0c0e #15151a #1f1f23 #6b6b75 #8b5cf6
|
||||
gunmetal #0d1117 #161b22 #21262d #7d8590 #388bfd
|
||||
espresso #1c1410 #241a14 #2e2218 #8a7058 #c8a880
|
||||
volcanic-brown #140906 #1e0e0a #2e1610 #7a4030 #cc4a1a
|
||||
copper #100800 #1c1408 #2e1f0a #8a6040 #b87333
|
||||
gold #0e0800 #1a1200 #2a1f00 #a07c30 #d4af37
|
||||
oxblood #0a0303 #180606 #2a0808 #7a3028 #8b1a1a
|
||||
crimson #0e0404 #1a0808 #2e0a0a #8a3030 #dc143c
|
||||
elderflower #100818 #1c1024 #2c1830 #8a78a0 #b89cd8
|
||||
plum #0c0814 #180e20 #241830 #7a4878 #8e4585
|
||||
steel-pink #0e0408 #1a080e #2e0c1a #9a4070 #cc33aa
|
||||
fuchsia-noir #0a0610 #14081a #2a0c2e #8a3878 #ff1493
|
||||
matrix #000a00 #031403 #0a200a #208030 #00ff41
|
||||
sage #0a0e08 #141a10 #1e2e1a #7a8870 #9caf88
|
||||
cobalt #020817 #061434 #0c2244 #3060a0 #0047ab
|
||||
midnight-sapphire #02050e #060c1f #0e1a36 #4a6088 #1e3a8a
|
||||
ivory #fdfcf8 #f5f2e8 #e8e4d8 #8a8478 #3a3328 (light-only)
|
||||
chalk #fafaf7 #f0f0ec #e5e5e0 #75756e #2a2a28 (light-only)
|
||||
```
|
||||
|
||||
**Light-variant derivation (for the 16 dark themes):**
|
||||
- Lightest anchor → background
|
||||
- Accent darkens ~15% (HSL L − 15pp)
|
||||
- Foreground = near-black tinted toward family hue
|
||||
- Surfaces / borders scale up symmetrically
|
||||
|
||||
**Fallback:** `ivory` or `chalk` + dark mode → `obsidian` dark.
|
||||
|
||||
**Token map (shadcn nova set):**
|
||||
```
|
||||
background ← anchor 1
|
||||
card / popover ← anchor 2
|
||||
border / muted ← anchor 3
|
||||
muted-foreground ← anchor 4
|
||||
primary / accent ← anchor 5
|
||||
foreground ← derived: anchor-5 hue, ~92% L, ~25% S
|
||||
--destructive ← red family, unchanged across themes
|
||||
--ring ← per-theme accent
|
||||
--radius ← 0.5rem locked
|
||||
fonts ← Inter + JetBrains Mono locked
|
||||
```
|
||||
|
||||
**Wiring locked:**
|
||||
- Schema: `settings.theme_id TEXT NOT NULL DEFAULT 'obsidian'`, `settings.theme_mode TEXT NOT NULL DEFAULT 'dark' CHECK IN ('dark','light','system')`
|
||||
- API: GET `/api/settings` extended, PATCH whitelists 18 theme ids → 400 otherwise
|
||||
- CSS: `apps/web/src/styles/themes/*.css` (18 + `_tokens.css`), imported from `globals.css` (NOT `index.css`)
|
||||
- `.theme-<id>` + `.theme-<id>.dark` composed on `<html>`
|
||||
- `apps/web/src/lib/theme.ts` (new): `THEMES` const, `applyTheme(id, mode)`, `useTheme()` hook. matchMedia subscribed only when `mode === 'system'`
|
||||
- `apps/web/src/App.tsx`: `useTheme()` at top
|
||||
- Settings page: card grid, mode toggle (radio: Dark/Light/System). No header dropdown.
|
||||
- shadcn primitives: `card`, `radio-group` installed via `pnpm dlx shadcn@latest add`. `button`, `label` already present.
|
||||
- FOUC mitigation: localStorage cache + inline `<script>` in `index.html` sets `<html>` class before React hydrates
|
||||
|
||||
**Out of scope (v1):**
|
||||
- Custom user palettes (no color picker)
|
||||
- Per-project / per-session themes
|
||||
- Shiki syntax-highlighting themes
|
||||
- Header quick-switcher
|
||||
|
||||
**Verify after Claude Code hands back:**
|
||||
- `fuchsia-noir` and `midnight-sapphire` visual check — derived, not from preview. Swap hexes if they read wrong.
|
||||
- Light variants of the 16 dark themes — algorithmic. Spot-check 3-4 across families (warm/cool/dark/saturated).
|
||||
- FOUC on hard reload, theme-switch persistence, system-mode matchMedia teardown.
|
||||
|
||||
## Batch summary
|
||||
## In flight / queued
|
||||
|
||||
| Version | Theme | Status |
|
||||
|---|---|---|
|
||||
| v1.0 | Initial scaffold, read-only tools, WS streaming | ✅ Merged |
|
||||
| v1.1-batch1 | Markdown, Copy + Regen, tok/s + ctx, AI naming | ✅ Merged |
|
||||
| v1.1-batch2 | Sidebar restructure | ✅ Merged |
|
||||
| v1.1-batch3 | Pane system, FileBrowserPane + Shiki, cross-tab | ✅ Merged |
|
||||
| v1.1-batch3.5 | Chip infra, `@file`, line-select | ✅ Merged |
|
||||
| v1.2 | Chats inside sessions, right-rail, `/compact`, archive, force-send | ✅ Merged |
|
||||
| v1.2-project-ux | Project archive, sidebar context, Gitea API, bootstrap | ✅ Merged |
|
||||
| v1.3 | Tab-close + chat-archive | ✅ Merged |
|
||||
| v1.4 | Fork message, delete message, header polish (was original Batch 5) | ✅ Merged |
|
||||
| v1.5 | resolveProjectPath, BOOTSTRAP_ROOT, vitest pin | ✅ Merged |
|
||||
| v1.5.1 | Bootstrap hotfix (git in container, SSH keypair, known_hosts) | ✅ Merged (`4a9f207`) |
|
||||
| v1.6 | Mobile pass: drawer, single-pane, long-press, IME-safe, pull-to-refresh, swipe-close | ✅ Merged |
|
||||
| v1.6.1 | RightRail mobile wrapper fix | ✅ Merged |
|
||||
| Tool-loop bump | MAX_TOOL_LOOP_DEPTH 5→15 | ✅ Merged |
|
||||
| v1.6.2 | Workspace + Session+Project headers, ChatTabBar new-chat, RightRail mobile drawer | ✅ Merged |
|
||||
| v1.7 | Drag-drop file + paste-as-attachment (was Batch 6) | ✅ Merged |
|
||||
| v1.8 | Settings drawer + `git_status` added to ALL_TOOL_NAMES (was Batch 7) | ✅ Merged |
|
||||
| v1.8.1 | WS reconnect toast tuning (silent/gray/red thresholds), pane status indicators | ✅ Merged |
|
||||
| v1.8.2 | Tool-loop fixes: read-only cap raised, "depth exceeded" error + continue, `max_tool_calls` frontmatter, `messages.metadata` | ✅ Merged |
|
||||
| **v1.x-themes** | **18 themes, settings page, dark/light/system, FOUC mitigation** | **🔄 Claude Code in flight** |
|
||||
| v1.8.3 | Tool call UI compaction: collapse-by-default, group consecutive same-tool, result preview cap | Planned (small, frontend-only) |
|
||||
| v1.9 | Settings pane (system prompt per project + session, web search toggle, `+` button) | Planned (spec locked, was on branch `v1.9-settings-pane`) |
|
||||
| v1.10 | Web search backend: SearXNG `web_search` + `web_fetch` | Planned |
|
||||
| v1.11 | Agents Tier 2: `AGENTS.md`, per-agent temp/tools whitelist, AgentPicker in ChatInput | Planned |
|
||||
| v1.12 | BooTerm: separate container, xterm.js + node-pty + tmux | Planned |
|
||||
| v1.13 | Architect: codecontext sidecar (MCP, tree-sitter, no embeddings) | Planned |
|
||||
| v1.13b | Architect: repo health (call graph, circular deps, dead code) | Planned |
|
||||
| v1.14 | Tool approval + plan/act mode (cline-style) | Planned |
|
||||
| Post-v1.x | Append-only event log (OpenHands V1) | Planned |
|
||||
| Post-v1.x | BooCoder pending-changes (plandex) | Planned |
|
||||
| Post-v1.x | BooCoder runtime isolation (per-session Docker sandbox) | Planned |
|
||||
| Optional | Multi-provider LLM abstraction (pi-ai) | Skip unless need surfaces |
|
||||
| Far future | Workflow graphs (microsoft/agent-framework concepts) | v2.x topic |
|
||||
| ~~v1.11.4~~ | ~~Per-turn budget + Continue affordance~~ | **CANCELLED** — already shipped in v1.8.2 |
|
||||
| **v1.11.5** | ContextBar relocate (above agent-picker row), thicker, always-visible, remove ChatContextPopover | **dispatched** |
|
||||
| v1.11.6 | Doom-loop guard from opencode (3 identical tool calls → sentinel, abort recursion) | drafted |
|
||||
| v1.11.7 | pathGuard secrets filter (continue.dev's `DEFAULT_SECURITY_IGNORE_FILETYPES`) | drafted |
|
||||
| v1.11.x | Tag consolidation point (everything since v1.11.0) | queued |
|
||||
|
||||
## Flagged follow-ups (not in a batch yet)
|
||||
-----
|
||||
|
||||
- Agents in `/data/AGENTS.md` don't list `git_status` in their `tools:` blocks. Out of scope until pre-BooCoder cleanup pass.
|
||||
- v1.9 dispatch had item (g): verify `useUserEvents` broadcasts `project_updated` on PATCH `/projects/:id`. Add if missing.
|
||||
- v1.8.2 follow-up: confirm `messages.metadata` migration ran clean in prod DB after deploy.
|
||||
## Major work after v1.11.x
|
||||
|
||||
## Order of operations
|
||||
| Version | Theme | LoC est. |
|
||||
|---|---|---|
|
||||
| **v1.12** | codecontext sidecar + tool output truncation + repair tool call (Integration 1 + 3 from May review, fused) | ~600 |
|
||||
| v1.13 | Phase B groundwork — parts table + AI SDK adoption + per-tool `read_only`/`write` tagging | ~1500 |
|
||||
| v1.14 | Phase C — outer agent loop (multi-step until non-tool finish, AGENTS.md `steps` field, reasoning as part type) | ~800 |
|
||||
| v1.15 | Phase D — permission ruleset + MCP client (lays foundation for BooCoder) | ~600 |
|
||||
| v1.16 | Batch 11b — codesight repo_health (call graph, circular deps, dead code) | ~400 |
|
||||
| **v2.0** | Batch 14 — BooCoder pending changes (new container, write tools, plandex pattern) | ~1200 |
|
||||
| v2.1 | Batch 15 — BooCoder runtime isolation (per-session Docker sandbox, OpenHands pattern) | ~600 |
|
||||
| v2.x | Batch 16/17 — Multi-provider LLM (optional, pi-ai) and Workflow graphs (far future, agent-framework concepts) | tbd |
|
||||
|
||||
1. **v1.x-themes** finishes (Claude Code in flight). Audit + smoke test. Merge.
|
||||
2. **v1.8.3** — tool call UI compaction. Small frontend batch, addresses current pain.
|
||||
3. **v1.9** — settings pane. Branch already named `v1.9-settings-pane`. Spec locked.
|
||||
4. **v1.10** — web search backend.
|
||||
5. **v1.11** — agents.
|
||||
6. **v1.12** — BooTerm.
|
||||
-----
|
||||
|
||||
Track B (architect, no UI dep, can run parallel anytime): v1.13 → v1.13b → v1.14.
|
||||
## Roadmap doc deviations and corrections
|
||||
|
||||
This roadmap was significantly out of sync with reality until 2026-05-20. Key corrections folded in:
|
||||
|
||||
1. **Batch 9 (Agents Tier 2) is done**, not "next up." Shipped as commit `92bd3b1` and included from v1.9.1 onward. The original "Track A: Batch 9 next" recommendation was correct, but the doc was never updated.
|
||||
2. **v1.6.2 merged.** No longer "in flight."
|
||||
3. **Batch 5 (fork/delete), Batch 6 (drag-drop), Batch 7 (settings drawer), Batch 8 (web search), Batch 10 (BooTerm) all shipped**, scattered across the v1.6–v1.10 version line. Original "Track A polish then agents" plan was abandoned; work happened opportunistically.
|
||||
4. **v1.11.0 was a major unplanned addition** — opencode-style compaction (auto-overflow detection + anchored rolling summary + tail preservation). This is NOT a batch from the old roadmap. It opened a new patch line (v1.11.x) of small follow-ups in front of the original Batches 11–17.
|
||||
5. **Batch 11 (codecontext sidecar) moves to v1.12.** Bundles with truncation and repair-tool-call lift (both from opencode) since they share concerns and the `tool_choice='required'` confirmation makes repair-tool-call viable.
|
||||
6. **Phase B (parts table + AI SDK + tool-call lifecycle) becomes v1.13.** This absorbs the old Batch 13 (append-only event log) — same outcome (typed message parts), different mental framing.
|
||||
7. **Phase C and Phase D are new** (numbered v1.14/v1.15). They originate from the opencode integration analysis, not from the original 17-batch plan. Phase C delivers the outer agent loop with explicit step boundaries. Phase D delivers the permission ruleset + MCP client needed for codecontext to be useful and for BooCoder to gate writes.
|
||||
8. **BooCoder (v2.0/v2.1)** is the second-major-version line. New container, new safety story (pending changes + per-session Docker sandbox). Maps to original Batches 14/15.
|
||||
|
||||
-----
|
||||
|
||||
## v1.11.x patches in detail
|
||||
|
||||
### v1.11.0 — opencode-style compaction port ✅
|
||||
|
||||
**What shipped:** Auto-detection of context overflow (`isOverflow(usage, model)`) triggers compaction on the *next* user turn. Compaction preserves the last 2 turns verbatim and produces an anchored Markdown summary (8-section template lifted verbatim from opencode `compaction.ts`) that replaces older head messages. The summary is rolling — each new compaction updates the prior summary rather than stacking a new one on top. Schema additions: `messages.compacted_at`, `messages.summary`, `messages.tail_start_id`, `chats.needs_compaction`. A WS `compacted` frame fires a sonner toast on completion.
|
||||
|
||||
**Key divergences from opencode:** Per-chat (not per-session) compaction state because BooCode history is per-chat. UUID `tail_start_id` not BIGINT. No `parent_id` on messages. Context limit comes from `messages.ctx_max` (last-known `n_ctx`), not a `model.context_limit` field.
|
||||
|
||||
### v1.11.1 — Compaction follow-up ✅
|
||||
|
||||
Working-state `chat_status: working/idle` frames around the LLM call inside `compaction.process()`. 24 new vitest cases for the six pure functions (`usable`, `isOverflow`, `estimate`, `turns`, `select`, `buildPrompt`). 7 `.bak-v1.11` files deleted.
|
||||
|
||||
### v1.11.2 — ContextBar ✅
|
||||
|
||||
New `ContextBar.tsx` rendering above MessageList. Shows `{used} / {max} ({pct}%)` with color tiers computed against `max - 20k` reserve (matches `compaction.usable()`): muted <60%, amber 60-80%, orange 80-95%, red ≥95%. Tooltip shows "Auto-compaction at ~N%". Mobile breakpoints: `< 380px` shows "Ctx" + numbers; `380-639px` adds parenthetical %; `≥ 640px` shows full "Context" label.
|
||||
|
||||
### v1.11.3 — ctx_max capture fix ✅
|
||||
|
||||
Discovered that the code at `inference.ts:479-481` and `compaction.ts:300` reading `parsed.timings.n_ctx` was dead: it never fired because llama-server emits `prompt_n / predicted_n / *_ms / *_per_second` in timings but NOT `n_ctx`. New `model-context.ts` module fetches `GET /upstream/<model>/props` with 3s timeout, positive cache (no TTL), 60s negative cache. Wired into all 4 ctx_max write sites (3 in inference.ts, 1 in compaction.ts). 12 new vitest cases. 7 historical rows backfilled to `ctx_max = 262144` (single-day backfill, only qwen3.6-35b-a3b-mxfp4 in use).
|
||||
|
||||
### v1.11.4 — CANCELLED
|
||||
|
||||
Original scope: per-turn budget reset + Continue affordance + CapHitSentinel card. Recon revealed all three are already shipped (v1.8.2 timestamps in inference.ts comments). Dead version slot.
|
||||
|
||||
### v1.11.5 — ContextBar relocate (DISPATCHED)
|
||||
|
||||
Relocate ContextBar from above MessageList to above the agent-picker row. Bump height from ~4px bar to ~10-12px. Always-visible (zero-state when no assistant messages + use `model_context_limit` from v1.11.3 cache). Remove `ChatContextPopover` entirely (redundant signal; mobile-hostile).
|
||||
|
||||
### v1.11.6 — Doom-loop guard (QUEUED)
|
||||
|
||||
Detect 3 identical tool calls in a row within one turn (same name + same args via JSON.stringify). On detection: abort tool-call recursion, insert `metadata.kind='doom_loop'` sentinel, trigger summary turn via existing `runCapHitSummary` path. New `DoomLoopSentinel.tsx` component (no Continue button — looping shouldn't be retried with same tools). Per-turn sliding window, scoped to current turn's tool-call accumulator.
|
||||
|
||||
**Lift source:** opencode `processor.ts`, `DOOM_LOOP_THRESHOLD = 3` constant.
|
||||
|
||||
### v1.11.7 — pathGuard secrets filter (QUEUED)
|
||||
|
||||
Extend pathGuard with `DEFAULT_SECURITY_IGNORE_FILETYPES` from continue.dev `core/indexing/ignore.ts`. Three-tier matcher: exact basenames (`credentials`, `secrets.yml`), extensions (`.env`, `.pem`, `.key`, `.crt`, etc.), prefix patterns (`id_rsa`, `id_dsa`, `id_ecdsa`, `id_ed25519`). Blocked files appear in `list_dir` and `find_files` results with `(blocked)` annotation. `view_file` returns `{ error: 'blocked_secret_file', ... }`. `grep` cannot read blocked file contents. No override mechanism in v1.x (use host shell).
|
||||
|
||||
**Why it matters:** The `/opt:/opt:ro` mount currently exposes `boolab/.env`, `dubdrive/users.json`, `authelia/state`, and every other service's secrets to any tool that gets past path validation. This filter is a cheap way to close off that attack surface.
|
||||
|
||||
-----
|
||||
|
||||
## v1.12 — codecontext sidecar + truncation + repair tool call
|
||||
|
||||
Three lifts fused because they share concerns:
|
||||
|
||||
1. **codecontext sidecar** — new container, single-instance, path-addressed multi-project. Mount `/opt/projects:/workspace:ro`. 8 tools wired as static `ToolDef` wrappers in `apps/server/src/services/tools/codecontext/` (one file per tool). HTTP client to `http://codecontext:8765`. New module `apps/server/src/services/codecontext_bridge.ts` translates `project_id` → `/workspace/<relative>/` paths.
|
||||
|
||||
2. **Tool output truncation** — opencode `truncate.ts` pattern. Cap at 2000 lines / 50KB. Larger outputs: write full content server-side, return preview + opaque `id`. New tool `view_truncated_output(id)` retrieves full content by server-mapped id. **No pathGuard exception** for `/tmp` directory — the opaque-id approach avoids exposing a writable filesystem location to the model. Only codecontext outputs need truncation; native tools (view_file 200 lines, grep 200 results, list_dir 500 entries, find_files 200 results) already cap reasonably.
|
||||
|
||||
3. **`experimental_repairToolCall` equivalent** — when the model emits a malformed tool call (JSON parse fails or Zod validation fails), return a synthetic tool result instead of an error: `{ error, raw_args, tool_name, hint: 'Retry with valid JSON arguments.' }`. The model self-corrects on the next step. Add one line to the system prompt instructing self-correction on malformed-args results. Confirmed working precondition: `tool_choice: "required"` is accepted by llama-swap (verified 2026-05-20 against qwen3.6-35b-a3b-mxfp4).
|
||||
|
||||
**Hand-roll, not AI SDK adoption.** AI SDK migration deferred to v1.13.
|
||||
|
||||
**AGENTS.md updates:** Each of the 6 builtin agents gets a curated codecontext tool whitelist:
|
||||
- Architect: all 8
|
||||
- Debugger: `search_symbols`, `get_dependencies`
|
||||
- Code Reviewer: `get_file_analysis`
|
||||
- Refactorer: `get_semantic_neighborhoods`, `get_dependencies`
|
||||
- Security Auditor: `get_file_analysis`, `search_symbols`, `get_dependencies`
|
||||
- Prompt Builder: none (no structural reasoning relevance)
|
||||
|
||||
**Dependencies:** v1.11.x merged. No others.
|
||||
|
||||
**Estimated:** 600 LoC across 3-4 dispatches under the v1.12 umbrella.
|
||||
|
||||
-----
|
||||
|
||||
## v1.13 — Phase B: parts table + AI SDK + per-tool tagging
|
||||
|
||||
**Goal:** typed message parts replace JSON blobs on `messages.tool_calls` / `tool_results`. Adopt Vercel AI SDK `streamText`. Tag tools as `read_only` or `write` at definition time.
|
||||
|
||||
**Scope:**
|
||||
|
||||
1. Schema: new `message_parts` table (`id, message_id, kind, payload JSONB, sequence`). Kinds: `text`, `tool_call`, `tool_result`, `reasoning`, `step_start`. The `messages` table becomes header-only.
|
||||
2. Inference loop rewritten on AI SDK `streamText`. `streamCompletion` becomes a thin wrapper. Native AI SDK `experimental_repairToolCall` replaces v1.12's hand-rolled version.
|
||||
3. Tool registry: `ToolDef<T>` gains `category: 'read_only' | 'write'` field. BooCode v1.x rejects any `write` tool at registration time (defense in depth for the BooCoder split). Alpha-sort the tool list before sending it to the model (prompt-cache stability).
|
||||
4. Reasoning content (`reasoning_content` from Qwen3.6) captured as its own part type instead of dropped or inlined.
|
||||
|
||||
**Migration risk:** non-trivial. `inference.ts` is ~1400 lines with custom XML fallback, SSE parsing, and compaction integration. Plan a dedicated cutover window. `compaction.ts` must be updated to assemble the head from parts.
|
||||
|
||||
**Replaces:** Original Batch 13 (append-only event log) — same outcome, different vocabulary.
|
||||
|
||||
**Dependencies:** v1.12 merged.
|
||||
|
||||
-----
|
||||
|
||||
## v1.14 — Phase C: outer agent loop
|
||||
|
||||
**Goal:** explicit multi-step loop per opencode `prompt.ts` `runLoop()`. Replace the current ad-hoc tool-call recursion.
|
||||
|
||||
**Scope:**
|
||||
|
||||
1. Outer loop continues until model returns non-tool finish OR step cap hit. Step ≠ tool call: one step can contain multiple tool calls in parallel.
|
||||
2. `agent.steps ?? Infinity` per-agent step cap. AGENTS.md gains `steps:` field. Refactorer `steps: 5`, Architect `steps: 20`, etc.
|
||||
3. Step-boundary events (`step_start`, `step_finish`) explicit in the parts stream. Per-step snapshot for revert (planned for BooCoder; backend-only in v1.14).
|
||||
4. Doom-loop guard (v1.11.6) migrates from "abort recursion" to "raise within loop iteration." Same predicate, different control flow.
|
||||
|
||||
**Dependencies:** v1.13 merged.
|
||||
|
||||
-----
|
||||
|
||||
## v1.15 — Phase D: permission ruleset + MCP client
|
||||
|
||||
**Goal:** wildcard permission ruleset (opencode `evaluate.ts` pattern) and a proper MCP client implementation. Foundation for BooCoder to gate writes; immediate value for codecontext to be re-wired as a real MCP server.
|
||||
|
||||
**Scope:**
|
||||
|
||||
1. Wildcard rule matcher: `{ permission, pattern, action: 'allow' | 'deny' | 'ask' }`. Last-match-wins. Per-agent rulesets layer under per-session rulesets.
|
||||
2. MCP client implementation: SSE transport, `tools/list` discovery, `tools/call` invocation. codecontext sidecar gets re-pointed from static wrappers (v1.12) to real MCP. New connectors become a config-only addition.
|
||||
3. UI: permission-ask flow when a tool requires `ask` action. Modal or inline card with Allow once / Allow always / Deny.
|
||||
4. v1.x stays read-only by default (no `write` tools in the registry yet).
|
||||
|
||||
**Absorbs:** Original Batch 12 (tool approval + plan/act mode) — same outcome via permission rules instead of mode enum.
|
||||
|
||||
**Dependencies:** v1.13 merged (parts table for permission events). Independent of v1.14.
|
||||
|
||||
-----
|
||||
|
||||
## v1.16 — Batch 11b: codesight repo_health
|
||||
|
||||
Call graph, circular dependency detection, dead code flagging. Port `analyze.mjs` from spirituslab/codesight. New tool `repo_health(project_id)`. In-process Node (not sidecar). Cache results keyed by `(project_id, file_hashes_sig)`.
|
||||
|
||||
**Dependencies:** v1.12 merged (can reuse codecontext parse output where overlapping).
|
||||
|
||||
-----
|
||||
|
||||
## v2.0 — BooCoder pending changes
|
||||
|
||||
New container `boocoder` at `100.114.205.53:9502`. Owns write tools (`edit_file`, `create_file`, `delete_file`, `apply_pending`, `rewind`). Edits queue in `pending_changes` table; nothing touches disk until `/apply`. Per-pane diff UI with Approve/Reject. BooCode chat stays read-only (`/opt:/opt:ro`).
|
||||
|
||||
**Lift source:** plandex pending-changes data model.
|
||||
|
||||
**Dependencies:** v1.13 (parts) + v1.15 (permissions).
|
||||
|
||||
-----
|
||||
|
||||
## v2.1 — BooCoder runtime isolation
|
||||
|
||||
Per-session Docker sandbox spawned by BooCoder on first write. Only project path mounted, not `/opt`. Idle-timeout 30 min. Standard OpenHands runtime contract: HTTP API inside container, BooCoder calls in.
|
||||
|
||||
**Lift source:** OpenHands V1 runtime pattern.
|
||||
|
||||
**Dependencies:** v2.0.
|
||||
|
||||
-----
|
||||
|
||||
## v2.x — Optional / far future
|
||||
|
||||
- **Multi-provider LLM** (pi-ai pattern): Only if a concrete need for Anthropic / OpenAI / Mistral direct surfaces. llama-swap covers everything today.
|
||||
- **Workflow graphs** (microsoft/agent-framework concepts): Multi-agent coordination. Conceptual reference only. Realistically a v3.x topic.
|
||||
|
||||
-----
|
||||
|
||||
## Architecture target state
|
||||
|
||||
### Containers
|
||||
|
||||
| Container | Port | Mount | Purpose | Status |
|
||||
|---|---|---|---|---|
|
||||
| `boocode` | `100.114.205.53:9500` | `/opt:/opt:ro` | Chat + read-only tools + SPA | Live |
|
||||
| `boocode_db` | `127.0.0.1:5500` | `boocode_pgdata` volume | Postgres 16-alpine | Live |
|
||||
| `codecontext` | `100.114.205.53:8765` (internal) | project root :ro | MCP server for architect tools | v1.13 |
|
||||
| `booterm` | `100.114.205.53:9501` | `/opt/repos:/opt/repos:rw` | Terminals (tmux + node-pty) | v1.12 |
|
||||
| `boocoder` | `100.114.205.53:9502` | per-session sandbox | Write tools | Post-v1.x |
|
||||
| `booterm` | `100.114.205.53:9501` | `/opt/repos:/opt/repos:rw` | Terminals (tmux + node-pty) | Live (v1.10.0) |
|
||||
| `codecontext` | `:8765` (internal) | `/opt/projects:/workspace:ro` | MCP server for architect tools | v1.12 |
|
||||
| `boocoder` | `100.114.205.53:9502` | per-session sandbox | Write tools | v2.0 |
|
||||
|
||||
## Schema additions ahead
|
||||
### Schema additions by version
|
||||
|
||||
- v1.x-themes (current): `settings.theme_id`, `settings.theme_mode`
|
||||
- v1.9: `projects.default_system_prompt`, `projects.default_web_search_enabled`, `sessions.web_search_enabled`
|
||||
- v1.11: `sessions.agent_id`
|
||||
- v1.13b: `repo_health_cache (project_id, file_hashes_sig, payload JSONB, created_at)`
|
||||
- v1.14: `sessions.tool_approval_mode`, `sessions.approved_tools`
|
||||
- Post-v1.x: `session_events`; deprecate `messages` long-tail
|
||||
- Post-v1.x: `pending_changes`
|
||||
- **v1.11.0:** `messages.compacted_at`, `messages.summary`, `messages.tail_start_id`, `chats.needs_compaction`
|
||||
- **v1.11.7:** none (pathGuard logic, no DB)
|
||||
- **v1.12:** none (codecontext is stateless on disk; truncation uses in-memory id→path map with TTL cleanup)
|
||||
- **v1.13:** `message_parts` table; `messages` becomes header-only
|
||||
- **v1.14:** `agents.steps` column (or AGENTS.md parser extension; no DB if file-only)
|
||||
- **v1.15:** `permissions` table, `agent_permissions` join, `session_permissions` join
|
||||
- **v1.16:** `repo_health_cache (project_id, file_hashes_sig, payload JSONB, created_at)`
|
||||
- **v2.0:** `pending_changes (id, session_id, file_path, diff TEXT, status, created_at)`
|
||||
|
||||
-----
|
||||
|
||||
## Lift sources (summary)
|
||||
|
||||
Full inventory in `boocode_code_review.md`. Headline items:
|
||||
|
||||
| Source | Used for | Where |
|
||||
|---|---|---|
|
||||
| **`sst/opencode`** (MIT, TS) | **Compaction algorithms** | **v1.11.0 (shipped)** |
|
||||
| `sst/opencode` (MIT, TS) | Doom-loop guard | v1.11.6 |
|
||||
| `sst/opencode` (MIT, TS) | `repairToolCall`, truncate.ts, MCP client, permission evaluate, runLoop | v1.12/v1.13/v1.14/v1.15 |
|
||||
| `continuedev/continue` (Apache-2.0) | `DEFAULT_SECURITY_IGNORE_FILETYPES` | v1.11.7 |
|
||||
| `nmakod/codecontext` (MIT, Go) | Architect: codebase map sidecar | v1.12 |
|
||||
| `spirituslab/codesight` (MIT-ish, TS) | Architect: repo health analyzer | v1.16 |
|
||||
| `Aider-AI/aider` (Apache-2.0) | Fallback `.scm` grammars | v1.12 (fallback) |
|
||||
| `cline/cline` (Apache-2.0) | Plan/Act pattern (absorbed into v1.15 permissions) | v1.15 |
|
||||
| `plandex-ai/plandex` (MIT) | Pending-changes data model | v2.0 |
|
||||
| `OpenHands/OpenHands` (MIT) | Sandbox runtime contract | v2.1 |
|
||||
| `aimasteracc/tree-sitter-analyzer` (MIT) | Outline-first patterns | v1.12 (alt) |
|
||||
| `earendil-works/pi` (MIT) | Multi-provider LLM | v2.x (optional) |
|
||||
|
||||
**Original Batch 13 (event log from OpenHands) replaced** by v1.13 (parts table). Same outcome, different framing.
|
||||
|
||||
-----
|
||||
|
||||
## Decisions log
|
||||
|
||||
- Embeddings dropped from BooCode. File-view tools + sidecar analyzers replace RAG.
|
||||
- Old Batch 11 (aider PageRank port) → replaced by codecontext sidecar (v1.13).
|
||||
- Old Batch 12 (Harrier indexer) → removed entirely.
|
||||
- Batch 9 reordered ahead of 5–8, decoupled from Batch 7 (2026-05-16). Subsequently superseded — settings pane (v1.9) and themes (v1.x-themes) jumped ahead. Agents now slated as v1.11.
|
||||
- Theme work split into its own version (v1.x-themes) rather than blocked behind v1.9 (2026-05-17). Branched off main after v1.8.2 committed.
|
||||
- **Embeddings dropped from BooCode** (May 2026). Replaced RAG with file-view tools + sidecar analyzers.
|
||||
- **Original Batch 11 (aider PageRank port) replaced** by codecontext sidecar approach.
|
||||
- **Original Batch 12 (codebase indexer w/ Harrier) removed.** No embedding infrastructure in BooCode v1.x.
|
||||
- **Globstar parked** — not an architect tool. Future verify-before-commit candidate only.
|
||||
- **codeprysm rejected** — embedding-based. Node/edge taxonomy noted as reference if we ever build our own graph.
|
||||
- **Batch 9 decoupled from Batch 7 (2026-05-16); shipped in `92bd3b1`.** Builtin defaults: six agents (Code Reviewer, Debugger, Refactorer, Architect, Security Auditor, Prompt Builder) with no `model` field. Session model wins by default.
|
||||
- **opencode lift opened** (2026-05-20). Started with compaction (v1.11.0). Continuing through v1.15. Five distinct algorithms: compaction, doom-loop guard, repairToolCall, runLoop, permission evaluate. Plus `truncate.ts` and `MCP client`. Each lifts the algorithm, not the Effect-TS plumbing.
|
||||
- **AI SDK adoption deferred to v1.13.** Hand-roll repairToolCall in v1.12 first. Migrate everything together when parts table lands.
|
||||
- **`tool_choice='required'` confirmed supported** by llama-swap (qwen3.6-35b-a3b-mxfp4, 2026-05-20). Unblocks repair tool call viability.
|
||||
- **v1.11.4 cancelled** (2026-05-20). Per-turn budget reset + Continue affordance + CapHitSentinel were already shipped in v1.8.2. Roadmap was 14 versions stale at time of recon.
|
||||
|
||||
-----
|
||||
|
||||
## Workflow
|
||||
|
||||
Each batch:
|
||||
1. Verify previous merged.
|
||||
2. Dispatch via Paseo to Claude Code at `/opt/boocode` (or OpenCode for smaller batches).
|
||||
3. Recon → blocking questions → implement → hand back.
|
||||
4. Compliance review in separate Claude chat.
|
||||
5. Deploy: `docker compose up --build -d`.
|
||||
6. Smoke test.
|
||||
7. Sam commits and pushes.
|
||||
|
||||
Sam reviews all diffs. Sam commits. Never git pull/push/commit on his behalf.
|
||||
1. Verify previous batch merged. `git log --oneline main -5`.
|
||||
2. Cut branch from main. Single-branch-per-dispatch convention.
|
||||
3. Dispatch via Paseo to Claude Code at `/opt/boocode`.
|
||||
4. Claude Code recon → blocking questions → implement → hand back.
|
||||
5. Compliance review in separate Claude chat (paste handback).
|
||||
6. Build: `docker compose build --no-cache boocode` (no-cache avoids the v1.11.2 stale-bundle trap).
|
||||
7. Restart: `docker compose up -d boocode`.
|
||||
8. Smoke test in browser (hard refresh).
|
||||
9. Sam commits and pushes. **Never** `git pull` / `git push` / `git commit` on his behalf.
|
||||
|
||||
Sam reviews all diffs.
|
||||
|
||||
Reference in New Issue
Block a user