From f92b0810c391b36ceb86e716ff3485fdcadf52a3 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Wed, 20 May 2026 20:28:45 +0000 Subject: [PATCH] v1.11.6: doom-loop guard (3 identical tool calls aborts recursion) --- .../src/services/__tests__/doom-loop.test.ts | 130 ++++++ apps/server/src/services/inference.ts | 332 ++++++++++++- apps/server/src/types/api.ts | 14 +- apps/web/src/api/types.ts | 18 +- apps/web/src/components/DoomLoopSentinel.tsx | 43 ++ apps/web/src/components/MessageBubble.tsx | 8 + boocode_code_review.md | 244 ++++++++++ boocode_roadmap.md | 437 +++++++++++------- 8 files changed, 1050 insertions(+), 176 deletions(-) create mode 100644 apps/server/src/services/__tests__/doom-loop.test.ts create mode 100644 apps/web/src/components/DoomLoopSentinel.tsx create mode 100644 boocode_code_review.md diff --git a/apps/server/src/services/__tests__/doom-loop.test.ts b/apps/server/src/services/__tests__/doom-loop.test.ts new file mode 100644 index 0000000..d51b6e0 --- /dev/null +++ b/apps/server/src/services/__tests__/doom-loop.test.ts @@ -0,0 +1,130 @@ +import { describe, it, expect } from 'vitest'; +import { DOOM_LOOP_THRESHOLD, detectDoomLoop } from '../inference.js'; +import type { ToolCall } from '../../types/api.js'; + +// ---- fixture ---------------------------------------------------------------- +// Tiny helper. `id` is required on ToolCall but irrelevant to detection — +// detectDoomLoop compares name + JSON.stringify(args). Counter-based id keeps +// each call unique so we don't accidentally test id-based equality. 
+ +let counter = 0; +function mkCall(name: string, args: Record = {}): ToolCall { + counter += 1; + return { id: `c${counter}`, name, args }; +} + +// ---- below-threshold ------------------------------------------------------- + +describe('detectDoomLoop — below threshold', () => { + it('returns null for an empty array', () => { + expect(detectDoomLoop([])).toBeNull(); + }); + + it('returns null when fewer than DOOM_LOOP_THRESHOLD calls exist', () => { + // 2 < 3 — sliding-window can't form even if both match. + const a = mkCall('view_file', { path: 'a.ts' }); + const b = mkCall('view_file', { path: 'a.ts' }); + expect(detectDoomLoop([a, b])).toBeNull(); + }); +}); + +// ---- positive detection ---------------------------------------------------- + +describe('detectDoomLoop — positive matches', () => { + it('returns name + args when exactly DOOM_LOOP_THRESHOLD identical calls land', () => { + const calls = [ + mkCall('grep', { pattern: 'TODO', path: 'src' }), + mkCall('grep', { pattern: 'TODO', path: 'src' }), + mkCall('grep', { pattern: 'TODO', path: 'src' }), + ]; + const result = detectDoomLoop(calls); + expect(result).not.toBeNull(); + expect(result!.name).toBe('grep'); + expect(result!.args).toEqual({ pattern: 'TODO', path: 'src' }); + }); + + it('matches sliding window — last DOOM_LOOP_THRESHOLD match even with earlier non-matching calls', () => { + // 4 calls: first differs, last 3 are identical → fire. + const calls = [ + mkCall('list_dir', { path: '/' }), + mkCall('view_file', { path: 'a.ts' }), + mkCall('view_file', { path: 'a.ts' }), + mkCall('view_file', { path: 'a.ts' }), + ]; + const result = detectDoomLoop(calls); + expect(result).not.toBeNull(); + expect(result!.name).toBe('view_file'); + }); + + it('matches identical empty-args calls (defense against {} !== {} reference bug)', () => { + // JSON.stringify on two distinct {} both produce '{}'. Confirms the + // detector uses value-equality not reference-equality. 
+ const calls = [mkCall('ping', {}), mkCall('ping', {}), mkCall('ping', {})]; + expect(detectDoomLoop(calls)).not.toBeNull(); + }); + + it('matches calls with nested args of equal shape', () => { + // Deep-equal via JSON.stringify. If the model emits the same nested + // object three times, that's still a loop. + const nested = { filter: { glob: '*.ts', case: 'sensitive' }, limit: 50 }; + const calls = [ + mkCall('find_files', { ...nested }), + mkCall('find_files', { ...nested }), + mkCall('find_files', { ...nested }), + ]; + expect(detectDoomLoop(calls)).not.toBeNull(); + }); +}); + +// ---- negative detection ---------------------------------------------------- + +describe('detectDoomLoop — negative cases', () => { + it('returns null when 3 calls share name but differ in args', () => { + const calls = [ + mkCall('view_file', { path: 'a.ts' }), + mkCall('view_file', { path: 'b.ts' }), + mkCall('view_file', { path: 'c.ts' }), + ]; + expect(detectDoomLoop(calls)).toBeNull(); + }); + + it('returns null when 3 calls share args but differ in name', () => { + const calls = [ + mkCall('view_file', { path: 'a.ts' }), + mkCall('grep', { path: 'a.ts' }), + mkCall('list_dir', { path: 'a.ts' }), + ]; + expect(detectDoomLoop(calls)).toBeNull(); + }); + + it('returns null when the FIRST three of four match but the latest differs', () => { + // Critical sliding-window edge: detector must ONLY look at the last + // DOOM_LOOP_THRESHOLD entries. Earlier matches don't count if the + // model has since moved on. 
+ const calls = [ + mkCall('grep', { pattern: 'X' }), + mkCall('grep', { pattern: 'X' }), + mkCall('grep', { pattern: 'X' }), + mkCall('view_file', { path: 'a.ts' }), + ]; + expect(detectDoomLoop(calls)).toBeNull(); + }); + + it('returns null when args have same keys but different values', () => { + const calls = [ + mkCall('grep', { pattern: 'TODO', path: 'src' }), + mkCall('grep', { pattern: 'TODO', path: 'src' }), + mkCall('grep', { pattern: 'TODO', path: 'apps' }), + ]; + expect(detectDoomLoop(calls)).toBeNull(); + }); +}); + +// ---- threshold contract ---------------------------------------------------- + +describe('DOOM_LOOP_THRESHOLD', () => { + it('is a positive integer (the public contract — tests assume 3)', () => { + expect(DOOM_LOOP_THRESHOLD).toBeGreaterThan(0); + expect(Number.isInteger(DOOM_LOOP_THRESHOLD)).toBe(true); + }); +}); diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index 02b59df..af577a4 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -54,6 +54,36 @@ function resolveToolBudget(agent: Agent | null): number { const CAP_HIT_SUMMARY_NOTE = (limit: number) => `You've reached the tool budget (${limit} calls). Produce the best answer you can with what you have. Do not call more tools.`; +// v1.11.6: doom-loop guard. When the model calls the same tool with the +// same arguments DOOM_LOOP_THRESHOLD times in a row within one user-message +// turn, abort the recursion and run the same wrap-up summary path as the +// cap-hit case. Ported from opencode (DOOM_LOOP_THRESHOLD in +// session/processor.ts). Threshold of 3 is the smallest value that doesn't +// false-positive on a model that retries once after a transient error. +export const DOOM_LOOP_THRESHOLD = 3; + +const DOOM_LOOP_NOTE = (name: string) => + `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. 
Produce the best answer you can with what you have.`; + +// Returns the name + args of the looping tool when the LAST +// DOOM_LOOP_THRESHOLD entries in `recentToolCalls` are identical (same name +// AND deep-equal args via JSON.stringify). Returns null otherwise. +// Pure; exported for unit-test access. +export function detectDoomLoop( + recentToolCalls: ToolCall[], +): { name: string; args: Record } | null { + if (recentToolCalls.length < DOOM_LOOP_THRESHOLD) return null; + const last = recentToolCalls.slice(-DOOM_LOOP_THRESHOLD); + const ref = last[0]!; + const refArgs = JSON.stringify(ref.args); + for (let i = 1; i < last.length; i++) { + const tc = last[i]!; + if (tc.name !== ref.name) return null; + if (JSON.stringify(tc.args) !== refArgs) return null; + } + return { name: ref.name, args: ref.args }; +} + function isCapHitSentinel(m: Message): boolean { return ( m.role === 'system' && @@ -63,6 +93,22 @@ function isCapHitSentinel(m: Message): boolean { ); } +// v1.11.6: parallel predicate. Same UI-only semantics as cap-hit sentinels — +// never sent to the LLM (filtered by buildMessagesPayload through the +// isAnySentinel check below). +function isDoomLoopSentinel(m: Message): boolean { + return ( + m.role === 'system' && + m.metadata !== null && + typeof m.metadata === 'object' && + (m.metadata as { kind?: unknown }).kind === 'doom_loop' + ); +} + +function isAnySentinel(m: Message): boolean { + return isCapHitSentinel(m) || isDoomLoopSentinel(m); +} + export interface InferenceFrame { type: | 'message_started' @@ -203,11 +249,11 @@ export function buildMessagesPayload( out.push({ role: 'system', content: m.content }); continue; } - // v1.8.2: cap-hit sentinels are UI-only — never send them to the LLM. The - // synthetic "you've reached the tool budget" note lives only inside the - // summary call's messages array and is never persisted, so on Continue - // the model resumes with a clean context. 
- if (isCapHitSentinel(m)) continue; + // v1.8.2 / v1.11.6: cap-hit and doom-loop sentinels are UI-only — never + // send them to the LLM. The synthetic instruction note lives only inside + // the summary call's messages array and is never persisted, so on a + // follow-up turn the model resumes with a clean context. + if (isAnySentinel(m)) continue; if (m.role === 'assistant' && m.status === 'streaming') continue; if (m.role === 'assistant' && m.status === 'cancelled') continue; if (m.role === 'tool') { @@ -608,6 +654,11 @@ interface TurnArgs { // resolved budget at the top of each turn. Replaces the older `depth` // counter (which counted iterations, not invocations). toolsUsed: number; + // v1.11.6: ordered tool calls executed in this user-message turn (across + // recursive runAssistantTurn invocations). Reset to [] at user-message + // boundaries by runInference, same as toolsUsed. Doom-loop check at the + // top of runAssistantTurn slices the last DOOM_LOOP_THRESHOLD entries. + recentToolCalls: ToolCall[]; signal: AbortSignal | undefined; } @@ -910,6 +961,11 @@ async function executeToolPhase( // One assistant message can emit multiple tool_calls, so we add the run // count, not 1. The next turn's budget check sees the cumulative total. toolsUsed: toolsUsed + result.toolCalls.length, + // v1.11.6: append the just-executed tool calls to the per-turn history + // so the next runAssistantTurn's doom-loop check can see them. We don't + // cap the array length here — per-turn budgets keep it bounded + // (typically <30 entries), and slicing happens inside detectDoomLoop. + recentToolCalls: [...args.recentToolCalls, ...result.toolCalls], signal, }); } @@ -1029,6 +1085,17 @@ async function runAssistantTurn( return; } + // v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can + // burn through 3 identical calls long before the 15-call budget fires). 
+ // Same in-flight-slot-reuse pattern as runCapHitSummary — wrap-up reply + // lands in args.assistantMessageId, then a doom_loop sentinel is inserted + // to make the abort visible in the chat history. + const loop = detectDoomLoop(args.recentToolCalls); + if (loop) { + await runDoomLoopSummary(ctx, args, session, project, history, agent, loop); + return; + } + const messages = buildMessagesPayload(session, project, history, agent); const state: StreamPhaseState = { accumulated: '', startedAt: null }; @@ -1059,7 +1126,16 @@ export async function runInference( // continue) starts with a clean budget. Tool-call accumulation across // Continue invocations is what the hard ceiling guards against, not the // per-call budget. - return runAssistantTurn(ctx, { sessionId, chatId, assistantMessageId, toolsUsed: 0, signal }); + // v1.11.6: recentToolCalls also resets — doom-loop detection is scoped + // to a single user-message turn, so a Continue starts with no history. + return runAssistantTurn(ctx, { + sessionId, + chatId, + assistantMessageId, + toolsUsed: 0, + recentToolCalls: [], + signal, + }); } // v1.8.2: cap-hit summary flow. Called instead of erroring when the loop @@ -1318,6 +1394,250 @@ async function insertCapHitSentinel( }); } +// v1.11.6: doom-loop wrap-up. Mirrors runCapHitSummary structurally — same +// in-flight-slot reuse, same tools-disabled streaming-summary call, same +// post-finalize sentinel insert + chat_status drop. Differences: +// - synthetic note text comes from DOOM_LOOP_NOTE (names the looping tool) +// - sentinel metadata is { kind: 'doom_loop', tool_name, args, threshold } +// and has no Continue affordance (manual retry would just re-loop) +// The summary-failure path is shared: it reuses reason 'summary_after_cap_failed'. +// Kept as a clone rather than refactored into a shared helper because the +// two summary paths still differ in note text + sentinel shape; a third +// sentinel would justify factoring out runWrapUpSummary(opts). 
+async function runDoomLoopSummary( + ctx: InferenceContext, + args: TurnArgs, + session: Session, + project: Project, + history: Message[], + agent: Agent | null, + loop: { name: string; args: Record }, +): Promise { + const { sessionId, chatId, assistantMessageId, signal } = args; + + const messages = buildMessagesPayload(session, project, history, agent); + messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) }); + + const startedRow = await ctx.sql<{ started_at: string }[]>` + UPDATE messages + SET started_at = clock_timestamp() + WHERE id = ${assistantMessageId} + RETURNING started_at + `; + const startedAt = startedRow[0]?.started_at ?? null; + + ctx.publish(sessionId, { + type: 'message_started', + message_id: assistantMessageId, + chat_id: chatId, + role: 'assistant', + }); + + let accumulated = ''; + let pendingFlushTimer: NodeJS.Timeout | null = null; + let flushPromise: Promise = Promise.resolve(); + const flushNow = () => { + if (pendingFlushTimer) { + clearTimeout(pendingFlushTimer); + pendingFlushTimer = null; + } + const snapshot = accumulated; + flushPromise = flushPromise.then(() => + ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}` + ); + }; + const scheduleFlush = () => { + if (pendingFlushTimer) return; + pendingFlushTimer = setTimeout(() => { + pendingFlushTimer = null; + flushNow(); + }, DB_FLUSH_INTERVAL_MS); + }; + + let summaryOk = false; + let summarySoftCancelled = false; + let summaryError: string | null = null; + let result: StreamResult | null = null; + try { + result = await streamCompletion( + ctx, + session.model, + messages, + { tools: null, temperature: agent?.temperature }, + (delta) => { + accumulated += delta; + ctx.publish(sessionId, { + type: 'delta', + message_id: assistantMessageId, + chat_id: chatId, + content: delta, + }); + scheduleFlush(); + }, + signal, + ); + summaryOk = true; + } catch (err) { + if (err instanceof Error && err.name === 'AbortError') { + 
summarySoftCancelled = true; + } else { + summaryError = err instanceof Error ? err.message : String(err); + } + } finally { + if (pendingFlushTimer) { + clearTimeout(pendingFlushTimer); + pendingFlushTimer = null; + } + await flushPromise; + } + + if (summaryOk && result) { + const mctx = await modelContext.getModelContext(session.model); + const nCtx = mctx?.n_ctx ?? null; + const [updated] = await ctx.sql< + { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[] + >` + UPDATE messages + SET content = ${result.content}, + status = 'complete', + tokens_used = ${result.completionTokens}, + ctx_used = ${result.promptTokens}, + ctx_max = ${nCtx}, + finished_at = clock_timestamp() + WHERE id = ${assistantMessageId} + RETURNING tokens_used, ctx_used, ctx_max, finished_at + `; + ctx.publish(sessionId, { + type: 'message_complete', + message_id: assistantMessageId, + chat_id: chatId, + tokens_used: updated?.tokens_used ?? null, + ctx_used: updated?.ctx_used ?? null, + ctx_max: updated?.ctx_max ?? null, + started_at: startedAt, + finished_at: updated?.finished_at ?? null, + model: session.model, + }); + } else if (summarySoftCancelled) { + await ctx.sql` + UPDATE messages + SET content = ${accumulated}, + status = 'cancelled', + finished_at = clock_timestamp() + WHERE id = ${assistantMessageId} + `; + ctx.publish(sessionId, { + type: 'message_complete', + message_id: assistantMessageId, + chat_id: chatId, + }); + } else { + // Doom-loop summary failure reuses the existing summary_after_cap_failed + // error reason — the ErrorReason union is shared between sentinel paths + // and the UI surfaces a generic "summary failed" line for both. We don't + // add a new reason code because the user-visible failure mode is the + // same (model gave up mid-summary). Sentinel below still fires. + const errMeta: MessageMetadata = { + kind: 'error', + error_reason: 'summary_after_cap_failed', + error_text: summaryError ?? 
'doom-loop summary failed', + }; + await ctx.sql` + UPDATE messages + SET content = ${accumulated}, + status = 'failed', + finished_at = clock_timestamp(), + metadata = ${ctx.sql.json(errMeta as never)} + WHERE id = ${assistantMessageId} + `; + ctx.publish(sessionId, { + type: 'error', + message_id: assistantMessageId, + chat_id: chatId, + error: summaryError ?? 'doom-loop summary failed', + reason: 'summary_after_cap_failed', + }); + } + + const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>` + UPDATE sessions SET updated_at = clock_timestamp() + WHERE id = ${sessionId} + RETURNING project_id, name, updated_at + `; + ctx.publishUser({ + type: 'session_updated', + session_id: sessionId, + project_id: sessRow!.project_id, + name: sessRow!.name, + updated_at: sessRow!.updated_at, + }); + + await insertDoomLoopSentinel(ctx, sessionId, chatId, loop); + + if (summaryOk || summarySoftCancelled) { + ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() }); + } else { + ctx.publishUser({ + type: 'chat_status', + chat_id: chatId, + status: 'error', + at: new Date().toISOString(), + reason: 'summary_after_cap_failed', + }); + } + + ctx.log.info( + { sessionId, chatId, assistantMessageId, loopedTool: loop.name, summaryOk, summaryCancelled: summarySoftCancelled }, + 'inference doom-loop summary finished', + ); +} + +async function insertDoomLoopSentinel( + ctx: InferenceContext, + sessionId: string, + chatId: string, + loop: { name: string; args: Record }, +): Promise { + // No hard-ceiling / can-continue logic here — doom-loop is a different + // failure mode from cap-hit. Continuing would re-trigger the loop with + // the same tools available; the user needs to restate their question + // or switch agents instead. 
+ const metadata: MessageMetadata = { + kind: 'doom_loop', + tool_name: loop.name, + args: loop.args, + threshold: DOOM_LOOP_THRESHOLD, + }; + const content = `Detected ${DOOM_LOOP_THRESHOLD} identical calls to ${loop.name}. Stopping the tool-call loop. Produce the best answer you can with what you have.`; + + const [row] = await ctx.sql<{ id: string }[]>` + INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata) + VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)}) + RETURNING id + `; + + // Standard frame sequence — same as cap-hit sentinel — so + // useSessionStream's reducer appends the row via the existing path. + ctx.publish(sessionId, { + type: 'message_started', + message_id: row!.id, + chat_id: chatId, + role: 'system', + }); + ctx.publish(sessionId, { + type: 'delta', + message_id: row!.id, + chat_id: chatId, + content, + }); + ctx.publish(sessionId, { + type: 'message_complete', + message_id: row!.id, + chat_id: chatId, + metadata, + }); +} + interface InferenceRegistration { controller: AbortController; completed: Promise; diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts index 38cb781..9344d16 100644 --- a/apps/server/src/types/api.ts +++ b/apps/server/src/types/api.ts @@ -128,9 +128,11 @@ export type ErrorReason = | 'tool_execution_failed' | 'summary_after_cap_failed'; -// v1.8.2: shapes stored in messages.metadata. Discriminated on `kind`. -// cap_hit — system sentinel emitted when tool budget is exhausted -// error — attached to a failed assistant message so UI can show reason +// v1.8.2 / v1.11.6: shapes stored in messages.metadata. Discriminated on `kind`. 
+// cap_hit — system sentinel emitted when tool budget is exhausted +// doom_loop — system sentinel emitted when the model called the same +// tool with the same args DOOM_LOOP_THRESHOLD times in a row +// error — attached to a failed assistant message so UI can show reason export type MessageMetadata = | { kind: 'cap_hit'; @@ -139,6 +141,12 @@ export type MessageMetadata = agent_name: string | null; can_continue: boolean; } + | { + kind: 'doom_loop'; + tool_name: string; + args: Record; + threshold: number; + } | { kind: 'error'; error_reason: ErrorReason; diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index cc359c9..1d1b4b3 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -112,11 +112,13 @@ export type ErrorReason = | 'tool_execution_failed' | 'summary_after_cap_failed'; -// v1.8.2: shapes stored in Message.metadata. Discriminated on `kind`. -// cap_hit — sentinel emitted when the tool budget is hit; carries the -// budget + agent name + whether Continue is still allowed. -// error — attached to a failed assistant message so the bubble can show -// a specific reason on reload (WS error frame is one-shot). +// v1.8.2 / v1.11.6: shapes stored in Message.metadata. Discriminated on `kind`. +// cap_hit — sentinel emitted when the tool budget is hit; carries the +// budget + agent name + whether Continue is still allowed. +// doom_loop — sentinel emitted when the model called the same tool with +// the same arguments threshold times in a row. +// error — attached to a failed assistant message so the bubble can show +// a specific reason on reload (WS error frame is one-shot). 
export type MessageMetadata = | { kind: 'cap_hit'; @@ -125,6 +127,12 @@ export type MessageMetadata = agent_name: string | null; can_continue: boolean; } + | { + kind: 'doom_loop'; + tool_name: string; + args: Record; + threshold: number; + } | { kind: 'error'; error_reason: ErrorReason; diff --git a/apps/web/src/components/DoomLoopSentinel.tsx b/apps/web/src/components/DoomLoopSentinel.tsx new file mode 100644 index 0000000..dabb965 --- /dev/null +++ b/apps/web/src/components/DoomLoopSentinel.tsx @@ -0,0 +1,43 @@ +import { AlertCircle } from 'lucide-react'; +import type { Message } from '@/api/types'; + +interface Props { + message: Message; +} + +// v1.11.6: doom-loop sentinel. Renders the system row inserted by +// services/inference.ts insertDoomLoopSentinel when the model called the +// same tool with the same arguments threshold times in a row. Visual +// treatment mirrors CapHitSentinel (amber card + alert icon) so users learn +// "amber alert = the loop hit a guard rail and stopped" regardless of +// which guard fired. Intentionally NO Continue button — retrying with the +// same tools would just re-loop; the user needs to restate the prompt or +// switch agents instead. +export function DoomLoopSentinel({ message }: Props) { + const meta = message.metadata; + const isDoomLoop = + meta !== null && typeof meta === 'object' && meta.kind === 'doom_loop'; + const toolName = isDoomLoop ? meta.tool_name : null; + const threshold = isDoomLoop ? meta.threshold : null; + + return ( +
+
+ +
+
+ Doom loop detected +
+
+ {toolName !== null && threshold !== null + ? `Stopped after ${threshold} identical calls to ${toolName}. The model was looping.` + : message.content} +
+
+ Send a new message with a different angle, or switch agents. +
+
+
+
+ ); +} diff --git a/apps/web/src/components/MessageBubble.tsx b/apps/web/src/components/MessageBubble.tsx index 018bd8d..e382e1d 100644 --- a/apps/web/src/components/MessageBubble.tsx +++ b/apps/web/src/components/MessageBubble.tsx @@ -9,6 +9,7 @@ import { api } from '@/api/client'; import { sessionEvents } from '@/hooks/sessionEvents'; import { sendToTerminal, terminalsRegistry, type TerminalRegistration } from '@/lib/events'; import { CapHitSentinel } from './CapHitSentinel'; +import { DoomLoopSentinel } from './DoomLoopSentinel'; import { CodeBlock } from './CodeBlock'; import { Button } from '@/components/ui/button'; import { @@ -622,6 +623,13 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) { ); } + // v1.11.6: doom-loop sentinel. No Continue affordance — retrying with the + // same tools would just re-loop. The card explains what tripped and + // suggests next steps (new message angle / switch agents). + if (message.role === 'system' && message.metadata?.kind === 'doom_loop') { + return ; + } + // v1.8.2: tool messages and assistant tool_calls are now rendered by // MessageList via ToolCallLine / ToolCallGroup. Tool-role messages reach // this point only if MessageList didn't consume them (shouldn't happen, diff --git a/boocode_code_review.md b/boocode_code_review.md new file mode 100644 index 0000000..d26f830 --- /dev/null +++ b/boocode_code_review.md @@ -0,0 +1,244 @@ +# BooCode — External Code Review & Lift Inventory + +Last updated: 2026-05-20 + +This document tracks every open source repo BooCode references or lifts code from. Pin this so we don't lose attribution and don't re-evaluate the same projects twice. + +BooCode is personal/single-user — license compatibility is non-blocking, but the License column is recorded so we don't accidentally inherit an obligation if BooCode ever goes public. + +----- + +## Reference repos + +### Tier A — actively lifting from / running as sidecar + +#### 1. 
sst/opencode (NEW Tier A as of 2026-05-20) + +- **URL:** https://github.com/sst/opencode +- **License:** MIT +- **Language:** TypeScript (Effect-TS service-oriented) +- **What it is:** The coding agent Sam uses via Termius/Paseo. Also the source of every algorithm BooCode is porting through v1.15. +- **Why it matters:** opencode's `packages/opencode/src/session/` is the canonical reference implementation for every part of the inference layer BooCode is rebuilding. We lift the algorithms, not the Effect-TS plumbing. +- **Algorithms lifted so far:** + - `session/compaction.ts` → v1.11.0 (shipped). `usable`, `isOverflow`, `select`, `buildPrompt` ported to plain TS. SUMMARY_TEMPLATE markdown skeleton verbatim. + - `session/overflow.ts` → v1.11.0 (shipped). 20k `COMPACTION_BUFFER` constant. +- **Algorithms queued for lifting:** + - `session/processor.ts` `DOOM_LOOP_THRESHOLD=3` → v1.11.6 (ships with this release) + - `session/llm.ts` `experimental_repairToolCall` → v1.12 (hand-rolled), then v1.13 (via AI SDK) + - `tool/truncate.ts` truncation + outputPath pattern → v1.12 (adapted: opaque id, not filesystem path) + - `session/prompt.ts` `runLoop()` outer agent loop → v1.14 + - `permission/evaluate.ts` wildcard ruleset → v1.15 + - MCP client (transport, tools/list discovery, tools/call) → v1.15 +- **What NOT to use:** Effect-TS service plumbing. Snapshot/patch system (for tool-edit revert; BooCoder territory if needed). The `experimental_native_runtime` (AI SDK fallback path). opencode's prompts. +- **Source tag:** `dev` branch on `sst/opencode`. Note: `anomalyco/opencode` is a rebranded mirror; use `sst/opencode` as canonical. + +#### 2. nmakod/codecontext + +- **URL:** https://github.com/nmakod/codecontext +- **License:** MIT +- **Language:** Go (single binary) +- **What it is:** AI-oriented codebase context map generator. Tree-sitter parsing across TS/JS/Go/C++/Swift/Python/Java/Rust/Dart/JSON/YAML. Generates `CLAUDE.md`-style structured overview. Bundled MCP server with 8 tools. 
+- **MCP tools exposed:** `get_codebase_overview`, `get_file_analysis`, `get_symbol_info`, `search_symbols`, `get_dependencies`, `watch_changes`, `get_semantic_neighborhoods` (git co-change patterns — no embeddings), `get_framework_analysis`. +- **Why it matters:** Solves the "architect needs a map" problem without embeddings. +- **How we use it:** Run as sidecar container in v1.12. Wire its MCP tools into BooCode's `inference/tools.ts` as static wrappers in v1.12, then re-wire via real MCP client when v1.15 ships. +- **What NOT to use:** Nothing. Clean fit. + +#### 3. aimasteracc/tree-sitter-analyzer + +- **URL:** https://github.com/aimasteracc/tree-sitter-analyzer +- **License:** MIT +- **Language:** Python, MCP server + CLI +- **What it is:** Local-first code context engine. Outline-first navigation, ripgrep-based impact trace, no embeddings. 17 languages. Claims 54-56% token reduction via TOON format. +- **MCP tools exposed:** `get_code_outline`, `trace_impact`, plus structural search/extract tools. +- **Why it matters:** Backup analyzer with a different response shape — outline-first scales better than codecontext's full dump on huge files. Impact trace is useful for "what calls this function" without a full graph build. +- **How we use it:** Lift the AST query patterns (`.scm` files) and the outline-first response shape. Can also run as a second MCP sidecar alongside codecontext. +- **What NOT to use:** Don't lift the TOON format if it conflicts with shadcn rendering — markdown stays. + +#### 4. spirituslab/codesight + +- **URL:** https://github.com/spirituslab/codesight +- **License:** check repo — assumed MIT-ish +- **Language:** TypeScript/Node +- **What it is:** Static code structure visualization. Symbol extraction, import resolution, call graphs. Detects circular dependencies and dead code (with documented false-positive caveats for `customElements.define()`, framework entry points, dynamic imports). 
+- **Why it matters:** Gives BooCode a `repo_health` tool — different from codecontext's "what is this" map. This is "what's wrong with this." +- **How we use it:** v1.16. Port the analyzer core (`analyze.mjs`). Call-graph builder + circular-dep + dead-code detectors into BooCode's `tools/repo_health.ts`. Drop the VS Code extension shell entirely. +- **What NOT to use:** The VS Code wrapper, the "idea layer" feature (requires Copilot or Claude Code wiring we don't want). + +#### 5. Aider-AI/aider + +- **URL:** https://github.com/Aider-AI/aider +- **License:** Apache-2.0 +- **Language:** Python +- **What it is:** Git-native AI pair programmer CLI. Pioneered the tree-sitter repo-map + personalized PageRank approach. +- **Why it matters:** Authoritative source of per-language `tags.scm` query files. 60+ languages curated and battle-tested. +- **How we use it:** **Lift directly:** `aider/queries/tree-sitter-*.scm` — drop into BooCode's analyzer for any language codecontext or codesight don't cover natively. +- **What NOT to use:** Don't port `repomap.py` itself — codecontext supersedes it. + +----- + +### Tier B — patterns / partial lift + +#### 6. continuedev/continue + +- **URL:** https://github.com/continuedev/continue +- **License:** Apache-2.0 +- **Language:** TypeScript +- **What it is:** IDE assistant framework. Full RAG pipeline, AST chunking, multi-provider LLM abstraction. +- **Why it matters:** One specific drop-in lift: + 1. `core/indexing/ignore.ts` — `DEFAULT_SECURITY_IGNORE_FILETYPES`. Three-tier matcher (basenames, extensions, prefixes). Going into BooCode's `pathGuard` to block analyzing `.env`, `.pem`, `id_rsa`, etc. +- **How we use it:** v1.11.7. Lift the ignore list, adapt to a `path.basename` + extension + prefix matcher. +- **What NOT to use:** `core/indexing/CodebaseIndexer.ts` and `LanceDbIndex.ts` — embedding-based, the path we walked away from. + +#### 7. 
cline/cline + +- **URL:** https://github.com/cline/cline +- **License:** Apache-2.0 +- **Language:** TypeScript (VS Code extension) +- **What it is:** Autonomous coding agent. Pioneered plan/act mode and granular per-tool auto-approve. +- **Why it matters:** Pattern source for v1.15 (absorbed into the broader permissions work). Plan/act invariant: in plan mode, write tools hidden from the model's tool registry; in act mode, available but each individual tool can be approval-gated. +- **How we use it:** Lift the *pattern*, not the code. opencode's `permission/evaluate.ts` wildcard ruleset supersedes cline's mode-enum; cline contributes the conceptual framing (read-only invariant in BooCode v1.x). +- **What NOT to use:** Cline's VS Code-specific UI plumbing. The shape is wrong for our stack. + +#### 8. plandex-ai/plandex + +- **URL:** https://github.com/plandex-ai/plandex +- **License:** MIT +- **Language:** Go +- **What it is:** Terminal agent with a pending-changes sandbox. Edits never touch the filesystem until `/apply`. 2M token context. +- **Why it matters:** Reference architecture for BooCoder (v2.0). The "edits queue in a virtual layer, applied atomically" model is the right safety story for write tools. +- **How we use it:** Lift the data model: `pending_changes` table keyed by `(project_id, session_id, file_path)`, with diff content and apply/reject state. Lift the `diff` / `apply` / `rewind` UX vocabulary. +- **What NOT to use:** Plandex's 2M-context-window engineering. Our context is bounded by llama-swap. + +#### 9. OpenHands/OpenHands + +- **URL:** https://github.com/OpenHands/OpenHands +- **License:** MIT +- **Language:** Python +- **What it is:** Autonomous coding agent platform. V1 architecture is built on an append-only typed event log + Docker sandbox runtime. +- **Why it matters:** Two distinct patterns: + 1. Event-log architecture — superseded by v1.13's parts-table approach (which derives from opencode's part-message model). 
The OpenHands event log is conceptually similar but has a different shape. + 2. Sandbox runtime — per-session Docker container for write tools. Closes the `/opt:ro` mount risk. +- **How we use it:** v2.1. Lift the runtime container pattern (HTTP API inside container, BooCoder calls in). Don't port the Python implementation directly. +- **What NOT to use:** OpenHands' agent prompts, the full microagent system, the cloud deployment path. Also skip the event-log shape (use the opencode-derived parts table instead). + +----- + +### Tier C — reference only / partial use / skip + +#### 10. cortexkit/aft (actual repo path: ualtinok/aft) + +- **URL:** https://github.com/ualtinok/aft +- **License:** check repo +- **Language:** Rust binary + TypeScript plugin +- **What it is:** Tree-sitter analysis tools delivered as a Rust binary, communicating with an OpenCode plugin via JSON-over-stdio. Warm-process pattern: one binary per project keeps parse trees in memory. +- **Why it matters:** The BridgePool transport model. If our `codecontext` tool calls get hot (agent loops calling it dozens of times per session), the warm-process pattern is faster than fork-per-call. +- **How we use it:** **Defer.** Profile first. Codecontext sidecar might be fast enough on its own. Revisit if tool-call latency becomes the bottleneck. +- **What NOT to use:** The opencode-plugin wrapper. Wrong integration surface. + +#### 11. codeprysm/codeprysm + +- **URL:** https://github.com/codeprysm/codeprysm +- **License:** check repo +- **Language:** Rust +- **What it is:** Graph-based code intelligence: tree-sitter parsing → node/edge graph in Qdrant, embeddings layered on top, MCP server exposes semantic search. +- **Why it matters:** Clean node/edge taxonomy: nodes = Container/Callable/Data; edges = CONTAINS/USES/DEFINES. +- **How we use it:** Lift the taxonomy *only* if we end up building our own graph instead of relying on codecontext. The embedding half is the trap we walked away from.
+- **What NOT to use:** The Qdrant + embedding pipeline. Same anti-pattern as continue's indexer. + +#### 12. DeepSourceCorp/globstar + +- **URL:** https://github.com/DeepSourceCorp/globstar +- **License:** MIT +- **Language:** Go +- **What it is:** Static analysis toolkit for writing code checkers using tree-sitter S-expression queries. YAML interface for simple checkers, Go interface for complex multi-file checkers. +- **Why it matters:** Not for the architect tool. **Future use only.** If BooCoder ever grows a "verify before commit" lane, globstar checkers could be the verification engine: drop YAML checkers into `.globstar/`, run as a pre-apply gate. +- **How we use it:** Park. Not in any current version. +- **What NOT to use:** Don't try to use it as a codebase analyzer — it's a linter framework, wrong tool for the architect role. + +#### 13. getpaseo/paseo + +- **URL:** https://github.com/getpaseo/paseo +- **License:** AGPL-3.0 +- **What it is:** WebSocket daemon ↔ client protocol for agent coordination. Already running in your stack (paseo dispatches Claude Code/opencode). +- **Why it matters:** Patterns for agent lifecycle, `--worktree` flag pattern, ECDH/NaCl security model. +- **How we use it:** Reference for BooCoder isolation (v2.0/v2.1). Note AGPL — fine for personal, blocks public distribution. +- **What NOT to use:** Don't vendor the source. Treat as a peer service. + +#### 14. earendil-works/pi + +- **URL:** https://github.com/earendil-works/pi +- **License:** MIT +- **What it is:** `@mariozechner/pi-agent-core` (tool loop + state machine) and `@mariozechner/pi-ai` (provider abstraction). +- **Why it matters:** If we ever want non-llama-swap inference (Anthropic, OpenAI, Mistral direct), pi-ai is the cleanest TypeScript provider abstraction available. +- **How we use it:** Defer. v2.x optional batch only. + +#### 15. 
microsoft/agent-framework + +- **URL:** https://github.com/microsoft/agent-framework +- **License:** MIT +- **What it is:** Workflow graphs for multi-agent coordination. +- **Why it matters:** Conceptual reference for far-future multi-agent orchestration. +- **How we use it:** Read the ADRs in `docs/decisions/`. Don't port code — implementation is Azure/Python/.NET-heavy. + +#### 16. microsoft/autogen + +- **URL:** https://github.com/microsoft/autogen +- **License:** MIT +- **What it is:** Earlier Microsoft multi-agent framework. +- **Why it matters:** Effectively sunsetting in favor of agent-framework. +- **How we use it:** Skip. Don't invest in evaluating further. + +#### 17. open-webui/open-webui + +- **URL:** https://github.com/open-webui/open-webui +- **License:** BSD-3 +- **What it is:** Self-hosted LLM frontend. +- **Why it matters:** Python/Svelte, wrong stack. RAG pipeline only worth a read if BooLab needs improvement — unrelated to BooCode. +- **How we use it:** Skip for BooCode. 
+ +----- + +## Lift catalog — what lands where + +| Source repo | Specific artifact | License | BooCode destination | Version | +|---|---|---|---|---| +| `sst/opencode` | `session/compaction.ts` + `session/overflow.ts` algorithms | MIT | `services/compaction.ts` | **v1.11.0 ✅** | +| `sst/opencode` | `session/processor.ts` DOOM_LOOP_THRESHOLD pattern | MIT | `services/inference.ts` doom-loop guard | v1.11.6 | +| `continuedev/continue` | `core/indexing/ignore.ts` DEFAULT_SECURITY_IGNORE_FILETYPES | Apache-2.0 | Extend `path_guard.ts` exclusion list | v1.11.7 | +| `nmakod/codecontext` | Whole binary (sidecar) | MIT | New `codecontext` container, 8 MCP tools wired via static wrappers | v1.12 | +| `sst/opencode` | `session/llm.ts` experimental_repairToolCall pattern | MIT | `services/inference.ts` synthetic invalid-tool result | v1.12 | +| `sst/opencode` | `tool/truncate.ts` truncation + outputPath pattern (adapted: opaque id) | MIT | `services/truncate.ts` + `view_truncated_output` tool | v1.12 | +| `Aider-AI/aider` | `aider/queries/tree-sitter-*.scm` (60+ files) | Apache-2.0 | Fallback grammars for languages not covered by sidecars | v1.12 (fallback) | +| `sst/opencode` | `session/llm.ts` AI SDK adoption + alpha tool ordering | MIT | `services/inference.ts` rewrite | v1.13 | +| `sst/opencode` | Parts-message taxonomy (text, tool_call, tool_result, reasoning, step_start) | MIT | new `message_parts` table | v1.13 | +| `sst/opencode` | `session/prompt.ts` runLoop() outer agent loop | MIT | `services/inference.ts` step-based loop | v1.14 | +| `sst/opencode` | `agent.steps` per-agent step cap | MIT | AGENTS.md + agents.ts | v1.14 | +| `sst/opencode` | `permission/evaluate.ts` wildcard ruleset | MIT | new `permissions` table + matcher | v1.15 | +| `sst/opencode` | `mcp/index.ts` MCP client (SSE transport + tools/list + tools/call) | MIT | new `services/mcp/` module; codecontext re-wired through it | v1.15 | +| `cline/cline` | Plan/Act invariant (read-only mode pattern) | 
Apache-2.0 | absorbed into v1.15 permissions work | v1.15 | +| `spirituslab/codesight` | `analyze.mjs` — call graph, circular-dep, dead-code | MIT-ish | `apps/server/src/tools/repo_health.ts` | v1.16 | +| `plandex-ai/plandex` | `pending_changes` data model, diff/apply/rewind UX | MIT | New `pending_changes` table, BooCoder write-tool gating | v2.0 | +| `OpenHands/OpenHands` | Sandbox runtime pattern | MIT | New `boocoder` container, per-session Docker | v2.1 | +| `cortexkit/aft` (ualtinok/aft) | BridgePool warm-process JSON-stdio pattern | check | Optimization if profile shows fork overhead | Deferred | +| `codeprysm/codeprysm` | Node/edge taxonomy (Container/Callable/Data, CONTAINS/USES/DEFINES) | check | Reference only if we ever build our own graph | None | +| `DeepSourceCorp/globstar` | Whole toolkit | MIT | Future verify-before-commit gate for BooCoder | Parked | +| `earendil-works/pi` | `pi-ai` provider abstraction | MIT | Multi-provider LLM if pursued | v2.x optional | +| `microsoft/agent-framework` | Workflow graph concepts | MIT | Conceptual only | v3.x | + +----- + +## Decisions log + +- **Embeddings dropped from BooCode** (May 2026). Replaced RAG with file-view tools + sidecar analyzers. +- **opencode promoted to Tier A** (2026-05-20). The compaction port (v1.11.0) made it clear opencode is not just "the agent Sam uses" — it's the canonical reference implementation for everything BooCode is rebuilding through v1.15. Five algorithms identified for lift (compaction, doom-loop, repairToolCall, runLoop, permission evaluate) plus truncate.ts and MCP client. +- **Source is `sst/opencode` `dev` branch.** `anomalyco/opencode` is a rebranded mirror; do not source from there. +- **Original Batch 11 (aider PageRank port) replaced** by codecontext sidecar approach. +- **Original Batch 12 (codebase indexer w/ Harrier) removed.** No embedding infrastructure. +- **Original Batch 13 (OpenHands event log) replaced** by v1.13 parts table (opencode pattern). 
Same outcome, different shape. +- **Original Batch 12 (cline plan/act mode) absorbed into v1.15** (opencode permission ruleset). Same outcome, wildcard rules instead of mode enum. +- **Aider's `repomap.py` port dropped.** Codecontext supersedes it. Aider contribution narrows to the `.scm` query files only. +- **Globstar role re-scoped.** Not an architect tool — parked for future verify-before-commit gate. +- **codeprysm role re-scoped.** Taxonomy reference only. Embedding half rejected. +- **AI SDK adoption deferred to v1.13.** Hand-roll opencode's repairToolCall pattern in v1.12 first. +- **`tool_choice='required'` confirmed supported** by llama-swap (qwen3.6-35b-a3b-mxfp4, 2026-05-20). Repair tool call is viable. +- **`anomalyco/sst` is a mirror, not a fork.** Same applies to `anomalyco/opencode`. Use canonical `sst/sst` and `sst/opencode` sources. diff --git a/boocode_roadmap.md b/boocode_roadmap.md index a3192c7..146921b 100644 --- a/boocode_roadmap.md +++ b/boocode_roadmap.md @@ -1,204 +1,317 @@ -# BooCode — Roadmap +# BooCode v1.x — Roadmap -Last updated: 2026-05-17 +Last updated: 2026-05-20 ## Overview -BooCode is a standalone code-chat tool at `/opt/boocode/`. Read-only by design in v1.x — pick a project, chat with a local LLM that has file-inspection tools, get streaming responses over WebSocket. +BooCode is a standalone code-chat tool at `/opt/boocode/`. Read-only by design — pick a project, chat with a local LLM that has file-inspection tools, get streaming responses over WebSocket. Live at `https://code.indifferentketchup.com` (Caddy → Authelia → Tailscale → `100.114.205.53:9500`). **Architectural commitments:** -- No embeddings. File-view tools + sidecar analyzers replace RAG. +- No embeddings. The model uses file-view tools (`view_file`, `list_dir`, `grep`, `find_files`) + sidecar analyzers (codecontext, codesight). Walked away from the RAG pipeline May 2026. - Read-only in v1.x. Write tools land in BooCoder (separate container, post-v1.x). 
- One Postgres (`boocode_db`), one frontend SPA, container-per-service for new capabilities. -## Current state +External code lifted from / referenced in: see `boocode_code_review.md` for full inventory. -- **main:** v1.8.1 (`b09d0ff` was last known tip prior to v1.8.2). -- **Just merged / committed to main:** v1.8.2 — tool-loop fixes (read-only loop cap raised, "tool loop depth exceeded" error surfaced with continue button, `max_tool_calls` AGENTS.md frontmatter, `messages.metadata` column). -- **In flight RIGHT NOW:** **v1.x-themes** branch — Claude Code implementing 18-theme system. See "Active work" below. +----- -## Active work +## Shipped (status as of 2026-05-20) -### v1.x-themes — Theme system (in flight) +| Version | Theme | Notes | +|---|---|---| +| v1.0 | Initial scaffold | live | +| Batches 1–4.4 | Markdown, sidebar, panes, chats-inside-sessions, archive, fork/delete, header polish, settings drawer | merged | +| v1.5 | resolveProjectPath, BOOTSTRAP_ROOT, vitest pin | merged | +| v1.6, v1.6.1, v1.6.2 | Mobile pass + RightRail mobile drawer | merged | +| v1.7 | Drag-drop file + paste-as-attachment | merged | +| v1.8, v1.8.1, v1.8.2 | Settings drawer, git_status tool, WS reconnect, **per-turn budget reset + Continue affordance + CapHitSentinel** | merged | +| v1.9.1 | Skills system (`/opt/skills/` + `skill_find`/`skill_use`/`skill_resource` tools + `/skill` slash command) | merged | +| v1.9.7 | `ask_user_input` elicitation tool | merged | +| **Batch 9 (Agents Tier 2)** | `AGENTS.md` + 6 builtin agents + AgentPicker in ChatInput toolbar + `sessions.agent_id` | **merged in `92bd3b1`**, included in v1.9.1/v1.9.7/v1.10.x tags | +| v1.10.0 | BooTerm: separate container, xterm.js + node-pty + tmux | merged | +| v1.10.1 | BooTerm-user (spawn as samkintop, login bash, Claude Code/opencode PATH) | merged | +| v1.10.4, v1.10.5 | Mobile terminal + XML tool-call fallback parser | merged | +| **v1.11.0** | **opencode-style compaction port** (auto-overflow, anchored 
summary, tail preservation) | merged | +| v1.11.1 | Compaction follow-up (working indicator during compaction, unit tests, .bak cleanup) | merged | +| v1.11.2 | ContextBar (persistent context-usage indicator) | merged | +| v1.11.3 | `ctx_max` capture via `/upstream/<model>/props` (replaces dead `timings.n_ctx` read) | merged | -**Spec source:** locked in this session. Anchors below derived from `/mnt/user-data/uploads/boocode-theme-previews.html` (16 themes extracted) + spec §3 family rules for the two missing (`fuchsia-noir`, `midnight-sapphire`). +----- -**18 themes, grouped:** - -| Family | IDs | -|---|---| -| Neutral dark | obsidian (default), gunmetal | -| Brown / warm | espresso, volcanic-brown | -| Orange / amber | copper, gold | -| Red | oxblood, crimson | -| Purple | elderflower, plum | -| Pink / magenta | steel-pink, fuchsia-noir | -| Green | matrix, sage | -| Blue | cobalt, midnight-sapphire | -| Light-only | ivory, chalk | - -**Dark anchors (bg, card, border, muted-fg, accent):** - -``` -obsidian #0c0c0e #15151a #1f1f23 #6b6b75 #8b5cf6 -gunmetal #0d1117 #161b22 #21262d #7d8590 #388bfd -espresso #1c1410 #241a14 #2e2218 #8a7058 #c8a880 -volcanic-brown #140906 #1e0e0a #2e1610 #7a4030 #cc4a1a -copper #100800 #1c1408 #2e1f0a #8a6040 #b87333 -gold #0e0800 #1a1200 #2a1f00 #a07c30 #d4af37 -oxblood #0a0303 #180606 #2a0808 #7a3028 #8b1a1a -crimson #0e0404 #1a0808 #2e0a0a #8a3030 #dc143c -elderflower #100818 #1c1024 #2c1830 #8a78a0 #b89cd8 -plum #0c0814 #180e20 #241830 #7a4878 #8e4585 -steel-pink #0e0408 #1a080e #2e0c1a #9a4070 #cc33aa -fuchsia-noir #0a0610 #14081a #2a0c2e #8a3878 #ff1493 -matrix #000a00 #031403 #0a200a #208030 #00ff41 -sage #0a0e08 #141a10 #1e2e1a #7a8870 #9caf88 -cobalt #020817 #061434 #0c2244 #3060a0 #0047ab -midnight-sapphire #02050e #060c1f #0e1a36 #4a6088 #1e3a8a -ivory #fdfcf8 #f5f2e8 #e8e4d8 #8a8478 #3a3328 (light-only) -chalk #fafaf7 #f0f0ec #e5e5e0 #75756e #2a2a28 (light-only) -``` - -**Light-variant derivation (for the 16 dark themes):** --
Lightest anchor → background -- Accent darkens ~15% (HSL L − 15pp) -- Foreground = near-black tinted toward family hue -- Surfaces / borders scale up symmetrically - -**Fallback:** `ivory` or `chalk` + dark mode → `obsidian` dark. - -**Token map (shadcn nova set):** -``` -background ← anchor 1 -card / popover ← anchor 2 -border / muted ← anchor 3 -muted-foreground ← anchor 4 -primary / accent ← anchor 5 -foreground ← derived: anchor-5 hue, ~92% L, ~25% S ---destructive ← red family, unchanged across themes ---ring ← per-theme accent ---radius ← 0.5rem locked -fonts ← Inter + JetBrains Mono locked -``` - -**Wiring locked:** -- Schema: `settings.theme_id TEXT NOT NULL DEFAULT 'obsidian'`, `settings.theme_mode TEXT NOT NULL DEFAULT 'dark' CHECK IN ('dark','light','system')` -- API: GET `/api/settings` extended, PATCH whitelists 18 theme ids → 400 otherwise -- CSS: `apps/web/src/styles/themes/*.css` (18 + `_tokens.css`), imported from `globals.css` (NOT `index.css`) -- `.theme-` + `.theme-.dark` composed on `` -- `apps/web/src/lib/theme.ts` (new): `THEMES` const, `applyTheme(id, mode)`, `useTheme()` hook. matchMedia subscribed only when `mode === 'system'` -- `apps/web/src/App.tsx`: `useTheme()` at top -- Settings page: card grid, mode toggle (radio: Dark/Light/System). No header dropdown. -- shadcn primitives: `card`, `radio-group` installed via `pnpm dlx shadcn@latest add`. `button`, `label` already present. -- FOUC mitigation: localStorage cache + inline `