v1.11: opencode-style compaction port
- compaction.ts: usable/isOverflow/estimate/turns/select/buildPrompt/process
- compaction-prompt.ts: SUMMARY_TEMPLATE verbatim from opencode
- schema: messages.{compacted_at,summary,tail_start_id} + chats.needs_compaction
- inference: auto-trigger on overflow, pre-fetch compaction before next turn
- /compact slash command rewired to new path
- WS: chat_status working/idle around compaction + compacted frame
- frontend: SummaryCard + sonner toast on compacted
- 24 unit tests for pure functions
This commit is contained in:
@@ -21,6 +21,8 @@ import {
|
||||
import { PathScopeError, resolveProjectRoot } from './path_guard.js';
|
||||
import { maybeAutoNameChat } from './auto_name.js';
|
||||
import { getAgentById } from './agents.js';
|
||||
import * as compaction from './compaction.js';
|
||||
import type { Broker } from './broker.js';
|
||||
|
||||
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
|
||||
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
|
||||
@@ -147,6 +149,12 @@ export interface InferenceContext {
|
||||
log: FastifyBaseLogger;
|
||||
publish: FramePublisher;
|
||||
publishUser: (frame: UserStreamFrame) => void;
|
||||
// v1.11: passed through so compaction.process can publish 'compacted'
|
||||
// frames on the same session WS channel useSessionStream subscribes to.
|
||||
// Compaction is the only path that needs the raw broker handle (regular
|
||||
// inference goes through `publish`); keeping a separate field avoids
|
||||
// tempting other code paths into bypassing the session-id binding.
|
||||
broker: Broker;
|
||||
}
|
||||
|
||||
// Resolution order: base prompt < agent.system_prompt < user prompt, where
|
||||
@@ -260,17 +268,48 @@ async function loadContext(
|
||||
if (projectRows.length === 0) return null;
|
||||
const project = projectRows[0]!;
|
||||
|
||||
// v1.11: filter compacted messages out of the inference assembly. The GET
|
||||
// /api/sessions/:id/messages endpoint still returns everything (so the UI
|
||||
// can show history with the summary card inline); only LLM payloads skip
|
||||
// compacted rows. compacted_at IS NULL keeps the active summary + tail.
|
||||
const history = await sql<Message[]>`
|
||||
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
|
||||
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
|
||||
FROM messages
|
||||
WHERE chat_id = ${chatId}
|
||||
WHERE chat_id = ${chatId} AND compacted_at IS NULL
|
||||
ORDER BY created_at ASC, id ASC
|
||||
`;
|
||||
|
||||
return { session, project, history };
|
||||
}
|
||||
|
||||
// v1.11: shared helper used after both finalizeCompletion and executeToolPhase
|
||||
// persist their token counts. Reads tokens off the just-UPDATEd row (which
|
||||
// the caller returns from RETURNING), runs compaction.isOverflow, and flips
|
||||
// chats.needs_compaction. The next runAssistantTurn invocation acts on it.
|
||||
// Silent on missing tokens — llama-swap occasionally omits usage on truncated
|
||||
// streams, and we'd rather miss one overflow than crash the inference path.
|
||||
async function maybeFlagForCompaction(
|
||||
ctx: InferenceContext,
|
||||
chatId: string,
|
||||
updated: { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null } | undefined,
|
||||
): Promise<void> {
|
||||
if (!updated) return;
|
||||
const promptTokens = updated.ctx_used;
|
||||
const completionTokens = updated.tokens_used;
|
||||
const contextLimit = updated.ctx_max;
|
||||
if (typeof promptTokens !== 'number') return;
|
||||
if (typeof completionTokens !== 'number') return;
|
||||
if (typeof contextLimit !== 'number') return;
|
||||
const overflow = compaction.isOverflow(
|
||||
{ prompt_tokens: promptTokens, completion_tokens: completionTokens },
|
||||
contextLimit,
|
||||
);
|
||||
if (!overflow) return;
|
||||
await ctx.sql`UPDATE chats SET needs_compaction = true WHERE id = ${chatId}`;
|
||||
ctx.log.info({ chatId, promptTokens, completionTokens, contextLimit }, 'inference: flagged for compaction');
|
||||
}
|
||||
|
||||
async function* sseLines(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
|
||||
const reader = stream.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
@@ -758,6 +797,10 @@ async function executeToolPhase(
|
||||
WHERE id = ${assistantMessageId}
|
||||
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
||||
`;
|
||||
// v1.11: flag for compaction if this turn pushed us over the usable budget.
|
||||
// We never compact mid-loop (the recursive runAssistantTurn keeps tools
|
||||
// flowing); the flag fires on the NEXT turn's pre-fetch hook above.
|
||||
await maybeFlagForCompaction(ctx, chatId, updated);
|
||||
const [toolSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
|
||||
UPDATE sessions SET updated_at = clock_timestamp()
|
||||
WHERE id = ${sessionId}
|
||||
@@ -889,6 +932,9 @@ async function finalizeCompletion(
|
||||
WHERE id = ${assistantMessageId}
|
||||
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
||||
`;
|
||||
// v1.11: flag for compaction on the terminal turn too. Catches the common
|
||||
// case of a turn that hit the limit without invoking tools.
|
||||
await maybeFlagForCompaction(ctx, chatId, updated);
|
||||
const [completeSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
|
||||
UPDATE sessions SET updated_at = clock_timestamp()
|
||||
WHERE id = ${sessionId}
|
||||
@@ -927,6 +973,29 @@ async function runAssistantTurn(
|
||||
): Promise<void> {
|
||||
const { sessionId, chatId } = args;
|
||||
|
||||
// v1.11: if the prior turn flagged this chat for compaction, run it first
|
||||
// so loadContext below reads the post-compaction history. We swallow
|
||||
// compaction failures (clearing the flag so we don't loop) and proceed
|
||||
// with the un-compacted history — a slow turn that hits the model's
|
||||
// hard limit is recoverable; a dead session is not.
|
||||
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
|
||||
SELECT needs_compaction FROM chats WHERE id = ${chatId}
|
||||
`;
|
||||
if (chatFlag[0]?.needs_compaction) {
|
||||
try {
|
||||
await compaction.process({
|
||||
sql: ctx.sql,
|
||||
config: ctx.config,
|
||||
log: ctx.log,
|
||||
broker: ctx.broker,
|
||||
chatId,
|
||||
});
|
||||
} catch (err) {
|
||||
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
|
||||
await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
|
||||
}
|
||||
}
|
||||
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
if (!loaded) {
|
||||
ctx.log.warn({ sessionId }, 'inference: session or project missing');
|
||||
@@ -1237,81 +1306,6 @@ async function insertCapHitSentinel(
|
||||
});
|
||||
}
|
||||
|
||||
const COMPACT_SYSTEM_PROMPT =
|
||||
'Summarize the preceding conversation into a dense but complete context paragraph. Preserve all key facts, decisions, file paths, code patterns, and action items. Do not add any new information. Output only the summary paragraph.';
|
||||
|
||||
async function runCompact(
|
||||
ctx: InferenceContext,
|
||||
sessionId: string,
|
||||
chatId: string,
|
||||
compactMessageId: string
|
||||
): Promise<void> {
|
||||
const loaded = await loadContext(ctx.sql, sessionId, chatId);
|
||||
if (!loaded) return;
|
||||
const { session, project, history } = loaded;
|
||||
|
||||
const messagesForSummary = buildMessagesPayload(session, project,
|
||||
history.filter((m) => m.id !== compactMessageId)
|
||||
);
|
||||
messagesForSummary.push({
|
||||
role: 'system',
|
||||
content: COMPACT_SYSTEM_PROMPT,
|
||||
});
|
||||
|
||||
ctx.publish(sessionId, {
|
||||
type: 'message_started',
|
||||
message_id: compactMessageId,
|
||||
chat_id: chatId,
|
||||
role: 'assistant',
|
||||
});
|
||||
|
||||
let content = '';
|
||||
try {
|
||||
const result = await streamCompletion(
|
||||
ctx,
|
||||
session.model,
|
||||
messagesForSummary,
|
||||
{ tools: null },
|
||||
(delta) => {
|
||||
content += delta;
|
||||
ctx.publish(sessionId, {
|
||||
type: 'delta',
|
||||
message_id: compactMessageId,
|
||||
chat_id: chatId,
|
||||
content: delta,
|
||||
});
|
||||
}
|
||||
);
|
||||
content = result.content;
|
||||
} catch (err) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
await ctx.sql`
|
||||
UPDATE messages SET status = 'failed', content = ${content}, finished_at = clock_timestamp()
|
||||
WHERE id = ${compactMessageId}
|
||||
`;
|
||||
ctx.publish(sessionId, {
|
||||
type: 'error',
|
||||
message_id: compactMessageId,
|
||||
chat_id: chatId,
|
||||
error: errMsg,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const preCompactCount = history.filter((m) => m.id !== compactMessageId && m.kind !== 'compact').length;
|
||||
const summary = `[Context compacted — ${preCompactCount} messages summarized]\n\n${content}`;
|
||||
|
||||
await ctx.sql`
|
||||
UPDATE messages SET content = ${summary}, status = 'complete', finished_at = clock_timestamp()
|
||||
WHERE id = ${compactMessageId}
|
||||
`;
|
||||
ctx.publish(sessionId, {
|
||||
type: 'message_complete',
|
||||
message_id: compactMessageId,
|
||||
chat_id: chatId,
|
||||
});
|
||||
}
|
||||
|
||||
interface InferenceRegistration {
|
||||
controller: AbortController;
|
||||
completed: Promise<void>;
|
||||
@@ -1328,6 +1322,10 @@ export function createInferenceRunner(
|
||||
const callCtx: InferenceContext = {
|
||||
...ctx,
|
||||
publishUser: (frame) => publishUserFn(user, frame),
|
||||
// v1.11: broker comes in via ctx (set at registration time). Repeated
|
||||
// here so the destructure carries it onto the per-call ctx without
|
||||
// having to add it to every enqueue/cancel signature individually.
|
||||
broker: ctx.broker,
|
||||
};
|
||||
// v1.8 mobile-tabs: announce working before the async loop starts so
|
||||
// every device subscribed to the user channel sees the amber dot.
|
||||
@@ -1357,20 +1355,6 @@ export function createInferenceRunner(
|
||||
})();
|
||||
},
|
||||
|
||||
enqueueCompact(sessionId: string, chatId: string, compactMessageId: string, user: string) {
|
||||
const callCtx: InferenceContext = {
|
||||
...ctx,
|
||||
publishUser: (frame) => publishUserFn(user, frame),
|
||||
};
|
||||
void (async () => {
|
||||
try {
|
||||
await runCompact(callCtx, sessionId, chatId, compactMessageId);
|
||||
} catch (err) {
|
||||
callCtx.log.error({ err }, 'unhandled compact error');
|
||||
}
|
||||
})();
|
||||
},
|
||||
|
||||
async cancel(_sessionId: string, chatId: string): Promise<boolean> {
|
||||
const reg = registry.get(chatId);
|
||||
if (!reg) return false;
|
||||
|
||||
Reference in New Issue
Block a user