v1.11: opencode-style compaction port

- compaction.ts: usable/isOverflow/estimate/turns/select/buildPrompt/process
- compaction-prompt.ts: SUMMARY_TEMPLATE verbatim from opencode
- schema: messages.{compacted_at,summary,tail_start_id} + chats.needs_compaction
- inference: auto-trigger on overflow, pre-fetch compaction before next turn
- /compact slash command rewired to new path
- WS: chat_status working/idle around compaction + compacted frame
- frontend: SummaryCard + sonner toast on compacted
- 24 unit tests for pure functions
This commit is contained in:
2026-05-20 19:05:35 +00:00
parent 6aab4f7d2a
commit dc43dd44f9
14 changed files with 1063 additions and 113 deletions

View File

@@ -19,6 +19,7 @@ import { registerSkillsRoutes } from './routes/skills.js';
import { createInferenceRunner } from './services/inference.js';
import { createBroker } from './services/broker.js';
import { listSkills } from './services/skills.js';
import * as compaction from './services/compaction.js';
async function main() {
const config = loadConfig();
@@ -81,6 +82,11 @@ async function main() {
publish: (sessionId, frame) => {
broker.publish(sessionId, frame as unknown as Record<string, unknown> & { type: string });
},
// v1.11: broker handle for compaction.process to publish 'compacted'
// frames on the per-session channel. Inference's regular publish path
// is bound to (sessionId, InferenceFrame); compaction publishes a
// different frame shape, so it goes through the raw broker.
broker,
},
(user, frame) => {
broker.publishUser(user, frame as unknown as Record<string, unknown> & { type: string });
@@ -90,9 +96,13 @@ async function main() {
enqueueInference: (sessionId, chatId, assistantId, user) => {
inference.enqueue(sessionId, chatId, assistantId, user);
},
enqueueCompact: (sessionId, chatId, compactId, user) => {
inference.enqueueCompact(sessionId, chatId, compactId, user);
},
// v1.11: synchronous compaction. Awaits the LLM call inside the route's
// request lifecycle; the new summary row arrives via the WS 'compacted'
// frame published from inside compaction.process. We let the error
// bubble up so the route can reply 500 — manual /compact failures
// should be loud (the user just clicked a button).
runCompaction: (chatId) =>
compaction.process({ sql, config, log: app.log, broker, chatId }),
cancelInference: async (sessionId, chatId) => {
return inference.cancel(sessionId, chatId);
},

View File

@@ -316,7 +316,8 @@ export function registerChatRoutes(
}
const rows = await sql<Message[]>`
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata,
summary, tail_start_id, compacted_at
FROM messages
WHERE chat_id = ${req.params.id}
ORDER BY created_at ASC, id ASC

View File

@@ -49,7 +49,12 @@ const AskUserInputArgs = z.object({
interface MessageHandlers {
enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
enqueueCompact: (sessionId: string, chatId: string, compactMessageId: string, user: string) => void;
// v1.11: returns a promise that resolves after compaction.process finishes
// (await the LLM call). Throws on failure — the route surfaces a 500.
// Replaces the v1.10 enqueueCompact (which fired-and-forgot a kind='compact'
// streaming row). The new anchored-rolling strategy inserts a single
// summary=true assistant row only after the LLM responds.
runCompaction: (chatId: string) => Promise<void>;
publishUserMessage: (
sessionId: string,
chatId: string,
@@ -81,9 +86,15 @@ export function registerMessageRoutes(
reply.code(404);
return { error: 'session not found' };
}
// v1.11: returns ALL messages including compacted ones. The UI
// distinguishes via the new `summary` flag (renders an accordion
// SummaryCard) and shows compacted_at-stamped rows inline for context.
// Internal inference assembly filters compacted_at IS NULL separately —
// see services/inference.ts loadContext + services/compaction.ts.
const rows = await sql<Message[]>`
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata,
summary, tail_start_id, compacted_at
FROM messages
WHERE session_id = ${req.params.id}
ORDER BY created_at ASC, id ASC
@@ -251,29 +262,30 @@ export function registerMessageRoutes(
}
);
// v1.11: manual /compact. Was a streaming kind='compact' row inserted by
// this handler; now delegates to the anchored-rolling compaction service.
// Synchronous (we await the LLM call) — callers either await or rely on
// the 'compacted' WS frame to refresh their view. The response carries
// no body of interest; the new summary row arrives via the WS frame.
app.post<{ Params: { id: string } }>(
'/api/chats/:id/compact',
async (req, reply) => {
const chatRows = await sql<Chat[]>`
SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
const chatRows = await sql<{ id: string }[]>`
SELECT id FROM chats WHERE id = ${req.params.id} AND status = 'open'
`;
if (chatRows.length === 0) {
reply.code(404);
return { error: 'chat not found' };
}
const chat = chatRows[0]!;
const sessionId = chat.session_id;
const [compactMsg] = await sql<{ id: string }[]>`
INSERT INTO messages (session_id, chat_id, role, content, kind, status, created_at)
VALUES (${sessionId}, ${chat.id}, 'system', '', 'compact', 'streaming', clock_timestamp())
RETURNING id
`;
handlers.enqueueCompact(sessionId, chat.id, compactMsg!.id, 'default');
reply.code(202);
return { compact_message_id: compactMsg!.id };
try {
await handlers.runCompaction(chatRows[0]!.id);
} catch (err) {
req.log.error({ err, chatId: chatRows[0]!.id }, 'manual compaction failed');
reply.code(500);
return { error: err instanceof Error ? err.message : 'compaction failed' };
}
reply.code(200);
return { ok: true };
}
);

View File

@@ -21,9 +21,12 @@ export function registerWebSocket(
return;
}
// v1.11: snapshot includes compaction fields so MessageBubble can
// render the SummaryCard for summary=true rows on first connect.
const messages = await sql<Message[]>`
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata,
summary, tail_start_id, compacted_at
FROM messages
WHERE session_id = ${sessionId}
ORDER BY created_at ASC, id ASC

View File

@@ -179,3 +179,25 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
-- v1.11: anchored rolling compaction.
-- compacted_at — marks rows that are "behind the curtain" of the latest
-- summary. Inference assembly filters compacted_at IS NULL;
-- the API GET still returns all rows so the UI can show
-- history with the summary card inline.
-- summary — true on the assistant row that IS the anchored summary.
-- Exactly one row per chat is the "current" summary
-- (every prior summary row is itself compacted_at-stamped
-- when superseded, leaving one live anchor).
-- tail_start_id — points at the first preserved message that the summary
-- covers up to (exclusive). Lets the UI/debug reason about
-- the boundary without re-deriving from compacted_at.
-- needs_compaction — flag on chats (not sessions) because chat history is
-- per-chat; sessions have 1:N chats. Set true post-overflow,
-- cleared by compaction.process at the start of the next
-- inference turn.
ALTER TABLE messages ADD COLUMN IF NOT EXISTS compacted_at TIMESTAMPTZ;
ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT FALSE;
ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
-- Serves per-chat lookups that filter on compacted_at, e.g. the
-- `chat_id = ? AND compacted_at IS NULL` query in services/compaction.ts.
CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);

View File

@@ -0,0 +1,258 @@
import { describe, it, expect } from 'vitest';
import {
usable,
isOverflow,
estimate,
turns,
select,
buildPrompt,
type CompactionMessage,
} from '../compaction.js';
import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';
// ---- fixture ----------------------------------------------------------------
// Builds a CompactionMessage for the tests below. Every call bumps a
// module-level counter that drives both the id (`m<counter>`) and the
// created_at timestamp (counter seconds after the epoch), so rows created
// later sort later. Defaults describe a plain, complete, non-summary message;
// pass `overrides` (e.g. `{ summary: true }`) to change any field.
let counter = 0;
function mkMsg(
  role: CompactionMessage['role'],
  content: string,
  overrides: Partial<CompactionMessage> = {},
): CompactionMessage {
  counter += 1;
  const defaults: CompactionMessage = {
    id: `m${counter}`,
    role,
    content,
    kind: 'message',
    summary: false,
    status: 'complete',
    tool_calls: null,
    tool_results: null,
    metadata: null,
    created_at: new Date(counter * 1000).toISOString(),
  };
  // Overrides are applied last so they win over every default.
  return { ...defaults, ...overrides };
}
// ---- usable -----------------------------------------------------------------
describe('usable', () => {
  it('returns 0 when contextLimit is 0', () => {
    const headroom = usable(0);
    expect(headroom).toBe(0);
  });

  it('returns 0 when contextLimit is below the 20k buffer', () => {
    // The subtraction is clamped at 0, so a window at or under the 20k
    // buffer never reports negative headroom. With 0 usable tokens,
    // isOverflow returns false (there is no budget to size a compaction).
    for (const limit of [10_000, 19_999, 20_000]) {
      expect(usable(limit)).toBe(0);
    }
  });

  it('subtracts the 20k buffer from a normal-sized context window', () => {
    const cases: Array<[limit: number, expected: number]> = [
      [100_000, 80_000],
      [32_768, 12_768],
    ];
    for (const [limit, expected] of cases) {
      expect(usable(limit)).toBe(expected);
    }
  });
});
// ---- isOverflow -------------------------------------------------------------
describe('isOverflow', () => {
  it('returns false when usable is 0 (unknown / sub-buffer context)', () => {
    const promptHeavy = { prompt_tokens: 999_999, completion_tokens: 0 };
    const completionHeavy = { prompt_tokens: 0, completion_tokens: 999_999 };
    expect(isOverflow(promptHeavy, 0)).toBe(false);
    expect(isOverflow(completionHeavy, 10_000)).toBe(false);
  });

  it('returns false at 50% of usable', () => {
    // usable(100k) = 80k, so 30k + 10k = 40k is exactly half.
    const half = { prompt_tokens: 30_000, completion_tokens: 10_000 };
    expect(isOverflow(half, 100_000)).toBe(false);
  });

  it('returns false just under usable', () => {
    const justUnder = { prompt_tokens: 79_000, completion_tokens: 999 };
    expect(isOverflow(justUnder, 100_000)).toBe(false);
  });

  it('returns true exactly at usable (>=, not strict >)', () => {
    const exact = { prompt_tokens: 80_000, completion_tokens: 0 };
    expect(isOverflow(exact, 100_000)).toBe(true);
  });

  it('returns true above usable', () => {
    const over = { prompt_tokens: 50_000, completion_tokens: 40_000 };
    expect(isOverflow(over, 100_000)).toBe(true);
  });
});
// ---- estimate ---------------------------------------------------------------
describe('estimate', () => {
  it('returns a tiny value for an empty array (JSON.stringify([]) is "[]")', () => {
    // '[]' is two characters, and Math.ceil(2 / 4) is 1. The baseline is
    // therefore 1, not 0: the serialized form always has some length.
    const baseline = estimate([]);
    expect(baseline).toBe(1);
  });

  it('scales roughly with content length', () => {
    const shortMsg = mkMsg('user', 'hi');
    const tiny = estimate([shortMsg]);
    const longMsg = mkMsg('user', 'x'.repeat(4000));
    const big = estimate([longMsg]);
    expect(big).toBeGreaterThan(tiny);
    // 4000 content chars alone account for 1000 estimated tokens.
    expect(big).toBeGreaterThanOrEqual(1000);
  });

  it('is deterministic across repeated calls', () => {
    const msgs = [mkMsg('user', 'one'), mkMsg('assistant', 'two')];
    const firstPass = estimate(msgs);
    const secondPass = estimate(msgs);
    expect(firstPass).toBe(secondPass);
  });
});
// ---- turns ------------------------------------------------------------------
describe('turns', () => {
  it('returns [] for an empty message list', () => {
    const boundaries = turns([]);
    expect(boundaries).toEqual([]);
  });

  it('returns one turn for a single user message', () => {
    const only = mkMsg('user', 'hi');
    const boundaries = turns([only]);
    expect(boundaries).toHaveLength(1);
    expect(boundaries[0]).toEqual({ start: 0, end: 1, id: only.id });
  });

  it('returns two turns for user/assistant/user/assistant', () => {
    const firstQ = mkMsg('user', 'q1');
    const firstA = mkMsg('assistant', 'a1');
    const secondQ = mkMsg('user', 'q2');
    const secondA = mkMsg('assistant', 'a2');
    const expected = [
      { start: 0, end: 2, id: firstQ.id },
      { start: 2, end: 4, id: secondQ.id },
    ];
    expect(turns([firstQ, firstA, secondQ, secondA])).toEqual(expected);
  });

  it('extends the final turn end to include trailing non-user messages', () => {
    // [user, assistant, system] is a single turn: only the user row opens a
    // turn, and the last turn runs to the end of the list (end = 3).
    const question = mkMsg('user', 'q');
    const answer = mkMsg('assistant', 'a');
    const note = mkMsg('system', 'note');
    const boundaries = turns([question, answer, note]);
    expect(boundaries).toEqual([{ start: 0, end: 3, id: question.id }]);
  });

  it('skips user rows flagged as summary (anchored-rolling rows)', () => {
    // process() already removes summary rows before calling select(); turns()
    // skips them as well so a summary row can never open a turn.
    const firstQ = mkMsg('user', 'q1');
    const firstA = mkMsg('assistant', 'a1');
    const rolledUp = mkMsg('user', 'rolled-up', { summary: true });
    const secondQ = mkMsg('user', 'q2');
    const ids = turns([firstQ, firstA, rolledUp, secondQ]).map((t) => t.id);
    expect(ids).toEqual([firstQ.id, secondQ.id]);
  });
});
// ---- select -----------------------------------------------------------------
describe('select', () => {
  it('returns empty head + undefined tail for an empty message list', () => {
    const { head, tail_start_id } = select([], 100_000);
    expect(head).toEqual([]);
    expect(tail_start_id).toBeUndefined();
  });

  it('full-preserves when there are fewer turns than tail_turns', () => {
    // One turn with tail_turns=2: the only turn is kept, its start index is
    // 0, and select() returns the "no compaction this round" result.
    const question = mkMsg('user', 'only');
    const answer = mkMsg('assistant', 'a');
    const { head, tail_start_id } = select([question, answer], 100_000, 2);
    expect(head).toEqual([question, answer]);
    expect(tail_start_id).toBeUndefined();
  });

  it('keeps the last tail_turns turns when they all fit the budget', () => {
    // Three small turns at [0,2), [2,4), [4,6). With tail_turns=2 the last
    // two are kept, so the tail starts at u2 and the head is turn one only.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2');
    const a2 = mkMsg('assistant', 'a2');
    const u3 = mkMsg('user', 'q3');
    const a3 = mkMsg('assistant', 'a3');
    const { head, tail_start_id } = select([u1, a1, u2, a2, u3, a3], 100_000, 2);
    expect(tail_start_id).toBe(u2.id);
    expect(head).toEqual([u1, a1]);
  });

  it('splits a turn mid-stream when the whole turn would overflow the budget', () => {
    // tail_turns=1 looks at the newest turn only. Its 40k-char assistant row
    // pushes the turn past the 8k-token cap, so select() falls back to
    // splitting inside the turn.
    const u1 = mkMsg('user', 'q1');
    const a1 = mkMsg('assistant', 'a1');
    const u2 = mkMsg('user', 'q2 with a giant payload');
    const huge = mkMsg('assistant', 'X'.repeat(40_000)); // ~10k tokens
    const smallTail = mkMsg('assistant', 'short answer');
    const { head, tail_start_id } = select([u1, a1, u2, huge, smallTail], 100_000, 1);
    // The exact split id depends on character math, so only assert that a
    // split happened (tail id set, head non-empty) and that the final small
    // message stayed in the preserved tail.
    expect(tail_start_id).toBeDefined();
    expect(head.length).toBeGreaterThan(0);
    expect(head).not.toContain(smallTail);
  });

  it('full-preserves when no split point fits', () => {
    // usable(30k) = 10k, so the budget is
    // min(8k, max(2k, floor(10k * 0.25))) = 2500 tokens. The assistant row is
    // ~10k tokens, so neither the whole turn nor any suffix of it fits and
    // select() returns the full-preserve result.
    const question = mkMsg('user', 'oversized');
    const answer = mkMsg('assistant', 'Y'.repeat(40_000));
    const { head, tail_start_id } = select([question, answer], 30_000, 1);
    expect(tail_start_id).toBeUndefined();
    expect(head).toEqual([question, answer]);
  });
});
// ---- buildPrompt ------------------------------------------------------------
describe('buildPrompt', () => {
  it('opens with the "create new" anchor when previousSummary is undefined', () => {
    const prompt = buildPrompt(undefined, []);
    const opensWithCreate = prompt.startsWith('Create a new anchored summary');
    expect(opensWithCreate).toBe(true);
    expect(prompt).toContain(SUMMARY_TEMPLATE);
    expect(prompt).not.toContain('<previous-summary>');
  });

  it('opens with the "update" anchor and embeds previousSummary verbatim', () => {
    const prior = '## Goal\n- finish v1.11 compaction';
    const prompt = buildPrompt(prior, []);
    const opensWithUpdate = prompt.startsWith('Update the anchored summary');
    expect(opensWithUpdate).toBe(true);
    expect(prompt).toContain('<previous-summary>');
    expect(prompt).toContain(prior);
    expect(prompt).toContain('</previous-summary>');
    expect(prompt).toContain(SUMMARY_TEMPLATE);
  });

  it('appends extra context strings after the template (reserved for plugin injection)', () => {
    const extra = 'extra-context-line';
    const prompt = buildPrompt(undefined, [extra]);
    expect(prompt.endsWith('extra-context-line')).toBe(true);
  });
});

View File

@@ -0,0 +1,40 @@
// v1.11: anchored rolling summary template. Verbatim port from opencode
// (packages/opencode/src/session/compaction.ts SUMMARY_TEMPLATE). Kept in a
// separate module so the long template literal doesn't bloat compaction.ts.
//
// Consumed by buildPrompt() in compaction.ts, which places it after the
// create/update anchor text in the final user message. The text is sent to
// the model as-is, so any edit here changes the prompt.
export const SUMMARY_TEMPLATE = `Output exactly the Markdown structure shown inside <template> and keep the section order unchanged. Do not include the <template> tags in your response.
<template>
## Goal
- [single-sentence task summary]
## Constraints & Preferences
- [user constraints, preferences, specs, or "(none)"]
## Progress
### Done
- [completed work or "(none)"]
### In Progress
- [current work or "(none)"]
### Blocked
- [blockers or "(none)"]
## Key Decisions
- [decision and why, or "(none)"]
## Next Steps
- [ordered next actions or "(none)"]
## Critical Context
- [important technical facts, errors, open questions, or "(none)"]
## Relevant Files
- [file or directory path: why it matters, or "(none)"]
</template>
Rules:
- Keep every section, even when empty.
- Use terse bullets, not prose paragraphs.
- Preserve exact file paths, commands, error strings, and identifiers when known.
- Do not mention the summary process or that context was compacted.`;

View File

@@ -0,0 +1,503 @@
// v1.11: anchored rolling compaction. Ported algorithms (not Effect-TS code)
// from opencode (packages/opencode/src/session/{compaction,overflow}.ts).
//
// What's different from BooCode's legacy /compact:
// - Operates per-chat (chats have N:1 to sessions; history is per-chat).
// - Detects overflow automatically after each inference completion using
// llama-swap's reported n_ctx; flags chats.needs_compaction=true.
// - On the next turn (or manual /compact) we summarize the *head* (messages
// prior to a preserved tail of N user-turns) into a single
// summary=true assistant row. Older messages get compacted_at-stamped so
// inference assembly filters them out; the GET endpoint still returns
// them so the UI can show history with the summary card inline.
// - The summary is *anchored rolling* — exactly one live summary=true row
// per chat. Subsequent compactions read the prior summary as
// previousSummary, ask the LLM to update-merge it, then mark the prior
// summary row compacted_at too (it stays in the UI but isn't sent to the
// LLM again).
import type { FastifyBaseLogger } from 'fastify';
import type { Sql } from '../db.js';
import type { Config } from '../config.js';
import type { Broker } from './broker.js';
import { SUMMARY_TEMPLATE } from './compaction-prompt.js';
// Tokens held back from the reported context window; usable() subtracts this
// from the limit before isOverflow() and select() use it.
const COMPACTION_BUFFER = 20_000;
// Lower and upper clamp (in estimated tokens) for the preserved-tail budget
// that select() derives from 25% of usable().
const MIN_PRESERVE_RECENT_TOKENS = 2_000;
const MAX_PRESERVE_RECENT_TOKENS = 8_000;
// Default for select()'s tailTurns parameter: how many of the most recent
// user turns it tries to keep verbatim.
const DEFAULT_TAIL_TURNS = 2;
// Subset of Message fields compaction touches. Selecting only what's needed
// keeps process() independent of api.ts mutations and reduces DB egress.
// The field list matches the column list process() selects from `messages`.
export interface CompactionMessage {
id: string;
role: 'user' | 'assistant' | 'system' | 'tool';
content: string;
// 'compact' marks a legacy compact row; buildHeadPayload forwards it to the
// model as system context.
kind: 'message' | 'compact';
// True on an anchored-summary row. turns() never starts a turn on such a
// row, and process() removes them before calling select().
summary: boolean;
// buildHeadPayload skips assistant rows that are 'streaming' or 'cancelled'.
status: 'streaming' | 'complete' | 'failed' | 'cancelled';
// Tool invocations on an assistant row; null when there are none.
tool_calls: Array<{ id: string; name: string; args: Record<string, unknown> }> | null;
// Result payload on a 'tool' row; buildHeadPayload skips tool rows where
// this is null.
tool_results: { tool_call_id: string; output: unknown; truncated: boolean; error?: string } | null;
// metadata.kind === 'cap_hit' on a system row marks a sentinel that is not
// sent to the model (see isCapHitSentinel).
metadata: { kind?: string } | null;
// NOTE(review): typed as string here; the test fixture uses ISO-8601 —
// confirm the DB driver returns strings rather than Date objects.
created_at: string;
}
// === overflow ===
// Number of context tokens left once COMPACTION_BUFFER has been set aside.
// A missing, zero, NaN or negative limit counts as "unknown" and yields 0,
// which callers read as "do not trigger overflow". The result is never
// negative: a window smaller than the buffer also yields 0.
export function usable(contextLimit: number): number {
  const limitKnown = Boolean(contextLimit) && contextLimit > 0;
  if (!limitKnown) return 0;
  const headroom = contextLimit - COMPACTION_BUFFER;
  return headroom > 0 ? headroom : 0;
}
// Token counts for one completion, as passed to isOverflow().
export interface Usage {
  prompt_tokens: number;
  completion_tokens: number;
}
// Reports whether prompt + completion tokens have reached usable(contextLimit).
// The comparison is inclusive (>=). When usable() is 0 (unknown limit, or a
// window no larger than the buffer) this always returns false, so compaction
// is never auto-triggered without a budget to size it against.
export function isOverflow(usage: Usage, contextLimit: number): boolean {
  const ceiling = usable(contextLimit);
  if (ceiling > 0) {
    const spent = usage.prompt_tokens + usage.completion_tokens;
    return spent >= ceiling;
  }
  return false;
}
// === selection ===
// One user turn inside a message array, as produced by turns().
interface Turn {
// Index of the user message that opens the turn.
start: number;
// Exclusive end index: the start of the next turn, or messages.length for
// the final turn.
end: number;
// id of the user message at `start`.
id: string;
}
// Rough token estimate: length of the JSON-serialized message array divided
// by four, rounded up. The original comment says this matches opencode's
// Token.estimate. It is only used for tail-fitting math, so no real
// tokenizer is needed. An empty array serializes to "[]" and estimates to 1.
export function estimate(messages: CompactionMessage[]): number {
  const serialized = JSON.stringify(messages);
  return Math.ceil(serialized.length / 4);
}
// Partition `messages` into user turns. A turn opens at every user row that
// is not a summary row; it closes (exclusive `end`) where the next turn opens,
// and the last turn runs to messages.length. Rows before the first such user
// row belong to no turn. Returns [] when there is no qualifying user row.
export function turns(messages: CompactionMessage[]): Turn[] {
  const boundaries: Turn[] = [];
  messages.forEach((msg, index) => {
    const opensTurn = msg.role === 'user' && !msg.summary;
    if (!opensTurn) return;
    // Close the previously opened turn at this index before opening a new one.
    const previous = boundaries[boundaries.length - 1];
    if (previous) previous.end = index;
    boundaries.push({ start: index, end: messages.length, id: msg.id });
  });
  return boundaries;
}
// Finds a split point inside a turn that does not fit the budget as a whole.
// Candidates are tried from turn.start + 1 upward, so the first suffix
// [candidate, turn.end) whose estimate is within `budget` is also the longest
// one. Returns the index and id of the first preserved message, or undefined
// when the budget is exhausted, the turn has a single message, or no suffix
// fits.
// NOTE(review): the split can land on any row, including a 'tool' row whose
// matching assistant tool_calls row would then stay in the head — confirm
// downstream assembly tolerates that.
function splitTurn(
  messages: CompactionMessage[],
  turn: Turn,
  budget: number,
): { start: number; id: string } | undefined {
  const budgetExhausted = budget <= 0;
  const singleMessage = turn.end - turn.start <= 1;
  if (budgetExhausted || singleMessage) return undefined;

  let candidate = turn.start + 1;
  while (candidate < turn.end) {
    const suffix = messages.slice(candidate, turn.end);
    const tooBig = estimate(suffix) > budget;
    if (!tooBig) {
      return { start: candidate, id: messages[candidate]!.id };
    }
    candidate += 1;
  }
  return undefined;
}
// Outcome of select(): `head` is the slice to summarize and `tail_start_id`
// is the id of the first preserved message. When nothing should be compacted,
// `head` is the full input and `tail_start_id` is undefined.
export interface SelectResult {
  head: CompactionMessage[];
  tail_start_id: string | undefined;
}
// Picks the boundary between the head (to be summarized) and the tail (kept
// verbatim):
//   1. The tail budget is 25% of usable(contextLimit), clamped to
//      [MIN_PRESERVE_RECENT_TOKENS, MAX_PRESERVE_RECENT_TOKENS].
//   2. Only the last `tailTurns` user turns are candidates; they are added
//      newest-first while they fit the budget as whole turns.
//   3. The first turn that does not fit whole is split mid-turn with the
//      remaining budget, and the walk stops there.
//   4. If nothing could be kept, or the kept range starts at index 0, the
//      result is "full preserve" (no compaction this round). The same result
//      is returned for tailTurns <= 0 and for input without any user turn.
export function select(
  messages: CompactionMessage[],
  contextLimit: number,
  tailTurns: number = DEFAULT_TAIL_TURNS,
): SelectResult {
  const preserveAll: SelectResult = { head: messages, tail_start_id: undefined };
  if (tailTurns <= 0) return preserveAll;

  const target = Math.floor(usable(contextLimit) * 0.25);
  const budget = Math.min(
    MAX_PRESERVE_RECENT_TOKENS,
    Math.max(MIN_PRESERVE_RECENT_TOKENS, target),
  );

  const boundaries = turns(messages);
  if (boundaries.length === 0) return preserveAll;

  let spent = 0;
  let keep: { start: number; id: string } | undefined;
  // slice() copies, so reversing the candidates leaves `boundaries` untouched.
  const newestFirst = boundaries.slice(-tailTurns).reverse();
  for (const turn of newestFirst) {
    const cost = estimate(messages.slice(turn.start, turn.end));
    const fitsWhole = spent + cost <= budget;
    if (!fitsWhole) {
      // A failed split keeps whatever newer turns were already accepted.
      keep = splitTurn(messages, turn, budget - spent) ?? keep;
      break;
    }
    spent += cost;
    keep = { start: turn.start, id: turn.id };
  }

  if (keep === undefined || keep.start === 0) return preserveAll;
  return {
    head: messages.slice(0, keep.start),
    tail_start_id: keep.id,
  };
}
// === prompt assembly ===
// Produces the text of the final user message that asks the model for the
// anchored summary. With a non-empty `previousSummary` the prompt asks for an
// update and embeds the prior summary between <previous-summary> tags;
// otherwise (undefined or empty string) it asks for a fresh summary.
// SUMMARY_TEMPLATE follows the anchor, then each `context` entry, all
// separated by blank lines. `context` is reserved for future plugin
// injection; callers pass [] today.
export function buildPrompt(
  previousSummary: string | undefined,
  context: string[],
): string {
  let anchor: string;
  if (previousSummary) {
    const updateLines = [
      'Update the anchored summary below using the conversation history above.',
      'Preserve still-true details, remove stale details, and merge in the new facts.',
      '<previous-summary>',
      previousSummary,
      '</previous-summary>',
    ];
    anchor = updateLines.join('\n');
  } else {
    anchor = 'Create a new anchored summary from the conversation history above.';
  }
  const sections = [anchor, SUMMARY_TEMPLATE].concat(context);
  return sections.join('\n\n');
}
// === OpenAI conversion (compaction-local; intentionally does NOT call
// inference.ts buildMessagesPayload because that uses the legacy "find latest
// kind='compact' marker and skip everything before it" shortcircuit, which
// would silently drop pre-legacy-compact history before the LLM sees it.
// Compaction wants to send the entire head, full stop.) ===
//
// One entry of the `messages` array that callLlamaSwap posts to
// /v1/chat/completions. Produced by buildHeadPayload.
interface OpenAiMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
// null is used for assistant rows whose text content is empty.
content: string | null;
// Set only on assistant entries that carry tool calls; `arguments` holds
// the JSON-serialized args.
tool_calls?: Array<{
id: string;
type: 'function';
function: { name: string; arguments: string };
}>;
// Set only on 'tool' entries; copied from the row's tool_results.
tool_call_id?: string;
}
// True for a system row whose metadata.kind is 'cap_hit'. buildHeadPayload
// uses this to keep such sentinel rows out of the model payload.
function isCapHitSentinel(m: CompactionMessage): boolean {
  if (m.role !== 'system') return false;
  return m.metadata?.kind === 'cap_hit';
}
// Converts the head rows into OpenAI-style chat messages for the summary
// request. Row handling, in order of precedence:
//   - cap_hit sentinel rows and assistant rows that are still streaming or
//     were cancelled are dropped;
//   - legacy kind='compact' rows are passed through as system context;
//   - summary rows are rendered as assistant content (defensive: process()
//     filters them out before select());
//   - tool rows become role 'tool' entries (rows without tool_results are
//     dropped);
//   - assistant rows keep their tool_calls, with empty text mapped to null;
//   - everything else is sent with role 'user'.
// NOTE(review): "everything else" includes system rows that are not cap_hit
// sentinels — confirm that sending them as 'user' is intended.
function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
  const out: OpenAiMessage[] = [];
  for (const m of head) {
    if (isCapHitSentinel(m)) continue;
    if (m.role === 'assistant' && (m.status === 'streaming' || m.status === 'cancelled')) continue;
    if (m.kind === 'compact') {
      // Legacy compact row — pass through as system context. The new
      // anchored summary will subsume it, but the LLM should see it during
      // the bridging round so it can carry forward the still-true bits.
      out.push({ role: 'system', content: m.content });
      continue;
    }
    if (m.summary) {
      // Defense in depth: process() filters these out of the select-input
      // already. If one slips through, render it as assistant content so we
      // never crash here.
      out.push({ role: 'assistant', content: m.content });
      continue;
    }
    if (m.role === 'tool') {
      const tr = m.tool_results;
      if (!tr) continue;
      let outputText: string;
      if (tr.error) {
        outputText = `error: ${tr.error}`;
      } else if (typeof tr.output === 'string') {
        outputText = tr.output;
      } else {
        // JSON.stringify returns undefined (not a string) when the value is
        // undefined. Without a fallback, `content` would be dropped from the
        // serialized request body and the tool message would be malformed.
        const serialized: string | undefined = JSON.stringify(tr.output);
        outputText = serialized ?? '';
      }
      out.push({ role: 'tool', content: outputText, tool_call_id: tr.tool_call_id });
      continue;
    }
    if (m.role === 'assistant') {
      const msg: OpenAiMessage = {
        role: 'assistant',
        content: m.content && m.content.length > 0 ? m.content : null,
      };
      if (m.tool_calls && m.tool_calls.length > 0) {
        msg.tool_calls = m.tool_calls.map((tc) => ({
          id: tc.id,
          type: 'function' as const,
          function: { name: tc.name, arguments: JSON.stringify(tc.args) },
        }));
      }
      out.push(msg);
      continue;
    }
    out.push({ role: 'user', content: m.content });
  }
  return out;
}
// === llama-swap call ===
// Result of one summary completion as returned by callLlamaSwap.
interface CompletionResult {
  content: string;
  promptTokens: number;
  completionTokens: number;
  nCtx: number | null;
}
// Sends a single non-streaming (stream: false) chat completion request to
// `${config.LLAMA_SWAP_URL}/v1/chat/completions` and extracts the fields
// compaction needs. Throws an Error carrying the HTTP status and up to 200
// characters of the response body when the response is not ok. Fields absent
// from the response default to '' (content), 0 (token counts) and null
// (n_ctx). One debug line with the extracted numbers is logged before
// returning.
async function callLlamaSwap(
  config: Config,
  model: string,
  messages: OpenAiMessage[],
  log: FastifyBaseLogger,
): Promise<CompletionResult> {
  const endpoint = `${config.LLAMA_SWAP_URL}/v1/chat/completions`;
  const requestBody = JSON.stringify({ model, messages, stream: false });
  const res = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: requestBody,
  });
  if (!res.ok) {
    // Reading the body is best-effort; a failed read must not hide the status.
    const text = await res.text().catch(() => '');
    throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
  }

  const json = (await res.json()) as {
    choices?: Array<{ message?: { content?: string } }>;
    usage?: { prompt_tokens?: number; completion_tokens?: number };
    timings?: { n_ctx?: number };
  };
  const result: CompletionResult = {
    content: json.choices?.[0]?.message?.content ?? '',
    promptTokens: json.usage?.prompt_tokens ?? 0,
    completionTokens: json.usage?.completion_tokens ?? 0,
    nCtx: typeof json.timings?.n_ctx === 'number' ? json.timings.n_ctx : null,
  };
  log.debug(
    {
      promptTokens: result.promptTokens,
      completionTokens: result.completionTokens,
      nCtx: result.nCtx,
      chars: result.content.length,
    },
    'compaction llm complete',
  );
  return result;
}
// === entry point ===
// Dependencies and target for one process() run.
export interface ProcessInput {
// Tagged-template SQL client used for every chats/sessions/messages query.
sql: Sql;
// Application config. NOTE(review): presumably forwarded to callLlamaSwap
// for LLAMA_SWAP_URL — the call site is not visible in this part of the file.
config: Config;
// Logger for compaction's warn/info lines.
log: FastifyBaseLogger;
// Used to publish chat_status frames on the user channel via publishUser.
broker: Broker;
// id of the chat to compact.
chatId: string;
}
// Runs one round of anchored rolling compaction on `chatId`. No-ops cleanly
// (clearing needs_compaction) when there's nothing reasonable to compact.
// Throws on LLM failure — callers decide whether to log+swallow or surface.
export async function process(input: ProcessInput): Promise<void> {
const { sql, config, log, broker, chatId } = input;
// 1. Resolve chat → session for model + WS publish channel.
const chatRows = await sql<{ id: string; session_id: string }[]>`
SELECT id, session_id FROM chats WHERE id = ${chatId}
`;
if (chatRows.length === 0) {
log.warn({ chatId }, 'compaction: chat not found');
return;
}
const chat = chatRows[0]!;
const sessionId = chat.session_id;
const sessRows = await sql<{ id: string; model: string }[]>`
SELECT id, model FROM sessions WHERE id = ${sessionId}
`;
if (sessRows.length === 0) {
log.warn({ chatId, sessionId }, 'compaction: session not found');
return;
}
const session = sessRows[0]!;
// 2. All currently-active messages in this chat (compacted_at IS NULL).
// ORDER BY (created_at, id) matches loadContext in inference.ts so the
// turns() boundary logic sees the same sequence the LLM will.
const messages = await sql<CompactionMessage[]>`
SELECT id, role, content, kind, summary, status, tool_calls, tool_results, metadata, created_at
FROM messages
WHERE chat_id = ${chatId} AND compacted_at IS NULL
ORDER BY created_at ASC, id ASC
`;
if (messages.length === 0) {
await sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
return;
}
// 3. Find the prior anchored summary (newest summary=true row). Its content
// becomes previousSummary — the anchor in the prompt. Filter it out of the
// select-input so we don't double-encode (it's already in the anchor text).
const previousSummary = messages.filter((m) => m.summary).at(-1)?.content;
const forSelect = messages.filter((m) => !m.summary);
// 4. Resolve a recent context limit. llama-swap reports timings.n_ctx per
// completion; we cache it on messages.ctx_max. Use the most recent non-null
// value from any message in this chat (this assumes the same model is still
// running). When no value is recorded, contextLimit is 0: usable() returns 0
// and select() falls back to the MIN_PRESERVE_RECENT_TOKENS tail budget.
const ctxRows = await sql<{ ctx_max: number | null }[]>`
SELECT ctx_max FROM messages
WHERE chat_id = ${chatId} AND ctx_max IS NOT NULL
ORDER BY created_at DESC LIMIT 1
`;
const contextLimit = ctxRows[0]?.ctx_max ?? 0;
// 5. Decide head / tail.
const sel = select(forSelect, contextLimit);
if (!sel.tail_start_id || sel.head.length === 0) {
// Full preserve — nothing to compact this round. Clear the flag so we
// don't loop. (Could happen when the chat is short or the budget swung
// wider after a model context bump.)
await sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
log.info({ chatId, contextLimit, msgCount: messages.length }, 'compaction: nothing to compact');
return;
}
// 6. Build the OpenAI request: head as user/assistant/tool turns + a final
// user message carrying buildPrompt(previousSummary, []). No system prompt
// — matches opencode (`system: []`); the template + anchor are sufficient.
const headPayload = buildHeadPayload(sel.head);
const finalUser: OpenAiMessage = { role: 'user', content: buildPrompt(previousSummary, []) };
const payload = [...headPayload, finalUser];
log.info(
{
chatId,
contextLimit,
headLen: sel.head.length,
tailStartId: sel.tail_start_id,
hadPrevSummary: previousSummary !== undefined,
},
'compaction: invoking model',
);
// 6a. Flip the chat dot amber for the duration of the LLM call + DB writes.
// Same { type: 'chat_status', status: 'working', at } shape inference.ts
// emits at runner enqueue. publishUser → broadcasts on the per-user channel
// (all devices / tabs see it) since chat_status is a user-channel frame in
// BooCode (see useChatStatus.ts, which is the consumer).
broker.publishUser('default', {
type: 'chat_status',
chat_id: chatId,
status: 'working',
at: new Date().toISOString(),
});
// try/finally so the dot ALWAYS drops back to idle, even if the LLM call
// throws or a downstream DB write fails. The succeeded flag gates the
// 'compacted' frame + final log: we only signal completion to the UI when
// the new summary row actually landed.
let succeeded = false;
let newId = '';
let result: CompletionResult | undefined;
try {
// 7. Single completion (no tools). Throws on llama-swap failure.
result = await callLlamaSwap(config, session.model, payload, log);
// 8. Insert the new anchored summary row. role='assistant' per spec; the
// UI distinguishes via summary=true. tail_start_id points at the first
// preserved tail message so debug surfaces / future tools can reason
// about the boundary without re-deriving from compacted_at.
const insertRows = await sql<{ id: string }[]>`
INSERT INTO messages (
session_id, chat_id, role, content, kind, status,
summary, tail_start_id,
tokens_used, ctx_used, ctx_max,
created_at, finished_at
)
VALUES (
${sessionId}, ${chatId}, 'assistant', ${result.content}, 'message', 'complete',
true, ${sel.tail_start_id},
${result.completionTokens}, ${result.promptTokens}, ${result.nCtx},
clock_timestamp(), clock_timestamp()
)
RETURNING id
`;
newId = insertRows[0]!.id;
// 9. Mark every prior live message (head + prior summary) as compacted.
// Bound by "created_at strictly less than tail_start_id's created_at" so
// the preserved tail stays compacted_at=NULL. Exclude the new summary
// row we just inserted (it's "now", which is >= tail_start_id's
// created_at anyway, but defensive).
await sql`
UPDATE messages
SET compacted_at = clock_timestamp()
WHERE chat_id = ${chatId}
AND compacted_at IS NULL
AND id != ${newId}
AND created_at < (SELECT created_at FROM messages WHERE id = ${sel.tail_start_id})
`;
// 10. Clear the flag and bump the chat's updated_at so the sidebar
// reflects recent activity.
await sql`
UPDATE chats
SET needs_compaction = false, updated_at = clock_timestamp()
WHERE id = ${chatId}
`;
succeeded = true;
} finally {
// Always restore the dot. Status='idle' (not 'error') even on failure —
// the caller logs/re-surfaces the error separately; the dot doesn't
// need to stay red across reloads for a transient compaction blip.
broker.publishUser('default', {
type: 'chat_status',
chat_id: chatId,
status: 'idle',
at: new Date().toISOString(),
});
}
// 11. Tell the client. useSessionStream subscribes to the per-session WS
// channel; the handler refetches messages (so the new summary row + the
// compacted_at-stamped older rows render correctly) and fires a sonner
// toast. Order matters: idle must precede 'compacted' so the dot is
// already green by the time the refetch toast appears.
if (succeeded) {
broker.publish(sessionId, {
type: 'compacted',
session_id: sessionId,
chat_id: chatId,
summary_message_id: newId,
});
log.info(
{
chatId,
newId,
completionTokens: result?.completionTokens,
promptTokens: result?.promptTokens,
},
'compaction: complete',
);
}
}

View File

@@ -21,6 +21,8 @@ import {
import { PathScopeError, resolveProjectRoot } from './path_guard.js';
import { maybeAutoNameChat } from './auto_name.js';
import { getAgentById } from './agents.js';
import * as compaction from './compaction.js';
import type { Broker } from './broker.js';
// Builds the base system prompt text for a chat, interpolating the absolute
// project path so the model knows which project the user is working on.
// The wording is runtime text sent to the model — edit with care.
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -147,6 +149,12 @@ export interface InferenceContext {
log: FastifyBaseLogger;
publish: FramePublisher;
publishUser: (frame: UserStreamFrame) => void;
// v1.11: passed through so compaction.process can publish 'compacted'
// frames on the same session WS channel useSessionStream subscribes to.
// Compaction is the only path that needs the raw broker handle (regular
// inference goes through `publish`); keeping a separate field avoids
// tempting other code paths into bypassing the session-id binding.
broker: Broker;
}
// Resolution order: base prompt < agent.system_prompt < user prompt, where
@@ -260,17 +268,48 @@ async function loadContext(
if (projectRows.length === 0) return null;
const project = projectRows[0]!;
// v1.11: filter compacted messages out of the inference assembly. The GET
// /api/sessions/:id/messages endpoint still returns everything (so the UI
// can show history with the summary card inline); only LLM payloads skip
// compacted rows. compacted_at IS NULL keeps the active summary + tail.
const history = await sql<Message[]>`
SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
FROM messages
WHERE chat_id = ${chatId}
WHERE chat_id = ${chatId} AND compacted_at IS NULL
ORDER BY created_at ASC, id ASC
`;
return { session, project, history };
}
// v1.11: shared helper used after both finalizeCompletion and executeToolPhase
// persist their token counts. Reads tokens off the just-UPDATEd row (which
// the caller returns from RETURNING), runs compaction.isOverflow, and flips
// chats.needs_compaction. The next runAssistantTurn invocation acts on it.
// Silent on missing tokens — llama-swap occasionally omits usage on truncated
// streams, and we'd rather miss one overflow than crash the inference path.
async function maybeFlagForCompaction(
  ctx: InferenceContext,
  chatId: string,
  updated: { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null } | undefined,
): Promise<void> {
  // No row came back from the caller's UPDATE … RETURNING: nothing to inspect.
  if (!updated) return;

  const {
    ctx_used: promptTokens,
    tokens_used: completionTokens,
    ctx_max: contextLimit,
  } = updated;

  // Any missing count means we cannot judge overflow; stay silent rather than
  // guess (or crash the inference path).
  if (
    typeof promptTokens !== 'number' ||
    typeof completionTokens !== 'number' ||
    typeof contextLimit !== 'number'
  ) {
    return;
  }

  const usage = { prompt_tokens: promptTokens, completion_tokens: completionTokens };
  if (!compaction.isOverflow(usage, contextLimit)) return;

  // Over budget: raise the flag so the next assistant turn compacts first.
  await ctx.sql`UPDATE chats SET needs_compaction = true WHERE id = ${chatId}`;
  ctx.log.info({ chatId, promptTokens, completionTokens, contextLimit }, 'inference: flagged for compaction');
}
async function* sseLines(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
const reader = stream.getReader();
const decoder = new TextDecoder('utf-8');
@@ -758,6 +797,10 @@ async function executeToolPhase(
WHERE id = ${assistantMessageId}
RETURNING tokens_used, ctx_used, ctx_max, finished_at
`;
// v1.11: flag for compaction if this turn pushed us over the usable budget.
// We never compact mid-loop (the recursive runAssistantTurn keeps tools
// flowing); the flag fires on the NEXT turn's pre-fetch hook above.
await maybeFlagForCompaction(ctx, chatId, updated);
const [toolSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
UPDATE sessions SET updated_at = clock_timestamp()
WHERE id = ${sessionId}
@@ -889,6 +932,9 @@ async function finalizeCompletion(
WHERE id = ${assistantMessageId}
RETURNING tokens_used, ctx_used, ctx_max, finished_at
`;
// v1.11: flag for compaction on the terminal turn too. Catches the common
// case of a turn that hit the limit without invoking tools.
await maybeFlagForCompaction(ctx, chatId, updated);
const [completeSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
UPDATE sessions SET updated_at = clock_timestamp()
WHERE id = ${sessionId}
@@ -927,6 +973,29 @@ async function runAssistantTurn(
): Promise<void> {
const { sessionId, chatId } = args;
// v1.11: if the prior turn flagged this chat for compaction, run it first
// so loadContext below reads the post-compaction history. We swallow
// compaction failures (clearing the flag so we don't loop) and proceed
// with the un-compacted history — a slow turn that hits the model's
// hard limit is recoverable; a dead session is not.
const chatFlag = await ctx.sql<{ needs_compaction: boolean }[]>`
SELECT needs_compaction FROM chats WHERE id = ${chatId}
`;
if (chatFlag[0]?.needs_compaction) {
try {
await compaction.process({
sql: ctx.sql,
config: ctx.config,
log: ctx.log,
broker: ctx.broker,
chatId,
});
} catch (err) {
ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
}
}
const loaded = await loadContext(ctx.sql, sessionId, chatId);
if (!loaded) {
ctx.log.warn({ sessionId }, 'inference: session or project missing');
@@ -1237,81 +1306,6 @@ async function insertCapHitSentinel(
});
}
// Instruction text for the legacy compact path: runCompact appends it as a
// trailing system message after the conversation payload so the model replies
// with a single summary paragraph. Runtime text — edit with care.
const COMPACT_SYSTEM_PROMPT =
'Summarize the preceding conversation into a dense but complete context paragraph. Preserve all key facts, decisions, file paths, code patterns, and action items. Do not add any new information. Output only the summary paragraph.';
// Legacy compact path: streams a one-paragraph summary of the chat into the
// pre-created placeholder row `compactMessageId`, publishing
// message_started / delta / message_complete (or error) frames on the session
// channel as it goes. Returns silently when the session/project cannot be
// loaded.
async function runCompact(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  compactMessageId: string
): Promise<void> {
  const loaded = await loadContext(ctx.sql, sessionId, chatId);
  if (!loaded) return;
  const { session, project, history } = loaded;

  // Summarize everything except the placeholder row itself, then append the
  // summarization instruction as the last message.
  const summaryRequest = buildMessagesPayload(
    session,
    project,
    history.filter((m) => m.id !== compactMessageId)
  );
  summaryRequest.push({ role: 'system', content: COMPACT_SYSTEM_PROMPT });

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: compactMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  // `streamed` accumulates deltas so a failure can persist the partial text;
  // `finalText` is the authoritative content returned by the completion.
  let streamed = '';
  let finalText = '';
  try {
    const forwardDelta = (delta: string) => {
      streamed += delta;
      ctx.publish(sessionId, {
        type: 'delta',
        message_id: compactMessageId,
        chat_id: chatId,
        content: delta,
      });
    };
    const completion = await streamCompletion(
      ctx,
      session.model,
      summaryRequest,
      { tools: null },
      forwardDelta
    );
    finalText = completion.content;
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    // Keep whatever was streamed so far and mark the placeholder as failed.
    await ctx.sql`
      UPDATE messages SET status = 'failed', content = ${streamed}, finished_at = clock_timestamp()
      WHERE id = ${compactMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'error',
      message_id: compactMessageId,
      chat_id: chatId,
      error: errMsg,
    });
    return;
  }

  // Count the messages the summary covers: skip the placeholder and any
  // earlier kind='compact' rows.
  let preCompactCount = 0;
  for (const m of history) {
    if (m.id !== compactMessageId && m.kind !== 'compact') {
      preCompactCount += 1;
    }
  }

  const summary = `[Context compacted — ${preCompactCount} messages summarized]\n\n${finalText}`;
  await ctx.sql`
    UPDATE messages SET content = ${summary}, status = 'complete', finished_at = clock_timestamp()
    WHERE id = ${compactMessageId}
  `;
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: compactMessageId,
    chat_id: chatId,
  });
}
interface InferenceRegistration {
controller: AbortController;
completed: Promise<void>;
@@ -1328,6 +1322,10 @@ export function createInferenceRunner(
const callCtx: InferenceContext = {
...ctx,
publishUser: (frame) => publishUserFn(user, frame),
// v1.11: broker comes in via ctx (set at registration time). Repeated
// here so the destructure carries it onto the per-call ctx without
// having to add it to every enqueue/cancel signature individually.
broker: ctx.broker,
};
// v1.8 mobile-tabs: announce working before the async loop starts so
// every device subscribed to the user channel sees the amber dot.
@@ -1357,20 +1355,6 @@ export function createInferenceRunner(
})();
},
// Fire-and-forget entry point for the legacy compact path: binds the
// per-user publisher onto a per-call context, kicks off runCompact without
// awaiting it, and logs (rather than propagates) any rejection.
enqueueCompact(sessionId: string, chatId: string, compactMessageId: string, user: string) {
  const callCtx: InferenceContext = {
    ...ctx,
    publishUser: (frame) => publishUserFn(user, frame),
  };
  void runCompact(callCtx, sessionId, chatId, compactMessageId).catch((err: unknown) => {
    callCtx.log.error({ err }, 'unhandled compact error');
  });
},
async cancel(_sessionId: string, chatId: string): Promise<boolean> {
const reg = registry.get(chatId);
if (!reg) return false;

View File

@@ -159,6 +159,12 @@ export interface Message {
// v1.8.2: per-message metadata. See MessageMetadata for the discriminated
// shapes currently in use.
metadata: MessageMetadata | null;
// v1.11: anchored rolling compaction. Optional so consumers that SELECT
// the pre-v1.11 column set still type-check. See compaction.ts +
// schema.sql for semantics.
summary?: boolean;
tail_start_id?: string | null;
compacted_at?: string | null;
}
export interface ModelInfo {

View File

@@ -168,8 +168,11 @@ export const api = {
request<void>(`/api/chats/${chatId}`, { method: 'DELETE' }),
messages: (chatId: string) =>
request<Message[]>(`/api/chats/${chatId}/messages`),
// v1.11: anchored-rolling compaction. POST awaits the LLM call inside
// the route's lifecycle; the new summary row arrives via the 'compacted'
// WS frame (useSessionStream refetches + toasts).
compact: (chatId: string) =>
request<{ compact_message_id: string }>(`/api/chats/${chatId}/compact`, { method: 'POST' }),
request<{ ok: true }>(`/api/chats/${chatId}/compact`, { method: 'POST' }),
stop: (chatId: string) =>
request<{ stopped: boolean }>(`/api/chats/${chatId}/stop`, { method: 'POST' }),
forceSend: (chatId: string, content: string) =>

View File

@@ -145,6 +145,19 @@ export interface Message {
// v1.8.2: per-message metadata; see MessageMetadata. null for the vast
// majority of messages.
metadata: MessageMetadata | null;
// v1.11: anchored rolling compaction fields. Optional on the wire so that
// older API responses (or test fixtures) parse without explicit nulls.
// summary — true on the assistant row that holds the active
// anchored summary. Render via SummaryCard.
// tail_start_id — first preserved tail message the summary covers up to
// (exclusive). Diagnostic only on the client.
// compacted_at — set on rows that are "behind the curtain" of the
// current summary. Returned by the GET endpoint so the
// UI can show history, but the server-side inference
// assembly filters these out.
summary?: boolean;
tail_start_id?: string | null;
compacted_at?: string | null;
}
export interface ModelInfo {
@@ -305,6 +318,11 @@ export type WsFrame =
}
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
| { type: 'chat_renamed'; chat_id: string; name: string }
// v1.11: published by services/compaction.ts after the new anchored
// summary row lands. Carries the new summary row id for diagnostics; the
// session-stream handler ignores the id and re-fetches the full message
// list (the cohort of compacted_at-stamped rows changed too).
| { type: 'compacted'; session_id: string; chat_id: string; summary_message_id: string }
// v1.8.2: `reason` discriminates structured failures (the UI prefers it
// over `error` text when present).
| { type: 'error'; message_id?: string; chat_id?: string; error: string; reason?: ErrorReason };

View File

@@ -537,7 +537,70 @@ function CompactCard({ message, sessionChats }: { message: Message; sessionChats
);
}
// v1.11 anchored rolling summary. Inserted by services/compaction.ts as a
// role='assistant', summary=true row. Distinct from legacy CompactCard
// (which renders the kind='compact' system rows produced by v1.10 /compact).
// Collapsed by default; header shows the timestamp; body renders the
// summary markdown when expanded. Copy button matches CompactCard's affordance.
/**
 * Collapsible card for a `summary=true` message row.
 *
 * The header shows "Compacted summary" plus a localized timestamp and a copy
 * button; the markdown body is rendered only while the card is expanded.
 */
function SummaryCard({ message }: { message: Message }) {
  const [expanded, setExpanded] = useState(false);
  const [copied, setCopied] = useState(false);

  // Prefer finished_at and fall back to created_at; an absent timestamp
  // renders as an empty label.
  const landedAt = message.finished_at ?? message.created_at;
  const headerLabel = landedAt ? new Date(landedAt).toLocaleString() : '';

  // Pick the icons up front so the JSX below stays flat.
  const Caret = expanded ? ChevronDown : ChevronRight;
  const CopyGlyph = copied ? Check : Copy;

  const toggleExpanded = () => setExpanded(!expanded);

  // Copies the raw summary text; the check-mark reverts after 1.2s.
  const copySummary = async () => {
    try {
      await navigator.clipboard.writeText(message.content);
      setCopied(true);
      setTimeout(() => setCopied(false), 1200);
      toast.success('Summary copied to clipboard');
    } catch {
      toast.error('Copy failed');
    }
  };

  return (
    <div className="rounded-lg border border-primary/30 bg-primary/5 text-sm">
      <div className="flex items-center gap-2 px-3 py-2">
        <button
          type="button"
          onClick={toggleExpanded}
          className="flex items-center gap-1.5 flex-1 min-w-0 text-left text-muted-foreground hover:text-foreground"
        >
          <Caret size={14} />
          <span className="text-xs font-medium truncate">
            Compacted summary {headerLabel}
          </span>
        </button>
        <button
          type="button"
          onClick={() => void copySummary()}
          className="p-1 rounded hover:bg-muted text-muted-foreground"
          aria-label="Copy summary"
          title="Copy summary"
        >
          <CopyGlyph size={12} />
        </button>
      </div>
      {expanded && (
        <div className="px-3 pb-3 text-xs leading-relaxed border-t pt-2">
          <MarkdownBody content={message.content} />
        </div>
      )}
    </div>
  );
}
export function MessageBubble({ message, sessionChats, capHitInfo }: Props) {
// v1.11: anchored rolling summary row. Checked BEFORE the kind==='compact'
// branch because summary=true never coexists with kind='compact' (new
// compactions emit role='assistant' rows with kind='message'+summary=true).
if (message.summary) {
return <SummaryCard message={message} />;
}
if (message.kind === 'compact') {
return <CompactCard message={message} sessionChats={sessionChats} />;
}

View File

@@ -1,5 +1,7 @@
import { useEffect, useRef, useState } from 'react';
import { toast } from 'sonner';
import type { Message, WsFrame } from '@/api/types';
import { api } from '@/api/client';
import { sessionEvents } from './sessionEvents';
// session_renamed frame removed from WsFrame — it was declared but never
@@ -161,6 +163,12 @@ function applyFrame(state: State, frame: WsFrame): State {
: state.messages;
return { ...state, messages: next, error: frame.error };
}
case 'compacted': {
// v1.11: side effects (refetch + toast) live in ws.onmessage; the
// reducer just no-ops so TS exhaustiveness is satisfied without
// duplicating async work inside a synchronous reducer.
return state;
}
}
}
@@ -196,6 +204,25 @@ export function useSessionStream(sessionId: string | undefined) {
ws.onmessage = (ev) => {
try {
const frame = JSON.parse(typeof ev.data === 'string' ? ev.data : '') as WsFrame;
// v1.11: on a compaction completion, re-fetch the message list so
// the new summary row + the cohort of compacted_at-stamped older
// rows render correctly. We dispatch the fresh list as a synthetic
// 'snapshot' frame so the reducer's existing path handles state
// replacement (no need for a parallel "refetched" path).
// The toast is purely UX feedback; missing it would still leave
// the chat in a valid state.
if (frame.type === 'compacted') {
toast.success('Context compacted to free space');
void api.messages
.list(frame.session_id)
.then((messages) => {
setState((s) => applyFrame(s, { type: 'snapshot', messages }));
})
.catch((err: unknown) => {
console.warn('compacted refetch failed', err);
});
return;
}
setState((s) => applyFrame(s, frame));
} catch (err) {
console.warn('bad ws frame', err);