v1.11: opencode-style compaction port

- compaction.ts: usable/isOverflow/estimate/turns/select/buildPrompt/process
- compaction-prompt.ts: SUMMARY_TEMPLATE verbatim from opencode
- schema: messages.{compacted_at,summary,tail_start_id} + chats.needs_compaction
- inference: auto-trigger on overflow, pre-fetch compaction before next turn
- /compact slash command rewired to new path
- WS: chat_status working/idle around compaction + compacted frame
- frontend: SummaryCard + sonner toast on compacted
- 24 unit tests for pure functions
This commit is contained in:
2026-05-20 19:05:35 +00:00
parent 6aab4f7d2a
commit dc43dd44f9
14 changed files with 1063 additions and 113 deletions

View File

@@ -19,6 +19,7 @@ import { registerSkillsRoutes } from './routes/skills.js';
import { createInferenceRunner } from './services/inference.js';
import { createBroker } from './services/broker.js';
import { listSkills } from './services/skills.js';
import * as compaction from './services/compaction.js';
async function main() {
const config = loadConfig();
@@ -81,6 +82,11 @@ async function main() {
publish: (sessionId, frame) => {
broker.publish(sessionId, frame as unknown as Record<string, unknown> & { type: string });
},
// v1.11: broker handle for compaction.process to publish 'compacted'
// frames on the per-session channel. Inference's regular publish path
// is bound to (sessionId, InferenceFrame); compaction publishes a
// different frame shape, so it goes through the raw broker.
broker,
},
(user, frame) => {
broker.publishUser(user, frame as unknown as Record<string, unknown> & { type: string });
@@ -90,9 +96,13 @@ async function main() {
enqueueInference: (sessionId, chatId, assistantId, user) => {
inference.enqueue(sessionId, chatId, assistantId, user);
},
enqueueCompact: (sessionId, chatId, compactId, user) => {
inference.enqueueCompact(sessionId, chatId, compactId, user);
},
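// v1.11: in-request compaction. Unlike enqueueCompact above, which hands the
// work to the inference runner, this returns the compaction.process promise
// so the route can await the LLM call within its own request lifecycle; the
// new summary row arrives via the WS 'compacted' frame published from inside
// compaction.process. Errors are deliberately not caught here, so they
// bubble up and the route can reply 500 — manual /compact failures should be
// loud (the user just clicked a button).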
runCompaction: (chatId) =>
compaction.process({ sql, config, log: app.log, broker, chatId }),
cancelInference: async (sessionId, chatId) => {
return inference.cancel(sessionId, chatId);
},