feat(server): add DCP clean-room context pruning

- Deduplication: removes consecutive identical tool_call+tool_result pairs
- Purge-errors: removes failed/empty tool results
- Transform orchestrator runs strategies in sequence pre-payload
- Wired into turn.ts before buildMessagesPayload
- Clean-room reimplementation (AGPL reference: behavior only)
This commit is contained in:
2026-06-07 17:57:39 +00:00
parent ee749d8698
commit b8b2666fdc
11 changed files with 327 additions and 1 deletions

View File

@@ -0,0 +1,33 @@
import { describe, it, expect } from 'vitest';
import { deduplicate } from '../strategies/deduplication.js';
import type { DcpMessage } from '../messages.js';
describe('deduplicate', () => {
it('removes consecutive identical tool_call+tool_result pairs', () => {
const messages: DcpMessage[] = [
{ role: 'user', content: 'search for x' },
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result1', tool_call_id: '1' },
// Duplicate pair
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result1', tool_call_id: '2' },
];
const { messages: result, stats } = deduplicate(messages);
expect(result).toHaveLength(3); // user + first pair
expect(stats.removedCount).toBe(2);
});
it('preserves non-duplicate content', () => {
const messages: DcpMessage[] = [
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result1', tool_call_id: '1' },
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result2', tool_call_id: '2' }, // Different result
];
const { messages: result, stats } = deduplicate(messages);
expect(result).toHaveLength(4);
expect(stats.removedCount).toBe(0);
});
});

View File

@@ -0,0 +1,22 @@
import { describe, it, expect } from 'vitest';
import { toDcpMessages, fromDcpMessages } from '../messages.js';
describe('toDcpMessages', () => {
it('converts user messages', () => {
const result = toDcpMessages([{ role: 'user', content: 'hello' }]);
expect(result[0].role).toBe('user');
expect(result[0].content).toBe('hello');
});
it('marks Error: content as isError', () => {
const result = toDcpMessages([{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' }]);
expect(result[0].isError).toBe(true);
});
});
describe('fromDcpMessages', () => {
it('round-trips messages', () => {
const original = [{ role: 'user', content: 'hello' }];
expect(fromDcpMessages(toDcpMessages(original))).toEqual(original);
});
});

View File

@@ -0,0 +1,33 @@
import { describe, it, expect } from 'vitest';
import { purgeErrors } from '../strategies/purge-errors.js';
import type { DcpMessage } from '../messages.js';
describe('purgeErrors', () => {
it('removes tool results where content starts with Error:', () => {
const messages: DcpMessage[] = [
{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' },
{ role: 'tool', content: '{"files":[]}', tool_call_id: '2' },
];
const { messages: result, stats } = purgeErrors(messages);
expect(result).toHaveLength(1);
expect(stats.removedCount).toBe(1);
});
it('removes empty tool results', () => {
const messages: DcpMessage[] = [
{ role: 'tool', content: '', tool_call_id: '1' },
];
const { messages: result, stats } = purgeErrors(messages);
expect(result).toHaveLength(0);
expect(stats.removedCount).toBe(1);
});
it('preserves valid tool results', () => {
const messages: DcpMessage[] = [
{ role: 'tool', content: '{"files":["a.ts"]}', tool_call_id: '1' },
];
const { messages: result, stats } = purgeErrors(messages);
expect(result).toHaveLength(1);
expect(stats.removedCount).toBe(0);
});
});

View File

@@ -0,0 +1,25 @@
import { describe, it, expect } from 'vitest';
import { transformMessages } from '../transform.js';
import type { DcpMessage } from '../messages.js';
describe('transformMessages', () => {
it('applies dedup then purge in order', () => {
const input: DcpMessage[] = [
{ role: 'user', content: 'hello' },
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result', tool_call_id: '1' },
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
{ role: 'tool', content: 'result', tool_call_id: '2' }, // Dup
];
const { messages, stats } = transformMessages('test-chat', input);
expect(stats.removedCount).toBeGreaterThan(0);
expect(messages.length).toBeLessThan(input.length);
});
it('handles empty input', () => {
const { messages, stats } = transformMessages('empty', []);
expect(messages).toHaveLength(0);
expect(stats.removedCount).toBe(0);
});
});

View File

@@ -0,0 +1,4 @@
export { transformMessages } from './transform.js';
export type { DcpMessage } from './messages.js';
export { toDcpMessages, fromDcpMessages } from './messages.js';
export { getDcpState, clearDcpState } from './state.js';

View File

@@ -0,0 +1,34 @@
// DCP message shape adapter.
// Converts between BooCode MessagePart[] and the DCP internal shape.
// Clean-room implementation — no AGPL source copied.
export interface DcpMessage {
role: 'user' | 'assistant' | 'tool';
content: string;
tool_call_id?: string;
tool_calls?: Array<{ id: string; name: string; arguments: string }>;
isError?: boolean;
}
export function toDcpMessages(parts: any[]): DcpMessage[] {
return parts.map((p: any) => {
const msg: DcpMessage = { role: p.role, content: p.content ?? '' };
if (p.tool_call_id) msg.tool_call_id = p.tool_call_id;
if (p.tool_calls) msg.tool_calls = p.tool_calls;
if (p.isError) msg.isError = true;
if (p.role === 'tool' && p.content && p.content.startsWith('Error:')) {
msg.isError = true;
}
return msg;
});
}
export function fromDcpMessages(msgs: DcpMessage[]): any[] {
return msgs.map((m) => ({
role: m.role,
content: m.content,
...(m.tool_call_id ? { tool_call_id: m.tool_call_id } : {}),
...(m.tool_calls ? { tool_calls: m.tool_calls } : {}),
...(m.isError ? { isError: true } : {}),
}));
}

View File

@@ -0,0 +1,27 @@
// Per-chat session state for DCP.
// Tracks last transform timestamp and message count to avoid re-processing.
interface ChatDcpState {
lastTransformAt: number;
lastMessageCount: number;
}
const chatStates = new Map<string, ChatDcpState>();
export function getDcpState(chatId: string): ChatDcpState | undefined {
return chatStates.get(chatId);
}
export function setDcpState(chatId: string, messageCount: number): void {
chatStates.set(chatId, { lastTransformAt: Date.now(), lastMessageCount: messageCount });
}
export function clearDcpState(chatId: string): void {
chatStates.delete(chatId);
}
export function shouldTransform(chatId: string, messageCount: number): boolean {
const state = chatStates.get(chatId);
if (!state) return true;
return state.lastMessageCount !== messageCount;
}

View File

@@ -0,0 +1,50 @@
import type { DcpMessage } from '../messages.js';
export function deduplicate(messages: DcpMessage[]): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
const result: DcpMessage[] = [];
let removedCount = 0;
let freedTokens = 0;
let i = 0;
while (i < messages.length) {
const current: DcpMessage = messages[i]!;
const next = messages[i + 1];
if (
current.role === 'assistant' &&
current.tool_calls &&
next &&
next.role === 'tool' &&
next.tool_call_id === current.tool_calls[0]?.id
) {
const nextNext = messages[i + 2];
const nextNextNext = messages[i + 3];
if (
nextNext &&
nextNext.role === 'assistant' &&
nextNext.tool_calls &&
nextNextNext &&
nextNextNext.role === 'tool' &&
nextNextNext.tool_call_id === nextNext.tool_calls[0]?.id &&
nextNext.tool_calls[0]?.name === current.tool_calls[0]?.name &&
nextNext.tool_calls[0]?.arguments === current.tool_calls[0]?.arguments &&
nextNextNext.content === next.content
) {
result.push(current, next);
i += 4;
removedCount += 2;
freedTokens += Math.ceil(nextNext.content.length / 4);
freedTokens += Math.ceil(current.content.length / 4);
} else {
result.push(current);
i++;
}
} else {
result.push(current);
i++;
}
}
return { messages: result, stats: { removedCount, freedTokens } };
}

View File

@@ -0,0 +1,34 @@
// Purge-errors strategy — removes failed/empty tool_result entries.
// Clean-room implementation.
import type { DcpMessage } from '../messages.js';
const ERROR_PREFIXES = ['Error:', 'error:', 'Error: '];
const DEFAULT_WINDOW = 5;
export function purgeErrors(
messages: DcpMessage[],
windowSize: number = DEFAULT_WINDOW,
): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
const result: DcpMessage[] = [];
let removedCount = 0;
let freedTokens = 0;
for (const msg of messages) {
if (msg.role === 'tool') {
const shouldRemove =
msg.isError ||
ERROR_PREFIXES.some((p) => msg.content.startsWith(p)) ||
msg.content.trim() === '';
if (shouldRemove) {
removedCount++;
freedTokens += Math.ceil(msg.content.length / 4);
continue; // Skip this message
}
}
result.push(msg);
}
return { messages: result, stats: { removedCount, freedTokens } };
}

View File

@@ -0,0 +1,52 @@
// Transform orchestrator — runs DCP strategies in sequence.
// Clean-room implementation.
import type { DcpMessage } from './messages.js';
import { deduplicate } from './strategies/deduplication.js';
import { purgeErrors } from './strategies/purge-errors.js';
import { getDcpState, setDcpState, shouldTransform } from './state.js';
export interface TransformStats {
removedCount: number;
freedTokens: number;
dedupRemoved: number;
purgeRemoved: number;
}
export interface TransformResult {
messages: DcpMessage[];
stats: TransformStats;
}
export function transformMessages(chatId: string, messages: DcpMessage[]): TransformResult {
if (!shouldTransform(chatId, messages.length)) {
return { messages, stats: { removedCount: 0, freedTokens: 0, dedupRemoved: 0, purgeRemoved: 0 } };
}
let m = messages;
// Step 1: Deduplicate
const dedupResult = deduplicate(m);
m = dedupResult.messages;
const dedupRemoved = dedupResult.stats.removedCount;
// Step 2: Purge errors
const purgeResult = purgeErrors(m);
m = purgeResult.messages;
const purgeRemoved = purgeResult.stats.removedCount;
const totalRemoved = dedupRemoved + purgeRemoved;
const totalFreed = dedupResult.stats.freedTokens + purgeResult.stats.freedTokens;
setDcpState(chatId, messages.length);
return {
messages: m,
stats: {
removedCount: totalRemoved,
freedTokens: totalFreed,
dedupRemoved,
purgeRemoved,
},
};
}

View File

@@ -21,6 +21,7 @@ import {
buildMessagesPayload,
loadContext,
} from './payload.js';
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
import {
finalizeCompletion,
finalizeEmpty,
@@ -156,9 +157,20 @@ export async function runAssistantTurn(
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
break;
}
const { session: iterSession, project: iterProject, history } = loaded;
let { session: iterSession, project: iterProject, history } = loaded;
const projectRoot = await resolveProjectRoot(iterProject.path);
try {
const dcpMsgs = toDcpMessages(history);
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
if (stats.removedCount > 0) {
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
history = fromDcpMessages(pruned) as typeof history;
}
} catch (err) {
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
}
// v1.14.0: log step boundary for instrumentation. step_start parts are in
// the schema CHECK but not emitted here — writing to the assistant message
// before the stream phase creates a sequence-0 collision with