feat(server): add DCP clean-room context pruning
- Deduplication: removes consecutive identical tool_call+tool_result pairs - Purge-errors: removes failed/empty tool results - Transform orchestrator runs strategies in sequence pre-payload - Wired into turn.ts before buildMessagesPayload - Clean-room reimplementation (AGPL reference: behavior only)
This commit is contained in:
@@ -0,0 +1,33 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { deduplicate } from '../strategies/deduplication.js';
|
||||||
|
import type { DcpMessage } from '../messages.js';
|
||||||
|
|
||||||
|
describe('deduplicate', () => {
|
||||||
|
it('removes consecutive identical tool_call+tool_result pairs', () => {
|
||||||
|
const messages: DcpMessage[] = [
|
||||||
|
{ role: 'user', content: 'search for x' },
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||||
|
// Duplicate pair
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result1', tool_call_id: '2' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const { messages: result, stats } = deduplicate(messages);
|
||||||
|
expect(result).toHaveLength(3); // user + first pair
|
||||||
|
expect(stats.removedCount).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves non-duplicate content', () => {
|
||||||
|
const messages: DcpMessage[] = [
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result2', tool_call_id: '2' }, // Different result
|
||||||
|
];
|
||||||
|
|
||||||
|
const { messages: result, stats } = deduplicate(messages);
|
||||||
|
expect(result).toHaveLength(4);
|
||||||
|
expect(stats.removedCount).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { toDcpMessages, fromDcpMessages } from '../messages.js';
|
||||||
|
|
||||||
|
describe('toDcpMessages', () => {
|
||||||
|
it('converts user messages', () => {
|
||||||
|
const result = toDcpMessages([{ role: 'user', content: 'hello' }]);
|
||||||
|
expect(result[0].role).toBe('user');
|
||||||
|
expect(result[0].content).toBe('hello');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('marks Error: content as isError', () => {
|
||||||
|
const result = toDcpMessages([{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' }]);
|
||||||
|
expect(result[0].isError).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('fromDcpMessages', () => {
|
||||||
|
it('round-trips messages', () => {
|
||||||
|
const original = [{ role: 'user', content: 'hello' }];
|
||||||
|
expect(fromDcpMessages(toDcpMessages(original))).toEqual(original);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { purgeErrors } from '../strategies/purge-errors.js';
|
||||||
|
import type { DcpMessage } from '../messages.js';
|
||||||
|
|
||||||
|
describe('purgeErrors', () => {
|
||||||
|
it('removes tool results where content starts with Error:', () => {
|
||||||
|
const messages: DcpMessage[] = [
|
||||||
|
{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' },
|
||||||
|
{ role: 'tool', content: '{"files":[]}', tool_call_id: '2' },
|
||||||
|
];
|
||||||
|
const { messages: result, stats } = purgeErrors(messages);
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(stats.removedCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('removes empty tool results', () => {
|
||||||
|
const messages: DcpMessage[] = [
|
||||||
|
{ role: 'tool', content: '', tool_call_id: '1' },
|
||||||
|
];
|
||||||
|
const { messages: result, stats } = purgeErrors(messages);
|
||||||
|
expect(result).toHaveLength(0);
|
||||||
|
expect(stats.removedCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves valid tool results', () => {
|
||||||
|
const messages: DcpMessage[] = [
|
||||||
|
{ role: 'tool', content: '{"files":["a.ts"]}', tool_call_id: '1' },
|
||||||
|
];
|
||||||
|
const { messages: result, stats } = purgeErrors(messages);
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
expect(stats.removedCount).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { transformMessages } from '../transform.js';
|
||||||
|
import type { DcpMessage } from '../messages.js';
|
||||||
|
|
||||||
|
describe('transformMessages', () => {
|
||||||
|
it('applies dedup then purge in order', () => {
|
||||||
|
const input: DcpMessage[] = [
|
||||||
|
{ role: 'user', content: 'hello' },
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result', tool_call_id: '1' },
|
||||||
|
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||||
|
{ role: 'tool', content: 'result', tool_call_id: '2' }, // Dup
|
||||||
|
];
|
||||||
|
|
||||||
|
const { messages, stats } = transformMessages('test-chat', input);
|
||||||
|
expect(stats.removedCount).toBeGreaterThan(0);
|
||||||
|
expect(messages.length).toBeLessThan(input.length);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles empty input', () => {
|
||||||
|
const { messages, stats } = transformMessages('empty', []);
|
||||||
|
expect(messages).toHaveLength(0);
|
||||||
|
expect(stats.removedCount).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
4
apps/server/src/services/inference/dcp/index.ts
Normal file
4
apps/server/src/services/inference/dcp/index.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
export { transformMessages } from './transform.js';
|
||||||
|
export type { DcpMessage } from './messages.js';
|
||||||
|
export { toDcpMessages, fromDcpMessages } from './messages.js';
|
||||||
|
export { getDcpState, clearDcpState } from './state.js';
|
||||||
34
apps/server/src/services/inference/dcp/messages.ts
Normal file
34
apps/server/src/services/inference/dcp/messages.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
// DCP message shape adapter.
|
||||||
|
// Converts between BooCode MessagePart[] and the DCP internal shape.
|
||||||
|
// Clean-room implementation — no AGPL source copied.
|
||||||
|
|
||||||
|
export interface DcpMessage {
|
||||||
|
role: 'user' | 'assistant' | 'tool';
|
||||||
|
content: string;
|
||||||
|
tool_call_id?: string;
|
||||||
|
tool_calls?: Array<{ id: string; name: string; arguments: string }>;
|
||||||
|
isError?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function toDcpMessages(parts: any[]): DcpMessage[] {
|
||||||
|
return parts.map((p: any) => {
|
||||||
|
const msg: DcpMessage = { role: p.role, content: p.content ?? '' };
|
||||||
|
if (p.tool_call_id) msg.tool_call_id = p.tool_call_id;
|
||||||
|
if (p.tool_calls) msg.tool_calls = p.tool_calls;
|
||||||
|
if (p.isError) msg.isError = true;
|
||||||
|
if (p.role === 'tool' && p.content && p.content.startsWith('Error:')) {
|
||||||
|
msg.isError = true;
|
||||||
|
}
|
||||||
|
return msg;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function fromDcpMessages(msgs: DcpMessage[]): any[] {
|
||||||
|
return msgs.map((m) => ({
|
||||||
|
role: m.role,
|
||||||
|
content: m.content,
|
||||||
|
...(m.tool_call_id ? { tool_call_id: m.tool_call_id } : {}),
|
||||||
|
...(m.tool_calls ? { tool_calls: m.tool_calls } : {}),
|
||||||
|
...(m.isError ? { isError: true } : {}),
|
||||||
|
}));
|
||||||
|
}
|
||||||
27
apps/server/src/services/inference/dcp/state.ts
Normal file
27
apps/server/src/services/inference/dcp/state.ts
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Per-chat session state for DCP.
|
||||||
|
// Tracks last transform timestamp and message count to avoid re-processing.
|
||||||
|
|
||||||
|
interface ChatDcpState {
|
||||||
|
lastTransformAt: number;
|
||||||
|
lastMessageCount: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
const chatStates = new Map<string, ChatDcpState>();
|
||||||
|
|
||||||
|
export function getDcpState(chatId: string): ChatDcpState | undefined {
|
||||||
|
return chatStates.get(chatId);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function setDcpState(chatId: string, messageCount: number): void {
|
||||||
|
chatStates.set(chatId, { lastTransformAt: Date.now(), lastMessageCount: messageCount });
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearDcpState(chatId: string): void {
|
||||||
|
chatStates.delete(chatId);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function shouldTransform(chatId: string, messageCount: number): boolean {
|
||||||
|
const state = chatStates.get(chatId);
|
||||||
|
if (!state) return true;
|
||||||
|
return state.lastMessageCount !== messageCount;
|
||||||
|
}
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
import type { DcpMessage } from '../messages.js';
|
||||||
|
|
||||||
|
export function deduplicate(messages: DcpMessage[]): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
|
||||||
|
const result: DcpMessage[] = [];
|
||||||
|
let removedCount = 0;
|
||||||
|
let freedTokens = 0;
|
||||||
|
let i = 0;
|
||||||
|
|
||||||
|
while (i < messages.length) {
|
||||||
|
const current: DcpMessage = messages[i]!;
|
||||||
|
const next = messages[i + 1];
|
||||||
|
|
||||||
|
if (
|
||||||
|
current.role === 'assistant' &&
|
||||||
|
current.tool_calls &&
|
||||||
|
next &&
|
||||||
|
next.role === 'tool' &&
|
||||||
|
next.tool_call_id === current.tool_calls[0]?.id
|
||||||
|
) {
|
||||||
|
const nextNext = messages[i + 2];
|
||||||
|
const nextNextNext = messages[i + 3];
|
||||||
|
|
||||||
|
if (
|
||||||
|
nextNext &&
|
||||||
|
nextNext.role === 'assistant' &&
|
||||||
|
nextNext.tool_calls &&
|
||||||
|
nextNextNext &&
|
||||||
|
nextNextNext.role === 'tool' &&
|
||||||
|
nextNextNext.tool_call_id === nextNext.tool_calls[0]?.id &&
|
||||||
|
nextNext.tool_calls[0]?.name === current.tool_calls[0]?.name &&
|
||||||
|
nextNext.tool_calls[0]?.arguments === current.tool_calls[0]?.arguments &&
|
||||||
|
nextNextNext.content === next.content
|
||||||
|
) {
|
||||||
|
result.push(current, next);
|
||||||
|
i += 4;
|
||||||
|
removedCount += 2;
|
||||||
|
freedTokens += Math.ceil(nextNext.content.length / 4);
|
||||||
|
freedTokens += Math.ceil(current.content.length / 4);
|
||||||
|
} else {
|
||||||
|
result.push(current);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.push(current);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { messages: result, stats: { removedCount, freedTokens } };
|
||||||
|
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
// Purge-errors strategy — removes failed/empty tool_result entries.
|
||||||
|
// Clean-room implementation.
|
||||||
|
|
||||||
|
import type { DcpMessage } from '../messages.js';
|
||||||
|
|
||||||
|
const ERROR_PREFIXES = ['Error:', 'error:', 'Error: '];
|
||||||
|
const DEFAULT_WINDOW = 5;
|
||||||
|
|
||||||
|
export function purgeErrors(
|
||||||
|
messages: DcpMessage[],
|
||||||
|
windowSize: number = DEFAULT_WINDOW,
|
||||||
|
): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
|
||||||
|
const result: DcpMessage[] = [];
|
||||||
|
let removedCount = 0;
|
||||||
|
let freedTokens = 0;
|
||||||
|
|
||||||
|
for (const msg of messages) {
|
||||||
|
if (msg.role === 'tool') {
|
||||||
|
const shouldRemove =
|
||||||
|
msg.isError ||
|
||||||
|
ERROR_PREFIXES.some((p) => msg.content.startsWith(p)) ||
|
||||||
|
msg.content.trim() === '';
|
||||||
|
|
||||||
|
if (shouldRemove) {
|
||||||
|
removedCount++;
|
||||||
|
freedTokens += Math.ceil(msg.content.length / 4);
|
||||||
|
continue; // Skip this message
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { messages: result, stats: { removedCount, freedTokens } };
|
||||||
|
}
|
||||||
52
apps/server/src/services/inference/dcp/transform.ts
Normal file
52
apps/server/src/services/inference/dcp/transform.ts
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
// Transform orchestrator — runs DCP strategies in sequence.
|
||||||
|
// Clean-room implementation.
|
||||||
|
|
||||||
|
import type { DcpMessage } from './messages.js';
|
||||||
|
import { deduplicate } from './strategies/deduplication.js';
|
||||||
|
import { purgeErrors } from './strategies/purge-errors.js';
|
||||||
|
import { getDcpState, setDcpState, shouldTransform } from './state.js';
|
||||||
|
|
||||||
|
export interface TransformStats {
|
||||||
|
removedCount: number;
|
||||||
|
freedTokens: number;
|
||||||
|
dedupRemoved: number;
|
||||||
|
purgeRemoved: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface TransformResult {
|
||||||
|
messages: DcpMessage[];
|
||||||
|
stats: TransformStats;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function transformMessages(chatId: string, messages: DcpMessage[]): TransformResult {
|
||||||
|
if (!shouldTransform(chatId, messages.length)) {
|
||||||
|
return { messages, stats: { removedCount: 0, freedTokens: 0, dedupRemoved: 0, purgeRemoved: 0 } };
|
||||||
|
}
|
||||||
|
|
||||||
|
let m = messages;
|
||||||
|
|
||||||
|
// Step 1: Deduplicate
|
||||||
|
const dedupResult = deduplicate(m);
|
||||||
|
m = dedupResult.messages;
|
||||||
|
const dedupRemoved = dedupResult.stats.removedCount;
|
||||||
|
|
||||||
|
// Step 2: Purge errors
|
||||||
|
const purgeResult = purgeErrors(m);
|
||||||
|
m = purgeResult.messages;
|
||||||
|
const purgeRemoved = purgeResult.stats.removedCount;
|
||||||
|
|
||||||
|
const totalRemoved = dedupRemoved + purgeRemoved;
|
||||||
|
const totalFreed = dedupResult.stats.freedTokens + purgeResult.stats.freedTokens;
|
||||||
|
|
||||||
|
setDcpState(chatId, messages.length);
|
||||||
|
|
||||||
|
return {
|
||||||
|
messages: m,
|
||||||
|
stats: {
|
||||||
|
removedCount: totalRemoved,
|
||||||
|
freedTokens: totalFreed,
|
||||||
|
dedupRemoved,
|
||||||
|
purgeRemoved,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -21,6 +21,7 @@ import {
|
|||||||
buildMessagesPayload,
|
buildMessagesPayload,
|
||||||
loadContext,
|
loadContext,
|
||||||
} from './payload.js';
|
} from './payload.js';
|
||||||
|
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
|
||||||
import {
|
import {
|
||||||
finalizeCompletion,
|
finalizeCompletion,
|
||||||
finalizeEmpty,
|
finalizeEmpty,
|
||||||
@@ -156,9 +157,20 @@ export async function runAssistantTurn(
|
|||||||
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
|
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const { session: iterSession, project: iterProject, history } = loaded;
|
let { session: iterSession, project: iterProject, history } = loaded;
|
||||||
const projectRoot = await resolveProjectRoot(iterProject.path);
|
const projectRoot = await resolveProjectRoot(iterProject.path);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const dcpMsgs = toDcpMessages(history);
|
||||||
|
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
|
||||||
|
if (stats.removedCount > 0) {
|
||||||
|
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
|
||||||
|
history = fromDcpMessages(pruned) as typeof history;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
|
||||||
|
}
|
||||||
|
|
||||||
// v1.14.0: log step boundary for instrumentation. step_start parts are in
|
// v1.14.0: log step boundary for instrumentation. step_start parts are in
|
||||||
// the schema CHECK but not emitted here — writing to the assistant message
|
// the schema CHECK but not emitted here — writing to the assistant message
|
||||||
// before the stream phase creates a sequence-0 collision with
|
// before the stream phase creates a sequence-0 collision with
|
||||||
|
|||||||
Reference in New Issue
Block a user