feat(server): add DCP clean-room context pruning
- Deduplication: removes consecutive identical tool_call+tool_result pairs - Purge-errors: removes failed/empty tool results - Transform orchestrator runs strategies in sequence pre-payload - Wired into turn.ts before buildMessagesPayload - Clean-room reimplementation (AGPL reference: behavior only)
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { deduplicate } from '../strategies/deduplication.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('deduplicate', () => {
|
||||
it('removes consecutive identical tool_call+tool_result pairs', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'user', content: 'search for x' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||
// Duplicate pair
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '2' },
|
||||
];
|
||||
|
||||
const { messages: result, stats } = deduplicate(messages);
|
||||
expect(result).toHaveLength(3); // user + first pair
|
||||
expect(stats.removedCount).toBe(2);
|
||||
});
|
||||
|
||||
it('preserves non-duplicate content', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result1', tool_call_id: '1' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result2', tool_call_id: '2' }, // Different result
|
||||
];
|
||||
|
||||
const { messages: result, stats } = deduplicate(messages);
|
||||
expect(result).toHaveLength(4);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,22 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { toDcpMessages, fromDcpMessages } from '../messages.js';
|
||||
|
||||
describe('toDcpMessages', () => {
|
||||
it('converts user messages', () => {
|
||||
const result = toDcpMessages([{ role: 'user', content: 'hello' }]);
|
||||
expect(result[0].role).toBe('user');
|
||||
expect(result[0].content).toBe('hello');
|
||||
});
|
||||
|
||||
it('marks Error: content as isError', () => {
|
||||
const result = toDcpMessages([{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' }]);
|
||||
expect(result[0].isError).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('fromDcpMessages', () => {
|
||||
it('round-trips messages', () => {
|
||||
const original = [{ role: 'user', content: 'hello' }];
|
||||
expect(fromDcpMessages(toDcpMessages(original))).toEqual(original);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,33 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { purgeErrors } from '../strategies/purge-errors.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('purgeErrors', () => {
|
||||
it('removes tool results where content starts with Error:', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: 'Error: file not found', tool_call_id: '1' },
|
||||
{ role: 'tool', content: '{"files":[]}', tool_call_id: '2' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(stats.removedCount).toBe(1);
|
||||
});
|
||||
|
||||
it('removes empty tool results', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: '', tool_call_id: '1' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(0);
|
||||
expect(stats.removedCount).toBe(1);
|
||||
});
|
||||
|
||||
it('preserves valid tool results', () => {
|
||||
const messages: DcpMessage[] = [
|
||||
{ role: 'tool', content: '{"files":["a.ts"]}', tool_call_id: '1' },
|
||||
];
|
||||
const { messages: result, stats } = purgeErrors(messages);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,25 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { transformMessages } from '../transform.js';
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
describe('transformMessages', () => {
|
||||
it('applies dedup then purge in order', () => {
|
||||
const input: DcpMessage[] = [
|
||||
{ role: 'user', content: 'hello' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '1', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result', tool_call_id: '1' },
|
||||
{ role: 'assistant', content: '', tool_calls: [{ id: '2', name: 'grep', arguments: '{}' }] },
|
||||
{ role: 'tool', content: 'result', tool_call_id: '2' }, // Dup
|
||||
];
|
||||
|
||||
const { messages, stats } = transformMessages('test-chat', input);
|
||||
expect(stats.removedCount).toBeGreaterThan(0);
|
||||
expect(messages.length).toBeLessThan(input.length);
|
||||
});
|
||||
|
||||
it('handles empty input', () => {
|
||||
const { messages, stats } = transformMessages('empty', []);
|
||||
expect(messages).toHaveLength(0);
|
||||
expect(stats.removedCount).toBe(0);
|
||||
});
|
||||
});
|
||||
4
apps/server/src/services/inference/dcp/index.ts
Normal file
4
apps/server/src/services/inference/dcp/index.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
export { transformMessages } from './transform.js';
|
||||
export type { DcpMessage } from './messages.js';
|
||||
export { toDcpMessages, fromDcpMessages } from './messages.js';
|
||||
export { getDcpState, clearDcpState } from './state.js';
|
||||
34
apps/server/src/services/inference/dcp/messages.ts
Normal file
34
apps/server/src/services/inference/dcp/messages.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
// DCP message shape adapter.
|
||||
// Converts between BooCode MessagePart[] and the DCP internal shape.
|
||||
// Clean-room implementation — no AGPL source copied.
|
||||
|
||||
export interface DcpMessage {
|
||||
role: 'user' | 'assistant' | 'tool';
|
||||
content: string;
|
||||
tool_call_id?: string;
|
||||
tool_calls?: Array<{ id: string; name: string; arguments: string }>;
|
||||
isError?: boolean;
|
||||
}
|
||||
|
||||
export function toDcpMessages(parts: any[]): DcpMessage[] {
|
||||
return parts.map((p: any) => {
|
||||
const msg: DcpMessage = { role: p.role, content: p.content ?? '' };
|
||||
if (p.tool_call_id) msg.tool_call_id = p.tool_call_id;
|
||||
if (p.tool_calls) msg.tool_calls = p.tool_calls;
|
||||
if (p.isError) msg.isError = true;
|
||||
if (p.role === 'tool' && p.content && p.content.startsWith('Error:')) {
|
||||
msg.isError = true;
|
||||
}
|
||||
return msg;
|
||||
});
|
||||
}
|
||||
|
||||
export function fromDcpMessages(msgs: DcpMessage[]): any[] {
|
||||
return msgs.map((m) => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
...(m.tool_call_id ? { tool_call_id: m.tool_call_id } : {}),
|
||||
...(m.tool_calls ? { tool_calls: m.tool_calls } : {}),
|
||||
...(m.isError ? { isError: true } : {}),
|
||||
}));
|
||||
}
|
||||
27
apps/server/src/services/inference/dcp/state.ts
Normal file
27
apps/server/src/services/inference/dcp/state.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
// Per-chat session state for DCP.
|
||||
// Tracks last transform timestamp and message count to avoid re-processing.
|
||||
|
||||
interface ChatDcpState {
|
||||
lastTransformAt: number;
|
||||
lastMessageCount: number;
|
||||
}
|
||||
|
||||
const chatStates = new Map<string, ChatDcpState>();
|
||||
|
||||
export function getDcpState(chatId: string): ChatDcpState | undefined {
|
||||
return chatStates.get(chatId);
|
||||
}
|
||||
|
||||
export function setDcpState(chatId: string, messageCount: number): void {
|
||||
chatStates.set(chatId, { lastTransformAt: Date.now(), lastMessageCount: messageCount });
|
||||
}
|
||||
|
||||
export function clearDcpState(chatId: string): void {
|
||||
chatStates.delete(chatId);
|
||||
}
|
||||
|
||||
export function shouldTransform(chatId: string, messageCount: number): boolean {
|
||||
const state = chatStates.get(chatId);
|
||||
if (!state) return true;
|
||||
return state.lastMessageCount !== messageCount;
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
export function deduplicate(messages: DcpMessage[]): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
|
||||
const result: DcpMessage[] = [];
|
||||
let removedCount = 0;
|
||||
let freedTokens = 0;
|
||||
let i = 0;
|
||||
|
||||
while (i < messages.length) {
|
||||
const current: DcpMessage = messages[i]!;
|
||||
const next = messages[i + 1];
|
||||
|
||||
if (
|
||||
current.role === 'assistant' &&
|
||||
current.tool_calls &&
|
||||
next &&
|
||||
next.role === 'tool' &&
|
||||
next.tool_call_id === current.tool_calls[0]?.id
|
||||
) {
|
||||
const nextNext = messages[i + 2];
|
||||
const nextNextNext = messages[i + 3];
|
||||
|
||||
if (
|
||||
nextNext &&
|
||||
nextNext.role === 'assistant' &&
|
||||
nextNext.tool_calls &&
|
||||
nextNextNext &&
|
||||
nextNextNext.role === 'tool' &&
|
||||
nextNextNext.tool_call_id === nextNext.tool_calls[0]?.id &&
|
||||
nextNext.tool_calls[0]?.name === current.tool_calls[0]?.name &&
|
||||
nextNext.tool_calls[0]?.arguments === current.tool_calls[0]?.arguments &&
|
||||
nextNextNext.content === next.content
|
||||
) {
|
||||
result.push(current, next);
|
||||
i += 4;
|
||||
removedCount += 2;
|
||||
freedTokens += Math.ceil(nextNext.content.length / 4);
|
||||
freedTokens += Math.ceil(current.content.length / 4);
|
||||
} else {
|
||||
result.push(current);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
result.push(current);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return { messages: result, stats: { removedCount, freedTokens } };
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Purge-errors strategy — removes failed/empty tool_result entries.
|
||||
// Clean-room implementation.
|
||||
|
||||
import type { DcpMessage } from '../messages.js';
|
||||
|
||||
const ERROR_PREFIXES = ['Error:', 'error:', 'Error: '];
|
||||
const DEFAULT_WINDOW = 5;
|
||||
|
||||
export function purgeErrors(
|
||||
messages: DcpMessage[],
|
||||
windowSize: number = DEFAULT_WINDOW,
|
||||
): { messages: DcpMessage[]; stats: { removedCount: number; freedTokens: number } } {
|
||||
const result: DcpMessage[] = [];
|
||||
let removedCount = 0;
|
||||
let freedTokens = 0;
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role === 'tool') {
|
||||
const shouldRemove =
|
||||
msg.isError ||
|
||||
ERROR_PREFIXES.some((p) => msg.content.startsWith(p)) ||
|
||||
msg.content.trim() === '';
|
||||
|
||||
if (shouldRemove) {
|
||||
removedCount++;
|
||||
freedTokens += Math.ceil(msg.content.length / 4);
|
||||
continue; // Skip this message
|
||||
}
|
||||
}
|
||||
result.push(msg);
|
||||
}
|
||||
|
||||
return { messages: result, stats: { removedCount, freedTokens } };
|
||||
}
|
||||
52
apps/server/src/services/inference/dcp/transform.ts
Normal file
52
apps/server/src/services/inference/dcp/transform.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
// Transform orchestrator — runs DCP strategies in sequence.
|
||||
// Clean-room implementation.
|
||||
|
||||
import type { DcpMessage } from './messages.js';
|
||||
import { deduplicate } from './strategies/deduplication.js';
|
||||
import { purgeErrors } from './strategies/purge-errors.js';
|
||||
import { getDcpState, setDcpState, shouldTransform } from './state.js';
|
||||
|
||||
export interface TransformStats {
|
||||
removedCount: number;
|
||||
freedTokens: number;
|
||||
dedupRemoved: number;
|
||||
purgeRemoved: number;
|
||||
}
|
||||
|
||||
export interface TransformResult {
|
||||
messages: DcpMessage[];
|
||||
stats: TransformStats;
|
||||
}
|
||||
|
||||
export function transformMessages(chatId: string, messages: DcpMessage[]): TransformResult {
|
||||
if (!shouldTransform(chatId, messages.length)) {
|
||||
return { messages, stats: { removedCount: 0, freedTokens: 0, dedupRemoved: 0, purgeRemoved: 0 } };
|
||||
}
|
||||
|
||||
let m = messages;
|
||||
|
||||
// Step 1: Deduplicate
|
||||
const dedupResult = deduplicate(m);
|
||||
m = dedupResult.messages;
|
||||
const dedupRemoved = dedupResult.stats.removedCount;
|
||||
|
||||
// Step 2: Purge errors
|
||||
const purgeResult = purgeErrors(m);
|
||||
m = purgeResult.messages;
|
||||
const purgeRemoved = purgeResult.stats.removedCount;
|
||||
|
||||
const totalRemoved = dedupRemoved + purgeRemoved;
|
||||
const totalFreed = dedupResult.stats.freedTokens + purgeResult.stats.freedTokens;
|
||||
|
||||
setDcpState(chatId, messages.length);
|
||||
|
||||
return {
|
||||
messages: m,
|
||||
stats: {
|
||||
removedCount: totalRemoved,
|
||||
freedTokens: totalFreed,
|
||||
dedupRemoved,
|
||||
purgeRemoved,
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
buildMessagesPayload,
|
||||
loadContext,
|
||||
} from './payload.js';
|
||||
import { toDcpMessages, transformMessages, fromDcpMessages } from './dcp/index.js';
|
||||
import {
|
||||
finalizeCompletion,
|
||||
finalizeEmpty,
|
||||
@@ -156,9 +157,20 @@ export async function runAssistantTurn(
|
||||
ctx.log.warn({ sessionId }, 'inference: session or project missing mid-loop');
|
||||
break;
|
||||
}
|
||||
const { session: iterSession, project: iterProject, history } = loaded;
|
||||
let { session: iterSession, project: iterProject, history } = loaded;
|
||||
const projectRoot = await resolveProjectRoot(iterProject.path);
|
||||
|
||||
try {
|
||||
const dcpMsgs = toDcpMessages(history);
|
||||
const { messages: pruned, stats } = transformMessages(chatId, dcpMsgs);
|
||||
if (stats.removedCount > 0) {
|
||||
ctx.log.info({ chatId, ...stats }, 'dcp: transform removed messages');
|
||||
history = fromDcpMessages(pruned) as typeof history;
|
||||
}
|
||||
} catch (err) {
|
||||
ctx.log.warn({ err: err instanceof Error ? err.message : String(err), chatId }, 'dcp: transform skipped');
|
||||
}
|
||||
|
||||
// v1.14.0: log step boundary for instrumentation. step_start parts are in
|
||||
// the schema CHECK but not emitted here — writing to the assistant message
|
||||
// before the stream phase creates a sequence-0 collision with
|
||||
|
||||
Reference in New Issue
Block a user