fix(coder): strip dcp-message-id tags split across stream chunks

The dcp tag (<dcp-message-id>mNNNN</dcp-message-id>) is streamed token-by-token, so it arrives split across SSE deltas. The existing per-chunk stripDcpTags never sees a complete tag in any single fragment, so fragments pass through and the dispatcher reassembles the tag in textChunks (persisted + shown) — and the terminal message.part.updated path that would strip the full text is suppressed by the dedup gate. Add a stateful cross-chunk stripper (dcp-strip.ts: makeDcpStreamStripper) at the dispatcher's opencode frame boundary: it emits text that cannot be part of a forming tag, holds back only a trailing partial-tag prefix (without swallowing legitimate <…> content), and flushes at turn end. Fixes both live delta frames and persisted content. 11 unit tests incl. split-at-every-boundary and the documented per-chunk-fails case. opencode path only; ACP (goose/qwen/claude) untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 23:16:47 +00:00
parent c65daba5dd
commit f1a85627e4
3 changed files with 180 additions and 8 deletions

View File

@@ -0,0 +1,73 @@
import { describe, it, expect } from 'vitest';
import { stripDcpTags, makeDcpStreamStripper } from '../dcp-strip.js';
// Feed chunks through a fresh stripper and return the fully reassembled output
// (everything emitted during streaming + the final flush) — i.e. what the
// dispatcher would accumulate into the persisted message content.
function run(chunks: string[]): string {
const s = makeDcpStreamStripper();
let out = '';
for (const c of chunks) out += s.push(c);
out += s.flush();
return out;
}
describe('stripDcpTags (one-shot)', () => {
it('removes a complete tag', () => {
expect(stripDcpTags('Yes — "Test".\n\n<dcp-message-id>m0019</dcp-message-id>')).toBe(
'Yes — "Test".\n\n',
);
});
it('leaves text without a tag untouched', () => {
expect(stripDcpTags('no tag here')).toBe('no tag here');
});
});
describe('per-chunk strip is INSUFFICIENT (documents the bug)', () => {
it('a tag split across chunks survives a naive per-chunk .replace()', () => {
const chunks = ['Yes.\n\n<dcp', '-message', '-id>m0019</dcp', '-message-id>'];
const naive = chunks.map(stripDcpTags).join('');
// The reassembled content still contains the tag — this is the screenshot bug.
expect(naive).toContain('<dcp-message-id>m0019</dcp-message-id>');
});
});
describe('makeDcpStreamStripper (cross-chunk fix)', () => {
it('strips a tag split across chunks (the real opencode case)', () => {
expect(run(['Yes.\n\n<dcp', '-message', '-id>m0019</dcp', '-message-id>'])).toBe('Yes.\n\n');
});
it('strips a tag split at EVERY character boundary', () => {
const full = 'Answer.<dcp-message-id>m0019</dcp-message-id>';
expect(run([...full])).toBe('Answer.');
});
it('strips a tag delivered whole in one chunk', () => {
expect(run(['Answer.<dcp-message-id>m0019</dcp-message-id>'])).toBe('Answer.');
});
it('passes through text with no tag', () => {
expect(run(['hello ', 'world'])).toBe('hello world');
});
it('does NOT swallow legitimate < content (code/HTML/generics)', () => {
expect(run(['use ', '<div>', ' and ', 'Array<', 'string>'])).toBe('use <div> and Array<string>');
});
it('handles a lone < that is not a dcp tag, split across chunks', () => {
expect(run(['a <', 'b c'])).toBe('a <b c');
});
it('emits surrounding text and strips a mid-text tag', () => {
expect(run(['before ', '<dcp-message-id>', 'm1', '</dcp-message-id>', ' after'])).toBe(
'before after',
);
});
it('flushes a truncated/never-closed partial tag without leaking it as a complete tag', () => {
// If the stream ends mid-tag, flush strips complete tags; an incomplete
// remnant is returned as-is (no complete tag ever existed to render).
const out = run(['done.<dcp-message-id>m00']);
expect(out).not.toContain('</dcp-message-id>');
});
});