The dcp tag (<dcp-message-id>mNNNN</dcp-message-id>) is streamed token-by-token, so it arrives split across SSE deltas. The existing per-chunk stripDcpTags never sees a complete tag in any single fragment, so fragments pass through and the dispatcher reassembles the tag in textChunks (persisted + shown) — and the terminal message.part.updated path that would strip the full text is suppressed by the dedup gate. Add a stateful cross-chunk stripper (dcp-strip.ts: makeDcpStreamStripper) at the dispatcher's opencode frame boundary: it emits text that cannot be part of a forming tag, holds back only a trailing partial-tag prefix (without swallowing legitimate <…> content), and flushes at turn end. Fixes both live delta frames and persisted content. 11 unit tests incl. split-at-every-boundary and the documented per-chunk-fails case. opencode path only; ACP (goose/qwen/claude) untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
74 lines
2.8 KiB
TypeScript
74 lines
2.8 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import { stripDcpTags, makeDcpStreamStripper } from '../dcp-strip.js';
|
|
|
|
/**
 * Test helper: pushes every chunk, in order, through a newly created stream
 * stripper and then flushes it once.
 *
 * @param chunks - The text fragments to feed, in arrival order.
 * @returns Everything the stripper emitted while streaming followed by the
 *   final flush output, concatenated — i.e. what the dispatcher would
 *   accumulate into the persisted message content.
 */
function run(chunks: string[]): string {
  const stripper = makeDcpStreamStripper();
  // Fold the per-chunk emissions first; flush() must only run after the last push.
  const streamed = chunks.reduce((acc, chunk) => acc + stripper.push(chunk), '');
  return streamed + stripper.flush();
}
|
|
|
|
// One-shot helper: the full text is passed in a single call.
describe('stripDcpTags (one-shot)', () => {
  it('removes a complete tag', () => {
    const withTag = 'Yes — "Test".\n\n<dcp-message-id>m0019</dcp-message-id>';
    const stripped = stripDcpTags(withTag);
    // Only the tag goes; the preceding text (including trailing newlines) stays.
    expect(stripped).toBe('Yes — "Test".\n\n');
  });

  it('leaves text without a tag untouched', () => {
    const plain = 'no tag here';
    expect(stripDcpTags(plain)).toBe('no tag here');
  });
});
|
|
|
|
// Regression documentation: applying the one-shot helper to each streamed
// fragment independently cannot remove a tag that is split across fragments.
describe('per-chunk strip is INSUFFICIENT (documents the bug)', () => {
  it('a tag split across chunks survives a naive per-chunk .replace()', () => {
    const chunks = ['Yes.\n\n<dcp', '-message', '-id>m0019</dcp', '-message-id>'];
    // Wrap the helper so it only ever receives the chunk text: passing it
    // directly to .map() would also hand it the index and the array.
    const naive = chunks.map((chunk) => stripDcpTags(chunk)).join('');
    // The reassembled content still contains the tag — this is the screenshot bug.
    expect(naive).toContain('<dcp-message-id>m0019</dcp-message-id>');
  });
});
|
|
|
|
// Stateful stripper: each case feeds chunks via run() and checks the fully
// reassembled output (streamed emissions + final flush).
describe('makeDcpStreamStripper (cross-chunk fix)', () => {
  it('strips a tag split across chunks (the real opencode case)', () => {
    expect(run(['Yes.\n\n<dcp', '-message', '-id>m0019</dcp', '-message-id>'])).toBe('Yes.\n\n');
  });

  it('strips a tag split at EVERY character boundary', () => {
    const full = 'Answer.<dcp-message-id>m0019</dcp-message-id>';
    // Spreading the string yields one chunk per character.
    expect(run([...full])).toBe('Answer.');
  });

  it('strips a tag delivered whole in one chunk', () => {
    expect(run(['Answer.<dcp-message-id>m0019</dcp-message-id>'])).toBe('Answer.');
  });

  it('passes through text with no tag', () => {
    expect(run(['hello ', 'world'])).toBe('hello world');
  });

  it('does NOT swallow legitimate < content (code/HTML/generics)', () => {
    expect(run(['use ', '<div>', ' and ', 'Array<', 'string>'])).toBe('use <div> and Array<string>');
  });

  it('handles a lone < that is not a dcp tag, split across chunks', () => {
    expect(run(['a <', 'b c'])).toBe('a <b c');
  });

  it('emits surrounding text and strips a mid-text tag', () => {
    expect(run(['before ', '<dcp-message-id>', 'm1', '</dcp-message-id>', ' after'])).toBe(
      'before  after',
    );
  });

  it('flushes a truncated/never-closed partial tag without leaking it as a complete tag', () => {
    // If the stream ends mid-tag, flush strips complete tags; an incomplete
    // remnant is returned as-is (no complete tag ever existed to render).
    const out = run(['done.<dcp-message-id>m00']);
    // The input never contains a closing tag, so the check below cannot fail
    // on its own; also require that the text preceding the partial tag is
    // not lost while the stripper holds back the forming tag.
    expect(out.startsWith('done.')).toBe(true);
    expect(out).not.toContain('</dcp-message-id>');
  });
});
|