fix(coder): strip dcp-message-id tags split across stream chunks
The dcp tag (<dcp-message-id>mNNNN</dcp-message-id>) is streamed token-by-token, so it arrives split across SSE deltas. The existing per-chunk stripDcpTags never sees a complete tag in any single fragment, so fragments pass through and the dispatcher reassembles the tag in textChunks (persisted + shown) — and the terminal message.part.updated path that would strip the full text is suppressed by the dedup gate. Add a stateful cross-chunk stripper (dcp-strip.ts: makeDcpStreamStripper) at the dispatcher's opencode frame boundary: it emits text that cannot be part of a forming tag, holds back only a trailing partial-tag prefix (without swallowing legitimate <…> content), and flushes at turn end. Fixes both live delta frames and persisted content. 11 unit tests incl. split-at-every-boundary and the documented per-chunk-fails case. opencode path only; ACP (goose/qwen/claude) untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
77
apps/coder/src/services/dcp-strip.ts
Normal file
77
apps/coder/src/services/dcp-strip.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Strip opencode-dcp plugin tags (`<dcp-message-id>mNNNN</dcp-message-id>`) that
|
||||
* the @tarquinen/opencode-dcp plugin appends to assistant text and which
|
||||
* otherwise render as literal text in the UI.
|
||||
*
|
||||
* Why a streaming stripper and not a per-chunk `.replace()`: opencode streams
|
||||
* assistant text token-by-token, so the tag arrives SPLIT across many SSE deltas
|
||||
* (`<dcp`, `-message`, `-id>`, `m0019`, `</dcp`, …). A per-chunk regex never sees
|
||||
* a complete tag in any single fragment, so the fragments pass through and the
|
||||
* dispatcher reassembles the full tag in the persisted/displayed content. The
|
||||
* stripper below buffers across chunks: it emits everything that cannot be part
|
||||
* of a forming tag and holds back only a trailing partial-tag prefix until the
|
||||
* next chunk resolves it — without holding back legitimate `<…>` content.
|
||||
*/
|
||||
|
||||
// Literal opening and closing markers of a dcp tag.
const OPEN = '<dcp-message-id>';
const CLOSE = '</dcp-message-id>';

// Matches every COMPLETE tag: opening marker, a run of non-`<` characters,
// closing marker. Built from the marker constants so the three stay in sync
// (neither marker contains a regex metacharacter).
const DCP_TAG_RE = new RegExp(`${OPEN}[^<]*${CLOSE}`, 'g');
|
||||
|
||||
/**
 * Remove every COMPLETE dcp tag from `s` in a single pass.
 *
 * Intended for non-streaming or final content; a tag that is only partially
 * present in `s` is left untouched.
 *
 * @param s - Text that may contain complete dcp tags.
 * @returns `s` with each complete tag deleted.
 */
export function stripDcpTags(s: string): string {
  const withoutTags = s.replace(DCP_TAG_RE, '');
  return withoutTags;
}
|
||||
|
||||
/**
 * Report whether `tail` — a substring beginning at a `<` — could still turn
 * into a complete dcp tag once more text arrives. When it can, the caller has
 * to keep it back instead of emitting it.
 *
 * Unrelated `<` content (`<div>`, `<T>`, …) yields false and streams normally.
 *
 * @param tail - Candidate suffix of the buffered text.
 * @returns true when `tail` is a prefix of some complete dcp tag.
 */
function isPartialDcp(tail: string): boolean {
  // No longer than the opening marker: it must be a prefix of that marker
  // ('<', '<d', …, '<dcp-message-id>').
  if (tail.length <= OPEN.length) {
    return OPEN.startsWith(tail);
  }

  // Longer than the opening marker: the marker has to be fully present.
  if (!tail.startsWith(OPEN)) {
    return false;
  }

  const body = tail.slice(OPEN.length);
  const closeAt = body.indexOf('<');

  // No `<` after the opening marker: still inside the tag's content run.
  if (closeAt === -1) {
    return true;
  }

  // The first `<` after the content must begin a (partial) closing marker.
  return CLOSE.startsWith(body.slice(closeAt));
}
|
||||
|
||||
/** Incremental dcp-tag stripper fed one streamed text chunk at a time. */
export interface DcpStreamStripper {
  /**
   * Accept the next text chunk.
   *
   * @param chunk - Newly received text.
   * @returns The text that is safe to emit right away; may be the empty string.
   */
  push(chunk: string): string;

  /**
   * Signal the end of the stream.
   *
   * @returns The text that was still held back, with complete tags removed.
   */
  flush(): string;
}
|
||||
|
||||
/**
 * Create a stateful, cross-chunk-safe dcp stripper. Use one instance per turn.
 *
 * `push` appends the chunk to an internal buffer, deletes every tag that is
 * now complete, emits all text before the first `<` that could still grow into
 * a dcp tag, and keeps that suffix for the next call. Unrelated `<…>` content
 * is emitted normally. `flush` returns whatever is still held.
 *
 * The held suffix is bounded (256 characters). Without a bound, a literal
 * `<dcp-message-id>` in the assistant text followed by prose containing no `<`
 * would look like an endlessly growing tag body: nothing would be emitted
 * until the next `<` or the end of the turn, and every `push` would re-scan an
 * ever larger buffer. A candidate longer than the bound is treated as ordinary
 * text and emitted.
 *
 * @returns A fresh stripper with an empty buffer.
 */
export function makeDcpStreamStripper(): DcpStreamStripper {
  // A complete `<dcp-message-id>mNNNN</dcp-message-id>` tag is a few dozen
  // characters, so this leaves ample headroom for any real tag.
  const MAX_HELD = 256;
  let buf = '';

  return {
    push(chunk: string): string {
      // Drop any tags completed by this chunk.
      buf = (buf + chunk).replace(DCP_TAG_RE, '');

      // Only a `<` within the last MAX_HELD characters may start a held
      // suffix; anything earlier is too long to be a forming tag.
      const scanFrom = Math.max(0, buf.length - MAX_HELD);

      // Find the earliest eligible `<` whose suffix is a forming dcp tag; hold
      // from there and emit everything before it.
      for (let i = buf.indexOf('<', scanFrom); i !== -1; i = buf.indexOf('<', i + 1)) {
        if (isPartialDcp(buf.slice(i))) {
          const emit = buf.slice(0, i);
          buf = buf.slice(i);
          return emit;
        }
      }

      // Nothing can be part of a forming tag: emit the whole buffer.
      const emit = buf;
      buf = '';
      return emit;
    },

    flush(): string {
      const out = stripDcpTags(buf);
      buf = '';
      return out;
    },
  };
}
|
||||
Reference in New Issue
Block a user