From f1a85627e49cd49edb4f718a7195b768b1276ef2 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Sat, 30 May 2026 23:16:47 +0000 Subject: [PATCH] fix(coder): strip dcp-message-id tags split across stream chunks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dcp tag (mNNNN) is streamed token-by-token, so it arrives split across SSE deltas. The existing per-chunk stripDcpTags never sees a complete tag in any single fragment, so fragments pass through and the dispatcher reassembles the tag in textChunks (persisted + shown) — and the terminal message.part.updated path that would strip the full text is suppressed by the dedup gate. Add a stateful cross-chunk stripper (dcp-strip.ts: makeDcpStreamStripper) at the dispatcher's opencode frame boundary: it emits text that cannot be part of a forming tag, holds back only a trailing partial-tag prefix (without swallowing legitimate <…> content), and flushes at turn end. Fixes both live delta frames and persisted content. 11 unit tests incl. split-at-every-boundary and the documented per-chunk-fails case. opencode path only; ACP (goose/qwen/claude) untouched. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/services/__tests__/dcp-strip.test.ts | 73 ++++++++++++++++++ apps/coder/src/services/dcp-strip.ts | 77 +++++++++++++++++++ apps/coder/src/services/dispatcher.ts | 38 +++++++-- 3 files changed, 180 insertions(+), 8 deletions(-) create mode 100644 apps/coder/src/services/__tests__/dcp-strip.test.ts create mode 100644 apps/coder/src/services/dcp-strip.ts diff --git a/apps/coder/src/services/__tests__/dcp-strip.test.ts b/apps/coder/src/services/__tests__/dcp-strip.test.ts new file mode 100644 index 0000000..7878634 --- /dev/null +++ b/apps/coder/src/services/__tests__/dcp-strip.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest'; +import { stripDcpTags, makeDcpStreamStripper } from '../dcp-strip.js'; + +// Feed chunks through a fresh stripper and return the fully reassembled output +// (everything emitted during streaming + the final flush) — i.e. what the +// dispatcher would accumulate into the persisted message content. +function run(chunks: string[]): string { + const s = makeDcpStreamStripper(); + let out = ''; + for (const c of chunks) out += s.push(c); + out += s.flush(); + return out; +} + +describe('stripDcpTags (one-shot)', () => { + it('removes a complete tag', () => { + expect(stripDcpTags('Yes — "Test".\n\nm0019')).toBe( + 'Yes — "Test".\n\n', + ); + }); + it('leaves text without a tag untouched', () => { + expect(stripDcpTags('no tag here')).toBe('no tag here'); + }); +}); + +describe('per-chunk strip is INSUFFICIENT (documents the bug)', () => { + it('a tag split across chunks survives a naive per-chunk .replace()', () => { + const chunks = ['Yes.\n\nm0019']; + const naive = chunks.map(stripDcpTags).join(''); + // The reassembled content still contains the tag — this is the screenshot bug. + expect(naive).toContain('m0019'); + }); +}); + +describe('makeDcpStreamStripper (cross-chunk fix)', () => { + it('strips a tag split across chunks (the real opencode case)', () => { + expect(run(['Yes.\n\nm0019'])).toBe('Yes.\n\n'); + }); + + it('strips a tag split at EVERY character boundary', () => { + const full = 'Answer.m0019'; + expect(run([...full])).toBe('Answer.'); + }); + + it('strips a tag delivered whole in one chunk', () => { + expect(run(['Answer.m0019'])).toBe('Answer.'); + }); + + it('passes through text with no tag', () => { + expect(run(['hello ', 'world'])).toBe('hello world'); + }); + + it('does NOT swallow legitimate < content (code/HTML/generics)', () => { + expect(run(['use ', '
', ' and ', 'Array<', 'string>'])).toBe('use
and Array'); + }); + + it('handles a lone < that is not a dcp tag, split across chunks', () => { + expect(run(['a <', 'b c'])).toBe('a { + expect(run(['before ', '', 'm1', '', ' after'])).toBe( + 'before after', + ); + }); + + it('flushes a truncated/never-closed partial tag without leaking it as a complete tag', () => { + // If the stream ends mid-tag, flush strips complete tags; an incomplete + // remnant is returned as-is (no complete tag ever existed to render). + const out = run(['done.m00']); + expect(out).not.toContain(''); + }); +}); diff --git a/apps/coder/src/services/dcp-strip.ts b/apps/coder/src/services/dcp-strip.ts new file mode 100644 index 0000000..107d95c --- /dev/null +++ b/apps/coder/src/services/dcp-strip.ts @@ -0,0 +1,77 @@ +/** + * Strip opencode-dcp plugin tags (`mNNNN`) that + * the @tarquinen/opencode-dcp plugin appends to assistant text and which + * otherwise render as literal text in the UI. + * + * Why a streaming stripper and not a per-chunk `.replace()`: opencode streams + * assistant text token-by-token, so the tag arrives SPLIT across many SSE deltas + * (``, `m0019`, `` content. + */ + +const DCP_TAG_RE = /[^<]*<\/dcp-message-id>/g; +const OPEN = ''; +const CLOSE = ''; + +/** One-shot strip of COMPLETE tags. Safe for non-streaming / final content. */ +export function stripDcpTags(s: string): string { + return s.replace(DCP_TAG_RE, ''); +} + +/** + * Could `tail` (a substring starting at a `<`) still grow into a complete dcp + * tag on a future chunk? If so the caller must hold it back rather than emit it. + * Returns false for unrelated `<` content (`
`, ``, …) so those stream + * normally. + */ +function isPartialDcp(tail: string): boolean { + // A prefix of the opening marker: '<', '`). + for (let i = buf.indexOf('<'); i !== -1; i = buf.indexOf('<', i + 1)) { + if (isPartialDcp(buf.slice(i))) { + const emit = buf.slice(0, i); + buf = buf.slice(i); + return emit; + } + } + const emit = buf; + buf = ''; + return emit; + }, + flush(): string { + const out = stripDcpTags(buf); + buf = ''; + return out; + }, + }; +} diff --git a/apps/coder/src/services/dispatcher.ts b/apps/coder/src/services/dispatcher.ts index fe14c4d..fdbf716 100644 --- a/apps/coder/src/services/dispatcher.ts +++ b/apps/coder/src/services/dispatcher.ts @@ -4,6 +4,7 @@ import type { Broker } from '@boocode/server/broker'; import type { WsFrame } from '@boocode/server/ws-frames'; import type { Config } from '../config.js'; import { createWorktree, diffWorktree, cleanupWorktree, ensureSessionWorktree } from './worktrees.js'; +import { makeDcpStreamStripper } from './dcp-strip.js'; import { dispatchViaAcp } from './acp-dispatch.js'; import { getResolvedRegistry } from './provider-config-registry.js'; import { dispatchViaPty } from './pty-dispatch.js'; @@ -620,21 +621,30 @@ export function createDispatcher(deps: Deps): { start(): void; stop(): Promise(); + // opencode's dcp plugin appends to the + // text, streamed split across deltas — a per-chunk regex misses it (see + // dcp-strip.ts). Buffer text through a cross-chunk stripper so neither the + // live `delta` frames nor the persisted content ever carry the tag. + const dcp = makeDcpStreamStripper(); // Map transport-agnostic AgentEvents → the SAME WS frames the ACP path emits. // This boundary is where message_id/chat_id get attached (the backend never // owns them). const onEvent = (e: AgentEvent): void => { switch (e.type) { - case 'text': - textChunks.push(e.text); - broker.publishFrame(sessionId, { - type: 'delta', - message_id: assistantId, - chat_id: chatId, - content: e.text, - } as WsFrame); + case 'text': { + const safe = dcp.push(e.text); + if (safe) { + textChunks.push(safe); + broker.publishFrame(sessionId, { + type: 'delta', + message_id: assistantId, + chat_id: chatId, + content: safe, + } as WsFrame); + } break; + } case 'reasoning': reasoningChunks.push(e.text); broker.publishFrame(sessionId, { @@ -680,6 +690,18 @@ export function createDispatcher(deps: Deps): { start(): void; stop(): Promise