From 31e1b32be191879ce1d7ce60909074be8b3eef21 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Tue, 26 May 2026 16:22:43 +0000 Subject: [PATCH] v2.2.2-xml-placeholder-reject: drop placeholder XML tool calls at parse time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reject qwen3.6 spurious tails with path "..." or empty args before they enter toolCalls, preventing duplicate assistant answers. Dropped blocks append to flushed text; four new xml-parser tests. DEFERRED-WORK §6 for console.debug → pino cleanup. Co-authored-by: Cursor --- CHANGELOG.md | 4 ++ CURRENT.md | 2 +- .../src/services/__tests__/xml-parser.test.ts | 38 +++++++++++++++++++ .../src/services/inference/xml-parser.ts | 37 +++++++++++++++++- docs/DEFERRED-WORK.md | 12 ++++++ 5 files changed, 91 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bc087a..dcad1ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes per release tag. Most recent on top, ordered by tag creation date (which matches the git history). Tag names follow `vMAJOR.MINOR.PATCH-slug` — the slug describes what shipped, so the tag name alone is enough to recall the batch. +## v2.2.2-xml-placeholder-reject — 2026-05-26 + +Reject placeholder XML tool args at parse time in `extractToolCallBlocks` (`xml-parser.ts`). Drops calls when any string arg is `...`, empty/whitespace, ``, ``, `placeholder`, or angle-bracket sentinels; appends the raw XML block to flushed prose instead of silently deleting it. Fixes qwen3.6 answer-then-spurious-tools tail that caused duplicate assistant rows (full answer + failed `xml_call_*` tools + regenerated answer). Four new tests in `xml-parser.test.ts`. Known nit: rejection logs via `console.debug` instead of pino — filed in `docs/DEFERRED-WORK.md` §6 for a later cleanup. + ## v2.2.1-pane-scoped-chats — 2026-05-26 Follow-up fixes on the v2.2 Paseo provider stack. Pane-scoped chat resolution: `resolveChatId(sql, sessionId, paneId)` reads `sessions.workspace_panes`, requires `pane_id` on coder POST routes, and creates a scoped chat per coder/terminal pane instead of falling back to the session's first open chat (which fused BooCoder writes into the BooChat pane). Client `useWorkspacePanes` seeds new coder/terminal panes with dedicated chats on create, hydrate, and workspace sync; `CoderPane` blocks send until seeded and filters WS frames + `GET /messages?chat_id=` to that chat. External-agent tool UI: new `CoderMessageList` renders BooChat-style `ToolCallLine` timeline (tools before answer text on combined ACP rows). WS user-delta handling replaces content instead of appending (fixes garbled duplicate user messages when optimistic UI met full-body deltas). BooChat inference: `buildMessagesPayload` strips orphan assistant `tool_calls` without matching `tool` rows and skips stray tool rows when the owning assistant turn is incomplete (fixes "Tool results are missing for tool calls" on shared chats with ACP history). Pairs with `v2.2-paseo-providers`. diff --git a/CURRENT.md b/CURRENT.md index 3d49d91..57850d8 100644 --- a/CURRENT.md +++ b/CURRENT.md @@ -5,6 +5,6 @@ Last updated: 2026-05-26 - **Batch:** v2.3-provider-lifecycle (openspec drafted; not started) - **Branch:** `main` - **Blockers:** none -- **Last shipped:** `v2.2.1-pane-scoped-chats` (pairs with `v2.2-paseo-providers` on same commit) +- **Last shipped:** `v2.2.2-xml-placeholder-reject` Update this file when starting or finishing a batch. Agents: read this first for session intent; if stale vs `CHANGELOG.md`, trust CHANGELOG for shipped state. diff --git a/apps/server/src/services/__tests__/xml-parser.test.ts b/apps/server/src/services/__tests__/xml-parser.test.ts index 86d06df..6e2bd32 100644 --- a/apps/server/src/services/__tests__/xml-parser.test.ts +++ b/apps/server/src/services/__tests__/xml-parser.test.ts @@ -270,6 +270,44 @@ describe('extractToolCallBlocks (v1.13.16 — unified extraction)', () => { expect(result.flushed).toBe(input); expect(result.remaining).toBe(''); }); + + describe('placeholder arg rejection (qwen3.6 answer-then-spurious-tools)', () => { + it('rejects with path "..." — 0 calls, block in flushed', () => { + const block = '...'; + const result = extractToolCallBlocks(`Answer text.\n${block}`); + expect(result.calls).toEqual([]); + expect(result.flushed).toContain('Answer text.'); + expect(result.flushed).toContain(block); + expect(result.remaining).toBe(''); + }); + + it('rejects with empty path — 0 calls, block in flushed', () => { + const block = ''; + const result = extractToolCallBlocks(block); + expect(result.calls).toEqual([]); + expect(result.flushed).toBe(block); + expect(result.remaining).toBe(''); + }); + + it('rejects with path "" — 0 calls', () => { + const block = ''; + const result = extractToolCallBlocks(block); + expect(result.calls).toEqual([]); + expect(result.flushed).toBe(block); + }); + + it('returns 1 valid call and flushes placeholder block when mixed in same buffer', () => { + const valid = + '/opt/boocode/README.md'; + const placeholder = + '...'; + const result = extractToolCallBlocks(`${valid} tail ${placeholder}`); + expect(result.calls).toEqual([{ name: 'view_file', args: { path: '/opt/boocode/README.md' } }]); + expect(result.flushed).toContain('tail'); + expect(result.flushed).toContain(placeholder); + expect(result.remaining).toBe(''); + }); + }); }); describe('levenshtein', () => { diff --git a/apps/server/src/services/inference/xml-parser.ts b/apps/server/src/services/inference/xml-parser.ts index 55d833d..0f8b932 100644 --- a/apps/server/src/services/inference/xml-parser.ts +++ b/apps/server/src/services/inference/xml-parser.ts @@ -24,6 +24,34 @@ export interface ParsedCall { args: Record; } +const PLACEHOLDER_LITERALS = new Set(['...', 'placeholder', '', '']); +const ANGLE_BRACKET_SENTINEL_RE = /^<[^>]+>$/; + +/** True when a string arg looks like a model placeholder, not a real path/value. */ +export function isPlaceholderArgValue(value: unknown): boolean { + if (typeof value !== 'string') return false; + const trimmed = value.trim(); + if (trimmed === '') return true; + if (PLACEHOLDER_LITERALS.has(trimmed)) return true; + if (ANGLE_BRACKET_SENTINEL_RE.test(trimmed)) return true; + return false; +} + +function hasPlaceholderArgs(args: Record): boolean { + for (const value of Object.values(args)) { + if (isPlaceholderArgValue(value)) return true; + } + return false; +} + +function logRejectedPlaceholder(parsed: ParsedCall): void { + // Pure helper — no Fastify logger here (stream-phase.ts stays unchanged). + console.debug( + { toolName: parsed.name, args: parsed.args }, + 'rejected placeholder tool call at parse time', + ); +} + // v1.10.5: Qwen-flavor parser. Tightened in v1.13.16 to tolerate whitespace // around `=` (e.g. ``). Name capture is non-whitespace, // non-`>` so a stray space doesn't get absorbed into the function name. @@ -152,7 +180,14 @@ export function extractToolCallBlocks(buffer: string): ToolCallExtraction { const blockEnd = next.closeIdx + next.spec.close.length; const block = buffer.slice(next.openIdx, blockEnd); const parsed = next.spec.parse(block); - if (parsed) calls.push(parsed); + if (parsed) { + if (hasPlaceholderArgs(parsed.args)) { + logRejectedPlaceholder(parsed); + flushed += block; + } else { + calls.push(parsed); + } + } pos = blockEnd; } diff --git a/docs/DEFERRED-WORK.md b/docs/DEFERRED-WORK.md index 0436ec7..34cbdc8 100644 --- a/docs/DEFERRED-WORK.md +++ b/docs/DEFERRED-WORK.md @@ -15,6 +15,7 @@ Last updated: 2026-05-26 | Unified `packages/types` | Maintainability | Low (dev-only) | Medium–High | Type drift between server, coder, web | | Large file splits | Maintainability | None directly | Medium per file | Harder reviews, merge conflicts | | Retire `apps/coder/web/` fallback SPA | Scope / ops | Low — Sam uses CoderPane | Medium | Dual UI maintenance, divergent API client | +| `console.debug` in xml-parser placeholder rejection | Maintainability | None (logs only) | Trivial | Placeholder rejections miss pino pipeline / level filters | --- @@ -325,6 +326,16 @@ Standalone Vite React app (`@boocode/coder-web`) built into `apps/coder/web/dist --- +## 6. xml-parser placeholder rejection — structured logging (v2.2.2 cleanup) + +**Shipped (uncommitted deploy):** `extractToolCallBlocks` rejects placeholder XML tool args at parse time; dropped blocks append to `flushed`. + +**Nit:** rejection path uses `console.debug` instead of the Fastify/pino `log.debug({ toolName, args }, '…')` pattern used elsewhere. Cosmetic — behavior is correct; logs won't appear in the usual structured pipeline or respect `LOG_LEVEL`. + +**Fix:** pass an optional logger into `extractToolCallBlocks` from `stream-phase.ts` executeStreamPhase (one call site), or use a module-level debug hook. Target tag: **v2.2.2** cleanup batch, not a blocker. + +--- + ## Suggested batch ordering If picking these up as openspec batches: @@ -334,6 +345,7 @@ If picking these up as openspec batches: 3. **CoderPane hook extraction** — natural follow-on when adding cancel UI 4. **Zod parity or packages/types** — when next WS/provider field is added 5. **Retire coder/web** — only after explicit “I don’t use :9502 UI” confirmation +6. **v2.2.2 xml-parser log uniformity** — `console.debug` → pino (§6) ---