feat: post-review backlog hardening (cancel/parser/stall/history/9502)

Five independent items from the post-review backlog. F1: Stop on an external
agent task now aborts the running child via a per-task AbortController registry
reachable from the cancel route, and finalizes the assistant message as
cancelled (fixing two latent bugs — catch blocks left the message streaming,
and warm success-paths wrote complete on an aborted turn); warm pools/worktrees
are preserved and the native path is unchanged. F2/F3: prune the tool-call
parser to its two load-bearing exports (unexport eight zero-caller symbols, add
a gate test for the <invoke>-as-text fallback) and route placeholder-rejection
logging through pino. F6: a 90s per-chunk stall timeout wraps native inference's
fullStream via AbortSignal.any so a hung stream finalizes the message as
cancelled instead of hanging; there is no retry (a pure classifyStreamError
helper is added as the seam for a future one). F7: a read-only
view_session_history MCP tool (newest-N, chronological). F9: retire the unused
apps/coder/web :9502 fallback SPA, keeping every API/WS/health/MCP route.
This commit is contained in:
2026-06-03 02:23:11 +00:00
parent 39290957ae
commit 5da72c120a
48 changed files with 1014 additions and 2254 deletions

View File

@@ -11,6 +11,7 @@ import type { Agent, ToolCall } from '../../types/api.js';
import type { ToolJsonSchema } from '../tools.js';
import type { OpenAiMessage } from './payload.js';
import { extractToolCallBlocks } from './tool-call-parser.js';
import { classifyStreamError } from './stream-error-classifier.js';
import type { StreamResult } from './types.js';
import { upstreamModel } from './provider.js';
import {
@@ -193,6 +194,10 @@ function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<type
return out;
}
/**
 * F6: per-chunk stall deadline, in milliseconds (90 seconds).
 *
 * Exported so tests can advance fake timers by exactly this value without
 * hardcoding a magic number.
 */
export const STALL_TIMEOUT_MS = 90_000;
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
// llama-swap) emit tool calls as inline XML inside delta.content rather than
// the structured tool_calls field. We extract them out of the streamed text
@@ -267,6 +272,22 @@ export async function streamCompletion(
// before this. They now go through the same extraBody path as the new params.
const samplerBody = buildSamplerProviderOptions(opts);
// F6: per-chunk stall deadline. If the model stops emitting chunks for
// STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
// abort check below then throws AbortError → handleAbortOrError writes
// 'cancelled'. Timer is bumped on every chunk and cleared in the finally.
// NO retry: partial-stream re-emit is non-idempotent at single-local-instance
// scale; see stream-error-classifier.ts for the future retry seam.
const stallAc = new AbortController();
let stallTimer: ReturnType<typeof setTimeout> | null = null;
const bumpStallTimer = () => {
if (stallTimer !== null) clearTimeout(stallTimer);
stallTimer = setTimeout(() => stallAc.abort(), STALL_TIMEOUT_MS);
};
const effectiveSignal = signal
? AbortSignal.any([signal, stallAc.signal])
: stallAc.signal;
const result = streamText({
model: upstreamModel(ctx.config, model, agent ?? null),
messages: aiMessages,
@@ -277,7 +298,7 @@ export async function streamCompletion(
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
abortSignal: signal,
abortSignal: effectiveSignal,
});
let content = '';
@@ -289,7 +310,11 @@ export async function streamCompletion(
// same flat list and keep the v1.10.5 synthetic id convention.
const toolCalls: ToolCall[] = [];
bumpStallTimer();
try {
for await (const part of result.fullStream) {
bumpStallTimer();
switch (part.type) {
case 'text-delta': {
pendingBuffer += part.text;
@@ -297,7 +322,7 @@ export async function streamCompletion(
// complete <tool_call> or <invoke> block, flushes prose between/around
// them, holds any partial opener for the next chunk, and silently
// drops blocks that fail to parse (matches pre-v1.13.16 behavior).
const extracted = extractToolCallBlocks(pendingBuffer);
const extracted = extractToolCallBlocks(pendingBuffer, ctx.log);
if (extracted.flushed.length > 0) {
content += extracted.flushed;
onDelta(extracted.flushed);
@@ -339,7 +364,9 @@ export async function streamCompletion(
}
case 'error': {
const err = part.error;
throw err instanceof Error ? err : new Error(String(err));
const actualErr = err instanceof Error ? err : new Error(String(err));
ctx.log.warn({ kind: classifyStreamError(actualErr) }, 'stream error part');
throw actualErr;
}
// Intentional no-op: start, start-step, text-start, text-end,
// reasoning-start, reasoning-end, source, file, tool-input-start,
@@ -365,7 +392,8 @@ export async function streamCompletion(
// Without this throw the row would land as status='complete' with partial
// content instead of going through handleAbortOrError → status='cancelled'.
// Smoke D caught this in v1.13.1-A — don't refactor it away.
if (signal?.aborted) {
// F6: also catch the stall timeout arm (stallAc.signal.aborted).
if (signal?.aborted || stallAc.signal.aborted) {
const abortErr = new Error('aborted');
abortErr.name = 'AbortError';
throw abortErr;
@@ -402,4 +430,12 @@ export async function streamCompletion(
completionTokens,
reasoning: reasoningAccumulated,
};
} finally {
// Clear the stall timer whether the stream completes normally, throws, or
// is aborted — prevents a dangling timer from firing after the turn ends.
if (stallTimer !== null) {
clearTimeout(stallTimer);
stallTimer = null;
}
}
}