From ff29b48e3abfaf9dd8206a0726d0e4b26a32d4cf Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Fri, 22 May 2026 13:24:19 +0000 Subject: [PATCH] =?UTF-8?q?v1.13.7:=20stability=20bundle=20=E2=80=94=20usa?= =?UTF-8?q?ge=20capture=20+=20payload/UI=20sanitization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five fixes for latent regressions surfaced during the v1.13.x.cosmetic revert investigation. None alter schema or compaction; all are cleanup of the hidden surface left by the v1.13.1-A AI SDK migration. (1) provider.ts — includeUsage: true on createOpenAICompatible. @ai-sdk/openai-compatible defaults this to false, omitting stream_options.include_usage from the request body; llama-swap never emitted the usage block, so result.usage.inputTokens/outputTokens resolved to undefined and tokens_used / ctx_used landed NULL in every assistant row since v1.13.1-A. No historical backfill. (2) MessageList.tsx — hasText = m.content.trim().length > 0. AI SDK v6 streaming occasionally emits a leading "\n" text-delta on tool-call-only turns; the literal newline passed length > 0 and rendered an empty bubble + ActionRow between every tool call. Trim catches it without changing semantics for genuine content. (3) MessageBubble.tsx — same trim on hasContent for the no-tool-calls path. Defensive symmetry with MessageList.flatten. (4) payload.ts — buildMessagesPayload skips assistant rows with status='failed' AND assistant rows with status='complete' + empty or whitespace-only content + no tool_calls. Without this, a trailing empty/failed assistant + the next attempt's placeholder produced "Cannot have 2 or more assistant messages at the end of the list" rejections from the OpenAI-compatible upstream after cap-hit + Continue. (5) budget.ts — BUDGET_NO_AGENT 15 → 30. Every tool in ALL_TOOLS is read-only today; the 15-cap was forward-looking for write tools that haven't landed. No-agent mode now matches BUDGET_READ_ONLY. 47 LoC across 5 files. 
190/190 server tests pass. Verified live: new assistant turns populate StatsLine token data; single-tool-call turns no longer render the stray empty-bubble + ActionRow between tool calls; Continue after cap-hit no longer hits the trailing-assistant API rejection. --- apps/server/src/services/inference/budget.ts | 9 +++++++-- apps/server/src/services/inference/payload.ts | 19 +++++++++++++++++++ .../server/src/services/inference/provider.ts | 8 ++++++++ apps/web/src/components/MessageBubble.tsx | 4 +++- apps/web/src/components/MessageList.tsx | 7 ++++++- 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/apps/server/src/services/inference/budget.ts b/apps/server/src/services/inference/budget.ts index 01659ce..bbcd330 100644 --- a/apps/server/src/services/inference/budget.ts +++ b/apps/server/src/services/inference/budget.ts @@ -5,10 +5,15 @@ import { READ_ONLY_TOOL_NAMES } from '../tools.js'; // - Agent with explicit max_tool_calls: that value. // - Agent with read-only-only tools: BUDGET_READ_ONLY (30). // - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10). -// - No agent (raw chat): BUDGET_NO_AGENT (15). +// - No agent (raw chat): BUDGET_NO_AGENT (30). +// v1.13.7: bumped BUDGET_NO_AGENT 15→30 to match BUDGET_READ_ONLY. Every tool +// in ALL_TOOLS today is read-only (see services/tools.ts comment at +// READ_ONLY_TOOL_NAMES); the cautious 15-cap was a forward-looking guard for +// write tools that haven't landed yet. No-agent mode gets the same toolset as +// an all-read-only agent at runtime, so they should share the same budget. 
export const BUDGET_READ_ONLY = 30; export const BUDGET_NON_READ_ONLY = 10; -export const BUDGET_NO_AGENT = 15; +export const BUDGET_NO_AGENT = 30; const READ_ONLY_SET: ReadonlySet = new Set(READ_ONLY_TOOL_NAMES); diff --git a/apps/server/src/services/inference/payload.ts b/apps/server/src/services/inference/payload.ts index f5c11f9..ed63f5f 100644 --- a/apps/server/src/services/inference/payload.ts +++ b/apps/server/src/services/inference/payload.ts @@ -63,6 +63,25 @@ export async function buildMessagesPayload( if (isAnySentinel(m)) continue; if (m.role === 'assistant' && m.status === 'streaming') continue; if (m.role === 'assistant' && m.status === 'cancelled') continue; + // v1.13.7: skip failed assistant turns. A failed row carries no usable + // content for the model, and leaving it in the payload alongside any + // following assistant message produces "Cannot have 2 or more assistant + // messages at the end of the list" from the OpenAI-compatible upstream. + if (m.role === 'assistant' && m.status === 'failed') continue; + // v1.13.7: skip "empty" completed assistants — clen=0 + no tool_calls. + // These can land when an upstream stream returns finishReason='stop' with + // no text/tool output (network blip, rate limit recovery, model quirk). + // Same risk as the failed-status case: a trailing empty assistant plus + // the next attempt's assistant placeholder = two trailing assistants and + // the API rejects the whole payload. 
+ if ( + m.role === 'assistant' && + m.status === 'complete' && + (m.content == null || m.content.trim().length === 0) && + (m.tool_calls == null || m.tool_calls.length === 0) + ) { + continue; + } if (m.role === 'tool') { const tr = m.tool_results; if (!tr) continue; diff --git a/apps/server/src/services/inference/provider.ts b/apps/server/src/services/inference/provider.ts index 8edce34..d9faf93 100644 --- a/apps/server/src/services/inference/provider.ts +++ b/apps/server/src/services/inference/provider.ts @@ -15,6 +15,14 @@ function getProvider(baseURL: string): ReturnType provider = createOpenAICompatible({ name: 'llama-swap', baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`, + // v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which + // omits `stream_options.include_usage` from the request body. Without + // it, llama.cpp / llama-swap never emits the trailing usage block, so + // `result.usage` resolves with inputTokens=outputTokens=undefined and + // tokens_used / ctx_used land as NULL in every messages row. Setting + // true here re-enables the per-stream usage payload across all models + // served via the llama-swap provider. + includeUsage: true, }); cache.set(baseURL, provider); } diff --git a/apps/web/src/components/MessageBubble.tsx b/apps/web/src/components/MessageBubble.tsx index e382e1d..abbc2c0 100644 --- a/apps/web/src/components/MessageBubble.tsx +++ b/apps/web/src/components/MessageBubble.tsx @@ -651,7 +651,9 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) { const isStreaming = message.status === 'streaming'; const failed = message.status === 'failed'; - const hasContent = message.content.length > 0; + // v1.13.7: match the MessageList.flatten trim guard so a whitespace-only + // assistant turn doesn't render an empty bubble + dangling ActionRow. 
+ const hasContent = message.content.trim().length > 0; // v1.8.2: if metadata stamps an error reason, surface it inline under the // generic "message failed" line. Keeps the user's eye where it already is // rather than introducing a separate banner. diff --git a/apps/web/src/components/MessageList.tsx b/apps/web/src/components/MessageList.tsx index f90267a..154fa35 100644 --- a/apps/web/src/components/MessageList.tsx +++ b/apps/web/src/components/MessageList.tsx @@ -45,7 +45,12 @@ function flatten(messages: Message[]): RenderItem[] { continue; } const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0; - const hasText = m.content.length > 0; + // v1.13.7: trim before checking. AI SDK v6 streaming occasionally emits a + // leading "\n" text-delta on tool-call-only turns, which used to flow into + // messages.content with length=1 and render an empty bubble + ActionRow + // between each tool call. Whitespace-only content has no visible payload, + // so treat it as no-content. + const hasText = m.content.trim().length > 0; if (m.role === 'assistant' && hasToolCalls) { if (hasText || m.status === 'streaming') { items.push({ kind: 'message', message: m });