From ff29b48e3abfaf9dd8206a0726d0e4b26a32d4cf Mon Sep 17 00:00:00 2001
From: indifferentketchup <samkintop@gmail.com>
Date: Fri, 22 May 2026 13:24:19 +0000
Subject: [PATCH] =?UTF-8?q?v1.13.7:=20stability=20bundle=20=E2=80=94=20usa?=
 =?UTF-8?q?ge=20capture=20+=20payload/UI=20sanitization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five fixes for latent regressions surfaced during the v1.13.x.cosmetic
revert investigation. None alter schema or compaction; all are cleanup
against the v1.13.1-A AI SDK migration's hidden surface.

(1) provider.ts — includeUsage: true on createOpenAICompatible.
@ai-sdk/openai-compatible defaults this to false, which omits
stream_options.include_usage from the request body; llama-swap then
never emitted the usage block, so result.usage.inputTokens/outputTokens
resolved to undefined and tokens_used / ctx_used landed as NULL in
every assistant row since v1.13.1-A. No historical backfill.

(2) MessageList.tsx — hasText = m.content.trim().length > 0.
AI SDK v6 streaming occasionally emits a leading "\n" text-delta on
tool-call-only turns; the literal newline passed length > 0 and
rendered an empty bubble + ActionRow between every tool call. Trim
catches it without changing semantics for genuine content.

(3) MessageBubble.tsx — same trim on hasContent for the no-tool-calls
path. Defensive symmetry with MessageList.flatten.

(4) payload.ts — buildMessagesPayload skips assistant rows with
status='failed' AND assistant rows with status='complete' + empty
content + no tool_calls. Without this, a trailing empty/failed
assistant + the next attempt's placeholder produced "Cannot have 2
or more assistant messages at the end of the list" rejections from
the OpenAI-compatible upstream after cap-hit + Continue.

(5) budget.ts — BUDGET_NO_AGENT 15 → 30. Every tool in ALL_TOOLS is
read-only today; the 15-cap was forward-looking for write tools that
haven't landed. No-agent mode now matches BUDGET_READ_ONLY.

47 changed lines (43 insertions, 4 deletions) across 5 files.
190/190 server tests pass.

Verified live: new assistant turns populate StatsLine token data;
single-tool-call turns no longer render the stray empty-bubble +
ActionRow between tool calls; Continue after cap-hit no longer hits
the trailing-assistant API rejection.
---
 apps/server/src/services/inference/budget.ts  |  9 +++++++--
 apps/server/src/services/inference/payload.ts | 19 +++++++++++++++++++
 .../server/src/services/inference/provider.ts |  8 ++++++++
 apps/web/src/components/MessageBubble.tsx     |  4 +++-
 apps/web/src/components/MessageList.tsx       |  7 ++++++-
 5 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/apps/server/src/services/inference/budget.ts b/apps/server/src/services/inference/budget.ts
index 01659ce..bbcd330 100644
--- a/apps/server/src/services/inference/budget.ts
+++ b/apps/server/src/services/inference/budget.ts
@@ -5,10 +5,15 @@ import { READ_ONLY_TOOL_NAMES } from '../tools.js';
 //   - Agent with explicit max_tool_calls: that value.
 //   - Agent with read-only-only tools:    BUDGET_READ_ONLY (30).
 //   - Agent with any non-read-only tool:  BUDGET_NON_READ_ONLY (10).
-//   - No agent (raw chat):                BUDGET_NO_AGENT (15).
+//   - No agent (raw chat):                BUDGET_NO_AGENT (30).
+// v1.13.7: bumped BUDGET_NO_AGENT 15→30 to match BUDGET_READ_ONLY. Every tool
+// in ALL_TOOLS today is read-only (see services/tools.ts comment at
+// READ_ONLY_TOOL_NAMES); the cautious 15-cap was a forward-looking guard for
+// write tools that haven't landed yet. No-agent mode gets the same toolset as
+// an all-read-only agent at runtime, so they should share the same budget.
 export const BUDGET_READ_ONLY = 30;
 export const BUDGET_NON_READ_ONLY = 10;
-export const BUDGET_NO_AGENT = 15;
+export const BUDGET_NO_AGENT = 30;
 
 const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
 
diff --git a/apps/server/src/services/inference/payload.ts b/apps/server/src/services/inference/payload.ts
index f5c11f9..ed63f5f 100644
--- a/apps/server/src/services/inference/payload.ts
+++ b/apps/server/src/services/inference/payload.ts
@@ -63,6 +63,25 @@ export async function buildMessagesPayload(
     if (isAnySentinel(m)) continue;
     if (m.role === 'assistant' && m.status === 'streaming') continue;
     if (m.role === 'assistant' && m.status === 'cancelled') continue;
+    // v1.13.7: skip failed assistant turns. A failed row carries no usable
+    // content for the model, and leaving it in the payload alongside any
+    // following assistant message produces "Cannot have 2 or more assistant
+    // messages at the end of the list" from the OpenAI-compatible upstream.
+    if (m.role === 'assistant' && m.status === 'failed') continue;
+    // v1.13.7: skip "empty" completed assistants — blank content + no tool_calls.
+    // These can land when an upstream stream returns finishReason='stop' with
+    // no text/tool output (network blip, rate limit recovery, model quirk).
+    // Same risk as the failed-status case: a trailing empty assistant plus
+    // the next attempt's assistant placeholder = two trailing assistants and
+    // the API rejects the whole payload.
+    if (
+      m.role === 'assistant' &&
+      m.status === 'complete' &&
+      (m.content == null || m.content.trim().length === 0) &&
+      (m.tool_calls == null || m.tool_calls.length === 0)
+    ) {
+      continue;
+    }
     if (m.role === 'tool') {
       const tr = m.tool_results;
       if (!tr) continue;
diff --git a/apps/server/src/services/inference/provider.ts b/apps/server/src/services/inference/provider.ts
index 8edce34..d9faf93 100644
--- a/apps/server/src/services/inference/provider.ts
+++ b/apps/server/src/services/inference/provider.ts
@@ -15,6 +15,14 @@ function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible>
     provider = createOpenAICompatible({
       name: 'llama-swap',
       baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
+      // v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
+      // omits `stream_options.include_usage` from the request body. Without
+      // it, llama.cpp / llama-swap never emits the trailing usage block, so
+      // `result.usage` resolves with inputTokens=outputTokens=undefined and
+      // tokens_used / ctx_used land as NULL in every messages row. Setting
+      // true here re-enables the per-stream usage payload across all models
+      // served via the llama-swap provider.
+      includeUsage: true,
     });
     cache.set(baseURL, provider);
   }
diff --git a/apps/web/src/components/MessageBubble.tsx b/apps/web/src/components/MessageBubble.tsx
index e382e1d..abbc2c0 100644
--- a/apps/web/src/components/MessageBubble.tsx
+++ b/apps/web/src/components/MessageBubble.tsx
@@ -651,7 +651,9 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) {
 
   const isStreaming = message.status === 'streaming';
   const failed = message.status === 'failed';
-  const hasContent = message.content.length > 0;
+  // v1.13.7: match the MessageList.flatten trim guard so a whitespace-only
+  // assistant turn doesn't render an empty bubble + dangling ActionRow.
+  const hasContent = message.content.trim().length > 0;
   // v1.8.2: if metadata stamps an error reason, surface it inline under the
   // generic "message failed" line. Keeps the user's eye where it already is
   // rather than introducing a separate banner.
diff --git a/apps/web/src/components/MessageList.tsx b/apps/web/src/components/MessageList.tsx
index f90267a..154fa35 100644
--- a/apps/web/src/components/MessageList.tsx
+++ b/apps/web/src/components/MessageList.tsx
@@ -45,7 +45,12 @@ function flatten(messages: Message[]): RenderItem[] {
       continue;
     }
     const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0;
-    const hasText = m.content.length > 0;
+    // v1.13.7: trim before checking. AI SDK v6 streaming occasionally emits a
+    // leading "\n" text-delta on tool-call-only turns, which used to flow into
+    // messages.content with length=1 and render an empty bubble + ActionRow
+    // between each tool call. Whitespace-only content has no visible payload,
+    // so treat it as no-content.
+    const hasText = m.content.trim().length > 0;
     if (m.role === 'assistant' && hasToolCalls) {
       if (hasText || m.status === 'streaming') {
         items.push({ kind: 'message', message: m });