Compare commits
1 Commits
v1.13.4-re
...
v1.13.5-st
| Author | SHA1 | Date | |
|---|---|---|---|
| ff29b48e3a |
@@ -5,10 +5,15 @@ import { READ_ONLY_TOOL_NAMES } from '../tools.js';
|
||||
// - Agent with explicit max_tool_calls: that value.
|
||||
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
||||
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
||||
// - No agent (raw chat): BUDGET_NO_AGENT (15).
|
||||
// - No agent (raw chat): BUDGET_NO_AGENT (30).
|
||||
// v1.13.7: bumped BUDGET_NO_AGENT 15→30 to match BUDGET_READ_ONLY. Every tool
|
||||
// in ALL_TOOLS today is read-only (see services/tools.ts comment at
|
||||
// READ_ONLY_TOOL_NAMES); the cautious 15-cap was a forward-looking guard for
|
||||
// write tools that haven't landed yet. No-agent mode gets the same toolset as
|
||||
// an all-read-only agent at runtime, so they should share the same budget.
|
||||
export const BUDGET_READ_ONLY = 30;
|
||||
export const BUDGET_NON_READ_ONLY = 10;
|
||||
export const BUDGET_NO_AGENT = 15;
|
||||
export const BUDGET_NO_AGENT = 30;
|
||||
|
||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||
|
||||
|
||||
@@ -63,6 +63,25 @@ export async function buildMessagesPayload(
|
||||
if (isAnySentinel(m)) continue;
|
||||
if (m.role === 'assistant' && m.status === 'streaming') continue;
|
||||
if (m.role === 'assistant' && m.status === 'cancelled') continue;
|
||||
// v1.13.7: skip failed assistant turns. A failed row carries no usable
|
||||
// content for the model, and leaving it in the payload alongside any
|
||||
// following assistant message produces "Cannot have 2 or more assistant
|
||||
// messages at the end of the list" from the OpenAI-compatible upstream.
|
||||
if (m.role === 'assistant' && m.status === 'failed') continue;
|
||||
// v1.13.7: skip "empty" completed assistants — clen=0 + no tool_calls.
|
||||
// These can land when an upstream stream returns finishReason='stop' with
|
||||
// no text/tool output (network blip, rate limit recovery, model quirk).
|
||||
// Same risk as the failed-status case: a trailing empty assistant plus
|
||||
// the next attempt's assistant placeholder = two trailing assistants and
|
||||
// the API rejects the whole payload.
|
||||
if (
|
||||
m.role === 'assistant' &&
|
||||
m.status === 'complete' &&
|
||||
(m.content == null || m.content.trim().length === 0) &&
|
||||
(m.tool_calls == null || m.tool_calls.length === 0)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
if (m.role === 'tool') {
|
||||
const tr = m.tool_results;
|
||||
if (!tr) continue;
|
||||
|
||||
@@ -15,6 +15,14 @@ function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible>
|
||||
provider = createOpenAICompatible({
|
||||
name: 'llama-swap',
|
||||
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||
// v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
|
||||
// omits `stream_options.include_usage` from the request body. Without
|
||||
// it, llama.cpp / llama-swap never emits the trailing usage block, so
|
||||
// `result.usage` resolves with inputTokens=outputTokens=undefined and
|
||||
// tokens_used / ctx_used land as NULL in every messages row. Setting
|
||||
// true here re-enables the per-stream usage payload across all models
|
||||
// served via the llama-swap provider.
|
||||
includeUsage: true,
|
||||
});
|
||||
cache.set(baseURL, provider);
|
||||
}
|
||||
|
||||
@@ -651,7 +651,9 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) {
|
||||
|
||||
const isStreaming = message.status === 'streaming';
|
||||
const failed = message.status === 'failed';
|
||||
const hasContent = message.content.length > 0;
|
||||
// v1.13.7: match the MessageList.flatten trim guard so a whitespace-only
|
||||
// assistant turn doesn't render an empty bubble + dangling ActionRow.
|
||||
const hasContent = message.content.trim().length > 0;
|
||||
// v1.8.2: if metadata stamps an error reason, surface it inline under the
|
||||
// generic "message failed" line. Keeps the user's eye where it already is
|
||||
// rather than introducing a separate banner.
|
||||
|
||||
@@ -45,7 +45,12 @@ function flatten(messages: Message[]): RenderItem[] {
|
||||
continue;
|
||||
}
|
||||
const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0;
|
||||
const hasText = m.content.length > 0;
|
||||
// v1.13.7: trim before checking. AI SDK v6 streaming occasionally emits a
|
||||
// leading "\n" text-delta on tool-call-only turns, which used to flow into
|
||||
// messages.content with length=1 and render an empty bubble + ActionRow
|
||||
// between each tool call. Whitespace-only content has no visible payload,
|
||||
// so treat it as no-content.
|
||||
const hasText = m.content.trim().length > 0;
|
||||
if (m.role === 'assistant' && hasToolCalls) {
|
||||
if (hasText || m.status === 'streaming') {
|
||||
items.push({ kind: 'message', message: m });
|
||||
|
||||
Reference in New Issue
Block a user