Compare commits
1 Commits
v1.13.4-re
...
v1.13.5-st
| Author | SHA1 | Date | |
|---|---|---|---|
| ff29b48e3a |
@@ -5,10 +5,15 @@ import { READ_ONLY_TOOL_NAMES } from '../tools.js';
|
|||||||
// - Agent with explicit max_tool_calls: that value.
|
// - Agent with explicit max_tool_calls: that value.
|
||||||
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
||||||
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
||||||
// - No agent (raw chat): BUDGET_NO_AGENT (15).
|
// - No agent (raw chat): BUDGET_NO_AGENT (30).
|
||||||
|
// v1.13.7: bumped BUDGET_NO_AGENT 15→30 to match BUDGET_READ_ONLY. Every tool
|
||||||
|
// in ALL_TOOLS today is read-only (see services/tools.ts comment at
|
||||||
|
// READ_ONLY_TOOL_NAMES); the cautious 15-cap was a forward-looking guard for
|
||||||
|
// write tools that haven't landed yet. No-agent mode gets the same toolset as
|
||||||
|
// an all-read-only agent at runtime, so they should share the same budget.
|
||||||
export const BUDGET_READ_ONLY = 30;
|
export const BUDGET_READ_ONLY = 30;
|
||||||
export const BUDGET_NON_READ_ONLY = 10;
|
export const BUDGET_NON_READ_ONLY = 10;
|
||||||
export const BUDGET_NO_AGENT = 15;
|
export const BUDGET_NO_AGENT = 30;
|
||||||
|
|
||||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||||
|
|
||||||
|
|||||||
@@ -63,6 +63,25 @@ export async function buildMessagesPayload(
|
|||||||
if (isAnySentinel(m)) continue;
|
if (isAnySentinel(m)) continue;
|
||||||
if (m.role === 'assistant' && m.status === 'streaming') continue;
|
if (m.role === 'assistant' && m.status === 'streaming') continue;
|
||||||
if (m.role === 'assistant' && m.status === 'cancelled') continue;
|
if (m.role === 'assistant' && m.status === 'cancelled') continue;
|
||||||
|
// v1.13.7: skip failed assistant turns. A failed row carries no usable
|
||||||
|
// content for the model, and leaving it in the payload alongside any
|
||||||
|
// following assistant message produces "Cannot have 2 or more assistant
|
||||||
|
// messages at the end of the list" from the OpenAI-compatible upstream.
|
||||||
|
if (m.role === 'assistant' && m.status === 'failed') continue;
|
||||||
|
// v1.13.7: skip "empty" completed assistants — missing or whitespace-only content and no tool_calls.
|
||||||
|
// These can land when an upstream stream returns finishReason='stop' with
|
||||||
|
// no text/tool output (network blip, rate limit recovery, model quirk).
|
||||||
|
// Same risk as the failed-status case: a trailing empty assistant plus
|
||||||
|
// the next attempt's assistant placeholder = two trailing assistants and
|
||||||
|
// the API rejects the whole payload.
|
||||||
|
if (
|
||||||
|
m.role === 'assistant' &&
|
||||||
|
m.status === 'complete' &&
|
||||||
|
(m.content == null || m.content.trim().length === 0) &&
|
||||||
|
(m.tool_calls == null || m.tool_calls.length === 0)
|
||||||
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (m.role === 'tool') {
|
if (m.role === 'tool') {
|
||||||
const tr = m.tool_results;
|
const tr = m.tool_results;
|
||||||
if (!tr) continue;
|
if (!tr) continue;
|
||||||
|
|||||||
@@ -15,6 +15,14 @@ function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible>
|
|||||||
provider = createOpenAICompatible({
|
provider = createOpenAICompatible({
|
||||||
name: 'llama-swap',
|
name: 'llama-swap',
|
||||||
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||||
|
// v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
|
||||||
|
// omits `stream_options.include_usage` from the request body. Without
|
||||||
|
// it, llama.cpp / llama-swap never emits the trailing usage block, so
|
||||||
|
// `result.usage` resolves with inputTokens=outputTokens=undefined and
|
||||||
|
// tokens_used / ctx_used land as NULL in every messages row. Setting
|
||||||
|
// true here re-enables the per-stream usage payload across all models
|
||||||
|
// served via the llama-swap provider.
|
||||||
|
includeUsage: true,
|
||||||
});
|
});
|
||||||
cache.set(baseURL, provider);
|
cache.set(baseURL, provider);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -651,7 +651,9 @@ export function MessageBubble({ message, sessionChats, capHitInfo }: Props) {
|
|||||||
|
|
||||||
const isStreaming = message.status === 'streaming';
|
const isStreaming = message.status === 'streaming';
|
||||||
const failed = message.status === 'failed';
|
const failed = message.status === 'failed';
|
||||||
const hasContent = message.content.length > 0;
|
// v1.13.7: match the MessageList.flatten trim guard so a whitespace-only
|
||||||
|
// assistant turn doesn't render an empty bubble + dangling ActionRow.
|
||||||
|
const hasContent = message.content.trim().length > 0;
|
||||||
// v1.8.2: if metadata stamps an error reason, surface it inline under the
|
// v1.8.2: if metadata stamps an error reason, surface it inline under the
|
||||||
// generic "message failed" line. Keeps the user's eye where it already is
|
// generic "message failed" line. Keeps the user's eye where it already is
|
||||||
// rather than introducing a separate banner.
|
// rather than introducing a separate banner.
|
||||||
|
|||||||
@@ -45,7 +45,12 @@ function flatten(messages: Message[]): RenderItem[] {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0;
|
const hasToolCalls = m.tool_calls != null && m.tool_calls.length > 0;
|
||||||
const hasText = m.content.length > 0;
|
// v1.13.7: trim before checking. AI SDK v6 streaming occasionally emits a
|
||||||
|
// leading "\n" text-delta on tool-call-only turns, which used to flow into
|
||||||
|
// messages.content with length=1 and render an empty bubble + ActionRow
|
||||||
|
// between each tool call. Whitespace-only content has no visible payload,
|
||||||
|
// so treat it as no-content.
|
||||||
|
const hasText = m.content.trim().length > 0;
|
||||||
if (m.role === 'assistant' && hasToolCalls) {
|
if (m.role === 'assistant' && hasToolCalls) {
|
||||||
if (hasText || m.status === 'streaming') {
|
if (hasText || m.status === 'streaming') {
|
||||||
items.push({ kind: 'message', message: m });
|
items.push({ kind: 'message', message: m });
|
||||||
|
|||||||
Reference in New Issue
Block a user