v1.13.1-A: install AI SDK v6 + swap streamText into stream-phase.ts adapter
- Add ai@^6 and @ai-sdk/openai-compatible@^2 to apps/server.
- New services/inference/provider.ts: createOpenAICompatible against
llama-swap (baseURL threaded from config.LLAMA_SWAP_URL, cached per
baseURL). No apiKey — Authelia + Tailscale gate llama-swap, not keys.
- streamCompletion rewritten as an adapter over streamText. AI SDK
fullStream parts (text-delta, tool-call, finish, error) map back to
the legacy {content?, tool_calls?, finishReason} StreamResult shape
that executeStreamPhase already consumes. No layer above
streamCompletion changes.
- toModelMessages converts BooCode's OpenAI-shaped history to AI SDK
ModelMessage[]; tool messages need toolName which we look up by
scanning earlier assistant tool_calls for the matching id.
- buildAiTools wraps BooCode's JSON-schema tool defs via
tool({ inputSchema: jsonSchema(parameters) }) with NO execute —
BooCode dispatches tools in tool-phase.ts, not the AI SDK loop.
- XML fallback parser preserved as-is — qwen3.6 still emits XML tool
calls in text content that the structured tool-call layer misses.
- reasoning-delta parts dropped with a debug-level counter — captured
properly in v1.13.1-C.
- Abort path: streamText({ abortSignal }) wires ctx.signal through, but
AI SDK v6 swallows the abort (fullStream iterator exits cleanly
rather than throwing). Post-iteration `if (signal?.aborted) throw` so
handleAbortOrError owns the row and writes status='cancelled'. Caught
by smoke D; would have shipped as status='complete' on stop otherwise.
- Usage frame reads result.usage (inputTokens / outputTokens v6 names)
AFTER stream drain. Single trailing publish through the existing 500ms
throttle. Known regression: ChatThroughput's live mid-stream tick
(v1.12.2) is gone — it now shows a single value at stream end.
TODO(v1.13.1-followup): interpolate outputTokens during streaming
via a delta-cadence counter (e.g. part.text.length/4 token proxy)
and publish every 500ms; reconcile against result.usage at finish.
- Write-path dual-write from v1.13.0 unaffected.
Read path stays on JSON columns. v1.13.1-B flips reads to message_parts.
Smoke verified end-to-end against running container:
- A. Plain text: status='complete', 1 text part.
- B. Single tool prompt → multi-tool chain (4 calls): every assistant
with tool_calls has 2 parts (text+tool_call), every tool row has
1 part (tool_result).
- C. Multi-step covered by B's chain.
- D. Stop mid-stream: status='cancelled' written via handleAbortOrError
after the post-iteration abort throw.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,8 +11,10 @@
|
|||||||
"test": "vitest run"
|
"test": "vitest run"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@ai-sdk/openai-compatible": "^2.0.47",
|
||||||
"@fastify/static": "^7.0.4",
|
"@fastify/static": "^7.0.4",
|
||||||
"@fastify/websocket": "^10.0.1",
|
"@fastify/websocket": "^10.0.1",
|
||||||
|
"ai": "^6.0.190",
|
||||||
"fastify": "^4.28.1",
|
"fastify": "^4.28.1",
|
||||||
"postgres": "^3.4.4",
|
"postgres": "^3.4.4",
|
||||||
"ws": "^8.18.0",
|
"ws": "^8.18.0",
|
||||||
|
|||||||
26
apps/server/src/services/inference/provider.ts
Normal file
26
apps/server/src/services/inference/provider.ts
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||||
|
import type { LanguageModel } from 'ai';
|
||||||
|
|
||||||
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||||
|
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
|
||||||
|
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
|
||||||
|
// Tailscale topology and exposing it over the public internet is gated by
|
||||||
|
// Authelia at the Caddy layer, not by API keys.
|
||||||
|
|
||||||
|
const cache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
||||||
|
|
||||||
|
function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
|
||||||
|
let provider = cache.get(baseURL);
|
||||||
|
if (!provider) {
|
||||||
|
provider = createOpenAICompatible({
|
||||||
|
name: 'llama-swap',
|
||||||
|
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||||
|
});
|
||||||
|
cache.set(baseURL, provider);
|
||||||
|
}
|
||||||
|
return provider;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function upstreamModel(baseURL: string, modelId: string): LanguageModel {
|
||||||
|
return getProvider(baseURL).chatModel(modelId);
|
||||||
|
}
|
||||||
@@ -18,29 +18,8 @@ import type {
|
|||||||
StreamResult,
|
StreamResult,
|
||||||
TurnArgs,
|
TurnArgs,
|
||||||
} from './turn.js';
|
} from './turn.js';
|
||||||
|
import { upstreamModel } from './provider.js';
|
||||||
interface ChatCompletionDelta {
|
import { jsonSchema, streamText, tool, type JSONValue, type ModelMessage } from 'ai';
|
||||||
role?: string;
|
|
||||||
content?: string | null;
|
|
||||||
tool_calls?: Array<{
|
|
||||||
index: number;
|
|
||||||
id?: string;
|
|
||||||
type?: 'function';
|
|
||||||
function?: { name?: string; arguments?: string };
|
|
||||||
}>;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface ChatCompletionChunk {
|
|
||||||
choices?: Array<{
|
|
||||||
delta: ChatCompletionDelta;
|
|
||||||
finish_reason: string | null;
|
|
||||||
}>;
|
|
||||||
usage?: {
|
|
||||||
prompt_tokens?: number;
|
|
||||||
completion_tokens?: number;
|
|
||||||
total_tokens?: number;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
interface StreamOptions {
|
interface StreamOptions {
|
||||||
// null = omit tools entirely (compact phase); [] = caller stripped all tools
|
// null = omit tools entirely (compact phase); [] = caller stripped all tools
|
||||||
@@ -49,44 +28,105 @@ interface StreamOptions {
|
|||||||
temperature?: number;
|
temperature?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function* sseLines(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
|
// v1.13.1-A: convert BooCode's OpenAI-shaped history into AI SDK
|
||||||
const reader = stream.getReader();
|
// ModelMessage[]. Tool result messages need a `toolName` field that the
|
||||||
const decoder = new TextDecoder('utf-8');
|
// OpenAI shape doesn't carry; we look it up by scanning earlier assistant
|
||||||
let buffer = '';
|
// `tool_calls` entries for a matching id.
|
||||||
try {
|
function toModelMessages(messages: OpenAiMessage[]): ModelMessage[] {
|
||||||
while (true) {
|
const toolNameById = new Map<string, string>();
|
||||||
const { value, done } = await reader.read();
|
for (const m of messages) {
|
||||||
if (done) break;
|
if (m.role === 'assistant' && m.tool_calls) {
|
||||||
buffer += decoder.decode(value, { stream: true });
|
for (const tc of m.tool_calls) {
|
||||||
let idx;
|
toolNameById.set(tc.id, tc.function.name);
|
||||||
while ((idx = buffer.indexOf('\n')) >= 0) {
|
|
||||||
const line = buffer.slice(0, idx).replace(/\r$/, '');
|
|
||||||
buffer = buffer.slice(idx + 1);
|
|
||||||
if (line.length === 0) continue;
|
|
||||||
yield line;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (buffer.length > 0) yield buffer;
|
|
||||||
} finally {
|
|
||||||
reader.releaseLock();
|
|
||||||
}
|
}
|
||||||
|
const out: ModelMessage[] = [];
|
||||||
|
for (const m of messages) {
|
||||||
|
if (m.role === 'system' || m.role === 'user') {
|
||||||
|
out.push({ role: m.role, content: m.content ?? '' });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (m.role === 'assistant') {
|
||||||
|
const hasTools = m.tool_calls && m.tool_calls.length > 0;
|
||||||
|
if (!hasTools) {
|
||||||
|
// Bare text assistant (string content). null content + no tool_calls
|
||||||
|
// is degenerate but harmless to forward.
|
||||||
|
out.push({ role: 'assistant', content: m.content ?? '' });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const parts: Array<
|
||||||
|
| { type: 'text'; text: string }
|
||||||
|
| { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown }
|
||||||
|
> = [];
|
||||||
|
if (m.content && m.content.length > 0) {
|
||||||
|
parts.push({ type: 'text', text: m.content });
|
||||||
|
}
|
||||||
|
for (const tc of m.tool_calls!) {
|
||||||
|
let input: unknown = {};
|
||||||
|
try {
|
||||||
|
input = tc.function.arguments.length > 0 ? JSON.parse(tc.function.arguments) : {};
|
||||||
|
} catch {
|
||||||
|
// Malformed args from a prior turn: pass through as a raw blob so
|
||||||
|
// the model sees the same shape it emitted. Wraps the string under
|
||||||
|
// _raw to match the buildMessagesPayload upstream convention.
|
||||||
|
input = { _raw: tc.function.arguments };
|
||||||
|
}
|
||||||
|
parts.push({ type: 'tool-call', toolCallId: tc.id, toolName: tc.function.name, input });
|
||||||
|
}
|
||||||
|
out.push({ role: 'assistant', content: parts });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (m.role === 'tool') {
|
||||||
|
const toolCallId = m.tool_call_id ?? '';
|
||||||
|
const toolName = toolNameById.get(toolCallId) ?? 'unknown';
|
||||||
|
const raw = m.content ?? '';
|
||||||
|
let output: { type: 'text'; value: string } | { type: 'json'; value: JSONValue };
|
||||||
|
try {
|
||||||
|
// JSON.parse returns `any`; cast to JSONValue since the upstream
|
||||||
|
// tool_results column is already JSON-serializable by construction.
|
||||||
|
output = { type: 'json', value: JSON.parse(raw) as JSONValue };
|
||||||
|
} catch {
|
||||||
|
output = { type: 'text', value: raw };
|
||||||
|
}
|
||||||
|
out.push({
|
||||||
|
role: 'tool',
|
||||||
|
content: [{ type: 'tool-result', toolCallId, toolName, output }],
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the AI SDK tools record from BooCode's JSON-schema tool definitions.
|
||||||
|
// No `execute` field: BooCode runs tools itself in tool-phase.ts; streamText
|
||||||
|
// surfaces the tool-call parts via fullStream and we capture them for the
|
||||||
|
// outer loop to dispatch.
|
||||||
|
function buildAiTools(schemas: ToolJsonSchema[]): Record<string, ReturnType<typeof tool>> {
|
||||||
|
const out: Record<string, ReturnType<typeof tool>> = {};
|
||||||
|
for (const s of schemas) {
|
||||||
|
out[s.function.name] = tool({
|
||||||
|
description: s.function.description,
|
||||||
|
inputSchema: jsonSchema(s.function.parameters),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
||||||
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
||||||
// the structured delta.tool_calls field. The XML shape is:
|
// the structured tool_calls field. We extract them out of the streamed text
|
||||||
|
// before flushing it to the client, mirroring the pre-AI-SDK behavior.
|
||||||
|
//
|
||||||
|
// XML shape:
|
||||||
// <tool_call>
|
// <tool_call>
|
||||||
// <function=NAME>
|
// <function=NAME>
|
||||||
// <parameter=KEY>
|
// <parameter=KEY>VALUE</parameter>
|
||||||
// VALUE
|
// ...
|
||||||
// </parameter>
|
|
||||||
// ...more parameters...
|
|
||||||
// </function>
|
// </function>
|
||||||
// </tool_call>
|
// </tool_call>
|
||||||
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
||||||
// streamCompletion buffers delta.content, extracts complete blocks, parses
|
|
||||||
// them via parseXmlToolCall, and pushes synthetic entries into the existing
|
|
||||||
// toolCallsBuffer alongside any native JSON-format tool calls.
|
|
||||||
export async function streamCompletion(
|
export async function streamCompletion(
|
||||||
ctx: InferenceContext,
|
ctx: InferenceContext,
|
||||||
model: string,
|
model: string,
|
||||||
@@ -96,149 +136,156 @@ export async function streamCompletion(
|
|||||||
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal
|
||||||
): Promise<StreamResult> {
|
): Promise<StreamResult> {
|
||||||
const body: Record<string, unknown> = {
|
const aiMessages = toModelMessages(messages);
|
||||||
model,
|
const hasTools = opts.tools !== null && opts.tools.length > 0;
|
||||||
messages,
|
const aiTools = hasTools ? buildAiTools(opts.tools!) : undefined;
|
||||||
stream: true,
|
|
||||||
stream_options: { include_usage: true },
|
|
||||||
};
|
|
||||||
if (opts.tools && opts.tools.length > 0) {
|
|
||||||
body['tools'] = opts.tools;
|
|
||||||
body['tool_choice'] = 'auto';
|
|
||||||
}
|
|
||||||
if (typeof opts.temperature === 'number') {
|
|
||||||
body['temperature'] = opts.temperature;
|
|
||||||
}
|
|
||||||
|
|
||||||
const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
const startedAt = Date.now();
|
||||||
method: 'POST',
|
let reasoningDeltaCount = 0;
|
||||||
headers: { 'Content-Type': 'application/json' },
|
|
||||||
body: JSON.stringify(body),
|
const result = streamText({
|
||||||
signal,
|
model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
|
||||||
|
messages: aiMessages,
|
||||||
|
...(aiTools ? { tools: aiTools, toolChoice: 'auto' as const } : {}),
|
||||||
|
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||||
|
abortSignal: signal,
|
||||||
});
|
});
|
||||||
if (!res.ok || !res.body) {
|
|
||||||
const text = await res.text().catch(() => '');
|
|
||||||
throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
let content = '';
|
let content = '';
|
||||||
// v1.10.5: holds delta.content bytes that may contain a partial XML tool
|
|
||||||
// call. Anything not part of a (possibly forming) <tool_call>…</tool_call>
|
|
||||||
// pair is flushed to content + onDelta as soon as we know it's safe.
|
|
||||||
let pendingBuffer = '';
|
let pendingBuffer = '';
|
||||||
let finishReason: string | null = null;
|
let finishReason: string | null = null;
|
||||||
let promptTokens: number | null = null;
|
// v1.13.1-A: AI SDK emits one `tool-call` part per fully-aggregated call,
|
||||||
let completionTokens: number | null = null;
|
// so we no longer need the OpenAI-index reassembly map the manual SSE
|
||||||
const toolCallsBuffer = new Map<number, { id: string; name: string; argsText: string }>();
|
// parser used. XML tool calls extracted from text content go into the
|
||||||
|
// same flat list and keep the v1.10.5 synthetic id convention.
|
||||||
|
const toolCalls: ToolCall[] = [];
|
||||||
|
|
||||||
for await (const line of sseLines(res.body)) {
|
for await (const part of result.fullStream) {
|
||||||
if (!line.startsWith('data:')) continue;
|
switch (part.type) {
|
||||||
const payload = line.slice(5).trim();
|
case 'text-delta': {
|
||||||
if (payload === '[DONE]') break;
|
pendingBuffer += part.text;
|
||||||
let parsed: ChatCompletionChunk;
|
// Extract any complete <tool_call>...</tool_call> blocks before
|
||||||
try {
|
// flushing visible text.
|
||||||
parsed = JSON.parse(payload);
|
while (true) {
|
||||||
} catch {
|
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
||||||
continue;
|
if (startIdx === -1) break;
|
||||||
}
|
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
||||||
|
if (closeIdx === -1) break;
|
||||||
if (parsed.usage) {
|
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
||||||
if (typeof parsed.usage.prompt_tokens === 'number') {
|
const block = pendingBuffer.slice(startIdx, blockEnd);
|
||||||
promptTokens = parsed.usage.prompt_tokens;
|
if (startIdx > 0) {
|
||||||
}
|
const before = pendingBuffer.slice(0, startIdx);
|
||||||
if (typeof parsed.usage.completion_tokens === 'number') {
|
content += before;
|
||||||
completionTokens = parsed.usage.completion_tokens;
|
onDelta(before);
|
||||||
}
|
}
|
||||||
onUsage?.(promptTokens, completionTokens);
|
const parsedCall = parseXmlToolCall(block);
|
||||||
}
|
if (parsedCall) {
|
||||||
// v1.11.3: removed dead `parsed.timings.n_ctx` read. llama-server's
|
const synthIdx = toolCalls.length;
|
||||||
// streaming completion does NOT emit n_ctx in timings (verified
|
toolCalls.push({
|
||||||
// empirically); the authoritative source is llama-swap's
|
id: `xml_call_${synthIdx}`,
|
||||||
// /upstream/<model>/props endpoint, fetched per-turn via
|
name: parsedCall.name,
|
||||||
// model-context.getModelContext() at the finalization sites below.
|
args: parsedCall.args,
|
||||||
|
});
|
||||||
const choice = parsed.choices?.[0];
|
}
|
||||||
if (!choice) continue;
|
// Parse failures still drop the block — leaking <tool_call> XML to
|
||||||
const delta = choice.delta ?? {};
|
// the chat would look worse than silently swallowing the bad block.
|
||||||
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
pendingBuffer = pendingBuffer.slice(blockEnd);
|
||||||
// v1.10.5 XML fallback. Append, then extract any complete tool_call
|
|
||||||
// blocks before deciding what's safe to flush as visible content.
|
|
||||||
pendingBuffer += delta.content;
|
|
||||||
while (true) {
|
|
||||||
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
|
||||||
if (startIdx === -1) break;
|
|
||||||
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
|
||||||
if (closeIdx === -1) break;
|
|
||||||
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
|
||||||
const block = pendingBuffer.slice(startIdx, blockEnd);
|
|
||||||
// Any text before the opener is plain content — flush it now.
|
|
||||||
if (startIdx > 0) {
|
|
||||||
const before = pendingBuffer.slice(0, startIdx);
|
|
||||||
content += before;
|
|
||||||
onDelta(before);
|
|
||||||
}
|
}
|
||||||
const parsedCall = parseXmlToolCall(block);
|
// Hold back any (partial or full) unclosed opener; flush the rest.
|
||||||
if (parsedCall) {
|
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
||||||
const synthIdx = toolCallsBuffer.size;
|
if (partialIdx >= 0) {
|
||||||
toolCallsBuffer.set(synthIdx, {
|
if (partialIdx > 0) {
|
||||||
id: `xml_call_${synthIdx}`,
|
const flush = pendingBuffer.slice(0, partialIdx);
|
||||||
name: parsedCall.name,
|
content += flush;
|
||||||
argsText: JSON.stringify(parsedCall.args),
|
onDelta(flush);
|
||||||
});
|
}
|
||||||
|
pendingBuffer = pendingBuffer.slice(partialIdx);
|
||||||
|
} else if (pendingBuffer.length > 0) {
|
||||||
|
content += pendingBuffer;
|
||||||
|
onDelta(pendingBuffer);
|
||||||
|
pendingBuffer = '';
|
||||||
}
|
}
|
||||||
// If parsing failed we still drop the block — emitting unparseable
|
break;
|
||||||
// XML to the chat would look worse than silently swallowing it.
|
|
||||||
pendingBuffer = pendingBuffer.slice(blockEnd);
|
|
||||||
}
|
}
|
||||||
// After all complete blocks are out, hold back any (partial or full)
|
case 'tool-call': {
|
||||||
// unclosed opener; flush the rest.
|
// AI SDK has already parsed the input into an object. Match the
|
||||||
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
// ToolCall shape BooCode passes around in toolCallsBuffer downstream.
|
||||||
if (partialIdx >= 0) {
|
toolCalls.push({
|
||||||
if (partialIdx > 0) {
|
id: part.toolCallId,
|
||||||
const flush = pendingBuffer.slice(0, partialIdx);
|
name: part.toolName,
|
||||||
content += flush;
|
args: (part.input ?? {}) as Record<string, unknown>,
|
||||||
onDelta(flush);
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'reasoning-delta': {
|
||||||
|
// v1.13.1-A: reasoning parts are dropped for now. v1.13.1-C will
|
||||||
|
// persist them as `kind='reasoning'` rows in message_parts. Counter
|
||||||
|
// is logged at finish so we know whether qwen3.6 actually emits any.
|
||||||
|
reasoningDeltaCount += 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'finish': {
|
||||||
|
if (typeof part.finishReason === 'string') {
|
||||||
|
finishReason = part.finishReason;
|
||||||
}
|
}
|
||||||
pendingBuffer = pendingBuffer.slice(partialIdx);
|
break;
|
||||||
} else if (pendingBuffer.length > 0) {
|
|
||||||
content += pendingBuffer;
|
|
||||||
onDelta(pendingBuffer);
|
|
||||||
pendingBuffer = '';
|
|
||||||
}
|
}
|
||||||
}
|
case 'error': {
|
||||||
if (Array.isArray(delta.tool_calls)) {
|
const err = part.error;
|
||||||
for (const tc of delta.tool_calls) {
|
throw err instanceof Error ? err : new Error(String(err));
|
||||||
const idx = tc.index;
|
|
||||||
const existing = toolCallsBuffer.get(idx) ?? { id: '', name: '', argsText: '' };
|
|
||||||
if (tc.id) existing.id = tc.id;
|
|
||||||
if (tc.function?.name) existing.name = tc.function.name;
|
|
||||||
if (typeof tc.function?.arguments === 'string') existing.argsText += tc.function.arguments;
|
|
||||||
toolCallsBuffer.set(idx, existing);
|
|
||||||
}
|
}
|
||||||
|
// Intentional no-op: start, start-step, text-start, text-end,
|
||||||
|
// reasoning-start, reasoning-end, source, file, tool-input-start,
|
||||||
|
// tool-input-delta, tool-input-end, tool-result, tool-error,
|
||||||
|
// finish-step, raw. We only care about the aggregated tool-call and
|
||||||
|
// text-delta paths above; the rest are AI SDK lifecycle/streaming
|
||||||
|
// breadcrumbs that don't change BooCode's persistence or WS contract.
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (choice.finish_reason) finishReason = choice.finish_reason;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// v1.10.5: if the stream ended mid-XML (e.g. model truncated, no closer
|
// v1.13.1-A: drain any buffered partial XML opener as plain text. The
|
||||||
// ever arrived), flush whatever was buffered as plain content so it isn't
|
// pre-AI-SDK path did this on stream end too — better to leak `<tool_c`
|
||||||
// silently dropped. Better to show a stray `<tool_call>` than vanish text.
|
// than vanish the text.
|
||||||
if (pendingBuffer.length > 0) {
|
if (pendingBuffer.length > 0) {
|
||||||
content += pendingBuffer;
|
content += pendingBuffer;
|
||||||
onDelta(pendingBuffer);
|
onDelta(pendingBuffer);
|
||||||
pendingBuffer = '';
|
pendingBuffer = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
const toolCalls: ToolCall[] = [];
|
// v1.13.1-A: AI SDK v6 swallows the abort signal — the fullStream iterator
|
||||||
for (const [, t] of [...toolCallsBuffer.entries()].sort(([a], [b]) => a - b)) {
|
// exits cleanly and we'd otherwise return a successful StreamResult, which
|
||||||
let args: Record<string, unknown> = {};
|
// makes executeStreamPhase call finalizeCompletion and write status='complete'.
|
||||||
if (t.argsText.length > 0) {
|
// Detect post-iteration abort and throw an AbortError so handleAbortOrError
|
||||||
try {
|
// owns the row instead, matching v1.12.x stop-button behavior.
|
||||||
args = JSON.parse(t.argsText);
|
if (signal?.aborted) {
|
||||||
} catch {
|
const abortErr = new Error('aborted');
|
||||||
args = { _raw: t.argsText };
|
abortErr.name = 'AbortError';
|
||||||
}
|
throw abortErr;
|
||||||
}
|
}
|
||||||
toolCalls.push({ id: t.id || `call_${toolCalls.length}`, name: t.name, args });
|
|
||||||
|
// Usage lands as a promise on the result; awaiting after fullStream is
|
||||||
|
// drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
|
||||||
|
let promptTokens: number | null = null;
|
||||||
|
let completionTokens: number | null = null;
|
||||||
|
try {
|
||||||
|
const usage = await result.usage;
|
||||||
|
if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
|
||||||
|
if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
|
||||||
|
} catch {
|
||||||
|
// Some providers omit usage on partial streams; leave both null.
|
||||||
|
}
|
||||||
|
|
||||||
|
if (onUsage && (promptTokens !== null || completionTokens !== null)) {
|
||||||
|
onUsage(promptTokens, completionTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reasoningDeltaCount > 0) {
|
||||||
|
ctx.log.debug(
|
||||||
|
{ reasoningDeltaCount, model, elapsed_ms: Date.now() - startedAt },
|
||||||
|
'streamCompletion: reasoning deltas dropped (captured in v1.13.1-C)',
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
return { finishReason, content, toolCalls, promptTokens, completionTokens };
|
return { finishReason, content, toolCalls, promptTokens, completionTokens };
|
||||||
@@ -313,9 +360,12 @@ export async function executeStreamPhase(
|
|||||||
const mctxForStream = await modelContext.getModelContext(session.model);
|
const mctxForStream = await modelContext.getModelContext(session.model);
|
||||||
const nCtxForStream = mctxForStream?.n_ctx ?? null;
|
const nCtxForStream = mctxForStream?.n_ctx ?? null;
|
||||||
|
|
||||||
// v1.12.2: throttle live usage publishes to ~500ms. The model can land
|
// v1.12.2 → v1.13.1-A: live usage publishes were throttled to ~500ms when
|
||||||
// dozens of usage frames per second; without a throttle the WS turns into
|
// the manual SSE parser saw `parsed.usage` per chunk. AI SDK v6 surfaces
|
||||||
// a firehose for a few KB savings on each render.
|
// usage only at stream end (result.usage promise), so the throttle is
|
||||||
|
// effectively a single trailing publish. ChatThroughput will tick once at
|
||||||
|
// stream completion rather than mid-stream — known regression vs v1.12.2,
|
||||||
|
// recovered if a future dispatch interpolates from delta cadence.
|
||||||
const USAGE_THROTTLE_MS = 500;
|
const USAGE_THROTTLE_MS = 500;
|
||||||
let lastUsageAt = 0;
|
let lastUsageAt = 0;
|
||||||
let pendingUsage: { p: number | null; c: number | null } | null = null;
|
let pendingUsage: { p: number | null; c: number | null } | null = null;
|
||||||
|
|||||||
88
pnpm-lock.yaml
generated
88
pnpm-lock.yaml
generated
@@ -48,12 +48,18 @@ importers:
|
|||||||
|
|
||||||
apps/server:
|
apps/server:
|
||||||
dependencies:
|
dependencies:
|
||||||
|
'@ai-sdk/openai-compatible':
|
||||||
|
specifier: ^2.0.47
|
||||||
|
version: 2.0.47(zod@3.25.76)
|
||||||
'@fastify/static':
|
'@fastify/static':
|
||||||
specifier: ^7.0.4
|
specifier: ^7.0.4
|
||||||
version: 7.0.4
|
version: 7.0.4
|
||||||
'@fastify/websocket':
|
'@fastify/websocket':
|
||||||
specifier: ^10.0.1
|
specifier: ^10.0.1
|
||||||
version: 10.0.1
|
version: 10.0.1
|
||||||
|
ai:
|
||||||
|
specifier: ^6.0.190
|
||||||
|
version: 6.0.190(zod@3.25.76)
|
||||||
fastify:
|
fastify:
|
||||||
specifier: ^4.28.1
|
specifier: ^4.28.1
|
||||||
version: 4.29.1
|
version: 4.29.1
|
||||||
@@ -179,6 +185,28 @@ importers:
|
|||||||
|
|
||||||
packages:
|
packages:
|
||||||
|
|
||||||
|
'@ai-sdk/gateway@3.0.119':
|
||||||
|
resolution: {integrity: sha512-VAhfRWC+JexZakkVfmjaJKaTj00x7/UHdE8kMWL3NhuQAlf8oXtg9r4dfvFZrByXxchGRBvYE3biEUyibkg0xg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.25.76 || ^4.1.8
|
||||||
|
|
||||||
|
'@ai-sdk/openai-compatible@2.0.47':
|
||||||
|
resolution: {integrity: sha512-Enm5UlL0zUCrW3792opk5h7hRWxZOZzDe6eQYVFqX9LUOGGCe1h8MZWAGim765nwzgnjlpeYOsuzZmLtRsTPlg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.25.76 || ^4.1.8
|
||||||
|
|
||||||
|
'@ai-sdk/provider-utils@4.0.27':
|
||||||
|
resolution: {integrity: sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.25.76 || ^4.1.8
|
||||||
|
|
||||||
|
'@ai-sdk/provider@3.0.10':
|
||||||
|
resolution: {integrity: sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
'@alloc/quick-lru@5.2.0':
|
'@alloc/quick-lru@5.2.0':
|
||||||
resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==}
|
resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
@@ -789,6 +817,10 @@ packages:
|
|||||||
'@open-draft/until@2.1.0':
|
'@open-draft/until@2.1.0':
|
||||||
resolution: {integrity: sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==}
|
resolution: {integrity: sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==}
|
||||||
|
|
||||||
|
'@opentelemetry/api@1.9.1':
|
||||||
|
resolution: {integrity: sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==}
|
||||||
|
engines: {node: '>=8.0.0'}
|
||||||
|
|
||||||
'@pinojs/redact@0.4.0':
|
'@pinojs/redact@0.4.0':
|
||||||
resolution: {integrity: sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==}
|
resolution: {integrity: sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==}
|
||||||
|
|
||||||
@@ -1646,6 +1678,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==}
|
resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
|
'@standard-schema/spec@1.1.0':
|
||||||
|
resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
|
||||||
|
|
||||||
'@tailwindcss/node@4.3.0':
|
'@tailwindcss/node@4.3.0':
|
||||||
resolution: {integrity: sha512-aFb4gUhFOgdh9AXo4IzBEOzBkkAxm9VigwDJnMIYv3lcfXCJVesNfbEaBl4BNgVRyid92AmdviqwBUBRKSeY3g==}
|
resolution: {integrity: sha512-aFb4gUhFOgdh9AXo4IzBEOzBkkAxm9VigwDJnMIYv3lcfXCJVesNfbEaBl4BNgVRyid92AmdviqwBUBRKSeY3g==}
|
||||||
|
|
||||||
@@ -1811,6 +1846,10 @@ packages:
|
|||||||
'@ungap/structured-clone@1.3.1':
|
'@ungap/structured-clone@1.3.1':
|
||||||
resolution: {integrity: sha512-mUFwbeTqrVgDQxFveS+df2yfap6iuP20NAKAsBt5jDEoOTDew+zwLAOilHCeQJOVSvmgCX4ogqIrA0mnyr08yQ==}
|
resolution: {integrity: sha512-mUFwbeTqrVgDQxFveS+df2yfap6iuP20NAKAsBt5jDEoOTDew+zwLAOilHCeQJOVSvmgCX4ogqIrA0mnyr08yQ==}
|
||||||
|
|
||||||
|
'@vercel/oidc@3.2.0':
|
||||||
|
resolution: {integrity: sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug==}
|
||||||
|
engines: {node: '>= 20'}
|
||||||
|
|
||||||
'@vitejs/plugin-react@4.7.0':
|
'@vitejs/plugin-react@4.7.0':
|
||||||
resolution: {integrity: sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==}
|
resolution: {integrity: sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==}
|
||||||
engines: {node: ^14.18.0 || >=16.0.0}
|
engines: {node: ^14.18.0 || >=16.0.0}
|
||||||
@@ -1878,6 +1917,12 @@ packages:
|
|||||||
resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
|
resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==}
|
||||||
engines: {node: '>= 14'}
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
|
ai@6.0.190:
|
||||||
|
resolution: {integrity: sha512-T+ixHbWZ6jmHRREpVVJTkFyWJeCekCdzLPan7lp1F32jG5OUw4+odlVYjtMRXVzogU+pWzpMmXdRiHUmdL/q0w==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.25.76 || ^4.1.8
|
||||||
|
|
||||||
ajv-formats@2.1.1:
|
ajv-formats@2.1.1:
|
||||||
resolution: {integrity: sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==}
|
resolution: {integrity: sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==}
|
||||||
peerDependencies:
|
peerDependencies:
|
||||||
@@ -2694,6 +2739,9 @@ packages:
|
|||||||
json-schema-typed@8.0.2:
|
json-schema-typed@8.0.2:
|
||||||
resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==}
|
resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==}
|
||||||
|
|
||||||
|
json-schema@0.4.0:
|
||||||
|
resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
|
||||||
|
|
||||||
json5@2.2.3:
|
json5@2.2.3:
|
||||||
resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==}
|
resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==}
|
||||||
engines: {node: '>=6'}
|
engines: {node: '>=6'}
|
||||||
@@ -3966,6 +4014,30 @@ packages:
|
|||||||
|
|
||||||
snapshots:
|
snapshots:
|
||||||
|
|
||||||
|
'@ai-sdk/gateway@3.0.119(zod@3.25.76)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 3.0.10
|
||||||
|
'@ai-sdk/provider-utils': 4.0.27(zod@3.25.76)
|
||||||
|
'@vercel/oidc': 3.2.0
|
||||||
|
zod: 3.25.76
|
||||||
|
|
||||||
|
'@ai-sdk/openai-compatible@2.0.47(zod@3.25.76)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 3.0.10
|
||||||
|
'@ai-sdk/provider-utils': 4.0.27(zod@3.25.76)
|
||||||
|
zod: 3.25.76
|
||||||
|
|
||||||
|
'@ai-sdk/provider-utils@4.0.27(zod@3.25.76)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 3.0.10
|
||||||
|
'@standard-schema/spec': 1.1.0
|
||||||
|
eventsource-parser: 3.0.8
|
||||||
|
zod: 3.25.76
|
||||||
|
|
||||||
|
'@ai-sdk/provider@3.0.10':
|
||||||
|
dependencies:
|
||||||
|
json-schema: 0.4.0
|
||||||
|
|
||||||
'@alloc/quick-lru@5.2.0': {}
|
'@alloc/quick-lru@5.2.0': {}
|
||||||
|
|
||||||
'@babel/code-frame@7.29.0':
|
'@babel/code-frame@7.29.0':
|
||||||
@@ -4516,6 +4588,8 @@ snapshots:
|
|||||||
|
|
||||||
'@open-draft/until@2.1.0': {}
|
'@open-draft/until@2.1.0': {}
|
||||||
|
|
||||||
|
'@opentelemetry/api@1.9.1': {}
|
||||||
|
|
||||||
'@pinojs/redact@0.4.0': {}
|
'@pinojs/redact@0.4.0': {}
|
||||||
|
|
||||||
'@pkgjs/parseargs@0.11.0':
|
'@pkgjs/parseargs@0.11.0':
|
||||||
@@ -5386,6 +5460,8 @@ snapshots:
|
|||||||
|
|
||||||
'@sindresorhus/merge-streams@4.0.0': {}
|
'@sindresorhus/merge-streams@4.0.0': {}
|
||||||
|
|
||||||
|
'@standard-schema/spec@1.1.0': {}
|
||||||
|
|
||||||
'@tailwindcss/node@4.3.0':
|
'@tailwindcss/node@4.3.0':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@jridgewell/remapping': 2.3.5
|
'@jridgewell/remapping': 2.3.5
|
||||||
@@ -5548,6 +5624,8 @@ snapshots:
|
|||||||
|
|
||||||
'@ungap/structured-clone@1.3.1': {}
|
'@ungap/structured-clone@1.3.1': {}
|
||||||
|
|
||||||
|
'@vercel/oidc@3.2.0': {}
|
||||||
|
|
||||||
'@vitejs/plugin-react@4.7.0(vite@5.4.21(@types/node@20.19.41)(lightningcss@1.32.0))':
|
'@vitejs/plugin-react@4.7.0(vite@5.4.21(@types/node@20.19.41)(lightningcss@1.32.0))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@babel/core': 7.29.0
|
'@babel/core': 7.29.0
|
||||||
@@ -5628,6 +5706,14 @@ snapshots:
|
|||||||
|
|
||||||
agent-base@7.1.4: {}
|
agent-base@7.1.4: {}
|
||||||
|
|
||||||
|
ai@6.0.190(zod@3.25.76):
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/gateway': 3.0.119(zod@3.25.76)
|
||||||
|
'@ai-sdk/provider': 3.0.10
|
||||||
|
'@ai-sdk/provider-utils': 4.0.27(zod@3.25.76)
|
||||||
|
'@opentelemetry/api': 1.9.1
|
||||||
|
zod: 3.25.76
|
||||||
|
|
||||||
ajv-formats@2.1.1(ajv@8.20.0):
|
ajv-formats@2.1.1(ajv@8.20.0):
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
ajv: 8.20.0
|
ajv: 8.20.0
|
||||||
@@ -6453,6 +6539,8 @@ snapshots:
|
|||||||
|
|
||||||
json-schema-typed@8.0.2: {}
|
json-schema-typed@8.0.2: {}
|
||||||
|
|
||||||
|
json-schema@0.4.0: {}
|
||||||
|
|
||||||
json5@2.2.3: {}
|
json5@2.2.3: {}
|
||||||
|
|
||||||
jsonfile@6.2.1:
|
jsonfile@6.2.1:
|
||||||
|
|||||||
Reference in New Issue
Block a user