Batch 1 — tool-call-parser.ts: replaces xml-parser.ts with a port of
Unsloth's tool_call_parser.py. Adds balanced-brace JSON scanner,
single-param fast path, hasToolSignal/stripToolMarkup/parseToolCallsFromText
exports, and stream-finalization stripping at all three final-write sites
(error-handler, finalizeCompletion, executeToolPhase). Anthropic <invoke>
shape preserved. 75+12 tests.
Batch 2 — web/html-to-md.ts: parse5 tree-walking HTML-to-Markdown converter
ported from Unsloth's _html_to_md.py. Replaces web_fetch's regex stripHtml
with structured markdown output (headings, links, lists, tables, code blocks,
blockquotes, entity decoding). 29 tests.
Batch 3 — llama-args-validator.ts: port of llama_server_args.py deny-list
validator. Wired into AGENTS.md frontmatter parser — llama_extra_args field
validated at load time, rejects managed flags (model identity, networking,
auth/TLS, server UI). No runtime consumer yet (llama-swap boundary). 76 tests.
All three files carry SPDX-License-Identifier: AGPL-3.0-only headers.
LICENSE flipped to AGPL-3.0-only in prior commit (a938cf1).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
203 lines
7.9 KiB
TypeScript
203 lines
7.9 KiB
TypeScript
import type { MessageMetadata, Session } from '../../types/api.js';
|
|
import {
|
|
decideHtmlArtifactWrite,
|
|
detectHtmlArtifact,
|
|
deriveHtmlTitle,
|
|
HTML_ARTIFACT_MAX_BYTES,
|
|
} from '../artifacts.js';
|
|
import * as modelContext from '../model-context.js';
|
|
import { maybeFlagForCompaction } from './payload.js';
|
|
import { insertParts, partsFromAssistantMessage } from './parts.js';
|
|
import type { PartInsert } from './parts.js';
|
|
import { stripToolMarkup } from './tool-call-parser.js';
|
|
import type { InferenceContext, StreamResult, TurnArgs } from './turn.js';
|
|
|
|
/**
 * Terminal handler for an assistant turn that stopped on an abort or an error.
 *
 * Strips tool markup from the partial text, persists it on the assistant
 * message row together with the final status, bumps the session's
 * `updated_at`, and publishes the matching status / completion / error events.
 *
 * @param ctx - Inference context supplying `sql`, `publish`, `publishUser` and `log`.
 * @param args - Turn identifiers (`sessionId`, `chatId`, `assistantMessageId`).
 * @param accumulated - Text accumulated before the turn stopped.
 * @param err - The thrown value. An `Error` whose `name` is `'AbortError'` is
 *   recorded as `'cancelled'`; anything else is recorded as `'failed'`.
 */
export async function handleAbortOrError(
  ctx: InferenceContext,
  args: TurnArgs,
  accumulated: string,
  err: unknown
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const isAbort = err instanceof Error && err.name === 'AbortError';
  const finalStatus = isAbort ? 'cancelled' : 'failed';
  const errMsg = err instanceof Error ? err.message : String(err);
  // Keep the caller's argument untouched; every write below uses the stripped text.
  const partialContent = stripToolMarkup(accumulated, { final: true });

  // v1.8.2: persist a structured error metadata blob on genuine failures so
  // the bubble can render the reason on reload without re-deriving from the
  // (one-shot) WS error frame. User-initiated abort skips this — there's no
  // "reason" to surface for a stop the user already explicitly chose.
  const errorMetadata: MessageMetadata | null = isAbort
    ? null
    : { kind: 'error', error_reason: 'llm_provider_error', error_text: errMsg };

  if (errorMetadata) {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${partialContent},
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errorMetadata as never)}
      WHERE id = ${assistantMessageId}
    `;
  } else {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${partialContent},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
  }

  const [sessionRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  if (sessionRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: sessionRow.project_id,
      name: sessionRow.name,
      updated_at: sessionRow.updated_at,
    });
  } else {
    // The UPDATE matched no session row. Dereferencing it would throw here and
    // suppress the terminal chat_status / error frames below, so skip only the
    // session_updated event and carry on.
    ctx.log.warn(
      { sessionId, chatId, assistantMessageId },
      'session row not found while finalizing aborted/failed turn; skipping session_updated'
    );
  }

  // v1.8 mobile-tabs: cancellation is a user-initiated stop, treat as idle;
  // genuine errors flip the dot red. v1.8.2: error path also carries a
  // machine-readable `reason` so the UI can render specifics inline.
  if (isAbort) {
    // v1.12.1: defensive cancellation write. The status=${finalStatus} UPDATE
    // above already sets 'cancelled' for the AbortError case, but a row can
    // leak as 'streaming' when the abort fires between the post-tool-phase
    // INSERT (executeToolPhase) and the next runAssistantTurn's stream setup,
    // bypassing the try/catch around executeStreamPhase. The status guard
    // makes this a no-op when the earlier write already landed.
    await ctx.sql`
      UPDATE messages
      SET status = 'cancelled', content = ${partialContent}, finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId} AND status = 'streaming'
    `;
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
    ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
    return;
  }

  ctx.publishUser({
    type: 'chat_status',
    chat_id: chatId,
    status: 'error',
    at: new Date().toISOString(),
    reason: 'llm_provider_error',
  });
  ctx.publish(sessionId, {
    type: 'error',
    message_id: assistantMessageId,
    chat_id: chatId,
    error: errMsg,
    reason: 'llm_provider_error',
  });
  ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
}
|
|
|
|
/**
 * Terminal write for a text-only assistant turn that completed normally.
 *
 * Strips tool markup from the streamed content, marks the assistant message
 * row `'complete'` with token/context counters, inserts the message parts
 * (plus a sibling `html_artifact` part when HTML is detected and within the
 * size cap), flags the chat for compaction if needed, bumps the session's
 * `updated_at`, and publishes the `session_updated`, `chat_status` and
 * `message_complete` events.
 *
 * @param ctx - Inference context supplying `sql`, `publish`, `publishUser` and `log`.
 * @param args - Turn identifiers (`sessionId`, `chatId`, `assistantMessageId`).
 * @param result - Stream outcome: `content`, `reasoning`, `finishReason`,
 *   `promptTokens`, `completionTokens`.
 * @param startedAt - Echoed back as `started_at` on the `message_complete` event.
 * @param session - Its `model` drives the model-context lookup and is echoed
 *   on the `message_complete` event.
 */
export async function finalizeCompletion(
  ctx: InferenceContext,
  args: TurnArgs,
  result: StreamResult,
  startedAt: string | null,
  session: Session
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const content = stripToolMarkup(result.content, { final: true });
  const { finishReason, promptTokens, completionTokens } = result;

  // v1.11.3: see executeToolPhase for the rationale.
  const mctx = await modelContext.getModelContext(session.model);
  const nCtx = mctx?.n_ctx ?? null;

  const [updated] = await ctx.sql<
    { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
  >`
    UPDATE messages
    SET content = ${content},
        status = 'complete',
        tokens_used = ${completionTokens},
        ctx_used = ${promptTokens},
        ctx_max = ${nCtx},
        finished_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING tokens_used, ctx_used, ctx_max, finished_at
  `;

  // v1.13.0: dual-write the text part. finalizeCompletion is the terminal
  // path for text-only assistant turns (no tool calls); tool_calls are null
  // here by construction (the tool-bearing path goes through executeToolPhase).
  // v1.13.1-C: include result.reasoning so reasoning-channel models capture
  // a kind='reasoning' part alongside the text.
  // TODO(v1.13.1): wrap the UPDATE above and this insertParts in a single
  // sql.begin before flipping read authority to message_parts.
  const baseParts: PartInsert[] = partsFromAssistantMessage({
    content,
    tool_calls: null,
    reasoning: result.reasoning,
  }).map((p) => ({
    ...p,
    message_id: assistantMessageId,
  }));

  // v1.14.x-html-artifact-panes: opportunistic HTML detection. Adds a
  // SIBLING html_artifact part — never replaces the text part. 1MB cap is
  // graceful: oversized payloads are skipped and the assistant message
  // lands as plain content (warn logged).
  const htmlContent = detectHtmlArtifact(content);
  if (htmlContent !== null) {
    const decision = decideHtmlArtifactWrite(htmlContent);
    if (decision.write) {
      const title = deriveHtmlTitle(htmlContent);
      // Place the artifact after the highest existing sequence (0 when there are no parts).
      const nextSeq = baseParts.reduce((m, p) => Math.max(m, p.sequence), -1) + 1;
      baseParts.push({
        message_id: assistantMessageId,
        sequence: nextSeq,
        kind: 'html_artifact',
        payload: {
          html_content: htmlContent,
          char_count: htmlContent.length,
          title,
        },
      });
    } else {
      ctx.log.warn(
        { assistantMessageId, byteLen: decision.byteLen, cap: HTML_ARTIFACT_MAX_BYTES },
        'html_artifact exceeded 1MB cap; skipping artifact part'
      );
    }
  }
  await insertParts(ctx.sql, baseParts);

  // v1.11: flag for compaction on the terminal turn too. Catches the common
  // case of a turn that hit the limit without invoking tools.
  await maybeFlagForCompaction(ctx, chatId, updated);

  const [sessionRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  if (sessionRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: sessionRow.project_id,
      name: sessionRow.name,
      updated_at: sessionRow.updated_at,
    });
  } else {
    // The UPDATE matched no session row. Dereferencing it would throw here and
    // suppress the chat_status / message_complete frames below, so skip only
    // the session_updated event and carry on.
    ctx.log.warn(
      { sessionId, chatId, assistantMessageId },
      'session row not found while finalizing completed turn; skipping session_updated'
    );
  }

  ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
    chat_id: chatId,
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
    started_at: startedAt,
    finished_at: updated?.finished_at ?? null,
    model: session.model,
  });
  ctx.log.info(
    {
      sessionId,
      chatId,
      assistantMessageId,
      finishReason,
      chars: content.length,
      tokens_used: updated?.tokens_used,
      ctx_used: updated?.ctx_used,
    },
    'inference complete'
  );
}
|