Compare commits
1 Commits
v1.12.3-st
...
ea468ca7fb
| Author | SHA1 | Date | |
|---|---|---|---|
| ea468ca7fb |
@@ -13,7 +13,6 @@ import type {
|
||||
} from '../types/api.js';
|
||||
import {
|
||||
ALL_TOOLS,
|
||||
READ_ONLY_TOOL_NAMES,
|
||||
TOOLS_BY_NAME,
|
||||
toolJsonSchemas,
|
||||
type ToolJsonSchema,
|
||||
@@ -28,88 +27,34 @@ import type { Broker } from './broker.js';
|
||||
// async (awaits the container-guidance loader) — buildMessagesPayload below
|
||||
// is therefore async too, and its three call sites in this file await it.
|
||||
import { buildSystemPrompt } from './system-prompt.js';
|
||||
import { resolveToolBudget } from './inference/budget.js';
|
||||
import {
|
||||
DOOM_LOOP_THRESHOLD,
|
||||
detectDoomLoop,
|
||||
isAnySentinel,
|
||||
} from './inference/sentinels.js';
|
||||
import {
|
||||
XML_TOOL_CLOSE,
|
||||
XML_TOOL_OPEN,
|
||||
parseXmlToolCall,
|
||||
partialXmlOpenerStart,
|
||||
} from './inference/xml-parser.js';
|
||||
|
||||
// v1.12.4: re-exported so external callers (tests, future consumers) keep
|
||||
// importing from services/inference.js as the public surface.
|
||||
export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './inference/sentinels.js';
|
||||
|
||||
const DB_FLUSH_INTERVAL_MS = 500;
|
||||
|
||||
// v1.8.2: tool-call budget defaults. Resolved per-turn by resolveToolBudget.
|
||||
// - Agent with explicit max_tool_calls: that value.
|
||||
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
||||
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
||||
// - No agent (raw chat): BUDGET_NO_AGENT (15).
|
||||
const BUDGET_READ_ONLY = 30;
|
||||
const BUDGET_NON_READ_ONLY = 10;
|
||||
const BUDGET_NO_AGENT = 15;
|
||||
|
||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||
|
||||
/**
 * Picks the tool-call budget for a single turn.
 *
 * Resolution order:
 *  1. no agent (raw chat)                     -> BUDGET_NO_AGENT
 *  2. agent with an explicit `max_tool_calls` -> that value, as given
 *  3. agent whose tools are all read-only     -> BUDGET_READ_ONLY
 *  4. agent with any non-read-only tool       -> BUDGET_NON_READ_ONLY
 *
 * @param agent The agent driving the turn, or `null` for raw chat.
 * @returns The maximum number of tool calls allowed for the turn.
 */
function resolveToolBudget(agent: Agent | null): number {
  if (!agent) {
    return BUDGET_NO_AGENT;
  }

  const explicitLimit = agent.max_tool_calls;
  if (explicitLimit != null) {
    return explicitLimit;
  }

  // One tool outside the read-only set is enough to select the tighter budget.
  const usesMutatingTool = agent.tools.some((tool) => !READ_ONLY_SET.has(tool));
  if (usesMutatingTool) {
    return BUDGET_NON_READ_ONLY;
  }
  return BUDGET_READ_ONLY;
}
|
||||
|
||||
// Synthetic system note appended to the cap-hit summary call. Verbatim from
|
||||
// the v1.8.2 spec — do not paraphrase: the model is more reliable when the
|
||||
// instruction is short, declarative, and identical across calls.
|
||||
/**
 * Builds the synthetic system note used when the tool budget is exhausted.
 * The wording is fixed on purpose; only the limit is interpolated.
 *
 * @param limit The tool-call budget that was reached.
 * @returns The instruction text telling the model to answer without more tools.
 */
const CAP_HIT_SUMMARY_NOTE = (limit: number): string => {
  const sentences = [
    `You've reached the tool budget (${limit} calls).`,
    'Produce the best answer you can with what you have.',
    'Do not call more tools.',
  ];
  return sentences.join(' ');
};
|
||||
|
||||
// v1.11.6: doom-loop guard. When the model calls the same tool with the
|
||||
// same arguments DOOM_LOOP_THRESHOLD times in a row within one user-message
|
||||
// turn, abort the recursion and run the same wrap-up summary path as the
|
||||
// cap-hit case. Ported from opencode (DOOM_LOOP_THRESHOLD in
|
||||
// session/processor.ts). Threshold of 3 is the smallest value that doesn't
|
||||
// false-positive on a model that retries once after a transient error.
|
||||
/**
 * Number of consecutive identical tool calls (same name, same arguments)
 * that counts as a doom loop.
 */
export const DOOM_LOOP_THRESHOLD = 3;

/**
 * Builds the system note used when a doom loop is detected.
 *
 * @param name Name of the tool that was called repeatedly.
 * @returns The instruction text telling the model to stop calling that tool.
 */
const DOOM_LOOP_NOTE = (name: string) =>
  `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`;

/**
 * Serialises `value` to JSON with object keys sorted at every depth, so two
 * structurally equal values give the same string regardless of the order
 * in which their keys were inserted. Array order is preserved.
 */
function stableArgsKey(value: unknown): string | undefined {
  return JSON.stringify(value, (_key, v: unknown) => {
    if (v === null || typeof v !== 'object' || Array.isArray(v)) return v;
    const entries = Object.entries(v as Record<string, unknown>);
    entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    // Object.fromEntries defines own data properties, so a "__proto__" key
    // stays an ordinary key instead of touching the prototype.
    return Object.fromEntries(entries);
  });
}

/**
 * Detects a doom loop: the LAST `DOOM_LOOP_THRESHOLD` entries of
 * `recentToolCalls` all name the same tool with deep-equal arguments.
 *
 * Arguments are compared through a key-sorted JSON serialisation, so
 * `{a: 1, b: 2}` and `{b: 2, a: 1}` count as the same call. Plain
 * `JSON.stringify` is key-order sensitive and would miss that repetition.
 *
 * Pure; exported for unit-test access.
 *
 * @param recentToolCalls Tool calls made so far, oldest first.
 * @returns The name and args of the looping tool (taken from the first call
 *   of the repeated run), or `null` when there is no loop.
 */
export function detectDoomLoop(
  recentToolCalls: ToolCall[],
): { name: string; args: Record<string, unknown> } | null {
  if (recentToolCalls.length < DOOM_LOOP_THRESHOLD) return null;

  const [ref, ...rest] = recentToolCalls.slice(-DOOM_LOOP_THRESHOLD);
  if (ref === undefined) return null;

  const refName = ref.name;
  const refKey = stableArgsKey(ref.args);
  const repeated = rest.every(
    (tc) => tc.name === refName && stableArgsKey(tc.args) === refKey,
  );
  return repeated ? { name: ref.name, args: ref.args } : null;
}
|
||||
|
||||
/**
 * Shared sentinel check: `m` is a system message whose metadata is a
 * non-null object carrying the given `kind` tag.
 *
 * Both sentinel predicates go through this helper so their structural
 * checks cannot drift apart.
 */
function hasSentinelKind(m: Message, kind: string): boolean {
  if (m.role !== 'system') return false;
  const meta: unknown = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;
  return (meta as { kind?: unknown }).kind === kind;
}

/** True when `m` is a system message tagged `metadata.kind === 'cap_hit'`. */
function isCapHitSentinel(m: Message): boolean {
  return hasSentinelKind(m, 'cap_hit');
}

/**
 * v1.11.6: parallel predicate for doom-loop sentinels, i.e. system messages
 * tagged `metadata.kind === 'doom_loop'`.
 */
function isDoomLoopSentinel(m: Message): boolean {
  return hasSentinelKind(m, 'doom_loop');
}

/** True when `m` is either a cap-hit or a doom-loop sentinel. */
function isAnySentinel(m: Message): boolean {
  return isCapHitSentinel(m) || isDoomLoopSentinel(m);
}
|
||||
|
||||
export interface InferenceFrame {
|
||||
type:
|
||||
| 'message_started'
|
||||
@@ -391,55 +336,6 @@ interface StreamOptions {
|
||||
// streamCompletion buffers delta.content, extracts complete blocks, parses
|
||||
// them via parseXmlToolCall, and pushes synthetic entries into the existing
|
||||
// toolCallsBuffer alongside any native JSON-format tool calls.
|
||||
const XML_TOOL_OPEN = '<tool_call>';
|
||||
const XML_TOOL_CLOSE = '</tool_call>';
|
||||
|
||||
/**
 * Parses one XML-style tool-call block of the form
 * `<function=NAME><parameter=KEY>VALUE</parameter>…</function>`.
 *
 * Each parameter value is trimmed and decoded with `JSON.parse`; text that
 * is not valid JSON is kept as the trimmed raw string. When a key appears
 * more than once, the last occurrence wins.
 *
 * Keys come from model output, so they are written with
 * `Object.defineProperty` instead of plain assignment. A `__proto__` key
 * then becomes an ordinary own property rather than replacing the
 * prototype of `args`.
 *
 * @param block Text containing the `<function=…>` element.
 * @returns The tool name and decoded arguments, or `null` when no non-empty
 *   function name is present.
 */
function parseXmlToolCall(
  block: string,
): { name: string; args: Record<string, unknown> } | null {
  const name = /<function=([^>]+)>/.exec(block)?.[1]?.trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  // Non-greedy body so each <parameter=…>…</parameter> pair is matched
  // independently even when multiple appear in the same block.
  const paramRe = /<parameter=([^>]+)>([\s\S]*?)<\/parameter>/g;
  for (const [, rawKey = '', rawValue = ''] of block.matchAll(paramRe)) {
    const key = rawKey.trim();
    if (!key) continue;

    const text = rawValue.trim();
    let value: unknown;
    try {
      value = JSON.parse(text);
    } catch {
      // Not JSON (e.g. a bare path or sentence): keep the literal text.
      value = text;
    }

    Object.defineProperty(args, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return { name, args };
}
|
||||
|
||||
// Locate the first character that begins (or completely contains) an
|
||||
// unfinished <tool_call> opener in `s`. Returns -1 when `s` can be flushed
|
||||
// to the client in full without risking a partial tag leak.
|
||||
// Case 1: a full `<tool_call>` opener with no matching closer — caller
|
||||
// must keep everything from that index forward until the next
|
||||
// chunk arrives with the closer.
|
||||
// Case 2: `s` ends with a strict prefix of `<tool_call>` (e.g. `<tool_c`).
|
||||
// Caller must keep just that suffix in the buffer.
|
||||
// Note: case 1 assumes the calling loop already extracted every complete
|
||||
// <tool_call>…</tool_call> pair before reaching this check.
|
||||
/**
 * Finds where an unfinished `<tool_call>` opener begins in `s`.
 *
 * @param s Buffered text to inspect.
 * @returns The index of the first full opener if one is present; otherwise
 *   the index of the trailing `<` when the tail of `s` is a strict prefix of
 *   the opener; otherwise -1.
 */
function partialXmlOpenerStart(s: string): number {
  const openerAt = s.indexOf(XML_TOOL_OPEN);
  if (openerAt >= 0) {
    return openerAt;
  }

  // No full opener: only the text from the last '<' onward can be a
  // truncated one.
  const tailStart = s.lastIndexOf('<');
  if (tailStart < 0) {
    return -1;
  }

  const tail = s.substring(tailStart);
  const isStrictPrefix =
    tail.length < XML_TOOL_OPEN.length && XML_TOOL_OPEN.startsWith(tail);
  return isStrictPrefix ? tailStart : -1;
}
|
||||
|
||||
async function streamCompletion(
|
||||
ctx: InferenceContext,
|
||||
model: string,
|
||||
|
||||
20
apps/server/src/services/inference/budget.ts
Normal file
20
apps/server/src/services/inference/budget.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import type { Agent } from '../../types/api.js';
|
||||
import { READ_ONLY_TOOL_NAMES } from '../tools.js';
|
||||
|
||||
// v1.8.2: tool-call budget defaults. Resolved per-turn by resolveToolBudget.
|
||||
// - Agent with explicit max_tool_calls: that value.
|
||||
// - Agent with read-only-only tools: BUDGET_READ_ONLY (30).
|
||||
// - Agent with any non-read-only tool: BUDGET_NON_READ_ONLY (10).
|
||||
// - No agent (raw chat): BUDGET_NO_AGENT (15).
|
||||
export const BUDGET_READ_ONLY = 30;
|
||||
export const BUDGET_NON_READ_ONLY = 10;
|
||||
export const BUDGET_NO_AGENT = 15;
|
||||
|
||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||
|
||||
/**
 * Picks the tool-call budget for a single turn.
 *
 * Resolution order:
 *  1. no agent (raw chat)                     -> BUDGET_NO_AGENT
 *  2. agent with an explicit `max_tool_calls` -> that value, as given
 *  3. agent whose tools are all read-only     -> BUDGET_READ_ONLY
 *  4. agent with any non-read-only tool       -> BUDGET_NON_READ_ONLY
 *
 * @param agent The agent driving the turn, or `null` for raw chat.
 * @returns The maximum number of tool calls allowed for the turn.
 */
export function resolveToolBudget(agent: Agent | null): number {
  if (!agent) {
    return BUDGET_NO_AGENT;
  }

  const explicitLimit = agent.max_tool_calls;
  if (explicitLimit != null) {
    return explicitLimit;
  }

  // One tool outside the read-only set is enough to select the tighter budget.
  const usesMutatingTool = agent.tools.some((tool) => !READ_ONLY_SET.has(tool));
  if (usesMutatingTool) {
    return BUDGET_NON_READ_ONLY;
  }
  return BUDGET_READ_ONLY;
}
|
||||
53
apps/server/src/services/inference/sentinels.ts
Normal file
53
apps/server/src/services/inference/sentinels.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { Message, ToolCall } from '../../types/api.js';
|
||||
|
||||
// v1.11.6: doom-loop guard. When the model calls the same tool with the
|
||||
// same arguments DOOM_LOOP_THRESHOLD times in a row within one user-message
|
||||
// turn, abort the recursion and run the same wrap-up summary path as the
|
||||
// cap-hit case. Ported from opencode (DOOM_LOOP_THRESHOLD in
|
||||
// session/processor.ts). Threshold of 3 is the smallest value that doesn't
|
||||
// false-positive on a model that retries once after a transient error.
|
||||
/**
 * Number of consecutive identical tool calls (same name, same arguments)
 * that counts as a doom loop.
 */
export const DOOM_LOOP_THRESHOLD = 3;

/**
 * Serialises `value` to JSON with object keys sorted at every depth, so two
 * structurally equal values give the same string regardless of the order
 * in which their keys were inserted. Array order is preserved.
 */
function stableArgsKey(value: unknown): string | undefined {
  return JSON.stringify(value, (_key, v: unknown) => {
    if (v === null || typeof v !== 'object' || Array.isArray(v)) return v;
    const entries = Object.entries(v as Record<string, unknown>);
    entries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    // Object.fromEntries defines own data properties, so a "__proto__" key
    // stays an ordinary key instead of touching the prototype.
    return Object.fromEntries(entries);
  });
}

/**
 * Detects a doom loop: the LAST `DOOM_LOOP_THRESHOLD` entries of
 * `recentToolCalls` all name the same tool with deep-equal arguments.
 *
 * Arguments are compared through a key-sorted JSON serialisation, so
 * `{a: 1, b: 2}` and `{b: 2, a: 1}` count as the same call. Plain
 * `JSON.stringify` is key-order sensitive and would miss that repetition.
 *
 * Pure; exported for unit-test access.
 *
 * @param recentToolCalls Tool calls made so far, oldest first.
 * @returns The name and args of the looping tool (taken from the first call
 *   of the repeated run), or `null` when there is no loop.
 */
export function detectDoomLoop(
  recentToolCalls: ToolCall[],
): { name: string; args: Record<string, unknown> } | null {
  if (recentToolCalls.length < DOOM_LOOP_THRESHOLD) return null;

  const [ref, ...rest] = recentToolCalls.slice(-DOOM_LOOP_THRESHOLD);
  if (ref === undefined) return null;

  const refName = ref.name;
  const refKey = stableArgsKey(ref.args);
  const repeated = rest.every(
    (tc) => tc.name === refName && stableArgsKey(tc.args) === refKey,
  );
  return repeated ? { name: ref.name, args: ref.args } : null;
}
|
||||
|
||||
/**
 * Shared sentinel check: `m` is a system message whose metadata is a
 * non-null object carrying the given `kind` tag.
 *
 * Both sentinel predicates go through this helper so their structural
 * checks cannot drift apart.
 */
function hasSentinelKind(m: Message, kind: string): boolean {
  if (m.role !== 'system') return false;
  const meta: unknown = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;
  return (meta as { kind?: unknown }).kind === kind;
}

/** True when `m` is a system message tagged `metadata.kind === 'cap_hit'`. */
export function isCapHitSentinel(m: Message): boolean {
  return hasSentinelKind(m, 'cap_hit');
}

/**
 * v1.11.6: parallel predicate for doom-loop sentinels, i.e. system messages
 * tagged `metadata.kind === 'doom_loop'`.
 */
export function isDoomLoopSentinel(m: Message): boolean {
  return hasSentinelKind(m, 'doom_loop');
}

/** True when `m` is either a cap-hit or a doom-loop sentinel. */
export function isAnySentinel(m: Message): boolean {
  return isCapHitSentinel(m) || isDoomLoopSentinel(m);
}
|
||||
53
apps/server/src/services/inference/xml-parser.ts
Normal file
53
apps/server/src/services/inference/xml-parser.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
// v1.10.5: XML-tag tool-call fallback. Some models emit
|
||||
// <tool_call><function=foo><parameter=key>value</parameter></function></tool_call>
|
||||
// in plain content instead of using the OpenAI tool_calls JSON channel.
|
||||
// The streaming loop in inference.ts extracts these blocks via these helpers.
|
||||
|
||||
export const XML_TOOL_OPEN = '<tool_call>';
|
||||
export const XML_TOOL_CLOSE = '</tool_call>';
|
||||
|
||||
/**
 * Parses one XML-style tool-call block of the form
 * `<function=NAME><parameter=KEY>VALUE</parameter>…</function>`.
 *
 * Each parameter value is trimmed and decoded with `JSON.parse`; text that
 * is not valid JSON is kept as the trimmed raw string. When a key appears
 * more than once, the last occurrence wins.
 *
 * Keys come from model output, so they are written with
 * `Object.defineProperty` instead of plain assignment. A `__proto__` key
 * then becomes an ordinary own property rather than replacing the
 * prototype of `args`.
 *
 * @param block Text containing the `<function=…>` element.
 * @returns The tool name and decoded arguments, or `null` when no non-empty
 *   function name is present.
 */
export function parseXmlToolCall(
  block: string,
): { name: string; args: Record<string, unknown> } | null {
  const name = /<function=([^>]+)>/.exec(block)?.[1]?.trim();
  if (!name) return null;

  const args: Record<string, unknown> = {};
  // Non-greedy body so each <parameter=…>…</parameter> pair is matched
  // independently even when multiple appear in the same block.
  const paramRe = /<parameter=([^>]+)>([\s\S]*?)<\/parameter>/g;
  for (const [, rawKey = '', rawValue = ''] of block.matchAll(paramRe)) {
    const key = rawKey.trim();
    if (!key) continue;

    const text = rawValue.trim();
    let value: unknown;
    try {
      value = JSON.parse(text);
    } catch {
      // Not JSON (e.g. a bare path or sentence): keep the literal text.
      value = text;
    }

    Object.defineProperty(args, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return { name, args };
}
|
||||
|
||||
// Locate the first character that begins (or completely contains) an
|
||||
// unfinished <tool_call> opener in `s`. Returns -1 when `s` can be flushed
|
||||
// to the client in full without risking a partial tag leak.
|
||||
// Case 1: a full `<tool_call>` opener with no matching closer — caller
|
||||
// must keep everything from that index forward until the next
|
||||
// chunk arrives with the closer.
|
||||
// Case 2: `s` ends with a strict prefix of `<tool_call>` (e.g. `<tool_c`).
|
||||
// Caller must keep just that suffix in the buffer.
|
||||
// Note: case 1 assumes the calling loop already extracted every complete
|
||||
// <tool_call>…</tool_call> pair before reaching this check.
|
||||
/**
 * Finds where an unfinished `<tool_call>` opener begins in `s`.
 *
 * @param s Buffered text to inspect.
 * @returns The index of the first full opener if one is present; otherwise
 *   the index of the trailing `<` when the tail of `s` is a strict prefix of
 *   the opener; otherwise -1.
 */
export function partialXmlOpenerStart(s: string): number {
  const openerAt = s.indexOf(XML_TOOL_OPEN);
  if (openerAt >= 0) {
    return openerAt;
  }

  // No full opener: only the text from the last '<' onward can be a
  // truncated one.
  const tailStart = s.lastIndexOf('<');
  if (tailStart < 0) {
    return -1;
  }

  const tail = s.substring(tailStart);
  const isStrictPrefix =
    tail.length < XML_TOOL_OPEN.length && XML_TOOL_OPEN.startsWith(tail);
  return isStrictPrefix ? tailStart : -1;
}
|
||||
Reference in New Issue
Block a user