From 1b70d4199606fe8cfac9288959cedcede23948dc Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Sun, 7 Jun 2026 17:57:58 +0000 Subject: [PATCH] feat(server): add inference reliability - tool-shim and loop detectors - ToolShim recovers XML/JSON tool calls from plain-text model output - detectContentRepeat catches same-content loops - detectToolLoop catches repeated tool invocations - detectDoomLoop combines both detectors --- .../src/services/inference/loop-detectors.ts | 68 +++++++++++++++++++ .../src/services/inference/tool-shim.ts | 45 ++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 apps/server/src/services/inference/loop-detectors.ts create mode 100644 apps/server/src/services/inference/tool-shim.ts diff --git a/apps/server/src/services/inference/loop-detectors.ts b/apps/server/src/services/inference/loop-detectors.ts new file mode 100644 index 0000000..94472b2 --- /dev/null +++ b/apps/server/src/services/inference/loop-detectors.ts @@ -0,0 +1,68 @@ +// Loop detectors — detects repetitive patterns in assistant output +// that indicate a model is stuck in a loop. 
/** Verdict returned by every loop detector in this module. */
export interface LoopDetectionResult {
  /** True when the inspected history looks like a stuck loop. */
  isLoop: boolean;
  /** Human-readable explanation; the detectors only set it when `isLoop` is true. */
  reason?: string;
  /** Heuristic confidence in the verdict, in the range 0-1. */
  confidence: number;
}

/** Number of trailing messages inspected for repeated content. */
const REPEATED_PHRASE_MIN_COUNT = 4;
/** Number of trailing tool names inspected for repeated invocations. */
const REPEATED_TOOL_MIN_COUNT = 3;

/** Builds a fresh negative verdict so callers never share a mutable object. */
function noLoop(): LoopDetectionResult {
  return { isLoop: false, confidence: 0 };
}

/**
 * Detects a model that keeps emitting the same content.
 *
 * Only the last `REPEATED_PHRASE_MIN_COUNT` messages are inspected, compared
 * by exact string equality.
 *
 * @param messages Assistant messages, oldest first.
 * @returns A positive verdict (confidence 0.9) when all inspected messages are
 *   identical, a positive verdict (confidence 0.7) when they strictly
 *   alternate between two values (A, B, A, B), and a negative verdict
 *   otherwise — including when fewer messages than the window are available.
 */
export function detectContentRepeat(messages: readonly string[]): LoopDetectionResult {
  if (messages.length < REPEATED_PHRASE_MIN_COUNT) {
    return noLoop();
  }

  const recent = messages.slice(-REPEATED_PHRASE_MIN_COUNT);
  const distinctCount = new Set(recent).size;

  if (distinctCount === 1) {
    return {
      isLoop: true,
      reason: `Same content repeated ${REPEATED_PHRASE_MIN_COUNT} times`,
      confidence: 0.9,
    };
  }

  // Oscillation means every message equals the one two steps earlier.
  // Merely having two distinct values is not enough: [A, A, A, B] is a model
  // that just moved on, not one bouncing between two variants.
  const alternates = recent.every(
    (message, index) => index < 2 || message === recent[index - 2],
  );
  if (distinctCount === 2 && alternates) {
    return {
      isLoop: true,
      reason: 'Content oscillating between two variants',
      confidence: 0.7,
    };
  }

  return noLoop();
}

/**
 * Detects the same tool being invoked repeatedly.
 *
 * Only the last `REPEATED_TOOL_MIN_COUNT` tool names are inspected.
 * NOTE(review): only names are compared, so consecutive calls to one tool
 * with different arguments are flagged as well — confirm callers expect this.
 *
 * @param toolNames Names of invoked tools, oldest first.
 * @returns A positive verdict (confidence 0.85) when all inspected names are
 *   identical; a negative verdict otherwise.
 */
export function detectToolLoop(toolNames: readonly string[]): LoopDetectionResult {
  if (toolNames.length < REPEATED_TOOL_MIN_COUNT) {
    return noLoop();
  }

  const recent = toolNames.slice(-REPEATED_TOOL_MIN_COUNT);

  if (new Set(recent).size === 1) {
    return {
      isLoop: true,
      reason: `Same tool "${recent[0]}" called ${REPEATED_TOOL_MIN_COUNT} times consecutively`,
      confidence: 0.85,
    };
  }

  return noLoop();
}

/**
 * Runs the content detector and then the tool detector, returning the first
 * positive verdict.
 *
 * @param messages Assistant messages, oldest first.
 * @param toolNames Names of invoked tools, oldest first.
 * @returns The content verdict if it is a loop, else the tool verdict if it
 *   is a loop, else a negative verdict.
 */
export function detectDoomLoop(
  messages: readonly string[],
  toolNames: readonly string[],
): LoopDetectionResult {
  const contentResult = detectContentRepeat(messages);
  if (contentResult.isLoop) return contentResult;

  const toolResult = detectToolLoop(toolNames);
  if (toolResult.isLoop) return toolResult;

  return noLoop();
}

// --- Patch boundary: the next file in this change set begins here. ---
// diff --git a/apps/server/src/services/inference/tool-shim.ts b/apps/server/src/services/inference/tool-shim.ts
// new file mode 100644
// index 0000000..9b0bc16
// --- /dev/null
// +++ b/apps/server/src/services/inference/tool-shim.ts
// @@ -0,0 +1,45 @@
// ToolShim — recovers structured tool calls from plain-text model output.
// When the model emits tool calls as plain text instead of structured JSON,
// this shim attempts to parse and recover them.

/** A tool call recovered from free-form model text. */
export interface ParsedToolCall {
  /** Synthetic identifier of the form `call_<index>`, unique within one extraction. */
  id: string;
  /** Tool name, trimmed of surrounding whitespace. */
  name: string;
  /** Raw argument text exactly as the model wrote it (not parsed or validated). */
  arguments: string;
}

// Both patterns are deliberately NOT global: a global regex keeps `lastIndex`
// between `.test()` calls, which makes repeated checks on module-level
// patterns flip between true and false. Global copies are built per call
// wherever iteration is needed.

// XML-style call. The opening tags anchor the match; without them the lazy
// name group would swallow all text that precedes `</name>`.
const TOOL_CALL_PATTERN =
  /<tool_call>\s*<name>(.+?)<\/name>\s*<arguments>(.+?)<\/arguments>\s*<\/tool_call>/s;

// Head of a JSON-style call, up to (not including) the arguments object.
// The arguments object itself is located by brace matching, because a regex
// cannot reliably find the end of a nested object.
const JSON_TOOL_HEAD_PATTERN = /\{\s*"name":\s*"([^"]+)",\s*"arguments":\s*/;

/**
 * Finds the end of the JSON object that opens at `start`.
 *
 * Braces inside double-quoted string literals (including escaped quotes) are
 * ignored.
 *
 * @returns The index just past the matching `}`, or -1 when `text[start]` is
 *   not `{` or the object is never closed.
 */
function findObjectEnd(text: string, start: number): number {
  if (text[start] !== '{') return -1;

  let depth = 0;
  let inString = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character so `\"` does not end the string
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }
  return -1;
}

/** Collects every XML-style tool call in `text`, in order of appearance. */
function extractXmlToolCalls(text: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const pattern = new RegExp(TOOL_CALL_PATTERN.source, 'gs');
  let match: RegExpExecArray | null;
  while ((match = pattern.exec(text)) !== null) {
    const [, rawName = '', rawArguments = ''] = match;
    calls.push({
      id: `call_${calls.length}`,
      name: rawName.trim(),
      arguments: rawArguments.trim(),
    });
  }
  return calls;
}

/** Collects every JSON-style tool call in `text`, in order of appearance. */
function extractJsonToolCalls(text: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const head = new RegExp(JSON_TOOL_HEAD_PATTERN.source, 'g');
  // Sticky: must match exactly at `lastIndex`, i.e. right after the arguments.
  const closer = /\s*\}/y;
  let match: RegExpExecArray | null;
  while ((match = head.exec(text)) !== null) {
    const argsStart = head.lastIndex;
    const argsEnd = findObjectEnd(text, argsStart);
    if (argsEnd === -1) continue; // arguments is not a complete object

    closer.lastIndex = argsEnd;
    if (closer.exec(text) === null) continue; // outer object is not closed

    const [, rawName = ''] = match;
    calls.push({
      id: `call_${calls.length}`,
      name: rawName.trim(),
      arguments: text.slice(argsStart, argsEnd),
    });
    // Resume after the whole call so nested content is not rescanned.
    head.lastIndex = closer.lastIndex;
  }
  return calls;
}

/**
 * Recovers tool calls that a model wrote as plain text.
 *
 * XML-style calls (common in Qwen output) are tried first; JSON-style calls
 * of the shape `{"name": "...", "arguments": {...}}` are only considered when
 * no XML-style call is found. Argument text is returned verbatim and is not
 * validated as JSON.
 *
 * @param text Raw model output.
 * @returns The recovered calls in order of appearance; empty when none match.
 */
export function extractToolCalls(text: string): ParsedToolCall[] {
  const xmlCalls = extractXmlToolCalls(text);
  return xmlCalls.length > 0 ? xmlCalls : extractJsonToolCalls(text);
}

/**
 * Reports whether `text` contains at least one recoverable tool call.
 *
 * The check is stateless: calling it repeatedly with the same text always
 * yields the same answer, and it agrees with `extractToolCalls`.
 *
 * @param text Raw model output.
 */
export function hasToolCallMarkup(text: string): boolean {
  return TOOL_CALL_PATTERN.test(text) || extractJsonToolCalls(text).length > 0;
}