feat(server): add inference reliability - tool-shim and loop detectors

- ToolShim recovers XML/JSON tool calls from plain-text model output
- detectContentRepeat catches same-content loops
- detectToolLoop catches repeated tool invocations
- detectDoomLoop combines both detectors
2026-06-07 17:57:58 +00:00
parent b64941ad4b
commit 1b70d41996
2 changed files with 113 additions and 0 deletions
--- a/apps/server/src/services/inference/loop-detectors.ts
+++ b/apps/server/src/services/inference/loop-detectors.ts
@@ -0,0 +1,68 @@
+// Loop detectors — detect repetitive patterns in assistant output
+// that indicate a model is stuck in a loop.
+
+/** Verdict produced by the loop detectors in this module. */
+export interface LoopDetectionResult {
+  /** How certain the detector is, on a 0-1 scale (0 when no loop was found). */
+  confidence: number;
+  /** True when the inspected history looks like a loop. */
+  isLoop: boolean;
+  /** Human-readable explanation; the detectors set it only when `isLoop` is true. */
+  reason?: string;
+}
+
+// Size of the trailing window of messages that detectContentRepeat inspects;
+// histories shorter than this are never reported as a loop.
+const REPEATED_PHRASE_MIN_COUNT = 4;
+// Size of the trailing window of tool names that detectToolLoop inspects;
+// histories shorter than this are never reported as a loop.
+const REPEATED_TOOL_MIN_COUNT = 3;
+
+/**
+ * Flags a loop when the tail of `messages` keeps repeating.
+ *
+ * Only the last `REPEATED_PHRASE_MIN_COUNT` entries are examined, compared by
+ * exact string equality:
+ * - every entry identical            -> loop, confidence 0.9
+ * - strict alternation (A, B, A, B)  -> loop, confidence 0.7
+ *
+ * A window that merely contains two distinct values (e.g. A, A, A, B or
+ * A, A, B, B) is NOT flagged: the content has just changed, which is the
+ * opposite of being stuck.
+ *
+ * NOTE(review): empty strings compare equal like any other value, so a window
+ * of empty messages counts as a repeat — confirm callers filter those out.
+ *
+ * @param messages Message texts; the end of the array is treated as the most
+ *   recent output. The array is not modified.
+ * @returns The verdict; `reason` is set only when a loop is detected.
+ */
+export function detectContentRepeat(messages: readonly string[]): LoopDetectionResult {
+  if (messages.length < REPEATED_PHRASE_MIN_COUNT) {
+    return { isLoop: false, confidence: 0 };
+  }
+
+  const recent = messages.slice(-REPEATED_PHRASE_MIN_COUNT);
+  const [first, second] = recent;
+
+  if (recent.every((message) => message === first)) {
+    return {
+      isLoop: true,
+      reason: `Same content repeated ${REPEATED_PHRASE_MIN_COUNT} times`,
+      confidence: 0.9,
+    };
+  }
+
+  // Period-2 cycle: the first two entries differ and every later entry equals
+  // the one two positions before it.
+  const alternates =
+    first !== second &&
+    recent.every((message, index) => index < 2 || message === recent[index - 2]);
+
+  if (alternates) {
+    return {
+      isLoop: true,
+      reason: 'Content oscillating between two variants',
+      confidence: 0.7,
+    };
+  }
+
+  return { isLoop: false, confidence: 0 };
+}
+
+/**
+ * Flags a loop when the last `REPEATED_TOOL_MIN_COUNT` tool names are all the
+ * same. Only names are compared; a shorter history is never a loop.
+ *
+ * @param toolNames Names of the invoked tools; the end of the array is
+ *   treated as the most recent call.
+ * @returns The verdict, with confidence 0.85 and a reason naming the tool
+ *   when a loop is detected.
+ */
+export function detectToolLoop(toolNames: string[]): LoopDetectionResult {
+  const windowStart = toolNames.length - REPEATED_TOOL_MIN_COUNT;
+  if (windowStart < 0) {
+    return { isLoop: false, confidence: 0 };
+  }
+
+  // Walk the trailing window and bail out at the first differing name.
+  const repeatedTool = toolNames[windowStart];
+  for (let index = windowStart + 1; index < toolNames.length; index += 1) {
+    if (toolNames[index] !== repeatedTool) {
+      return { isLoop: false, confidence: 0 };
+    }
+  }
+
+  return {
+    isLoop: true,
+    reason: `Same tool "${repeatedTool}" called ${REPEATED_TOOL_MIN_COUNT} times consecutively`,
+    confidence: 0.85,
+  };
+}
+
+/**
+ * Runs the content detector and then the tool detector, returning the first
+ * positive verdict. The tool detector is only consulted when the content
+ * detector reports no loop.
+ *
+ * @param messages Message texts passed to `detectContentRepeat`.
+ * @param toolNames Tool names passed to `detectToolLoop`.
+ * @returns The first detector result with `isLoop` true, otherwise a
+ *   no-loop result with confidence 0.
+ */
+export function detectDoomLoop(
+  messages: string[],
+  toolNames: string[],
+): LoopDetectionResult {
+  // Thunks keep evaluation lazy and in priority order.
+  const detectors: Array<() => LoopDetectionResult> = [
+    () => detectContentRepeat(messages),
+    () => detectToolLoop(toolNames),
+  ];
+
+  for (const runDetector of detectors) {
+    const verdict = runDetector();
+    if (verdict.isLoop) {
+      return verdict;
+    }
+  }
+
+  return { isLoop: false, confidence: 0 };
+}
--- a/apps/server/src/services/inference/tool-shim.ts
+++ b/apps/server/src/services/inference/tool-shim.ts
@@ -0,0 +1,45 @@
+// ToolShim — recovers structured tool calls from plain-text model output.
+// When the model emits tool calls as plain text instead of structured JSON,
+// this shim attempts to parse and recover them.
+
+/** A tool call recovered from plain-text model output. */
+export interface ParsedToolCall {
+  /** Name of the tool being invoked. */
+  name: string;
+  /** Argument payload, kept as unparsed text. */
+  arguments: string;
+  /** Identifier assigned to this call. */
+  id: string;
+}
+
+// Matches one XML-style call:
+//   <tool_call><name>…</name><arguments>…</arguments></tool_call>
+// Capture 1 is the tool name, capture 2 the raw arguments text. The `s` flag
+// lets `.` span newlines. The `g` flag makes exec()/test() advance
+// `lastIndex`, so using this shared instance directly carries state from one
+// call to the next.
+const TOOL_CALL_PATTERN = /<tool_call>\s*<name>(.+?)<\/name>\s*<arguments>(.+?)<\/arguments>\s*<\/tool_call>/gs;
+// Matches one JSON-style call: {"name": "…", "arguments": {…}} with the keys
+// in exactly that order. Capture 1 is the tool name, capture 2 the arguments
+// object text. The lazy `{.+?}` stops at the first `}` that is followed
+// (after optional whitespace) by another `}`, so nested argument objects are
+// captured truncated and an empty `{}` does not match. Same `g`/`s` flag
+// behaviour as above.
+const JSON_TOOL_PATTERN = /\{\s*"name":\s*"([^"]+)",\s*"arguments":\s*({.+?})\s*\}/gs;
+
+// Start of a JSON-style call, up to (but not including) the opening brace of
+// the arguments object. Capture 1 is the tool name.
+const JSON_TOOL_HEADER = /\{\s*"name":\s*"([^"]+)",\s*"arguments":\s*(?=\{)/g;
+// Closing brace of the call wrapper. Sticky, so it only matches exactly at
+// `lastIndex`, which is set before every use.
+const JSON_TOOL_CLOSER = /\s*\}/y;
+
+/**
+ * Finds the end of the JSON object that opens at `start`.
+ * Braces inside string literals (including escaped quotes) are ignored.
+ *
+ * @returns Index just past the balancing `}`, or -1 if the object never closes.
+ */
+function findJsonObjectEnd(text: string, start: number): number {
+  let depth = 0;
+  let inString = false;
+
+  for (let index = start; index < text.length; index += 1) {
+    const char = text.charAt(index);
+    if (inString) {
+      if (char === '\\') {
+        index += 1; // skip the escaped character
+      } else if (char === '"') {
+        inString = false;
+      }
+    } else if (char === '"') {
+      inString = true;
+    } else if (char === '{') {
+      depth += 1;
+    } else if (char === '}') {
+      depth -= 1;
+      if (depth === 0) return index + 1;
+    }
+  }
+
+  return -1;
+}
+
+/**
+ * Recovers tool calls that a model wrote as plain text.
+ *
+ * XML-style calls (`<tool_call><name>…</name><arguments>…</arguments></tool_call>`)
+ * are tried first; if any are found they are returned and JSON is not tried.
+ * Otherwise JSON-style calls (`{"name": "…", "arguments": {…}}`) are extracted.
+ * The JSON arguments object is located by balanced-brace scanning, so nested
+ * objects, empty `{}` arguments and braces inside strings are captured intact.
+ *
+ * @param text Raw model output.
+ * @returns Calls in order of appearance, with ids `call_0`, `call_1`, …
+ *   numbered per invocation; empty when nothing is recognised. `arguments`
+ *   is the raw text and is not validated as JSON.
+ */
+export function extractToolCalls(text: string): ParsedToolCall[] {
+  const calls: ParsedToolCall[] = [];
+  let match: RegExpExecArray | null;
+
+  // Try XML-style tool calls (common in Qwen output).
+  // Copying the shared global regex gives this invocation its own `lastIndex`.
+  const xmlRegex = new RegExp(TOOL_CALL_PATTERN);
+  while ((match = xmlRegex.exec(text)) !== null) {
+    const [, name = '', args = ''] = match;
+    calls.push({
+      id: `call_${calls.length}`,
+      name: name.trim(),
+      arguments: args.trim(),
+    });
+  }
+
+  if (calls.length > 0) return calls;
+
+  // Try JSON-style tool calls.
+  const headerRegex = new RegExp(JSON_TOOL_HEADER);
+  while ((match = headerRegex.exec(text)) !== null) {
+    // The header ends with a lookahead, so lastIndex sits on the `{` that
+    // opens the arguments object.
+    const argsStart = headerRegex.lastIndex;
+    const argsEnd = findJsonObjectEnd(text, argsStart);
+    if (argsEnd === -1) continue;
+
+    // The wrapper object must close right after the arguments.
+    JSON_TOOL_CLOSER.lastIndex = argsEnd;
+    if (!JSON_TOOL_CLOSER.test(text)) continue;
+
+    calls.push({
+      id: `call_${calls.length}`,
+      name: (match[1] ?? '').trim(),
+      arguments: text.slice(argsStart, argsEnd),
+    });
+    // Resume scanning after the call that was just consumed.
+    headerRegex.lastIndex = JSON_TOOL_CLOSER.lastIndex;
+  }
+
+  return calls;
+}
+
+/**
+ * Reports whether `text` contains at least one recoverable tool call.
+ *
+ * Delegates to `extractToolCalls` instead of calling `.test()` on the
+ * module-level `/g` patterns: a global regex keeps its `lastIndex` between
+ * calls, so testing the same text twice on a shared instance would alternate
+ * between true and false. Delegating also guarantees this predicate agrees
+ * with what extraction actually returns.
+ *
+ * @param text Raw model output.
+ * @returns True when `extractToolCalls(text)` would return at least one call.
+ */
+export function hasToolCallMarkup(text: string): boolean {
+  return extractToolCalls(text).length > 0;
+}