feat(server): add inference reliability - tool-shim and loop detectors
- ToolShim recovers XML/JSON tool calls from plain-text model output.
- detectContentRepeat catches same-content loops.
- detectToolLoop catches repeated tool invocations.
- detectDoomLoop combines both detectors.
This commit is contained in:
68
apps/server/src/services/inference/loop-detectors.ts
Normal file
68
apps/server/src/services/inference/loop-detectors.ts
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
// Loop detectors — detects repetitive patterns in assistant output
|
||||||
|
// that indicate a model is stuck in a loop.
|
||||||
|
|
||||||
|
/**
 * Outcome of a loop-detection check over recent assistant activity.
 */
export interface LoopDetectionResult {
  /** True when the inspected history matched a loop pattern. */
  isLoop: boolean;
  /** Human-readable description of the matched pattern; optional, and omitted by the detectors in this file when no loop is found. */
  reason?: string;
  /** Detector confidence in the range 0-1; the detectors in this file report 0 when no loop is found. */
  confidence: number;
}
|
||||||
|
|
||||||
|
/** Number of trailing messages inspected by `detectContentRepeat`. */
const REPEATED_PHRASE_MIN_COUNT = 4;
/** Number of trailing tool names inspected by `detectToolLoop`. */
const REPEATED_TOOL_MIN_COUNT = 3;

/**
 * Detects repeated assistant content in the most recent messages.
 *
 * Only the last `REPEATED_PHRASE_MIN_COUNT` entries are inspected; messages are
 * compared by exact string equality.
 *
 * @param messages - Assistant message contents, oldest first.
 * @returns A loop result with confidence 0.9 when every inspected message is
 *   identical, 0.7 when they strictly alternate between two variants
 *   (A, B, A, B), and a non-loop result with confidence 0 otherwise —
 *   including when fewer than `REPEATED_PHRASE_MIN_COUNT` messages exist.
 */
export function detectContentRepeat(messages: readonly string[]): LoopDetectionResult {
  if (messages.length < REPEATED_PHRASE_MIN_COUNT) {
    return { isLoop: false, confidence: 0 };
  }

  const recent = messages.slice(-REPEATED_PHRASE_MIN_COUNT);
  const distinct = new Set(recent);

  if (distinct.size === 1) {
    return {
      isLoop: true,
      reason: `Same content repeated ${REPEATED_PHRASE_MIN_COUNT} times`,
      confidence: 0.9,
    };
  }

  // Two distinct values alone is not oscillation: "a, a, a, b" is a model that
  // just changed its output. Require a period of two, i.e. every message equals
  // the one two positions before it. With exactly two distinct values that
  // forces the A, B, A, B shape.
  const alternates =
    distinct.size === 2 &&
    recent.every((message, index) => index < 2 || message === recent[index - 2]);

  if (alternates) {
    return {
      isLoop: true,
      reason: 'Content oscillating between two variants',
      confidence: 0.7,
    };
  }

  return { isLoop: false, confidence: 0 };
}
|
||||||
|
|
||||||
|
/**
 * Flags a run of identical tool invocations at the end of `toolNames`.
 *
 * Only the last `REPEATED_TOOL_MIN_COUNT` names are inspected, and only the
 * names are compared — this function never sees tool arguments.
 *
 * @param toolNames - Names of invoked tools, oldest first.
 * @returns A loop result with confidence 0.85 when every inspected name is the
 *   same; otherwise (or when the history is shorter than the window) a
 *   non-loop result with confidence 0.
 */
export function detectToolLoop(toolNames: string[]): LoopDetectionResult {
  const historyTooShort = toolNames.length < REPEATED_TOOL_MIN_COUNT;
  if (historyTooShort) {
    return { isLoop: false, confidence: 0 };
  }

  const tail = toolNames.slice(-REPEATED_TOOL_MIN_COUNT);
  const [latest] = tail;
  const sameToolThroughout = tail.every((toolName) => toolName === latest);

  if (!sameToolThroughout) {
    return { isLoop: false, confidence: 0 };
  }

  return {
    isLoop: true,
    reason: `Same tool "${latest}" called ${REPEATED_TOOL_MIN_COUNT} times consecutively`,
    confidence: 0.85,
  };
}
|
||||||
|
|
||||||
|
/**
 * Runs the content detector and then the tool detector, returning the first
 * positive verdict.
 *
 * The tool detector is only consulted when the content detector reports no
 * loop, so a content loop always takes precedence.
 *
 * @param messages - Assistant message contents, oldest first.
 * @param toolNames - Names of invoked tools, oldest first.
 * @returns The first detector result whose `isLoop` is true, or a non-loop
 *   result with confidence 0 when neither detector fires.
 */
export function detectDoomLoop(
  messages: string[],
  toolNames: string[],
): LoopDetectionResult {
  // Thunks keep the evaluation lazy: the second detector runs only if the
  // first one did not report a loop.
  const detectors: Array<() => LoopDetectionResult> = [
    () => detectContentRepeat(messages),
    () => detectToolLoop(toolNames),
  ];

  for (const runDetector of detectors) {
    const verdict = runDetector();
    if (verdict.isLoop) {
      return verdict;
    }
  }

  return { isLoop: false, confidence: 0 };
}
|
||||||
45
apps/server/src/services/inference/tool-shim.ts
Normal file
45
apps/server/src/services/inference/tool-shim.ts
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
// ToolShim — recovers structured tool calls from plain-text model output.
|
||||||
|
// When the model emits tool calls as plain text instead of structured JSON,
|
||||||
|
// this shim attempts to parse and recover them.
|
||||||
|
|
||||||
|
/**
 * A tool call recovered from plain-text model output.
 */
export interface ParsedToolCall {
  /** Identifier assigned by the extractor, of the form `call_<n>` where n is the call's position within one extraction pass. */
  id: string;
  /** Tool name as written by the model, with surrounding whitespace trimmed. */
  name: string;
  /** Raw arguments text captured from the model output; it is not parsed or validated by the extractor. */
  arguments: string;
}
|
||||||
|
|
||||||
|
// XML-style markup: <tool_call><name>…</name><arguments>…</arguments></tool_call>.
// The pattern carries the `g` flag, so `.test()`/`.exec()` on this shared
// instance would leak `lastIndex` between calls; always clone it first.
const TOOL_CALL_PATTERN = /<tool_call>\s*<name>(.+?)<\/name>\s*<arguments>(.+?)<\/arguments>\s*<\/tool_call>/gs;
// Opening of a JSON-style call: `{"name": "…", "arguments": `. The lookahead
// requires the arguments object to start with `{` without consuming it; the
// object itself is delimited by brace matching because a regex cannot balance
// nested braces.
const JSON_TOOL_HEAD_PATTERN = /\{\s*"name":\s*"([^"]+)",\s*"arguments":\s*(?=\{)/g;

// Returns the index just past the `}` that closes the object opening at
// `start`, or -1 when the braces never balance. Braces inside double-quoted
// strings (including after backslash escapes) are ignored.
function findJsonObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character so `\"` does not end the string
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
  }

  return -1;
}

// Collects every XML-style tool call in `text`.
function extractXmlToolCalls(text: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const pattern = new RegExp(TOOL_CALL_PATTERN); // fresh copy, lastIndex = 0
  let match: RegExpExecArray | null;

  while ((match = pattern.exec(text)) !== null) {
    const [, rawName = '', rawArguments = ''] = match;
    calls.push({
      id: `call_${calls.length}`,
      name: rawName.trim(),
      arguments: rawArguments.trim(),
    });
  }

  return calls;
}

// Collects every JSON-style tool call (`{"name": …, "arguments": {…}}`) in
// `text`, keeping nested and empty argument objects intact.
function extractJsonToolCalls(text: string): ParsedToolCall[] {
  const calls: ParsedToolCall[] = [];
  const head = new RegExp(JSON_TOOL_HEAD_PATTERN); // fresh copy, lastIndex = 0
  const wrapperClose = /\s*\}/y; // sticky: must match exactly at lastIndex
  let match: RegExpExecArray | null;

  while ((match = head.exec(text)) !== null) {
    // The lookahead consumed nothing, so lastIndex sits on the arguments' `{`.
    const argsStart = head.lastIndex;
    const argsEnd = findJsonObjectEnd(text, argsStart);
    if (argsEnd === -1) continue; // unbalanced braces: not a recoverable call

    // The wrapper object must close right after the arguments object.
    wrapperClose.lastIndex = argsEnd;
    if (!wrapperClose.test(text)) continue;

    const [, rawName = ''] = match;
    calls.push({
      id: `call_${calls.length}`,
      name: rawName.trim(),
      arguments: text.slice(argsStart, argsEnd),
    });

    // Resume after the whole call so nested content is not rescanned.
    head.lastIndex = wrapperClose.lastIndex;
  }

  return calls;
}

/**
 * Recovers tool calls that a model emitted as plain text.
 *
 * XML-style markup is tried first; JSON-style calls are only considered when
 * no XML-style call is found. Ids are `call_0`, `call_1`, … in order of
 * appearance and restart at 0 on every invocation.
 *
 * @param text - Raw model output.
 * @returns The recovered calls, or an empty array when none are present.
 */
export function extractToolCalls(text: string): ParsedToolCall[] {
  const xmlCalls = extractXmlToolCalls(text);
  if (xmlCalls.length > 0) return xmlCalls;

  return extractJsonToolCalls(text);
}

/**
 * Reports whether `text` contains at least one recoverable tool call.
 *
 * Delegates to `extractToolCalls` so the answer always agrees with what the
 * extractor would return and does not depend on previous calls.
 *
 * @param text - Raw model output.
 * @returns True when `extractToolCalls(text)` would return a non-empty array.
 */
export function hasToolCallMarkup(text: string): boolean {
  return extractToolCalls(text).length > 0;
}
|
||||||
Reference in New Issue
Block a user