v1.11.6: doom-loop guard (3 identical tool calls abort recursion)
This commit is contained in:
130
apps/server/src/services/__tests__/doom-loop.test.ts
Normal file
130
apps/server/src/services/__tests__/doom-loop.test.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { DOOM_LOOP_THRESHOLD, detectDoomLoop } from '../inference.js';
|
||||
import type { ToolCall } from '../../types/api.js';
|
||||
|
||||
// ---- fixture ----------------------------------------------------------------
|
||||
// Monotonic counter backing the ids handed out by mkCall. Module-level on
// purpose: ids stay unique across every test in this file.
let counter = 0;

/**
 * Tiny fixture builder for ToolCall values.
 *
 * `id` is required on ToolCall but irrelevant to detection — detectDoomLoop
 * compares name + JSON.stringify(args). A counter-based id keeps each call
 * unique so we don't accidentally test id-based equality.
 *
 * @param name - Tool name to put on the call.
 * @param args - Tool arguments; defaults to a fresh empty object per call.
 * @returns A ToolCall with a unique `c<N>` id and the given name and args.
 */
function mkCall(name: string, args: Record<string, unknown> = {}): ToolCall {
  counter += 1;
  return { id: `c${counter}`, name, args };
}
|
||||
|
||||
// ---- below-threshold -------------------------------------------------------
|
||||
|
||||
// Cases where fewer than DOOM_LOOP_THRESHOLD calls are on record. A full
// window cannot form, so every case here expects null no matter how similar
// the calls are to each other.
describe('detectDoomLoop — below threshold', () => {
  it('returns null for an empty array', () => {
    expect(detectDoomLoop([])).toBeNull();
  });

  it('returns null when fewer than DOOM_LOOP_THRESHOLD calls exist', () => {
    // One short of the threshold (2 < 3 today) — the sliding window can't
    // form even though every call matches. Sized from the constant so the
    // test keeps matching its title if the threshold is ever retuned.
    const calls = Array.from({ length: DOOM_LOOP_THRESHOLD - 1 }, () =>
      mkCall('view_file', { path: 'a.ts' }),
    );
    expect(detectDoomLoop(calls)).toBeNull();
  });
});
|
||||
|
||||
// ---- positive detection ----------------------------------------------------
|
||||
|
||||
// Cases where the most recent calls are identical in name and args, so the
// detector is expected to report a loop (a non-null result).
describe('detectDoomLoop — positive matches', () => {
  it('returns name + args when exactly DOOM_LOOP_THRESHOLD identical calls land', () => {
    const calls = [
      mkCall('grep', { pattern: 'TODO', path: 'src' }),
      mkCall('grep', { pattern: 'TODO', path: 'src' }),
      mkCall('grep', { pattern: 'TODO', path: 'src' }),
    ];
    const result = detectDoomLoop(calls);
    expect(result).not.toBeNull();
    // Optional chaining instead of `!`: a null result fails the assertion
    // rather than throwing a TypeError on property access.
    expect(result?.name).toBe('grep');
    expect(result?.args).toEqual({ pattern: 'TODO', path: 'src' });
  });

  it('matches sliding window — last DOOM_LOOP_THRESHOLD match even with earlier non-matching calls', () => {
    // 4 calls: first differs, last 3 are identical → fire.
    const calls = [
      mkCall('list_dir', { path: '/' }),
      mkCall('view_file', { path: 'a.ts' }),
      mkCall('view_file', { path: 'a.ts' }),
      mkCall('view_file', { path: 'a.ts' }),
    ];
    const result = detectDoomLoop(calls);
    expect(result).not.toBeNull();
    expect(result?.name).toBe('view_file');
  });

  it('matches identical empty-args calls (defense against {} !== {} reference bug)', () => {
    // JSON.stringify on two distinct {} both produce '{}'. Confirms the
    // detector uses value-equality not reference-equality.
    const calls = [mkCall('ping', {}), mkCall('ping', {}), mkCall('ping', {})];
    expect(detectDoomLoop(calls)).not.toBeNull();
  });

  it('matches calls with nested args of equal shape', () => {
    // Deep-equal via JSON.stringify. If the model emits the same nested
    // object three times, that's still a loop.
    //
    // The args are rebuilt from scratch for every call. A shallow copy
    // (`{ ...nested }`) would leave the inner `filter` object shared between
    // all three fixtures, and a detector comparing nested values by reference
    // would then pass this test without doing a deep comparison.
    const nestedArgs = (): Record<string, unknown> => ({
      filter: { glob: '*.ts', case: 'sensitive' },
      limit: 50,
    });
    const calls = [
      mkCall('find_files', nestedArgs()),
      mkCall('find_files', nestedArgs()),
      mkCall('find_files', nestedArgs()),
    ];
    expect(detectDoomLoop(calls)).not.toBeNull();
  });
});
|
||||
|
||||
// ---- negative detection ----------------------------------------------------
|
||||
|
||||
// Cases with at least DOOM_LOOP_THRESHOLD calls on record where the most
// recent ones are NOT all identical, so the detector is expected to return
// null.
describe('detectDoomLoop — negative cases', () => {
  it('returns null when 3 calls share name but differ in args', () => {
    const calls = [
      mkCall('view_file', { path: 'a.ts' }),
      mkCall('view_file', { path: 'b.ts' }),
      mkCall('view_file', { path: 'c.ts' }),
    ];
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when 3 calls share args but differ in name', () => {
    const calls = [
      mkCall('view_file', { path: 'a.ts' }),
      mkCall('grep', { path: 'a.ts' }),
      mkCall('list_dir', { path: 'a.ts' }),
    ];
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when the FIRST three of four match but the latest differs', () => {
    // Critical sliding-window edge: detector must ONLY look at the last
    // DOOM_LOOP_THRESHOLD entries. Earlier matches don't count if the
    // model has since moved on.
    const calls = [
      mkCall('grep', { pattern: 'X' }),
      mkCall('grep', { pattern: 'X' }),
      mkCall('grep', { pattern: 'X' }),
      mkCall('view_file', { path: 'a.ts' }),
    ];
    expect(detectDoomLoop(calls)).toBeNull();
  });

  it('returns null when args have same keys but different values', () => {
    // Only the last call differs, and only in the value of `path`.
    const calls = [
      mkCall('grep', { pattern: 'TODO', path: 'src' }),
      mkCall('grep', { pattern: 'TODO', path: 'src' }),
      mkCall('grep', { pattern: 'TODO', path: 'apps' }),
    ];
    expect(detectDoomLoop(calls)).toBeNull();
  });
});
|
||||
|
||||
// ---- threshold contract ----------------------------------------------------
|
||||
|
||||
// Guards the exported threshold constant itself, independently of the
// detector's behaviour.
describe('DOOM_LOOP_THRESHOLD', () => {
  it('is a positive integer (the public contract — tests assume 3)', () => {
    expect(DOOM_LOOP_THRESHOLD).toBeGreaterThan(0);
    expect(Number.isInteger(DOOM_LOOP_THRESHOLD)).toBe(true);
  });

  it('equals 3, the value the hard-coded fixtures in this file rely on', () => {
    // Several cases in this file build exactly three calls by hand. Pinning
    // the value here makes a threshold change fail with an obvious message
    // instead of as confusing detection failures elsewhere.
    expect(DOOM_LOOP_THRESHOLD).toBe(3);
  });
});
|
||||
Reference in New Issue
Block a user