/*
 * Commit context: multi-agent audit and aggressive cleanup across
 * server/web/coder/booterm, delivered behind a DEFER discipline so that none
 * of the in-flight files were touched. Removes dead code, dependencies, and
 * columns; de-duplicates server and coder helpers; and splits the oversized
 * modules (tools.ts, opencode-server.ts, sentinel-summaries, turn.ts,
 * TerminalPane.tsx) behind stable contracts. Adds 78 parity/unit tests
 * (server 587, coder 323) and fixes two latent bugs (ChatPane queue keys,
 * FileViewerOverlay blank-line parity).
 * Intended tag: v2.7.12-audit-cleanup.
 *
 * File listing: 112 lines, 4.0 KiB, TypeScript.
 */
import { describe, expect, it } from 'vitest';
|
|
import { resolveTurnConfig, MAX_STEPS } from '../inference/turn-config.js';
|
|
import { decideStep, decidePostToolAction } from '../inference/step-decision.js';
|
|
import { DOOM_LOOP_THRESHOLD } from '../inference/sentinels.js';
|
|
import type { MistakeState } from '../inference/mistake-tracker.js';
|
|
import type { Agent, ToolCall } from '../../types/api.js';
|
|
|
|
/**
 * Baseline agent fixture for the tests in this file.
 *
 * Individual tests spread this object and override a single field (for
 * example `steps` or `max_tool_calls`) so each case states only the value it
 * is exercising. Apart from `temperature`, every sampling parameter is left
 * `null`, as are `model`, `max_tool_calls`, `steps`, and `llama_extra_args`.
 */
const BASE_AGENT: Agent = {
  id: 'test-agent',
  name: 'Test',
  description: 'test',
  system_prompt: '',
  temperature: 0.7,
  top_p: null,
  top_k: null,
  min_p: null,
  presence_penalty: null,
  top_n_sigma: null,
  dry_multiplier: null,
  dry_base: null,
  dry_allowed_length: null,
  dry_penalty_last_n: null,
  tools: ['view_file'],
  model: null,
  source: 'global',
  max_tool_calls: null,
  steps: null,
  llama_extra_args: null,
};
|
|
|
|
/**
 * Builds a `ToolCall` fixture.
 *
 * The id is derived from the tool name and the JSON-serialized arguments, so
 * two calls created with the same name and args are structurally identical
 * (including their ids).
 *
 * @param name - Tool name to record on the call.
 * @param args - Tool arguments; defaults to an empty object.
 * @returns A tool call whose id has the form `tc-<name>-<JSON args>`.
 */
function call(name: string, args: Record<string, unknown> = {}): ToolCall {
  return { id: `tc-${name}-${JSON.stringify(args)}`, name, args };
}
|
|
|
|
// Covers how resolveTurnConfig derives the step cap, tool budget, and
// text-only flag from an optional agent definition.
describe('resolveTurnConfig', () => {
  it('no agent → budget 100, cap MAX_STEPS, not text-only', () => {
    expect(resolveTurnConfig(null)).toEqual({
      effectiveCap: MAX_STEPS,
      budget: 100,
      isTextOnly: false,
    });
  });

  it('steps: 0 → effectiveCap 0 and isTextOnly true', () => {
    expect(resolveTurnConfig({ ...BASE_AGENT, steps: 0 })).toEqual({
      effectiveCap: 0,
      budget: 100,
      isTextOnly: true,
    });
  });

  it('steps below MAX_STEPS → effectiveCap is the agent value', () => {
    expect(resolveTurnConfig({ ...BASE_AGENT, steps: 5 }).effectiveCap).toBe(5);
  });

  it('steps above MAX_STEPS → effectiveCap clamps to MAX_STEPS', () => {
    // 9999 is assumed to exceed MAX_STEPS; the assertion only holds if it does.
    expect(resolveTurnConfig({ ...BASE_AGENT, steps: 9999 }).effectiveCap).toBe(MAX_STEPS);
  });

  it('max_tool_calls overrides the budget', () => {
    expect(resolveTurnConfig({ ...BASE_AGENT, max_tool_calls: 12 }).budget).toBe(12);
  });
});
|
|
|
|
// Covers the decision made at the top of each loop iteration: keep streaming,
// stop on an exhausted tool budget, or stop on a detected doom loop.
describe('decideStep (top-of-loop gate)', () => {
  it('returns stream when no doom loop and under budget', () => {
    expect(decideStep({ recentToolCalls: [], toolsUsed: 0, budget: 30 })).toEqual({
      kind: 'stream',
    });
  });

  it('returns budget when toolsUsed has reached the budget', () => {
    expect(decideStep({ recentToolCalls: [], toolsUsed: 30, budget: 30 })).toEqual({
      kind: 'budget',
    });
  });

  it('returns doom (with the looping call) on identical-repeat tail', () => {
    // DOOM_LOOP_THRESHOLD identical calls in a row form the repeating tail.
    const recent = Array.from({ length: DOOM_LOOP_THRESHOLD }, () =>
      call('view_file', { path: '/a' }),
    );
    const d = decideStep({ recentToolCalls: recent, toolsUsed: 1, budget: 30 });

    expect(d.kind).toBe('doom');
    // The guard narrows the result so `loop` can be read; the expectation
    // above already fails the test when the kind is anything else.
    if (d.kind === 'doom') {
      expect(d.loop.name).toBe('view_file');
      expect(d.loop.args).toEqual({ path: '/a' });
    }
  });

  it('doom takes precedence over budget when both would trip', () => {
    const recent = Array.from({ length: DOOM_LOOP_THRESHOLD }, () => call('grep', { q: 'x' }));

    // toolsUsed === budget would yield 'budget' on its own (see the case above).
    expect(decideStep({ recentToolCalls: recent, toolsUsed: 30, budget: 30 }).kind).toBe('doom');
  });
});
|
|
|
|
// Covers the decision made after a tool has run: stop, continue, nudge the
// model, or escalate, based on the tool's action and the mistake tracker.
describe('decidePostToolAction (post-tool decision)', () => {
  // Tracker with no recorded mistakes and no nudges issued.
  const clean: MistakeState = { run: [], nudges: 0 };

  it('non-continue actions stop the loop without consulting the tracker', () => {
    // A three-error streak is supplied on purpose: the result must still be
    // 'stop', showing the tracker is not what drives this outcome.
    expect(
      decidePostToolAction('paused', {
        run: ['exec_error', 'exec_error', 'exec_error'],
        nudges: 0,
      }),
    ).toBe('stop');
    expect(decidePostToolAction('synthesis_done', clean)).toBe('stop');
  });

  it('continue with a clean tracker → continue', () => {
    expect(decidePostToolAction('continue', clean)).toBe('continue');
  });

  it('continue with a threshold streak and no prior nudge → nudge', () => {
    // NOTE(review): the three-entry run presumably matches the tracker's
    // streak threshold — confirm against mistake-tracker if that value changes.
    const tracker: MistakeState = {
      run: ['zod_reject', 'tool_not_found', 'exec_error'],
      nudges: 0,
    };
    expect(decidePostToolAction('continue', tracker)).toBe('nudge');
  });

  it('continue with a threshold streak after a nudge already fired → escalate', () => {
    // Same streak as the nudge case; only the prior-nudge count differs.
    const tracker: MistakeState = {
      run: ['zod_reject', 'tool_not_found', 'exec_error'],
      nudges: 1,
    };
    expect(decidePostToolAction('continue', tracker)).toBe('escalate');
  });
});
|