// boocode/apps/server/src/services/__tests__/step-decision.test.ts

import { describe, expect, it } from 'vitest';
import { resolveTurnConfig, MAX_STEPS } from '../inference/turn-config.js';
import { decideStep, decidePostToolAction } from '../inference/step-decision.js';
import { DOOM_LOOP_THRESHOLD } from '../inference/sentinels.js';
import type { MistakeState } from '../inference/mistake-tracker.js';
import type { Agent, ToolCall } from '../../types/api.js';

/**
 * Baseline `Agent` fixture for the tests in this file.
 *
 * Test cases spread this object and override a single field (`steps` or
 * `max_tool_calls`) so each case only states what it varies. Every nullable
 * field is left as `null` here; only the identity fields, `temperature`,
 * `tools` and `source` carry concrete values.
 */
const BASE_AGENT: Agent = {
  id: 'test-agent',
  name: 'Test',
  description: 'test',
  system_prompt: '',
  temperature: 0.7,
  top_p: null,
  top_k: null,
  min_p: null,
  presence_penalty: null,
  top_n_sigma: null,
  dry_multiplier: null,
  dry_base: null,
  dry_allowed_length: null,
  dry_penalty_last_n: null,
  tools: ['view_file'],
  model: null,
  source: 'global',
  // Overridden by the "max_tool_calls overrides the budget" case.
  max_tool_calls: null,
  // Overridden by the effectiveCap / text-only cases.
  steps: null,
  llama_extra_args: null,
};

/**
 * Builds a `ToolCall` fixture.
 *
 * The id is derived from the tool name and the JSON-serialized arguments, so
 * two calls with the same name and args get the same id.
 *
 * @param name - Tool name to record on the call.
 * @param args - Tool arguments; defaults to an empty object.
 * @returns A tool call with `id`, `name` and `args` populated.
 */
function call(name: string, args: Record<string, unknown> = {}): ToolCall {
  const serializedArgs = JSON.stringify(args);
  const id = 'tc-' + name + '-' + serializedArgs;
  return { id, name, args };
}

// resolveTurnConfig: checks the defaults used when no agent is given, how
// `steps` maps onto `effectiveCap` / `isTextOnly`, and how `max_tool_calls`
// maps onto `budget`.
describe('resolveTurnConfig', () => {
  // Baseline agent with the given fields replaced.
  const agentWith = (overrides: Partial<Agent>): Agent => ({ ...BASE_AGENT, ...overrides });

  it('no agent → budget 100, cap MAX_STEPS, not text-only', () => {
    const config = resolveTurnConfig(null);

    expect(config).toEqual({ effectiveCap: MAX_STEPS, budget: 100, isTextOnly: false });
  });

  it('steps: 0 → effectiveCap 0 and isTextOnly true', () => {
    const config = resolveTurnConfig(agentWith({ steps: 0 }));

    expect(config).toEqual({ effectiveCap: 0, budget: 100, isTextOnly: true });
  });

  it('steps below MAX_STEPS → effectiveCap is the agent value', () => {
    const { effectiveCap } = resolveTurnConfig(agentWith({ steps: 5 }));

    expect(effectiveCap).toBe(5);
  });

  it('steps above MAX_STEPS → effectiveCap clamps to MAX_STEPS', () => {
    const { effectiveCap } = resolveTurnConfig(agentWith({ steps: 9999 }));

    expect(effectiveCap).toBe(MAX_STEPS);
  });

  it('max_tool_calls overrides the budget', () => {
    const { budget } = resolveTurnConfig(agentWith({ max_tool_calls: 12 }));

    expect(budget).toBe(12);
  });
});

// decideStep: checks the three outcomes asserted below ('stream', 'budget',
// 'doom') and that 'doom' is reported when both doom and budget conditions hold.
describe('decideStep (top-of-loop gate)', () => {
  // Builds a history of DOOM_LOOP_THRESHOLD entries, invoking `make` once per
  // entry so every element is a freshly constructed call object.
  const repeatedHistory = (make: () => ToolCall): ToolCall[] => {
    const history: ToolCall[] = [];
    for (let i = 0; i < DOOM_LOOP_THRESHOLD; i += 1) {
      history.push(make());
    }
    return history;
  };

  it('returns stream when no doom loop and under budget', () => {
    const decision = decideStep({ recentToolCalls: [], toolsUsed: 0, budget: 30 });

    expect(decision).toEqual({ kind: 'stream' });
  });

  it('returns budget when toolsUsed has reached the budget', () => {
    const decision = decideStep({ recentToolCalls: [], toolsUsed: 30, budget: 30 });

    expect(decision).toEqual({ kind: 'budget' });
  });

  it('returns doom (with the looping call) on identical-repeat tail', () => {
    const recentToolCalls = repeatedHistory(() => call('view_file', { path: '/a' }));
    const decision = decideStep({ recentToolCalls, toolsUsed: 1, budget: 30 });

    expect(decision.kind).toBe('doom');
    // The assertion above already fails the test for any other kind; this
    // guard exists only to narrow the union before reading `loop`.
    if (decision.kind !== 'doom') return;
    expect(decision.loop.name).toBe('view_file');
    expect(decision.loop.args).toEqual({ path: '/a' });
  });

  it('doom takes precedence over budget when both would trip', () => {
    const recentToolCalls = repeatedHistory(() => call('grep', { q: 'x' }));
    const { kind } = decideStep({ recentToolCalls, toolsUsed: 30, budget: 30 });

    expect(kind).toBe('doom');
  });
});

// decidePostToolAction: checks that non-'continue' actions yield 'stop', and
// that 'continue' yields 'continue', 'nudge' or 'escalate' depending on the
// tracker's run of mistakes and its nudge count.
describe('decidePostToolAction (post-tool decision)', () => {
  // Tracker with no recorded mistakes and no nudges; shared by several cases.
  const clean: MistakeState = { run: [], nudges: 0 };

  // Fresh tracker holding the three-mistake streak used by the nudge/escalate cases.
  const streakTracker = (nudges: number): MistakeState => ({
    run: ['zod_reject', 'tool_not_found', 'exec_error'],
    nudges,
  });

  it('non-continue actions stop the loop without consulting the tracker', () => {
    const failingTracker: MistakeState = {
      run: ['exec_error', 'exec_error', 'exec_error'],
      nudges: 0,
    };

    expect(decidePostToolAction('paused', failingTracker)).toBe('stop');
    expect(decidePostToolAction('synthesis_done', clean)).toBe('stop');
  });

  it('continue with a clean tracker → continue', () => {
    const outcome = decidePostToolAction('continue', clean);

    expect(outcome).toBe('continue');
  });

  it('continue with a threshold streak and no prior nudge → nudge', () => {
    const outcome = decidePostToolAction('continue', streakTracker(0));

    expect(outcome).toBe('nudge');
  });

  it('continue with a threshold streak after a nudge already fired → escalate', () => {
    const outcome = decidePostToolAction('continue', streakTracker(1));

    expect(outcome).toBe('escalate');
  });
});