Files
boocode/apps/coder/src/services/__tests__/flow-runner-decisions.test.ts
indifferentketchup 1937af8df9 feat: in-app Orchestrator (Phase 2) — multi-agent conductor
Brings the deterministic Han-flow conductor into BooCode: launch any read-only
flow from BooChat or BooCoder, watch each agent stream live in a Paseo-style
run pane, get an evidence-disciplined report — on local Qwen, persisted and
resumable. Read-only enforced hard via qwen --approval-mode plan (orchestrator
tasks fail closed if qwen is unavailable; never fall to write-capable native).

Backend (apps/coder): re-homed conductor defs, flow_runs/flow_steps schema,
flow-runner + dispatcher onTaskTerminal hook, restart-resume, runs routes
(launch/list/get/cancel), user-channel WS. Contracts: two flow_run_* frames.
Web: orchestrator pane kind + OrchestratorPane, Workflow button + slash flows
(BooChat/BooCoder parity), FlowLauncherDialog, "New Orchestrator" in the + and
split menus, runs history + export. Plan: openspec/changes/orchestrator.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 15:22:48 +00:00

390 lines
16 KiB
TypeScript

import { describe, it, expect } from 'vitest';
import type { Flow, Step, StepContext } from '../../conductor/types.js';
import {
manifestSteps,
readySteps,
partitionReady,
isRunComplete,
isStuck,
reconcileResumeStep,
reconcileRun,
shouldFailOnMissingAgent,
type SchedulerState,
} from '../flow-runner-decisions.js';
/**
* The DB-driven flow-runner replaces the Phase-1 in-memory wave scheduler
* (conductor/src/flow.ts). These pure helpers are that scheduler's decision
* core, lifted out so the DB-backed runner stays a thin IO shell over a
* testable kernel (repo pattern: turn-guard.ts / lifecycle-decisions.ts).
*
* The schedule must match conductor/flow.ts:27-41 exactly: a step is ready when
* every dependency is settled (completed OR skipped/excluded), and a ready step
* is skipped when its when() guard returns false against the current context.
*/
/**
 * Builds the synthetic five-step flow shared by the scheduler tests.
 *
 * Shape: two dependency-free agent angles (`a`, and `b` which is gated on the
 * input band), a code step `fold` that fans both in, an agent synthesizer
 * `synth`, and a terminal agent validator `val`.
 */
function makeFlow(): Flow {
  const steps: Step[] = [
    // Ungated angle with no dependencies.
    {
      id: 'a',
      kind: 'agent',
      agent: 'analyst-a',
      run: () => 'prompt a',
    },
    // Band-gated angle: only runs at medium+ (mirrors Angle.minBand gating).
    {
      id: 'b',
      kind: 'agent',
      agent: 'analyst-b',
      when: (ctx) => {
        const { band } = ctx.input;
        return band === 'medium' || band === 'large';
      },
      run: () => 'prompt b',
    },
    // Code fold: reports the ids of whatever results are present in the context.
    {
      id: 'fold',
      kind: 'code',
      deps: ['a', 'b'],
      run: (ctx) => 'fold:' + Object.keys(ctx.results).join(','),
    },
    {
      id: 'synth',
      kind: 'agent',
      agent: 'architect',
      deps: ['fold'],
      run: () => 'prompt synth',
    },
    {
      id: 'val',
      kind: 'agent',
      agent: 'validator',
      deps: ['synth'],
      run: () => 'prompt val',
    },
  ];
  const render = () => 'report';
  return { name: 'demo', description: 'demo flow', steps, render };
}
/**
 * Builds a step context for the fixed question `'q'` at the given band.
 *
 * @param band - Band value placed on `input.band`.
 * @param results - Step results visible to the context; empty by default.
 */
function ctxOf(band: string, results: Record<string, string> = {}): StepContext {
  const input = { question: 'q', band };
  return { input, results };
}
/**
 * Builds a scheduler state whose four id sets start out empty, then applies
 * any overrides on top. Fresh sets are created on every call.
 *
 * @param over - Fields that replace the corresponding empty defaults.
 */
const emptyState = (over: Partial<SchedulerState> = {}): SchedulerState => {
  const blank = {
    done: new Set<string>(),
    skipped: new Set<string>(),
    inFlight: new Set<string>(),
    excluded: new Set<string>(),
  };
  return { ...blank, ...over };
};
describe('manifestSteps', () => {
  /** Ids of the steps the manifest keeps for `flow` when evaluated at `band`. */
  const keptIds = (flow: Flow, band: string) =>
    manifestSteps(flow, ctxOf(band)).map((step) => step.id);

  it('drops a band-gated step at small band, keeps it at medium', () => {
    const demo = makeFlow();
    // b's when() guard rejects the small band, so b is left out of the manifest.
    expect(keptIds(demo, 'small')).toEqual(['a', 'fold', 'synth', 'val']);
    // At medium the guard passes and b is listed in declaration order.
    expect(keptIds(demo, 'medium')).toEqual(['a', 'b', 'fold', 'synth', 'val']);
  });

  it('includes every step when no when() guards are defined', () => {
    const guardless: Flow = {
      name: 'guardless',
      description: 'no guards',
      steps: [
        { id: 'x', kind: 'agent', agent: 'a', run: () => 'p' },
        { id: 'y', kind: 'code', deps: ['x'], run: () => 'r' },
      ],
      render: () => '',
    };
    expect(keptIds(guardless, 'small')).toEqual(['x', 'y']);
  });

  it('returns empty when every when() guard evaluates false', () => {
    const never = () => false;
    const allGated: Flow = {
      name: 'all-gated',
      description: 'all filtered',
      steps: [
        { id: 'p', kind: 'agent', agent: 'a', when: never, run: () => 'p' },
        { id: 'q', kind: 'agent', agent: 'b', when: never, run: () => 'q' },
      ],
      render: () => '',
    };
    const manifest = manifestSteps(allGated, ctxOf('small'));
    expect(manifest).toEqual([]);
  });
});
describe('readySteps', () => {
  it('returns only dep-free, unsettled steps first', () => {
    const flow = makeFlow();
    // small band → b is excluded; a is the only dep-free live step.
    const state = emptyState({ excluded: new Set(['b']) });
    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['a']);
  });

  it('treats an excluded dependency as satisfied (fold unblocks once a is done)', () => {
    const flow = makeFlow();
    // a completed, b excluded (small band) → fold's deps [a,b] are both settled.
    const state = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['fold']);
  });

  it('does not re-offer a step that is in flight', () => {
    const flow = makeFlow();
    const state = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
    expect(readySteps(flow, state).map((s) => s.id)).toEqual([]);
  });

  it('waits on an unfinished dependency', () => {
    const flow = makeFlow();
    // a and fold are done, b is excluded → synth is the next ready step.
    // val depends on synth, which has not finished, so val must keep waiting.
    const blocked = emptyState({ done: new Set(['a', 'fold']), excluded: new Set(['b']) });
    const ready = readySteps(flow, blocked).map((s) => s.id);
    // Assert the wait explicitly so the test checks what its title claims.
    expect(ready).not.toContain('val');
    expect(ready).toEqual(['synth']);
  });

  it('returns both parallel dep-free steps when neither is excluded or settled', () => {
    const flow = makeFlow();
    // readySteps does NOT evaluate when() — that is partitionReady's job.
    // Both a and b have no deps and are unsettled → both offered.
    expect(readySteps(flow, emptyState()).map((s) => s.id)).toEqual(['a', 'b']);
  });

  it('does not unblock a step whose dep is in flight (dep not yet satisfied)', () => {
    const flow = makeFlow();
    // a is in flight, so it does not yet count as satisfied → fold (deps:['a','b']) blocked.
    // b excluded (satisfied). Nothing else has all deps satisfied → empty wave.
    const state = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
    const ready = readySteps(flow, state).map((s) => s.id);
    expect(ready).not.toContain('fold');
    expect(ready).toEqual([]);
  });

  it('advances through the full wave chain: a+b+fold+synth done → val is ready', () => {
    const flow = makeFlow();
    const state = emptyState({ done: new Set(['a', 'b', 'fold', 'synth']) });
    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['val']);
  });
});
describe('partitionReady', () => {
  /** Projects a list of steps onto their ids for compact comparison. */
  const idsOf = (steps: readonly { id: string }[]) => steps.map((step) => step.id);

  it('routes a when()-false step to toSkip, the rest to toRun', () => {
    // Offer a and b together; at small band b's guard fails, so b is skipped.
    const offered = makeFlow().steps.slice(0, 2); // a, b
    const split = partitionReady(offered, ctxOf('small'));
    expect(idsOf(split.toRun)).toEqual(['a']);
    expect(idsOf(split.toSkip)).toEqual(['b']);
  });

  it('keeps every guardless step in toRun', () => {
    // fold and synth declare no when() guard.
    const offered = makeFlow().steps.slice(2, 4); // fold, synth
    const split = partitionReady(offered, ctxOf('large'));
    expect(idsOf(split.toRun)).toEqual(['fold', 'synth']);
    expect(split.toSkip).toEqual([]);
  });

  it('handles an empty ready list gracefully', () => {
    const split = partitionReady([], ctxOf('small'));
    expect(split.toRun).toEqual([]);
    expect(split.toSkip).toEqual([]);
  });
});
describe('isRunComplete / isStuck', () => {
  it('is complete when every step is settled or excluded', () => {
    const demo = makeFlow();
    const settled = emptyState({
      done: new Set(['a', 'fold', 'synth', 'val']),
      excluded: new Set(['b']),
    });
    expect(isRunComplete(demo, settled)).toBe(true);
    expect(isStuck(demo, settled)).toBe(false);
  });

  it('is not complete while a step is pending', () => {
    const demo = makeFlow();
    const partial = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
    expect(isRunComplete(demo, partial)).toBe(false);
  });

  it('is not stuck while a ready step still exists', () => {
    const demo = makeFlow();
    // With a done and b excluded, fold is ready — progress is still possible.
    const partial = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
    expect(isStuck(demo, partial)).toBe(false);
  });

  it('is stuck on an unsatisfiable dependency with nothing in flight', () => {
    // 'orphan' waits on 'ghost', which is not a step and is never settled, and
    // nothing is in flight that could ever settle it: a dead end.
    const deadEnd: Flow = {
      name: 'stuck',
      description: 'unsatisfiable',
      steps: [{ id: 'orphan', kind: 'agent', agent: 'x', deps: ['ghost'], run: () => 'p' }],
      render: () => 'r',
    };
    const untouched = emptyState();
    expect(readySteps(deadEnd, untouched)).toEqual([]);
    expect(isRunComplete(deadEnd, untouched)).toBe(false);
    expect(isStuck(deadEnd, untouched)).toBe(true);
  });

  it('is complete when every step is skipped (no done, no excluded)', () => {
    const demo = makeFlow();
    const allSkipped = emptyState({ skipped: new Set(['a', 'b', 'fold', 'synth', 'val']) });
    expect(isRunComplete(demo, allSkipped)).toBe(true);
    expect(isStuck(demo, allSkipped)).toBe(false);
  });

  it('is complete when steps are spread across done + skipped + excluded', () => {
    const demo = makeFlow();
    // Every step lands in exactly one settled bucket:
    // a/fold/val done, synth skipped, b excluded.
    const mixed = emptyState({
      done: new Set(['a', 'fold', 'val']),
      skipped: new Set(['synth']),
      excluded: new Set(['b']),
    });
    expect(isRunComplete(demo, mixed)).toBe(true);
    expect(isStuck(demo, mixed)).toBe(false);
  });

  it('is NOT stuck when in-flight tasks exist even if no step is currently ready', () => {
    const demo = makeFlow();
    // a is still in flight, so fold's dependency is unmet and nothing is ready.
    // The run can still progress once a settles, so it must not count as stuck.
    const waiting = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
    expect(isStuck(demo, waiting)).toBe(false);
    expect(isRunComplete(demo, waiting)).toBe(false);
    expect(readySteps(demo, waiting)).toEqual([]); // confirms nothing ready
  });
});
// ─── Resume reconciliation (D-9) ─────────────────────────────────────────────
describe('reconcileResumeStep', () => {
  it('keeps non-running steps regardless of task state', () => {
    // Cover every task state used elsewhere in this suite, plus a missing task
    // row (null), so the "regardless of task state" claim is actually checked
    // rather than only the single 'running' case.
    const taskStates = [
      null,
      'pending',
      'running',
      'blocked',
      'completed',
      'failed',
      'cancelled',
    ] as const;
    for (const status of ['completed', 'skipped', 'failed', 'cancelled', 'pending']) {
      for (const taskState of taskStates) {
        // The message pinpoints the failing combination if the contract breaks.
        expect(
          reconcileResumeStep(status, 'tid', taskState),
          `step=${status} task=${String(taskState)}`,
        ).toBe('keep');
      }
    }
  });

  it('re-dispatches a running step with no task_id', () => {
    expect(reconcileResumeStep('running', null, null)).toBe('re-dispatch');
  });

  it('re-dispatches a running step whose task is absent from the DB', () => {
    expect(reconcileResumeStep('running', 'tid', null)).toBe('re-dispatch');
  });

  it('marks done when the task completed before the callback ran', () => {
    expect(reconcileResumeStep('running', 'tid', 'completed')).toBe('mark-done');
  });

  it('marks failed when the task failed', () => {
    expect(reconcileResumeStep('running', 'tid', 'failed')).toBe('mark-failed');
  });

  it('marks cancelled when the task was cancelled', () => {
    expect(reconcileResumeStep('running', 'tid', 'cancelled')).toBe('mark-cancelled');
  });

  it('keeps a running step whose task is pending (dispatcher startup poll handles it)', () => {
    expect(reconcileResumeStep('running', 'tid', 'pending')).toBe('keep');
  });

  it('re-dispatches when the task is running (PTY dead on restart)', () => {
    expect(reconcileResumeStep('running', 'tid', 'running')).toBe('re-dispatch');
  });

  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
    expect(reconcileResumeStep('running', 'tid', 'blocked')).toBe('re-dispatch');
  });
});
// ─── Dispatcher routing guard (H1) ───────────────────────────────────────────
describe('shouldFailOnMissingAgent', () => {
  it('returns true for qwen+plan (orchestrator read-only gate)', () => {
    const verdict = shouldFailOnMissingAgent('qwen', 'plan');
    expect(verdict).toBe(true);
  });

  it('returns false for qwen without plan mode', () => {
    // Both an absent mode and a non-plan mode leave the gate open.
    for (const mode of [null, 'auto'] as const) {
      expect(shouldFailOnMissingAgent('qwen', mode)).toBe(false);
    }
  });

  it('returns false for non-qwen agents even with plan mode', () => {
    for (const agent of ['goose', 'opencode', 'claude'] as const) {
      expect(shouldFailOnMissingAgent(agent, 'plan')).toBe(false);
    }
  });

  it('returns false for qwen with any mode other than plan', () => {
    const otherModes = ['bypassPermissions', 'acceptEdits', 'dontAsk', 'default', ''];
    otherModes.forEach((mode) => {
      expect(shouldFailOnMissingAgent('qwen', mode)).toBe(false);
    });
  });

  it('returns false for empty or unknown agent name even with plan mode', () => {
    for (const agent of ['', 'native', 'boocode'] as const) {
      expect(shouldFailOnMissingAgent(agent, 'plan')).toBe(false);
    }
  });
});
describe('reconcileRun', () => {
  /** Builds one persisted step row in the shape reconcileRun is called with here. */
  const row = (stepId: string, taskId: string | null, status: string) => ({
    stepId,
    taskId,
    status,
  });

  /** Action decided for `stepId`, or undefined when no decision mentions it. */
  const actionOf = (
    decisions: readonly { stepId: string; action: string }[],
    stepId: string,
  ) => decisions.find((decision) => decision.stepId === stepId)?.action;

  it('returns one decision per step', () => {
    const rows = [
      row('a', null, 'completed'),
      row('b', 't1', 'running'),
      row('c', 't2', 'running'),
    ];
    const tasks = new Map([
      ['t1', 'completed'],
      ['t2', 'running'],
    ]);
    const decisions = reconcileRun(rows, tasks);
    const expected = [
      { stepId: 'a', action: 'keep' },
      { stepId: 'b', action: 'mark-done' },
      { stepId: 'c', action: 're-dispatch' },
    ];
    expect(decisions).toHaveLength(3);
    expected.forEach((want, index) => {
      expect(decisions[index]).toEqual(want);
    });
  });

  it('handles a mixed run: completed steps kept, live-pending kept, stale re-dispatched', () => {
    const rows = [
      row('finder-1', 't1', 'completed'),
      row('finder-2', 't2', 'running'), // PTY dead
      row('finder-3', 't3', 'running'), // pending in dispatcher
      row('synth', null, 'pending'), // not yet started
    ];
    const tasks = new Map([
      ['t1', 'completed'],
      ['t2', 'running'], // stuck — PTY dead
      ['t3', 'pending'], // dispatcher will handle
    ]);
    const decisions = reconcileRun(rows, tasks);
    expect(actionOf(decisions, 'finder-1')).toBe('keep');
    expect(actionOf(decisions, 'finder-2')).toBe('re-dispatch');
    expect(actionOf(decisions, 'finder-3')).toBe('keep');
    expect(actionOf(decisions, 'synth')).toBe('keep');
  });

  it('produces mark-failed for a failed task and mark-done for a completed task', () => {
    const rows = [row('a', 't1', 'running'), row('b', 't2', 'running')];
    const tasks = new Map([
      ['t1', 'failed'],
      ['t2', 'completed'],
    ]);
    const decisions = reconcileRun(rows, tasks);
    expect(actionOf(decisions, 'a')).toBe('mark-failed');
    expect(actionOf(decisions, 'b')).toBe('mark-done');
  });

  it('is idempotent: a re-dispatched step (task now pending) is kept on second call', () => {
    // After a re-dispatch the flow_steps row stays 'running' while its task_id
    // points at a new task that is still pending.
    const rows = [row('x', 'new-task', 'running')];
    const tasks = new Map([['new-task', 'pending']]);
    const [first] = reconcileRun(rows, tasks);
    expect(first?.action).toBe('keep');
  });

  it('returns an empty array when there are no steps', () => {
    const decisions = reconcileRun([], new Map());
    expect(decisions).toEqual([]);
  });

  it('re-dispatches a running step whose taskId is absent from the taskStates map', () => {
    // The step carries a task id, but the map has no entry for it.
    const rows = [row('x', 'orphan-task', 'running')];
    const [first] = reconcileRun(rows, new Map());
    expect(first?.action).toBe('re-dispatch');
  });

  it('re-dispatches a running step with null taskId', () => {
    const rows = [row('y', null, 'running')];
    const [first] = reconcileRun(rows, new Map());
    expect(first?.action).toBe('re-dispatch');
  });

  it('propagates mark-cancelled when the associated task was cancelled before the callback ran', () => {
    const rows = [row('z', 'tid', 'running')];
    const tasks = new Map([['tid', 'cancelled']]);
    const [first] = reconcileRun(rows, tasks);
    expect(first?.action).toBe('mark-cancelled');
  });
});