feat: in-app Orchestrator (Phase 2) — multi-agent conductor

Brings the deterministic Han-flow conductor into BooCode: launch any read-only flow from BooChat or BooCoder, watch each agent stream live in a Paseo-style run pane, get an evidence-disciplined report — on local Qwen, persisted and resumable. Read-only enforced hard via qwen --approval-mode plan (orchestrator tasks fail closed if qwen is unavailable; never fall to write-capable native). Backend (apps/coder): re-homed conductor defs, flow_runs/flow_steps schema, flow-runner + dispatcher onTaskTerminal hook, restart-resume, runs routes (launch/list/get/cancel), user-channel WS. Contracts: two flow_run_* frames. Web: orchestrator pane kind + OrchestratorPane, Workflow button + slash flows (BooChat/BooCoder parity), FlowLauncherDialog, "New Orchestrator" in the + and split menus, runs history + export. Plan: openspec/changes/orchestrator. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:59:07 +00:00
parent 519b1d2ca1
commit 1937af8df9
118 changed files with 15723 additions and 27 deletions
--- a/apps/coder/src/services/tests/flow-runner-decisions.test.ts
+++ b/apps/coder/src/services/tests/flow-runner-decisions.test.ts
@@ -0,0 +1,389 @@
+import { describe, it, expect } from 'vitest';
+import type { Flow, Step, StepContext } from '../../conductor/types.js';
+import {
+  manifestSteps,
+  readySteps,
+  partitionReady,
+  isRunComplete,
+  isStuck,
+  reconcileResumeStep,
+  reconcileRun,
+  shouldFailOnMissingAgent,
+  type SchedulerState,
+} from '../flow-runner-decisions.js';
+
+/**
+ * The DB-driven flow-runner replaces the Phase-1 in-memory wave scheduler
+ * (conductor/src/flow.ts). These pure helpers are that scheduler's decision
+ * core, lifted out so the DB-backed runner stays a thin IO shell over a
+ * testable kernel (repo pattern: turn-guard.ts / lifecycle-decisions.ts).
+ *
+ * The schedule must match conductor/flow.ts:27-41 exactly: a step is ready when
+ * every dependency is settled (completed OR skipped/excluded), and a ready step
+ * is skipped when its when() guard returns false against the current context.
+ */
+
+// A small synthetic flow exercising: parallel angles (a, b), with b band-gated; a code
+// fold that fans them in; an agent synthesizer (synth); and a terminal validator (val).
+function makeFlow(): Flow { // dependency chain: (a, b) → fold → synth → val
+  const steps: Step[] = [
+    { id: 'a', kind: 'agent', agent: 'analyst-a', run: () => 'prompt a' },
+    {
+      id: 'b',
+      kind: 'agent',
+      agent: 'analyst-b',
+      // band-gated: guard passes only for band 'medium' or 'large' (mirrors Angle.minBand gating)
+      when: (ctx) => ctx.input.band === 'medium' || ctx.input.band === 'large',
+      run: () => 'prompt b',
+    },
+    { id: 'fold', kind: 'code', deps: ['a', 'b'], run: (ctx) => `fold:${Object.keys(ctx.results).join(',')}` },
+    { id: 'synth', kind: 'agent', agent: 'architect', deps: ['fold'], run: () => 'prompt synth' },
+    { id: 'val', kind: 'agent', agent: 'validator', deps: ['synth'], run: () => 'prompt val' },
+  ];
+  return { name: 'demo', description: 'demo flow', steps, render: () => 'report' };
+}
+
+function ctxOf(band: string, results: Record<string, string> = {}): StepContext { // test context: fixed question 'q', caller-chosen band, optional prior results
+  return { input: { question: 'q', band }, results };
+}
+
+const emptyState = (over: Partial<SchedulerState> = {}): SchedulerState => ({ // all four sets start empty; fields given in `over` replace them
+  done: new Set(),
+  skipped: new Set(),
+  inFlight: new Set(),
+  excluded: new Set(),
+  ...over,
+});
+
+describe('manifestSteps', () => { // pins: result keeps declaration order and omits steps whose when() is false for the context
+  it('drops a band-gated step at small band, keeps it at medium', () => {
+    const flow = makeFlow();
+    const small = manifestSteps(flow, ctxOf('small')).map((s) => s.id);
+    expect(small).toEqual(['a', 'fold', 'synth', 'val']); // b excluded by when()
+    const medium = manifestSteps(flow, ctxOf('medium')).map((s) => s.id);
+    expect(medium).toEqual(['a', 'b', 'fold', 'synth', 'val']);
+  });
+
+  it('includes every step when no when() guards are defined', () => {
+    const flow: Flow = {
+      name: 'guardless',
+      description: 'no guards',
+      steps: [
+        { id: 'x', kind: 'agent', agent: 'a', run: () => 'p' },
+        { id: 'y', kind: 'code', deps: ['x'], run: () => 'r' },
+      ],
+      render: () => '',
+    };
+    expect(manifestSteps(flow, ctxOf('small')).map((s) => s.id)).toEqual(['x', 'y']);
+  });
+
+  it('returns empty when every when() guard evaluates false', () => {
+    const flow: Flow = {
+      name: 'all-gated',
+      description: 'all filtered',
+      steps: [
+        { id: 'p', kind: 'agent', agent: 'a', when: () => false, run: () => 'p' },
+        { id: 'q', kind: 'agent', agent: 'b', when: () => false, run: () => 'q' },
+      ],
+      render: () => '',
+    };
+    expect(manifestSteps(flow, ctxOf('small'))).toEqual([]);
+  });
+});
+
+describe('readySteps', () => { // pins: a step is offered only when it is unsettled, not in flight, and every dep is done/skipped/excluded
+  it('returns only dep-free, unsettled steps first', () => {
+    const flow = makeFlow();
+    // small band → b is excluded; a is the only dep-free live step.
+    const state = emptyState({ excluded: new Set(['b']) });
+    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['a']);
+  });
+
+  it('treats an excluded dependency as satisfied (fold unblocks once a is done)', () => {
+    const flow = makeFlow();
+    // a completed, b excluded (small band) → fold's deps [a,b] are both settled.
+    const state = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
+    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['fold']);
+  });
+
+  it('does not re-offer a step that is in flight', () => {
+    const flow = makeFlow();
+    const state = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
+    expect(readySteps(flow, state).map((s) => s.id)).toEqual([]);
+  });
+
+  it('waits on an unfinished dependency', () => {
+    const flow = makeFlow();
+    // a and fold done, synth not yet done → synth is the only ready step; val still waits on synth.
+    const blocked = emptyState({ done: new Set(['a', 'fold']), excluded: new Set(['b']) });
+    expect(readySteps(flow, blocked).map((s) => s.id)).toEqual(['synth']);
+  });
+
+  it('returns both parallel dep-free steps when neither is excluded or settled', () => {
+    const flow = makeFlow();
+    // readySteps does NOT evaluate when() — that is partitionReady's job.
+    // Both a and b have no deps and are unsettled → both offered.
+    expect(readySteps(flow, emptyState()).map((s) => s.id)).toEqual(['a', 'b']);
+  });
+
+  it('does not unblock a step whose dep is in flight (dep not yet satisfied)', () => {
+    const flow = makeFlow();
+    // a is in flight → isSatisfied('a') === false → fold (deps:['a','b']) blocked.
+    // b excluded (satisfied). Nothing else has all deps satisfied → empty wave.
+    const state = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
+    const ready = readySteps(flow, state).map((s) => s.id);
+    expect(ready).not.toContain('fold');
+    expect(ready).toEqual([]);
+  });
+
+  it('advances through the full wave chain: a+b+fold+synth done → val is ready', () => {
+    const flow = makeFlow();
+    const state = emptyState({ done: new Set(['a', 'b', 'fold', 'synth']) });
+    expect(readySteps(flow, state).map((s) => s.id)).toEqual(['val']);
+  });
+});
+
+describe('partitionReady', () => { // pins: ready steps split into toRun / toSkip by evaluating each when() guard against the context
+  it('routes a when()-false step to toSkip, the rest to toRun', () => {
+    const flow = makeFlow();
+    // At small band both a and b are "ready" if offered; b's guard fails → skip.
+    const ready = [flow.steps[0]!, flow.steps[1]!]; // a, b
+    const { toRun, toSkip } = partitionReady(ready, ctxOf('small'));
+    expect(toRun.map((s) => s.id)).toEqual(['a']);
+    expect(toSkip.map((s) => s.id)).toEqual(['b']);
+  });
+
+  it('keeps every guardless step in toRun', () => {
+    const flow = makeFlow();
+    const ready = [flow.steps[2]!, flow.steps[3]!]; // fold, synth (no when)
+    const { toRun, toSkip } = partitionReady(ready, ctxOf('large'));
+    expect(toRun.map((s) => s.id)).toEqual(['fold', 'synth']);
+    expect(toSkip).toEqual([]);
+  });
+
+  it('handles an empty ready list gracefully', () => {
+    const { toRun, toSkip } = partitionReady([], ctxOf('small'));
+    expect(toRun).toEqual([]);
+    expect(toSkip).toEqual([]);
+  });
+});
+
+describe('isRunComplete / isStuck', () => { // pins: complete = every step done/skipped/excluded; stuck = incomplete, nothing ready, nothing in flight
+  it('is complete when every step is settled or excluded', () => {
+    const flow = makeFlow();
+    const state = emptyState({
+      done: new Set(['a', 'fold', 'synth', 'val']),
+      excluded: new Set(['b']),
+    });
+    expect(isRunComplete(flow, state)).toBe(true);
+    expect(isStuck(flow, state)).toBe(false);
+  });
+
+  it('is not complete while a step is pending', () => {
+    const flow = makeFlow();
+    const state = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
+    expect(isRunComplete(flow, state)).toBe(false);
+  });
+
+  it('is not stuck while a ready step still exists', () => {
+    const flow = makeFlow();
+    // a done, b excluded → fold is ready, so the run can still progress.
+    const state = emptyState({ done: new Set(['a']), excluded: new Set(['b']) });
+    expect(isStuck(flow, state)).toBe(false);
+  });
+
+  it('is stuck on an unsatisfiable dependency with nothing in flight', () => {
+    // 'orphan' depends on 'ghost', which is never produced (not a step, never
+    // settled) — and nothing is in flight to ever settle it: a dead end.
+    const cyclic: Flow = { // NOTE(review): misnomer — this flow has no cycle, only a dangling dep on 'ghost'
+      name: 'stuck',
+      description: 'unsatisfiable',
+      steps: [{ id: 'orphan', kind: 'agent', agent: 'x', deps: ['ghost'], run: () => 'p' }],
+      render: () => 'r',
+    };
+    const state = emptyState();
+    expect(readySteps(cyclic, state)).toEqual([]);
+    expect(isRunComplete(cyclic, state)).toBe(false);
+    expect(isStuck(cyclic, state)).toBe(true);
+  });
+
+  it('is complete when every step is skipped (no done, no excluded)', () => {
+    const flow = makeFlow();
+    const state = emptyState({ skipped: new Set(['a', 'b', 'fold', 'synth', 'val']) });
+    expect(isRunComplete(flow, state)).toBe(true);
+    expect(isStuck(flow, state)).toBe(false);
+  });
+
+  it('is complete when steps are spread across done + skipped + excluded', () => {
+    const flow = makeFlow();
+    // a done, b excluded, fold done, synth skipped, val done — all settled.
+    const state = emptyState({
+      done: new Set(['a', 'fold', 'val']),
+      skipped: new Set(['synth']),
+      excluded: new Set(['b']),
+    });
+    expect(isRunComplete(flow, state)).toBe(true);
+    expect(isStuck(flow, state)).toBe(false);
+  });
+
+  it('is NOT stuck when in-flight tasks exist even if no step is currently ready', () => {
+    const flow = makeFlow();
+    // a is in flight → fold's dep unsatisfied → nothing ready.
+    // But a is still running so the run CAN make progress → not stuck.
+    const state = emptyState({ inFlight: new Set(['a']), excluded: new Set(['b']) });
+    expect(isStuck(flow, state)).toBe(false);
+    expect(isRunComplete(flow, state)).toBe(false);
+    expect(readySteps(flow, state)).toEqual([]); // confirms nothing ready
+  });
+});
+
+// ─── Resume reconciliation (D-9) ─────────────────────────────────────────────
+
+describe('reconcileResumeStep', () => { // args as used below: (step status, task id or null, task state or null) → resume action
+  it('keeps non-running steps regardless of task state', () => {
+    for (const status of ['completed', 'skipped', 'failed', 'cancelled', 'pending']) {
+      expect(reconcileResumeStep(status, 'tid', 'running')).toBe('keep'); // NOTE(review): only task state 'running' is exercised here, despite the title
+    }
+  });
+
+  it('re-dispatches a running step with no task_id', () => {
+    expect(reconcileResumeStep('running', null, null)).toBe('re-dispatch');
+  });
+
+  it('re-dispatches a running step whose task is absent from the DB', () => {
+    expect(reconcileResumeStep('running', 'tid', null)).toBe('re-dispatch');
+  });
+
+  it('marks done when the task completed before the callback ran', () => {
+    expect(reconcileResumeStep('running', 'tid', 'completed')).toBe('mark-done');
+  });
+
+  it('marks failed when the task failed', () => {
+    expect(reconcileResumeStep('running', 'tid', 'failed')).toBe('mark-failed');
+  });
+
+  it('marks cancelled when the task was cancelled', () => {
+    expect(reconcileResumeStep('running', 'tid', 'cancelled')).toBe('mark-cancelled');
+  });
+
+  it('keeps a running step whose task is pending (dispatcher startup poll handles it)', () => {
+    expect(reconcileResumeStep('running', 'tid', 'pending')).toBe('keep');
+  });
+
+  it('re-dispatches when the task is running (PTY dead on restart)', () => {
+    expect(reconcileResumeStep('running', 'tid', 'running')).toBe('re-dispatch');
+  });
+
+  it('re-dispatches when the task is blocked (permission dialog gone on restart)', () => {
+    expect(reconcileResumeStep('running', 'tid', 'blocked')).toBe('re-dispatch');
+  });
+});
+
+// ─── Dispatcher routing guard (H1) ───────────────────────────────────────────
+
+describe('shouldFailOnMissingAgent', () => { // pins: true only for the exact pair ('qwen', 'plan'); every other agent/mode pair below is false
+  it('returns true for qwen+plan (orchestrator read-only gate)', () => {
+    expect(shouldFailOnMissingAgent('qwen', 'plan')).toBe(true);
+  });
+
+  it('returns false for qwen without plan mode', () => {
+    expect(shouldFailOnMissingAgent('qwen', null)).toBe(false);
+    expect(shouldFailOnMissingAgent('qwen', 'auto')).toBe(false);
+  });
+
+  it('returns false for non-qwen agents even with plan mode', () => {
+    expect(shouldFailOnMissingAgent('goose', 'plan')).toBe(false);
+    expect(shouldFailOnMissingAgent('opencode', 'plan')).toBe(false);
+    expect(shouldFailOnMissingAgent('claude', 'plan')).toBe(false);
+  });
+
+  it('returns false for qwen with any mode other than plan', () => {
+    for (const mode of ['bypassPermissions', 'acceptEdits', 'dontAsk', 'default', '']) { // extends the 'without plan mode' case above with more mode strings, incl. empty
+      expect(shouldFailOnMissingAgent('qwen', mode)).toBe(false);
+    }
+  });
+
+  it('returns false for empty or unknown agent name even with plan mode', () => {
+    expect(shouldFailOnMissingAgent('', 'plan')).toBe(false);
+    expect(shouldFailOnMissingAgent('native', 'plan')).toBe(false);
+    expect(shouldFailOnMissingAgent('boocode', 'plan')).toBe(false);
+  });
+});
+
+describe('reconcileRun', () => { // args as used below: ({ stepId, taskId, status }[], Map of taskId → task state) → { stepId, action }[]
+  it('returns one decision per step', () => {
+    const steps = [
+      { stepId: 'a', taskId: null, status: 'completed' },
+      { stepId: 'b', taskId: 't1', status: 'running' },
+      { stepId: 'c', taskId: 't2', status: 'running' },
+    ];
+    const taskStates = new Map([['t1', 'completed'], ['t2', 'running']]);
+    const decisions = reconcileRun(steps, taskStates);
+    expect(decisions).toHaveLength(3);
+    expect(decisions[0]).toEqual({ stepId: 'a', action: 'keep' }); // index-based checks also pin input order
+    expect(decisions[1]).toEqual({ stepId: 'b', action: 'mark-done' });
+    expect(decisions[2]).toEqual({ stepId: 'c', action: 're-dispatch' });
+  });
+
+  it('handles a mixed run: completed steps kept, live-pending kept, stale re-dispatched', () => {
+    const steps = [
+      { stepId: 'finder-1',  taskId: 't1',  status: 'completed' },
+      { stepId: 'finder-2',  taskId: 't2',  status: 'running' },  // PTY dead
+      { stepId: 'finder-3',  taskId: 't3',  status: 'running' },  // pending in dispatcher
+      { stepId: 'synth',     taskId: null,  status: 'pending' },  // not yet started
+    ];
+    const taskStates = new Map([
+      ['t1', 'completed'],
+      ['t2', 'running'],   // stuck — PTY dead
+      ['t3', 'pending'],   // dispatcher will handle
+    ]);
+    const decisions = reconcileRun(steps, taskStates);
+    expect(decisions.find((d) => d.stepId === 'finder-1')?.action).toBe('keep');
+    expect(decisions.find((d) => d.stepId === 'finder-2')?.action).toBe('re-dispatch');
+    expect(decisions.find((d) => d.stepId === 'finder-3')?.action).toBe('keep');
+    expect(decisions.find((d) => d.stepId === 'synth')?.action).toBe('keep');
+  });
+
+  it('produces mark-failed for a failed task and mark-done for a completed task', () => {
+    const steps = [
+      { stepId: 'a', taskId: 't1', status: 'running' },
+      { stepId: 'b', taskId: 't2', status: 'running' },
+    ];
+    const taskStates = new Map([['t1', 'failed'], ['t2', 'completed']]);
+    const decisions = reconcileRun(steps, taskStates);
+    expect(decisions.find((d) => d.stepId === 'a')?.action).toBe('mark-failed');
+    expect(decisions.find((d) => d.stepId === 'b')?.action).toBe('mark-done');
+  });
+
+  it('is idempotent: a re-dispatched step (task now pending) is kept on second call', () => {
+    // After re-dispatch, flow_steps.status stays 'running' but task_id → new pending task.
+    const steps = [{ stepId: 'x', taskId: 'new-task', status: 'running' }];
+    const taskStates = new Map([['new-task', 'pending']]);
+    const decisions = reconcileRun(steps, taskStates);
+    expect(decisions[0]?.action).toBe('keep');
+  });
+
+  it('returns an empty array when there are no steps', () => {
+    expect(reconcileRun([], new Map())).toEqual([]);
+  });
+
+  it('re-dispatches a running step whose taskId is absent from the taskStates map', () => {
+    // taskId is set but the task row no longer exists in the DB → taskState resolves to null.
+    const steps = [{ stepId: 'x', taskId: 'orphan-task', status: 'running' }];
+    const decisions = reconcileRun(steps, new Map());
+    expect(decisions[0]?.action).toBe('re-dispatch');
+  });
+
+  it('re-dispatches a running step with null taskId', () => {
+    const steps = [{ stepId: 'y', taskId: null, status: 'running' }];
+    const decisions = reconcileRun(steps, new Map());
+    expect(decisions[0]?.action).toBe('re-dispatch');
+  });
+
+  it('propagates mark-cancelled when the associated task was cancelled before the callback ran', () => {
+    const steps = [{ stepId: 'z', taskId: 'tid', status: 'running' }];
+    const taskStates = new Map([['tid', 'cancelled']]);
+    const decisions = reconcileRun(steps, taskStates);
+    expect(decisions[0]?.action).toBe('mark-cancelled');
+  });
+});