feat: in-app Orchestrator (Phase 2) — multi-agent conductor

Brings the deterministic Han-flow conductor into BooCode: launch any read-only flow from BooChat or BooCoder, watch each agent stream live in a Paseo-style run pane, get an evidence-disciplined report — on local Qwen, persisted and resumable. Read-only enforced hard via qwen --approval-mode plan (orchestrator tasks fail closed if qwen is unavailable; never fall to write-capable native). Backend (apps/coder): re-homed conductor defs, flow_runs/flow_steps schema, flow-runner + dispatcher onTaskTerminal hook, restart-resume, runs routes (launch/list/get/cancel), user-channel WS. Contracts: two flow_run_* frames. Web: orchestrator pane kind + OrchestratorPane, Workflow button + slash flows (BooChat/BooCoder parity), FlowLauncherDialog, "New Orchestrator" in the + and split menus, runs history + export. Plan: openspec/changes/orchestrator. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:59:07 +00:00
parent 519b1d2ca1
commit 1937af8df9
118 changed files with 15723 additions and 27 deletions
--- a/apps/coder/src/services/dispatcher.ts
+++ b/apps/coder/src/services/dispatcher.ts
@@ -28,6 +28,7 @@ import {
  classifyTerminalStatus,
  type TerminalMessageStatus,
 } from './finalize-message.js';
+import { shouldFailOnMissingAgent } from './flow-runner-decisions.js';

 interface InferenceRunner {
  enqueue: (sessionId: string, chatId: string, assistantId: string, user: string) => void;
@@ -41,8 +42,23 @@ interface Deps {
  broker: Broker;
  log: FastifyBaseLogger;
  config: Config;
+  /**
+   * Orchestrator hook (D-2). Fired once per task as it reaches a terminal state
+   * (completed | failed | cancelled), AFTER the run-function has written that
+   * state. Path-agnostic — it keys off the settled `tasks` row, not any single
+   * run-function's terminal branch, so it fires for native + every external
+   * path. The flow-runner wires this to advance its `flow_runs`; absent (default)
+   * the dispatcher behaves exactly as before. Best-effort: a throw is logged and
+   * swallowed so it can never wedge the poll loop.
+   */
+  onTaskTerminal?: (taskId: string, state: string) => void;
 }

+// Task states on which the orchestrator hook fires. 'blocked' is deliberately
+// left out — a blocked task is awaiting a human decision, so its runTask promise
+// has not settled yet (the hook only fires after runTask settles).
+const TERMINAL_TASK_STATES: ReadonlySet<string> = new Set(['completed', 'failed', 'cancelled']);
+
 // LISTEN/NOTIFY ('tasks_new') is the fast path — the dispatcher reacts to new
 // tasks immediately. The poll is only a safety net for notifications missed
 // during a listen-connection drop (porsager auto-reconnects), so it can stay slow.
@@ -54,7 +70,7 @@ export function createDispatcher(deps: Deps): {
  start(): void;
  stop(): Promise<void>;
 } {
-  const { sql, inference, broker, log, config } = deps;
+  const { sql, inference, broker, log, config, onTaskTerminal } = deps;
  let timer: ReturnType<typeof setInterval> | null = null;
  let listener: { unlisten: () => Promise<void> } | null = null;
  let polling = false;
@@ -134,6 +150,28 @@ export function createDispatcher(deps: Deps): {
    return taskControllers.cancel(taskId);
  }

+  // D-2: best-effort notification that a task settled. Re-reads the state the
+  // run-function wrote (path-agnostic: no per-path instrumentation needed) and
+  // calls the hook only for terminal states. A failed read, a synchronous throw,
+  // or a rejected promise from an async hook is logged and swallowed.
+  function fireTaskTerminal(taskId: string): void {
+    if (!onTaskTerminal) return;
+    sql<{ state: string }[]>`SELECT state FROM tasks WHERE id = ${taskId}`
+      .then((rows) => {
+        const state = rows[0]?.state;
+        if (!state || !TERMINAL_TASK_STATES.has(state)) return;
+        // Called inside a chain so a sync throw and an async rejection are both caught.
+        void Promise.resolve()
+          .then(() => onTaskTerminal(taskId, state))
+          .catch((err: unknown) => {
+            log.error({ err, taskId }, 'dispatcher: onTaskTerminal callback threw');
+          });
+      })
+      .catch((err: unknown) => {
+        log.error({ err, taskId }, 'dispatcher: onTaskTerminal state read failed');
+      });
+  }
+
  async function poll(): Promise<void> {
    // `polling` serializes poll() execution itself (timer + NOTIFY can fire
    // concurrently) so we never double-select a task. It does NOT serialize task
@@ -172,6 +210,15 @@ export function createDispatcher(deps: Deps): {
          taskControllers.delete(task.id);
        });
        inflight.set(key, p);
+        // D-2: fire the orchestrator hook once the run settles (terminal state
+        // written), on both fulfilment and rejection. Detached from `p` so it
+        // never affects the inflight lifecycle or stop()'s drain.
+        if (onTaskTerminal) {
+          void p.then(
+            () => fireTaskTerminal(task.id),
+            () => fireTaskTerminal(task.id),
+          );
+        }
      }
    } finally {
      polling = false;
@@ -197,6 +244,24 @@ export function createDispatcher(deps: Deps): {
        SELECT name, supports_acp, install_path FROM available_agents WHERE name = ${task.agent}
      `;
      if (agentRow) {
+        // ORCHESTRATOR READ-ONLY INVARIANT (D-4). A qwen task dispatched in plan
+        // mode MUST bind the hard tool-level gate, and only the PTY path applies
+        // it (`qwen --approval-mode plan`, pty-dispatch.ts:75 — reads allowed,
+        // writes blocked inside the agent binary). The ACP paths set the mode via
+        // a session RPC (`setSessionMode`) that is fail-OPEN — a failed/ignored
+        // call leaves the agent write-capable — so they are never safe for the
+        // read-only guarantee. Force the one-shot PTY path for qwen+plan tasks
+        // regardless of available_agents.supports_acp (which probes true for qwen,
+        // since `qwen --help` lists `--acp`). This is correct on its own merits
+        // too: qwen's ACP bridge is an HTTP daemon, not the stdio ACP that
+        // dispatchViaAcp drives (root CLAUDE.md), so PTY is the working qwen path.
+        // Scoped to qwen (the orchestrator's only agent) to avoid changing the
+        // routing of any other agent; the ACP fail-closed guard (acp-dispatch.ts)
+        // backstops a plan-mode task that reaches ACP by any other route.
+        if (task.agent === 'qwen' && task.mode_id === 'plan') {
+          await runExternalAgent(task, /* supportsAcp */ false, agentRow.install_path);
+          return;
+        }
        // v2.6 (1.7): opencode routes to its warm HTTP-server backend.
        // v2.6 Phase 2 (2.4): goose/qwen route to the warm ACP backend WHEN the
        // task came from a real chat tab (session_id + chat_id) — shouldUseWarmBackend.
@@ -217,6 +282,19 @@ export function createDispatcher(deps: Deps): {
        }
        return;
      }
+      // Orchestrator (qwen+plan) tasks must NEVER fall through to write-capable
+      // native inference — mark the task failed instead (H1 fix). A failed UPDATE
+      // is logged rather than swallowed: the row would otherwise stay non-terminal.
+      if (shouldFailOnMissingAgent(task.agent, task.mode_id)) {
+        const errMsg = 'orchestrator task cannot run: qwen agent is not available (probe failed or binary missing)';
+        log.error({ taskId, agent: task.agent }, `dispatcher: ${errMsg}`);
+        await sql`
+          UPDATE tasks
+          SET state = 'failed', ended_at = clock_timestamp(), output_summary = ${errMsg}
+          WHERE id = ${taskId}
+        `.catch((err: unknown) => log.error({ err, taskId }, 'dispatcher: could not mark orchestrator task failed'));
+        return;
+      }
      // Agent specified but not available — fall through to Path A with a warning
      log.warn({ taskId, agent: task.agent }, 'dispatcher: specified agent not available, falling back to native');
    }