// boocode/packages/ion/src/engine/dag-executor.ts

/**
 * DAG Executor — the core execution engine for Ion workflows.
 *
 * Traverses a DAG of nodes in topological layers, executing nodes within
 * each layer concurrently via Promise.allSettled. Supports prompt nodes,
 * command/bash/script nodes, approval gates, loop nodes, and cancel nodes.
 *
 * Architecture mirrors Archon's dag-executor: Kahn's algorithm for layer
 * building, trigger rules for dependency evaluation, and Promise.allSettled
 * for concurrent execution within each layer.
 */

import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';

import type {
  DagNode,
  PromptNode,
  BashNode,
  ScriptNode,
  ApprovalNode,
  LoopNode,
  CancelNode,
  NodeOutput,
  NodeExecutionResult,
  TriggerRule,
  WorkflowDefinition,
} from '../schema/index.js';
import { DEFAULT_TRIGGER_RULE } from '../schema/index.js';
import {
  isBashNode,
  isScriptNode,
  isLoopNode,
  isApprovalNode,
  isCancelNode,
  isPromptNode,
  isCommandNode,
} from '../schema/index.js';

import type { IWorkflowPlatform, WorkflowDeps, WorkflowConfig } from './deps.js';
import {
  evaluateCondition,
  substituteWorkflowVariables,
  substituteNodeOutputRefs,
  buildPromptWithContext,
  classifyError,
  safeSendMessage,
  formatSubprocessFailure,
  sleep,
  retryWithBackoff,
  OutputRefError,
  DagCycleError,
  NodeTimeoutError,
  ApprovalRejectedError,
  LoopMaxIterationsError,
} from './utils.js';

// Promise-returning wrapper around `child_process.execFile`; callers in this
// file await it and read `stdout` / `stderr` from the resolved value.
const execFileAsync = promisify(execFile);

// ---------------------------------------------------------------------------
// Topological layer building (Kahn's algorithm)
// ---------------------------------------------------------------------------

/**
 * Group DAG nodes into topological layers using Kahn's algorithm.
 *
 * Layer 0 holds every node without dependencies; each later layer holds the
 * nodes whose dependencies all live in earlier layers, so the nodes of one
 * layer can be executed concurrently.
 *
 * Duplicate entries in a node's `depends_on` are counted once. A dependency
 * on an id that is not present in `nodes` can never be satisfied, so such a
 * node is never scheduled and is reported through the same error as a cycle.
 *
 * @param nodes - Flat list of DAG nodes with `depends_on` references.
 * @returns Array of layers, each layer being an array of nodes.
 * @throws DagCycleError if not every node could be scheduled.
 */
export function buildTopologicalLayers(nodes: DagNode[]): DagNode[][] {
  const nodeMap = new Map<string, DagNode>();
  const inDegree = new Map<string, number>();
  const adjacency = new Map<string, Set<string>>(); // dep → nodes that depend on it

  for (const node of nodes) {
    nodeMap.set(node.id, node);
    // De-duplicate so the in-degree matches the number of adjacency edges;
    // otherwise a repeated dependency would never be decremented to zero.
    const uniqueDeps = new Set(node.depends_on);
    inDegree.set(node.id, uniqueDeps.size);
    for (const dep of uniqueDeps) {
      let dependents = adjacency.get(dep);
      if (!dependents) {
        dependents = new Set();
        adjacency.set(dep, dependents);
      }
      dependents.add(node.id);
    }
  }

  // Seed with the nodes that have nothing to wait for.
  let currentLayer: string[] = [];
  for (const [id, degree] of inDegree) {
    if (degree === 0) currentLayer.push(id);
  }

  const layers: DagNode[][] = [];
  let totalProcessed = 0;

  while (currentLayer.length > 0) {
    const layerNodes = currentLayer
      .map((id) => nodeMap.get(id))
      .filter((n): n is DagNode => n !== undefined);
    layers.push(layerNodes);
    totalProcessed += layerNodes.length;

    // Releasing this layer may unblock its dependents for the next one.
    const nextLayer: string[] = [];
    for (const id of currentLayer) {
      const dependents = adjacency.get(id);
      if (!dependents) continue;
      for (const depId of dependents) {
        const remaining = (inDegree.get(depId) ?? 0) - 1;
        inDegree.set(depId, remaining);
        if (remaining === 0) nextLayer.push(depId);
      }
    }

    currentLayer = nextLayer;
  }

  // Anything left unscheduled is part of a cycle (or waits on an unknown id).
  if (totalProcessed < nodes.length) {
    throw new DagCycleError(nodes.length, totalProcessed);
  }

  return layers;
}

// ---------------------------------------------------------------------------
// Trigger rule evaluation
// ---------------------------------------------------------------------------

/**
 * Decide whether a node runs or is skipped, given its trigger rule and what
 * has been recorded for its dependencies.
 *
 * A node without dependencies always runs. A dependency with no entry in
 * `nodeOutputs` counts as neither succeeded nor failed, and as "not done".
 *
 * @param node - The DAG node to evaluate.
 * @param nodeOutputs - Map of node id → recorded output.
 * @returns `'run'` if the node should execute, `'skip'` otherwise.
 */
export function checkTriggerRule(
  node: DagNode,
  nodeOutputs: Map<string, NodeOutput>,
): 'run' | 'skip' {
  const rule: TriggerRule = node.trigger_rule ?? DEFAULT_TRIGGER_RULE;

  const dependencyIds = node.depends_on;
  if (dependencyIds.length === 0) return 'run';

  const recorded = dependencyIds.map((id) => nodeOutputs.get(id));
  const succeeded = (entry: NodeOutput | undefined): boolean => entry?.state === 'completed';
  const failed = (entry: NodeOutput | undefined): boolean => entry?.state === 'failed';
  const verdict = (shouldRun: boolean): 'run' | 'skip' => (shouldRun ? 'run' : 'skip');

  // Every dependency completed successfully.
  if (rule === 'all_success') return verdict(recorded.every(succeeded));

  // At least one dependency completed successfully.
  if (rule === 'one_success') return verdict(recorded.some(succeeded));

  // Every dependency has an entry, whatever its state.
  if (rule === 'all_done') return verdict(recorded.every((entry) => entry !== undefined));

  // Nothing failed and at least one dependency succeeded.
  if (rule === 'none_failed_min_one_success') {
    const anyFailed = recorded.some(failed);
    const anySucceeded = recorded.some(succeeded);
    return verdict(!anyFailed && anySucceeded);
  }

  // Unrecognised rule: run.
  return 'run';
}

// ---------------------------------------------------------------------------
// Node output reference substitution
// ---------------------------------------------------------------------------

/**
 * Substitute node output references in a prompt string.
 *
 * Resolves `$nodeId.output` → full text, `$nodeId.output.field` → structured field.
 *
 * @param prompt - Template string with `$nodeId.output` references.
 * @param nodeOutputs - Map of node id → NodeOutput.
 * @param escapedForBash - If true, escape special bash characters in output values.
 */
export { substituteNodeOutputRefs } from './utils.js';

// ---------------------------------------------------------------------------
// Prompt / command node execution
// ---------------------------------------------------------------------------

/**
 * Execute a single prompt node by sending its prompt to an AI provider.
 *
 * Steps:
 * 1. Resolve the prompt template from `command_file` (read relative to `cwd`)
 *    or, failing that, from the inline `prompt`.
 * 2. Substitute workflow variables and node output references. Node-level
 *    `env` entries are merged over the workflow variables first, so they take
 *    part in the substitution.
 * 3. Send the prompt through `executeWithIdleTimeout`, wrapped in
 *    `retryWithBackoff`. With `retry.on_error === 'all'` no predicate is
 *    passed to the retry helper; otherwise only errors classified as
 *    `transient` or `rate_limit` are marked retryable.
 * 4. When `output_format` is set, try to parse and validate the response.
 *    An empty, unparseable or invalid response triggers a reask (up to three
 *    attempts in total). If the last attempt is still not valid, the node
 *    completes best-effort with the raw text and no `fields`.
 *
 * @param node - The prompt node to execute.
 * @param deps - Dependency container; used to look up the agent provider.
 * @param platform - Platform used for the completion notification.
 * @param conversationId - Conversation the notification is sent to.
 * @param cwd - Base directory for resolving `command_file`.
 * @param config - Workflow config; `assistant` is the fallback provider id.
 * @param nodeOutputs - Outputs of previously executed nodes, for `$node.output` refs.
 * @param workflowVariables - Workflow-level variables for substitution.
 * @returns A `completed` result with the response text (and parsed `fields`
 *   when validation succeeded), or a `failed` result describing the error.
 *   Errors from substitution other than `OutputRefError` are rethrown.
 */
export async function executeNodeInternal(
  node: PromptNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  cwd: string,
  config: WorkflowConfig,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  const providerId = node.provider ?? config.assistant;
  const provider = deps.getAgentProvider(providerId);

  // Resolve the prompt template: a command file takes precedence over an inline prompt.
  let promptText: string;
  if (node.command_file) {
    try {
      const filePath = join(cwd, node.command_file);
      promptText = await readFile(filePath, 'utf-8');
    } catch (err) {
      return {
        state: 'failed',
        error: `Failed to read command file "${node.command_file}": ${err instanceof Error ? err.message : String(err)}`,
      };
    }
  } else if (node.prompt) {
    promptText = node.prompt;
  } else {
    return { state: 'failed', error: `Prompt node "${node.id}" has neither prompt nor command_file` };
  }

  // Node-level env vars override workflow variables of the same name and must
  // be merged BEFORE substitution, otherwise they never reach the prompt.
  const mergedVars: Record<string, unknown> = { ...workflowVariables, ...(node.env ?? {}) };

  try {
    promptText = buildPromptWithContext(promptText, mergedVars, nodeOutputs);
  } catch (err) {
    if (err instanceof OutputRefError) {
      return { state: 'failed', error: err.message };
    }
    throw err;
  }

  // Retry configuration
  const maxAttempts = node.retry?.max_attempts ?? 1;
  const onError = node.retry?.on_error ?? 'transient';
  const delayMs = node.retry?.delay_ms ?? 1000;

  // Timeout handed to executeWithIdleTimeout (5 minutes by default).
  const idleTimeoutMs = node.idle_timeout_ms ?? 300_000;

  // Loop-invariant, so built once; classifies each error a single time.
  const retryPredicate = onError === 'all'
    ? undefined
    : (err: unknown): boolean => {
        const category = classifyError(err);
        return category === 'transient' || category === 'rate_limit';
      };

  // Structured output gets up to three tries; plain text gets exactly one.
  const maxReaskAttempts = node.output_format ? 3 : 1;
  let lastOutput = '';
  let lastFields: Record<string, unknown> | undefined;

  for (let reaskAttempt = 0; reaskAttempt < maxReaskAttempts; reaskAttempt++) {
    const currentPrompt = reaskAttempt === 0
      ? promptText
      : `${promptText}\n\nPrevious response did not match the expected format. Please try again, ensuring your response matches: ${JSON.stringify(node.output_format)}`;

    try {
      const responseText: string | undefined = await retryWithBackoff(
        () => executeWithIdleTimeout(provider, currentPrompt, idleTimeoutMs, node.id),
        maxAttempts,
        delayMs,
        retryPredicate,
      );
      lastOutput = responseText ?? '';
    } catch (err) {
      // Handled directly in the catch so that even a falsy thrown value
      // cannot be mistaken for a successful call.
      if (classifyError(err) === 'timeout') {
        return {
          state: 'failed',
          error: `Node "${node.id}" timed out after ${idleTimeoutMs}ms of inactivity`,
        };
      }
      return {
        state: 'failed',
        error: err instanceof Error ? err.message : String(err),
      };
    }

    // No schema requested: the first response is the result.
    if (!node.output_format) break;

    // The parser reports failure by returning undefined (it does not throw),
    // so "unparseable" must be treated like "invalid" to trigger a reask.
    const parsed = lastOutput ? tryParseStructuredOutput(lastOutput) : undefined;
    if (parsed && validateStructuredOutput(parsed, node.output_format).valid) {
      lastFields = parsed;
      break;
    }
    // Otherwise fall through: the loop reasks while attempts remain and ends
    // best-effort (raw text, no fields) after the last one.
  }

  // Notify platform
  await safeSendMessage(
    platform,
    conversationId,
    `✅ Node "${node.name ?? node.id}" completed`,
  );

  return {
    state: 'completed',
    output: lastOutput,
    fields: lastFields,
    // The provider call used here resolves to plain text only, so no cost
    // figure is available to report.
    costUsd: undefined,
  };
}

/**
 * Execute a provider call with a timeout.
 *
 * `sendPrompt` resolves once with the complete text, so there is no
 * intermediate activity to observe: the timer covers the whole call. When it
 * fires, the `AbortSignal` passed to the provider is aborted AND the returned
 * promise rejects, so a provider that ignores the signal cannot hang the node.
 *
 * @param provider - Object exposing `sendPrompt(prompt, options)`.
 * @param prompt - Prompt text to send.
 * @param idleTimeoutMs - Milliseconds to wait before giving up.
 * @param nodeId - Node id reported in the timeout error.
 * @returns The provider's response text.
 * @throws NodeTimeoutError if the timer fired before the call settled, or if
 *   the call failed after the signal was aborted.
 * @throws Whatever the provider throws for any other failure.
 */
async function executeWithIdleTimeout(
  provider: { sendPrompt: (prompt: string, options?: Record<string, unknown>) => Promise<string> },
  prompt: string,
  idleTimeoutMs: number,
  nodeId: string,
): Promise<string> {
  const controller = new AbortController();
  let timeoutId: ReturnType<typeof setTimeout> | undefined;

  // Rejects on timeout regardless of whether the provider honours the signal.
  const timedOut = new Promise<never>((_resolve, reject) => {
    timeoutId = setTimeout(() => {
      controller.abort();
      reject(new NodeTimeoutError(nodeId, idleTimeoutMs));
    }, idleTimeoutMs);
  });

  try {
    return await Promise.race([
      provider.sendPrompt(prompt, { signal: controller.signal }),
      timedOut,
    ]);
  } catch (err) {
    // A provider that honours the signal rejects with its own abort error;
    // normalise that to the timeout error callers classify on.
    if (controller.signal.aborted) {
      throw err instanceof NodeTimeoutError ? err : new NodeTimeoutError(nodeId, idleTimeoutMs);
    }
    throw err;
  } finally {
    // Always release the timer so a settled call never keeps the process alive.
    clearTimeout(timeoutId);
  }
}

/**
 * Attempt to parse structured output from a model response.
 *
 * Tries the whole text as JSON first. If that does not yield a plain object,
 * every fenced code block (``` or ```json) is tried in order and the first
 * one that parses to a plain object wins, so a leading non-JSON block does
 * not hide a later JSON block.
 *
 * @param text - Raw model response.
 * @returns The parsed object, or `undefined` if no candidate parses to a
 *   non-null, non-array object. Never throws.
 */
function tryParseStructuredOutput(text: string): Record<string, unknown> | undefined {
  // Parse one candidate; only plain objects count as structured output.
  const parseObject = (candidate: string): Record<string, unknown> | undefined => {
    try {
      const parsed: unknown = JSON.parse(candidate);
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      // Not valid JSON — the caller moves on to the next candidate.
    }
    return undefined;
  };

  const direct = parseObject(text);
  if (direct) return direct;

  // Same fence pattern as before, made global so all blocks are visited.
  for (const match of text.matchAll(/```(?:json)?\s*\n?([\s\S]*?)\n?```/g)) {
    const fromBlock = parseObject(match[1] ?? '');
    if (fromBlock) return fromBlock;
  }

  return undefined;
}

/**
 * Validate parsed output against a schema definition.
 *
 * Best-effort only: checks that every key listed in the schema's `required`
 * array is an OWN property of `parsed`. Inherited members such as `toString`
 * or `constructor` do not count as present. No other schema keyword is
 * evaluated; a schema without a `required` array always validates.
 *
 * @param parsed - Object parsed from the model response.
 * @param schema - Schema definition; only `required` is consulted.
 * @returns `{ valid: true }`, or `{ valid: false, missingKeys }` listing the
 *   required keys that are absent.
 */
function validateStructuredOutput(
  parsed: Record<string, unknown>,
  schema: Record<string, unknown>,
): { valid: boolean; missingKeys?: string[] } {
  const required = schema['required'];
  if (!Array.isArray(required)) {
    return { valid: true };
  }

  // `in` would also match prototype members, so test own properties only.
  const missing = required.filter(
    (key) => !Object.prototype.hasOwnProperty.call(parsed, key),
  );
  if (missing.length > 0) {
    return { valid: false, missingKeys: missing as string[] };
  }
  return { valid: true };
}

// ---------------------------------------------------------------------------
// Script / Bash node execution
// ---------------------------------------------------------------------------

/**
 * Run a bash or script node as a subprocess.
 *
 * The child environment is the current process environment with `envVars`
 * layered on top. Bash nodes are handed to `executeBashNode`, script nodes to
 * `executeScriptNodeByRuntime`; any other node kind yields a failed result.
 * Anything thrown along the way is converted into a failed result as well.
 *
 * @param node - The bash or script node to run.
 * @param cwd - Working directory for the subprocess.
 * @param envVars - Extra environment variables; override inherited ones.
 * @param artifactsDir - Directory passed on to the script-runtime executor.
 * @returns The execution result; this function does not reject for errors
 *   raised inside the dispatch.
 */
export async function executeScriptNode(
  node: BashNode | ScriptNode,
  cwd: string,
  envVars: Record<string, string>,
  artifactsDir: string,
): Promise<NodeExecutionResult> {
  const childEnv: Record<string, string> = Object.assign(
    {},
    process.env as Record<string, string>,
    envVars,
  );

  // Pick the executor for this node kind.
  const dispatch = async (): Promise<NodeExecutionResult> => {
    if (isBashNode(node)) {
      return executeBashNode(node, cwd, childEnv);
    }
    if (isScriptNode(node)) {
      return executeScriptNodeByRuntime(node, cwd, childEnv, artifactsDir);
    }
    return { state: 'failed', error: `Unknown script node kind: ${(node as DagNode).kind}` };
  };

  try {
    return await dispatch();
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    return { state: 'failed', error: reason };
  }
}

/**
 * Run a bash node's script via `bash -c` and capture its stdout.
 *
 * @param node - Bash node; `bash` is the script, `timeout_ms` defaults to 60s.
 * @param cwd - Working directory for the subprocess.
 * @param env - Complete environment for the subprocess.
 * @returns `completed` with trimmed stdout, or `failed` with a formatted
 *   description. A process flagged as killed is reported as a likely timeout.
 */
async function executeBashNode(
  node: BashNode,
  cwd: string,
  env: Record<string, string>,
): Promise<NodeExecutionResult> {
  const timeoutMs = node.timeout_ms ?? 60_000;

  try {
    const finished = await execFileAsync('bash', ['-c', node.bash], {
      cwd,
      env,
      timeout: timeoutMs,
      maxBuffer: 10 * 1024 * 1024, // 10MB
    });
    return { state: 'completed', output: finished.stdout.trim() };
  } catch (err) {
    // Anything that is not an Error carrying a `code` is reported verbatim.
    if (!(err instanceof Error) || !('code' in err)) {
      return { state: 'failed', error: String(err) };
    }

    const failure = err as Error & { code?: string; stdout?: string; stderr?: string; killed?: boolean };
    const capturedStdout = failure.stdout ?? '';

    if (failure.killed) {
      const preview = `bash -c "${node.bash.substring(0, 80)}..."`;
      return {
        state: 'failed',
        error: formatSubprocessFailure(
          preview,
          null,
          capturedStdout,
          'Process was killed (likely timeout)',
        ),
      };
    }

    // A non-numeric code (e.g. a system error name) becomes null.
    const exitCode = Number(failure.code) || null;
    return {
      state: 'failed',
      error: formatSubprocessFailure(node.bash, exitCode, capturedStdout, failure.stderr ?? ''),
    };
  }
}

/**
 * Run a script node with the runtime it declares.
 *
 * - `bun`: when deps are listed, `bun add <deps>` runs in `artifactsDir`
 *   first; the script is then run with `bun run -e` (deps present) or
 *   `bun -e` (no deps) in `cwd`.
 * - `uv`: when deps are listed, `uv pip install <deps>` runs in
 *   `artifactsDir` first; the script is then run with `uv run python -c`
 *   in `cwd`.
 * - anything else: failed result.
 *
 * @param node - Script node (`runtime`, `script`, `deps`, optional `timeout_ms`, default 120s).
 * @param cwd - Working directory for the script itself.
 * @param env - Complete environment for all subprocesses.
 * @param artifactsDir - Working directory for dependency installation.
 * @returns `completed` with trimmed stdout, or `failed` with a description.
 */
async function executeScriptNodeByRuntime(
  node: ScriptNode,
  cwd: string,
  env: Record<string, string>,
  artifactsDir: string,
): Promise<NodeExecutionResult> {
  const timeoutMs = node.timeout_ms ?? 120_000;
  const runOptions = { cwd, env, timeout: timeoutMs, maxBuffer: 10 * 1024 * 1024 };

  // Shared shape for "dependency installation failed" results.
  const installFailed = (tool: string, cause: unknown): NodeExecutionResult => ({
    state: 'failed',
    error: `Failed to install ${tool} dependencies: ${cause instanceof Error ? cause.message : String(cause)}`,
  });

  switch (node.runtime) {
    case 'bun': {
      const needsInstall = node.deps.length > 0;
      if (needsInstall) {
        try {
          await execFileAsync('bun', ['add', ...node.deps], {
            cwd: artifactsDir,
            env,
            timeout: 60_000,
          });
        } catch (installErr) {
          return installFailed('bun', installErr);
        }
      }

      const bunArgs = needsInstall ? ['run', '-e', node.script] : ['-e', node.script];
      try {
        const finished = await execFileAsync('bun', bunArgs, runOptions);
        return { state: 'completed', output: finished.stdout.trim() };
      } catch (runErr) {
        return handleSubprocessError(runErr, `bun ${bunArgs.join(' ')}`, node.id);
      }
    }

    case 'uv': {
      if (node.deps.length > 0) {
        try {
          await execFileAsync('uv', ['pip', 'install', ...node.deps], {
            cwd: artifactsDir,
            env,
            timeout: 120_000,
          });
        } catch (installErr) {
          return installFailed('uv', installErr);
        }
      }

      try {
        const finished = await execFileAsync('uv', ['run', 'python', '-c', node.script], runOptions);
        return { state: 'completed', output: finished.stdout.trim() };
      } catch (runErr) {
        return handleSubprocessError(runErr, `uv run python -c "..."`, node.id);
      }
    }

    default:
      return { state: 'failed', error: `Unsupported script runtime: ${node.runtime}` };
  }
}

/**
 * Turn an error thrown by a script subprocess into a failed node result.
 *
 * - Not an `Error` carrying a `code` property: reported as `String(err)`.
 * - Flagged `killed`: reported as a likely timeout for `nodeId`.
 * - Otherwise: formatted from the command, the numeric exit code (null when
 *   the code is not a non-zero number) and the captured stdout/stderr.
 *
 * @param err - The caught value.
 * @param command - Human-readable command line for the message.
 * @param nodeId - Id of the node whose script failed.
 * @returns A result with `state: 'failed'`.
 */
function handleSubprocessError(err: unknown, command: string, nodeId: string): NodeExecutionResult {
  const asFailure = (error: string): NodeExecutionResult => ({ state: 'failed', error });

  if (!(err instanceof Error) || !('code' in err)) {
    return asFailure(String(err));
  }

  const details = err as Error & { code?: string; stdout?: string; stderr?: string; killed?: boolean };
  if (details.killed) {
    return asFailure(`Script node "${nodeId}" was killed (likely timeout)`);
  }

  const exitCode = Number(details.code) || null;
  return asFailure(
    formatSubprocessFailure(command, exitCode, details.stdout ?? '', details.stderr ?? ''),
  );
}

// ---------------------------------------------------------------------------
// Approval node handling
// ---------------------------------------------------------------------------

/**
 * Handle an approval node — pause the workflow and wait for a human decision.
 *
 * Flow:
 * 1. Resolve the approval message. This happens BEFORE the run is paused, so
 *    a bad output reference fails the node without leaving the run pending.
 * 2. Set the run status to `pending`, send the approval request and, when the
 *    platform supports it, an `approval_required` structured event.
 * 3. Poll the stored run every 2 seconds for up to 24 hours, looking for the
 *    key `__approval_<nodeId>` in the run's `output` object.
 *    - `approved === true`: status goes back to `running`; the result output
 *      is the approver's `response` string, or `'Approved'`.
 *    - anything else (including a malformed, non-object decision) is a
 *      rejection: if `on_reject` is configured, its prompt is sent to the
 *      provider best-effort and the node returns a failed result carrying
 *      that response; otherwise, or if that attempt fails, the run is marked
 *      failed and `ApprovalRejectedError` is thrown.
 * 4. On poll timeout the run is marked failed and a failed result returned.
 *
 * @param node - The approval node.
 * @param deps - Dependency container (store, provider lookup, config loader).
 * @param platform - Platform used for messages and structured events.
 * @param conversationId - Conversation the messages are sent to.
 * @param workflowRunId - Id of the run being paused and polled.
 * @param nodeOutputs - Outputs of earlier nodes, for message substitution.
 * @param workflowVariables - Workflow variables, for message substitution.
 * @returns The node result as described above.
 * @throws ApprovalRejectedError on rejection without a successful `on_reject` run.
 */
export async function handleApprovalNode(
  node: ApprovalNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  workflowRunId: string,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  // Build the message first: if substitution fails, nothing has been paused yet.
  let approvalMessage: string;
  try {
    approvalMessage = buildPromptWithContext(node.message, workflowVariables, nodeOutputs);
  } catch (err) {
    if (err instanceof OutputRefError) {
      return { state: 'failed', error: err.message };
    }
    throw err;
  }

  // Pause the workflow run
  await deps.store.updateWorkflowRun(workflowRunId, {
    status: 'pending' as const,
  });

  await safeSendMessage(platform, conversationId, `🔒 **Approval Required**: ${approvalMessage}`);

  // Emit structured event for approval gate
  if (platform.sendStructuredEvent) {
    await platform.sendStructuredEvent(conversationId, {
      type: 'approval_required',
      nodeId: node.id,
      nodeName: node.name,
      message: approvalMessage,
      runId: workflowRunId,
    });
  }

  // Poll for approval response
  const maxPollMs = 24 * 60 * 60 * 1000; // 24 hours
  const pollIntervalMs = 2000;
  const approvalKey = `__approval_${node.id}`;
  const startTime = Date.now();

  while (Date.now() - startTime < maxPollMs) {
    await sleep(pollIntervalMs);

    const run = await deps.store.getWorkflowRun(workflowRunId);
    if (!run) {
      return { state: 'failed', error: `Workflow run ${workflowRunId} not found during approval poll` };
    }

    // No decision recorded yet — keep waiting.
    if (!run.output || typeof run.output !== 'object') continue;
    const approvalContext = run.output as Record<string, unknown>;
    if (!(approvalKey in approvalContext)) continue;

    // Guard against null / primitive values so a malformed decision is
    // treated as a rejection instead of crashing on property access.
    const rawDecision = approvalContext[approvalKey];
    const decision: Record<string, unknown> =
      typeof rawDecision === 'object' && rawDecision !== null
        ? (rawDecision as Record<string, unknown>)
        : {};

    if (decision.approved === true) {
      // Approved — resume
      await deps.store.updateWorkflowRun(workflowRunId, { status: 'running' });
      const response = typeof decision.response === 'string' ? decision.response : undefined;

      await safeSendMessage(
        platform,
        conversationId,
        `✅ Approval granted for "${node.name ?? node.id}"`,
      );

      return {
        state: 'completed',
        output: response ?? 'Approved',
      };
    }

    // Rejected
    if (node.on_reject) {
      try {
        // Prompt building sits inside the best-effort block so that a
        // failure here still reaches the plain-rejection path below and the
        // run is marked failed rather than left pending.
        const rejectPrompt = buildPromptWithContext(
          node.on_reject,
          workflowVariables,
          nodeOutputs,
        );
        const provider = deps.getAgentProvider(deps.loadConfig ? (await deps.loadConfig(process.cwd())).assistant : 'default');
        const rejectResponse = await provider.sendPrompt(rejectPrompt);
        await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
        return {
          state: 'failed',
          error: `Approval rejected for "${node.name ?? node.id}"`,
          output: rejectResponse,
        };
      } catch {
        // on_reject is best-effort: fall through to the plain rejection.
      }
    }

    await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
    throw new ApprovalRejectedError(node.id, String(decision.reason ?? ''));
  }

  // Timeout
  await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
  return {
    state: 'failed',
    error: `Approval node "${node.id}" timed out waiting for response`,
  };
}

// ---------------------------------------------------------------------------
// Loop node handling
// ---------------------------------------------------------------------------

/**
 * Handle a loop node — repeat a prompt until a completion condition is met
 * or `max_iterations` is exhausted.
 *
 * Each iteration:
 * 1. Builds the prompt: `$LOOP_PREV_OUTPUT` is replaced with the previous
 *    iteration's output (the empty string on the first iteration), then
 *    workflow variables (with node `env` merged on top) and node output
 *    references are substituted.
 * 2. Sends the prompt to the provider.
 * 3. If `until_bash` is set, runs it with `bash -c` (30s timeout); exit code
 *    0 completes the loop. Any failure of that command, including a timeout,
 *    counts as "condition not met".
 * 4. If `until` is set, asks the provider whether the condition holds for
 *    the iteration output; an answer starting with "yes" completes the loop.
 * 5. If `interactive` is set, posts a gate message and waits for
 *    `pollForLoopGateApproval`; a negative answer stops the loop and returns
 *    the latest output as completed.
 *
 * NOTE: `fresh_context` currently makes no difference here — every iteration
 * is sent through the same `provider.sendPrompt(prompt)` call.
 *
 * @param node - The loop node.
 * @param deps - Dependency container; used to look up the agent provider.
 * @param platform - Platform used for progress messages and gate events.
 * @param conversationId - Conversation the messages are sent to.
 * @param cwd - Working directory for the `until_bash` command.
 * @param config - Workflow config; `assistant` is the fallback provider id.
 * @param nodeOutputs - Outputs of earlier nodes, for prompt substitution.
 * @param workflowVariables - Workflow variables, for prompt substitution.
 * @returns `completed` with the last iteration's output.
 * @throws LoopMaxIterationsError when no condition was met in time.
 *   Errors from the provider or from prompt substitution propagate as well.
 */
export async function handleLoopNode(
  node: LoopNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  cwd: string,
  config: WorkflowConfig,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  const { config: loopConfig } = node;
  const providerId = node.provider ?? config.assistant;
  const provider = deps.getAgentProvider(providerId);

  let iterationOutput = '';
  const maxIterations = loopConfig.max_iterations;

  // Merge node-level env vars
  const mergedVars = { ...workflowVariables, ...(node.env ?? {}) };

  await safeSendMessage(
    platform,
    conversationId,
    `🔄 Loop "${node.name ?? node.id}" starting (max ${maxIterations} iterations)`,
  );

  for (let i = 0; i < maxIterations; i++) {
    const iterationCount = i + 1;

    // A replacer FUNCTION is used so that `$&`, `$1`, `$$` etc. inside the
    // model's previous output are inserted literally rather than being
    // interpreted as replacement patterns. On the first iteration the
    // placeholder becomes the empty string instead of leaking into the prompt.
    const previousOutput = iterationOutput;
    const iterationPrompt = buildPromptWithContext(
      loopConfig.prompt.replace(/\$LOOP_PREV_OUTPUT/g, () => previousOutput),
      mergedVars,
      nodeOutputs,
    );

    // Execute iteration (see NOTE on fresh_context in the header).
    iterationOutput = await provider.sendPrompt(iterationPrompt);

    // Check until_bash condition
    if (loopConfig.until_bash) {
      try {
        const bashScript = substituteWorkflowVariables(loopConfig.until_bash, mergedVars);
        await execFileAsync('bash', ['-c', bashScript], { cwd, timeout: 30_000 });
        // Exit code 0 = condition met
        await safeSendMessage(
          platform,
          conversationId,
          `✅ Loop "${node.name ?? node.id}" completed after ${iterationCount} iterations (bash condition met)`,
        );
        return { state: 'completed', output: iterationOutput };
      } catch {
        // Non-zero exit (or any other failure) = condition not met, keep looping
      }
    }

    // Natural-language condition: only ask when one is actually configured,
    // otherwise the model would be asked about the literal text "undefined".
    if (loopConfig.until) {
      const checkPrompt = `Given the following output, is this condition met: "${loopConfig.until}"?\n\nOutput:\n${iterationOutput}\n\nAnswer ONLY "yes" or "no".`;
      const checkResponse = await provider.sendPrompt(checkPrompt);
      if (checkResponse.trim().toLowerCase().startsWith('yes')) {
        await safeSendMessage(
          platform,
          conversationId,
          `✅ Loop "${node.name ?? node.id}" completed after ${iterationCount} iterations`,
        );
        return { state: 'completed', output: iterationOutput };
      }
    }

    // Interactive gate
    if (loopConfig.interactive) {
      const gateMessage = loopConfig.gate_message ?? `Loop iteration ${iterationCount}/${maxIterations}. Continue?`;
      await safeSendMessage(platform, conversationId, `⏸️ ${gateMessage}`);

      if (platform.sendStructuredEvent) {
        await platform.sendStructuredEvent(conversationId, {
          type: 'loop_gate',
          nodeId: node.id,
          iteration: iterationCount,
          message: gateMessage,
        });
      }

      const gateResult = await pollForLoopGateApproval(
        deps,
        platform,
        conversationId,
        node.id,
        iterationCount,
      );

      if (!gateResult) {
        await safeSendMessage(
          platform,
          conversationId,
          `🛑 Loop "${node.name ?? node.id}" stopped by user at iteration ${iterationCount}`,
        );
        return { state: 'completed', output: iterationOutput };
      }
    }
  }

  // Max iterations reached
  throw new LoopMaxIterationsError(node.id, maxIterations);
}

/**
 * Wait for the user's answer at an interactive loop gate.
 *
 * Placeholder implementation: none of the arguments are consulted. It waits
 * one second and then reports approval. A real implementation would look up
 * the user's decision (for example in the store) instead.
 *
 * @returns Always `true` (continue looping).
 */
async function pollForLoopGateApproval(
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  nodeId: string,
  iteration: number,
): Promise<boolean> {
  const autoApproveDelayMs = 1000;
  await sleep(autoApproveDelayMs);

  // Auto-approve until real user input is wired in.
  const approved = true;
  return approved;
}

// ---------------------------------------------------------------------------
// Main DAG workflow executor
// ---------------------------------------------------------------------------

/**
 * Outcome of one `executeDagWorkflow` call.
 *
 * Returned for failures as well as successes: for example, a dependency
 * cycle produces a result with `success: false` and the cycle message in
 * both `summary` and `error`.
 */
export interface DagWorkflowResult {
  /** Human-readable description of the execution outcome. */
  summary: string;
  /**
   * Recorded outputs keyed by node id. Includes entries pre-populated from a
   * prior run when resuming, whatever state those entries carry.
   */
  nodeOutputs: Map<string, NodeOutput>;
  /** Whether the workflow completed successfully. */
  success: boolean;
  /** Error message if the workflow failed. */
  error?: string;
}

/**
 * Execute a complete DAG workflow.
 *
 * This is the main entry point called by the top-level executor.
 * It builds topological layers from the workflow nodes, then executes
 * each layer sequentially with nodes within a layer running concurrently
 * via Promise.allSettled.
 *
 * Per-node behavior:
 * - Nodes already present in `nodeOutputs` with state `completed` (resume)
 *   are returned as-is without re-execution.
 * - A falsy `when` condition or a `skip` trigger-rule result records the
 *   node as `skipped`.
 * - Ordinary handler errors are recorded as a `failed` node output and do
 *   NOT stop the workflow; later layers still run.
 * - A cancel node records itself as `failed` and then aborts the workflow:
 *   the remaining nodes of the current layer are allowed to settle, after
 *   which the function returns a failed result and no further layer runs.
 * - Any other error escaping a node task (e.g. from the `when` evaluation
 *   or from persisting the failure event) also aborts the workflow.
 *
 * @param deps - Dependency injection container.
 * @param platform - Platform interface for messaging.
 * @param conversationId - Conversation channel id.
 * @param cwd - Working directory for file operations.
 * @param workflow - The workflow definition.
 * @param workflowRun - The persisted workflow run record.
 * @param priorCompletedNodes - Pre-populated outputs from a prior run (for resume).
 * @returns DagWorkflowResult with summary and node outputs. A dependency
 *   cycle is reported through the result (success: false), not thrown.
 * @throws Any non-cycle error raised while building layers or loading config.
 */
export async function executeDagWorkflow(
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  cwd: string,
  workflow: WorkflowDefinition,
  workflowRun: { id: string; status: string },
  priorCompletedNodes?: Record<string, Record<string, unknown>>,
): Promise<DagWorkflowResult> {
  const nodeOutputs = new Map<string, NodeOutput>();
  // The run record's own fields are exposed as the variables used for
  // `when` conditions and passed on to the node handlers.
  const workflowVariables: Record<string, unknown> = { ...(workflowRun as Record<string, unknown>) };

  // Pre-populate from prior completed nodes (resume support)
  if (priorCompletedNodes) {
    for (const [nodeId, output] of Object.entries(priorCompletedNodes)) {
      nodeOutputs.set(nodeId, {
        nodeId,
        output: (output.output as string) ?? '',
        state: (output.state as 'completed' | 'failed' | 'skipped') ?? 'completed',
        text: output.text as string | undefined,
        fields: output.fields as Record<string, unknown> | undefined,
        error: output.error as string | undefined,
        costUsd: output.costUsd as number | undefined,
      });
    }
  }

  // Build topological layers
  let layers: DagNode[][] = [];
  try {
    layers = buildTopologicalLayers(workflow.nodes);
  } catch (err) {
    if (err instanceof DagCycleError) {
      return {
        summary: `Workflow failed: ${err.message}`,
        nodeOutputs,
        success: false,
        error: err.message,
      };
    }
    throw err;
  }

  // Load config for provider resolution
  const config = await deps.loadConfig(cwd);

  // Execute layers
  for (let layerIndex = 0; layerIndex < layers.length; layerIndex++) {
    const layer = layers[layerIndex]!;

    await safeSendMessage(
      platform,
      conversationId,
      `📋 Executing layer ${layerIndex + 1}/${layers.length} (${layer.length} node${layer.length > 1 ? 's' : ''})`,
    );

    // Execute all nodes in the layer concurrently
    const results = await Promise.allSettled(
      layer.map(async (node) => {
        // Skip already-completed nodes (resume)
        const priorOutput = nodeOutputs.get(node.id);
        if (priorOutput?.state === 'completed') {
          return { nodeId: node.id, result: priorOutput } as const;
        }

        // Check when condition
        if (node.when) {
          const shouldRun = evaluateCondition(node.when, workflowVariables);
          if (!shouldRun) {
            const skippedOutput: NodeOutput = {
              nodeId: node.id,
              output: '',
              state: 'skipped',
            };
            nodeOutputs.set(node.id, skippedOutput);
            return { nodeId: node.id, result: skippedOutput } as const;
          }
        }

        // Check trigger rule
        const triggerResult = checkTriggerRule(node, nodeOutputs);
        if (triggerResult === 'skip') {
          const skippedOutput: NodeOutput = {
            nodeId: node.id,
            output: '',
            state: 'skipped',
          };
          nodeOutputs.set(node.id, skippedOutput);
          return { nodeId: node.id, result: skippedOutput } as const;
        }

        // Set only when a cancel node fires. The catch block below uses it
        // to tell a deliberate cancellation (which must abort the workflow)
        // apart from an ordinary node failure (which must not).
        let cancelError: Error | undefined;

        // Dispatch to correct handler
        try {
          // Emit node start event
          await deps.store.createWorkflowEvent({
            runId: workflowRun.id,
            nodeId: node.id,
            type: 'node_started',
            data: { kind: node.kind, name: node.name },
          });

          let result: NodeExecutionResult;

          if (isCancelNode(node)) {
            // Cancel node — immediately fail the workflow
            const reason = node.reason ?? 'Workflow cancelled by cancel node';
            cancelError = new Error(reason);
            await deps.store.createWorkflowEvent({
              runId: workflowRun.id,
              nodeId: node.id,
              type: 'node_cancelled',
              data: { reason },
            });
            throw cancelError;
          } else if (isApprovalNode(node)) {
            result = await handleApprovalNode(
              node,
              deps,
              platform,
              conversationId,
              workflowRun.id,
              nodeOutputs,
              workflowVariables,
            );
          } else if (isLoopNode(node)) {
            result = await handleLoopNode(
              node,
              deps,
              platform,
              conversationId,
              cwd,
              config,
              nodeOutputs,
              workflowVariables,
            );
          } else if (isBashNode(node) || isScriptNode(node)) {
            result = await executeScriptNode(
              node,
              cwd,
              { ...config.envVars, ...(node.env ?? {}) },
              join(cwd, '.ion', 'artifacts', workflowRun.id),
            );
          } else if (isPromptNode(node)) {
            result = await executeNodeInternal(
              node,
              deps,
              platform,
              conversationId,
              cwd,
              config,
              nodeOutputs,
              workflowVariables,
            );
          } else if (isCommandNode(node)) {
            // CommandNode — execute as bash
            const bashNode: BashNode = {
              id: node.id,
              kind: 'bash',
              name: node.name,
              bash: node.command,
              // NOTE(review): the timeout is keyed off retry.delay_ms — a
              // truthy retry delay leaves timeout_ms undefined, otherwise the
              // 60s default applies. Confirm this coupling is intended.
              timeout_ms: node.retry?.delay_ms ? undefined : 60_000,
              depends_on: node.depends_on,
              trigger_rule: node.trigger_rule,
              retry: node.retry,
              env: node.env,
            };
            result = await executeScriptNode(
              bashNode,
              cwd,
              { ...config.envVars, ...(node.env ?? {}) },
              join(cwd, '.ion', 'artifacts', workflowRun.id),
            );
          } else {
            result = {
              state: 'failed',
              error: `Unknown node kind: ${(node as DagNode).kind}`,
            };
          }

          // Convert to NodeOutput and store
          const nodeOutput: NodeOutput = {
            nodeId: node.id,
            output: result.output ?? '',
            state: result.state,
            text: result.output,
            fields: result.fields,
            error: result.error,
            costUsd: result.costUsd,
          };
          nodeOutputs.set(node.id, nodeOutput);

          // Emit node completion event
          await deps.store.createWorkflowEvent({
            runId: workflowRun.id,
            nodeId: node.id,
            type: result.state === 'completed' ? 'node_completed' : 'node_failed',
            data: { state: result.state, error: result.error },
          });

          return { nodeId: node.id, result: nodeOutput } as const;
        } catch (err) {
          const errorMsg = err instanceof Error ? err.message : String(err);
          const failedOutput: NodeOutput = {
            nodeId: node.id,
            output: '',
            state: 'failed',
            error: errorMsg,
          };
          nodeOutputs.set(node.id, failedOutput);

          await deps.store.createWorkflowEvent({
            runId: workflowRun.id,
            nodeId: node.id,
            type: 'node_failed',
            data: { error: errorMsg },
          });

          // A cancellation must surface as a rejected task so the layer-level
          // check below stops the workflow. Swallowing it here would let all
          // subsequent layers keep executing.
          if (cancelError) {
            throw cancelError;
          }

          return { nodeId: node.id, result: failedOutput } as const;
        }
      }),
    );

    // Check for failures that should stop the workflow
    const failures = results.filter(
      (r): r is PromiseRejectedResult => r.status === 'rejected',
    );
    if (failures.length > 0) {
      // A cancel node or unhandled error — stop the workflow
      const errorMsg = failures
        .map((f) => (f.reason instanceof Error ? f.reason.message : String(f.reason)))
        .join('; ');
      return {
        summary: `Workflow failed at layer ${layerIndex + 1}: ${errorMsg}`,
        nodeOutputs,
        success: false,
        error: errorMsg,
      };
    }
  }

  // All layers completed — tally final node states in a single pass
  let completedCount = 0;
  let skippedCount = 0;
  let failedCount = 0;
  for (const { state } of nodeOutputs.values()) {
    if (state === 'completed') {
      completedCount++;
    } else if (state === 'skipped') {
      skippedCount++;
    } else if (state === 'failed') {
      failedCount++;
    }
  }

  const summary = `Workflow completed: ${completedCount} succeeded, ${skippedCount} skipped, ${failedCount} failed`;
  return {
    summary,
    nodeOutputs,
    success: failedCount === 0,
    error: failedCount > 0 ? `${failedCount} node(s) failed` : undefined,
  };
}