New @boocode/ion package (v0.0.1) for inference optimization network. .codesight/ wiki artifacts for codebase documentation. .omo/ work plans for openspec cleanup and enhanced file panel.
1149 lines
36 KiB
TypeScript
1149 lines
36 KiB
TypeScript
/**
|
|
* DAG Executor — the core execution engine for Ion workflows.
|
|
*
|
|
* Traverses a DAG of nodes in topological layers, executing nodes within
|
|
* each layer concurrently via Promise.allSettled. Supports prompt nodes,
|
|
* command/bash/script nodes, approval gates, loop nodes, and cancel nodes.
|
|
*
|
|
* Architecture mirrors Archon's dag-executor: Kahn's algorithm for layer
|
|
* building, trigger rules for dependency evaluation, and Promise.allSettled
|
|
* for concurrent execution within each layer.
|
|
*/
|
|
|
|
import { execFile } from 'node:child_process';
|
|
import { promisify } from 'node:util';
|
|
import { readFile } from 'node:fs/promises';
|
|
import { join } from 'node:path';
|
|
|
|
import type {
|
|
DagNode,
|
|
PromptNode,
|
|
BashNode,
|
|
ScriptNode,
|
|
ApprovalNode,
|
|
LoopNode,
|
|
CancelNode,
|
|
NodeOutput,
|
|
NodeExecutionResult,
|
|
TriggerRule,
|
|
WorkflowDefinition,
|
|
} from '../schema/index.js';
|
|
import { DEFAULT_TRIGGER_RULE } from '../schema/index.js';
|
|
import {
|
|
isBashNode,
|
|
isScriptNode,
|
|
isLoopNode,
|
|
isApprovalNode,
|
|
isCancelNode,
|
|
isPromptNode,
|
|
isCommandNode,
|
|
} from '../schema/index.js';
|
|
|
|
import type { IWorkflowPlatform, WorkflowDeps, WorkflowConfig } from './deps.js';
|
|
import {
|
|
evaluateCondition,
|
|
substituteWorkflowVariables,
|
|
substituteNodeOutputRefs,
|
|
buildPromptWithContext,
|
|
classifyError,
|
|
safeSendMessage,
|
|
formatSubprocessFailure,
|
|
sleep,
|
|
retryWithBackoff,
|
|
OutputRefError,
|
|
DagCycleError,
|
|
NodeTimeoutError,
|
|
ApprovalRejectedError,
|
|
LoopMaxIterationsError,
|
|
} from './utils.js';
|
|
|
|
// Promise-returning form of child_process.execFile, so the subprocess calls in this
// file (bash, bun, uv) can be awaited and surface failures as rejected promises.
const execFileAsync = promisify(execFile);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Topological layer building (Kahn's algorithm)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Build topological layers from a flat list of DAG nodes using Kahn's algorithm.
 *
 * Layer 0 holds every node without dependencies; each following layer holds the
 * nodes whose dependencies have all been placed in earlier layers. Nodes inside
 * one layer do not depend on each other and may therefore run concurrently.
 *
 * Repeated ids inside a single `depends_on` list are counted once, so a
 * duplicated dependency is not mistaken for a cycle.
 *
 * @param nodes - Flat list of DAG nodes with `depends_on` references.
 * @returns Array of layers, each layer being an array of nodes.
 * @throws DagCycleError if not every node could be placed. This happens for a
 *   real cycle and also when a node depends on an id that is not in `nodes`.
 */
export function buildTopologicalLayers(nodes: DagNode[]): DagNode[][] {
  const nodeMap = new Map<string, DagNode>();
  const inDegree = new Map<string, number>();
  const adjacency = new Map<string, Set<string>>(); // dep → nodes that depend on it

  for (const node of nodes) {
    nodeMap.set(node.id, node);

    // Edges are stored in a Set (one edge per distinct dependency), so the
    // in-degree must be the number of DISTINCT dependencies as well. Counting
    // raw list length would leave the node stuck above zero forever.
    const uniqueDeps = new Set(node.depends_on);
    inDegree.set(node.id, uniqueDeps.size);

    for (const dep of uniqueDeps) {
      let dependents = adjacency.get(dep);
      if (!dependents) {
        dependents = new Set();
        adjacency.set(dep, dependents);
      }
      dependents.add(node.id);
    }
  }

  // Seed with the nodes that have nothing to wait for.
  let currentLayer: string[] = [];
  for (const [id, degree] of inDegree) {
    if (degree === 0) currentLayer.push(id);
  }

  const layers: DagNode[][] = [];
  let totalProcessed = 0;

  while (currentLayer.length > 0) {
    const layerNodes = currentLayer
      .map((id) => nodeMap.get(id))
      .filter((n): n is DagNode => n !== undefined);
    layers.push(layerNodes);
    totalProcessed += layerNodes.length;

    // Releasing this layer lowers the in-degree of every dependent; those
    // that reach zero form the next layer.
    const nextLayer: string[] = [];
    for (const id of currentLayer) {
      const dependents = adjacency.get(id);
      if (!dependents) continue;
      for (const depId of dependents) {
        const remaining = (inDegree.get(depId) ?? 0) - 1;
        inDegree.set(depId, remaining);
        if (remaining === 0) nextLayer.push(depId);
      }
    }

    currentLayer = nextLayer;
  }

  // Anything left unplaced is part of a cycle (or waits on an unknown id).
  if (totalProcessed < nodes.length) {
    throw new DagCycleError(nodes.length, totalProcessed);
  }

  return layers;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Trigger rule evaluation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Decide whether a node runs or is skipped, given its trigger rule and the
 * recorded outputs of the nodes it depends on.
 *
 * A node without dependencies always runs. Otherwise:
 * - `all_success`: every dependency has state `completed`.
 * - `one_success`: at least one dependency has state `completed`.
 * - `all_done`: every dependency has a recorded output, whatever its state.
 * - `none_failed_min_one_success`: no dependency has state `failed` and at
 *   least one has state `completed`.
 * Any other rule value lets the node run.
 *
 * @param node - The DAG node to evaluate.
 * @param nodeOutputs - Outputs recorded so far, keyed by node id.
 * @returns `'run'` if the node should execute, `'skip'` otherwise.
 */
export function checkTriggerRule(
  node: DagNode,
  nodeOutputs: Map<string, NodeOutput>,
): 'run' | 'skip' {
  const rule: TriggerRule = node.trigger_rule ?? DEFAULT_TRIGGER_RULE;

  if (node.depends_on.length === 0) return 'run';

  const upstream = node.depends_on.map((depId) => nodeOutputs.get(depId));
  const succeeded = (o: NodeOutput | undefined): boolean => o?.state === 'completed';
  const verdict = (shouldRun: boolean): 'run' | 'skip' => (shouldRun ? 'run' : 'skip');

  if (rule === 'all_success') {
    return verdict(upstream.every(succeeded));
  }

  if (rule === 'one_success') {
    return verdict(upstream.some(succeeded));
  }

  if (rule === 'all_done') {
    // Only presence matters here: a failed or skipped dependency still counts.
    return verdict(upstream.every((o) => o !== undefined));
  }

  if (rule === 'none_failed_min_one_success') {
    const anyFailed = upstream.some((o) => o?.state === 'failed');
    const anySucceeded = upstream.some(succeeded);
    return verdict(!anyFailed && anySucceeded);
  }

  return 'run';
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Node output reference substitution
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
|
|
* Substitute node output references in a prompt string.
|
|
*
|
|
* Resolves `$nodeId.output` → full text, `$nodeId.output.field` → structured field.
|
|
*
|
|
* @param prompt - Template string with `$nodeId.output` references.
|
|
* @param nodeOutputs - Map of node id → NodeOutput.
|
|
* @param escapedForBash - If true, escape special bash characters in output values.
|
|
*/
|
|
export { substituteNodeOutputRefs } from './utils.js';
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Prompt / command node execution
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Execute a single prompt node by sending its prompt to an AI provider.
 *
 * Steps:
 * 1. Resolve the prompt text from `command_file` (read relative to `cwd`) or
 *    from the inline `prompt`.
 * 2. Substitute workflow variables and node output references. Node-level
 *    `env` entries are merged over the workflow variables first, so they take
 *    part in the substitution (the same merge `handleLoopNode` applies).
 * 3. Send the prompt through `retryWithBackoff`, each attempt bounded by the
 *    node's timeout (`idle_timeout_ms`, default 5 minutes).
 * 4. When `output_format` is set, parse and validate the response. A response
 *    that is empty, unparseable, or fails validation is re-asked, up to three
 *    prompts in total. After the last prompt the raw text is returned
 *    best-effort without `fields`.
 *
 * Expected failures are reported through the returned result rather than
 * thrown. Only a non-`OutputRefError` raised during substitution is rethrown.
 *
 * @param node - The prompt node to execute.
 * @param deps - Dependency container; used to look up the agent provider.
 * @param platform - Platform used for the completion notification.
 * @param conversationId - Conversation to notify.
 * @param cwd - Base directory for resolving `command_file`.
 * @param config - Workflow config; supplies the default provider id.
 * @param nodeOutputs - Outputs of earlier nodes, for `$nodeId.output` references.
 * @param workflowVariables - Workflow-level variables for substitution.
 * @returns `completed` with `output` (and `fields` when structured output
 *   validated), or `failed` with an `error` message.
 */
export async function executeNodeInternal(
  node: PromptNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  cwd: string,
  config: WorkflowConfig,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  const providerId = node.provider ?? config.assistant;
  const provider = deps.getAgentProvider(providerId);

  // Resolve prompt text
  let promptText: string;
  if (node.command_file) {
    try {
      const filePath = join(cwd, node.command_file);
      promptText = await readFile(filePath, 'utf-8');
    } catch (err) {
      return {
        state: 'failed',
        error: `Failed to read command file "${node.command_file}": ${err instanceof Error ? err.message : String(err)}`,
      };
    }
  } else if (node.prompt) {
    promptText = node.prompt;
  } else {
    return { state: 'failed', error: `Prompt node "${node.id}" has neither prompt nor command_file` };
  }

  // Node-level env vars override workflow variables for this node only.
  const mergedVars = { ...workflowVariables, ...(node.env ?? {}) };

  // Apply variable substitution
  try {
    promptText = buildPromptWithContext(promptText, mergedVars, nodeOutputs);
  } catch (err) {
    if (err instanceof OutputRefError) {
      return { state: 'failed', error: err.message };
    }
    throw err;
  }

  // Retry configuration
  const maxAttempts = node.retry?.max_attempts ?? 1;
  const onError = node.retry?.on_error ?? 'transient';
  const delayMs = node.retry?.delay_ms ?? 1000;

  // Per-attempt timeout
  const idleTimeoutMs = node.idle_timeout_ms ?? 300_000; // 5 minutes default

  // With on_error === 'all' no predicate is passed to retryWithBackoff;
  // otherwise only transient and rate-limit errors are retried.
  const retryPredicate = onError === 'all'
    ? undefined
    : (err: unknown) => {
        const category = classifyError(err);
        return category === 'transient' || category === 'rate_limit';
      };

  // Structured output gets up to three prompts; plain text gets one.
  const maxReaskAttempts = node.output_format ? 3 : 1;
  let lastOutput = '';
  let lastFields: Record<string, unknown> | undefined;
  // Nothing in this function obtains a cost figure, so it stays undefined.
  const costUsd: number | undefined = undefined;

  for (let reaskAttempt = 0; reaskAttempt < maxReaskAttempts; reaskAttempt++) {
    const currentPrompt = reaskAttempt === 0
      ? promptText
      : `${promptText}\n\nPrevious response did not match the expected format. Please try again, ensuring your response matches: ${JSON.stringify(node.output_format)}`;

    let responseText: string | undefined;
    try {
      responseText = await retryWithBackoff(
        () => executeWithIdleTimeout(provider, currentPrompt, idleTimeoutMs, node.id),
        maxAttempts,
        delayMs,
        retryPredicate,
      );
    } catch (err) {
      if (classifyError(err) === 'timeout') {
        return {
          state: 'failed',
          error: `Node "${node.id}" timed out after ${idleTimeoutMs}ms of inactivity`,
        };
      }
      return {
        state: 'failed',
        error: err instanceof Error ? err.message : String(err),
      };
    }

    lastOutput = responseText ?? '';

    // Plain-text node: the first response is the result.
    if (!node.output_format) break;

    // tryParseStructuredOutput reports failure by returning undefined (it
    // does not throw), so "could not parse" has to be handled here exactly
    // like "parsed but invalid": ask again while attempts remain.
    const parsed = lastOutput ? tryParseStructuredOutput(lastOutput) : undefined;
    if (parsed && validateStructuredOutput(parsed, node.output_format).valid) {
      lastFields = parsed;
      break;
    }
    // Otherwise loop again; after the final attempt the loop simply ends and
    // the raw text is returned best-effort.
  }

  // Notify platform
  await safeSendMessage(
    platform,
    conversationId,
    `✅ Node "${node.name ?? node.id}" completed`,
  );

  return {
    state: 'completed',
    output: lastOutput,
    fields: lastFields,
    costUsd,
  };
}
|
|
|
|
/**
 * Send a prompt to a provider, aborting the request if it takes too long.
 *
 * A single timer is started before the call and aborts the `AbortSignal`
 * passed to `sendPrompt` after `idleTimeoutMs`. The timer is not reset by
 * partial progress, so it bounds the whole call. If the call rejects after
 * the signal was aborted, the rejection is reported as a `NodeTimeoutError`;
 * any other rejection is rethrown unchanged.
 *
 * @param provider - Object exposing `sendPrompt`.
 * @param prompt - Prompt text to send.
 * @param idleTimeoutMs - Milliseconds before the request is aborted.
 * @param nodeId - Node id carried in the timeout error.
 * @returns The provider's response text.
 */
async function executeWithIdleTimeout(
  provider: { sendPrompt: (prompt: string, options?: Record<string, unknown>) => Promise<string> },
  prompt: string,
  idleTimeoutMs: number,
  nodeId: string,
): Promise<string> {
  const abortController = new AbortController();
  const timer = setTimeout(() => {
    abortController.abort();
  }, idleTimeoutMs);

  try {
    return await provider.sendPrompt(prompt, { signal: abortController.signal });
  } catch (cause) {
    if (abortController.signal.aborted) {
      throw new NodeTimeoutError(nodeId, idleTimeoutMs);
    }
    throw cause;
  } finally {
    // Runs on success and on failure, so the timer never outlives the call.
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Extract a JSON object from a model response, if there is one.
 *
 * The whole text is tried as JSON first. If that does not yield a plain
 * object, the first markdown code fence (optionally tagged `json`) is tried.
 * Arrays, primitives and `null` are not accepted as structured output.
 *
 * @param text - Raw response text.
 * @returns The parsed object, or `undefined` when no JSON object was found.
 */
function tryParseStructuredOutput(text: string): Record<string, unknown> | undefined {
  // Parse one candidate string; only a non-null, non-array object qualifies.
  const parseAsObject = (candidate: string): Record<string, unknown> | undefined => {
    let value: unknown;
    try {
      value = JSON.parse(candidate);
    } catch {
      return undefined;
    }
    const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value);
    return isPlainObject ? (value as Record<string, unknown>) : undefined;
  };

  const direct = parseAsObject(text);
  if (direct !== undefined) return direct;

  const fenced = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(text);
  return fenced ? parseAsObject(fenced[1]!) : undefined;
}
|
|
|
|
/**
 * Check parsed output against a schema definition.
 *
 * Best-effort only: the sole rule enforced is that every entry of the
 * schema's `required` array is present as an OWN property of `parsed`.
 * Own-property lookup is used rather than `in`, so names inherited from
 * `Object.prototype` (`constructor`, `toString`, ...) are not mistaken for
 * fields the model supplied. A schema whose `required` is absent or not an
 * array always validates.
 *
 * @param parsed - Object parsed from the model response.
 * @param schema - Schema definition; only `required` is read.
 * @returns `{ valid: true }`, or `{ valid: false, missingKeys }` listing the
 *   required keys that are absent.
 */
function validateStructuredOutput(
  parsed: Record<string, unknown>,
  schema: Record<string, unknown>,
): { valid: boolean; missingKeys?: string[] } {
  const required = schema['required'];
  if (!Array.isArray(required)) {
    return { valid: true };
  }

  const missingKeys = required
    .map((key) => String(key))
    .filter((key) => !Object.prototype.hasOwnProperty.call(parsed, key));

  return missingKeys.length > 0 ? { valid: false, missingKeys } : { valid: true };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Script / Bash node execution
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Run a bash or script node in a subprocess.
 *
 * The child environment is the current process environment with `envVars`
 * layered on top. Bash nodes are handed to `executeBashNode`, script nodes to
 * `executeScriptNodeByRuntime`. Anything thrown along the way is converted
 * into a `failed` result, so this function does not reject.
 *
 * @param node - The bash or script node to run.
 * @param cwd - Working directory for the subprocess.
 * @param envVars - Extra environment variables; they win over `process.env`.
 * @param artifactsDir - Directory passed on to script-runtime execution.
 * @returns The node's execution result.
 */
export async function executeScriptNode(
  node: BashNode | ScriptNode,
  cwd: string,
  envVars: Record<string, string>,
  artifactsDir: string,
): Promise<NodeExecutionResult> {
  const childEnv: Record<string, string> = Object.assign(
    {},
    process.env as Record<string, string>,
    envVars,
  );

  try {
    if (isBashNode(node)) {
      return await executeBashNode(node, cwd, childEnv);
    }
    if (isScriptNode(node)) {
      return await executeScriptNodeByRuntime(node, cwd, childEnv, artifactsDir);
    }
    return { state: 'failed', error: `Unknown script node kind: ${(node as DagNode).kind}` };
  } catch (failure) {
    const error = failure instanceof Error ? failure.message : String(failure);
    return { state: 'failed', error };
  }
}
|
|
|
|
/**
 * Run a bash node's script with `bash -c` and capture its stdout.
 *
 * The subprocess is limited to `node.timeout_ms` (default 60 s) and 10MB of
 * buffered output. Failures never reject; they come back as `failed`
 * results. An exec error flagged `killed` gets a "likely timeout" message;
 * other exec errors are formatted from the captured stdout/stderr.
 *
 * @param node - The bash node; `node.bash` is the script text.
 * @param cwd - Working directory for the subprocess.
 * @param env - Complete environment for the subprocess.
 * @returns `completed` with trimmed stdout, or `failed` with an error message.
 */
async function executeBashNode(
  node: BashNode,
  cwd: string,
  env: Record<string, string>,
): Promise<NodeExecutionResult> {
  const timeoutMs = node.timeout_ms ?? 60_000;

  try {
    const { stdout } = await execFileAsync('bash', ['-c', node.bash], {
      cwd,
      env,
      timeout: timeoutMs,
      maxBuffer: 10 * 1024 * 1024, // 10MB
    });
    return { state: 'completed', output: stdout.trim() };
  } catch (err) {
    // Not an exec-style error object: report it verbatim.
    if (!(err instanceof Error) || !('code' in err)) {
      return { state: 'failed', error: String(err) };
    }

    const failure = err as Error & { code?: string; stdout?: string; stderr?: string; killed?: boolean };
    const capturedStdout = failure.stdout ?? '';

    if (failure.killed) {
      return {
        state: 'failed',
        error: formatSubprocessFailure(
          `bash -c "${node.bash.substring(0, 80)}..."`,
          null,
          capturedStdout,
          'Process was killed (likely timeout)',
        ),
      };
    }

    // A non-numeric code (e.g. a string errno name) becomes null.
    const exitCode = Number((err as NodeJS.ErrnoException).code) || null;
    return {
      state: 'failed',
      error: formatSubprocessFailure(node.bash, exitCode, capturedStdout, failure.stderr ?? ''),
    };
  }
}
|
|
|
|
/**
 * Run a script node with the runtime it declares.
 *
 * - `bun`: when `deps` is non-empty, `bun add <deps>` runs in `artifactsDir`
 *   first; the script then runs via `bun run -e` (with deps) or `bun -e`.
 * - `uv`: when `deps` is non-empty, `uv pip install <deps>` runs in
 *   `artifactsDir` first; the script then runs via `uv run python -c`.
 *
 * The script itself executes in `cwd`, limited to `node.timeout_ms`
 * (default 120 s) and 10MB of buffered output. No failure rejects; each one
 * is returned as a `failed` result.
 *
 * @param node - The script node (`runtime`, `script`, `deps`).
 * @param cwd - Working directory for the script run.
 * @param env - Complete environment for the subprocesses.
 * @param artifactsDir - Working directory for dependency installation.
 * @returns `completed` with trimmed stdout, or `failed` with an error message.
 */
async function executeScriptNodeByRuntime(
  node: ScriptNode,
  cwd: string,
  env: Record<string, string>,
  artifactsDir: string,
): Promise<NodeExecutionResult> {
  const timeoutMs = node.timeout_ms ?? 120_000;
  const hasDeps = node.deps.length > 0;
  const runOptions = { cwd, env, timeout: timeoutMs, maxBuffer: 10 * 1024 * 1024 };

  switch (node.runtime) {
    case 'bun': {
      if (hasDeps) {
        try {
          await execFileAsync('bun', ['add', ...node.deps], {
            cwd: artifactsDir,
            env,
            timeout: 60_000,
          });
        } catch (installErr) {
          const reason = installErr instanceof Error ? installErr.message : String(installErr);
          return { state: 'failed', error: `Failed to install bun dependencies: ${reason}` };
        }
      }

      const bunArgs = hasDeps ? ['run', '-e', node.script] : ['-e', node.script];
      try {
        const { stdout } = await execFileAsync('bun', bunArgs, runOptions);
        return { state: 'completed', output: stdout.trim() };
      } catch (runErr) {
        return handleSubprocessError(runErr, `bun ${bunArgs.join(' ')}`, node.id);
      }
    }

    case 'uv': {
      if (hasDeps) {
        try {
          await execFileAsync('uv', ['pip', 'install', ...node.deps], {
            cwd: artifactsDir,
            env,
            timeout: 120_000,
          });
        } catch (installErr) {
          const reason = installErr instanceof Error ? installErr.message : String(installErr);
          return { state: 'failed', error: `Failed to install uv dependencies: ${reason}` };
        }
      }

      try {
        const { stdout } = await execFileAsync('uv', ['run', 'python', '-c', node.script], runOptions);
        return { state: 'completed', output: stdout.trim() };
      } catch (runErr) {
        return handleSubprocessError(runErr, `uv run python -c "..."`, node.id);
      }
    }

    default:
      return { state: 'failed', error: `Unsupported script runtime: ${node.runtime}` };
  }
}
|
|
|
|
/**
 * Turn a subprocess failure into a `failed` node result.
 *
 * - A value that is not an `Error` carrying a `code` property is stringified.
 * - An exec error flagged `killed` produces a "likely timeout" message that
 *   names the node.
 * - Any other exec error is formatted from the command, the numeric exit
 *   code (or `null` when the code is not a non-zero number) and the captured
 *   stdout/stderr.
 *
 * @param err - The value caught from the subprocess call.
 * @param command - Command description used in the formatted message.
 * @param nodeId - Id of the node that ran the subprocess.
 * @returns A result with `state: 'failed'` and an `error` message.
 */
function handleSubprocessError(err: unknown, command: string, nodeId: string): NodeExecutionResult {
  const isExecError = err instanceof Error && 'code' in err;
  if (!isExecError) {
    return { state: 'failed', error: String(err) };
  }

  const failure = err as Error & { code?: string; stdout?: string; stderr?: string; killed?: boolean };

  const error = failure.killed
    ? `Script node "${nodeId}" was killed (likely timeout)`
    : formatSubprocessFailure(
        command,
        Number((err as NodeJS.ErrnoException).code) || null,
        failure.stdout ?? '',
        failure.stderr ?? '',
      );

  return { state: 'failed', error };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Approval node handling
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Handle an approval node: pause the run and poll until a human decides.
 *
 * Flow:
 * 1. Mark the run `pending`, send the approval message (with variables and
 *    node output references substituted) and, if the platform supports it,
 *    a structured `approval_required` event.
 * 2. Every 2 seconds, for at most 24 hours, reload the run and look for a
 *    `__approval_<nodeId>` entry in its `output` object.
 * 3. `approved === true`: mark the run `running` and return `completed`,
 *    with the approver's `response` string (or `'Approved'`) as output.
 * 4. Anything else is a rejection. A decision entry that is not an object
 *    (including `null`) is treated as a rejection without a reason instead
 *    of crashing on property access. If `on_reject` is configured, that
 *    prompt is sent to the provider and the node returns `failed` with the
 *    provider's reply as output; if that attempt itself fails, the plain
 *    rejection path is used.
 *
 * @param node - The approval node.
 * @param deps - Dependency container (store, provider lookup, config loader).
 * @param platform - Platform used for messages and structured events.
 * @param conversationId - Conversation to notify.
 * @param workflowRunId - Id of the run being paused.
 * @param nodeOutputs - Outputs of earlier nodes, for reference substitution.
 * @param workflowVariables - Workflow-level variables for substitution.
 * @returns `completed` on approval; `failed` on timeout, on a missing run,
 *   or on rejection handled through `on_reject`.
 * @throws ApprovalRejectedError on rejection without a successful
 *   `on_reject` execution.
 */
export async function handleApprovalNode(
  node: ApprovalNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  workflowRunId: string,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  // Pause the workflow run
  await deps.store.updateWorkflowRun(workflowRunId, {
    status: 'pending' as const,
  });

  // Send approval request to user
  const approvalMessage = buildPromptWithContext(
    node.message,
    workflowVariables,
    nodeOutputs,
  );

  await safeSendMessage(platform, conversationId, `🔒 **Approval Required**: ${approvalMessage}`);

  // Emit structured event for approval gate
  if (platform.sendStructuredEvent) {
    await platform.sendStructuredEvent(conversationId, {
      type: 'approval_required',
      nodeId: node.id,
      nodeName: node.name,
      message: approvalMessage,
      runId: workflowRunId,
    });
  }

  // Poll for approval response
  const maxPollMs = 24 * 60 * 60 * 1000; // 24 hours
  const pollIntervalMs = 2000;
  const startTime = Date.now();
  const approvalKey = `__approval_${node.id}`;

  while (Date.now() - startTime < maxPollMs) {
    await sleep(pollIntervalMs);

    const run = await deps.store.getWorkflowRun(workflowRunId);
    if (!run) {
      return { state: 'failed', error: `Workflow run ${workflowRunId} not found during approval poll` };
    }

    // No decision recorded yet: keep polling.
    if (!run.output || typeof run.output !== 'object') continue;
    const approvalContext = run.output as Record<string, unknown>;
    if (!(approvalKey in approvalContext)) continue;

    // Normalise the stored decision. Non-object values carry no `approved`
    // flag and therefore count as a rejection; null/undefined get the same
    // treatment rather than throwing a TypeError below.
    const rawDecision = approvalContext[approvalKey];
    const decision: Record<string, unknown> =
      typeof rawDecision === 'object' && rawDecision !== null
        ? (rawDecision as Record<string, unknown>)
        : {};

    if (decision.approved === true) {
      // Approved — resume
      await deps.store.updateWorkflowRun(workflowRunId, { status: 'running' });
      const response = typeof decision.response === 'string' ? decision.response : undefined;

      await safeSendMessage(
        platform,
        conversationId,
        `✅ Approval granted for "${node.name ?? node.id}"`,
      );

      return {
        state: 'completed',
        output: response ?? 'Approved',
      };
    }

    // Rejected
    if (node.on_reject) {
      const rejectPrompt = buildPromptWithContext(
        node.on_reject,
        workflowVariables,
        nodeOutputs,
      );
      try {
        const provider = deps.getAgentProvider(deps.loadConfig ? (await deps.loadConfig(process.cwd())).assistant : 'default');
        const rejectResponse = await provider.sendPrompt(rejectPrompt);
        await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
        return {
          state: 'failed',
          error: `Approval rejected for "${node.name ?? node.id}"`,
          output: rejectResponse,
        };
      } catch {
        // Deliberately best-effort: if the on_reject prompt cannot be run,
        // fall through to the plain rejection below.
      }
    }

    await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
    throw new ApprovalRejectedError(node.id, String(decision.reason ?? ''));
  }

  // Timeout
  await deps.store.updateWorkflowRun(workflowRunId, { status: 'failed' });
  return {
    state: 'failed',
    error: `Approval node "${node.id}" timed out waiting for response`,
  };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Loop node handling
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Handle a loop node: repeat a prompt until a stop condition holds or the
 * iteration limit is reached.
 *
 * Each iteration:
 * 1. Builds the prompt. From the second iteration on, `$LOOP_PREV_OUTPUT` is
 *    replaced by the previous iteration's output, inserted literally, so
 *    `$`-sequences inside that output are not treated as replacement
 *    patterns. Workflow variables (with node `env` merged on top) and node
 *    output references are then substituted.
 * 2. Sends the prompt to the provider. `fresh_context` is not consulted:
 *    every iteration is sent with the same plain `sendPrompt` call.
 * 3. If `until_bash` is set, runs it with `bash -c`; exit code 0 ends the
 *    loop. Any failure of that command counts as "condition not met".
 * 4. Asks the provider whether the `until` condition is met; an answer
 *    starting with "yes" ends the loop.
 * 5. If `interactive` is set, announces a gate and waits for
 *    `pollForLoopGateApproval`; a falsy result ends the loop as `completed`.
 *
 * @param node - The loop node.
 * @param deps - Dependency container; used to look up the agent provider.
 * @param platform - Platform used for messages and structured events.
 * @param conversationId - Conversation to notify.
 * @param cwd - Working directory for the `until_bash` command.
 * @param config - Workflow config; supplies the default provider id.
 * @param nodeOutputs - Outputs of earlier nodes, for reference substitution.
 * @param workflowVariables - Workflow-level variables for substitution.
 * @returns `completed` with the last iteration's output.
 * @throws LoopMaxIterationsError when `max_iterations` is exhausted.
 */
export async function handleLoopNode(
  node: LoopNode,
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  cwd: string,
  config: WorkflowConfig,
  nodeOutputs: Map<string, NodeOutput>,
  workflowVariables: Record<string, unknown>,
): Promise<NodeExecutionResult> {
  const { config: loopConfig } = node;
  const providerId = node.provider ?? config.assistant;
  const provider = deps.getAgentProvider(providerId);

  let iterationOutput = '';
  let iterationCount = 0;
  const maxIterations = loopConfig.max_iterations;

  // Merge node-level env vars
  const mergedVars = { ...workflowVariables, ...(node.env ?? {}) };

  await safeSendMessage(
    platform,
    conversationId,
    `🔄 Loop "${node.name ?? node.id}" starting (max ${maxIterations} iterations)`,
  );

  for (let i = 0; i < maxIterations; i++) {
    iterationCount = i + 1;

    // Build iteration prompt with $LOOP_PREV_OUTPUT substitution
    let iterationPrompt = loopConfig.prompt;
    if (iterationOutput) {
      // A function replacer inserts the text verbatim. A string replacement
      // would interpret "$&", "$1", "$$" etc. found in the model output.
      const previousOutput = iterationOutput;
      iterationPrompt = iterationPrompt.replace(/\$LOOP_PREV_OUTPUT/g, () => previousOutput);
    }
    iterationPrompt = buildPromptWithContext(iterationPrompt, mergedVars, nodeOutputs);

    // Execute iteration
    iterationOutput = await provider.sendPrompt(iterationPrompt);

    // Check until_bash condition
    if (loopConfig.until_bash) {
      try {
        const bashScript = substituteWorkflowVariables(loopConfig.until_bash, mergedVars);
        await execFileAsync('bash', ['-c', bashScript], { cwd, timeout: 30_000 });
        // Exit code 0 = condition met
        await safeSendMessage(
          platform,
          conversationId,
          `✅ Loop "${node.name ?? node.id}" completed after ${iterationCount} iterations (bash condition met)`,
        );
        return { state: 'completed', output: iterationOutput };
      } catch {
        // Non-zero exit = condition not met, continue looping
      }
    }

    // Check natural language until condition by asking the model
    const checkPrompt = `Given the following output, is this condition met: "${loopConfig.until}"?\n\nOutput:\n${iterationOutput}\n\nAnswer ONLY "yes" or "no".`;
    const checkResponse = await provider.sendPrompt(checkPrompt);
    if (checkResponse.trim().toLowerCase().startsWith('yes')) {
      await safeSendMessage(
        platform,
        conversationId,
        `✅ Loop "${node.name ?? node.id}" completed after ${iterationCount} iterations`,
      );
      return { state: 'completed', output: iterationOutput };
    }

    // Interactive gate
    if (loopConfig.interactive) {
      const gateMessage = loopConfig.gate_message ?? `Loop iteration ${iterationCount}/${maxIterations}. Continue?`;
      await safeSendMessage(platform, conversationId, `⏸️ ${gateMessage}`);

      if (platform.sendStructuredEvent) {
        await platform.sendStructuredEvent(conversationId, {
          type: 'loop_gate',
          nodeId: node.id,
          iteration: iterationCount,
          message: gateMessage,
        });
      }

      // Wait for gate approval (simplified: poll for response)
      const gateResult = await pollForLoopGateApproval(
        deps,
        platform,
        conversationId,
        node.id,
        iterationCount,
      );

      if (!gateResult) {
        await safeSendMessage(
          platform,
          conversationId,
          `🛑 Loop "${node.name ?? node.id}" stopped by user at iteration ${iterationCount}`,
        );
        return { state: 'completed', output: iterationOutput };
      }
    }
  }

  // Max iterations reached
  throw new LoopMaxIterationsError(node.id, maxIterations);
}
|
|
|
|
/**
 * Wait for a decision at an interactive loop gate.
 *
 * Placeholder implementation: it pauses for one second and then approves
 * unconditionally. None of the parameters are consulted yet; they describe
 * the context a real implementation would need in order to look up the
 * user's answer.
 *
 * @param deps - Dependency container (unused for now).
 * @param platform - Platform interface (unused for now).
 * @param conversationId - Conversation the gate belongs to (unused for now).
 * @param nodeId - Id of the loop node (unused for now).
 * @param iteration - 1-based iteration that just finished (unused for now).
 * @returns Always `true` (continue looping).
 */
async function pollForLoopGateApproval(
  deps: WorkflowDeps,
  platform: IWorkflowPlatform,
  conversationId: string,
  nodeId: string,
  iteration: number,
): Promise<boolean> {
  const autoApproveDelayMs = 1000;
  await sleep(autoApproveDelayMs);

  const approved = true;
  return approved;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Main DAG workflow executor
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Outcome of running an entire DAG workflow.
 */
export interface DagWorkflowResult {
  /** Human-readable description of how the run ended. */
  summary: string;

  /** Outputs recorded for the workflow's nodes, keyed by node id. */
  nodeOutputs: Map<string, NodeOutput>;

  /** `true` when the workflow finished successfully. */
  success: boolean;

  /** Failure description; set when the workflow did not succeed. */
  error?: string;
}
|
|
|
|
/**
|
|
* Execute a complete DAG workflow.
|
|
*
|
|
* This is the main entry point called by the top-level executor.
|
|
* It builds topological layers from the workflow nodes, then executes
|
|
* each layer sequentially with nodes within a layer running concurrently.
|
|
*
|
|
* @param deps - Dependency injection container.
|
|
* @param platform - Platform interface for messaging.
|
|
* @param conversationId - Conversation channel id.
|
|
* @param cwd - Working directory for file operations.
|
|
* @param workflow - The workflow definition.
|
|
* @param workflowRun - The persisted workflow run record.
|
|
* @param priorCompletedNodes - Pre-populated outputs from a prior run (for resume).
|
|
* @returns DagWorkflowResult with summary and node outputs.
|
|
*/
|
|
export async function executeDagWorkflow(
|
|
deps: WorkflowDeps,
|
|
platform: IWorkflowPlatform,
|
|
conversationId: string,
|
|
cwd: string,
|
|
workflow: WorkflowDefinition,
|
|
workflowRun: { id: string; status: string },
|
|
priorCompletedNodes?: Record<string, Record<string, unknown>>,
|
|
): Promise<DagWorkflowResult> {
|
|
const nodeOutputs = new Map<string, NodeOutput>();
|
|
const workflowVariables: Record<string, unknown> = { ...(workflowRun as Record<string, unknown>) };
|
|
|
|
// Pre-populate from prior completed nodes (resume support)
|
|
if (priorCompletedNodes) {
|
|
for (const [nodeId, output] of Object.entries(priorCompletedNodes)) {
|
|
nodeOutputs.set(nodeId, {
|
|
nodeId,
|
|
output: (output.output as string) ?? '',
|
|
state: (output.state as 'completed' | 'failed' | 'skipped') ?? 'completed',
|
|
text: output.text as string | undefined,
|
|
fields: output.fields as Record<string, unknown> | undefined,
|
|
error: output.error as string | undefined,
|
|
costUsd: output.costUsd as number | undefined,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Build topological layers
|
|
let layers: DagNode[][] = [];
|
|
try {
|
|
layers = buildTopologicalLayers(workflow.nodes);
|
|
} catch (err) {
|
|
if (err instanceof DagCycleError) {
|
|
return {
|
|
summary: `Workflow failed: ${err.message}`,
|
|
nodeOutputs,
|
|
success: false,
|
|
error: err.message,
|
|
};
|
|
}
|
|
throw err;
|
|
}
|
|
|
|
// Load config for provider resolution
|
|
const config = await deps.loadConfig(cwd);
|
|
|
|
// Execute layers
|
|
for (let layerIndex = 0; layerIndex < layers.length; layerIndex++) {
|
|
const layer = layers[layerIndex]!;
|
|
|
|
await safeSendMessage(
|
|
platform,
|
|
conversationId,
|
|
`📋 Executing layer ${layerIndex + 1}/${layers.length} (${layer.length} node${layer.length > 1 ? 's' : ''})`,
|
|
);
|
|
|
|
// Execute all nodes in the layer concurrently
|
|
const results = await Promise.allSettled(
|
|
layer.map(async (node) => {
|
|
// Skip already-completed nodes (resume)
|
|
if (nodeOutputs.has(node.id) && nodeOutputs.get(node.id)!.state === 'completed') {
|
|
return { nodeId: node.id, result: nodeOutputs.get(node.id)! } as const;
|
|
}
|
|
|
|
// Check when condition
|
|
if (node.when) {
|
|
const shouldRun = evaluateCondition(node.when, workflowVariables);
|
|
if (!shouldRun) {
|
|
const skippedOutput: NodeOutput = {
|
|
nodeId: node.id,
|
|
output: '',
|
|
state: 'skipped',
|
|
};
|
|
nodeOutputs.set(node.id, skippedOutput);
|
|
return { nodeId: node.id, result: skippedOutput } as const;
|
|
}
|
|
}
|
|
|
|
// Check trigger rule
|
|
const triggerResult = checkTriggerRule(node, nodeOutputs);
|
|
if (triggerResult === 'skip') {
|
|
const skippedOutput: NodeOutput = {
|
|
nodeId: node.id,
|
|
output: '',
|
|
state: 'skipped',
|
|
};
|
|
nodeOutputs.set(node.id, skippedOutput);
|
|
return { nodeId: node.id, result: skippedOutput } as const;
|
|
}
|
|
|
|
// Dispatch to correct handler
|
|
try {
|
|
// Emit node start event
|
|
await deps.store.createWorkflowEvent({
|
|
runId: workflowRun.id,
|
|
nodeId: node.id,
|
|
type: 'node_started',
|
|
data: { kind: node.kind, name: node.name },
|
|
});
|
|
|
|
let result: NodeExecutionResult;
|
|
|
|
if (isCancelNode(node)) {
|
|
// Cancel node — immediately fail the workflow
|
|
await deps.store.createWorkflowEvent({
|
|
runId: workflowRun.id,
|
|
nodeId: node.id,
|
|
type: 'node_cancelled',
|
|
data: { reason: node.reason ?? 'Workflow cancelled by cancel node' },
|
|
});
|
|
throw new Error(node.reason ?? 'Workflow cancelled by cancel node');
|
|
} else if (isApprovalNode(node)) {
|
|
result = await handleApprovalNode(
|
|
node,
|
|
deps,
|
|
platform,
|
|
conversationId,
|
|
workflowRun.id,
|
|
nodeOutputs,
|
|
workflowVariables,
|
|
);
|
|
} else if (isLoopNode(node)) {
|
|
result = await handleLoopNode(
|
|
node,
|
|
deps,
|
|
platform,
|
|
conversationId,
|
|
cwd,
|
|
config,
|
|
nodeOutputs,
|
|
workflowVariables,
|
|
);
|
|
} else if (isBashNode(node) || isScriptNode(node)) {
|
|
result = await executeScriptNode(
|
|
node,
|
|
cwd,
|
|
{ ...config.envVars, ...(node.env ?? {}) },
|
|
join(cwd, '.ion', 'artifacts', workflowRun.id),
|
|
);
|
|
} else if (isPromptNode(node)) {
|
|
result = await executeNodeInternal(
|
|
node,
|
|
deps,
|
|
platform,
|
|
conversationId,
|
|
cwd,
|
|
config,
|
|
nodeOutputs,
|
|
workflowVariables,
|
|
);
|
|
} else if (isCommandNode(node)) {
|
|
// CommandNode — execute as bash
|
|
const bashNode: BashNode = {
|
|
id: node.id,
|
|
kind: 'bash',
|
|
name: node.name,
|
|
bash: node.command,
|
|
timeout_ms: node.retry?.delay_ms ? undefined : 60_000,
|
|
depends_on: node.depends_on,
|
|
trigger_rule: node.trigger_rule,
|
|
retry: node.retry,
|
|
env: node.env,
|
|
};
|
|
result = await executeScriptNode(
|
|
bashNode,
|
|
cwd,
|
|
{ ...config.envVars, ...(node.env ?? {}) },
|
|
join(cwd, '.ion', 'artifacts', workflowRun.id),
|
|
);
|
|
} else {
|
|
result = {
|
|
state: 'failed',
|
|
error: `Unknown node kind: ${(node as DagNode).kind}`,
|
|
};
|
|
}
|
|
|
|
// Convert to NodeOutput and store
|
|
const nodeOutput: NodeOutput = {
|
|
nodeId: node.id,
|
|
output: result.output ?? '',
|
|
state: result.state,
|
|
text: result.output,
|
|
fields: result.fields,
|
|
error: result.error,
|
|
costUsd: result.costUsd,
|
|
};
|
|
nodeOutputs.set(node.id, nodeOutput);
|
|
|
|
// Emit node completion event
|
|
await deps.store.createWorkflowEvent({
|
|
runId: workflowRun.id,
|
|
nodeId: node.id,
|
|
type: result.state === 'completed' ? 'node_completed' : 'node_failed',
|
|
data: { state: result.state, error: result.error },
|
|
});
|
|
|
|
return { nodeId: node.id, result: nodeOutput } as const;
|
|
} catch (err) {
|
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
const failedOutput: NodeOutput = {
|
|
nodeId: node.id,
|
|
output: '',
|
|
state: 'failed',
|
|
error: errorMsg,
|
|
};
|
|
nodeOutputs.set(node.id, failedOutput);
|
|
|
|
await deps.store.createWorkflowEvent({
|
|
runId: workflowRun.id,
|
|
nodeId: node.id,
|
|
type: 'node_failed',
|
|
data: { error: errorMsg },
|
|
});
|
|
|
|
return { nodeId: node.id, result: failedOutput } as const;
|
|
}
|
|
}),
|
|
);
|
|
|
|
// Check for failures that should stop the workflow
|
|
const failures = results.filter(
|
|
(r): r is PromiseRejectedResult => r.status === 'rejected',
|
|
);
|
|
if (failures.length > 0) {
|
|
// A cancel node or unhandled error — stop the workflow
|
|
const errorMsg = failures
|
|
.map((f) => (f.reason instanceof Error ? f.reason.message : String(f.reason)))
|
|
.join('; ');
|
|
return {
|
|
summary: `Workflow failed at layer ${layerIndex + 1}: ${errorMsg}`,
|
|
nodeOutputs,
|
|
success: false,
|
|
error: errorMsg,
|
|
};
|
|
}
|
|
}
|
|
|
|
// All layers completed
|
|
const completedCount = [...nodeOutputs.values()].filter((o) => o.state === 'completed').length;
|
|
const skippedCount = [...nodeOutputs.values()].filter((o) => o.state === 'skipped').length;
|
|
const failedCount = [...nodeOutputs.values()].filter((o) => o.state === 'failed').length;
|
|
|
|
const summary = `Workflow completed: ${completedCount} succeeded, ${skippedCount} skipped, ${failedCount} failed`;
|
|
return {
|
|
summary,
|
|
nodeOutputs,
|
|
success: failedCount === 0,
|
|
error: failedCount > 0 ? `${failedCount} node(s) failed` : undefined,
|
|
};
|
|
} |