/**
 * SOP Markdown parser for the Ion workflow engine.
 *
 * Parses `.sop.md` files (Agent SOP format) into structured `SopDocument`
 * objects that can be converted to YAML workflow definitions.
 */

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** A single parameter declared in the SOP's Parameters section. */
export interface SopParameter {
  /** Parameter name (camelCase by convention). */
  name: string;
  /** Whether the parameter is required or optional. */
  type: 'required' | 'optional';
  /** Default value, set only when the declaration carries a `default:` clause. */
  default?: string;
  /** Human-readable description of the parameter. */
  description: string;
}

/** A single step declared in the SOP's Steps section. */
export interface SopStep {
  /** Step number as written in the `### N. Name` heading. */
  number: number;
  /** Short human-readable step name. */
  name: string;
  /** Body text of the step (may be multi-line), excluding any constraints. */
  body: string;
  /** Constraints text extracted from the step, if any. */
  constraints?: string;
}

/** The fully-parsed SOP document. */
export interface SopDocument {
  /** Title extracted from the first `# heading`. */
  title: string;
  /** Overview section content (empty string if section absent). */
  overview: string;
  /** Parsed parameters (empty array if section absent). */
  parameters: SopParameter[];
  /** Parsed steps (empty array if section absent). */
  steps: SopStep[];
  /** Optional examples section content. */
  examples?: string;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Matches a line that opens a level-1 or level-2 heading (`#` / `##`).
 * Deeper headings (`###` and below) do not match, so they stay inside the
 * section that contains them.
 */
const SECTION_BOUNDARY = /^#{1,2}(?:\s|$)/;

/**
 * Extract a section body from markdown text.
 *
 * A section starts at the first `## <heading>` line and runs until the next
 * `## ` or `# ` heading line (or the end of the input). `###` sub-headings are
 * part of the section body.
 *
 * The scan is line-based rather than a single multiline regex: with the `m`
 * flag `$` matches at every line end, which makes a lazy "up to the next
 * heading or end" capture stop after the first line.
 *
 * Fenced code blocks are not special-cased: a line starting with `# ` or
 * `## ` inside a fence also ends the section.
 *
 * @param markdown - Full markdown document.
 * @param heading - Heading text to look for (matched literally).
 * @returns The trimmed section body with LF line endings, an empty string if
 *   the heading exists but has no content, or `null` if the heading is absent.
 */
function extractSection(markdown: string, heading: string): string | null {
  const lines = markdown.split(/\r?\n/);
  const headingLine = new RegExp(`^##\\s+${escapeRegex(heading)}\\s*$`);

  const headingIndex = lines.findIndex((line) => headingLine.test(line));
  if (headingIndex === -1) {
    return null;
  }

  const following = lines.slice(headingIndex + 1);
  const boundary = following.findIndex((line) => SECTION_BOUNDARY.test(line));
  const bodyLines = boundary === -1 ? following : following.slice(0, boundary);

  return bodyLines.join('\n').trim();
}

/** Escape special regex characters in a literal string. */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// ---------------------------------------------------------------------------
// Section parsers
// ---------------------------------------------------------------------------

/**
 * Parse the Parameters section into structured `SopParameter` objects.
 *
 * Recognised line shapes:
 *
 *   - **paramName** (required): Description here
 *   - **paramName** (optional, default: value): Description here
 *
 * Lines that do not match either shape are ignored.
 *
 * @param raw - Body of the Parameters section.
 * @returns One entry per matching line, in document order.
 */
function parseParameters(raw: string): SopParameter[] {
  const parameters: SopParameter[] = [];

  // Created per call: a `g` regex carries `lastIndex` state between execs.
  const paramRegex =
    /^-\s+\*\*(\w+)\*\*\s+\((required|optional)(?:,\s*default:\s*([^)]+))?\):\s+(.+)$/gm;

  let match: RegExpExecArray | null;
  while ((match = paramRegex.exec(raw)) !== null) {
    const [, name, kind, defaultVal, description] = match;

    // The pattern guarantees these groups on a match; the guard narrows the
    // types without resorting to assertions.
    if (name === undefined || description === undefined) {
      continue;
    }
    if (kind !== 'required' && kind !== 'optional') {
      continue;
    }

    const param: SopParameter = { name, type: kind, description };
    // Group 3 is optional and is undefined when no `default:` clause exists.
    if (defaultVal !== undefined) {
      param.default = defaultVal.trim();
    }
    parameters.push(param);
  }

  return parameters;
}

/**
 * Parse the Steps section into structured `SopStep` objects.
 *
 * Each step starts at a `### N. Step Name` sub-heading and runs until the
 * next such sub-heading or the end of the section. Inside a step, text
 * following a `**Constraints:**` line is returned as `constraints`; the text
 * before it is the step `body`.
 *
 * @param raw - Body of the Steps section.
 * @returns One entry per step heading, in document order.
 */
function parseSteps(raw: string): SopStep[] {
  // Created per call: a `g` regex carries `lastIndex` state between execs.
  const stepHeadingRegex = /^###\s+(\d+)\.\s+(.+)$/gm;

  // Record where each step heading begins so step text can be sliced out.
  const headings: { number: number; name: string; start: number }[] = [];
  let match: RegExpExecArray | null;
  while ((match = stepHeadingRegex.exec(raw)) !== null) {
    const [, stepNumber, stepName] = match;
    if (stepNumber === undefined || stepName === undefined) {
      continue;
    }
    headings.push({
      number: parseInt(stepNumber, 10),
      name: stepName.trim(),
      start: match.index,
    });
  }

  return headings.map((heading, i) => {
    // A step ends where the next step heading starts, or at end of section.
    const end = headings[i + 1]?.start ?? raw.length;

    // The step text starts after the heading line itself.
    const headingLineEnd = raw.indexOf('\n', heading.start);
    const textStart = headingLineEnd === -1 ? raw.length : headingLineEnd + 1;
    const stepText = raw.slice(textStart, end).trim();

    // No `m` flag here, so `$` is the true end of the step text. Constraints
    // run to the end of the step or to the next `##`/`###`-style heading.
    const constraintsMatch = stepText.match(
      /\*\*Constraints:\*\*\s*\n([\s\S]*?)(?=\n##|$)/,
    );
    const constraints = constraintsMatch?.[1]?.trim();

    // Body is everything before the Constraints marker (or the whole text).
    const body =
      constraintsMatch?.index !== undefined
        ? stepText.slice(0, constraintsMatch.index).trim()
        : stepText;

    const step: SopStep = {
      number: heading.number,
      name: heading.name,
      body,
      // Omit the key entirely when constraints are missing or empty.
      ...(constraints ? { constraints } : {}),
    };
    return step;
  });
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Parse a `.sop.md` markdown string into a structured `SopDocument`.
 *
 * Missing pieces degrade gracefully: the title falls back to
 * `'Untitled SOP'`, a missing Overview becomes `''`, missing Parameters or
 * Steps become empty arrays, and `examples` is omitted when there is no
 * Examples section.
 *
 * @param markdown - The raw markdown content of a `.sop.md` file.
 * @returns A parsed `SopDocument` with title, overview, parameters, steps,
 *   and optional examples.
 */
export function parseSopContent(markdown: string): SopDocument {
  // --- Title (first h1) ---
  const titleMatch = markdown.match(/^#\s+(.+)$/m);
  const title = titleMatch?.[1]?.trim() ?? 'Untitled SOP';

  // --- Overview ---
  const overview = extractSection(markdown, 'Overview') ?? '';

  // --- Parameters ---
  const parametersRaw = extractSection(markdown, 'Parameters');
  const parameters = parametersRaw ? parseParameters(parametersRaw) : [];

  // --- Steps ---
  const stepsRaw = extractSection(markdown, 'Steps');
  const steps = stepsRaw ? parseSteps(stepsRaw) : [];

  // --- Examples (optional) ---
  const examplesRaw = extractSection(markdown, 'Examples');

  return {
    title,
    overview,
    parameters,
    steps,
    ...(examplesRaw !== null ? { examples: examplesRaw } : {}),
  };
}