chore: snapshot working tree - pty_exited notifications + in-flight inference WIP

feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
2026-06-14 12:48:47 +00:00
parent 0ed506f1da
commit b18de2a331
204 changed files with 25344 additions and 867 deletions
--- a/apps/control/src/services/ssh-config.ts
+++ b/apps/control/src/services/ssh-config.ts
@@ -0,0 +1,361 @@
+/**
+ * P9.1: SSH config editor for llama-swap hosts.
+ *
+ * Pipeline (design §5, stackctl flow with the tests stackctl never had):
+ *   SFTP/SSH read -> schema-validated edit (config-schema.json from the fork)
+ *   -> diff preview -> timestamped backup -> write -> restart -> health-wait.
+ *
+ * SSH I/O is shelled out via `ssh` (matching the booterm precedent — no ssh2
+ * dependency, key from `secrets/`), injected as `SshExec` so every failure path
+ * is unit-testable without a live host. The pure helpers (validate, diff,
+ * backup filename) carry the logic and are tested directly.
+ */
+
+import { spawn } from 'node:child_process';
+import { createRequire } from 'node:module';
+import { load as loadYaml } from 'js-yaml';
+import type { ValidateFunction } from 'ajv';
+
+// ajv + ajv-formats are CJS. Under NodeNext ESM the default-import interop binds
+// the namespace, not the constructable class, so load them via createRequire to
+// get the real module.exports (class / plugin fn) at both type and runtime.
+const require = createRequire(import.meta.url);
+const Ajv = require('ajv') as typeof import('ajv').default;
+const addFormats = require('ajv-formats') as typeof import('ajv-formats').default;
+
+// ─── host SSH target ─────────────────────────────────────────────────────────
+
+export interface SshTarget { // where, and as whom, the `ssh` client connects
+  host: string; // remote host; the host part of the `user@host` destination
+  user: string; // remote login name; the user part of the `user@host` destination
+  keyPath: string; // identity file the default executor passes to `ssh -i`
+}
+
+export interface ExecResult { // outcome of one remote command
+  code: number; // exit status; the ops in this file treat any non-zero value as failure
+  stdout: string; // captured standard output
+  stderr: string; // captured standard error; `fail` quotes its first 300 characters
+}
+
+/**
+ * Injectable SSH executor: runs `command` against `target` and resolves with
+ * the exit code plus captured output. `stdin`, when present, is piped to the
+ * remote command. Callers in this file detect failure through a non-zero
+ * `code` on the resolved result.
+ */
+export type SshExec = (target: SshTarget, command: string, stdin?: string) => Promise<ExecResult>;
+
+// ─── pure: schema validation ─────────────────────────────────────────────────
+
+export interface ValidationResult { // what validateLlamaConfig reports
+  valid: boolean; // true only when the YAML parsed, is object-like, and passed the schema
+  errors: string[]; // human-readable problems; empty when `valid` is true
+  /** Parsed YAML value; present whenever the text parsed, even if it then failed validation. */
+  parsed?: unknown;
+}
+
+// Single-entry memo: the last compiled validator and the schema object
+// (compared by identity) it was compiled from.
+let cachedValidator: ValidateFunction | null = null;
+let cachedSchemaRef: object | null = null;
+
+/**
+ * Return an Ajv validator for `schema`. Compilation happens only when the
+ * schema object is not the same reference as on the previous call; otherwise
+ * the memoized validator is reused.
+ */
+function getValidator(schema: object): ValidateFunction {
+  if (cachedSchemaRef !== schema || cachedValidator === null) {
+    const ajv = new Ajv({ allErrors: true, strict: false });
+    addFormats(ajv);
+    const compiled = ajv.compile(schema);
+    // Update the memo only after a successful compile.
+    cachedValidator = compiled;
+    cachedSchemaRef = schema;
+    return compiled;
+  }
+  return cachedValidator;
+}
+
+/**
+ * Check a llama-swap config, given as YAML text, against the fork's
+ * config-schema.json. Three outcomes are possible before success: the text is
+ * not parseable YAML, the parsed value is not object-like, or the schema
+ * rejects it. Pure — no I/O; the caller supplies the schema object.
+ *
+ * @param yamlText Raw YAML of the candidate config.
+ * @param schema   JSON Schema object to validate against.
+ * @returns `valid` plus messages; `parsed` is included whenever parsing succeeded.
+ */
+export function validateLlamaConfig(yamlText: string, schema: object): ValidationResult {
+  let doc: unknown;
+  try {
+    doc = loadYaml(yamlText);
+  } catch (cause) {
+    const reason = (cause as Error).message;
+    return { valid: false, errors: [`YAML parse error: ${reason}`] };
+  }
+
+  const isObjectLike = typeof doc === 'object' && doc !== null;
+  if (!isObjectLike) {
+    return { valid: false, errors: ['config must be a YAML mapping'], parsed: doc };
+  }
+
+  const check = getValidator(schema);
+  const passed = check(doc);
+  if (passed) {
+    return { valid: true, errors: [], parsed: doc };
+  }
+
+  // One line per schema violation: "<json pointer> <message>".
+  const messages: string[] = [];
+  for (const issue of check.errors ?? []) {
+    const where = issue.instancePath || '(root)';
+    messages.push(`${where} ${issue.message ?? 'invalid'}`);
+  }
+  if (messages.length === 0) {
+    messages.push('schema validation failed');
+  }
+  return { valid: false, errors: messages, parsed: doc };
+}
+
+// ─── pure: unified-ish diff ──────────────────────────────────────────────────
+
+/**
+ * Build a compact preview diff of two texts. Lines shared at the start and at
+ * the end are dropped; whatever remains in the middle is listed as removed
+ * (`- `) lines followed by added (`+ `) lines under a single header. This is a
+ * preview aid, not a minimal-edit (Myers) diff.
+ *
+ * @returns The diff text, or the empty string when the inputs are identical.
+ */
+export function computeDiff(oldText: string, newText: string): string {
+  const before = oldText.split('\n');
+  const after = newText.split('\n');
+
+  // Length of the shared leading run.
+  const limit = Math.min(before.length, after.length);
+  let head = 0;
+  while (head < limit && before[head] === after[head]) head += 1;
+
+  // Walk back from both ends, never crossing into the shared prefix.
+  let lastOld = before.length - 1;
+  let lastNew = after.length - 1;
+  for (; lastOld >= head && lastNew >= head; lastOld -= 1, lastNew -= 1) {
+    if (before[lastOld] !== after[lastNew]) break;
+  }
+
+  const removed = before.slice(head, lastOld + 1);
+  const added = after.slice(head, lastNew + 1);
+  if (removed.length === 0 && added.length === 0) return ''; // identical
+
+  const header = `@@ lines ${head + 1}..${lastOld + 1} -> ${head + 1}..${lastNew + 1} @@`;
+  const minus = removed.map((line) => `- ${line}`);
+  const plus = added.map((line) => `+ ${line}`);
+  return [header, ...minus, ...plus].join('\n');
+}
+
+// ─── pure: backup filename ───────────────────────────────────────────────────
+
+/**
+ * Timestamped backup path: `<configPath>.bak-YYYYMMDDTHHMMSSZ`.
+ * The stamp is the UTC ISO-8601 form of `now` with dashes, colons and the
+ * fractional seconds removed.
+ */
+export function backupFilename(configPath: string, now: Date): string {
+  const iso = now.toISOString();
+  // Strip separators everywhere and the ".sss" that precedes the final Z.
+  const stamp = iso.replace(/[-:]|\.\d+(?=Z$)/g, '');
+  return configPath + '.bak-' + stamp;
+}
+
+// ─── RemoteOps seam (shell vs wrapper) ───────────────────────────────────────
+//
+// 'shell' mode issues raw shell commands (P9.1 behavior). 'wrapper' mode issues
+// fixed verbs so the key can be bound to an authorized_keys forced command that
+// hardcodes the paths. Both drive the same apply pipeline.
+
+export type SshMode = 'shell' | 'wrapper'; // 'shell' sends raw commands; 'wrapper' sends fixed verbs for a forced-command key
+
+export interface RemoteOps { // the four remote steps the apply pipeline drives
+  read(): Promise<string>; // resolves with the current remote config text
+  backup(now: Date): Promise<string>;        // returns the backup path; wrapper mode ignores `now` (the host stamps it)
+  write(content: string): Promise<void>; // replaces the remote config with `content`
+  restart(restartCmd: string): Promise<void>; // restarts the service; wrapper mode ignores `restartCmd`
+}
+
+/** Throw an Error naming the failed step, its exit code, and at most 300 characters of stderr. */
+function fail(label: string, res: ExecResult): never {
+  const detail = res.stderr.slice(0, 300);
+  const message = `${label} failed (exit ${res.code}): ${detail}`;
+  throw new Error(message);
+}
+
+/**
+ * Build RemoteOps that send plain shell commands (no wrapper on the host).
+ * Paths are single-quoted for the remote shell; `restartCmd` is sent verbatim.
+ * Every op throws (via `fail`) when its command exits non-zero.
+ */
+export function shellOps(target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
+  // Pass a result through unchanged unless the command exited non-zero.
+  const expectOk = (step: string, result: ExecResult): ExecResult => {
+    if (result.code !== 0) fail(step, result);
+    return result;
+  };
+
+  return {
+    read: async () => {
+      const result = await exec(target, `cat ${shellQuote(configPath)}`);
+      return expectOk('read', result).stdout;
+    },
+    backup: async (now) => {
+      const dest = backupFilename(configPath, now);
+      expectOk('backup', await exec(target, `cp ${shellQuote(configPath)} ${shellQuote(dest)}`));
+      return dest;
+    },
+    write: async (content) => {
+      // `content` travels on stdin; the remote redirection overwrites the file in place.
+      expectOk('write', await exec(target, `cat > ${shellQuote(configPath)}`, content));
+    },
+    restart: async (restartCmd) => {
+      expectOk('restart', await exec(target, restartCmd));
+    },
+  };
+}
+
+/**
+ * Build RemoteOps for a key locked to a forced command. Only the bare verbs
+ * `read`, `backup`, `write` and `restart` go over the wire; the wrapper on the
+ * host hardcodes the paths. The backup verb stamps the file itself and prints
+ * the resulting path on stdout, which is returned trimmed.
+ */
+export function wrapperOps(target: SshTarget, exec: SshExec): RemoteOps {
+  // Pass a result through unchanged unless the verb exited non-zero.
+  const expectOk = (verb: string, result: ExecResult): ExecResult => {
+    if (result.code !== 0) fail(verb, result);
+    return result;
+  };
+
+  return {
+    read: async () => expectOk('read', await exec(target, 'read')).stdout,
+    // The caller's timestamp is not used here: the host picks the name.
+    backup: async () => expectOk('backup', await exec(target, 'backup')).stdout.trim(),
+    write: async (content) => {
+      expectOk('write', await exec(target, 'write', content));
+    },
+    // The restart command is fixed on the host, so the argument is not used.
+    restart: async () => {
+      expectOk('restart', await exec(target, 'restart'));
+    },
+  };
+}
+
+/** Pick the RemoteOps flavour for `mode`; `configPath` is only used in shell mode. */
+export function makeRemoteOps(mode: SshMode, target: SshTarget, configPath: string, exec: SshExec): RemoteOps {
+  if (mode === 'wrapper') {
+    return wrapperOps(target, exec);
+  }
+  return shellOps(target, configPath, exec);
+}
+
+// ─── orchestration (injectable exec) ─────────────────────────────────────────
+
+/**
+ * Fetch the remote config text using the ops for `mode`.
+ * `mode` defaults to 'shell' so existing callers keep working.
+ */
+export async function readRemoteConfig(
+  target: SshTarget,
+  configPath: string,
+  exec: SshExec,
+  mode: SshMode = 'shell',
+): Promise<string> {
+  const ops = makeRemoteOps(mode, target, configPath, exec);
+  return ops.read();
+}
+
+export interface ApplyResult { // outcome of applyRemoteConfig
+  ok: boolean; // true only when every step, including the health wait, succeeded
+  step: 'validate' | 'backup' | 'write' | 'restart' | 'health' | 'done'; // failing step, or 'done'; a failed read of the current config is reported as 'validate'
+  backupPath?: string; // set once the backup step has succeeded
+  diff?: string; // preview diff of current vs new config; set once the current config was read
+  error?: string; // failure description; absent on success
+}
+
+export interface ApplyOptions { // inputs to applyRemoteConfig
+  target: SshTarget; // host to edit
+  configPath: string; // remote config file path; only used in shell mode
+  restartCmd: string; // restart command sent verbatim in shell mode; unused in wrapper mode
+  newConfig: string; // YAML text to validate and write
+  schema: object; // JSON Schema the new config must satisfy
+  baseUrl: string; // provider base URL; `/v1/models` under it is polled after restart
+  exec: SshExec; // SSH executor (injectable for tests)
+  /** 'shell' (default) or 'wrapper'. */
+  mode?: SshMode;
+  fetcher?: typeof fetch; // defaults to the global `fetch`
+  now?: Date; // timestamp for the backup name; defaults to the current time
+  healthAttempts?: number; // defaults to 10
+  healthDelayMs?: number; // pause between health attempts; defaults to 2000
+}
+
+/**
+ * The full apply pipeline. Aborts at the first failing step and reports which
+ * one. Backup ALWAYS precedes write, so a failed write leaves the timestamped
+ * backup intact for manual recovery. Mode selects the wire commands (raw shell
+ * vs forced-command verbs); the pipeline is identical.
+ *
+ * Order: validate `newConfig` locally, read the current remote config (for the
+ * diff), back up, write, restart, then poll `/v1/models` under `baseUrl`.
+ *
+ * Failures of the remote steps (read, backup, write, restart) are caught and
+ * reported in the returned `ApplyResult` (`ok: false` plus the failing `step`)
+ * rather than thrown. Nothing is rolled back automatically: after a failed
+ * write, restart or health wait the result carries `backupPath` so the
+ * previous config can be restored by hand.
+ *
+ * @param opts See `ApplyOptions`; omitted fields default to shell mode, the
+ *   global `fetch`, the current time, and 10 health attempts 2000 ms apart.
+ * @returns `ok: true` with `step: 'done'`, `backupPath` and `diff` on success;
+ *   otherwise `ok: false` with the failing `step` and an `error` message.
+ */
+export async function applyRemoteConfig(opts: ApplyOptions): Promise<ApplyResult> {
+  const {
+    target, configPath, restartCmd, newConfig, schema, baseUrl, exec,
+    mode = 'shell', fetcher = fetch, now = new Date(),
+    healthAttempts = 10, healthDelayMs = 2000,
+  } = opts;
+
+  const ops = makeRemoteOps(mode, target, configPath, exec);
+
+  // 1. Validate before touching the host; an invalid config never reaches SSH.
+  const validation = validateLlamaConfig(newConfig, schema);
+  if (!validation.valid) {
+    return { ok: false, step: 'validate', error: validation.errors.join('; ') };
+  }
+
+  // Read current for diff + so an unreadable host fails before any write. Reported as step 'validate' (there is no separate read step).
+  let current = '';
+  try {
+    current = await ops.read();
+  } catch (err) {
+    return { ok: false, step: 'validate', error: `read current failed: ${(err as Error).message}` };
+  }
+  const diff = computeDiff(current, newConfig);
+
+  // 2. Timestamped backup BEFORE write; from here on every result carries the diff.
+  let backupPath: string;
+  try {
+    backupPath = await ops.backup(now);
+  } catch (err) {
+    return { ok: false, step: 'backup', diff, error: (err as Error).message };
+  }
+
+  // 3. Write new config; on failure the result still names the backup.
+  try {
+    await ops.write(newConfig);
+  } catch (err) {
+    return { ok: false, step: 'write', backupPath, diff, error: (err as Error).message };
+  }
+
+  // 4. Restart the service (the new config is already on disk at this point).
+  try {
+    await ops.restart(restartCmd);
+  } catch (err) {
+    return { ok: false, step: 'restart', backupPath, diff, error: (err as Error).message };
+  }
+
+  // 5. Health-wait: poll the provider until it serves /v1/models. No automatic rollback on failure.
+  const healthy = await healthWait(baseUrl, fetcher, healthAttempts, healthDelayMs);
+  if (!healthy) {
+    return { ok: false, step: 'health', backupPath, diff, error: 'health check did not pass after restart; backup retained' };
+  }
+
+  return { ok: true, step: 'done', backupPath, diff };
+}
+
+/**
+ * Poll the provider's `/v1/models` until it responds OK or attempts run out.
+ *
+ * Request errors (connection refused, timeout, ...) and non-OK statuses both
+ * count as "not up yet" and are retried; nothing is thrown for them.
+ *
+ * @param baseUrl  Provider base URL; trailing slashes are stripped before the
+ *                 path is appended.
+ * @param fetcher  `fetch`-compatible function (injectable for tests).
+ * @param attempts Maximum number of requests; zero or less means none is made.
+ * @param delayMs  Pause between consecutive attempts (none after the last).
+ * @returns `true` as soon as one response has `ok` set, otherwise `false`.
+ */
+export async function healthWait(
+  baseUrl: string,
+  fetcher: typeof fetch,
+  attempts: number,
+  delayMs: number,
+): Promise<boolean> {
+  // Loop-invariant, so build it once.
+  const url = `${baseUrl.replace(/\/+$/, '')}/v1/models`;
+
+  for (let i = 0; i < attempts; i++) {
+    try {
+      // Each request gets its own 5 s deadline.
+      const res = await fetcher(url, { signal: AbortSignal.timeout(5_000) });
+      const up = res.ok;
+      // Only the status matters. Discard the unread body so the underlying
+      // connection is released now rather than whenever the response is
+      // garbage-collected. Best effort: a failure here must not change the verdict.
+      try {
+        await res.body?.cancel();
+      } catch {
+        // ignore
+      }
+      if (up) return true;
+    } catch {
+      // not up yet
+    }
+    if (i < attempts - 1) await sleep(delayMs);
+  }
+  return false;
+}
+
+/** Resolve (with no value) once a timer of `ms` milliseconds has fired. */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((resolve) => {
+    setTimeout(resolve, ms);
+  });
+}
+
+// Minimal POSIX single-quote shell escape for the remote command string:
+// wrap in '...' and spell each embedded quote as '\'' (close, escaped quote, reopen).
+function shellQuote(s: string): string {
+  const escaped = s.split("'").join("'\\''");
+  return "'" + escaped + "'";
+}
+
+// ─── real SSH executor (spawn) ───────────────────────────────────────────────
+
+/**
+ * Default SSH executor. Uses the system `ssh` with an explicit identity file and
+ * IdentitiesOnly so the agent's default key is never offered (the boocode Gitea
+ * lesson). BatchMode avoids interactive prompts hanging the service.
+ *
+ * The returned promise never rejects: a failure to launch `ssh` resolves with
+ * `code: 127` and the spawn error appended to `stderr`; otherwise it resolves
+ * on `close` with the process exit code (1 when none is reported, e.g. when
+ * the process was killed by a signal).
+ *
+ * @param target  Host, user and identity file to connect with.
+ * @param command Command string passed to `ssh` as the remote command.
+ * @param stdin   Optional text written to the remote command's stdin.
+ */
+export const sshExec: SshExec = (target, command, stdin) => {
+  return new Promise<ExecResult>((resolve) => {
+    const args = [
+      '-i', target.keyPath,
+      '-o', 'IdentitiesOnly=yes',
+      '-o', 'BatchMode=yes',
+      '-o', 'StrictHostKeyChecking=accept-new',
+      '-o', 'ConnectTimeout=10',
+      `${target.user}@${target.host}`,
+      command,
+    ];
+    const child = spawn('ssh', args, { stdio: ['pipe', 'pipe', 'pipe'] });
+    let stdout = '';
+    let stderr = '';
+    // Decode at the stream level so a multi-byte UTF-8 character split across
+    // two chunks is reassembled instead of being turned into U+FFFD.
+    child.stdout.setEncoding('utf8');
+    child.stderr.setEncoding('utf8');
+    child.stdout.on('data', (d: string) => { stdout += d; });
+    child.stderr.on('data', (d: string) => { stderr += d; });
+    child.on('error', (err) => resolve({ code: 127, stdout, stderr: `${stderr}${(err as Error).message}` }));
+    child.on('close', (code) => resolve({ code: code ?? 1, stdout, stderr }));
+    // If ssh exits before draining its stdin (connection refused, auth
+    // failure, ...) the write below fails with EPIPE. Without a listener that
+    // stream error would be an uncaught exception; the failure is already
+    // reported through the exit code on `close`, so it is safe to drop here.
+    child.stdin.on('error', () => {
+      // intentionally ignored — see above
+    });
+    if (stdin !== undefined) {
+      child.stdin.write(stdin);
+    }
+    child.stdin.end();
+  });
+};