/** * v2.6 Phase 1 — OpenCodeServerBackend. * * Warm, multi-turn backend for the `opencode` agent. One `opencode serve` HTTP * server per BooCoder process; one opencode session per BooCode session (resumed * on switch-back); one SSE read loop PER session, each scoped to that session's * worktree directory so sessions in different directories stream concurrently * (P1.5-a — replaced the Phase-1 single-stream-last-directory model). * * Implements the Phase 0 `AgentBackend` interface. Emits transport-agnostic * `AgentEvent`s — the dispatcher (Phase 1.7, NOT wired in this batch) maps them * to WS frames. No dispatcher/route references this file yet. * * Spec: openspec/changes/v2-6-persistent-agent-sessions/design.md §2 / §2a. * SDK shapes verified by direct read of @opencode-ai/sdk@1.15.12 dist .d.ts: * - client methods take FLATTENED params (sessionID/directory/body all inline), * not {path,query,body}. create→{directory}, promptAsync→{sessionID,directory, * parts,model}, abort→{sessionID,directory}. model is {providerID,modelID}. * - client.event() resolves to { stream: AsyncGenerator }; the * real event is chunk.payload (discriminate on chunk.payload.type). * - promptAsync is fire-and-forget (204); the turn completes via a * 'session.idle' event for that opencode session id. 
*/ import { spawn, spawnSync, type ChildProcess } from 'node:child_process'; import { createHash } from 'node:crypto'; import { createServer, connect as netConnect } from 'node:net'; import type { FastifyBaseLogger } from 'fastify'; import { createOpencodeClient, type OpencodeClient, type Event, type Part, type ToolPart, type ToolState, type AssistantMessage, } from '@opencode-ai/sdk/v2/client'; import type { ToolCallStatus } from '@agentclientprotocol/sdk'; import type { Sql } from '../../db.js'; import type { AcpToolSnapshot } from '../acp-tool-snapshot.js'; import { armAbortGuard, noteTurnActivity, consumeTerminal } from './turn-guard.js'; import { stepEndedToUsage, type StepUsage } from './opencode-usage.js'; import { decideRestart, DEFAULT_HEALTH_FAILURE_THRESHOLD } from './lifecycle-decisions.js'; import type { AgentBackend, AgentEvent, AgentSessionHandle, EnsureSessionOpts, PromptCtx, TurnResult, } from '../agent-backend.js'; const READY_TIMEOUT_MS = 30_000; const SSE_RECONNECT_DELAY_MS = 1_000; /** * No-activity backstop for an in-flight turn. opencode streams reasoning/text/tool * deltas continuously while working, so "zero events for this long" means the turn * is wedged or its terminal event (session.idle) was lost (see the reconnect race * below). Generous so a legitimately slow turn never trips it. */ const TURN_INACTIVITY_MS = 180_000; /** One in-flight turn's emitter + completion settler. */ interface TurnState { onEvent: (e: AgentEvent) => void; settle: (r: TurnResult) => void; } /** Per-(opencode session) demux state. dedup sets scoped here, cleared per turn. */ interface SessionState { boocodeSessionId: string; agentSessionId: string; /** Worktree directory for SDK `directory` routing; refreshed each turn from ctx. */ worktreePath: string; /** dedup gate: `${type}:${id}` added on delta, deleted-and-tested on updated. Cleared at turn end. 
*/ streamedPartKeys: Set; /** partID → 'text' | 'reasoning', so a delta with a non-'reasoning' field is still classed right. Cleared at turn end. */ partTypeById: Map; activeTurn: TurnState | null; /** Inactivity backstop timer for the active turn; null when no turn in flight. */ watchdog: ReturnType | null; /** Per-session SSE subscription handle. Non-null while the loop is running; * aborting it tears down the underlying fetch and exits the loop. */ sseAbort: AbortController | null; /** F.1 post-abort orphan-terminal guard: swallow the one session.idle/error * opencode emits for an aborted turn so it can't settle the next turn. */ swallowNextTerminal: boolean; } export interface OpenCodeServerBackendDeps { sql: Sql; log: FastifyBaseLogger; /** Absolute path to the opencode binary (resolved from available_agents at wiring time, Phase 1.7). */ opencodeBinary: string; } export class OpenCodeServerBackend implements AgentBackend { readonly backend = 'opencode_server' as const; private readonly sql: Sql; private readonly log: FastifyBaseLogger; private readonly opencodeBinary: string; private child: ChildProcess | null = null; private client: OpencodeClient | null = null; private port: number | null = null; private up = false; private serverStarting: Promise | null = null; // Phase 3 busy-aware health monitor (openchamber lift): consecutive failed // probes + the start of an unhealthy-while-busy window feed `decideRestart`. private consecutiveHealthFailures = 0; private unhealthyBusySince = 0; private restarting: Promise | null = null; /** opencode session id → demux state. Maintained by ensureSession; read by the SSE loop. */ private readonly byOpencodeId = new Map(); constructor(deps: OpenCodeServerBackendDeps) { this.sql = deps.sql; this.log = deps.log; this.opencodeBinary = deps.opencodeBinary; } /** §2: liveness for the health endpoint + dispatcher fallback decision. */ health(): 'up' | 'down' { return this.up ? 
'up' : 'down'; } /** Phase 3: busy iff ANY pooled opencode session has an in-flight turn. The * pool reads this to skip idle/LRU eviction and the health-monitor to defer a * restart (never tear down a session mid-stream). */ isBusy(): boolean { for (const st of this.byOpencodeId.values()) { if (st.activeTurn) return true; } return false; } // ─── Server lifecycle (1.2: spawn once + client + ready; Phase 3 crash-restart) ── /** * Lazy: start the single server on first use; re-spawn after a crash. Idempotent * within one live server — `serverStarting` caches the in-flight start, and is * reset to null by the crash handler so the NEXT ensureServer re-spawns a fresh * server (Phase 3 crash recovery). A dead-but-not-yet-reaped child (exit handler * raced) is also treated as needing a restart. */ private ensureServer(): Promise { const childDead = this.child != null && (this.child.exitCode !== null || this.child.signalCode !== null); if (!this.serverStarting || (!this.up && childDead)) { this.serverStarting = this.startServer(); } return this.serverStarting; } private async startServer(): Promise { const port = await freePort(); // Phase 1: run unsecured on loopback (opencode's documented default — serve.ts // only WARNS when OPENCODE_SERVER_PASSWORD is unset). The real boundary is the // 127.0.0.1 bind. Defense-in-depth basic-auth is deferred: the hey-api client's // auth wiring + opencode's exact scheme must be confirmed against a live server // first, else every request 401s. Recon explicitly said "do NOT block on it". const child = spawn(this.opencodeBinary, ['serve', '--hostname', '127.0.0.1', '--port', String(port)], { stdio: ['ignore', 'pipe', 'pipe'], env: { ...process.env }, }); this.child = child; this.port = port; // Child lifetime is the backend's (the pool's), NOT a request's. We never tie // it to a per-turn abort signal. 
Phase 3: on unexpected exit we recover — // settle any in-flight turns as failed, mark their agent_sessions rows crashed, // and reset `serverStarting` so the next ensureServer re-spawns. opencode keeps // sessions on disk, but a fresh server's in-memory state is gone, so the next // turn's ensureSession (rows now 'crashed') creates fresh opencode sessions. child.on('exit', (code, signal) => { // Only react to THIS child's exit (a restart may have swapped in a new one). if (this.child !== child) return; this.handleServerCrash(code, signal, port); }); await waitForReady(child, READY_TIMEOUT_MS); this.client = createOpencodeClient({ baseUrl: `http://127.0.0.1:${port}` }); this.up = true; this.log.info({ port }, 'opencode-server: ready'); } /** * Crash handler (Phase 3, lift of openchamber's restart-on-exit path). The * server died with N live opencode sessions; we can't restart it here (the next * turn does, lazily — avoids a restart storm if the binary is broken). We: * 1. fail every in-flight turn so its dispatcher unblocks + publishes an error, * 2. mark each session's agent_sessions row 'crashed' so ensureSession won't * resume a now-dead native session id (it creates fresh), * 3. tear down the SSE loops + demux state (stale against the dead server), * 4. reclaim the port + reset state so the next ensureServer re-spawns. 
*/ private handleServerCrash(code: number | null, signal: NodeJS.Signals | null, port: number): void { this.up = false; const states = [...this.byOpencodeId.values()]; this.log.warn( { code, signal, port, liveSessions: states.length }, 'opencode-server: child exited — recovering (fail in-flight, mark crashed, re-spawn next turn)', ); const crashedIds: string[] = []; for (const st of states) { st.sseAbort?.abort(); if (st.activeTurn) { st.activeTurn.settle({ ok: false, error: 'opencode server crashed mid-turn' }); st.activeTurn = null; } if (st.watchdog) { clearTimeout(st.watchdog); st.watchdog = null; } crashedIds.push(st.agentSessionId); } // Drop the demux map: every session id is stale against a fresh server. this.byOpencodeId.clear(); this.client = null; this.serverStarting = null; // force a re-spawn on the next ensureServer if (crashedIds.length > 0) { this.sql` UPDATE agent_sessions SET status = 'crashed' WHERE agent_session_id = ANY(${crashedIds}) AND status <> 'closed' `.catch((err) => { this.log.warn({ err: errMsg(err) }, 'opencode-server: failed to mark crashed sessions (non-fatal)'); }); } // Reclaim the port so a re-spawn on a fixed/leaked port isn't blocked. Best // effort; the next start uses a fresh ephemeral port anyway. reclaimPort(port); } /** * Phase 3 proactive health monitor (openchamber `runHealthCheckCycle` lift, * busy-aware). Probes the server's /global/health; on a sustained failure of a * NON-busy server, force a restart so the next turn isn't blocked by a wedged * (hung-but-not-exited) process. Busy servers are deferred via the stale-grace in * `decideRestart` — never tear down live work. Driven by the pool's periodic * sweep (best-effort; a crash-exit is already handled by `handleServerCrash` + * lazy `ensureServer` re-spawn, so this only catches the hung case). No-op when * the server was never started or a restart is already in flight. 
*/ async tickHealth(now: number = Date.now()): Promise { if (!this.child || this.restarting) return; const childExited = this.child.exitCode !== null || this.child.signalCode !== null; // An exited child is recovered lazily by ensureServer; don't double-restart it. if (childExited) return; const healthy = await this.probeHealth(); if (healthy) { this.consecutiveHealthFailures = 0; this.unhealthyBusySince = 0; return; } this.consecutiveHealthFailures += 1; const busy = this.isBusy(); const decision = decideRestart({ processExited: false, consecutiveFailures: this.consecutiveHealthFailures, busy, unhealthyBusySince: this.unhealthyBusySince, now, failureThreshold: DEFAULT_HEALTH_FAILURE_THRESHOLD, }); // Stamp the start of an unhealthy-while-busy window so the stale-grace can fire. if (busy && this.unhealthyBusySince === 0) this.unhealthyBusySince = now; if (decision.action === 'restart') { this.log.warn( { failures: this.consecutiveHealthFailures, busy, reason: decision.reason }, 'opencode-server: health monitor forcing restart', ); this.consecutiveHealthFailures = 0; this.unhealthyBusySince = 0; await this.restartServer(); } } private async probeHealth(): Promise { if (!this.client) return false; try { const res = await this.client.global.health(); return !res.error; } catch { return false; } } /** Force-kill the current server + reclaim its port; the next ensureServer * re-spawns (lazy). Mirrors handleServerCrash's state reset but is initiated by * the health monitor rather than the OS. */ private async restartServer(): Promise { if (this.restarting) return this.restarting; this.restarting = (async () => { const child = this.child; const port = this.port; this.up = false; // Fail in-flight turns + mark sessions crashed via the same path as a crash. if (child) { this.handleServerCrash(null, null, port ?? 
0); if (!child.killed) child.kill('SIGTERM'); } if (port) { reclaimPort(port); await waitForPortRelease(port, 3_000); } this.child = null; })().finally(() => { this.restarting = null; }); return this.restarting; } // ─── SSE read loop + demux + translate (1.3) + dedup (1.4) ─────────────────── /** Per-session SSE subscription, scoped to the session's worktree directory. * opencode scopes events by the `directory` query param (defaults to the * server's cwd if omitted), so two sessions in different worktrees each get * their own dir-scoped stream and never drop each other's events. Idempotent: * a no-op if this session's loop is already running. Started from ensureSession * (and defensively from prompt) once worktreePath is known. */ private startSessionEventLoop(state: SessionState): void { if (state.sseAbort) return; // already running const abort = new AbortController(); state.sseAbort = abort; void this.runSessionEventLoop(state, abort).finally(() => { // Only clear if this controller is still the live one (a later restart may // have already installed a new one). if (state.sseAbort === abort) state.sseAbort = null; }); } private async runSessionEventLoop(state: SessionState, abort: AbortController): Promise { const signal = abort.signal; while (this.up && this.client && !signal.aborted) { try { // Re-read worktreePath each (re)subscribe so a directory refresh is picked // up on reconnect. Passing `signal` lets close/dispose tear down a stream // that's parked in `for await` between events. const sub = await this.client.event.subscribe( { directory: state.worktreePath }, { signal }, ); for await (const ev of sub.stream) { if (signal.aborted) break; // Dir-scoped streams should only carry this session's events, but two // sessions sharing a worktree (possible post-P1.5-b) each receive BOTH // sessions' events — so drop anything that isn't ours, else the other // session's deltas get processed twice (once per loop). 
const sid = eventSessionId(ev); if (sid != null && sid !== state.agentSessionId) continue; this.dispatchEvent(ev); } if (this.up && !signal.aborted) { await this.reconcile(state); // recover an idle/error lost during the gap await sleep(SSE_RECONNECT_DELAY_MS); } } catch (err) { if (!this.up || signal.aborted) break; this.log.warn( { err: errMsg(err), agentSessionId: state.agentSessionId }, 'opencode-server: session event loop error; reconnecting', ); await this.reconcile(state); await sleep(SSE_RECONNECT_DELAY_MS); } } } /** Demux one event to the owning session's active turn. Unknown/between-turns → drop. */ private dispatchEvent(ev: Event): void { switch (ev.type) { // ─── session.next.* — live streaming events (the primary path) ───────── case 'session.next.text.delta': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); const cleaned = stripDcpTags(p.delta); if (cleaned) st.activeTurn.onEvent({ type: 'text', text: cleaned }); return; } case 'session.next.reasoning.delta': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); st.activeTurn.onEvent({ type: 'reasoning', text: p.delta }); return; } case 'session.next.tool.called': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); const snap: AcpToolSnapshot = { toolCallId: p.callID, title: p.tool, kind: null, status: 'in_progress', rawInput: p.input, rawOutput: undefined, }; st.activeTurn.onEvent({ type: 'tool_call', toolCall: snap }); return; } case 'session.next.tool.success': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); const output = p.content?.map((c) => ('text' in c ? (c as { text: string }).text : '')).join('') ?? 
''; const snap: AcpToolSnapshot = { toolCallId: p.callID, title: p.callID, kind: null, status: 'completed', rawInput: undefined, rawOutput: output, }; st.activeTurn.onEvent({ type: 'tool_update', toolCall: snap }); return; } case 'session.next.tool.failed': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); const snap: AcpToolSnapshot = { toolCallId: p.callID, title: p.callID, kind: null, status: 'failed', rawInput: undefined, rawOutput: errToString(p.error), }; st.activeTurn.onEvent({ type: 'tool_update', toolCall: snap }); return; } // ─── per-step usage (U.6) — token/cost accounting for opencode sessions ── case 'session.next.step.ended': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); // Accumulate this step's normalized usage onto the (chat_id, agent) row. // Fire-and-forget: a DB hiccup must not stall the turn. opencode emits this // once per LLM step, so a multi-tool turn sums several deltas. 
const usage = stepEndedToUsage(p); void this.accumulateUsage(st, usage); return; } // ─── message.part.* — terminal/post-hoc events (dedup gate) ──────────── case 'message.part.delta': { const p = ev.properties; const st = this.byOpencodeId.get(p.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); const isReasoning = p.field === 'reasoning' || st.partTypeById.get(p.partID) === 'reasoning'; if (isReasoning) { st.streamedPartKeys.add(`reasoning:${p.partID}`); st.activeTurn.onEvent({ type: 'reasoning', text: p.delta }); } else if (p.field === 'text') { st.streamedPartKeys.add(`text:${p.partID}`); const cleaned = stripDcpTags(p.delta); if (cleaned) st.activeTurn.onEvent({ type: 'text', text: cleaned }); } return; } case 'message.part.updated': { const part = ev.properties.part; const st = this.byOpencodeId.get(part.sessionID); if (!st?.activeTurn) return; this.bumpActivity(st); this.handleUpdatedPart(part, st); return; } // ─── lifecycle ───────────────────────────────────────────────────────── case 'session.idle': { const st = this.byOpencodeId.get(ev.properties.sessionID); if (!st) return; if (consumeTerminal(st) === 'swallow') return; // F.1: drop the post-abort orphan st.activeTurn?.settle({ ok: true }); return; } case 'session.error': { const sid = ev.properties.sessionID; if (!sid) return; const st = this.byOpencodeId.get(sid); if (!st) return; if (consumeTerminal(st) === 'swallow') return; // F.1: drop the post-abort orphan st.activeTurn?.settle({ ok: false, error: errToString(ev.properties.error) }); return; } default: return; } } /** Terminal part: dedup gate for text/reasoning; tool parts → tool_call/tool_update. 
*/ private handleUpdatedPart(part: Part, st: SessionState): void { const turn = st.activeTurn; if (!turn) return; if (part.type === 'text' || part.type === 'reasoning') { st.partTypeById.set(part.id, part.type); const key = resolvePartDedupeKey(part, part.type); if (key && st.streamedPartKeys.delete(key)) return; // already streamed via delta const raw = part.text ?? ''; const text = part.type === 'text' ? stripDcpTags(raw) : raw; if (text && part.time?.end != null) { turn.onEvent({ type: part.type, text }); } return; } if (part.type === 'tool') { const snap = toolPartToSnapshot(part); const status = part.state?.status; // tool_call on start (pending/running), tool_update on terminal (completed/error). // The current ACP path merges both into one frame; the contract keeps them // distinct because opencode's SSE distinguishes start from result. const event: AgentEvent = status === 'completed' || status === 'error' ? { type: 'tool_update', toolCall: snap } : { type: 'tool_call', toolCall: snap }; turn.onEvent(event); return; } // NOTE: opencode's SSE payload union carries no available-commands event, so the // AgentEvent 'commands' arm is intentionally never emitted here (1.3). } // ─── turn-completion resilience (watchdog + reconnect reconcile) ───────────── /** Reset the inactivity backstop on any event routed to a session's active turn. */ private bumpActivity(st: SessionState): void { if (!st.activeTurn) return; // A live turn is producing → the post-abort orphan-terminal window is over. noteTurnActivity(st); if (st.watchdog) clearTimeout(st.watchdog); st.watchdog = setTimeout(() => { void this.onTurnStall(st); }, TURN_INACTIVITY_MS); st.watchdog.unref?.(); } /** Watchdog fired: reconcile once; if the server says still-running we can't tell, so fail closed. * Also mark the agent_sessions row crashed so a stale session isn't resumed next turn. 
*/ private async onTurnStall(st: SessionState): Promise { const settled = await this.reconcile(st); if (!settled) { this.log.warn({ agentSessionId: st.agentSessionId }, 'opencode-server: turn stalled (no activity), failing + marking crashed'); await this.sql` UPDATE agent_sessions SET status = 'crashed' WHERE agent_session_id = ${st.agentSessionId} `.catch(() => {}); st.activeTurn?.settle({ ok: false, error: 'turn timed out (no activity)' }); } } /** * Ask the server whether this session's turn already finished — recovers a * session.idle/error lost during an SSE gap. Returns true if it settled the turn. * Inconclusive (still running / call failed) → false; the watchdog covers that. */ private async reconcile(st: SessionState): Promise { const turn = st.activeTurn; if (!turn || !this.client) return false; try { const res = await this.client.session.messages({ sessionID: st.agentSessionId, directory: st.worktreePath, }); if (res.error || !res.data) return false; let lastAssistant: AssistantMessage | undefined; for (let i = res.data.length - 1; i >= 0; i--) { const info = res.data[i]!.info; if (info.role === 'assistant') { lastAssistant = info; break; } } if (!lastAssistant) return false; if (lastAssistant.error != null) { turn.settle({ ok: false, error: errToString(lastAssistant.error) }); return true; } if (lastAssistant.time.completed != null) { turn.settle({ ok: true }); return true; } return false; // still running — the live stream will deliver session.idle } catch { return false; // inconclusive — watchdog backstop covers it } } // ─── per-step usage persistence (U.6) ──────────────────────────────────────── /** * Accumulate one `session.next.step.ended`'s normalized usage onto the session's * agent_sessions row, keyed by the resumed `agent_session_id` (unique per active * row — the dispatcher's `(chat_id, agent)` lookup wrote it). Running totals for * the whole conversation context (not last-step). Zero-delta steps are skipped to * avoid a no-op write. 
Errors are swallowed: usage telemetry must never fail a turn.
   */
  private async accumulateUsage(st: SessionState, u: StepUsage): Promise<void> {
    if (u.input === 0 && u.output === 0 && u.cost === 0) return;
    try {
      await this.sql`
        UPDATE agent_sessions
        SET input_tokens = input_tokens + ${u.input},
            output_tokens = output_tokens + ${u.output},
            cost = cost + ${u.cost}
        WHERE agent_session_id = ${st.agentSessionId}
      `;
    } catch (err) {
      this.log.warn(
        { err: errMsg(err), agentSessionId: st.agentSessionId },
        'opencode-server: failed to persist step usage (non-fatal)',
      );
    }
  }

  // ─── ensureSession: create-or-resume against agent_sessions (1.5) ────────────

  /**
   * Create or resume the opencode session backing `(opts.chatId, opts.agent)`,
   * persist it in agent_sessions, register its demux entry and start its SSE loop.
   *
   * @param sessionId BooCode session id, stored on the row and echoed in the handle.
   * @param opts chat/agent/worktree/model routing for this session.
   * @returns handle carrying the opencode session id and the server port.
   * @throws if the server cannot be started or `session.create` fails.
   */
  async ensureSession(sessionId: string, opts: EnsureSessionOpts): Promise<AgentSessionHandle> {
    await this.ensureServer();
    if (!this.client) throw new Error('opencode-server: client not ready after ensureServer');
    const configHash = sessionConfigHash(opts.model);
    // P1.5-b: agent_sessions is keyed (chat_id, agent) — the tab/chat is the
    // context unit (two tabs in one session = two contexts sharing one worktree).
    // session_id + worktree_id are retained as informational (SET NULL) columns.
    const [row] = await this.sql<{ agent_session_id: string | null; status: string; config_hash: string | null }[]>`
      SELECT agent_session_id, status, config_hash
      FROM agent_sessions
      WHERE chat_id = ${opts.chatId} AND agent = ${opts.agent}
    `;
    let agentSessionId = row?.agent_session_id ?? null;
    // Don't resume crashed sessions or sessions whose config drifted (model change).
    const shouldResume =
      agentSessionId &&
      row!.status !== 'crashed' &&
      (row!.config_hash == null || row!.config_hash === configHash);
    if (!shouldResume) {
      if (agentSessionId) {
        this.log.info({ sessionId, oldStatus: row!.status, hashMatch: row!.config_hash === configHash }, 'opencode-server: not resuming stale session, creating fresh');
        // Stop the stale entry's SSE loop before forgetting it; once the entry is
        // gone nothing else holds its AbortController, so the loop would otherwise
        // keep a subscription open for the life of the server.
        this.byOpencodeId.get(agentSessionId)?.sseAbort?.abort();
        this.byOpencodeId.delete(agentSessionId);
      }
      const created = await this.client.session.create({ directory: opts.worktreePath });
      if (created.error || !created.data) {
        throw new Error(`opencode-server: session.create failed: ${errToString(created.error)}`);
      }
      agentSessionId = created.data.id;
      await this.sql`
        INSERT INTO agent_sessions
          (chat_id, session_id, worktree_id, agent, backend, agent_session_id, server_port, status, last_active_at, config_hash)
        VALUES
          (${opts.chatId}, ${sessionId}, ${opts.worktreeId}, ${opts.agent}, 'opencode_server', ${agentSessionId}, ${this.port}, 'active', clock_timestamp(), ${configHash})
        ON CONFLICT (chat_id, agent) DO UPDATE SET
          session_id = EXCLUDED.session_id,
          worktree_id = EXCLUDED.worktree_id,
          backend = 'opencode_server',
          agent_session_id = EXCLUDED.agent_session_id,
          server_port = EXCLUDED.server_port,
          status = 'active',
          last_active_at = clock_timestamp(),
          config_hash = EXCLUDED.config_hash
      `;
    } else {
      await this.sql`
        UPDATE agent_sessions
        SET status = 'active', last_active_at = clock_timestamp(), server_port = ${this.port}, config_hash = ${configHash}
        WHERE chat_id = ${opts.chatId} AND agent = ${opts.agent}
      `;
    }
    // Both branches above guarantee agentSessionId is non-null.
    const ocSessionId = agentSessionId!;
    // Register / refresh the demux entry the SSE loop keys on. Preserve an existing
    // entry (and any in-flight turn) — just refresh the routing fields.
    let state = this.byOpencodeId.get(ocSessionId);
    if (state) {
      state.boocodeSessionId = sessionId;
      state.worktreePath = opts.worktreePath;
    } else {
      state = {
        boocodeSessionId: sessionId,
        agentSessionId: ocSessionId,
        worktreePath: opts.worktreePath,
        streamedPartKeys: new Set(),
        partTypeById: new Map(),
        activeTurn: null,
        watchdog: null,
        sseAbort: null,
        swallowNextTerminal: false,
      };
      this.byOpencodeId.set(ocSessionId, state);
    }
    // Start this session's own SSE loop, scoped to its worktree directory. Both
    // fresh-create and resume reach here; idempotent, so a re-ensure (e.g. a
    // second turn) won't spawn a duplicate loop.
    this.startSessionEventLoop(state);
    return {
      sessionId,
      agent: opts.agent,
      backend: 'opencode_server',
      chatId: opts.chatId,
      worktreeId: opts.worktreeId,
      agentSessionId: ocSessionId,
      serverPort: this.port,
    };
  }

  // ─── prompt: send one turn (1.6) ─────────────────────────────────────────────

  /**
   * Submit one turn and resolve when it settles: via a terminal SSE event, a
   * submission error, an abort on `ctx.signal`, a server crash, or the watchdog.
   * Never rejects for turn-level failures; those resolve as `{ ok: false, error }`.
   *
   * @throws only when the client is not ready or the handle has no agentSessionId.
   */
  async prompt(handle: AgentSessionHandle, input: string, ctx: PromptCtx): Promise<TurnResult> {
    if (!this.client) throw new Error('opencode-server: client not ready');
    const oc = handle.agentSessionId;
    if (!oc) throw new Error('opencode-server: handle has no agentSessionId');
    let state = this.byOpencodeId.get(oc);
    if (!state) {
      state = {
        boocodeSessionId: handle.sessionId,
        agentSessionId: oc,
        worktreePath: ctx.worktreePath,
        streamedPartKeys: new Set(),
        partTypeById: new Map(),
        activeTurn: null,
        watchdog: null,
        sseAbort: null,
        swallowNextTerminal: false,
      };
      this.byOpencodeId.set(oc, state);
    }
    const session = state;
    // Authoritative per-turn directory for SDK routing + reconcile.
    session.worktreePath = ctx.worktreePath;
    // Defensive: ensureSession normally starts the loop, but if prompt is reached
    // with a freshly-created state (no loop yet), start it so the turn streams.
    // Idempotent when ensureSession already started one.
    this.startSessionEventLoop(session);
    const client = this.client;
    return await new Promise<TurnResult>((resolve) => {
      let settled = false;
      // Per-turn teardown: drop the turn, its watchdog and its dedup state.
      const cleanup = () => {
        session.activeTurn = null;
        if (session.watchdog) {
          clearTimeout(session.watchdog);
          session.watchdog = null;
        }
        session.streamedPartKeys.clear();
        session.partTypeById.clear();
        ctx.signal.removeEventListener('abort', onAbort);
      };
      // First caller wins; later settle attempts for this turn are ignored.
      const settle = (r: TurnResult) => {
        if (settled) return;
        settled = true;
        cleanup();
        resolve(r);
      };
      const onAbort = () => {
        // Abort the turn only — never the server.
        client.session.abort({ sessionID: oc, directory: ctx.worktreePath }).catch(() => {});
        // F.1: opencode emits one trailing session.idle/error for the cancelled
        // turn — arm the guard so it's swallowed, not used to settle the next turn.
        armAbortGuard(session);
        settle({ ok: false, error: 'aborted' });
      };
      session.activeTurn = { onEvent: ctx.onEvent, settle };
      this.bumpActivity(session); // arm the inactivity backstop
      if (ctx.signal.aborted) {
        onAbort();
        return;
      }
      ctx.signal.addEventListener('abort', onAbort, { once: true });
      const model = parseModel(ctx.model);
      client.session
        .promptAsync({
          sessionID: oc,
          directory: ctx.worktreePath,
          parts: [{ type: 'text', text: input }],
          ...(model ? { model } : {}),
        })
        .then((res) => {
          // promptAsync is fire-and-forget (204); the turn completes via session.idle.
          // Only a submission error settles here.
          if (res.error) settle({ ok: false, error: errToString(res.error) });
        })
        .catch((err) => settle({ ok: false, error: errMsg(err) }));
    });
  }

  // ─── teardown ────────────────────────────────────────────────────────────────

  /** Stop the session's SSE loop, drop its demux entry and mark its row 'closed'
   * (a failed DB write is ignored). */
  async closeSession(handle: AgentSessionHandle): Promise<void> {
    if (handle.agentSessionId) {
      // Stop this session's SSE loop before dropping its demux entry.
      this.byOpencodeId.get(handle.agentSessionId)?.sseAbort?.abort();
      this.byOpencodeId.delete(handle.agentSessionId);
    }
    await this.sql`
      UPDATE agent_sessions
      SET status = 'closed'
      WHERE chat_id = ${handle.chatId} AND agent = ${handle.agent}
    `.catch(() => {});
  }

  /** Tear the backend down: abort every SSE loop, forget all sessions and stop
   * the server (SIGTERM, escalating to SIGKILL after 5 s if it is still alive). */
  async dispose(): Promise<void> {
    this.up = false;
    // Abort every per-session SSE loop so none survive the teardown.
    for (const st of this.byOpencodeId.values()) st.sseAbort?.abort();
    const child = this.child;
    this.child = null;
    this.client = null;
    this.byOpencodeId.clear();
    // `child.killed` only records that a signal was SENT, so it is already true
    // right after SIGTERM and cannot gate the escalation. Liveness is
    // exitCode/signalCode both still null.
    const stillRunning = (c: ChildProcess): boolean => c.exitCode === null && c.signalCode === null;
    if (child && stillRunning(child)) {
      child.kill('SIGTERM');
      const t = setTimeout(() => {
        if (stillRunning(child)) child.kill('SIGKILL');
      }, 5_000);
      t.unref();
    }
  }
}

// ─── helpers ──────────────────────────────────────────────────────────────────

/** Extract the opencode sessionID an event belongs to, across event shapes.
 * Most carry `properties.sessionID`; `message.part.updated` nests it under
 * `properties.part.sessionID`. Returns null when the event has no session
 * (the per-session loop then leaves it to dispatchEvent, which drops it). */
function eventSessionId(ev: Event): string | null {
  const props = (ev as { properties?: unknown }).properties;
  if (!props || typeof props !== 'object') return null;
  if (ev.type === 'message.part.updated') {
    const part = (props as { part?: { sessionID?: string } }).part;
    return part?.sessionID ?? null;
  }
  return (props as { sessionID?: string }).sessionID ?? null;
}

/** BooCoder model string "provider/model" → opencode's structured {providerID, modelID}. */
function parseModel(model: string | undefined): { providerID: string; modelID: string } | undefined {
  if (!model || !model.trim()) return undefined;
  const trimmed = model.trim();
  const idx = trimmed.indexOf('/');
  if (idx > 0 && idx < trimmed.length - 1) {
    return { providerID: trimmed.slice(0, idx), modelID: trimmed.slice(idx + 1) };
  }
  // No slash but non-empty → infer llama-swap (the only configured provider).
  // Guard against bare '/' or trailing/leading slash.
  if (idx < 0 && trimmed.length > 0) {
    return { providerID: 'llama-swap', modelID: trimmed };
  }
  return undefined;
}

/** Ported verbatim from Paseo opencode-agent.ts: id → message-id fallback → null. */
function resolvePartDedupeKey(part: { id: string; messageID: string }, type: string): string | null {
  if (part.id.trim().length > 0) return `${type}:${part.id}`;
  if (part.messageID.trim().length > 0) return `${type}:message:${part.messageID}`;
  return null;
}

/** opencode ToolPart → ACP-shaped snapshot (reuses the existing persist/render path). */
function toolPartToSnapshot(part: ToolPart): AcpToolSnapshot {
  const state = part.state;
  let rawInput: unknown;
  let rawOutput: unknown;
  let title: string | undefined;
  if (state) {
    if ('input' in state) rawInput = (state as { input?: unknown }).input;
    // Prefer a result; fall back to the error payload when there is no output.
    if ('output' in state) rawOutput = (state as { output?: unknown }).output;
    else if ('error' in state) rawOutput = (state as { error?: unknown }).error;
    if ('title' in state) title = (state as { title?: string }).title;
  }
  return {
    toolCallId: part.callID,
    title: title ?? part.tool,
    kind: null,
    status: mapToolStatus(state?.status),
    rawInput,
    rawOutput,
  };
}

/** opencode tool state status → ACP ToolCallStatus; unknown/absent → null. */
function mapToolStatus(s: ToolState['status'] | undefined): ToolCallStatus | null {
  switch (s) {
    case 'pending':
      return 'pending';
    case 'running':
      return 'in_progress';
    case 'completed':
      return 'completed';
    case 'error':
      return 'failed';
    default:
      return null;
  }
}

/**
 * Reclaim a loopback port a dead opencode child may still hold (lift of
 * openchamber `killProcessOnPort`). Best-effort, POSIX-only (`lsof`/`kill`); a
 * failure is harmless because the next spawn allocates a fresh ephemeral port.
 * Never kills this process. Synchronous + short-timeout so the crash handler
 * doesn't block.
*/ function reclaimPort(port: number | null): void { if (!port || process.platform === 'win32') return; try { const res = spawnSync('lsof', ['-ti', `:${port}`], { encoding: 'utf8', timeout: 3_000, windowsHide: true }); const out = res.stdout || ''; const myPid = process.pid; for (const pidStr of out.split(/\s+/)) { const pid = parseInt(pidStr.trim(), 10); if (pid && pid !== myPid) { try { spawnSync('kill', ['-9', String(pid)], { stdio: 'ignore', timeout: 2_000 }); } catch { // ignore — best effort } } } } catch { // lsof absent or failed — the fresh-ephemeral-port spawn doesn't need this. } } /** * Resolve true once nothing is listening on `port` (lift of openchamber * `waitForPortRelease`). Used before re-spawning on a fixed port; with ephemeral * ports it's a fast no-op. Probes 127.0.0.1; resolves false at the deadline. */ function waitForPortRelease(port: number, timeoutMs: number): Promise { const deadline = Date.now() + timeoutMs; return new Promise((resolve) => { const attempt = () => { const socket = netConnect({ port, host: '127.0.0.1' }); let settled = false; const finish = (released: boolean) => { if (settled) return; settled = true; socket.removeAllListeners(); socket.destroy(); if (released || Date.now() >= deadline) { resolve(released); return; } setTimeout(attempt, 150); }; socket.once('connect', () => finish(false)); socket.once('error', (err: NodeJS.ErrnoException) => { if (err && (err.code === 'ECONNREFUSED' || err.code === 'EHOSTUNREACH')) finish(true); else finish(false); }); socket.setTimeout(500, () => finish(true)); }; attempt(); }); } /** Bind-probe an ephemeral port on loopback. 
*/ function freePort(): Promise { return new Promise((resolve, reject) => { const srv = createServer(); srv.unref(); srv.on('error', reject); srv.listen(0, '127.0.0.1', () => { const addr = srv.address(); if (addr && typeof addr === 'object') { const { port } = addr; srv.close(() => resolve(port)); } else { srv.close(() => reject(new Error('opencode-server: could not determine a free port'))); } }); }); } /** Resolve when the child prints the ready line; reject on timeout or early exit. */ function waitForReady(child: ChildProcess, timeoutMs: number): Promise { return new Promise((resolve, reject) => { let done = false; let stderrBuf = ''; const finish = (err?: Error) => { if (done) return; done = true; clearTimeout(timer); child.stdout?.off('data', onOut); child.stderr?.off('data', onErr); child.off('exit', onExit); if (err) reject(err); else resolve(); }; const onOut = (buf: Buffer) => { if (buf.toString().includes('opencode server listening on')) finish(); }; const onErr = (buf: Buffer) => { stderrBuf += buf.toString(); }; const onExit = (code: number | null) => finish(new Error(`opencode serve exited before ready (code ${code}); stderr: ${stderrBuf.slice(-2000)}`)); const timer = setTimeout( () => finish(new Error(`opencode serve not ready in ${timeoutMs}ms; stderr: ${stderrBuf.slice(-2000)}`)), timeoutMs, ); child.stdout?.on('data', onOut); child.stderr?.on('data', onErr); child.on('exit', onExit); }); } function sleep(ms: number): Promise { return new Promise((r) => setTimeout(r, ms)); } /** Strip opencode-dcp plugin tags that render as literal text in the UI. */ function stripDcpTags(s: string): string { return s.replace(/[^<]*<\/dcp-message-id>/g, ''); } function errMsg(e: unknown): string { return e instanceof Error ? 
e.message : String(e); } function errToString(e: unknown): string { if (e == null) return 'unknown error'; if (typeof e === 'string') return e; if (e instanceof Error) return e.message; try { return JSON.stringify(e); } catch { return String(e); } } /** Hash of stable config — detects model changes across sessions without * invalidating on ephemeral state like the random server port (which changes * every BooCoder restart). */ function sessionConfigHash(model: string): string { return createHash('sha256').update(`opencode_server|${model}`).digest('hex').slice(0, 16); }