feat(coder): v2.6 Phase 3 — lifecycle hardening (idle evict, crash recovery, worktree reaper)
Idle TTL eviction per (chat,agent) + LRU cap (never a busy backend); pure lifecycle-decisions.ts (TDD). Crash recovery lifts openchamber's health-monitor + busy-aware-restart + stale-grace state machine into opencode-server.ts (+ port reclaim) and warm-acp.ts; opencode crash -> fresh sessions, ACP -> re-session/new. F.1 turn-guard + U.6 usage preserved (their tests pass). Orphan worktree reaper (1h grace, superset-style dirty/unpushed preflight, Paseo soft-delete) + close hooks + diff re-baseline after apply_pending. 35 new tests + DB-opt-in reconnect test; 215 coder tests pass; tsc + build clean. Completes v2.6. Follow-ups out of scope: apps/server close-hook caller, 3.7 DiffPanel staging hint, live smokes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
233
apps/coder/src/services/__tests__/agent-pool.test.ts
Normal file
233
apps/coder/src/services/__tests__/agent-pool.test.ts
Normal file
@@ -0,0 +1,233 @@
|
||||
import { describe, it, expect, vi } from 'vitest';
|
||||
import { AgentPool, OPENCODE_POOL_KEY } from '../agent-pool.js';
|
||||
import type {
|
||||
AgentBackend,
|
||||
AgentSessionHandle,
|
||||
EnsureSessionOpts,
|
||||
PromptCtx,
|
||||
TurnResult,
|
||||
} from '../agent-backend.js';
|
||||
|
||||
/**
|
||||
* v2.6 Phase 3 — AgentPool lifecycle unit test (T.1). No DB / no child process:
|
||||
* a fake AgentBackend records dispose + reports busy/health, so we exercise
|
||||
* get-or-create, idle eviction, the LRU cap, the busy-never-evict rule, closeChat,
|
||||
* and dispose-drains directly. The pure decisions are covered separately in
|
||||
* backends/__tests__/lifecycle-decisions.test.ts; this verifies the wiring.
|
||||
*/
|
||||
|
||||
class FakeBackend implements AgentBackend {
|
||||
disposed = 0;
|
||||
closedSessions = 0;
|
||||
private busyFlag = false;
|
||||
tickHealthCalls = 0;
|
||||
|
||||
constructor(public readonly name = 'fake') {}
|
||||
|
||||
setBusy(b: boolean): void {
|
||||
this.busyFlag = b;
|
||||
}
|
||||
|
||||
// — AgentBackend —
|
||||
async ensureSession(sessionId: string, opts: EnsureSessionOpts): Promise<AgentSessionHandle> {
|
||||
return {
|
||||
sessionId,
|
||||
agent: opts.agent,
|
||||
backend: 'acp_warm',
|
||||
chatId: opts.chatId,
|
||||
worktreeId: opts.worktreeId,
|
||||
agentSessionId: 'fake-session',
|
||||
serverPort: null,
|
||||
};
|
||||
}
|
||||
async prompt(_h: AgentSessionHandle, _input: string, _ctx: PromptCtx): Promise<TurnResult> {
|
||||
return { ok: true };
|
||||
}
|
||||
async closeSession(): Promise<void> {
|
||||
this.closedSessions++;
|
||||
}
|
||||
async dispose(): Promise<void> {
|
||||
this.disposed++;
|
||||
}
|
||||
health(): 'up' | 'down' {
|
||||
return 'up';
|
||||
}
|
||||
isBusy(): boolean {
|
||||
return this.busyFlag;
|
||||
}
|
||||
async tickHealth(): Promise<void> {
|
||||
this.tickHealthCalls++;
|
||||
}
|
||||
}
|
||||
|
||||
describe('AgentPool — get/register/touch (3.1)', () => {
|
||||
it('register then get returns the same backend', () => {
|
||||
const pool = new AgentPool();
|
||||
const b = new FakeBackend();
|
||||
pool.register('chat-1', 'goose', b);
|
||||
expect(pool.get('chat-1', 'goose')).toBe(b);
|
||||
expect(pool.get('chat-1', 'qwen')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('peek does NOT exist for a missing key', () => {
|
||||
const pool = new AgentPool();
|
||||
expect(pool.peek('nope', 'goose')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('health reports size + busy count', () => {
|
||||
const pool = new AgentPool();
|
||||
const a = new FakeBackend();
|
||||
const b = new FakeBackend();
|
||||
b.setBusy(true);
|
||||
pool.register('c1', 'goose', a);
|
||||
pool.register('c2', 'qwen', b);
|
||||
expect(pool.health()).toEqual({ size: 2, busy: 1 });
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentPool.sweep — idle TTL eviction (3.1)', () => {
|
||||
it('evicts an idle backend past the TTL and disposes it', async () => {
|
||||
const pool = new AgentPool({ idleTtlMs: 1_000, maxLive: 100 });
|
||||
const b = new FakeBackend();
|
||||
pool.register('c1', 'goose', b);
|
||||
// Sweep with now far past the registration → idle → evicted.
|
||||
const { evicted } = await pool.sweep(Date.now() + 10_000);
|
||||
expect(evicted).toEqual(['c1:goose']);
|
||||
expect(b.disposed).toBe(1);
|
||||
expect(pool.get('c1', 'goose')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('never evicts a busy backend even past the TTL', async () => {
|
||||
const pool = new AgentPool({ idleTtlMs: 1_000, maxLive: 100 });
|
||||
const b = new FakeBackend();
|
||||
b.setBusy(true);
|
||||
pool.register('c1', 'goose', b);
|
||||
const { evicted } = await pool.sweep(Date.now() + 10_000);
|
||||
expect(evicted).toEqual([]);
|
||||
expect(b.disposed).toBe(0);
|
||||
expect(pool.get('c1', 'goose')).toBe(b);
|
||||
});
|
||||
|
||||
it('touch keeps a backend warm so the TTL measures from the last turn', async () => {
|
||||
const pool = new AgentPool({ idleTtlMs: 5_000, maxLive: 100 });
|
||||
const b = new FakeBackend();
|
||||
pool.register('c1', 'goose', b);
|
||||
const base = Date.now();
|
||||
// 4s later, touch — resets activity. A sweep at +6s from base is only +2s from
|
||||
// the touch → still within TTL → not evicted.
|
||||
vi.spyOn(Date, 'now').mockReturnValue(base + 4_000);
|
||||
pool.touch('c1', 'goose');
|
||||
vi.restoreAllMocks();
|
||||
const { evicted } = await pool.sweep(base + 6_000);
|
||||
expect(evicted).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentPool.sweep — LRU cap (3.4)', () => {
|
||||
it('evicts the least-recently-used beyond the cap', async () => {
|
||||
const pool = new AgentPool({ idleTtlMs: 1_000_000, maxLive: 2 });
|
||||
const base = 1_000_000;
|
||||
const mk = (key: string, regAt: number) => {
|
||||
vi.spyOn(Date, 'now').mockReturnValue(regAt);
|
||||
const b = new FakeBackend(key);
|
||||
const [chat, agent] = key.split(':');
|
||||
pool.register(chat!, agent!, b);
|
||||
vi.restoreAllMocks();
|
||||
return b;
|
||||
};
|
||||
const a = mk('c1:goose', base + 100);
|
||||
const b = mk('c2:goose', base + 300);
|
||||
const c = mk('c3:goose', base + 200);
|
||||
// 3 entries, cap 2, all within idle TTL → LRU (oldest = a@+100) evicted.
|
||||
const { evicted } = await pool.sweep(base + 1_000);
|
||||
expect(evicted).toEqual(['c1:goose']);
|
||||
expect(a.disposed).toBe(1);
|
||||
expect(b.disposed).toBe(0);
|
||||
expect(c.disposed).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentPool.sweep — proactive health probe (3.2)', () => {
|
||||
it('drives each backend tickHealth before eviction', async () => {
|
||||
const pool = new AgentPool({ idleTtlMs: 1_000_000, maxLive: 100 });
|
||||
const b = new FakeBackend();
|
||||
pool.register('c1', 'opencode', b);
|
||||
await pool.sweep(Date.now());
|
||||
expect(b.tickHealthCalls).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentPool.closeChat — chat-close teardown (3.3)', () => {
|
||||
it('disposes only the matching chat keys, leaving others + the shared server', async () => {
|
||||
const pool = new AgentPool();
|
||||
const goose = new FakeBackend('goose');
|
||||
const qwen = new FakeBackend('qwen');
|
||||
const other = new FakeBackend('other-chat');
|
||||
const ocServer = new FakeBackend('opencode-server');
|
||||
pool.register('chat-1', 'goose', goose);
|
||||
pool.register('chat-1', 'qwen', qwen);
|
||||
pool.register('chat-2', 'goose', other);
|
||||
pool.register(OPENCODE_POOL_KEY, 'opencode', ocServer);
|
||||
|
||||
const removed = await pool.closeChat('chat-1');
|
||||
expect(removed.sort()).toEqual(['chat-1:goose', 'chat-1:qwen']);
|
||||
expect(goose.disposed).toBe(1);
|
||||
expect(qwen.disposed).toBe(1);
|
||||
// other chat + shared opencode server untouched.
|
||||
expect(other.disposed).toBe(0);
|
||||
expect(ocServer.disposed).toBe(0);
|
||||
expect(pool.peek('chat-2', 'goose')).toBe(other);
|
||||
expect(pool.peek(OPENCODE_POOL_KEY, 'opencode')).toBe(ocServer);
|
||||
});
|
||||
|
||||
it('does not dispose a busy backend on closeChat', async () => {
|
||||
const pool = new AgentPool();
|
||||
const b = new FakeBackend();
|
||||
b.setBusy(true);
|
||||
pool.register('chat-1', 'goose', b);
|
||||
const removed = await pool.closeChat('chat-1');
|
||||
expect(removed).toEqual([]);
|
||||
expect(b.disposed).toBe(0);
|
||||
});
|
||||
|
||||
it('does not match a chat id that is a prefix of another', async () => {
|
||||
// 'chat-1' must not match 'chat-10' — keys are `${chatId}:${agent}` so the
|
||||
// colon delimiter prevents the prefix collision.
|
||||
const pool = new AgentPool();
|
||||
const a = new FakeBackend();
|
||||
const b = new FakeBackend();
|
||||
pool.register('chat-1', 'goose', a);
|
||||
pool.register('chat-10', 'goose', b);
|
||||
await pool.closeChat('chat-1');
|
||||
expect(a.disposed).toBe(1);
|
||||
expect(b.disposed).toBe(0);
|
||||
expect(pool.peek('chat-10', 'goose')).toBe(b);
|
||||
});
|
||||
});
|
||||
|
||||
describe('AgentPool.dispose — drain all (T.1)', () => {
|
||||
it('disposes every backend and clears the map', async () => {
|
||||
const pool = new AgentPool();
|
||||
const a = new FakeBackend();
|
||||
const b = new FakeBackend();
|
||||
pool.register('c1', 'goose', a);
|
||||
pool.register('c2', 'qwen', b);
|
||||
await pool.dispose();
|
||||
expect(a.disposed).toBe(1);
|
||||
expect(b.disposed).toBe(1);
|
||||
expect(pool.health()).toEqual({ size: 0, busy: 0 });
|
||||
});
|
||||
|
||||
it('tolerates a backend whose dispose throws', async () => {
|
||||
const pool = new AgentPool();
|
||||
const good = new FakeBackend();
|
||||
const bad = new FakeBackend();
|
||||
bad.dispose = async () => {
|
||||
throw new Error('boom');
|
||||
};
|
||||
pool.register('c1', 'goose', bad);
|
||||
pool.register('c2', 'qwen', good);
|
||||
await expect(pool.dispose()).resolves.toBeUndefined();
|
||||
expect(good.disposed).toBe(1);
|
||||
});
|
||||
});
|
||||
170
apps/coder/src/services/__tests__/reconnect_integration.test.ts
Normal file
170
apps/coder/src/services/__tests__/reconnect_integration.test.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { readFileSync, existsSync } from 'node:fs';
|
||||
import { rm, mkdir } from 'node:fs/promises';
|
||||
import { resolve } from 'node:path';
|
||||
import postgres from 'postgres';
|
||||
import {
|
||||
ensureSessionWorktree,
|
||||
closeChatBackendState,
|
||||
rebaselineWorktreeAfterApply,
|
||||
} from '../worktrees.js';
|
||||
import { reapOrphanWorktrees } from '../orphan-worktree-reaper.js';
|
||||
import { hostExec } from '../host-exec.js';
|
||||
|
||||
/**
|
||||
* v2.6 Phase 3 (3.6) — reconnect-after-restart integration test.
|
||||
*
|
||||
* Proves the DB-truth side of crash/restart recovery: a BooCoder restart wipes the
|
||||
* in-memory pool, but the persistent `worktrees` + `agent_sessions` rows survive,
|
||||
* so the "next turn" re-resolves the SAME worktree (reattach, no new dir) and the
|
||||
* agent-session row is still there to resume from. Also exercises the chat-close
|
||||
* hook (3.3), the apply re-baseline (3.5), and the orphan reaper (3.4) end-to-end
|
||||
* against a real git repo + postgres.
|
||||
*
|
||||
* Requires DATABASE_URL (DB-opt-in; skips cleanly otherwise) AND git on PATH. Runs:
|
||||
* DATABASE_URL='postgres://boocode:devpass@localhost:5500/boochat' pnpm -C apps/coder test
|
||||
*/
|
||||
describe.runIf(!!process.env.DATABASE_URL)('reconnect after restart (Phase 3)', () => {
|
||||
let sql: ReturnType<typeof postgres>;
|
||||
const stamp = Date.now();
|
||||
const projectDir = `/tmp/boocode-reconnect-proj-${stamp}`;
|
||||
let projectId: string;
|
||||
let sessionId: string;
|
||||
let chatId: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
sql = postgres(process.env.DATABASE_URL!, { max: 3 });
|
||||
|
||||
// Both schemas land in the one boochat DB: server owns sessions/chats/projects,
|
||||
// coder owns worktrees/agent_sessions (FK targets must pre-exist → server first).
|
||||
const serverSchema = resolve(__dirname, '../../../../server/src/schema.sql');
|
||||
const coderSchema = resolve(__dirname, '../../schema.sql');
|
||||
await sql.unsafe(readFileSync(serverSchema, 'utf8'));
|
||||
await sql.unsafe(readFileSync(coderSchema, 'utf8'));
|
||||
|
||||
// A real git repo with one commit so worktree add / diff / rev-parse work.
|
||||
await mkdir(projectDir, { recursive: true });
|
||||
await hostExec(
|
||||
`cd ${projectDir} && git init -q && git config user.email t@t && git config user.name t ` +
|
||||
`&& echo hello > README.md && git add -A && git commit -qm init`,
|
||||
{ timeoutMs: 20_000 },
|
||||
);
|
||||
|
||||
const [project] = await sql<{ id: string }[]>`
|
||||
INSERT INTO projects (name, path, status) VALUES ('reconnect-test', ${projectDir}, 'open') RETURNING id
|
||||
`;
|
||||
projectId = project!.id;
|
||||
const [session] = await sql<{ id: string }[]>`
|
||||
INSERT INTO sessions (project_id, name, model, status)
|
||||
VALUES (${projectId}, 'recon', 'm', 'open') RETURNING id
|
||||
`;
|
||||
sessionId = session!.id;
|
||||
const [chat] = await sql<{ id: string }[]>`
|
||||
INSERT INTO chats (session_id, name, status) VALUES (${sessionId}, 'tab', 'open') RETURNING id
|
||||
`;
|
||||
chatId = chat!.id;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (sql) {
|
||||
// Best-effort worktree cleanup before dropping rows.
|
||||
const rows = await sql<{ path: string }[]>`SELECT path FROM worktrees WHERE session_id = ${sessionId}`.catch(() => []);
|
||||
for (const r of rows) {
|
||||
await hostExec(`git -C ${projectDir} worktree remove ${r.path} --force`, { timeoutMs: 10_000 }).catch(() => {});
|
||||
}
|
||||
await sql`DELETE FROM agent_sessions WHERE chat_id = ${chatId}`.catch(() => {});
|
||||
await sql`DELETE FROM worktrees WHERE session_id = ${sessionId}`.catch(() => {});
|
||||
await sql`DELETE FROM chats WHERE id = ${chatId}`.catch(() => {});
|
||||
await sql`DELETE FROM sessions WHERE id = ${sessionId}`.catch(() => {});
|
||||
await sql`DELETE FROM projects WHERE id = ${projectId}`.catch(() => {});
|
||||
await sql.end({ timeout: 5 });
|
||||
}
|
||||
await rm(projectDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('reattaches the SAME worktree across a simulated restart (no new dir)', async () => {
|
||||
// "Turn 1" — first ensureSessionWorktree creates the worktree + row.
|
||||
const first = await ensureSessionWorktree(sql, projectDir, sessionId);
|
||||
expect(existsSync(first.worktreePath)).toBe(true);
|
||||
expect(first.baseCommit).toBeTruthy();
|
||||
|
||||
// Simulate an agent_sessions row written by turn 1 (opencode).
|
||||
await sql`
|
||||
INSERT INTO agent_sessions (chat_id, session_id, worktree_id, agent, backend, agent_session_id, status, last_active_at)
|
||||
VALUES (${chatId}, ${sessionId}, ${first.worktreeId}, 'opencode', 'opencode_server', 'oc-sess-1', 'active', clock_timestamp())
|
||||
ON CONFLICT (chat_id, agent) DO NOTHING
|
||||
`;
|
||||
|
||||
// "Restart" = brand-new resolution with NO in-memory state. ensureSessionWorktree
|
||||
// must return the EXISTING row (same id + path), proving reattach not re-create.
|
||||
const second = await ensureSessionWorktree(sql, projectDir, sessionId);
|
||||
expect(second.worktreeId).toBe(first.worktreeId);
|
||||
expect(second.worktreePath).toBe(first.worktreePath);
|
||||
expect(second.baseCommit).toBe(first.baseCommit);
|
||||
|
||||
// The agent_sessions row survived the "restart" with its resume handle intact.
|
||||
const [row] = await sql<{ agent_session_id: string; status: string }[]>`
|
||||
SELECT agent_session_id, status FROM agent_sessions WHERE chat_id = ${chatId} AND agent = 'opencode'
|
||||
`;
|
||||
expect(row!.agent_session_id).toBe('oc-sess-1');
|
||||
});
|
||||
|
||||
it('re-baselines the worktree diff after apply (3.5)', async () => {
|
||||
const wt = await ensureSessionWorktree(sql, projectDir, sessionId);
|
||||
const baseBefore = wt.baseCommit;
|
||||
// Make a change in the worktree (as an external agent would).
|
||||
await hostExec(`cd ${wt.worktreePath} && echo change >> README.md`, { timeoutMs: 10_000 });
|
||||
|
||||
const r = await rebaselineWorktreeAfterApply(sql, sessionId);
|
||||
expect(r.rebaselined).toBe(true);
|
||||
expect(r.newBaseCommit).toBeTruthy();
|
||||
expect(r.newBaseCommit).not.toBe(baseBefore);
|
||||
|
||||
const [row] = await sql<{ base_commit: string }[]>`
|
||||
SELECT base_commit FROM worktrees WHERE session_id = ${sessionId} AND status = 'active'
|
||||
`;
|
||||
expect(row!.base_commit).toBe(r.newBaseCommit);
|
||||
|
||||
// Idempotent: a second re-baseline with no new edits is a no-op.
|
||||
const r2 = await rebaselineWorktreeAfterApply(sql, sessionId);
|
||||
expect(r2.rebaselined).toBe(false);
|
||||
});
|
||||
|
||||
it('chat-close hook closes agent rows + removes the worktree on the last chat (3.3)', async () => {
|
||||
// Sanity: an active worktree + agent row exist from the prior tests.
|
||||
const beforeWt = await sql<{ id: string }[]>`SELECT id FROM worktrees WHERE session_id = ${sessionId} AND status = 'active'`;
|
||||
expect(beforeWt.length).toBe(1);
|
||||
|
||||
const result = await closeChatBackendState(sql, chatId);
|
||||
expect(result.agentRowsClosed).toBeGreaterThanOrEqual(1);
|
||||
// chatId is the session's only chat → worktree removed (it was clean after the
|
||||
// re-baseline commit), not at-risk.
|
||||
expect(result.worktreeAtRisk).toBe(false);
|
||||
expect(result.worktreeRemoved).toBe(true);
|
||||
|
||||
const [agentRow] = await sql<{ status: string }[]>`
|
||||
SELECT status FROM agent_sessions WHERE chat_id = ${chatId} AND agent = 'opencode'
|
||||
`;
|
||||
expect(agentRow!.status).toBe('closed');
|
||||
|
||||
const activeWt = await sql<{ id: string }[]>`SELECT id FROM worktrees WHERE session_id = ${sessionId} AND status = 'active'`;
|
||||
expect(activeWt.length).toBe(0); // archived, no longer active
|
||||
});
|
||||
|
||||
it('orphan reaper leaves a live worktree alone and reaps a row-less dir (3.4)', async () => {
|
||||
// Recreate a live worktree for this session (the close test archived the old one).
|
||||
const live = await ensureSessionWorktree(sql, projectDir, sessionId);
|
||||
expect(existsSync(live.worktreePath)).toBe(true);
|
||||
|
||||
// A live worktree (active row) with grace 0 must NOT be reaped.
|
||||
const r1 = await reapOrphanWorktrees(sql, console as never, 0, Date.now());
|
||||
expect(r1.reaped).not.toContain(live.worktreePath);
|
||||
|
||||
// Now archive its row (simulating a leaked dir) and reap again — it becomes an
|
||||
// orphan and is reclaimed (it's clean → not at-risk).
|
||||
await sql`UPDATE worktrees SET status = 'archived' WHERE id = ${live.worktreeId}`;
|
||||
const r2 = await reapOrphanWorktrees(sql, console as never, 0, Date.now());
|
||||
expect(r2.reaped).toContain(live.worktreePath);
|
||||
expect(existsSync(live.worktreePath)).toBe(false);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user