Idle TTL eviction per (chat,agent) + LRU cap (never a busy backend); pure lifecycle-decisions.ts (TDD). Crash recovery lifts openchamber's health-monitor + busy-aware-restart + stale-grace state machine into opencode-server.ts (+ port reclaim) and warm-acp.ts; opencode crash -> fresh sessions, ACP -> re-session/new. F.1 turn-guard + U.6 usage preserved (their tests pass). Orphan worktree reaper (1h grace, superset-style dirty/unpushed preflight, Paseo soft-delete) + close hooks + diff re-baseline after apply_pending. 35 new tests + DB-opt-in reconnect test; 215 coder tests pass; tsc + build clean. Completes v2.6. Follow-ups out of scope: apps/server close-hook caller, 3.7 DiffPanel staging hint, live smokes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
171 lines
6.7 KiB
TypeScript
171 lines
6.7 KiB
TypeScript
/**
 * v2.6 Phase 3 (3.4) — orphan worktree reaper.
 *
 * Reclaims on-disk session worktree dirs under WORKTREE_BASE that have NO live
 * (`status='active'`) row in the `worktrees` table — leaks from a crash between
 * `git worktree add` and the DB insert, a missed chat-close hook, or a manual rm
 * of the DB row. Extends the periodic-sweeper pattern (apps/server's truncation +
 * stale-streaming reaper).
 *
 * SAFETY (Paseo worktree-archive cascade + superset destroy-saga lift): before
 * removing ANY dir, run `checkWorktreeWorkAtRisk` — a dirty / unpushed / unmerged
 * worktree is SKIPPED (logged), never force-removed. The pure orphan-target
 * selection (which dirs are candidates) lives in
 * `backends/lifecycle-decisions.ts:selectOrphanWorktreeTargets` and is unit-tested;
 * this module does the DB read + fs stat + git preflight + removal side-effects.
 *
 * The mtime grace (default 1h) means a dir mid-`ensureSessionWorktree` (created on
 * disk, row not yet committed) is never swept — the grace window covers the gap.
 */
|
|
import { readdir, stat } from 'node:fs/promises';
|
|
import { join } from 'node:path';
|
|
import type { FastifyBaseLogger } from 'fastify';
|
|
import type { Sql } from '../db.js';
|
|
import { WORKTREE_BASE, checkWorktreeWorkAtRisk } from './worktrees.js';
|
|
import { hostExec } from './host-exec.js';
|
|
import {
|
|
selectOrphanWorktreeTargets,
|
|
DEFAULT_ORPHAN_WORKTREE_GRACE_MS,
|
|
} from './backends/lifecycle-decisions.js';
|
|
|
|
/** Dependencies and tuning knobs consumed by `createOrphanWorktreeReaper`. */
export interface OrphanWorktreeReaperDeps {
  /** SQL client handed to each reap pass (used there to read the active `worktrees` rows). */
  sql: Sql;
  /** Logger used for the start message and for per-pass warnings. */
  log: FastifyBaseLogger;
  /** Delay, in milliseconds, between scheduled reap passes (passed to `setInterval`). */
  intervalMs: number;
  /** Optional grace window in milliseconds; falls back to `DEFAULT_ORPHAN_WORKTREE_GRACE_MS` when omitted. */
  graceMs?: number;
}
|
|
|
|
/** Summary of one reap pass, as returned by `reapOrphanWorktrees`. */
export interface OrphanReaperResult {
  /** Number of on-disk `sess-*` directories that were stat'ed successfully. */
  scanned: number;
  /** Number of those directories selected as orphan candidates. */
  candidates: number;
  /** Paths of candidates whose removal reported success. */
  reaped: string[];
  /** Paths of candidates left in place because the at-risk preflight flagged them. */
  skippedAtRisk: string[];
}
|
|
|
|
/**
 * Single-pass reap: select orphan candidates, preflight at-risk, remove the safe.
 *
 * Steps:
 *  1. List `sess-*` directories directly under `WORKTREE_BASE` and record their mtimes.
 *  2. Read the paths of all `status = 'active'` rows from the `worktrees` table.
 *  3. Let `selectOrphanWorktreeTargets` pick candidates from (on-disk, live, now, graceMs).
 *  4. For each candidate, run `checkWorktreeWorkAtRisk`; at-risk dirs are skipped
 *     and logged, the rest are handed to `removeOrphanDir`.
 *
 * @param sql     SQL client used for the active-worktrees query.
 * @param log     Logger for skip, failure and summary records.
 * @param graceMs Grace window forwarded to `selectOrphanWorktreeTargets`.
 * @param now     Reference time in epoch milliseconds (injectable for tests).
 * @returns Counts of scanned dirs and candidates plus the reaped / skipped paths.
 *          A candidate whose removal fails appears in neither list; it is logged.
 */
export async function reapOrphanWorktrees(
  sql: Sql,
  log: FastifyBaseLogger,
  graceMs: number = DEFAULT_ORPHAN_WORKTREE_GRACE_MS,
  now: number = Date.now(),
): Promise<OrphanReaperResult> {
  // Enumerate on-disk session worktree dirs (`sess-*`). Per-task worktrees
  // (arena/new_task/MCP) are cleaned up inline by the one-shot path, so we only
  // own the persistent session dirs the warm paths leave behind.
  let dirents: string[];
  try {
    dirents = await readdir(WORKTREE_BASE);
  } catch (err: unknown) {
    // A missing base simply means there is nothing to do. Any other failure
    // (permissions, not a directory, I/O) would otherwise disable the reaper
    // without a trace, so surface it before returning the same empty result.
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    if (code !== 'ENOENT') {
      log.warn(
        { base: WORKTREE_BASE, err: err instanceof Error ? err.message : String(err) },
        'orphan-reaper: cannot list worktree base',
      );
    }
    return { scanned: 0, candidates: 0, reaped: [], skippedAtRisk: [] };
  }

  const onDisk: { path: string; mtimeMs: number }[] = [];
  for (const name of dirents) {
    if (!name.startsWith('sess-')) continue; // only persistent session worktrees
    const path = join(WORKTREE_BASE, name);
    try {
      const s = await stat(path);
      if (!s.isDirectory()) continue;
      onDisk.push({ path, mtimeMs: s.mtimeMs });
    } catch {
      // vanished between readdir and stat — skip
    }
  }

  // Live worktree paths from the DB (active rows only — archived/removed rows are
  // not "live", so their leftover dirs are reapable orphans).
  const liveRows = await sql<{ path: string }[]>`
    SELECT path FROM worktrees WHERE status = 'active'
  `;
  const live = new Set(liveRows.map((r) => r.path));

  const candidates = selectOrphanWorktreeTargets(onDisk, live, now, graceMs);
  const reaped: string[] = [];
  const skippedAtRisk: string[] = [];

  for (const path of candidates) {
    // Preflight: never reap work at risk. A git error forces atRisk=true (fail
    // closed), so a half-broken worktree is kept, not silently destroyed.
    const risk = await checkWorktreeWorkAtRisk(path);
    if (risk.atRisk) {
      skippedAtRisk.push(path);
      log.warn({ path, dirty: risk.dirty, unmerged: risk.unmerged, error: risk.error }, 'orphan-reaper: skipping at-risk orphan worktree');
      continue;
    }
    const removed = await removeOrphanDir(path);
    if (removed) {
      reaped.push(path);
    } else {
      // The dir stays a candidate for the next pass; record why it is still there.
      log.warn({ path }, 'orphan-reaper: failed to remove orphan worktree');
    }
  }

  if (reaped.length > 0 || skippedAtRisk.length > 0) {
    log.info({ scanned: onDisk.length, candidates: candidates.length, reaped, skippedAtRisk }, 'orphan-reaper: pass complete');
  }
  return { scanned: onDisk.length, candidates: candidates.length, reaped, skippedAtRisk };
}
|
|
|
|
/**
 * Best-effort removal of one orphan worktree directory.
 *
 * Asks git for the directory's common git dir to locate the owning repo. When
 * that yields a repo root (the common dir minus its trailing `/.git`), runs
 * `git worktree remove --force` followed by `git worktree prune` from there.
 * Finally `rm -rf`s the directory regardless, so a dir git no longer tracks is
 * still reclaimed. A failed or rejected git step never aborts the sequence.
 *
 * @param path Directory to remove.
 * @returns `true` when the final `rm -rf` ran and exited with code 0.
 */
async function removeOrphanDir(path: string): Promise<boolean> {
  // Run a host command; a rejected exec collapses to null instead of throwing.
  const run = (command: string, timeoutMs: number) =>
    hostExec(command, { timeoutMs }).catch(() => null);

  const quotedPath = shellEscape(path);

  // Locate the owning repo. If the dir is no longer a valid worktree the probe
  // fails, commonDir stays empty, and only the plain rm below runs.
  const probe = await run(
    `git -C ${quotedPath} rev-parse --path-format=absolute --git-common-dir`,
    10_000,
  );
  let commonDir = '';
  if (probe && probe.exitCode === 0) {
    commonDir = probe.stdout.trim();
  }

  // Repo root = common dir with its trailing /.git stripped.
  const repoRoot = commonDir.replace(/\/\.git\/?$/, '').replace(/\/\.git$/, '');

  // Only go through git when stripping actually changed something, i.e. the
  // common dir really ended in /.git and a non-empty root remains.
  const haveRepoRoot = repoRoot !== '' && repoRoot !== commonDir;
  if (haveRepoRoot) {
    const quotedRoot = shellEscape(repoRoot);
    await run(`git -C ${quotedRoot} worktree remove ${quotedPath} --force`, 15_000);
    await run(`git -C ${quotedRoot} worktree prune`, 10_000);
  }

  // Backstop: make sure the directory is gone even if git did nothing.
  const wipe = await run(`rm -rf ${quotedPath}`, 15_000);
  return wipe != null && wipe.exitCode === 0;
}
|
|
|
|
/**
 * Minimal single-quote shell escape (mirrors worktrees.ts).
 *
 * Wraps the input in single quotes and rewrites every embedded `'` as `'\''`
 * (close quote, escaped quote, reopen quote).
 */
function shellEscape(s: string): string {
  const escapedQuote = "'\\''";
  const inner = s.split("'").join(escapedQuote);
  return `'${inner}'`;
}
|
|
|
|
/**
 * Periodic orphan-worktree reaper, started/stopped by the bootstrap. Unref'd.
 *
 * `start()` schedules `reapOrphanWorktrees` every `intervalMs` milliseconds and
 * is a no-op when already started. A tick that fires while the previous pass is
 * still in flight is dropped. `stop()` clears the interval; it does not wait
 * for a pass that is already running.
 *
 * @param deps SQL client, logger, interval and optional grace window.
 * @returns An object exposing `start()` and `stop()`.
 */
export function createOrphanWorktreeReaper(deps: OrphanWorktreeReaperDeps): { start(): void; stop(): void } {
  const { sql, log, intervalMs } = deps;
  const graceMs = deps.graceMs ?? DEFAULT_ORPHAN_WORKTREE_GRACE_MS;

  let handle: ReturnType<typeof setInterval> | null = null;
  let passInFlight = false;

  // One scheduled tick: run a pass unless the previous one has not finished.
  const tick = async (): Promise<void> => {
    if (passInFlight) return; // a slow pass must not overlap the next tick
    passInFlight = true;
    try {
      await reapOrphanWorktrees(sql, log, graceMs);
    } catch (err) {
      log.warn({ err: err instanceof Error ? err.message : String(err) }, 'orphan-reaper: pass error');
    } finally {
      passInFlight = false;
    }
  };

  return {
    start() {
      if (handle) return;
      handle = setInterval(() => {
        void tick();
      }, intervalMs);
      // Do not let the timer keep the process alive on its own.
      handle.unref?.();
      log.info({ intervalMs, graceMs }, 'orphan-reaper: started');
    },
    stop() {
      if (!handle) return;
      clearInterval(handle);
      handle = null;
    },
  };
}
|