chore: snapshot working tree - pty_exited notifications + in-flight inference WIP
feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
This commit is contained in:
159
apps/control/src/services/retention.ts
Normal file
159
apps/control/src/services/retention.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
/**
 * Retention job: daily in-process timer that rolls up raw perf samples and
 * prunes old data.
 *
 * Crash-safe by construction:
 *   1. Rollup is an idempotent upsert (INSERT ... ON CONFLICT DO UPDATE).
 *   2. Delete raw only AFTER covering buckets are committed.
 *   3. Chunked deletes: pruning removes rows in batches of roughly 1000 so
 *      that no single statement holds locks for long.
 */
|
||||
|
||||
import type { Sql } from '../db.js';
|
||||
import type { Config } from '../config.js';
|
||||
|
||||
/**
 * Retention and capture limits, in the shape produced by
 * `buildRetentionConfig`.
 */
export interface RetentionConfig {
  /** Raw-sample retention window in hours (from `RETENTION_RAW_HOURS`). */
  rawHours: number;

  /** Rollup retention window in days (from `RETENTION_ROLLUP_DAYS`). */
  rollupDays: number;

  /** Per-capture size cap in KB (from `CAPTURE_SIZE_KB`). */
  captureSizeKB: number;

  /**
   * Capture budget in MB (from `CAPTURE_BUDGET_MB`).
   * NOTE(review): how this budget is enforced is not visible here; confirm at the call site.
   */
  captureBudgetMB: number;
}
|
||||
|
||||
/**
 * Project the retention-related settings out of the application config.
 *
 * @param cfg - Application configuration carrying the `RETENTION_*` and
 *   `CAPTURE_*` settings.
 * @returns A `RetentionConfig` with those four values copied across unchanged.
 */
export function buildRetentionConfig(cfg: Config): RetentionConfig {
  const {
    RETENTION_RAW_HOURS: rawHours,
    RETENTION_ROLLUP_DAYS: rollupDays,
    CAPTURE_SIZE_KB: captureSizeKB,
    CAPTURE_BUDGET_MB: captureBudgetMB,
  } = cfg;

  return { rawHours, rollupDays, captureSizeKB, captureBudgetMB };
}
|
||||
|
||||
/**
 * Roll up one provider's raw perf samples into 5-minute buckets.
 *
 * Every bucket that contains at least one sample newer than `now - hours` is
 * recomputed from all raw samples inside that bucket and upserted into
 * `control_perf_rollup_5m`. Because each bucket is fully recomputed and
 * written with `ON CONFLICT ... DO UPDATE`, re-running the same window
 * overwrites the previous aggregates instead of double-counting.
 *
 * @param sql - Tagged-template query function used for all statements.
 * @param providerId - Provider whose samples are rolled up.
 * @param hours - Look-back window, in hours, measured from the current time.
 * @returns Resolves once every affected bucket has been upserted.
 */
export async function runRollup(sql: Sql, providerId: string, hours: number): Promise<void> {
  // Must stay in sync with the literal 300 (seconds) in the bucket query below.
  const bucketWidthMs = 5 * 60_000;
  const cutoff = new Date(Date.now() - hours * 3600_000);

  // date_trunc() only accepts a single field name ('minute', 'hour', ...), so
  // date_trunc('5 minutes', ts) is rejected by PostgreSQL. Flooring the epoch
  // to a multiple of 300 s yields the 5-minute bucket start on any version.
  // GROUP BY / ORDER BY use the ordinal so the alias can never be confused
  // with a real column named "bucket".
  const buckets = await sql<{ bucket: Date }[]>`
    SELECT to_timestamp(floor(extract(epoch FROM ts) / 300) * 300) AS bucket
    FROM control_perf_samples
    WHERE provider_id = ${providerId}
      AND ts >= ${cutoff.toISOString()}
    GROUP BY 1
    ORDER BY 1
  `;

  for (const { bucket } of buckets) {
    const bucketStart = new Date(bucket);
    const bucketEnd = new Date(bucketStart.getTime() + bucketWidthMs);

    // Idempotent upsert: a re-run recomputes the same bucket, never double-counts.
    await sql`
      INSERT INTO control_perf_rollup_5m (provider_id, bucket, gpu_agg, sys_agg)
      SELECT
        ${providerId},
        ${bucketStart.toISOString()},
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'gpu', gpu)) AS gpu_agg,
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'sys', sys)) AS sys_agg
      FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts >= ${bucketStart.toISOString()}
        AND ts < ${bucketEnd.toISOString()}
      GROUP BY provider_id
      ON CONFLICT (provider_id, bucket) DO UPDATE SET
        gpu_agg = EXCLUDED.gpu_agg,
        sys_agg = EXCLUDED.sys_agg
    `;
  }
}
|
||||
|
||||
/**
 * Delete one provider's raw perf samples older than the retention window.
 *
 * Rows are removed oldest-first in batches of roughly `chunkSize` rows, one
 * statement per batch, so no single statement touches an unbounded number of
 * rows. The batch boundary is computed inside the database rather than by
 * reading timestamps into JavaScript: a JS `Date` only carries millisecond
 * precision, so a timestamp with a sub-millisecond fraction would never match
 * when sent back, the delete would remove nothing, and the loop would never
 * terminate.
 *
 * @param sql - Tagged-template query function used for all statements.
 * @param providerId - Provider whose samples are pruned.
 * @param hours - Retention window in hours; rows older than `now - hours` are deleted.
 * @returns Resolves once no row older than the cutoff remains.
 */
export async function pruneRawSamples(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  let deleted: number;
  do {
    // The inner SELECT picks the oldest `chunkSize` expired rows; everything at
    // or before the newest of those is deleted (ties on ts go in the same batch).
    // With no expired rows max(ts) is NULL, nothing matches, and the count is 0.
    const [result] = await sql<{ deleted: number }[]>`
      WITH doomed AS (
        DELETE FROM control_perf_samples
        WHERE provider_id = ${providerId}
          AND ts < ${cutoffIso}
          AND ts <= (
            SELECT max(ts) FROM (
              SELECT ts FROM control_perf_samples
              WHERE provider_id = ${providerId}
                AND ts < ${cutoffIso}
              ORDER BY ts
              LIMIT ${chunkSize}
            ) AS oldest
          )
        RETURNING 1
      )
      SELECT count(*)::int AS deleted FROM doomed
    `;
    deleted = Number(result?.deleted ?? 0);
  } while (deleted > 0);
}
|
||||
|
||||
/**
 * Delete activity rows (`control_requests`) older than the retention window.
 *
 * Rows are removed oldest-first in batches of roughly `chunkSize` rows, one
 * statement per batch, to avoid long lock hold times. The batch boundary is
 * computed inside the database rather than by reading timestamps into
 * JavaScript: a JS `Date` only carries millisecond precision, so a timestamp
 * with a sub-millisecond fraction would never match when sent back, the
 * delete would remove nothing, and the loop would never terminate.
 *
 * @param sql - Tagged-template query function used for all statements.
 * @param hours - Retention window in hours; rows older than `now - hours` are deleted.
 * @returns Resolves once no row older than the cutoff remains.
 */
export async function pruneActivity(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  let deleted: number;
  do {
    // The inner SELECT picks the oldest `chunkSize` expired rows; everything at
    // or before the newest of those is deleted (ties on ts go in the same batch).
    // With no expired rows max(ts) is NULL, nothing matches, and the count is 0.
    const [result] = await sql<{ deleted: number }[]>`
      WITH doomed AS (
        DELETE FROM control_requests
        WHERE ts < ${cutoffIso}
          AND ts <= (
            SELECT max(ts) FROM (
              SELECT ts FROM control_requests
              WHERE ts < ${cutoffIso}
              ORDER BY ts
              LIMIT ${chunkSize}
            ) AS oldest
          )
        RETURNING 1
      )
      SELECT count(*)::int AS deleted FROM doomed
    `;
    deleted = Number(result?.deleted ?? 0);
  } while (deleted > 0);
}
|
||||
|
||||
/**
 * Delete model events (`control_model_events`) older than the retention window.
 *
 * Rows are removed oldest-first in batches of roughly `chunkSize` rows, one
 * statement per batch, to avoid long lock hold times. The batch boundary is
 * computed inside the database rather than by reading timestamps into
 * JavaScript: a JS `Date` only carries millisecond precision, so a timestamp
 * with a sub-millisecond fraction would never match when sent back, the
 * delete would remove nothing, and the loop would never terminate.
 *
 * @param sql - Tagged-template query function used for all statements.
 * @param hours - Retention window in hours; rows older than `now - hours` are deleted.
 * @returns Resolves once no row older than the cutoff remains.
 */
export async function pruneModelEvents(sql: Sql, hours: number): Promise<void> {
  const cutoffIso = new Date(Date.now() - hours * 3600_000).toISOString();
  const chunkSize = 1000;

  let deleted: number;
  do {
    // The inner SELECT picks the oldest `chunkSize` expired rows; everything at
    // or before the newest of those is deleted (ties on ts go in the same batch).
    // With no expired rows max(ts) is NULL, nothing matches, and the count is 0.
    const [result] = await sql<{ deleted: number }[]>`
      WITH doomed AS (
        DELETE FROM control_model_events
        WHERE ts < ${cutoffIso}
          AND ts <= (
            SELECT max(ts) FROM (
              SELECT ts FROM control_model_events
              WHERE ts < ${cutoffIso}
              ORDER BY ts
              LIMIT ${chunkSize}
            ) AS oldest
          )
        RETURNING 1
      )
      SELECT count(*)::int AS deleted FROM doomed
    `;
    deleted = Number(result?.deleted ?? 0);
  } while (deleted > 0);
}
|
||||
|
||||
/**
 * Cap a serialized capture at `sizeKB` kilobytes of UTF-8.
 *
 * The limit is enforced in bytes, not UTF-16 code units, and truncation
 * always lands on a character boundary, so the result never exceeds the cap
 * and never ends in a partial character.
 *
 * Note: truncation is purely textual. A capture that had to be cut is
 * generally no longer valid JSON, so `parseCaptureJson` returns null for it.
 *
 * @param captureJson - Serialized capture, or null.
 * @param sizeKB - Maximum size in kilobytes (1 KB = 1024 bytes). Zero,
 *   negative, or NaN values are treated as a cap of 0 bytes.
 * @returns The input unchanged when it fits, a truncated prefix when it does
 *   not, or null when the input is null or empty.
 */
export function trimCapture(captureJson: string | null, sizeKB: number): string | null {
  if (!captureJson) return null;

  // `cap > 0` is false for NaN and negatives, which collapses them to 0.
  const cap = sizeKB * 1024;
  const maxBytes = cap > 0 ? Math.floor(cap) : 0;

  const encoded = Buffer.from(captureJson, 'utf8');
  if (encoded.length <= maxBytes) return captureJson;

  // encoded[end] is the first byte that will be dropped. If it is a UTF-8
  // continuation byte (10xxxxxx) the cut would split a character, so back up
  // until the first dropped byte is the start of a character.
  let end = maxBytes;
  while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) {
    end -= 1;
  }
  return encoded.toString('utf8', 0, end);
}
|
||||
|
||||
/**
 * Parse a serialized capture into an object suitable for `sql.json()`.
 *
 * The parsed value is checked before it is returned: JSON primitives
 * (`42`, `"text"`, `true`) and the literal `null` are not objects, so they
 * yield null instead of being passed off as a record. Arrays are objects and
 * are passed through unchanged.
 *
 * @param captureJson - Serialized capture, or null.
 * @returns The parsed object, or null when the input is null, empty, not
 *   valid JSON, or valid JSON that is not an object.
 */
export function parseCaptureJson(captureJson: string | null): Record<string, unknown> | null {
  if (!captureJson) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(captureJson);
  } catch {
    // Malformed input (for example a capture truncated mid-document).
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) return null;
  return parsed as Record<string, unknown>;
}
|
||||
Reference in New Issue
Block a user