/**
 * Retention job: daily in-process timer that rolls up raw perf samples and
 * prunes old data.
 *
 * Crash-safe by construction:
 *   1. Rollup is an idempotent upsert (INSERT ... ON CONFLICT DO UPDATE).
 *   2. Delete raw only AFTER covering buckets are committed: callers run
 *      runRollup before pruneRawSamples, and both use the same cutoff rounded
 *      down to a 5-minute bucket boundary, so no bucket is ever partially pruned.
 *   3. Chunked work: one statement per rollup bucket, one DELETE per batch of
 *      at most ~1000 rows.
 */
import type { Sql } from '../db.js';
import type { Config } from '../config.js';

const MS_PER_HOUR = 3_600_000;

/** Width of one rollup bucket; must match the SQL bucket expression in runRollup. */
const ROLLUP_BUCKET_MS = 5 * 60_000;

/** Target number of rows removed by a single DELETE statement. */
const PRUNE_CHUNK_ROWS = 1000;

export interface RetentionConfig {
  rawHours: number;
  rollupDays: number;
  captureSizeKB: number;
  captureBudgetMB: number;
}

/** Project the retention-related settings out of the application config. */
export function buildRetentionConfig(cfg: Config): RetentionConfig {
  return {
    rawHours: cfg.RETENTION_RAW_HOURS,
    rollupDays: cfg.RETENTION_ROLLUP_DAYS,
    captureSizeKB: cfg.CAPTURE_SIZE_KB,
    captureBudgetMB: cfg.CAPTURE_BUDGET_MB,
  };
}

/** Instant `hours` before now. */
function retentionCutoff(hours: number): Date {
  return new Date(Date.now() - hours * MS_PER_HOUR);
}

/**
 * Instant `hours` before now, rounded DOWN to a rollup bucket boundary.
 * Shared by runRollup and pruneRawSamples so that the set of buckets the
 * rollup covers and the set of raw rows the prune keeps line up exactly.
 */
function bucketAlignedCutoff(hours: number): Date {
  const rawMs = Date.now() - hours * MS_PER_HOUR;
  return new Date(Math.floor(rawMs / ROLLUP_BUCKET_MS) * ROLLUP_BUCKET_MS);
}

/**
 * Exclusive upper bound for one prune chunk.
 *
 * `newest` comes back from the driver as a JS Date, i.e. with at most
 * millisecond precision, while the stored value may carry microseconds.
 * Adding 1 ms guarantees the bound is strictly greater than the stored value,
 * so every row of the chunk is deleted and the loop always makes progress.
 * The bound is capped at `cutoff` so rows inside the retention window survive.
 */
function chunkUpperBound(newest: Date, cutoff: Date): Date {
  return new Date(Math.min(new Date(newest).getTime() + 1, cutoff.getTime()));
}

/**
 * Roll up raw perf samples into 5-minute buckets.
 *
 * Every bucket that has at least one sample at or after the (bucket-aligned)
 * cutoff is recomputed from ALL raw samples in that bucket and upserted, so
 * re-running over unchanged raw data produces identical rollup rows.
 *
 * @param sql        database client
 * @param providerId provider whose samples are rolled up
 * @param hours      how far back from now to look for samples
 */
export async function runRollup(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoffIso = bucketAlignedCutoff(hours).toISOString();

  // date_trunc() only accepts unit names ('minute', 'hour', ...), not
  // '5 minutes', so the 5-minute floor is built from the hour plus whole
  // multiples of five minutes. GROUP BY 1 refers to the bucket expression
  // itself rather than to any table column that might be called "bucket".
  const buckets = await sql<{ bucket: Date }[]>`
    SELECT date_trunc('hour', ts)
           + floor(extract(minute FROM ts) / 5) * interval '5 minutes' AS bucket
    FROM control_perf_samples
    WHERE provider_id = ${providerId}
      AND ts >= ${cutoffIso}
    GROUP BY 1
    ORDER BY 1
  `;

  for (const { bucket } of buckets) {
    const startMs = new Date(bucket).getTime();
    const bucketStartIso = new Date(startMs).toISOString();
    const bucketEndIso = new Date(startMs + ROLLUP_BUCKET_MS).toISOString();

    // Idempotent upsert: re-run recomputes the same buckets, never double-counts.
    await sql`
      INSERT INTO control_perf_rollup_5m (provider_id, bucket, gpu_agg, sys_agg)
      SELECT
        ${providerId},
        ${bucketStartIso},
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'gpu', gpu)) AS gpu_agg,
        jsonb_agg(DISTINCT jsonb_build_object('ts', ts, 'sys', sys)) AS sys_agg
      FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts >= ${bucketStartIso}
        AND ts < ${bucketEndIso}
      GROUP BY provider_id
      ON CONFLICT (provider_id, bucket) DO UPDATE SET
        gpu_agg = EXCLUDED.gpu_agg,
        sys_agg = EXCLUDED.sys_agg
    `;
  }
}

/**
 * Prune raw perf samples older than the retention window.
 *
 * The cutoff is rounded down to a 5-minute bucket boundary, so up to five
 * minutes of extra raw data is retained. This keeps every rollup bucket either
 * fully present or fully pruned: a later runRollup over a bucket that still has
 * raw rows recomputes it from complete data instead of overwriting the stored
 * aggregate with a partial one.
 *
 * Chunked: oldest rows first, one DELETE per batch of ~1000 rows.
 *
 * @param sql        database client
 * @param providerId provider whose samples are pruned
 * @param hours      retention window in hours
 */
export async function pruneRawSamples(sql: Sql, providerId: string, hours: number): Promise<void> {
  const cutoff = bucketAlignedCutoff(hours);
  const cutoffIso = cutoff.toISOString();

  while (true) {
    // Newest timestamp among the oldest PRUNE_CHUNK_ROWS expired rows;
    // NULL when nothing is left to prune.
    const [chunk] = await sql<{ newest: Date | null }[]>`
      SELECT max(ts) AS newest
      FROM (
        SELECT ts
        FROM control_perf_samples
        WHERE provider_id = ${providerId}
          AND ts < ${cutoffIso}
        ORDER BY ts ASC
        LIMIT ${PRUNE_CHUNK_ROWS}
      ) AS expired
    `;
    if (!chunk?.newest) break;

    // Range delete instead of `ts = ANY(...)`: equality against values that
    // went through a JS Date misses rows with sub-millisecond precision, which
    // would delete nothing and spin forever.
    const upperIso = chunkUpperBound(chunk.newest, cutoff).toISOString();
    await sql`
      DELETE FROM control_perf_samples
      WHERE provider_id = ${providerId}
        AND ts < ${upperIso}
    `;
  }
}

/**
 * Prune activity (control_requests) older than the retention window.
 * Chunked: oldest rows first, one DELETE per batch of ~1000 rows, to avoid
 * long lock hold times.
 *
 * @param sql   database client
 * @param hours retention window in hours
 */
export async function pruneActivity(sql: Sql, hours: number): Promise<void> {
  const cutoff = retentionCutoff(hours);
  const cutoffIso = cutoff.toISOString();

  while (true) {
    const [chunk] = await sql<{ newest: Date | null }[]>`
      SELECT max(ts) AS newest
      FROM (
        SELECT ts
        FROM control_requests
        WHERE ts < ${cutoffIso}
        ORDER BY ts ASC
        LIMIT ${PRUNE_CHUNK_ROWS}
      ) AS expired
    `;
    if (!chunk?.newest) break;

    // Range delete: see chunkUpperBound for why equality on ts is not safe.
    const upperIso = chunkUpperBound(chunk.newest, cutoff).toISOString();
    await sql`DELETE FROM control_requests WHERE ts < ${upperIso}`;
  }
}

/**
 * Prune model events older than the retention window.
 * Chunked: oldest rows first, one DELETE per batch of ~1000 rows, to avoid
 * long lock hold times.
 *
 * @param sql   database client
 * @param hours retention window in hours
 */
export async function pruneModelEvents(sql: Sql, hours: number): Promise<void> {
  const cutoff = retentionCutoff(hours);
  const cutoffIso = cutoff.toISOString();

  while (true) {
    const [chunk] = await sql<{ newest: Date | null }[]>`
      SELECT max(ts) AS newest
      FROM (
        SELECT ts
        FROM control_model_events
        WHERE ts < ${cutoffIso}
        ORDER BY ts ASC
        LIMIT ${PRUNE_CHUNK_ROWS}
      ) AS expired
    `;
    if (!chunk?.newest) break;

    // Range delete: see chunkUpperBound for why equality on ts is not safe.
    const upperIso = chunkUpperBound(chunk.newest, cutoff).toISOString();
    await sql`DELETE FROM control_model_events WHERE ts < ${upperIso}`;
  }
}

/**
 * Trim a capture JSON string to the configured size cap.
 *
 * The cap is applied to the UTF-8 encoded length. When the input is over the
 * cap it is cut at the last whole code point that fits, so the result is valid
 * UTF-8 and never longer than the cap. Note that a trimmed value is a prefix
 * of the original text and is generally no longer parseable JSON.
 *
 * @param captureJson serialized capture, or null
 * @param sizeKB      cap in KiB; values <= 0 trim to the empty string
 * @returns the input unchanged when it fits, the trimmed prefix otherwise,
 *          or null when the input is null or empty
 */
export function trimCapture(captureJson: string | null, sizeKB: number): string | null {
  if (!captureJson) return null;

  const encoded = Buffer.from(captureJson, 'utf8');
  const limit = sizeKB * 1024;
  if (encoded.length <= limit) return captureJson;

  // Clamp: a negative end index would make subarray() count from the end.
  let end = Math.max(0, Math.floor(limit)) || 0;

  // Decoding a buffer that ends mid-sequence yields U+FFFD instead of dropping
  // the partial code point. If the first excluded byte is a continuation byte
  // (10xxxxxx) the cut is inside a code point: back up to its lead byte so the
  // whole code point is excluded.
  while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) end--;

  return encoded.subarray(0, end).toString('utf8');
}

/**
 * Parse a capture JSON string into an object for sql.json().
 *
 * @returns the parsed value when it is a JSON object or array; null when the
 *          input is null/empty, is not valid JSON, or parses to a primitive
 *          (number, string, boolean, null)
 */
export function parseCaptureJson(captureJson: string | null): Record<string, unknown> | null {
  if (!captureJson) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(captureJson);
  } catch {
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) return null;
  return parsed as Record<string, unknown>;
}