// boocode/apps/server/src/services/inference/prune.ts

import type { Sql } from '../../db.js';

// v1.13.4: two-tier compaction prune. Opencode's prune half (the cheap one);
// summarize half shipped in v1.11.0 as services/compaction.ts.
//
// Algorithm: scan tool_result parts newest-first. Protect the last
// PROTECTED_TOKENS of content (the model recently saw these — pruning them
// kills coherence). Older parts are candidates. Hide them (set hidden_at)
// only if the candidate pool would free at least PRUNE_TRIGGER_TOKENS —
// pruning 3 small tool_results to recover 500 tokens isn't worth the loss
// of fidelity for the model's next turn.
//
// Stops at the last compaction summary boundary (chats.tail_start_id). The
// v1.11.0 summary already encodes everything before that point; pruning
// across the boundary would double-erase.

// Estimated-token budget of the newest tool_result content that is never
// pruned. selectPruneTargets keeps protecting parts until the running total
// reaches this value; the part that crosses the threshold is protected too.
export const PROTECTED_TOKENS = 40_000;
// Minimum estimated tokens the candidate pool must add up to before anything
// is hidden. Below this, selectPruneTargets returns no targets.
export const PRUNE_TRIGGER_TOKENS = 20_000;

// Approximates how many tokens `text` occupies, assuming four characters per
// token and rounding any partial token up (so the empty string costs 0 and a
// single character costs 1). Same heuristic compaction's usable() uses
// implicitly via the buffer constant.
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}

// Estimates the token cost of a part payload from the length of its JSON
// serialization. A null or undefined payload is costed as the empty string,
// which serializes to `""` and therefore counts as one token.
function payloadTokens(payload: unknown): number {
  // JSON.stringify is typed as returning `string`, but at runtime it yields
  // undefined for values that have no JSON form (e.g. functions, symbols).
  // Cost those as empty text instead of crashing on `.length`.
  const serialized: string | undefined = JSON.stringify(payload ?? '');
  return estimateTokens(serialized ?? '');
}

// Outcome of one prune() call.
export interface PruneResult {
  // Number of part ids prune() selected and marked hidden; 0 when nothing
  // was pruned.
  hidden: number;
  // Summed token estimate of the hidden parts' payloads; 0 when nothing was
  // pruned.
  freedTokens: number;
}

// Pure algorithmic core, exported for unit-test access. Takes parts already
// ordered newest-first, plus an optional cutoff (last compaction summary
// boundary). Returns the part ids to hide and the total token estimate of
// the candidates. Caller does the DB UPDATE.
//
// PartForPrune is the minimal row shape that core needs for each part.
export interface PartForPrune {
  // Part identifier; echoed back in the list of ids to hide.
  id: string;
  // Part content. Only its JSON-serialized length is used, to estimate tokens.
  payload: unknown;
  // Timestamp compared against the summary-boundary cutoff. prune() fills
  // this with the parent message's created_at.
  created_at: Date;
}

// Decides which parts to hide, without touching the database.
//
// partsNewestFirst must already be sorted newest to oldest. The walk has two
// phases: first it shields parts until their summed token estimate reaches
// PROTECTED_TOKENS (the part that crosses the line is shielded as well), then
// every older part becomes a prune candidate. The walk ends early at the
// first part created before tailStartCreatedAt, when a cutoff is given.
//
// Returns the candidate ids (newest first) and their summed token estimate,
// or an empty result when the candidates total less than
// PRUNE_TRIGGER_TOKENS.
export function selectPruneTargets(
  partsNewestFirst: ReadonlyArray<PartForPrune>,
  tailStartCreatedAt: Date | null,
): { ids: string[]; freedTokens: number } {
  const ids: string[] = [];
  let freedTokens = 0;
  let shieldedTokens = 0;
  let shielding = true;

  for (const part of partsNewestFirst) {
    // Everything older than the summary boundary is already covered by the
    // anchored summary; stop instead of erasing it a second time.
    const beforeBoundary =
      tailStartCreatedAt !== null && part.created_at < tailStartCreatedAt;
    if (beforeBoundary) {
      break;
    }

    const cost = payloadTokens(part.payload);
    if (shielding) {
      // Still inside the protected window. The part that pushes the total
      // over the threshold stays protected; only later parts are candidates.
      shieldedTokens += cost;
      shielding = shieldedTokens < PROTECTED_TOKENS;
      continue;
    }

    ids.push(part.id);
    freedTokens += cost;
  }

  // Not enough to reclaim: leave everything visible.
  const worthPruning = ids.length > 0 && freedTokens >= PRUNE_TRIGGER_TOKENS;
  return worthPruning ? { ids, freedTokens } : { ids: [], freedTokens: 0 };
}

// Hides stale tool_result parts of one chat and reports what was reclaimed.
//
// Flow: load the chat's visible tool_result parts newest-first, resolve the
// creation time of the chats.tail_start_id message (if one is set), pass both
// to selectPruneTargets, and stamp hidden_at on every id it returns. Resolves
// to zero counts when there is nothing to scan or nothing was selected.
export async function prune(args: {
  sql: Sql;
  chatId: string;
}): Promise<PruneResult> {
  const { sql, chatId } = args;
  const untouched: PruneResult = { hidden: 0, freedTokens: 0 };

  // One row per visible tool_result part, newest first. Every row also
  // carries the chat's tail_start_id so the summary boundary comes back with
  // the same query.
  type VisiblePartRow = {
    id: string;
    payload: unknown;
    created_at: Date;
    tail_start_id: string | null;
  };
  const rows = await sql<VisiblePartRow[]>`
    SELECT p.id, p.payload, m.created_at,
      (SELECT c.tail_start_id FROM chats c WHERE c.id = ${chatId}) AS tail_start_id
    FROM message_parts p
    JOIN messages m ON m.id = p.message_id
    WHERE m.chat_id = ${chatId}
      AND p.kind = 'tool_result'
      AND p.hidden_at IS NULL
    ORDER BY m.created_at DESC, p.sequence DESC
  `;
  if (rows.length === 0) {
    return untouched;
  }

  // Translate the boundary message id into a timestamp. It stays null when
  // the chat has no boundary or the referenced message row is not found.
  const boundaryMessageId = rows[0]?.tail_start_id ?? null;
  let tailStartCreatedAt: Date | null = null;
  if (boundaryMessageId) {
    const boundaryRows = await sql<{ created_at: Date }[]>`
      SELECT created_at FROM messages WHERE id = ${boundaryMessageId}
    `;
    tailStartCreatedAt = boundaryRows[0]?.created_at ?? null;
  }

  const { ids, freedTokens } = selectPruneTargets(rows, tailStartCreatedAt);
  if (ids.length === 0) {
    return untouched;
  }

  // Mark every selected part as hidden in a single statement.
  await sql`
    UPDATE message_parts
    SET hidden_at = clock_timestamp()
    WHERE id = ANY(${ids})
  `;
  return { hidden: ids.length, freedTokens };
}