// boocode/apps/server/src/services/inference/tool-summaries.ts

/**
 * v2.0.5: Tool-use summary generation.
 *
 * After a batch of tool calls completes, fire a cheap LLM call to generate
 * a "git-commit-subject-style" one-liner label describing what the tools
 * accomplished. Ported from the Qwen Code source recon.
 */
import type { FastifyBaseLogger } from 'fastify';

/**
 * System prompt for the summary request: asks the model for a terse,
 * past-tense, commit-subject-style label and shows a few examples.
 */
const TOOL_SUMMARY_SYSTEM_PROMPT = [
  'Write a short summary label describing what these tool calls accomplished. Think git-commit-subject, not sentence. Past tense, most distinctive noun. Max 30 characters. Output ONLY the label.',
  '',
  'Examples:',
  '- Searched in auth/',
  '- Fixed NPE in UserService',
  '- Created signup endpoint',
  '- Read config.json',
  '- Ran failing tests',
].join('\n');

/** Maximum number of characters of each tool's input and output included in the prompt. */
const INPUT_TRUNCATE = 300;

/**
 * Hard cap on the length of the returned label. Deliberately looser than the
 * 30 characters requested in the prompt, so a slightly verbose reply is kept.
 */
const MAX_SUMMARY_LENGTH = 100;

/** One completed tool call, as rendered into the summary prompt. */
export interface ToolInfo {
  /** Tool identifier; emitted on the `Tool:` line of the prompt. */
  name: string;
  /** Tool input as text; only its first INPUT_TRUNCATE characters are sent. */
  input: string;
  /** Tool output as text; truncated to INPUT_TRUNCATE characters like the input. */
  output: string;
}

/**
 * Normalises a raw model reply into a single-line label.
 *
 * Keeps only the first line, then repeatedly strips a leading bullet, a
 * leading `Label:` / `Summary:` prefix and surrounding quote characters until
 * nothing more changes, so wrappers are removed regardless of nesting order
 * (e.g. `Label: "Fixed bug"` and `"Label: Fixed bug"` both give `Fixed bug`).
 *
 * @param raw - Text returned by the model.
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns The cleaned label, or `null` when nothing is left after cleaning.
 */
function cleanSummaryLabel(raw: string, maxLength: number): string | null {
  let label = raw.trim().split('\n')[0]?.trim() ?? '';

  let previous: string;
  do {
    previous = label;
    label = label
      .replace(/^[-*•]\s+/, '')
      .replace(/^(label|summary)\s*:\s*/i, '')
      .replace(/^["'`‘’“”]|["'`‘’“”]$/g, '')
      .trim();
  } while (label !== previous);

  if (label.length > maxLength) {
    label = label
      .slice(0, maxLength)
      // The cut may land inside a surrogate pair; drop the orphaned half.
      .replace(/[\uD800-\uDBFF]$/, '')
      .trim();
  }
  return label || null;
}

/**
 * Asks the LLM for a short, commit-subject-style label describing what a
 * batch of tool calls accomplished.
 *
 * This is best-effort: every failure path (no tools, aborted signal, non-2xx
 * response, malformed payload, network error) resolves to `null` instead of
 * throwing, and failures are only logged at debug level.
 *
 * @param opts.tools - Tool calls to summarise; input/output are truncated to
 *   INPUT_TRUNCATE characters each before being sent.
 * @param opts.llamaSwapUrl - Base URL of the OpenAI-compatible server; a
 *   trailing slash is tolerated.
 * @param opts.model - Model name placed in the request body.
 * @param opts.log - Logger used for debug-level failure reports.
 * @param opts.signal - Optional abort signal forwarded to `fetch`.
 * @returns The cleaned label (at most MAX_SUMMARY_LENGTH characters), or
 *   `null` when no usable label could be produced.
 */
export async function generateToolUseSummary(opts: {
  tools: ToolInfo[];
  llamaSwapUrl: string;
  model: string;
  log: FastifyBaseLogger;
  signal?: AbortSignal;
}): Promise<string | null> {
  const { tools, llamaSwapUrl, model, log, signal } = opts;
  if (tools.length === 0) return null;
  if (signal?.aborted) return null;

  const toolText = tools
    .map(t => `Tool: ${t.name}\nInput: ${t.input.slice(0, INPUT_TRUNCATE)}\nOutput: ${t.output.slice(0, INPUT_TRUNCATE)}`)
    .join('\n\n');

  // Avoid "//v1/..." when the configured base URL ends with a slash.
  const endpoint = `${llamaSwapUrl.replace(/\/+$/, '')}/v1/chat/completions`;

  try {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: TOOL_SUMMARY_SYSTEM_PROMPT },
          { role: 'user', content: toolText },
        ],
        max_tokens: 30,
        temperature: 0.2,
        stream: false,
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal,
    });
    if (!res.ok) {
      log.debug({ status: res.status }, 'tool-summary: LLM request failed');
      // Discard the unread body so the underlying connection can be released.
      await res.body?.cancel().catch(() => undefined);
      return null;
    }
    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: unknown } }>;
    };
    // The payload is external input: accept only a string content field.
    const content = data.choices?.[0]?.message?.content;
    if (typeof content !== 'string') return null;
    return cleanSummaryLabel(content, MAX_SUMMARY_LENGTH);
  } catch (err) {
    log.debug({ err: err instanceof Error ? err.message : String(err) }, 'tool-summary: error');
    return null;
  }
}