boocode/apps/coder/src/services/arena-model-call.ts

/**
 * One-shot model completion for the Arena analyzer.
 *
 * Calls the local llama-swap server directly for a single non-streaming
 * completion. Used for the digest and judge stages (always DEFAULT_MODEL)
 * and for local-model cross-examinations (any local model).
 *
 * Mirrors apps/server/src/services/task-model.ts but targets the coder's
 * config shape and uses a longer timeout appropriate for analysis calls.
 */

import type { Config } from '../config.js';

/** Upper bound for a single completion request, in milliseconds (2 minutes). */
const TIMEOUT_MS = 120_000;

/**
 * Performs one non-streaming chat completion against the llama-swap server
 * and returns the model's answer as plain text.
 *
 * The request is a POST to `<LLAMA_SWAP_URL>/v1/chat/completions` with a
 * system + user message pair, `stream: false`, and
 * `chat_template_kwargs.enable_thinking` set to `false`.
 *
 * @param opts.config - Supplies `LLAMA_SWAP_URL`, the server base URL.
 *   Trailing slashes are tolerated.
 * @param opts.model - Model identifier forwarded verbatim in the request body.
 * @param opts.system - System prompt.
 * @param opts.user - User prompt.
 * @param opts.maxTokens - Sent as `max_tokens`; defaults to 2000.
 * @param opts.temperature - Sampling temperature; defaults to 0.3.
 * @returns The trimmed `content` of the first choice. When that is empty, the
 *   last non-blank line of `reasoning_content` (trimmed). An empty string when
 *   the response carries neither.
 * @throws Error when the server answers with a non-2xx status; the message
 *   contains the status code and the first 200 characters of the body.
 *   Rejections from `fetch` itself (network failure, or the abort raised once
 *   `TIMEOUT_MS` elapses) and JSON parse failures propagate unchanged.
 */
export async function arenaModelCall(opts: {
  config: Pick<Config, 'LLAMA_SWAP_URL'>;
  model: string;
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
}): Promise<string> {
  const { config, model, system, user } = opts;
  const maxTokens = opts.maxTokens ?? 2_000;
  const temperature = opts.temperature ?? 0.3;

  // A base URL configured as "http://host:port/" would otherwise produce
  // "//v1/chat/completions"; strip trailing slashes before joining.
  const baseUrl = config.LLAMA_SWAP_URL.replace(/\/+$/, '');

  const res = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: system },
        { role: 'user', content: user },
      ],
      max_tokens: maxTokens,
      temperature,
      stream: false,
      chat_template_kwargs: { enable_thinking: false },
    }),
    signal: AbortSignal.timeout(TIMEOUT_MS),
  });

  if (!res.ok) {
    // Reading the body is best-effort: it only enriches the error message.
    const text = await res.text().catch(() => '');
    throw new Error(`llama-swap responded ${res.status}: ${text.slice(0, 200)}`);
  }

  const data = (await res.json()) as {
    choices?: Array<{
      message?: {
        content?: string | null;
        reasoning_content?: string | null;
      };
    }>;
  };

  const message = data.choices?.[0]?.message;
  if (!message) return '';

  const content = (message.content ?? '').trim();
  if (content.length > 0) return content;

  // For thinking-mode models the answer sometimes only lands in
  // reasoning_content; use its last non-blank line as the answer.
  // Each line is trimmed so the fallback is whitespace-clean like `content`.
  const reasoningLines = (message.reasoning_content ?? '')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0);

  return reasoningLines[reasoningLines.length - 1] ?? '';
}