feat(coder): persist token breakdown in arena decisions and schema
This commit is contained in:
@@ -423,3 +423,7 @@ CREATE INDEX IF NOT EXISTS contestants_task_id_idx ON contestants(task_id);
|
||||
|
||||
-- Cross-examination listing per battle.
|
||||
CREATE INDEX IF NOT EXISTS cross_examinations_battle_idx ON cross_examinations(battle_id);
|
||||
|
||||
-- TokenScope: per-category token breakdown on arena contestants and tasks.
|
||||
ALTER TABLE contestants ADD COLUMN IF NOT EXISTS token_breakdown JSONB;
|
||||
ALTER TABLE tasks ADD COLUMN IF NOT EXISTS token_breakdown JSONB;
|
||||
|
||||
@@ -162,6 +162,24 @@ describe('computeBenchmark', () => {
|
||||
expect(bench.durationMs).toBe(0);
|
||||
expect(bench.tokensPerSec).toBeNull();
|
||||
});
|
||||
|
||||
// A breakdown passed as the fifth argument must come back unchanged on the result.
it('includes token breakdown when provided', () => {
  const suppliedBreakdown = {
    system: 10,
    user: 20,
    assistant: 30,
    tools: 40,
    reasoning: 5,
    total: 105,
  };

  const { tokenBreakdown } = computeBenchmark(t0, t1, 500, 'local', suppliedBreakdown);

  expect(tokenBreakdown).toEqual(suppliedBreakdown);
});
|
||||
|
||||
// Calling with only four arguments exercises the parameter's default value.
it('defaults token breakdown to null when omitted', () => {
  const { tokenBreakdown } = computeBenchmark(t0, t1, 500, 'local');

  expect(tokenBreakdown).toBeNull();
});
|
||||
});
|
||||
|
||||
// ─── sanitizeSlug ────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
* A contestant's status lifecycle:
|
||||
* queued → running → done | error
|
||||
*/
|
||||
import type { BattleType, ContestantLane } from '@boocode/contracts/arena';
|
||||
import type { BattleType, ContestantLane, TokenBreakdown } from '@boocode/contracts/arena';
|
||||
|
||||
// ─── Lane classification ──────────────────────────────────────────────────────
|
||||
|
||||
@@ -73,6 +73,7 @@ export function isBattleComplete(contestants: readonly { status: string }[]): bo
|
||||
/**
 * Timing and token figures produced by `computeBenchmark` for one contestant run.
 */
export interface Benchmark {
  /** Elapsed time between start and end in milliseconds; `computeBenchmark` clamps it to be non-negative. */
  durationMs: number;

  /**
   * Throughput in tokens per second. `computeBenchmark` reports `null` unless the
   * lane is `'local'`, a token count is available, and the duration is positive.
   */
  tokensPerSec: number | null;

  /** Per-category token breakdown, or `null` when none was supplied. */
  tokenBreakdown: TokenBreakdown | null;
}
|
||||
|
||||
/**
|
||||
@@ -86,13 +87,14 @@ export function computeBenchmark(
|
||||
endedAt: Date,
|
||||
costTokens: number | null,
|
||||
lane: ContestantLane,
|
||||
tokenBreakdown: TokenBreakdown | null = null,
|
||||
): Benchmark {
|
||||
const durationMs = Math.max(0, endedAt.getTime() - startedAt.getTime());
|
||||
const tokensPerSec =
|
||||
lane === 'local' && costTokens !== null && durationMs > 0
|
||||
? (costTokens / durationMs) * 1000
|
||||
: null;
|
||||
return { durationMs, tokensPerSec };
|
||||
return { durationMs, tokensPerSec, tokenBreakdown };
|
||||
}
|
||||
|
||||
// ─── Slug / path helpers ──────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user