Arena is a new pane kind for competitive AI evaluation. A Battle runs the same prompt against 2-6 Contestants across two concurrent lanes: local lane (llama-swap models, serial) and cloud lane (parallel). Added to all three registries: @boocode/contracts WsFrameSchema, server InferenceFrame, and web WsFrame. Backend (apps/coder): - arena-runner: battle scheduler, lane classifier, benchmark, results writer, resume, user winner override - arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL - arena-decisions: status transitions and resume logic (unit-tested) - arena-analyzer-helpers: pure helper functions (unit-tested) - arena-model-call: model call utility for analysis - arena routes: create/get/list/stop/analyze/cross-examine/winner/diff - schema: battles, contestants, cross_examinations tables (idempotent) - remove old /api/arena* routes and tasks.arena_id column Frontend (apps/web): - ArenaLauncherDialog: battle type, prompt, contestant selection - ArenaPane: live roster, streaming output, analysis, cross-exam - DiffView: unified diff with line-by-line color for coding contests - Winner override per-row dropdown (Trophy icon) - battle_updated WS handler for live winner/analysis updates - arena pane kind in Workspace, ChatTabBar, useSidebar Cross-app: - ArenaState and ArenaContestantShape/WsFrame types (contracts) - battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame - manifest.json written per battle results folder - /Arena added to .gitignore Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
500 lines
15 KiB
TypeScript
500 lines
15 KiB
TypeScript
// Single source of truth for the WebSocket frame Zod runtime schema.
// Validation runs on send (broker.publishFrame / publishUserFrame) and
// on receive (apps/web hooks useSessionStream + useUserEvents). Catches
// silent protocol drift between publisher and consumer.
//
// Per-kind payload schemas stay z.unknown() — frame-level drift detection
// is the goal; deep payload validation is follow-up work.
|
|
|
|
import { z } from 'zod';
|
|
|
|
// ---- shared primitives -----------------------------------------------------
|
|
|
|
// UUID-formatted string; the id type used by the `*_id` fields of the frames below.
const Uuid = z.string().uuid();
|
|
/**
 * Identifier of a tool call. These are emitted by the model
 * (e.g. "call_abc123") and are not UUIDs, so any non-empty string is accepted.
 */
const ToolCallId = z.string().min(1);
|
|
/**
 * Timestamp field that accepts either a non-empty string or a JS `Date`.
 *
 * v1.13.12 fix: postgres returns timestamp columns as JS Date objects, not
 * strings, and the publish sites pass them through unchanged, so the schema
 * must tolerate both. The preprocess step converts Date → ISO string before
 * string validation; on the web side (where frames arrive via JSON.parse) it
 * is a no-op. Before that fix, every message_complete / session_updated /
 * chat_updated frame failed validation and was dropped — symptoms: token
 * tracking blank in the UI, status stuck at 'streaming' tripping the 60s
 * stale-stream banner.
 *
 * An Invalid Date is deliberately NOT converted: `toISOString()` throws a
 * RangeError on it, and an exception thrown inside a preprocess callback
 * escapes parse/safeParse instead of being reported as a validation issue.
 * Leaving the value untouched lets `z.string()` reject it like any other
 * non-string input.
 */
const IsoTimestamp = z.preprocess(
  (v) => (v instanceof Date && !Number.isNaN(v.getTime()) ? v.toISOString() : v),
  z.string().min(1),
);
|
|
|
|
/** Status values accepted by the `status` field of `chat_status` frames. */
const ChatStatusValue = z.enum(['streaming', 'tool_running', 'waiting_for_input', 'idle', 'error']);
|
|
|
|
/**
 * agent-status-normalize (#10): normalized lifecycle status, tracked per
 * (chat, agent), for external coding agents (warm-acp / opencode / claude-sdk /
 * pty). This is separate from ChatStatusValue, which describes the
 * native-inference chat lifecycle; it is published by BooCoder's dispatcher and
 * permission flow on the per-session channel.
 */
const AgentStatusValue = z.enum([
  'working',
  'blocked',
  'idle',
  'error',
]);
|
|
|
|
/** Failure reasons accepted by the optional `reason` field of `error` and `chat_status` frames. */
const ErrorReasonValue = z.enum([
  'llm_provider_error',
  'doom_loop',
  'doom_loop_summary_failed',
  'cap_hit',
  'cap_hit_summary_failed',
]);
|
|
|
|
// Message author roles accepted by the `role` field of `message_started` frames.
const MessageRoleValue = z.enum(['user', 'assistant', 'system', 'tool']);
|
|
|
|
/**
 * A single tool invocation as carried by `tool_call` frames: the call id, a
 * non-empty tool name, and a string-keyed argument bag whose values are not
 * validated here.
 */
const ToolCallShape = z.object({
  id: ToolCallId,
  name: z.string().min(1),
  args: z.record(z.string(), z.unknown()),
});
|
|
|
|
/**
 * Placeholder for free-form payload bags that the frame schema treats as
 * opaque. Deep validation was out of scope for v1.13.11: the goal is
 * frame-level drift detection, and per-kind payload narrowing is follow-up
 * work. Because this is `z.unknown()`, consumers must narrow before reading.
 * That is acceptable on the TypeScript side — consumers already work with the
 * hand-maintained Project / Chat / Session / WorkspacePane types, and the
 * Zod-inferred shape is only used at the publishFrame boundary.
 */
const OpaqueObject = z.unknown();
|
|
|
|
// ---- per-session channel frames --------------------------------------------
|
|
|
|
/** `snapshot` frame: carries an array of messages, each left opaque to this schema. */
export const SnapshotFrame = z.object({
  type: z.literal('snapshot'),
  messages: z.array(OpaqueObject),
});
|
|
|
|
/** `message_started` frame: the id and role of a new message, with an optional owning chat id. */
export const MessageStartedFrame = z.object({
  type: z.literal('message_started'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  role: MessageRoleValue,
});
|
|
|
|
/** `delta` frame: a string chunk of content for the message identified by `message_id`. */
export const DeltaFrame = z.object({
  type: z.literal('delta'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  content: z.string(),
});
|
|
|
|
/** `reasoning_delta` frame: same shape as `delta`, distinguished only by its `type` tag. */
export const ReasoningDeltaFrame = z.object({
  type: z.literal('reasoning_delta'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  content: z.string(),
});
|
|
|
|
/** `tool_call` frame: one tool invocation (id, name, args) attached to a message. */
export const ToolCallFrame = z.object({
  type: z.literal('tool_call'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  tool_call: ToolCallShape,
});
|
|
|
|
/**
 * `tool_result` frame: the outcome of a tool call. `output` is unvalidated,
 * `truncated` is a required boolean, and `error` is an optional string.
 */
export const ToolResultFrame = z.object({
  type: z.literal('tool_result'),
  tool_message_id: Uuid,
  chat_id: z.optional(Uuid),
  tool_call_id: ToolCallId,
  output: z.unknown(),
  truncated: z.boolean(),
  error: z.optional(z.string()),
});
|
|
|
|
/**
 * `message_complete` frame: closes out a message. Everything other than
 * `type` and `message_id` may be omitted, and the usage/timing/model/metadata
 * fields may also be null.
 */
export const MessageCompleteFrame = z.object({
  type: z.literal('message_complete'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  tokens_used: z.optional(z.nullable(z.number().int().nonnegative())),
  ctx_used: z.optional(z.nullable(z.number().int().nonnegative())),
  ctx_max: z.optional(z.nullable(z.number().int().positive())),
  started_at: z.optional(z.nullable(IsoTimestamp)),
  finished_at: z.optional(z.nullable(IsoTimestamp)),
  // Must accept null: external-coder turns carry task.model, which is null when
  // no model was selected. This frame goes through the same fail-closed
  // publishFrame, so rejecting null would drop the whole frame — including the
  // status:'complete' transition.
  model: z.optional(z.nullable(z.string())),
  metadata: z.optional(z.nullable(OpaqueObject)),
  // F1 (D-8): terminal status of the assistant message. The native BooChat path
  // omits it (the reducer defaults to 'complete'). The BooCoder dispatcher sets
  // 'cancelled' on a user Stop / stall and 'failed' on a thrown error, letting
  // the web reducer render a muted "Stopped" / failed state without a new frame
  // type. It is optional so that fail-closed publishFrame keeps it rather than
  // stripping it.
  status: z.optional(z.enum(['complete', 'cancelled', 'failed'])),
});
|
|
|
|
/**
 * `usage` frame: token/context counters for a message. The three counters are
 * required keys but may be null.
 */
export const UsageFrame = z.object({
  type: z.literal('usage'),
  message_id: Uuid,
  chat_id: z.optional(Uuid),
  completion_tokens: z.nullable(z.number().int().nonnegative()),
  ctx_used: z.nullable(z.number().int().nonnegative()),
  ctx_max: z.nullable(z.number().int().positive()),
});
|
|
|
|
/** `messages_deleted` frame: the ids of the removed messages, with an optional chat id. */
export const MessagesDeletedFrame = z.object({
  type: z.literal('messages_deleted'),
  message_ids: z.array(Uuid),
  chat_id: z.optional(Uuid),
});
|
|
|
|
/** `chat_renamed` frame: a chat id paired with its name. */
export const ChatRenamedFrame = z.object({
  type: z.literal('chat_renamed'),
  chat_id: Uuid,
  name: z.string(),
});
|
|
|
|
/** `compacted` frame: identifies the session, the chat, and the summary message. */
export const CompactedFrame = z.object({
  type: z.literal('compacted'),
  session_id: Uuid,
  chat_id: Uuid,
  summary_message_id: Uuid,
});
|
|
|
|
/**
 * `error` frame: a required error string, optionally tied to a message and/or
 * chat and optionally classified by an ErrorReasonValue.
 */
export const ErrorFrame = z.object({
  type: z.literal('error'),
  message_id: z.optional(Uuid),
  chat_id: z.optional(Uuid),
  error: z.string(),
  reason: z.optional(ErrorReasonValue),
});
|
|
|
|
// ---- per-user channel frames (sidebar refresh) -----------------------------
|
|
|
|
/**
 * `chat_status` frame: a chat's status at timestamp `at`, with an optional
 * ErrorReasonValue in `reason`.
 */
export const ChatStatusFrame = z.object({
  type: z.literal('chat_status'),
  chat_id: Uuid,
  status: ChatStatusValue,
  at: IsoTimestamp,
  reason: z.optional(ErrorReasonValue),
});
|
|
|
|
/** `session_updated` frame: session id, owning project id, name, and `updated_at` timestamp. */
export const SessionUpdatedFrame = z.object({
  type: z.literal('session_updated'),
  session_id: Uuid,
  project_id: Uuid,
  name: z.string(),
  updated_at: IsoTimestamp,
});
|
|
|
|
/** `session_renamed` frame: a session id paired with its name. */
export const SessionRenamedFrame = z.object({
  type: z.literal('session_renamed'),
  session_id: Uuid,
  name: z.string(),
});
|
|
|
|
/** `session_created` frame: an opaque session payload plus the owning project id. */
export const SessionCreatedFrame = z.object({
  type: z.literal('session_created'),
  session: OpaqueObject,
  project_id: Uuid,
});
|
|
|
|
/** `session_archived` frame: the session id and its project id. */
export const SessionArchivedFrame = z.object({
  type: z.literal('session_archived'),
  session_id: Uuid,
  project_id: Uuid,
});
|
|
|
|
/** `session_deleted` frame: the session id and its project id. */
export const SessionDeletedFrame = z.object({
  type: z.literal('session_deleted'),
  session_id: Uuid,
  project_id: Uuid,
});
|
|
|
|
/**
 * `session_workspace_updated` frame: the workspace layout of a session.
 *
 * `workspace_panes` accepts two payload shapes (widened in v2.6.x from a plain
 * array): the legacy bare WorkspacePane[] OR the WorkspaceState envelope object
 * (panes + tabNumbers + nextTabNumber + closedPaneStack). An array-only schema
 * would fail closed and drop every envelope frame at validation.
 */
export const SessionWorkspaceUpdatedFrame = z.object({
  type: z.literal('session_workspace_updated'),
  session_id: Uuid,
  // The record is spelled with an explicit string key schema, matching
  // ToolCallShape.args; the single-argument z.record(value) overload is not
  // available in Zod 4.
  workspace_panes: z.union([z.array(OpaqueObject), z.record(z.string(), z.unknown())]),
});
|
|
|
|
/** `chat_created` frame: an opaque chat payload plus the owning session id. */
export const ChatCreatedFrame = z.object({
  type: z.literal('chat_created'),
  chat: OpaqueObject,
  session_id: Uuid,
});
|
|
|
|
/**
 * `chat_updated` frame: chat id, session id, a name that is required but may
 * be null, and the `updated_at` timestamp.
 */
export const ChatUpdatedFrame = z.object({
  type: z.literal('chat_updated'),
  chat_id: Uuid,
  session_id: Uuid,
  name: z.nullable(z.string()),
  updated_at: IsoTimestamp,
});
|
|
|
|
/** `chat_archived` frame: the chat id and its session id. */
export const ChatArchivedFrame = z.object({
  type: z.literal('chat_archived'),
  chat_id: Uuid,
  session_id: Uuid,
});
|
|
|
|
/** `chat_unarchived` frame: carries the chat as an opaque payload. */
export const ChatUnarchivedFrame = z.object({
  type: z.literal('chat_unarchived'),
  chat: OpaqueObject,
});
|
|
|
|
/** `chat_deleted` frame: the chat id and its session id. */
export const ChatDeletedFrame = z.object({
  type: z.literal('chat_deleted'),
  chat_id: Uuid,
  session_id: Uuid,
});
|
|
|
|
/** `project_created` frame: carries the project as an opaque payload. */
export const ProjectCreatedFrame = z.object({
  type: z.literal('project_created'),
  project: OpaqueObject,
});
|
|
|
|
/** `project_archived` frame: identifies the project by id only. */
export const ProjectArchivedFrame = z.object({
  type: z.literal('project_archived'),
  project_id: Uuid,
});
|
|
|
|
/** `project_unarchived` frame: carries the project as an opaque payload. */
export const ProjectUnarchivedFrame = z.object({
  type: z.literal('project_unarchived'),
  project: OpaqueObject,
});
|
|
|
|
/** `project_updated` frame: a project id paired with its name. */
export const ProjectUpdatedFrame = z.object({
  type: z.literal('project_updated'),
  project_id: Uuid,
  name: z.string(),
});
|
|
|
|
/** `project_deleted` frame: identifies the project by id only. */
export const ProjectDeletedFrame = z.object({
  type: z.literal('project_deleted'),
  project_id: Uuid,
});
|
|
|
|
/** One selectable entry in a `permission_requested` frame's `options` list. */
const PermissionOptionShape = z.object({
  option_id: z.string(),
  label: z.string(),
});
|
|
|
|
/**
 * `permission_requested` frame: a permission prompt for a task within a
 * session. `options` is required; `kind`, `tool_title` and `input` are
 * optional. `input` is a string-keyed bag whose values are not validated here.
 */
export const PermissionRequestedFrame = z.object({
  type: z.literal('permission_requested'),
  task_id: Uuid,
  session_id: Uuid,
  kind: z.enum(['tool', 'question', 'plan', 'elicitation']).optional(),
  tool_title: z.string().optional(),
  // Explicit string key schema, matching ToolCallShape.args; the
  // single-argument z.record(value) overload is not available in Zod 4.
  input: z.record(z.string(), z.unknown()).optional(),
  options: z.array(PermissionOptionShape),
});
|
|
|
|
/** `permission_resolved` frame: the task id and session id of the resolved prompt. */
export const PermissionResolvedFrame = z.object({
  type: z.literal('permission_resolved'),
  task_id: Uuid,
  session_id: Uuid,
});
|
|
|
|
/** One entry in an `agent_commands` frame: a command name and an optional description. */
const AgentCommandShape = z.object({
  name: z.string(),
  description: z.optional(z.string()),
});
|
|
|
|
/** `agent_commands` frame: the list of commands for a task within a session. */
export const AgentCommandsFrame = z.object({
  type: z.literal('agent_commands'),
  task_id: Uuid,
  session_id: Uuid,
  commands: z.array(AgentCommandShape),
});
|
|
|
|
/**
 * agent-status-normalize (#10): `agent_status_updated` frame. BooCoder
 * publishes it on the per-session channel whenever an external agent's
 * normalized status changes (turn start/end, permission block/unblock). It is
 * keyed per (chat_id, agent); the frontend keeps the latest value per pair and
 * resets on chat switch. `reason` is a free-form discriminator (turn_start /
 * turn_complete / failed / crashed / permission_request /
 * permission_resolved).
 */
export const AgentStatusUpdatedFrame = z.object({
  type: z.literal('agent_status_updated'),
  chat_id: Uuid,
  agent: z.string().min(1),
  status: AgentStatusValue,
  reason: z.optional(z.string()),
  at: IsoTimestamp,
});
|
|
|
|
// ---- orchestrator frames ([D-6]) -------------------------------------------
|
|
|
|
/**
 * One step in the manifest of a `flow_run_started` frame: step id, agent,
 * kind ('agent' or 'code'), the chat it belongs to, and a display label.
 */
const FlowStepManifestEntry = z.object({
  step_id: z.string().min(1),
  agent: z.string().min(1),
  kind: z.enum(['agent', 'code']),
  chat_id: Uuid,
  label: z.string().min(1),
});
|
|
|
|
/**
 * `flow_run_started` frame. Sent a single time at the start of a flow run and
 * includes the complete step manifest, so the OrchestratorPane can assemble
 * its roster right away.
 */
export const FlowRunStartedFrame = z.object({
  type: z.literal('flow_run_started'),
  run_id: Uuid,
  flow_name: z.string().min(1),
  band: z.enum(['small', 'medium', 'large']),
  steps: z.array(FlowStepManifestEntry),
});
|
|
|
|
/**
 * `flow_run_step_updated` frame. Sent for each step status change and when the
 * run completes. `report` is present (and non-null) only when
 * `run_status === 'completed'`; it travels on this frame instead of a dedicated
 * one (D-6). Phase 6 added `cancelled` to both enums so the stop route can
 * publish cancel transitions (the DB CHECKs already allowed it).
 */
export const FlowRunStepUpdatedFrame = z.object({
  type: z.literal('flow_run_step_updated'),
  run_id: Uuid,
  step_id: z.string().min(1),
  status: z.enum(['pending', 'running', 'completed', 'failed', 'skipped', 'cancelled']),
  run_status: z.optional(z.enum(['running', 'completed', 'failed', 'cancelled'])),
  report: z.optional(z.string()),
});
|
|
|
|
// ---- arena frames ----------------------------------------------------------
|
|
|
|
/**
 * One contestant in the roster of a `battle_started` frame: its id, identity,
 * model name, and the lane ('local' or 'cloud') it runs in.
 */
const ContestantManifestEntry = z.object({
  id: Uuid,
  identity: z.string().min(1),
  model: z.string().min(1),
  lane: z.enum(['local', 'cloud']),
});
|
|
|
|
/**
 * `battle_started` frame. Sent a single time at the start of a battle and
 * includes the contestant roster, so the ArenaPane can lay out its grid right
 * away.
 */
export const BattleStartedFrame = z.object({
  type: z.literal('battle_started'),
  battle_id: Uuid,
  battle_type: z.enum(['coding', 'qa']),
  prompt: z.string(),
  contestants: z.array(ContestantManifestEntry),
});
|
|
|
|
/**
 * `contestant_updated` frame. Sent for each contestant status change or
 * streaming update. While status='running', `delta` holds the most recent
 * chunk of streamed output. Publishers include `battle_status` only on the
 * final transition that closes the battle. Apart from the two ids, every field
 * is optional.
 */
export const ContestantUpdatedFrame = z.object({
  type: z.literal('contestant_updated'),
  battle_id: Uuid,
  contestant_id: Uuid,
  status: z.optional(z.enum(['queued', 'running', 'done', 'error'])),
  duration_ms: z.optional(z.number().int().nonnegative()),
  tokens_per_sec: z.optional(z.number().nonnegative()),
  battle_status: z.optional(z.enum(['pending', 'running', 'completed', 'failed', 'cancelled'])),
  delta: z.optional(z.string()),
  error: z.optional(z.string()),
});
|
|
|
|
/**
 * `battle_updated` frame. Sent for battle-level changes that do not ride on a
 * contestant update: analysis finished, winner set, cross-exam verdict ready.
 * The pane applies it to its analysis panel and winner badge without
 * refetching. Every field besides `battle_id` is optional — publishers send
 * only what changed. `winner_contestant_id` may additionally be null.
 */
export const BattleUpdatedFrame = z.object({
  type: z.literal('battle_updated'),
  battle_id: Uuid,
  status: z.optional(z.enum(['pending', 'running', 'completed', 'failed', 'cancelled'])),
  winner_contestant_id: z.optional(z.nullable(Uuid)),
  analysis_ready: z.optional(z.boolean()),
  cross_exam_id: z.optional(Uuid),
});
|
|
|
|
// ---- discriminated union ---------------------------------------------------
|
|
|
|
/**
 * Union of every frame schema in this module, discriminated on the `type`
 * literal. Members are grouped by channel/feature; their order is unchanged.
 */
export const WsFrameSchema = z.discriminatedUnion('type', [
  // per-session
  SnapshotFrame, MessageStartedFrame, DeltaFrame, ReasoningDeltaFrame,
  ToolCallFrame, ToolResultFrame, MessageCompleteFrame, UsageFrame,
  MessagesDeletedFrame, ChatRenamedFrame, CompactedFrame, ErrorFrame,
  PermissionRequestedFrame, PermissionResolvedFrame,
  AgentCommandsFrame, AgentStatusUpdatedFrame,

  // orchestrator
  FlowRunStartedFrame, FlowRunStepUpdatedFrame,

  // arena
  BattleStartedFrame, ContestantUpdatedFrame, BattleUpdatedFrame,

  // per-user
  ChatStatusFrame,
  SessionUpdatedFrame, SessionRenamedFrame, SessionCreatedFrame,
  SessionArchivedFrame, SessionDeletedFrame, SessionWorkspaceUpdatedFrame,
  ChatCreatedFrame, ChatUpdatedFrame, ChatArchivedFrame,
  ChatUnarchivedFrame, ChatDeletedFrame,
  ProjectCreatedFrame, ProjectArchivedFrame, ProjectUnarchivedFrame,
  ProjectUpdatedFrame, ProjectDeletedFrame,
]);
|
|
|
|
// Static TypeScript type of a validated frame, inferred from WsFrameSchema.
export type WsFrame = z.infer<typeof WsFrameSchema>;
|
|
|
|
/**
 * Convenience list of every known frame `type` tag. The publishFrame helper
 * can use it to log the offending type name when validation fails. The drift
 * test in src/__tests__/ws-frames.test.ts keeps it in sync with the schema.
 */
export const KNOWN_FRAME_TYPES: readonly WsFrame['type'][] = [
  // per-session
  'snapshot', 'message_started', 'delta', 'reasoning_delta',
  'tool_call', 'tool_result', 'message_complete', 'usage',
  'messages_deleted', 'chat_renamed', 'compacted', 'error',
  'permission_requested', 'permission_resolved',
  'agent_commands', 'agent_status_updated',

  // orchestrator
  'flow_run_started', 'flow_run_step_updated',

  // arena
  'battle_started', 'contestant_updated', 'battle_updated',

  // per-user
  'chat_status',
  'session_updated', 'session_renamed', 'session_created',
  'session_archived', 'session_deleted', 'session_workspace_updated',
  'chat_created', 'chat_updated', 'chat_archived',
  'chat_unarchived', 'chat_deleted',
  'project_created', 'project_archived', 'project_unarchived',
  'project_updated', 'project_deleted',
] as const;
|