Files
boocode/packages/contracts/src/ws-frames.ts
indifferentketchup d6d246c15b feat(web,coder): arena pane — compare 2-6 AI competitors on same prompt
Arena is a new pane kind for competitive AI evaluation. A Battle runs
the same prompt against 2-6 Contestants across two concurrent lanes:
local lane (llama-swap models, serial) and cloud lane (parallel).

The new arena frames are added to all three registries: @boocode/contracts
WsFrameSchema, server InferenceFrame, and web WsFrame.

Backend (apps/coder):
- arena-runner: battle scheduler, lane classifier, benchmark, results
  writer, resume, user winner override
- arena-analyzer: two-stage digest→judge analysis on DEFAULT_MODEL
- arena-decisions: status transitions and resume logic (unit-tested)
- arena-analyzer-helpers: pure helper functions (unit-tested)
- arena-model-call: model call utility for analysis
- arena routes: create/get/list/stop/analyze/cross-examine/winner/diff
- schema: battles, contestants, cross_examinations tables (idempotent)
- remove old /api/arena* routes and tasks.arena_id column

Frontend (apps/web):
- ArenaLauncherDialog: battle type, prompt, contestant selection
- ArenaPane: live roster, streaming output, analysis, cross-exam
- DiffView: unified diff with line-by-line color for coding contests
- Winner override per-row dropdown (Trophy icon)
- battle_updated WS handler for live winner/analysis updates
- arena pane kind in Workspace, ChatTabBar, useSidebar

Cross-app:
- ArenaState and ArenaContestantShape/WsFrame types (contracts)
- battle_* frames in WsFrameSchema, InferenceFrame, and web WsFrame
- manifest.json written per battle results folder
- /Arena added to .gitignore

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 23:25:29 +00:00

500 lines
15 KiB
TypeScript

// Single source of truth for the WebSocket frame Zod runtime schema.
// Validation runs on send (broker.publishFrame / publishUserFrame) and
// on receive (apps/web hooks useSessionStream + useUserEvents). Catches
// silent protocol drift between publisher and consumer.
//
// Per-kind payload schemas stay z.unknown() — frame-level drift detection
// is the goal; deep payload validation is follow-up work.
import { z } from 'zod';
// ---- shared primitives -----------------------------------------------------
// UUID-formatted string. Used below for message, chat, session, project,
// task, run, battle and contestant identifiers.
const Uuid = z.string().uuid();
// Tool call IDs are model-emitted (e.g. "call_abc123") — not UUIDs, so the
// only constraint applied is that the string is non-empty.
const ToolCallId = z.string().min(1);
// v1.13.12 fix: postgres returns timestamp columns as JS Date objects, not
// strings. The publish sites pass them through unchanged, so the schema must
// tolerate both. preprocess converts Date → ISO string before string-validation;
// on the web side (where frames arrive via JSON.parse) it's a no-op. Before
// this fix, every message_complete / session_updated / chat_updated frame
// failed validation and got dropped — symptoms: token tracking blank in UI,
// status stuck at 'streaming' tripping the 60s stale-stream banner.
//
// Invalid Dates (getTime() is NaN) are deliberately passed through untouched:
// Date.prototype.toISOString() throws a RangeError on them, and an exception
// raised inside the preprocess callback would propagate out of the parse call
// instead of surfacing as a validation issue. Leaving the value as a Date lets
// z.string() reject it like any other malformed field.
const IsoTimestamp = z.preprocess(
  (v) => (v instanceof Date && !Number.isNaN(v.getTime()) ? v.toISOString() : v),
  z.string().min(1),
);
// Allowed values for the `status` field of chat_status frames.
const ChatStatusValue = z.enum(['streaming', 'tool_running', 'waiting_for_input', 'idle', 'error']);
// agent-status-normalize (#10): the normalized lifecycle status tracked per
// (chat, agent) pair for external coding agents (warm-acp / opencode /
// claude-sdk / pty). This is separate from ChatStatusValue, which models the
// native-inference chat lifecycle; BooCoder's dispatcher + permission flow
// publish it on the per-session channel.
const AgentStatusValue = z.enum([
  'working',
  'blocked',
  'idle',
  'error',
]);
// Machine-readable failure reasons; used as the optional `reason` on error
// and chat_status frames.
const ErrorReasonValue = z.enum([
  'llm_provider_error',
  'doom_loop', 'doom_loop_summary_failed',
  'cap_hit', 'cap_hit_summary_failed',
]);
// Roles a message may carry on a message_started frame.
const MessageRoleValue = z.enum([
  'user',
  'assistant',
  'system',
  'tool',
]);
// A single tool invocation: its model-emitted id, a non-empty tool name, and
// a string-keyed bag of arguments whose values are not validated here.
const ToolCallShape = z.object({
  id: ToolCallId,
  name: z.string().min(1),
  args: z.record(z.string(), z.unknown()),
});
// Free-form bags: opaque to the frame schema; deep validation is out of
// scope for v1.13.11 (frame-level drift detection is the goal; per-kind
// payload narrowing is follow-up work). z.unknown() means the consumer
// must narrow before reading — TypeScript-side this is fine because every
// consumer already operates on the hand-maintained Project / Chat / Session
// / WorkspacePane types, and the Zod-typed shape is only used at the
// publishFrame boundary.
// Note: z.unknown() accepts any value, so fields typed with this alias are
// effectively unvalidated.
const OpaqueObject = z.unknown();
// ---- per-session channel frames --------------------------------------------
// `snapshot` frame: carries an array of messages. Each entry is opaque to
// this schema and must be narrowed by the consumer.
export const SnapshotFrame = z.object({
  type: z.literal('snapshot'),
  messages: z.array(OpaqueObject),
});
// `message_started` frame: identifies a message by id together with its role.
// `chat_id` is optional.
export const MessageStartedFrame = z.object({
  type: z.literal('message_started'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  role: MessageRoleValue,
});
// `delta` frame: a string `content` chunk for the given message. The string
// may be empty; `chat_id` is optional.
export const DeltaFrame = z.object({
  type: z.literal('delta'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  content: z.string(),
});
// `reasoning_delta` frame: same field layout as the `delta` frame, under its
// own discriminator.
export const ReasoningDeltaFrame = z.object({
  type: z.literal('reasoning_delta'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  content: z.string(),
});
// `tool_call` frame: attaches one tool invocation (id, name, args) to a
// message.
export const ToolCallFrame = z.object({
  type: z.literal('tool_call'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  tool_call: ToolCallShape,
});
// `tool_result` frame: the output of a tool call, keyed by the tool message
// id and the originating tool_call_id. `output` is unvalidated; `truncated`
// is required; `error` is an optional string.
export const ToolResultFrame = z.object({
  type: z.literal('tool_result'),
  tool_message_id: Uuid,
  chat_id: Uuid.optional(),
  tool_call_id: ToolCallId,
  output: z.unknown(),
  truncated: z.boolean(),
  error: z.string().optional(),
});
// `message_complete` frame: carries a message's final accounting — token and
// context counters, start/finish timestamps, model, metadata — plus an
// optional terminal status. Every field other than `type` and `message_id`
// may be omitted, and the counters, timestamps, model and metadata may also
// be null.
export const MessageCompleteFrame = z.object({
  type: z.literal('message_complete'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  tokens_used: z.number().int().nonnegative().nullish(),
  ctx_used: z.number().int().nonnegative().nullish(),
  ctx_max: z.number().int().positive().nullish(),
  started_at: IsoTimestamp.nullish(),
  finished_at: IsoTimestamp.nullish(),
  // Null must validate here: external-coder turns forward task.model, which is
  // null when no model was selected. The frame goes through the same
  // fail-closed publishFrame, so rejecting null would drop the whole frame —
  // including the status:'complete' transition.
  model: z.string().nullish(),
  metadata: OpaqueObject.nullish(),
  // F1 (D-8): terminal status of the assistant message. The native BooChat
  // path omits it (the reducer defaults to 'complete'); the BooCoder
  // dispatcher stamps 'cancelled' on a user Stop / stall and 'failed' on a
  // thrown error, so the web reducer can render a muted "Stopped" / failed
  // state without a new frame type. It is declared (as optional) so that the
  // fail-closed publishFrame keeps the field rather than stripping it.
  status: z.enum(['complete', 'cancelled', 'failed']).optional(),
});
// `usage` frame: token/context counters for a message. All three counters
// are required keys but each may be null.
export const UsageFrame = z.object({
  type: z.literal('usage'),
  message_id: Uuid,
  chat_id: Uuid.optional(),
  completion_tokens: z.number().int().nonnegative().nullable(),
  ctx_used: z.number().int().nonnegative().nullable(),
  ctx_max: z.number().int().positive().nullable(),
});
// `messages_deleted` frame: lists the ids of the deleted messages.
export const MessagesDeletedFrame = z.object({
  type: z.literal('messages_deleted'),
  message_ids: z.array(Uuid),
  chat_id: Uuid.optional(),
});
// `chat_renamed` frame: the chat id and its name (any string, including
// empty).
export const ChatRenamedFrame = z.object({
  type: z.literal('chat_renamed'),
  chat_id: Uuid,
  name: z.string(),
});
// `compacted` frame: names the session, the chat, and the id of the summary
// message.
export const CompactedFrame = z.object({
  type: z.literal('compacted'),
  session_id: Uuid,
  chat_id: Uuid,
  summary_message_id: Uuid,
});
// `error` frame: a required error string, optionally tied to a message and/or
// chat, with an optional machine-readable reason.
export const ErrorFrame = z.object({
  type: z.literal('error'),
  message_id: Uuid.optional(),
  chat_id: Uuid.optional(),
  error: z.string(),
  reason: ErrorReasonValue.optional(),
});
// ---- per-user channel frames (sidebar refresh) -----------------------------
// `chat_status` frame (per-user channel): a chat's status and the timestamp
// `at`, with an optional error reason.
export const ChatStatusFrame = z.object({
  type: z.literal('chat_status'),
  chat_id: Uuid,
  status: ChatStatusValue,
  at: IsoTimestamp,
  reason: ErrorReasonValue.optional(),
});
// `session_updated` frame: a session's project, name and updated_at
// timestamp.
export const SessionUpdatedFrame = z.object({
  type: z.literal('session_updated'),
  session_id: Uuid,
  project_id: Uuid,
  name: z.string(),
  updated_at: IsoTimestamp,
});
// `session_renamed` frame: the session id and its name.
export const SessionRenamedFrame = z.object({
  type: z.literal('session_renamed'),
  session_id: Uuid,
  name: z.string(),
});
// `session_created` frame: the session payload (opaque to this schema) and
// its project id.
export const SessionCreatedFrame = z.object({
  type: z.literal('session_created'),
  session: OpaqueObject,
  project_id: Uuid,
});
// `session_archived` frame: identifies the session and its project.
export const SessionArchivedFrame = z.object({
  type: z.literal('session_archived'),
  session_id: Uuid,
  project_id: Uuid,
});
// `session_deleted` frame: identifies the session and its project.
export const SessionDeletedFrame = z.object({
  type: z.literal('session_deleted'),
  session_id: Uuid,
  project_id: Uuid,
});
// `session_workspace_updated` frame: the workspace layout for a session.
export const SessionWorkspaceUpdatedFrame = z.object({
  type: z.literal('session_workspace_updated'),
  session_id: Uuid,
  // v2.6.x: widened from z.array — the payload is now either the legacy bare
  // WorkspacePane[] OR the WorkspaceState envelope object (panes + tabNumbers +
  // nextTabNumber + closedPaneStack). z.array alone would fail-closed and drop
  // every envelope frame at validation.
  // The record spells out its string key schema, matching ToolCallShape.args;
  // the single-argument z.record(value) overload validates the same way here
  // but is not accepted by Zod 4.
  workspace_panes: z.union([
    z.array(OpaqueObject),
    z.record(z.string(), z.unknown()),
  ]),
});
// `chat_created` frame: the chat payload (opaque to this schema) and the
// session id it is sent with.
export const ChatCreatedFrame = z.object({
  type: z.literal('chat_created'),
  chat: OpaqueObject,
  session_id: Uuid,
});
// `chat_updated` frame: a chat's session, name and updated_at timestamp.
// `name` is a required key but may be null.
export const ChatUpdatedFrame = z.object({
  type: z.literal('chat_updated'),
  chat_id: Uuid,
  session_id: Uuid,
  name: z.string().nullable(),
  updated_at: IsoTimestamp,
});
// `chat_archived` frame: identifies the chat and its session.
export const ChatArchivedFrame = z.object({
  type: z.literal('chat_archived'),
  chat_id: Uuid,
  session_id: Uuid,
});
// `chat_unarchived` frame: carries the chat payload (opaque to this schema)
// rather than just an id.
export const ChatUnarchivedFrame = z.object({
  type: z.literal('chat_unarchived'),
  chat: OpaqueObject,
});
// `chat_deleted` frame: identifies the chat and its session.
export const ChatDeletedFrame = z.object({
  type: z.literal('chat_deleted'),
  chat_id: Uuid,
  session_id: Uuid,
});
// `project_created` frame: carries the project payload (opaque to this
// schema).
export const ProjectCreatedFrame = z.object({
  type: z.literal('project_created'),
  project: OpaqueObject,
});
// `project_archived` frame: identifies the project by id.
export const ProjectArchivedFrame = z.object({
  type: z.literal('project_archived'),
  project_id: Uuid,
});
// `project_unarchived` frame: carries the project payload (opaque to this
// schema) rather than just an id.
export const ProjectUnarchivedFrame = z.object({
  type: z.literal('project_unarchived'),
  project: OpaqueObject,
});
// `project_updated` frame: the project id and its name.
export const ProjectUpdatedFrame = z.object({
  type: z.literal('project_updated'),
  project_id: Uuid,
  name: z.string(),
});
// `project_deleted` frame: identifies the project by id.
export const ProjectDeletedFrame = z.object({
  type: z.literal('project_deleted'),
  project_id: Uuid,
});
// One selectable option on a permission request: an option id and its label.
const PermissionOptionShape = z.object({
  option_id: z.string(),
  label: z.string(),
});
// `permission_requested` frame: a permission prompt for a task within a
// session. `options` is required; `kind`, `tool_title` and `input` are
// optional.
export const PermissionRequestedFrame = z.object({
  type: z.literal('permission_requested'),
  task_id: Uuid,
  session_id: Uuid,
  kind: z.enum(['tool', 'question', 'plan', 'elicitation']).optional(),
  tool_title: z.string().optional(),
  // String-keyed bag with unvalidated values. The key schema is spelled out
  // to match ToolCallShape.args; the single-argument z.record(value) overload
  // validates the same way here but is not accepted by Zod 4.
  input: z.record(z.string(), z.unknown()).optional(),
  options: z.array(PermissionOptionShape),
});
// `permission_resolved` frame: identifies the task and session only.
export const PermissionResolvedFrame = z.object({
  type: z.literal('permission_resolved'),
  task_id: Uuid,
  session_id: Uuid,
});
// One agent command: a name plus an optional description.
const AgentCommandShape = z.object({
  name: z.string(),
  description: z.string().optional(),
});
// `agent_commands` frame: the list of commands for a task within a session.
export const AgentCommandsFrame = z.object({
  type: z.literal('agent_commands'),
  task_id: Uuid,
  session_id: Uuid,
  commands: z.array(AgentCommandShape),
});
// agent-status-normalize (#10): BooCoder publishes this on the per-session
// channel whenever an external agent's normalized status changes (turn
// start/end, permission block/unblock). The key is the (chat_id, agent) pair;
// the frontend keeps the latest frame per pair and resets on chat switch.
// `reason` is a free-form discriminator (turn_start / turn_complete / failed /
// crashed / permission_request / permission_resolved) and is not constrained
// beyond being a string.
export const AgentStatusUpdatedFrame = z.object({
  type: z.literal('agent_status_updated'),
  chat_id: Uuid,
  agent: z.string().min(1),
  status: AgentStatusValue,
  reason: z.string().optional(),
  at: IsoTimestamp,
});
// ---- orchestrator frames ([D-6]) -------------------------------------------
// One entry in a flow run's step manifest: step id, agent, step kind, the
// chat it is tied to, and a display label. All strings must be non-empty.
const FlowStepManifestEntry = z.object({
  step_id: z.string().min(1),
  agent: z.string().min(1),
  kind: z.enum(['agent', 'code']),
  chat_id: Uuid,
  label: z.string().min(1),
});
// Sent a single time, when a flow run begins. The complete step manifest is
// included so the OrchestratorPane can build its roster immediately.
export const FlowRunStartedFrame = z.object({
  type: z.literal('flow_run_started'),
  run_id: Uuid,
  flow_name: z.string().min(1),
  band: z.enum(['small', 'medium', 'large']),
  steps: z.array(FlowStepManifestEntry),
});
// Sent for each step status change and when the run completes. `report` is
// intended to be present only when `run_status === 'completed'`; it travels
// on this frame instead of a dedicated one (D-6). The schema itself only
// requires it to be an optional string. Phase 6 added `cancelled` to both
// enums so the stop route can publish cancel transitions (the DB CHECKs
// already allowed it).
export const FlowRunStepUpdatedFrame = z.object({
  type: z.literal('flow_run_step_updated'),
  run_id: Uuid,
  step_id: z.string().min(1),
  status: z.enum([
    'pending',
    'running',
    'completed',
    'failed',
    'skipped',
    'cancelled',
  ]),
  run_status: z.enum(['running', 'completed', 'failed', 'cancelled']).optional(),
  report: z.string().optional(),
});
// ---- arena frames ----------------------------------------------------------
// One contestant in a battle roster: its id, a non-empty identity and model
// string, and the lane ('local' or 'cloud') it runs in.
const ContestantManifestEntry = z.object({
  id: Uuid,
  identity: z.string().min(1),
  model: z.string().min(1),
  lane: z.enum(['local', 'cloud']),
});
// Sent a single time, when a battle begins. The contestant roster is included
// so the ArenaPane can build its grid immediately. The roster size is not
// constrained by this schema.
export const BattleStartedFrame = z.object({
  type: z.literal('battle_started'),
  battle_id: Uuid,
  battle_type: z.enum(['coding', 'qa']),
  prompt: z.string(),
  contestants: z.array(ContestantManifestEntry),
});
// Sent for each contestant status change or streaming update. While
// status='running', `delta` holds the most recent chunk of streamed output.
// `battle_status` is meant to appear only on the final transition that closes
// the battle. Apart from the two ids, every field is optional.
export const ContestantUpdatedFrame = z.object({
  type: z.literal('contestant_updated'),
  battle_id: Uuid,
  contestant_id: Uuid,
  status: z.enum(['queued', 'running', 'done', 'error']).optional(),
  duration_ms: z.number().int().nonnegative().optional(),
  tokens_per_sec: z.number().nonnegative().optional(),
  battle_status: z
    .enum(['pending', 'running', 'completed', 'failed', 'cancelled'])
    .optional(),
  delta: z.string().optional(),
  error: z.string().optional(),
});
// Sent for battle-level changes that do not ride on a contestant update:
// analysis finished, winner set, cross-exam verdict ready. It lets the pane
// refresh its analysis panel and winner badge without a refetch. Everything
// except `battle_id` is optional — publishers send only what changed — and
// `winner_contestant_id` may additionally be null.
export const BattleUpdatedFrame = z.object({
  type: z.literal('battle_updated'),
  battle_id: Uuid,
  status: z
    .enum(['pending', 'running', 'completed', 'failed', 'cancelled'])
    .optional(),
  winner_contestant_id: Uuid.nullish(),
  analysis_ready: z.boolean().optional(),
  cross_exam_id: Uuid.optional(),
});
// ---- discriminated union ---------------------------------------------------
// The full WebSocket frame schema: a union of every frame object above,
// discriminated on the `type` literal. A frame whose `type` is not listed
// here fails validation.
export const WsFrameSchema = z.discriminatedUnion('type', [
  // ---- per-session ----
  SnapshotFrame,
  MessageStartedFrame,
  DeltaFrame,
  ReasoningDeltaFrame,
  ToolCallFrame,
  ToolResultFrame,
  MessageCompleteFrame,
  UsageFrame,
  MessagesDeletedFrame,
  ChatRenamedFrame,
  CompactedFrame,
  ErrorFrame,
  PermissionRequestedFrame,
  PermissionResolvedFrame,
  AgentCommandsFrame,
  AgentStatusUpdatedFrame,

  // ---- orchestrator ----
  FlowRunStartedFrame,
  FlowRunStepUpdatedFrame,

  // ---- arena ----
  BattleStartedFrame,
  ContestantUpdatedFrame,
  BattleUpdatedFrame,

  // ---- per-user ----
  ChatStatusFrame,
  SessionUpdatedFrame,
  SessionRenamedFrame,
  SessionCreatedFrame,
  SessionArchivedFrame,
  SessionDeletedFrame,
  SessionWorkspaceUpdatedFrame,
  ChatCreatedFrame,
  ChatUpdatedFrame,
  ChatArchivedFrame,
  ChatUnarchivedFrame,
  ChatDeletedFrame,
  ProjectCreatedFrame,
  ProjectArchivedFrame,
  ProjectUnarchivedFrame,
  ProjectUpdatedFrame,
  ProjectDeletedFrame,
]);
// Static type of a validated frame, inferred from WsFrameSchema. Fields
// declared with z.unknown() surface as `unknown` and must be narrowed by the
// consumer.
export type WsFrame = z.infer<typeof WsFrameSchema>;
// Convenience list of every known frame `type`, in the same order as the
// members of WsFrameSchema. It lets the publishFrame helper log the offending
// type name when validation fails. The drift test in
// src/__tests__/ws-frames.test.ts keeps it in sync with the schema.
export const KNOWN_FRAME_TYPES: readonly WsFrame['type'][] = [
  // per-session
  'snapshot', 'message_started', 'delta', 'reasoning_delta',
  'tool_call', 'tool_result', 'message_complete', 'usage',
  'messages_deleted', 'chat_renamed', 'compacted', 'error',
  'permission_requested', 'permission_resolved',
  'agent_commands', 'agent_status_updated',
  // orchestrator
  'flow_run_started', 'flow_run_step_updated',
  // arena
  'battle_started', 'contestant_updated', 'battle_updated',
  // per-user
  'chat_status',
  'session_updated', 'session_renamed', 'session_created',
  'session_archived', 'session_deleted', 'session_workspace_updated',
  'chat_created', 'chat_updated', 'chat_archived',
  'chat_unarchived', 'chat_deleted',
  'project_created', 'project_archived', 'project_unarchived',
  'project_updated', 'project_deleted',
] as const;