From c060778258a780c7436f18e9ff3cd46d22f4c96f Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Sun, 31 May 2026 22:07:14 +0000 Subject: [PATCH] =?UTF-8?q?feat(coder):=20Phase=201-UX=20backend=20?= =?UTF-8?q?=E2=80=94=20agent=20attribution=20+=20agent-sessions=20route=20?= =?UTF-8?q?+=20opencode=20usage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pending_changes.agent stamped at every queue site (native -> 'boocode', dispatched external -> task.agent, manual RightRail -> NULL) + flows through listPending. New GET /api/sessions/:id/agent-sessions -> [{agent,status,has_session,last_active_at}] per (chat,agent). opencode warm server consumes session.next.step.ended, accumulating input_tokens/output_tokens/cost onto agent_sessions (new idempotent columns) via a pure opencode-usage.ts mapper. Tests: agent-sessions.routes (3) + opencode-usage (6); tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/coder/src/index.ts | 2 + .../__tests__/agent-sessions.routes.test.ts | 75 ++++++++++++++++++ apps/coder/src/routes/agent-sessions.ts | 51 ++++++++++++ apps/coder/src/routes/pending.ts | 2 + apps/coder/src/schema.sql | 11 +++ .../backends/__tests__/opencode-usage.test.ts | 51 ++++++++++++ .../src/services/backends/opencode-server.ts | 41 ++++++++++ .../src/services/backends/opencode-usage.ts | 77 +++++++++++++++++++ apps/coder/src/services/dispatcher.ts | 7 +- apps/coder/src/services/pending_changes.ts | 25 ++++-- 10 files changed, 333 insertions(+), 9 deletions(-) create mode 100644 apps/coder/src/routes/__tests__/agent-sessions.routes.test.ts create mode 100644 apps/coder/src/routes/agent-sessions.ts create mode 100644 apps/coder/src/services/backends/__tests__/opencode-usage.test.ts create mode 100644 apps/coder/src/services/backends/opencode-usage.ts diff --git a/apps/coder/src/index.ts b/apps/coder/src/index.ts index bbd4dd7..a388f8b 100644 --- a/apps/coder/src/index.ts +++ b/apps/coder/src/index.ts @@ -25,6 +25,7 @@ import { setInferenceContext, clearInferenceContext } from './services/tools/inf import { registerMessageRoutes } from './routes/messages.js'; import { registerSkillRoutes } from './routes/skills.js'; import { registerPendingRoutes } from './routes/pending.js'; +import { registerAgentSessionRoutes } from './routes/agent-sessions.js'; import { registerTaskRoutes } from './routes/tasks.js'; import { registerInboxRoutes } from './routes/inbox.js'; import { registerStatsRoutes } from './routes/stats.js'; @@ -191,6 +192,7 @@ async function main() { registerMessageRoutes(app, sql, broker, inferenceApi); registerSkillRoutes(app, sql, broker, inferenceApi); registerPendingRoutes(app, sql); + registerAgentSessionRoutes(app, sql); registerTaskRoutes(app, sql, inferenceApi); registerInboxRoutes(app, sql); registerStatsRoutes(app, sql); diff --git a/apps/coder/src/routes/__tests__/agent-sessions.routes.test.ts b/apps/coder/src/routes/__tests__/agent-sessions.routes.test.ts new file mode 100644 index 0000000..518eb60 --- /dev/null +++ b/apps/coder/src/routes/__tests__/agent-sessions.routes.test.ts @@ -0,0 +1,75 @@ +import { describe, it, expect } from 'vitest'; +import Fastify, { type FastifyInstance } from 'fastify'; +import { registerAgentSessionRoutes } from '../agent-sessions.js'; +import type { Sql } from '../../db.js'; + +// Mock the porsager surface this route uses: a tagged-template `sql` dispatched by +// query substring. Two queries: the session-existence check and the agent_sessions +// JOIN. We return post-coercion shapes (booleans/strings) exactly as porsager would +// hand them to the route — `has_session` already a JS boolean, `last_active_at` a +// string|null — so the asserted JSON matches the API contract end-to-end. +interface MockState { + sessionExists: boolean; + rows: Array<{ agent: string; status: string; has_session: boolean; last_active_at: string | null }>; +} + +function mockSql(state: MockState): Sql { + return ((strings: TemplateStringsArray) => { + const q = strings.join(''); + if (q.includes('SELECT id FROM sessions')) { + return Promise.resolve(state.sessionExists ? [{ id: 'session-1' }] : []); + } + if (q.includes('FROM agent_sessions')) { + return Promise.resolve(state.rows); + } + return Promise.resolve([]); + }) as unknown as Sql; +} + +function buildApp(state: MockState): FastifyInstance { + const app = Fastify(); + registerAgentSessionRoutes(app, mockSql(state)); + return app; +} + +describe('GET /api/sessions/:id/agent-sessions', () => { + it('returns the per-(chat,agent) rows in the contracted shape', async () => { + const app = buildApp({ + sessionExists: true, + rows: [ + { agent: 'opencode', status: 'active', has_session: true, last_active_at: '2026-05-31T12:00:00.000Z' }, + { agent: 'goose', status: 'idle', has_session: false, last_active_at: null }, + ], + }); + const res = await app.inject({ method: 'GET', url: '/api/sessions/session-1/agent-sessions' }); + expect(res.statusCode).toBe(200); + const body = res.json(); + expect(Array.isArray(body)).toBe(true); + expect(body).toEqual([ + { agent: 'opencode', status: 'active', has_session: true, last_active_at: '2026-05-31T12:00:00.000Z' }, + { agent: 'goose', status: 'idle', has_session: false, last_active_at: null }, + ]); + // Contract field types. + expect(typeof body[0].agent).toBe('string'); + expect(typeof body[0].status).toBe('string'); + expect(typeof body[0].has_session).toBe('boolean'); + expect(body[1].last_active_at).toBeNull(); + await app.close(); + }); + + it('returns an empty array when the session has no agent_sessions rows', async () => { + const app = buildApp({ sessionExists: true, rows: [] }); + const res = await app.inject({ method: 'GET', url: '/api/sessions/session-1/agent-sessions' }); + expect(res.statusCode).toBe(200); + expect(res.json()).toEqual([]); + await app.close(); + }); + + it('404s when the session does not exist', async () => { + const app = buildApp({ sessionExists: false, rows: [] }); + const res = await app.inject({ method: 'GET', url: '/api/sessions/nope/agent-sessions' }); + expect(res.statusCode).toBe(404); + expect(res.json()).toEqual({ error: 'session not found' }); + await app.close(); + }); +}); diff --git a/apps/coder/src/routes/agent-sessions.ts b/apps/coder/src/routes/agent-sessions.ts new file mode 100644 index 0000000..7f1e019 --- /dev/null +++ b/apps/coder/src/routes/agent-sessions.ts @@ -0,0 +1,51 @@ +import type { FastifyInstance } from 'fastify'; +import type { Sql } from '../db.js'; + +// v2.6 Phase 1-UX (design §9b): chat-scoped "resumed vs new session" indicator. +// `agent_sessions` is keyed (chat_id, agent) — the tab/chat is the agent-context +// unit (P1.5-b). The route param is a SESSION id, so we resolve every chat in the +// session and return the union of their agent_sessions rows. A session with two +// opencode tabs yields two rows (one per chat); the frontend keys the chip per +// chat, but the wire shape is a flat per-(chat,agent) list. +// +// has_session = agent_session_id IS NOT NULL — i.e. a native backend session id +// (opencode/ACP) was created and stored, so switching back resumes rather than +// starts fresh. +export interface AgentSessionRow { + agent: string; + status: string; + has_session: boolean; + last_active_at: string | null; +} + +export function registerAgentSessionRoutes(app: FastifyInstance, sql: Sql): void { + // GET /api/sessions/:sessionId/agent-sessions — list the agent-session rows for + // every chat in the session (drives the AgentComposerBar resumed/new chip). + app.get<{ Params: { sessionId: string } }>( + '/api/sessions/:sessionId/agent-sessions', + async (req, reply) => { + const sessionId = req.params.sessionId; + + const session = await sql<{ id: string }[]>`SELECT id FROM sessions WHERE id = ${sessionId}`; + if (session.length === 0) { + reply.code(404); + return { error: 'session not found' }; + } + + // Join through chats so the session-scoped param resolves to its (chat,agent) + // rows. last_active_at first → the frontend reads the freshest activity. + const rows = await sql` + SELECT + a.agent AS agent, + a.status AS status, + (a.agent_session_id IS NOT NULL) AS has_session, + a.last_active_at AS last_active_at + FROM agent_sessions a + JOIN chats c ON c.id = a.chat_id + WHERE c.session_id = ${sessionId} + ORDER BY a.last_active_at DESC NULLS LAST, a.agent ASC + `; + return rows; + }, + ); +} diff --git a/apps/coder/src/routes/pending.ts b/apps/coder/src/routes/pending.ts index 9467126..6dc0a75 100644 --- a/apps/coder/src/routes/pending.ts +++ b/apps/coder/src/routes/pending.ts @@ -90,6 +90,8 @@ export function registerPendingRoutes(app: FastifyInstance, sql: Sql): void { parsed.data.file_path, parsed.data.content, projectRoot, + // Manual RightRail create — no agent staged it; renders as "manual". + null, ); return change; } catch (err) { diff --git a/apps/coder/src/schema.sql b/apps/coder/src/schema.sql index adf7c93..9513a03 100644 --- a/apps/coder/src/schema.sql +++ b/apps/coder/src/schema.sql @@ -131,6 +131,17 @@ END $$; -- v2.6: config fingerprint for stale-session detection (auto-recover on model change). ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS config_hash TEXT; +-- v2.6 Phase 1-UX (U.6): opencode token/cost usage, ACCUMULATED per (chat_id, agent). +-- opencode's warm server emits `session.next.step.ended` once per LLM step (several +-- per multi-tool turn) carrying {tokens{input,output,reasoning,cache},cost}. We sum +-- each step's normalized {input,output,cost} onto the session row — running totals +-- for the whole conversation context, not last-step. Backend-only; no route/UI yet. +-- input_tokens folds in cache read+write; output_tokens folds in reasoning (see +-- backends/opencode-usage.ts). Defaults 0 so accumulation (col + delta) is well-defined. +ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS input_tokens BIGINT NOT NULL DEFAULT 0; +ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS output_tokens BIGINT NOT NULL DEFAULT 0; +ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS cost DOUBLE PRECISION NOT NULL DEFAULT 0; + -- ─── P1.5-b (corrected): worktrees entity + re-key agent_sessions to (chat_id, agent) ─── -- The TAB (a chat) is the context unit: two opencode tabs in one session = two -- independent contexts sharing one worktree. So agent_sessions keys on diff --git a/apps/coder/src/services/backends/__tests__/opencode-usage.test.ts b/apps/coder/src/services/backends/__tests__/opencode-usage.test.ts new file mode 100644 index 0000000..005b18c --- /dev/null +++ b/apps/coder/src/services/backends/__tests__/opencode-usage.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'vitest'; +import { stepEndedToUsage } from '../opencode-usage.js'; + +describe('stepEndedToUsage (U.6)', () => { + it('folds cache read+write into input and reasoning into output', () => { + const u = stepEndedToUsage({ + cost: 0.0123, + tokens: { input: 100, output: 50, reasoning: 20, cache: { read: 10, write: 5 } }, + }); + expect(u).toEqual({ input: 115, output: 70, cost: 0.0123 }); + }); + + it('handles a step with no cache and no reasoning', () => { + const u = stepEndedToUsage({ + cost: 0, + tokens: { input: 8, output: 4, reasoning: 0, cache: { read: 0, write: 0 } }, + }); + expect(u).toEqual({ input: 8, output: 4, cost: 0 }); + }); + + it('is defensive against a missing tokens block', () => { + const u = stepEndedToUsage({ cost: 0.5 } as never); + expect(u).toEqual({ input: 0, output: 0, cost: 0.5 }); + }); + + it('is defensive against undefined props', () => { + expect(stepEndedToUsage(undefined)).toEqual({ input: 0, output: 0, cost: 0 }); + }); + + it('drops NaN / negative noise to zero rather than poisoning the accumulated total', () => { + const u = stepEndedToUsage({ + cost: Number.NaN, + tokens: { + input: -5, + output: Number.NaN, + reasoning: 3, + cache: { read: Number.POSITIVE_INFINITY, write: 2 }, + }, + }); + // input: (-5→0) + (Inf→0) + 2 = 2; output: (NaN→0) + 3 = 3; cost: NaN→0 + expect(u).toEqual({ input: 2, output: 3, cost: 0 }); + }); + + it('rounds fractional token counts', () => { + const u = stepEndedToUsage({ + cost: 1.5, + tokens: { input: 10.6, output: 4.4, reasoning: 0, cache: { read: 0, write: 0 } }, + }); + expect(u).toEqual({ input: 11, output: 4, cost: 1.5 }); + }); +}); diff --git a/apps/coder/src/services/backends/opencode-server.ts b/apps/coder/src/services/backends/opencode-server.ts index 4b62456..627b220 100644 --- a/apps/coder/src/services/backends/opencode-server.ts +++ b/apps/coder/src/services/backends/opencode-server.ts @@ -38,6 +38,7 @@ import type { ToolCallStatus } from '@agentclientprotocol/sdk'; import type { Sql } from '../../db.js'; import type { AcpToolSnapshot } from '../acp-tool-snapshot.js'; import { armAbortGuard, noteTurnActivity, consumeTerminal } from './turn-guard.js'; +import { stepEndedToUsage, type StepUsage } from './opencode-usage.js'; import type { AgentBackend, AgentEvent, @@ -282,6 +283,19 @@ export class OpenCodeServerBackend implements AgentBackend { st.activeTurn.onEvent({ type: 'tool_update', toolCall: snap }); return; } + // ─── per-step usage (U.6) — token/cost accounting for opencode sessions ── + case 'session.next.step.ended': { + const p = ev.properties; + const st = this.byOpencodeId.get(p.sessionID); + if (!st?.activeTurn) return; + this.bumpActivity(st); + // Accumulate this step's normalized usage onto the (chat_id, agent) row. + // Fire-and-forget: a DB hiccup must not stall the turn. opencode emits this + // once per LLM step, so a multi-tool turn sums several deltas. + const usage = stepEndedToUsage(p); + void this.accumulateUsage(st, usage); + return; + } // ─── message.part.* — terminal/post-hoc events (dedup gate) ──────────── case 'message.part.delta': { const p = ev.properties; @@ -428,6 +442,33 @@ export class OpenCodeServerBackend implements AgentBackend { } } + // ─── per-step usage persistence (U.6) ──────────────────────────────────────── + + /** + * Accumulate one `session.next.step.ended`'s normalized usage onto the session's + * agent_sessions row, keyed by the resumed `agent_session_id` (unique per active + * row — the dispatcher's `(chat_id, agent)` lookup wrote it). Running totals for + * the whole conversation context (not last-step). Zero-delta steps are skipped to + * avoid a no-op write. Errors are swallowed: usage telemetry must never fail a turn. + */ + private async accumulateUsage(st: SessionState, u: StepUsage): Promise { + if (u.input === 0 && u.output === 0 && u.cost === 0) return; + try { + await this.sql` + UPDATE agent_sessions SET + input_tokens = input_tokens + ${u.input}, + output_tokens = output_tokens + ${u.output}, + cost = cost + ${u.cost} + WHERE agent_session_id = ${st.agentSessionId} + `; + } catch (err) { + this.log.warn( + { err: errMsg(err), agentSessionId: st.agentSessionId }, + 'opencode-server: failed to persist step usage (non-fatal)', + ); + } + } + // ─── ensureSession: create-or-resume against agent_sessions (1.5) ──────────── async ensureSession(sessionId: string, opts: EnsureSessionOpts): Promise { diff --git a/apps/coder/src/services/backends/opencode-usage.ts b/apps/coder/src/services/backends/opencode-usage.ts new file mode 100644 index 0000000..95fb793 --- /dev/null +++ b/apps/coder/src/services/backends/opencode-usage.ts @@ -0,0 +1,77 @@ +/** + * v2.6 Phase 1-UX (U.6) — pure mapper for opencode's per-step usage event. + * + * opencode's warm server emits `session.next.step.ended` once per completed LLM + * step (so a multi-tool turn fires it several times). Its `properties` carry the + * step's token + cost accounting: + * + * { + * timestamp: number; + * sessionID: string; + * finish: string; + * cost: number; // USD for this step + * tokens: { + * input: number; output: number; reasoning: number; + * cache: { read: number; write: number }; + * }; + * snapshot?: string; + * } + * + * (Verified against @opencode-ai/sdk@1.15.12 — `EventSessionNextStepEnded` in + * `dist/v2/gen/types.gen.d.ts`, a member of the `Event` union the SSE loop + * switches on.) + * + * We normalize to the review's target slice `{input, output, cost}` (the + * provider-agnostic `AgentUsage` shape lands later). cache read/write tokens are + * folded into `input` so the persisted input count reflects the real context the + * model billed for; reasoning tokens are folded into `output` since that's what + * the provider counts them as for generation. This keeps the persisted totals a + * faithful sum of what opencode reported, without inventing extra columns yet. + */ + +/** The `properties` shape of a `session.next.step.ended` event (subset we read). */ +export interface StepEndedProps { + cost: number; + tokens: { + input: number; + output: number; + reasoning: number; + cache: { read: number; write: number }; + }; +} + +/** Normalized per-step usage delta persisted onto the agent_sessions row. */ +export interface StepUsage { + input: number; + output: number; + cost: number; +} + +/** Coerce a possibly-missing/NaN number to a non-negative finite integer (tokens). */ +function n(v: unknown): number { + const x = typeof v === 'number' ? v : Number(v); + return Number.isFinite(x) && x > 0 ? Math.round(x) : 0; +} + +/** Coerce a possibly-missing/NaN number to a non-negative finite float (cost USD). */ +function f(v: unknown): number { + const x = typeof v === 'number' ? v : Number(v); + return Number.isFinite(x) && x > 0 ? x : 0; +} + +/** + * Map a `session.next.step.ended` payload → the normalized `{input, output, cost}` + * delta. Defensive against missing/partial token blocks (the wire is trusted but + * we never want a NaN to poison the accumulated DB total). `input` folds in cache + * read+write; `output` folds in reasoning. + */ +export function stepEndedToUsage(props: Partial | undefined): StepUsage { + const t = props?.tokens; + const cacheRead = n(t?.cache?.read); + const cacheWrite = n(t?.cache?.write); + return { + input: n(t?.input) + cacheRead + cacheWrite, + output: n(t?.output) + n(t?.reasoning), + cost: f(props?.cost), + }; +} diff --git a/apps/coder/src/services/dispatcher.ts b/apps/coder/src/services/dispatcher.ts index 94816e6..465969d 100644 --- a/apps/coder/src/services/dispatcher.ts +++ b/apps/coder/src/services/dispatcher.ts @@ -441,10 +441,11 @@ export function createDispatcher(deps: Deps): { start(): void; stop(): Promise { const resolved = resolveWritePath(projectRoot, filePath); const diff = JSON.stringify({ old: oldString, new: newString }); const [row] = await sql` - INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff) - VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff}) + INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) + VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff}, ${agent}) RETURNING * `; return row!; @@ -53,12 +61,15 @@ export async function queueCreate( filePath: string, content: string, projectRoot: string, + // See queueEdit: defaults to 'boocode' for the native write tools; the manual + // RightRail create route passes null. + agent: string | null = 'boocode', ): Promise { const resolved = resolveWritePath(projectRoot, filePath); const [row] = await sql` - INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff) - VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content}) + INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) + VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content}, ${agent}) RETURNING * `; return row!; @@ -70,12 +81,14 @@ export async function queueDelete( taskId: string | null, filePath: string, projectRoot: string, + // See queueEdit: defaults to 'boocode' for the native write tools. + agent: string | null = 'boocode', ): Promise { const resolved = resolveWritePath(projectRoot, filePath); const [row] = await sql` - INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff) - VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '') + INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent) + VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '', ${agent}) RETURNING * `; return row!;