feat(coder): Phase 1-UX backend — agent attribution + agent-sessions route + opencode usage

pending_changes.agent stamped at every queue site (native -> 'boocode', dispatched external -> task.agent, manual RightRail -> NULL) + flows through listPending. New GET /api/sessions/:id/agent-sessions -> [{agent,status,has_session,last_active_at}] per (chat,agent). opencode warm server consumes session.next.step.ended, accumulating input_tokens/output_tokens/cost onto agent_sessions (new idempotent columns) via a pure opencode-usage.ts mapper. Tests: agent-sessions.routes (3) + opencode-usage (6); tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 22:07:14 +00:00
parent 48c1d70baf
commit c060778258
10 changed files with 333 additions and 9 deletions
--- a/apps/coder/src/index.ts
+++ b/apps/coder/src/index.ts
@@ -25,6 +25,7 @@ import { setInferenceContext, clearInferenceContext } from './services/tools/inf
 import { registerMessageRoutes } from './routes/messages.js';
 import { registerSkillRoutes } from './routes/skills.js';
 import { registerPendingRoutes } from './routes/pending.js';
+import { registerAgentSessionRoutes } from './routes/agent-sessions.js';
 import { registerTaskRoutes } from './routes/tasks.js';
 import { registerInboxRoutes } from './routes/inbox.js';
 import { registerStatsRoutes } from './routes/stats.js';
@@ -191,6 +192,7 @@ async function main() {
  registerMessageRoutes(app, sql, broker, inferenceApi);
  registerSkillRoutes(app, sql, broker, inferenceApi);
  registerPendingRoutes(app, sql);
+  registerAgentSessionRoutes(app, sql);
  registerTaskRoutes(app, sql, inferenceApi);
  registerInboxRoutes(app, sql);
  registerStatsRoutes(app, sql);
--- a/apps/coder/src/routes/tests/agent-sessions.routes.test.ts
+++ b/apps/coder/src/routes/tests/agent-sessions.routes.test.ts
@@ -0,0 +1,75 @@
+import { describe, it, expect } from 'vitest';
+import Fastify, { type FastifyInstance } from 'fastify';
+import { registerAgentSessionRoutes } from '../agent-sessions.js';
+import type { Sql } from '../../db.js';
+
+// Mock the porsager surface this route uses: a tagged-template `sql` dispatched by
+// query substring. Two queries: the session-existence check and the agent_sessions
+// JOIN. We return post-coercion shapes (booleans/strings) exactly as porsager would
+// hand them to the route — `has_session` already a JS boolean, `last_active_at` a
+// string|null — so the asserted JSON matches the API contract end-to-end.
+interface MockState {
+  sessionExists: boolean;
+  rows: Array<{ agent: string; status: string; has_session: boolean; last_active_at: string | null }>;
+}
+
+function mockSql(state: MockState): Sql {
+  return ((strings: TemplateStringsArray) => {
+    const q = strings.join('');
+    if (q.includes('SELECT id FROM sessions')) {
+      return Promise.resolve(state.sessionExists ? [{ id: 'session-1' }] : []);
+    }
+    if (q.includes('FROM agent_sessions')) {
+      return Promise.resolve(state.rows);
+    }
+    return Promise.resolve([]);
+  }) as unknown as Sql;
+}
+
+function buildApp(state: MockState): FastifyInstance {
+  const app = Fastify();
+  registerAgentSessionRoutes(app, mockSql(state));
+  return app;
+}
+
+describe('GET /api/sessions/:id/agent-sessions', () => {
+  it('returns the per-(chat,agent) rows in the contracted shape', async () => {
+    const app = buildApp({
+      sessionExists: true,
+      rows: [
+        { agent: 'opencode', status: 'active', has_session: true, last_active_at: '2026-05-31T12:00:00.000Z' },
+        { agent: 'goose', status: 'idle', has_session: false, last_active_at: null },
+      ],
+    });
+    const res = await app.inject({ method: 'GET', url: '/api/sessions/session-1/agent-sessions' });
+    expect(res.statusCode).toBe(200);
+    const body = res.json();
+    expect(Array.isArray(body)).toBe(true);
+    expect(body).toEqual([
+      { agent: 'opencode', status: 'active', has_session: true, last_active_at: '2026-05-31T12:00:00.000Z' },
+      { agent: 'goose', status: 'idle', has_session: false, last_active_at: null },
+    ]);
+    // Contract field types.
+    expect(typeof body[0].agent).toBe('string');
+    expect(typeof body[0].status).toBe('string');
+    expect(typeof body[0].has_session).toBe('boolean');
+    expect(body[1].last_active_at).toBeNull();
+    await app.close();
+  });
+
+  it('returns an empty array when the session has no agent_sessions rows', async () => {
+    const app = buildApp({ sessionExists: true, rows: [] });
+    const res = await app.inject({ method: 'GET', url: '/api/sessions/session-1/agent-sessions' });
+    expect(res.statusCode).toBe(200);
+    expect(res.json()).toEqual([]);
+    await app.close();
+  });
+
+  it('404s when the session does not exist', async () => {
+    const app = buildApp({ sessionExists: false, rows: [] });
+    const res = await app.inject({ method: 'GET', url: '/api/sessions/nope/agent-sessions' });
+    expect(res.statusCode).toBe(404);
+    expect(res.json()).toEqual({ error: 'session not found' });
+    await app.close();
+  });
+});
--- a/apps/coder/src/routes/agent-sessions.ts
+++ b/apps/coder/src/routes/agent-sessions.ts
@@ -0,0 +1,51 @@
+import type { FastifyInstance } from 'fastify';
+import type { Sql } from '../db.js';
+
+// v2.6 Phase 1-UX (design §9b): chat-scoped "resumed vs new session" indicator.
+// `agent_sessions` is keyed (chat_id, agent) — the tab/chat is the agent-context
+// unit (P1.5-b). The route param is a SESSION id, so we resolve every chat in the
+// session and return the union of their agent_sessions rows. A session with two
+// opencode tabs yields two rows (one per chat); the frontend keys the chip per
+// chat, but the wire shape is a flat per-(chat,agent) list.
+//
+// has_session = agent_session_id IS NOT NULL — i.e. a native backend session id
+// (opencode/ACP) was created and stored, so switching back resumes rather than
+// starts fresh.
+export interface AgentSessionRow {
+  agent: string;
+  status: string;
+  has_session: boolean;
+  last_active_at: string | null;
+}
+
+export function registerAgentSessionRoutes(app: FastifyInstance, sql: Sql): void {
+  // GET /api/sessions/:sessionId/agent-sessions — list the agent-session rows for
+  // every chat in the session (drives the AgentComposerBar resumed/new chip).
+  app.get<{ Params: { sessionId: string } }>(
+    '/api/sessions/:sessionId/agent-sessions',
+    async (req, reply) => {
+      const sessionId = req.params.sessionId;
+
+      const session = await sql<{ id: string }[]>`SELECT id FROM sessions WHERE id = ${sessionId}`;
+      if (session.length === 0) {
+        reply.code(404);
+        return { error: 'session not found' };
+      }
+
+      // Join through chats so the session-scoped param resolves to its (chat,agent)
+      // rows. last_active_at first → the frontend reads the freshest activity.
+      const rows = await sql<AgentSessionRow[]>`
+        SELECT
+          a.agent AS agent,
+          a.status AS status,
+          (a.agent_session_id IS NOT NULL) AS has_session,
+          a.last_active_at AS last_active_at
+        FROM agent_sessions a
+        JOIN chats c ON c.id = a.chat_id
+        WHERE c.session_id = ${sessionId}
+        ORDER BY a.last_active_at DESC NULLS LAST, a.agent ASC
+      `;
+      return rows;
+    },
+  );
+}
--- a/apps/coder/src/routes/pending.ts
+++ b/apps/coder/src/routes/pending.ts
@@ -90,6 +90,8 @@ export function registerPendingRoutes(app: FastifyInstance, sql: Sql): void {
          parsed.data.file_path,
          parsed.data.content,
          projectRoot,
+          // Manual RightRail create — no agent staged it; renders as "manual".
+          null,
        );
        return change;
      } catch (err) {
--- a/apps/coder/src/schema.sql
+++ b/apps/coder/src/schema.sql
@@ -131,6 +131,17 @@ END $$;
 -- v2.6: config fingerprint for stale-session detection (auto-recover on model change).
 ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS config_hash TEXT;

+-- v2.6 Phase 1-UX (U.6): opencode token/cost usage, ACCUMULATED per (chat_id, agent).
+-- opencode's warm server emits `session.next.step.ended` once per LLM step (several
+-- per multi-tool turn) carrying {tokens{input,output,reasoning,cache},cost}. We sum
+-- each step's normalized {input,output,cost} onto the session row — running totals
+-- for the whole conversation context, not last-step. Backend-only; no route/UI yet.
+-- input_tokens folds in cache read+write; output_tokens folds in reasoning (see
+-- backends/opencode-usage.ts). Defaults 0 so accumulation (col + delta) is well-defined.
+ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS input_tokens BIGINT NOT NULL DEFAULT 0;
+ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS output_tokens BIGINT NOT NULL DEFAULT 0;
+ALTER TABLE agent_sessions ADD COLUMN IF NOT EXISTS cost DOUBLE PRECISION NOT NULL DEFAULT 0;
+
 -- ─── P1.5-b (corrected): worktrees entity + re-key agent_sessions to (chat_id, agent) ───
 -- The TAB (a chat) is the context unit: two opencode tabs in one session = two
 -- independent contexts sharing one worktree. So agent_sessions keys on
--- a/apps/coder/src/services/backends/tests/opencode-usage.test.ts
+++ b/apps/coder/src/services/backends/tests/opencode-usage.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect } from 'vitest';
+import { stepEndedToUsage } from '../opencode-usage.js';
+
+describe('stepEndedToUsage (U.6)', () => {
+  it('folds cache read+write into input and reasoning into output', () => {
+    const u = stepEndedToUsage({
+      cost: 0.0123,
+      tokens: { input: 100, output: 50, reasoning: 20, cache: { read: 10, write: 5 } },
+    });
+    expect(u).toEqual({ input: 115, output: 70, cost: 0.0123 });
+  });
+
+  it('handles a step with no cache and no reasoning', () => {
+    const u = stepEndedToUsage({
+      cost: 0,
+      tokens: { input: 8, output: 4, reasoning: 0, cache: { read: 0, write: 0 } },
+    });
+    expect(u).toEqual({ input: 8, output: 4, cost: 0 });
+  });
+
+  it('is defensive against a missing tokens block', () => {
+    const u = stepEndedToUsage({ cost: 0.5 } as never);
+    expect(u).toEqual({ input: 0, output: 0, cost: 0.5 });
+  });
+
+  it('is defensive against undefined props', () => {
+    expect(stepEndedToUsage(undefined)).toEqual({ input: 0, output: 0, cost: 0 });
+  });
+
+  it('drops NaN / negative noise to zero rather than poisoning the accumulated total', () => {
+    const u = stepEndedToUsage({
+      cost: Number.NaN,
+      tokens: {
+        input: -5,
+        output: Number.NaN,
+        reasoning: 3,
+        cache: { read: Number.POSITIVE_INFINITY, write: 2 },
+      },
+    });
+    // input: (-5→0) + (Inf→0) + 2 = 2; output: (NaN→0) + 3 = 3; cost: NaN→0
+    expect(u).toEqual({ input: 2, output: 3, cost: 0 });
+  });
+
+  it('rounds fractional token counts', () => {
+    const u = stepEndedToUsage({
+      cost: 1.5,
+      tokens: { input: 10.6, output: 4.4, reasoning: 0, cache: { read: 0, write: 0 } },
+    });
+    expect(u).toEqual({ input: 11, output: 4, cost: 1.5 });
+  });
+});
--- a/apps/coder/src/services/backends/opencode-server.ts
+++ b/apps/coder/src/services/backends/opencode-server.ts
@@ -38,6 +38,7 @@ import type { ToolCallStatus } from '@agentclientprotocol/sdk';
 import type { Sql } from '../../db.js';
 import type { AcpToolSnapshot } from '../acp-tool-snapshot.js';
 import { armAbortGuard, noteTurnActivity, consumeTerminal } from './turn-guard.js';
+import { stepEndedToUsage, type StepUsage } from './opencode-usage.js';
 import type {
  AgentBackend,
  AgentEvent,
@@ -282,6 +283,19 @@ export class OpenCodeServerBackend implements AgentBackend {
        st.activeTurn.onEvent({ type: 'tool_update', toolCall: snap });
        return;
      }
+      // ─── per-step usage (U.6) — token/cost accounting for opencode sessions ──
+      case 'session.next.step.ended': {
+        const p = ev.properties;
+        const st = this.byOpencodeId.get(p.sessionID);
+        if (!st?.activeTurn) return;
+        this.bumpActivity(st);
+        // Accumulate this step's normalized usage onto the (chat_id, agent) row.
+        // Fire-and-forget: a DB hiccup must not stall the turn. opencode emits this
+        // once per LLM step, so a multi-tool turn sums several deltas.
+        const usage = stepEndedToUsage(p);
+        void this.accumulateUsage(st, usage);
+        return;
+      }
      // ─── message.part.* — terminal/post-hoc events (dedup gate) ────────────
      case 'message.part.delta': {
        const p = ev.properties;
@@ -428,6 +442,33 @@ export class OpenCodeServerBackend implements AgentBackend {
    }
  }

+  // ─── per-step usage persistence (U.6) ────────────────────────────────────────
+
+  /**
+   * Accumulate one `session.next.step.ended`'s normalized usage onto the session's
+   * agent_sessions row, keyed by the resumed `agent_session_id` (unique per active
+   * row — the dispatcher's `(chat_id, agent)` lookup wrote it). Running totals for
+   * the whole conversation context (not last-step). Zero-delta steps are skipped to
+   * avoid a no-op write. Errors are swallowed: usage telemetry must never fail a turn.
+   */
+  private async accumulateUsage(st: SessionState, u: StepUsage): Promise<void> {
+    if (u.input === 0 && u.output === 0 && u.cost === 0) return;
+    try {
+      await this.sql`
+        UPDATE agent_sessions SET
+          input_tokens = input_tokens + ${u.input},
+          output_tokens = output_tokens + ${u.output},
+          cost = cost + ${u.cost}
+        WHERE agent_session_id = ${st.agentSessionId}
+      `;
+    } catch (err) {
+      this.log.warn(
+        { err: errMsg(err), agentSessionId: st.agentSessionId },
+        'opencode-server: failed to persist step usage (non-fatal)',
+      );
+    }
+  }
+
  // ─── ensureSession: create-or-resume against agent_sessions (1.5) ────────────

  async ensureSession(sessionId: string, opts: EnsureSessionOpts): Promise<AgentSessionHandle> {
--- a/apps/coder/src/services/backends/opencode-usage.ts
+++ b/apps/coder/src/services/backends/opencode-usage.ts
@@ -0,0 +1,77 @@
+/**
+ * v2.6 Phase 1-UX (U.6) — pure mapper for opencode's per-step usage event.
+ *
+ * opencode's warm server emits `session.next.step.ended` once per completed LLM
+ * step (so a multi-tool turn fires it several times). Its `properties` carry the
+ * step's token + cost accounting:
+ *
+ *   {
+ *     timestamp: number;
+ *     sessionID: string;
+ *     finish: string;
+ *     cost: number;                                  // USD for this step
+ *     tokens: {
+ *       input: number; output: number; reasoning: number;
+ *       cache: { read: number; write: number };
+ *     };
+ *     snapshot?: string;
+ *   }
+ *
+ * (Verified against @opencode-ai/sdk@1.15.12 — `EventSessionNextStepEnded` in
+ * `dist/v2/gen/types.gen.d.ts`, a member of the `Event` union the SSE loop
+ * switches on.)
+ *
+ * We normalize to the review's target slice `{input, output, cost}` (the
+ * provider-agnostic `AgentUsage` shape lands later). cache read/write tokens are
+ * folded into `input` so the persisted input count reflects the real context the
+ * model billed for; reasoning tokens are folded into `output` since that's what
+ * the provider counts them as for generation. This keeps the persisted totals a
+ * faithful sum of what opencode reported, without inventing extra columns yet.
+ */
+
+/** The `properties` shape of a `session.next.step.ended` event (subset we read). */
+export interface StepEndedProps {
+  cost: number;
+  tokens: {
+    input: number;
+    output: number;
+    reasoning: number;
+    cache: { read: number; write: number };
+  };
+}
+
+/** Normalized per-step usage delta persisted onto the agent_sessions row. */
+export interface StepUsage {
+  input: number;
+  output: number;
+  cost: number;
+}
+
+/** Coerce a possibly-missing/NaN number to a non-negative finite integer (tokens). */
+function n(v: unknown): number {
+  const x = typeof v === 'number' ? v : Number(v);
+  return Number.isFinite(x) && x > 0 ? Math.round(x) : 0;
+}
+
+/** Coerce a possibly-missing/NaN number to a non-negative finite float (cost USD). */
+function f(v: unknown): number {
+  const x = typeof v === 'number' ? v : Number(v);
+  return Number.isFinite(x) && x > 0 ? x : 0;
+}
+
+/**
+ * Map a `session.next.step.ended` payload → the normalized `{input, output, cost}`
+ * delta. Defensive against missing/partial token blocks (the wire is trusted but
+ * we never want a NaN to poison the accumulated DB total). `input` folds in cache
+ * read+write; `output` folds in reasoning.
+ */
+export function stepEndedToUsage(props: Partial<StepEndedProps> | undefined): StepUsage {
+  const t = props?.tokens;
+  const cacheRead = n(t?.cache?.read);
+  const cacheWrite = n(t?.cache?.write);
+  return {
+    input: n(t?.input) + cacheRead + cacheWrite,
+    output: n(t?.output) + n(t?.reasoning),
+    cost: f(props?.cost),
+  };
+}
--- a/apps/coder/src/services/dispatcher.ts
+++ b/apps/coder/src/services/dispatcher.ts
@@ -441,10 +441,11 @@ export function createDispatcher(deps: Deps): { start(): void; stop(): Promise<v
      const diff = await diffWorktree(worktreePath, projectPath, { signal: ac.signal });

      if (diff) {
-        // Queue a single pending_change entry with the full unified diff
+        // Queue a single pending_change entry with the full unified diff, stamped
+        // with the dispatched agent for DiffPanel attribution (v2.6 Phase 1-UX).
        await sql`
-          INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff)
-          VALUES (${sessionId}, ${taskId}, ${projectPath}, 'edit', ${diff})
+          INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
+          VALUES (${sessionId}, ${taskId}, ${projectPath}, 'edit', ${diff}, ${agent})
        `;
        log.info({ taskId, diffLength: diff.length }, 'dispatcher: diff queued as pending change');
      } else {
--- a/apps/coder/src/services/pending_changes.ts
+++ b/apps/coder/src/services/pending_changes.ts
@@ -13,6 +13,10 @@ export interface PendingChange {
  operation: 'create' | 'edit' | 'delete';
  diff: string;
  status: 'pending' | 'applied' | 'rejected' | 'reverted';
+  // v2.6 Phase 1-UX: which agent staged this change (DiffPanel attribution).
+  // Native boocode write tools stamp 'boocode'; the manual RightRail create path
+  // passes null (renders as "manual"). NULL on legacy rows queued pre-v2.6.
+  agent: string | null;
  created_at: string;
 }

@@ -34,13 +38,17 @@ export async function queueEdit(
  oldString: string,
  newString: string,
  projectRoot: string,
+  // v2.6 Phase 1-UX: attribution. Defaults to 'boocode' because the only callers
+  // that omit it are the native write tools (edit_file/create_file/delete_file).
+  // Pass null explicitly for the manual RightRail create path.
+  agent: string | null = 'boocode',
 ): Promise<PendingChange> {
  const resolved = resolveWritePath(projectRoot, filePath);
  const diff = JSON.stringify({ old: oldString, new: newString });

  const [row] = await sql<PendingChange[]>`
-    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff)
-    VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff})
+    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
+    VALUES (${sessionId}, ${taskId}, ${resolved}, 'edit', ${diff}, ${agent})
    RETURNING *
  `;
  return row!;
@@ -53,12 +61,15 @@ export async function queueCreate(
  filePath: string,
  content: string,
  projectRoot: string,
+  // See queueEdit: defaults to 'boocode' for the native write tools; the manual
+  // RightRail create route passes null.
+  agent: string | null = 'boocode',
 ): Promise<PendingChange> {
  const resolved = resolveWritePath(projectRoot, filePath);

  const [row] = await sql<PendingChange[]>`
-    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff)
-    VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content})
+    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
+    VALUES (${sessionId}, ${taskId}, ${resolved}, 'create', ${content}, ${agent})
    RETURNING *
  `;
  return row!;
@@ -70,12 +81,14 @@ export async function queueDelete(
  taskId: string | null,
  filePath: string,
  projectRoot: string,
+  // See queueEdit: defaults to 'boocode' for the native write tools.
+  agent: string | null = 'boocode',
 ): Promise<PendingChange> {
  const resolved = resolveWritePath(projectRoot, filePath);

  const [row] = await sql<PendingChange[]>`
-    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff)
-    VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '')
+    INSERT INTO pending_changes (session_id, task_id, file_path, operation, diff, agent)
+    VALUES (${sessionId}, ${taskId}, ${resolved}, 'delete', '', ${agent})
    RETURNING *
  `;
  return row!;