feat(coder): Phase 1-UX backend — agent attribution + agent-sessions route + opencode usage

pending_changes.agent stamped at every queue site (native -> 'boocode', dispatched external -> task.agent, manual RightRail -> NULL) + flows through listPending. New GET /api/sessions/:id/agent-sessions -> [{agent,status,has_session,last_active_at}] per (chat,agent). opencode warm server consumes session.next.step.ended, accumulating input_tokens/output_tokens/cost onto agent_sessions (new idempotent columns) via a pure opencode-usage.ts mapper. Tests: agent-sessions.routes (3) + opencode-usage (6); tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 22:07:14 +00:00
parent 48c1d70baf
commit c060778258
10 changed files with 333 additions and 9 deletions
--- a/apps/coder/src/services/backends/tests/opencode-usage.test.ts
+++ b/apps/coder/src/services/backends/tests/opencode-usage.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect } from 'vitest';
+import { stepEndedToUsage } from '../opencode-usage.js';
+
+describe('stepEndedToUsage (U.6)', () => {
+  it('folds cache read+write into input and reasoning into output', () => {
+    const u = stepEndedToUsage({
+      cost: 0.0123,
+      tokens: { input: 100, output: 50, reasoning: 20, cache: { read: 10, write: 5 } },
+    });
+    expect(u).toEqual({ input: 115, output: 70, cost: 0.0123 }); // input 100+10+5, output 50+20
+  });
+
+  it('handles a step with no cache and no reasoning', () => {
+    const u = stepEndedToUsage({
+      cost: 0,
+      tokens: { input: 8, output: 4, reasoning: 0, cache: { read: 0, write: 0 } },
+    });
+    expect(u).toEqual({ input: 8, output: 4, cost: 0 }); // plain counts pass through unchanged
+  });
+
+  it('is defensive against a missing tokens block', () => {
+    const u = stepEndedToUsage({ cost: 0.5 } as never);
+    expect(u).toEqual({ input: 0, output: 0, cost: 0.5 }); // absent tokens → zero counts, cost kept
+  });
+
+  it('is defensive against undefined props', () => {
+    expect(stepEndedToUsage(undefined)).toEqual({ input: 0, output: 0, cost: 0 });
+  });
+
+  it('drops NaN / negative noise to zero rather than poisoning the accumulated total', () => {
+    const u = stepEndedToUsage({
+      cost: Number.NaN,
+      tokens: {
+        input: -5,
+        output: Number.NaN,
+        reasoning: 3,
+        cache: { read: Number.POSITIVE_INFINITY, write: 2 },
+      },
+    });
+    // input: input -5→0, cache.read Inf→0, cache.write 2 ⇒ 2; output: NaN→0 plus reasoning 3 ⇒ 3; cost: NaN→0
+    expect(u).toEqual({ input: 2, output: 3, cost: 0 });
+  });
+
+  it('rounds fractional token counts', () => {
+    const u = stepEndedToUsage({
+      cost: 1.5,
+      tokens: { input: 10.6, output: 4.4, reasoning: 0, cache: { read: 0, write: 0 } },
+    });
+    expect(u).toEqual({ input: 11, output: 4, cost: 1.5 }); // tokens rounded (10.6→11, 4.4→4); cost is not
+  });
+});
--- a/apps/coder/src/services/backends/opencode-server.ts
+++ b/apps/coder/src/services/backends/opencode-server.ts
@@ -38,6 +38,7 @@ import type { ToolCallStatus } from '@agentclientprotocol/sdk';
 import type { Sql } from '../../db.js';
 import type { AcpToolSnapshot } from '../acp-tool-snapshot.js';
 import { armAbortGuard, noteTurnActivity, consumeTerminal } from './turn-guard.js';
+import { stepEndedToUsage, type StepUsage } from './opencode-usage.js';
 import type {
  AgentBackend,
  AgentEvent,
@@ -282,6 +283,19 @@ export class OpenCodeServerBackend implements AgentBackend {
        st.activeTurn.onEvent({ type: 'tool_update', toolCall: snap });
        return;
      }
+      // ─── per-step usage (U.6) — token/cost accounting for opencode sessions ──
+      case 'session.next.step.ended': {
+        const p = ev.properties;
+        const st = this.byOpencodeId.get(p.sessionID);
+        if (!st?.activeTurn) return; // unknown session, or no turn in flight: ignore the event
+        this.bumpActivity(st);
+        // Normalize this step's usage and add it to the session's agent_sessions row.
+        // Fire-and-forget: a DB hiccup must not stall the turn. opencode emits this
+        // once per LLM step, so a multi-tool turn sums several deltas.
+        const usage = stepEndedToUsage(p);
+        void this.accumulateUsage(st, usage); // not awaited; accumulateUsage catches and logs its own errors
+        return;
+      }
      // ─── message.part.* — terminal/post-hoc events (dedup gate) ────────────
      case 'message.part.delta': {
        const p = ev.properties;
@@ -428,6 +442,33 @@ export class OpenCodeServerBackend implements AgentBackend {
    }
  }

+  // ─── per-step usage persistence (U.6) ────────────────────────────────────────
+
+  /**
+   * Add one `session.next.step.ended` usage delta to the session's agent_sessions
+   * row, matched on `agent_session_id` (described as unique per active row, written by
+   * the dispatcher's `(chat_id, agent)` lookup — not verifiable from this hunk). The
+   * columns hold running totals, not the last step. An all-zero delta is skipped to avoid
+   * a no-op write. Failures are logged at warn level and swallowed so a turn never fails.
+   */
+  private async accumulateUsage(st: SessionState, u: StepUsage): Promise<void> {
+    if (u.input === 0 && u.output === 0 && u.cost === 0) return; // nothing to add
+    try {
+      await this.sql`
+        UPDATE agent_sessions SET
+          input_tokens = input_tokens + ${u.input},
+          output_tokens = output_tokens + ${u.output},
+          cost = cost + ${u.cost}
+        WHERE agent_session_id = ${st.agentSessionId}
+      `;
+    } catch (err) {
+      this.log.warn(
+        { err: errMsg(err), agentSessionId: st.agentSessionId },
+        'opencode-server: failed to persist step usage (non-fatal)',
+      );
+    }
+  }
+
  // ─── ensureSession: create-or-resume against agent_sessions (1.5) ────────────

  async ensureSession(sessionId: string, opts: EnsureSessionOpts): Promise<AgentSessionHandle> {
--- a/apps/coder/src/services/backends/opencode-usage.ts
+++ b/apps/coder/src/services/backends/opencode-usage.ts
@@ -0,0 +1,77 @@
+/**
+ * v2.6 Phase 1-UX (U.6) — pure mapper for opencode's per-step usage event.
+ *
+ * opencode's warm server emits `session.next.step.ended` once per completed LLM
+ * step (so a multi-tool turn fires it several times). Its `properties` carry the
+ * step's token + cost accounting:
+ *
+ *   {
+ *     timestamp: number;
+ *     sessionID: string;
+ *     finish: string;
+ *     cost: number;                                  // USD for this step
+ *     tokens: {
+ *       input: number; output: number; reasoning: number;
+ *       cache: { read: number; write: number };
+ *     };
+ *     snapshot?: string;
+ *   }
+ *
+ * (Verified against @opencode-ai/sdk@1.15.12 — `EventSessionNextStepEnded` in
+ * `dist/v2/gen/types.gen.d.ts`, a member of the `Event` union the SSE loop
+ * switches on.)
+ *
+ * We normalize to the review's target slice `{input, output, cost}` (the
+ * provider-agnostic `AgentUsage` shape lands later). cache read/write tokens are
+ * folded into `input` so the persisted input count reflects the real context the
+ * model billed for; reasoning tokens are folded into `output` since that's what
+ * the provider counts them as for generation. This keeps the persisted totals a
+ * faithful sum of what opencode reported, without inventing extra columns yet.
+ */
+
+/** The subset of a `session.next.step.ended` event's `properties` that the usage mapper reads. */
+export interface StepEndedProps {
+  cost: number; // USD for this step
+  tokens: {
+    input: number;
+    output: number;
+    reasoning: number; // added to `output` by stepEndedToUsage
+    cache: { read: number; write: number }; // both added to `input` by stepEndedToUsage
+  };
+}
+
+/** Normalized usage delta for a single step; added onto the agent_sessions running totals. */
+export interface StepUsage {
+  input: number; // input + cache read + cache write tokens
+  output: number; // output + reasoning tokens
+  cost: number; // USD, non-negative
+}
+
+/** Token coercion: non-numbers go through Number(); non-finite or ≤ 0 becomes 0, else Math.round. */
+function n(v: unknown): number {
+  const x = typeof v === 'number' ? v : Number(v); // undefined → NaN, rejected on the next line
+  return Number.isFinite(x) && x > 0 ? Math.round(x) : 0;
+}
+
+/** Cost coercion (USD): non-numbers go through Number(); non-finite or ≤ 0 becomes 0; kept unrounded. */
+function f(v: unknown): number {
+  const x = typeof v === 'number' ? v : Number(v); // undefined → NaN, rejected on the next line
+  return Number.isFinite(x) && x > 0 ? x : 0;
+}
+
+/**
+ * Map a `session.next.step.ended` payload → the normalized `{input, output, cost}`
+ * delta. Every field is sanitized, so a missing `props`, `tokens` or `cache` block
+ * yields zeros and a NaN/negative/infinite value never reaches the accumulated DB
+ * total. `input` = input + cache read + cache write; `output` = output + reasoning.
+ */
+export function stepEndedToUsage(props: Partial<StepEndedProps> | undefined): StepUsage {
+  const t = props?.tokens; // optional chaining tolerates undefined props and absent tokens
+  const cacheRead = n(t?.cache?.read);
+  const cacheWrite = n(t?.cache?.write);
+  return {
+    input: n(t?.input) + cacheRead + cacheWrite, // each term sanitized before summing
+    output: n(t?.output) + n(t?.reasoning),
+    cost: f(props?.cost), // float, not rounded
+  };
+}