v1.12.2: live tok/s + ctx display next to status indicator
ChatThroughput renders inline beside StatusDot while streaming or tool_running. Subscribes to existing usage frames via sessionEvents. Hides when status drops to idle/error or data is older than 10s. Addresses the 2026-05-21 spike's UX gap where slow streams looked identical to dead streams — now there's a live token velocity readout that immediately distinguishes the two. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
|
||||
// v1.12.2: live throughput stream consumer. Fed by useSessionStream when a
|
||||
// 'usage' WS frame lands. Renders next to StatusDot via ChatThroughput.
|
||||
//
|
||||
// Singleton + Set<setState> pattern mirrors useChatStatus so any component
|
||||
// can subscribe to any chatId without prop drilling.
|
||||
|
||||
/**
 * Public snapshot returned by `useChatThroughput` for a single chat.
 * Every field is nullable; `null` means no value is available for it.
 */
export interface ThroughputSample {
  /**
   * Completion tokens per second, derived from the two most recent usage
   * samples that showed forward progress. `null` until such a pair exists.
   */
  tps: number | null;
  /** Context usage as carried by the latest usage frame (presumably tokens — confirm against the WS frame schema). */
  ctx_used: number | null;
  /** Context capacity as carried by the latest usage frame (presumably tokens — confirm against the WS frame schema). */
  ctx_max: number | null;
}
|
||||
|
||||
/**
 * Internal per-chat record kept in the module-level map. Holds the latest
 * usage sample, the sample that preceded it, and the derived rate.
 */
interface Entry {
  // --- latest sample -------------------------------------------------------
  /** `completion_tokens` value from the most recent usage sample. */
  completion_tokens: number | null;
  /** `Date.now()` timestamp (ms) taken when the most recent sample was recorded. */
  recorded_at: number;
  /** `ctx_used` value from the most recent usage sample. */
  ctx_used: number | null;
  /** `ctx_max` value from the most recent usage sample. */
  ctx_max: number | null;

  // --- sample before the latest one ----------------------------------------
  // NOTE(review): these two are stored on every update but are not read
  // anywhere in the visible part of this file.
  /** `completion_tokens` of the sample before the latest, or `null` if none. */
  prev_completion_tokens: number | null;
  /** `recorded_at` of the sample before the latest, or `null` if none. */
  prev_recorded_at: number | null;

  // --- derived ---------------------------------------------------------------
  /** Tokens per second; `null` until two samples with forward progress exist. */
  tps: number | null;
}
|
||||
|
||||
// Staleness threshold in milliseconds. An entry whose last sample is older
// than this is treated as absent: useChatThroughput returns null for it and
// the periodic sweep removes it, so the indicator disappears once a stream
// ends and no further usage frame arrives.
const STALE_MS = 10 * 1_000;

// Latest throughput record per chat, keyed by chatId.
const entries: Map<string, Entry> = new Map();

// Callbacks to invoke whenever `entries` changes; each mounted
// useChatThroughput hook registers one.
const subscribers: Set<() => void> = new Set();
|
||||
|
||||
/**
 * Calls every registered subscriber callback once.
 * An exception thrown by one callback is ignored so the remaining
 * subscribers are still invoked.
 */
function notify(): void {
  subscribers.forEach((listener) => {
    try {
      listener();
    } catch {
      /* swallow */
    }
  });
}
|
||||
|
||||
/**
 * v1.12.2: called from useSessionStream's WS handler for each usage frame.
 *
 * Stores the sample as the chat's latest entry and derives tokens-per-second
 * from the difference to the previously stored sample. The rate is only
 * recomputed when both samples carry a token count, the count increased, and
 * wall-clock time advanced; otherwise the previously stored rate (or null when
 * there is none) is carried over. This means the first sample for a chat
 * yields a null rate, and a duplicate frame does not drag the rate to 0.
 *
 * Subscribers are notified after every call.
 *
 * @param chatId Chat whose entry is updated.
 * @param data   Values taken from the usage frame.
 */
export function recordUsage(
  chatId: string,
  data: { completion_tokens: number | null; ctx_used: number | null; ctx_max: number | null },
): void {
  const timestamp = Date.now();
  const last = entries.get(chatId);
  const tokensNow = data.completion_tokens;

  // Default: keep whatever rate was known before this sample.
  let rate = last?.tps ?? null;

  if (last && tokensNow != null && last.completion_tokens != null) {
    const tokenGain = tokensNow - last.completion_tokens;
    const elapsedMs = timestamp - last.recorded_at;
    // Only a strictly positive gain over strictly positive time gives a
    // meaningful rate (also avoids dividing by zero).
    if (tokenGain > 0 && elapsedMs > 0) {
      rate = tokenGain / (elapsedMs / 1000);
    }
  }

  const updated: Entry = {
    ctx_used: data.ctx_used,
    ctx_max: data.ctx_max,
    completion_tokens: tokensNow,
    recorded_at: timestamp,
    prev_completion_tokens: last?.completion_tokens ?? null,
    prev_recorded_at: last?.recorded_at ?? null,
    tps: rate,
  };
  entries.set(chatId, updated);

  notify();
}
|
||||
|
||||
/**
 * Removes the stored throughput entry for `chatId`.
 * Subscribers are notified only when an entry actually existed.
 */
export function clearThroughput(chatId: string): void {
  const removed = entries.delete(chatId);
  if (removed) {
    notify();
  }
}
|
||||
|
||||
// Periodic sweep: every 2s, drop entries whose last sample is older than
// STALE_MS and notify subscribers, so stale readouts fall off the UI when a
// stream ends without a follow-up frame. Light — one timer for the whole app.
//
// The timer handle is parked on globalThis. If this module is evaluated again
// (e.g. by a dev-server hot reload), the earlier timer is bound to the earlier
// evaluation's `entries` map and `notify`, so merely skipping installation
// would leave the current instance unswept. Instead the earlier timer is
// cancelled and a fresh one, bound to this evaluation's state, takes its place.
const G = globalThis as Record<string, unknown>;
const previousTicker = G.__boocode_throughput_ticker;
// Older builds stored a bare `true` here; that is not a cancellable handle
// (and must not be passed to clearInterval), so it is simply overwritten.
if (previousTicker != null && typeof previousTicker !== 'boolean') {
  clearInterval(previousTicker as ReturnType<typeof setInterval>);
}
G.__boocode_throughput_ticker = setInterval(() => {
  const now = Date.now();
  let touched = false;
  // Deleting the current key while iterating a Map is safe.
  for (const [k, v] of entries) {
    if (now - v.recorded_at > STALE_MS) {
      entries.delete(k);
      touched = true;
    }
  }
  // Only wake subscribers when something was actually removed.
  if (touched) notify();
}, 2_000);
|
||||
|
||||
/**
 * React hook exposing the live throughput sample for a chat.
 *
 * The component re-renders whenever the module-level store notifies its
 * subscribers (a new sample, an explicit clear, or a sweep removal).
 *
 * @param chatId Chat to read; a falsy value always yields `null`.
 * @returns The latest sample, or `null` when there is no entry for the chat
 *          or the entry's last update is older than STALE_MS.
 */
export function useChatThroughput(chatId: string | null | undefined): ThroughputSample | null {
  // The state value itself is unused; setting a fresh object forces a render.
  const [, bump] = useState({});

  useEffect(() => {
    const rerender = (): void => {
      bump({});
    };
    subscribers.add(rerender);
    return () => {
      subscribers.delete(rerender);
    };
  }, []);

  const entry = chatId ? entries.get(chatId) : undefined;
  if (entry === undefined) {
    return null;
  }

  const ageMs = Date.now() - entry.recorded_at;
  if (ageMs > STALE_MS) {
    return null;
  }

  const { tps, ctx_used, ctx_max } = entry;
  return { tps, ctx_used, ctx_max };
}
|
||||
@@ -3,6 +3,7 @@ import { toast } from 'sonner';
|
||||
import type { Message, WsFrame } from '@/api/types';
|
||||
import { api } from '@/api/client';
|
||||
import { sessionEvents } from './sessionEvents';
|
||||
import { recordUsage } from './useChatThroughput';
|
||||
|
||||
// session_renamed frame removed from WsFrame — it was declared but never
|
||||
// published on the per-session WS channel (server publishes via broker.publishUser
|
||||
@@ -125,6 +126,19 @@ function applyFrame(state: State, frame: WsFrame): State {
|
||||
);
|
||||
return { ...state, messages: next };
|
||||
}
|
||||
case 'usage': {
|
||||
// v1.12.2: live throughput. Side-effects into the module-level
|
||||
// singleton consumed by ChatThroughput; no message-state mutation.
|
||||
// chat_id is the optional ws-frame field; usage frames always include it.
|
||||
if (frame.chat_id) {
|
||||
recordUsage(frame.chat_id, {
|
||||
completion_tokens: frame.completion_tokens,
|
||||
ctx_used: frame.ctx_used,
|
||||
ctx_max: frame.ctx_max,
|
||||
});
|
||||
}
|
||||
return state;
|
||||
}
|
||||
case 'messages_deleted': {
|
||||
const removeSet = new Set(frame.message_ids);
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user