v1.12.2: live tok/s + ctx display next to status indicator

ChatThroughput renders inline beside StatusDot while streaming or
tool_running. Subscribes to existing usage frames via sessionEvents.
Hides when status drops to idle/error or data is older than 10s.

Addresses the 2026-05-21 spike's UX gap where slow streams looked
identical to dead streams — now there's a live token velocity readout
that immediately distinguishes the two.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-21 20:45:53 +00:00
parent 1a0a3b1673
commit a7104691aa
7 changed files with 214 additions and 0 deletions

View File

@@ -332,6 +332,17 @@ export type WsFrame =
// to the client without a refetch.
metadata?: MessageMetadata | null;
}
// v1.12.2: live throughput frame, published mid-stream every ~500ms with
// the latest token + ctx counts so ChatThroughput can render tok/s and
// ctx_used while the model is still generating.
| {
type: 'usage';
message_id: string;
chat_id?: string;
completion_tokens: number | null;
ctx_used: number | null;
ctx_max: number | null;
}
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
| { type: 'chat_renamed'; chat_id: string; name: string }
// v1.11: published by services/compaction.ts after the new anchored

View File

@@ -2,6 +2,7 @@ import { useState } from 'react';
import { Bot, History, MessageSquare, Plus, Terminal, X } from 'lucide-react';
import type { Chat, WorkspacePane } from '@/api/types';
import { StatusDot } from '@/components/StatusDot';
import { ChatThroughput } from '@/components/ChatThroughput';
import {
ContextMenu,
ContextMenuContent,
@@ -99,6 +100,7 @@ export function ChatTabBar({
>
<MessageSquare size={12} className="shrink-0" />
<StatusDot chatId={chat.id} />
<ChatThroughput chatId={chat.id} />
{renamingId === chat.id ? (
<input
autoFocus

View File

@@ -0,0 +1,28 @@
import { useChatStatus } from '@/hooks/useChatStatus';
import { useChatThroughput } from '@/hooks/useChatThroughput';
import { cn } from '@/lib/utils';
interface Props {
chatId: string | null | undefined;
className?: string;
}
// v1.12.2: inline throughput readout. Renders next to StatusDot while the
// chat is streaming or running a tool. Hidden in idle/error/waiting states
// — the dot already communicates those.
export function ChatThroughput({ chatId, className }: Props) {
const status = useChatStatus(chatId);
const t = useChatThroughput(chatId);
if (!chatId || !t) return null;
if (status !== 'streaming' && status !== 'tool_running') return null;
const tps = t.tps != null && t.tps > 0 ? Math.round(t.tps) : null;
const showCtx = t.ctx_used != null && t.ctx_max != null;
if (tps === null && !showCtx) return null;
return (
<span className={cn('text-xs text-muted-foreground tabular-nums', className)}>
{tps !== null && `${tps} tok/s`}
{tps !== null && showCtx && ' · '}
{showCtx && `${t.ctx_used!.toLocaleString()}/${t.ctx_max!.toLocaleString()}`}
</span>
);
}

View File

@@ -13,6 +13,7 @@ import { toast } from 'sonner';
import type { Chat, WorkspacePane } from '@/api/types';
import { BottomSheet } from '@/components/BottomSheet';
import { StatusDot } from '@/components/StatusDot';
import { ChatThroughput } from '@/components/ChatThroughput';
import {
DropdownMenu,
DropdownMenuContent,
@@ -206,6 +207,7 @@ export function MobileTabSwitcher({
>
<span className="shrink-0 text-muted-foreground">{paneIcon(active?.kind ?? 'chat')}</span>
<StatusDot chatId={activeChatId} />
<ChatThroughput chatId={activeChatId} />
<span className="truncate flex-1 text-left">{activeLabel}</span>
<ChevronDown size={14} className="opacity-60 shrink-0" />
</button>
@@ -237,6 +239,7 @@ export function MobileTabSwitcher({
>
<span className="shrink-0 text-muted-foreground">{paneIcon(pane.kind)}</span>
<StatusDot chatId={cid ?? null} />
<ChatThroughput chatId={cid ?? null} />
{renamingChatId === cid && cid ? (
<input
autoFocus

View File

@@ -0,0 +1,106 @@
import { useEffect, useState } from 'react';
// v1.12.2: live throughput stream consumer. Fed by useSessionStream when a
// 'usage' WS frame lands. Renders next to StatusDot via ChatThroughput.
//
// Singleton + Set<setState> pattern mirrors useChatStatus so any component
// can subscribe to any chatId without prop drilling.
export interface ThroughputSample {
tps: number | null;
ctx_used: number | null;
ctx_max: number | null;
}
interface Entry {
ctx_used: number | null;
ctx_max: number | null;
completion_tokens: number | null;
recorded_at: number;
prev_completion_tokens: number | null;
prev_recorded_at: number | null;
tps: number | null;
}
// Stale window. After this, useChatThroughput returns null — clears the
// indicator after the stream ends without the next inference turn.
const STALE_MS = 10_000;
const entries = new Map<string, Entry>();
const subscribers = new Set<() => void>();
function notify(): void {
for (const s of subscribers) {
try { s(); } catch { /* swallow */ }
}
}
// v1.12.2: imported by useSessionStream's WS handler. Computes tps from the
// gap between successive completion_tokens samples; first sample yields null
// (we need two points). Skips zero-progress samples so a duplicate usage
// frame doesn't push tps to 0.
export function recordUsage(
chatId: string,
data: { completion_tokens: number | null; ctx_used: number | null; ctx_max: number | null },
): void {
const now = Date.now();
const prev = entries.get(chatId);
let tps: number | null = prev?.tps ?? null;
if (
prev &&
data.completion_tokens != null &&
prev.completion_tokens != null &&
data.completion_tokens > prev.completion_tokens &&
now > prev.recorded_at
) {
const dTokens = data.completion_tokens - prev.completion_tokens;
const dSeconds = (now - prev.recorded_at) / 1000;
tps = dTokens / dSeconds;
}
entries.set(chatId, {
ctx_used: data.ctx_used,
ctx_max: data.ctx_max,
completion_tokens: data.completion_tokens,
recorded_at: now,
prev_completion_tokens: prev?.completion_tokens ?? null,
prev_recorded_at: prev?.recorded_at ?? null,
tps,
});
notify();
}
export function clearThroughput(chatId: string): void {
if (entries.delete(chatId)) notify();
}
// Periodic sweep: re-notify so stale entries fall off the UI when the
// stream ends without a follow-up frame. Light — one timer for the whole app.
const G = globalThis as Record<string, unknown>;
if (!G.__boocode_throughput_ticker) {
G.__boocode_throughput_ticker = true;
setInterval(() => {
const now = Date.now();
let touched = false;
for (const [k, v] of entries) {
if (now - v.recorded_at > STALE_MS) {
entries.delete(k);
touched = true;
}
}
if (touched) notify();
}, 2_000);
}
export function useChatThroughput(chatId: string | null | undefined): ThroughputSample | null {
const [, force] = useState({});
useEffect(() => {
const sub = () => force({});
subscribers.add(sub);
return () => { subscribers.delete(sub); };
}, []);
if (!chatId) return null;
const entry = entries.get(chatId);
if (!entry) return null;
if (Date.now() - entry.recorded_at > STALE_MS) return null;
return { tps: entry.tps, ctx_used: entry.ctx_used, ctx_max: entry.ctx_max };
}

View File

@@ -3,6 +3,7 @@ import { toast } from 'sonner';
import type { Message, WsFrame } from '@/api/types';
import { api } from '@/api/client';
import { sessionEvents } from './sessionEvents';
import { recordUsage } from './useChatThroughput';
// session_renamed frame removed from WsFrame — it was declared but never
// published on the per-session WS channel (server publishes via broker.publishUser
@@ -125,6 +126,19 @@ function applyFrame(state: State, frame: WsFrame): State {
);
return { ...state, messages: next };
}
case 'usage': {
// v1.12.2: live throughput. Side-effects into the module-level
// singleton consumed by ChatThroughput; no message-state mutation.
// chat_id is the optional ws-frame field; usage frames always include it.
if (frame.chat_id) {
recordUsage(frame.chat_id, {
completion_tokens: frame.completion_tokens,
ctx_used: frame.ctx_used,
ctx_max: frame.ctx_max,
});
}
return state;
}
case 'messages_deleted': {
const removeSet = new Set(frame.message_ids);
return {