Compare commits
1 Commits
v1.12.1-st
...
v1.12.2-li
| Author | SHA1 | Date | |
|---|---|---|---|
| a7104691aa |
@@ -117,6 +117,7 @@ export interface InferenceFrame {
|
|||||||
| 'tool_call'
|
| 'tool_call'
|
||||||
| 'tool_result'
|
| 'tool_result'
|
||||||
| 'message_complete'
|
| 'message_complete'
|
||||||
|
| 'usage'
|
||||||
| 'messages_deleted'
|
| 'messages_deleted'
|
||||||
| 'session_renamed'
|
| 'session_renamed'
|
||||||
| 'chat_renamed'
|
| 'chat_renamed'
|
||||||
@@ -145,6 +146,7 @@ export interface InferenceFrame {
|
|||||||
tokens_used?: number | null;
|
tokens_used?: number | null;
|
||||||
ctx_used?: number | null;
|
ctx_used?: number | null;
|
||||||
ctx_max?: number | null;
|
ctx_max?: number | null;
|
||||||
|
completion_tokens?: number | null;
|
||||||
started_at?: string | null;
|
started_at?: string | null;
|
||||||
finished_at?: string | null;
|
finished_at?: string | null;
|
||||||
model?: string;
|
model?: string;
|
||||||
@@ -444,6 +446,7 @@ async function streamCompletion(
|
|||||||
messages: OpenAiMessage[],
|
messages: OpenAiMessage[],
|
||||||
opts: StreamOptions,
|
opts: StreamOptions,
|
||||||
onDelta: (content: string) => void,
|
onDelta: (content: string) => void,
|
||||||
|
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal
|
||||||
): Promise<StreamResult> {
|
): Promise<StreamResult> {
|
||||||
const body: Record<string, unknown> = {
|
const body: Record<string, unknown> = {
|
||||||
@@ -499,6 +502,7 @@ async function streamCompletion(
|
|||||||
if (typeof parsed.usage.completion_tokens === 'number') {
|
if (typeof parsed.usage.completion_tokens === 'number') {
|
||||||
completionTokens = parsed.usage.completion_tokens;
|
completionTokens = parsed.usage.completion_tokens;
|
||||||
}
|
}
|
||||||
|
onUsage?.(promptTokens, completionTokens);
|
||||||
}
|
}
|
||||||
// v1.11.3: removed dead `parsed.timings.n_ctx` read. llama-server's
|
// v1.11.3: removed dead `parsed.timings.n_ctx` read. llama-server's
|
||||||
// streaming completion does NOT emit n_ctx in timings (verified
|
// streaming completion does NOT emit n_ctx in timings (verified
|
||||||
@@ -728,6 +732,34 @@ async function executeStreamPhase(
|
|||||||
).filter((t) => webToolsEnabled || !WEB_TOOL_NAMES.has(t.function.name));
|
).filter((t) => webToolsEnabled || !WEB_TOOL_NAMES.has(t.function.name));
|
||||||
const effectiveTemperature = agent?.temperature;
|
const effectiveTemperature = agent?.temperature;
|
||||||
|
|
||||||
|
// v1.12.2: ctx_max lookup is cached after the first hit per model, so this
|
||||||
|
// is a Map probe in steady state. We capture nCtx once at the top of the
|
||||||
|
// stream so the throttled usage publish doesn't refetch each tick.
|
||||||
|
const mctxForStream = await modelContext.getModelContext(session.model);
|
||||||
|
const nCtxForStream = mctxForStream?.n_ctx ?? null;
|
||||||
|
|
||||||
|
// v1.12.2: throttle live usage publishes to ~500ms. The model can land
|
||||||
|
// dozens of usage frames per second; without a throttle the WS turns into
|
||||||
|
// a firehose for a few KB savings on each render.
|
||||||
|
const USAGE_THROTTLE_MS = 500;
|
||||||
|
let lastUsageAt = 0;
|
||||||
|
let pendingUsage: { p: number | null; c: number | null } | null = null;
|
||||||
|
let usageTimer: NodeJS.Timeout | null = null;
|
||||||
|
const flushUsage = () => {
|
||||||
|
if (!pendingUsage) return;
|
||||||
|
const { p, c } = pendingUsage;
|
||||||
|
pendingUsage = null;
|
||||||
|
lastUsageAt = Date.now();
|
||||||
|
ctx.publish(sessionId, {
|
||||||
|
type: 'usage',
|
||||||
|
message_id: assistantMessageId,
|
||||||
|
chat_id: chatId,
|
||||||
|
completion_tokens: c,
|
||||||
|
ctx_used: p,
|
||||||
|
ctx_max: nCtxForStream,
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return await streamCompletion(
|
return await streamCompletion(
|
||||||
ctx,
|
ctx,
|
||||||
@@ -745,6 +777,18 @@ async function executeStreamPhase(
|
|||||||
ctx.log.debug({ sessionId, delta }, 'inference delta');
|
ctx.log.debug({ sessionId, delta }, 'inference delta');
|
||||||
scheduleFlush();
|
scheduleFlush();
|
||||||
},
|
},
|
||||||
|
(prompt, completion) => {
|
||||||
|
pendingUsage = { p: prompt, c: completion };
|
||||||
|
const elapsed = Date.now() - lastUsageAt;
|
||||||
|
if (elapsed >= USAGE_THROTTLE_MS) {
|
||||||
|
flushUsage();
|
||||||
|
} else if (!usageTimer) {
|
||||||
|
usageTimer = setTimeout(() => {
|
||||||
|
usageTimer = null;
|
||||||
|
flushUsage();
|
||||||
|
}, USAGE_THROTTLE_MS - elapsed);
|
||||||
|
}
|
||||||
|
},
|
||||||
signal
|
signal
|
||||||
);
|
);
|
||||||
} finally {
|
} finally {
|
||||||
@@ -752,6 +796,10 @@ async function executeStreamPhase(
|
|||||||
clearTimeout(pendingFlushTimer);
|
clearTimeout(pendingFlushTimer);
|
||||||
pendingFlushTimer = null;
|
pendingFlushTimer = null;
|
||||||
}
|
}
|
||||||
|
if (usageTimer) {
|
||||||
|
clearTimeout(usageTimer);
|
||||||
|
usageTimer = null;
|
||||||
|
}
|
||||||
await flushPromise;
|
await flushPromise;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1238,6 +1286,7 @@ async function runCapHitSummary(
|
|||||||
});
|
});
|
||||||
scheduleFlush();
|
scheduleFlush();
|
||||||
},
|
},
|
||||||
|
undefined,
|
||||||
signal,
|
signal,
|
||||||
);
|
);
|
||||||
summaryOk = true;
|
summaryOk = true;
|
||||||
@@ -1499,6 +1548,7 @@ async function runDoomLoopSummary(
|
|||||||
});
|
});
|
||||||
scheduleFlush();
|
scheduleFlush();
|
||||||
},
|
},
|
||||||
|
undefined,
|
||||||
signal,
|
signal,
|
||||||
);
|
);
|
||||||
summaryOk = true;
|
summaryOk = true;
|
||||||
|
|||||||
@@ -332,6 +332,17 @@ export type WsFrame =
|
|||||||
// to the client without a refetch.
|
// to the client without a refetch.
|
||||||
metadata?: MessageMetadata | null;
|
metadata?: MessageMetadata | null;
|
||||||
}
|
}
|
||||||
|
// v1.12.2: live throughput frame, published mid-stream every ~500ms with
|
||||||
|
// the latest token + ctx counts so ChatThroughput can render tok/s and
|
||||||
|
// ctx_used while the model is still generating.
|
||||||
|
| {
|
||||||
|
type: 'usage';
|
||||||
|
message_id: string;
|
||||||
|
chat_id?: string;
|
||||||
|
completion_tokens: number | null;
|
||||||
|
ctx_used: number | null;
|
||||||
|
ctx_max: number | null;
|
||||||
|
}
|
||||||
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
|
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
|
||||||
| { type: 'chat_renamed'; chat_id: string; name: string }
|
| { type: 'chat_renamed'; chat_id: string; name: string }
|
||||||
// v1.11: published by services/compaction.ts after the new anchored
|
// v1.11: published by services/compaction.ts after the new anchored
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import { useState } from 'react';
|
|||||||
import { Bot, History, MessageSquare, Plus, Terminal, X } from 'lucide-react';
|
import { Bot, History, MessageSquare, Plus, Terminal, X } from 'lucide-react';
|
||||||
import type { Chat, WorkspacePane } from '@/api/types';
|
import type { Chat, WorkspacePane } from '@/api/types';
|
||||||
import { StatusDot } from '@/components/StatusDot';
|
import { StatusDot } from '@/components/StatusDot';
|
||||||
|
import { ChatThroughput } from '@/components/ChatThroughput';
|
||||||
import {
|
import {
|
||||||
ContextMenu,
|
ContextMenu,
|
||||||
ContextMenuContent,
|
ContextMenuContent,
|
||||||
@@ -99,6 +100,7 @@ export function ChatTabBar({
|
|||||||
>
|
>
|
||||||
<MessageSquare size={12} className="shrink-0" />
|
<MessageSquare size={12} className="shrink-0" />
|
||||||
<StatusDot chatId={chat.id} />
|
<StatusDot chatId={chat.id} />
|
||||||
|
<ChatThroughput chatId={chat.id} />
|
||||||
{renamingId === chat.id ? (
|
{renamingId === chat.id ? (
|
||||||
<input
|
<input
|
||||||
autoFocus
|
autoFocus
|
||||||
|
|||||||
28
apps/web/src/components/ChatThroughput.tsx
Normal file
28
apps/web/src/components/ChatThroughput.tsx
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import { useChatStatus } from '@/hooks/useChatStatus';
|
||||||
|
import { useChatThroughput } from '@/hooks/useChatThroughput';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
chatId: string | null | undefined;
|
||||||
|
className?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// v1.12.2: inline throughput readout. Renders next to StatusDot while the
|
||||||
|
// chat is streaming or running a tool. Hidden in idle/error/waiting states
|
||||||
|
// — the dot already communicates those.
|
||||||
|
export function ChatThroughput({ chatId, className }: Props) {
|
||||||
|
const status = useChatStatus(chatId);
|
||||||
|
const t = useChatThroughput(chatId);
|
||||||
|
if (!chatId || !t) return null;
|
||||||
|
if (status !== 'streaming' && status !== 'tool_running') return null;
|
||||||
|
const tps = t.tps != null && t.tps > 0 ? Math.round(t.tps) : null;
|
||||||
|
const showCtx = t.ctx_used != null && t.ctx_max != null;
|
||||||
|
if (tps === null && !showCtx) return null;
|
||||||
|
return (
|
||||||
|
<span className={cn('text-xs text-muted-foreground tabular-nums', className)}>
|
||||||
|
{tps !== null && `${tps} tok/s`}
|
||||||
|
{tps !== null && showCtx && ' · '}
|
||||||
|
{showCtx && `${t.ctx_used!.toLocaleString()}/${t.ctx_max!.toLocaleString()}`}
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -13,6 +13,7 @@ import { toast } from 'sonner';
|
|||||||
import type { Chat, WorkspacePane } from '@/api/types';
|
import type { Chat, WorkspacePane } from '@/api/types';
|
||||||
import { BottomSheet } from '@/components/BottomSheet';
|
import { BottomSheet } from '@/components/BottomSheet';
|
||||||
import { StatusDot } from '@/components/StatusDot';
|
import { StatusDot } from '@/components/StatusDot';
|
||||||
|
import { ChatThroughput } from '@/components/ChatThroughput';
|
||||||
import {
|
import {
|
||||||
DropdownMenu,
|
DropdownMenu,
|
||||||
DropdownMenuContent,
|
DropdownMenuContent,
|
||||||
@@ -206,6 +207,7 @@ export function MobileTabSwitcher({
|
|||||||
>
|
>
|
||||||
<span className="shrink-0 text-muted-foreground">{paneIcon(active?.kind ?? 'chat')}</span>
|
<span className="shrink-0 text-muted-foreground">{paneIcon(active?.kind ?? 'chat')}</span>
|
||||||
<StatusDot chatId={activeChatId} />
|
<StatusDot chatId={activeChatId} />
|
||||||
|
<ChatThroughput chatId={activeChatId} />
|
||||||
<span className="truncate flex-1 text-left">{activeLabel}</span>
|
<span className="truncate flex-1 text-left">{activeLabel}</span>
|
||||||
<ChevronDown size={14} className="opacity-60 shrink-0" />
|
<ChevronDown size={14} className="opacity-60 shrink-0" />
|
||||||
</button>
|
</button>
|
||||||
@@ -237,6 +239,7 @@ export function MobileTabSwitcher({
|
|||||||
>
|
>
|
||||||
<span className="shrink-0 text-muted-foreground">{paneIcon(pane.kind)}</span>
|
<span className="shrink-0 text-muted-foreground">{paneIcon(pane.kind)}</span>
|
||||||
<StatusDot chatId={cid ?? null} />
|
<StatusDot chatId={cid ?? null} />
|
||||||
|
<ChatThroughput chatId={cid ?? null} />
|
||||||
{renamingChatId === cid && cid ? (
|
{renamingChatId === cid && cid ? (
|
||||||
<input
|
<input
|
||||||
autoFocus
|
autoFocus
|
||||||
|
|||||||
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { useEffect, useState } from 'react';
|
||||||
|
|
||||||
|
// v1.12.2: live throughput stream consumer. Fed by useSessionStream when a
|
||||||
|
// 'usage' WS frame lands. Renders next to StatusDot via ChatThroughput.
|
||||||
|
//
|
||||||
|
// Singleton + Set<setState> pattern mirrors useChatStatus so any component
|
||||||
|
// can subscribe to any chatId without prop drilling.
|
||||||
|
|
||||||
|
export interface ThroughputSample {
|
||||||
|
tps: number | null;
|
||||||
|
ctx_used: number | null;
|
||||||
|
ctx_max: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface Entry {
|
||||||
|
ctx_used: number | null;
|
||||||
|
ctx_max: number | null;
|
||||||
|
completion_tokens: number | null;
|
||||||
|
recorded_at: number;
|
||||||
|
prev_completion_tokens: number | null;
|
||||||
|
prev_recorded_at: number | null;
|
||||||
|
tps: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stale window. After this, useChatThroughput returns null — clears the
|
||||||
|
// indicator after the stream ends without the next inference turn.
|
||||||
|
const STALE_MS = 10_000;
|
||||||
|
|
||||||
|
const entries = new Map<string, Entry>();
|
||||||
|
const subscribers = new Set<() => void>();
|
||||||
|
|
||||||
|
function notify(): void {
|
||||||
|
for (const s of subscribers) {
|
||||||
|
try { s(); } catch { /* swallow */ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// v1.12.2: imported by useSessionStream's WS handler. Computes tps from the
|
||||||
|
// gap between successive completion_tokens samples; first sample yields null
|
||||||
|
// (we need two points). Skips zero-progress samples so a duplicate usage
|
||||||
|
// frame doesn't push tps to 0.
|
||||||
|
export function recordUsage(
|
||||||
|
chatId: string,
|
||||||
|
data: { completion_tokens: number | null; ctx_used: number | null; ctx_max: number | null },
|
||||||
|
): void {
|
||||||
|
const now = Date.now();
|
||||||
|
const prev = entries.get(chatId);
|
||||||
|
let tps: number | null = prev?.tps ?? null;
|
||||||
|
if (
|
||||||
|
prev &&
|
||||||
|
data.completion_tokens != null &&
|
||||||
|
prev.completion_tokens != null &&
|
||||||
|
data.completion_tokens > prev.completion_tokens &&
|
||||||
|
now > prev.recorded_at
|
||||||
|
) {
|
||||||
|
const dTokens = data.completion_tokens - prev.completion_tokens;
|
||||||
|
const dSeconds = (now - prev.recorded_at) / 1000;
|
||||||
|
tps = dTokens / dSeconds;
|
||||||
|
}
|
||||||
|
entries.set(chatId, {
|
||||||
|
ctx_used: data.ctx_used,
|
||||||
|
ctx_max: data.ctx_max,
|
||||||
|
completion_tokens: data.completion_tokens,
|
||||||
|
recorded_at: now,
|
||||||
|
prev_completion_tokens: prev?.completion_tokens ?? null,
|
||||||
|
prev_recorded_at: prev?.recorded_at ?? null,
|
||||||
|
tps,
|
||||||
|
});
|
||||||
|
notify();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearThroughput(chatId: string): void {
|
||||||
|
if (entries.delete(chatId)) notify();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Periodic sweep: re-notify so stale entries fall off the UI when the
|
||||||
|
// stream ends without a follow-up frame. Light — one timer for the whole app.
|
||||||
|
const G = globalThis as Record<string, unknown>;
|
||||||
|
if (!G.__boocode_throughput_ticker) {
|
||||||
|
G.__boocode_throughput_ticker = true;
|
||||||
|
setInterval(() => {
|
||||||
|
const now = Date.now();
|
||||||
|
let touched = false;
|
||||||
|
for (const [k, v] of entries) {
|
||||||
|
if (now - v.recorded_at > STALE_MS) {
|
||||||
|
entries.delete(k);
|
||||||
|
touched = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (touched) notify();
|
||||||
|
}, 2_000);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function useChatThroughput(chatId: string | null | undefined): ThroughputSample | null {
|
||||||
|
const [, force] = useState({});
|
||||||
|
useEffect(() => {
|
||||||
|
const sub = () => force({});
|
||||||
|
subscribers.add(sub);
|
||||||
|
return () => { subscribers.delete(sub); };
|
||||||
|
}, []);
|
||||||
|
if (!chatId) return null;
|
||||||
|
const entry = entries.get(chatId);
|
||||||
|
if (!entry) return null;
|
||||||
|
if (Date.now() - entry.recorded_at > STALE_MS) return null;
|
||||||
|
return { tps: entry.tps, ctx_used: entry.ctx_used, ctx_max: entry.ctx_max };
|
||||||
|
}
|
||||||
@@ -3,6 +3,7 @@ import { toast } from 'sonner';
|
|||||||
import type { Message, WsFrame } from '@/api/types';
|
import type { Message, WsFrame } from '@/api/types';
|
||||||
import { api } from '@/api/client';
|
import { api } from '@/api/client';
|
||||||
import { sessionEvents } from './sessionEvents';
|
import { sessionEvents } from './sessionEvents';
|
||||||
|
import { recordUsage } from './useChatThroughput';
|
||||||
|
|
||||||
// session_renamed frame removed from WsFrame — it was declared but never
|
// session_renamed frame removed from WsFrame — it was declared but never
|
||||||
// published on the per-session WS channel (server publishes via broker.publishUser
|
// published on the per-session WS channel (server publishes via broker.publishUser
|
||||||
@@ -125,6 +126,19 @@ function applyFrame(state: State, frame: WsFrame): State {
|
|||||||
);
|
);
|
||||||
return { ...state, messages: next };
|
return { ...state, messages: next };
|
||||||
}
|
}
|
||||||
|
case 'usage': {
|
||||||
|
// v1.12.2: live throughput. Side-effects into the module-level
|
||||||
|
// singleton consumed by ChatThroughput; no message-state mutation.
|
||||||
|
// chat_id is the optional ws-frame field; usage frames always include it.
|
||||||
|
if (frame.chat_id) {
|
||||||
|
recordUsage(frame.chat_id, {
|
||||||
|
completion_tokens: frame.completion_tokens,
|
||||||
|
ctx_used: frame.ctx_used,
|
||||||
|
ctx_max: frame.ctx_max,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return state;
|
||||||
|
}
|
||||||
case 'messages_deleted': {
|
case 'messages_deleted': {
|
||||||
const removeSet = new Set(frame.message_ids);
|
const removeSet = new Set(frame.message_ids);
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user