diff --git a/apps/server/src/routes/chats.ts b/apps/server/src/routes/chats.ts index 5d0c45f..c7a072d 100644 --- a/apps/server/src/routes/chats.ts +++ b/apps/server/src/routes/chats.ts @@ -3,6 +3,7 @@ import { z } from 'zod'; import type { Sql } from '../db.js'; import type { Broker } from '../services/broker.js'; import type { Chat, Message } from '../types/api.js'; +import { getModelContext } from '../services/model-context.js'; const CreateBody = z.object({ name: z.string().min(1).max(200).optional(), @@ -60,7 +61,20 @@ export function registerChatRoutes( WHERE c.session_id = ${req.params.id} AND c.status = ${status} ORDER BY c.updated_at DESC `; - return rows; + // v1.11.5: enrich each chat with its model's context window so the + // ContextBar can render a zero-state (and the auto-compaction threshold + // tooltip) before the first assistant message lands. All chats in a + // session share the session's model, so we do ONE getModelContext + // lookup and apply the result to the whole list. Failed lookups + // (model unknown, llama-swap down) yield null and the frontend falls + // through to the "model context unknown" placeholder. + const sessRow = await sql<{ model: string | null }[]>` + SELECT model FROM sessions WHERE id = ${req.params.id} + `; + const sessionModel = sessRow[0]?.model ?? null; + const mctx = sessionModel ? await getModelContext(sessionModel) : null; + const modelContextLimit = mctx?.n_ctx ?? 
null; + return rows.map((r) => ({ ...r, model_context_limit: modelContextLimit })); } ); diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts index 54f5a04..38cb781 100644 --- a/apps/server/src/types/api.ts +++ b/apps/server/src/types/api.ts @@ -89,6 +89,12 @@ export interface Chat { message_count?: number; last_message_preview?: string | null; effective_context_tokens?: number | null; + // v1.11.5: model's full context window (from llama-swap props), threaded + // to the frontend so ContextBar can render a zero-state + the auto- + // compaction threshold tooltip before any assistant message lands. + // Shared across all chats in a session (chats inherit session.model). + // null when the upstream lookup failed (model unknown, llama-swap down). + model_context_limit?: number | null; } // KEEP IN SYNC: apps/server/src/schema.sql messages_role_chk / messages_status_chk diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index 9784382..cc359c9 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -80,6 +80,12 @@ export interface Chat { message_count?: number; last_message_preview?: string | null; effective_context_tokens?: number | null; + // v1.11.5: model's full context window from llama-swap /props. Used by + // ContextBar to render the zero-state + auto-compaction threshold tooltip + // before any assistant message exists in the chat. null when upstream + // lookup failed (model unknown, llama-swap unreachable) — UI degrades + // to a "model context unknown" placeholder. 
+ model_context_limit?: number | null; } export type MessageRole = 'user' | 'assistant' | 'tool' | 'system'; diff --git a/apps/web/src/components/ChatContextPopover.tsx b/apps/web/src/components/ChatContextPopover.tsx deleted file mode 100644 index a08cc9d..0000000 --- a/apps/web/src/components/ChatContextPopover.tsx +++ /dev/null @@ -1,55 +0,0 @@ -import type { ChatContextStats } from '@/hooks/useChatContextStats'; - -interface Props { - stats: ChatContextStats | null; -} - -/** - * Formats a token count into a compact k/m-suffix string. - * - < 1_000 → raw integer (e.g. "42") - * - 1_000–999_999 → "Nk" or "N.Nk" (e.g. "30k", "12.5k", "100k") - * - >= 1_000_000 → "Nm" or "N.Nm" (e.g. "1m", "1.5m", "100m") - * - * Drops a trailing ".0" so we get "30k" instead of "30.0k". - */ -function formatTokens(n: number): string { - if (n < 1000) return String(n); - if (n < 1_000_000) { - const k = n / 1000; - return k >= 100 ? `${Math.round(k)}k` : `${k.toFixed(1).replace(/\.0$/, '')}k`; - } - const m = n / 1_000_000; - return m >= 100 ? `${Math.round(m)}m` : `${m.toFixed(1).replace(/\.0$/, '')}m`; -} - -/** - * Color thresholds: - * - > 85% → text-destructive - * - >= 60% → text-amber-500 - * - else → text-muted-foreground - * (85% itself falls into the amber band.) - */ -function percentColorClass(percent: number): string { - if (percent > 85) return 'text-destructive'; - if (percent >= 60) return 'text-amber-500'; - return 'text-muted-foreground'; -} - -export function ChatContextPopover({ stats }: Props) { - if (!stats) return null; - return ( -
-
-
- Context window -
-
- {stats.percent}% used -
-
- {formatTokens(stats.used)} / {formatTokens(stats.max)} tokens -
-
-
- ); -} diff --git a/apps/web/src/components/ChatInput.tsx b/apps/web/src/components/ChatInput.tsx index a60fdbe..9a31d20 100644 --- a/apps/web/src/components/ChatInput.tsx +++ b/apps/web/src/components/ChatInput.tsx @@ -22,8 +22,10 @@ import { AttachmentPreviewModal } from '@/components/AttachmentPreviewModal'; import { FileMentionPopover } from '@/components/FileMentionPopover'; import { DropOverlay } from '@/components/DropOverlay'; import { AgentPicker } from '@/components/AgentPicker'; +import { ContextBar } from '@/components/ContextBar'; import { SkillSlashCommand } from '@/components/SkillSlashCommand'; import { api } from '@/api/client'; +import type { Message } from '@/api/types'; import { sessionEvents } from '@/hooks/sessionEvents'; import { chatInputsRegistry, sendToChat } from '@/lib/events'; import { useSkills } from '@/hooks/useSkills'; @@ -59,9 +61,15 @@ interface Props { // when non-empty) and focuses — no auto-send. chatId?: string; chatLabel?: string; + // v1.11.5: context-bar inputs. messages drives the latest-pair walk; + // modelContextLimit is the zero-state fallback (and powers the + // auto-compaction-threshold tooltip when no assistant message has run + // yet). Both are optional so older call sites still compile. + messages?: Message[]; + modelContextLimit?: number | null; } -export function ChatInput({ disabled, projectId, agentId, onAgentChange, sessionId, webSearchEnabled, onSend, onForceSend, onSlashCommand, chatId, chatLabel }: Props) { +export function ChatInput({ disabled, projectId, agentId, onAgentChange, sessionId, webSearchEnabled, onSend, onForceSend, onSlashCommand, chatId, chatLabel, messages, modelContextLimit }: Props) { const { isMobile } = useViewport(); const [value, setValue] = useState(''); const [busy, setBusy] = useState(false); @@ -553,10 +561,11 @@ export function ChatInput({ disabled, projectId, agentId, onAgentChange, session ))} )} - {/* Batch 9 toolbar — agent picker. 
v1.9 adds the icon-only + menu next - to it for quick toggles (currently: Web search). When omitted at the - callsite the row stays collapsed so nothing else has to change. */} - {(onAgentChange || sessionId) && ( + {/* Batch 9 toolbar — agent picker + quick-toggle menu. v1.11.5.1 + inlines ContextBar in the same row so the bar lives next to the + picker rather than as a separate header above it. The row renders + when ANY of {picker, quick-toggle, ContextBar} is wanted. */} + {(onAgentChange || sessionId || messages !== undefined) && (
{onAgentChange && ( )} + {/* v1.11.5.1: ContextBar fills the remaining horizontal space. + `flex-1 min-w-0` is set inside the component. Mounts only when + the caller passes `messages` so older call sites (without the + prop) keep their original layout. */} + {messages !== undefined && ( + + )}
)}
diff --git a/apps/web/src/components/ContextBar.tsx b/apps/web/src/components/ContextBar.tsx index d45a06d..5365128 100644 --- a/apps/web/src/components/ContextBar.tsx +++ b/apps/web/src/components/ContextBar.tsx @@ -2,20 +2,27 @@ import type { Message } from '@/api/types'; interface Props { messages: Message[]; + // v1.11.5: model's full context window from chat.model_context_limit + // (server-side getModelContext lookup). Lets us render a meaningful + // zero-state (0 / max, muted) before any assistant message has run. + // null/undefined means lookup failed — bar still renders, but with an + // "Context — / —" placeholder rather than misleading 0/0 math. + modelContextLimit?: number | null; } -// v1.11.2: persistent context-usage indicator above MessageList. Mirrors the -// server-side compaction.usable() formula — color thresholds are computed -// against (max - 20k buffer), not raw max, so the bar turns amber/orange -// /red at the same boundaries auto-compaction will fire. The popover above -// the input (ChatContextPopover) uses raw-% thresholds and is intentionally -// kept separate (it's a different surface and a different signal). +// v1.11.5.1: inline persistent context-usage indicator. Lives in the same +// horizontal row as the agent picker (was a separate row above; user +// pointed at the empty space next to "Code Reviewer ▾ +" and asked for +// the bar there). Caller wraps in a flex container and ContextBar takes +// the remaining width via `flex-1 min-w-0`. Color tiers fire against +// (max - 20k compaction reserve) so the bar warns amber/orange/red at +// the same boundaries the server's auto-compaction triggers. const COMPACTION_BUFFER = 20_000; // Walk newest-first; first message with both ctx_used and ctx_max non-null // AND ctx_max > 0 wins. Older messages may have ctx_used but missing ctx_max // (early v1 before llama-swap's n_ctx capture worked) — skip them and keep -// walking. If nothing usable in the chat, caller renders null. 
+// walking. Returns null when no usable pair exists in the chat. function latestPair(messages: Message[]): { used: number; max: number } | null { for (let i = messages.length - 1; i >= 0; i--) { const m = messages[i]!; @@ -42,45 +49,68 @@ function tierFor(usablePct: number): ColorTier { return { text: 'text-muted-foreground', bar: 'bg-muted-foreground/40' }; } -export function ContextBar({ messages }: Props) { +export function ContextBar({ messages, modelContextLimit }: Props) { + // Resolve which of the three render branches applies: + // 1. real pair — actual usage from the latest assistant message + // 2. zero-state — no usage yet but we know the model's limit + // 3. unknown — neither usage nor limit; render placeholder + // The component NEVER returns null per v1.11.5 spec — the bar is + // persistent so the user knows where it lives. const pair = latestPair(messages); - if (!pair) return null; + const usable: number | null = pair + ? Math.max(0, pair.max - COMPACTION_BUFFER) + : modelContextLimit && modelContextLimit > 0 + ? Math.max(0, modelContextLimit - COMPACTION_BUFFER) + : null; - const { used, max } = pair; - const usable = Math.max(0, max - COMPACTION_BUFFER); - const pct = used / max; - const usablePct = usable > 0 ? used / usable : 0; + const used = pair?.used ?? 0; + const max = pair?.max ?? (modelContextLimit && modelContextLimit > 0 ? modelContextLimit : null); + + // pct/usablePct only meaningful when max is known. The unknown branch + // sets fill width to 0 and tier to muted regardless. + const pct = max ? used / max : 0; + const usablePct = usable && usable > 0 ? used / usable : 0; const tier = tierFor(usablePct); - // Bar fill is clamped to [0, 100] — over-budget cases (usable < used) still + // Bar fill clamped to [0, 100]. Over-budget cases (usable < used) still // show the bar at 100% red rather than overflowing the track visually. const fillPct = Math.min(100, Math.max(0, pct * 100)); - const compactionThresholdPct = max > 0 ? 
Math.round((usable / max) * 100) : 0; + const compactionThresholdPct = + max && usable && usable > 0 ? Math.round((usable / max) * 100) : null; + const tooltipText = + compactionThresholdPct !== null + ? `Auto-compaction at ~${compactionThresholdPct}%` + : 'Model context unknown.'; + // `flex-1 min-w-0` lets the bar consume the remaining width inside the + // picker row's flex container while preventing the numbers (whitespace- + // nowrap) from pushing the bar out of bounds. Two-element row: track on + // the left, numbers on the right. return ( -
-
-
- {/* "Context" on >=sm, "Ctx" on phones to save horizontal space. */} - - Context - Ctx - - - {used.toLocaleString()} / {max.toLocaleString()}{' '} - ({Math.round(pct * 100)}%) - -
-
-
-
+
+
+
+ + {max !== null ? ( + <> + {/* Absolute counts hidden on very narrow viewports so the + percentage always has room. Tooltip carries full detail. */} + + {used.toLocaleString()} / {max.toLocaleString()}{' '} + + ({Math.round(pct * 100)}%) + + ) : ( + <>— / — + )} +
); } diff --git a/apps/web/src/components/panes/ChatPane.tsx b/apps/web/src/components/panes/ChatPane.tsx index 618b0d7..547245b 100644 --- a/apps/web/src/components/panes/ChatPane.tsx +++ b/apps/web/src/components/panes/ChatPane.tsx @@ -3,11 +3,8 @@ import { ChevronDown, Square, X } from 'lucide-react'; import { toast } from 'sonner'; import { api } from '@/api/client'; import { useSessionStream } from '@/hooks/useSessionStream'; -import { useChatContextStats } from '@/hooks/useChatContextStats'; import { MessageList } from '@/components/MessageList'; import { ChatInput } from '@/components/ChatInput'; -import { ChatContextPopover } from '@/components/ChatContextPopover'; -import { ContextBar } from '@/components/ContextBar'; import { DropdownMenu, DropdownMenuContent, @@ -47,7 +44,11 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange, const chatMessages = stream.messages.filter((m) => m.chat_id === chatId); const streaming = chatMessages.some((m) => m.status === 'streaming'); - const contextStats = useChatContextStats(chatId, chatMessages); + // v1.11.5: per-chat model context limit comes from chat.model_context_limit + // populated by GET /api/sessions/:id/chats. Threaded into ChatInput so + // ContextBar can render a zero-state before the first assistant message. + const modelContextLimit = + sessionChats?.find((c) => c.id === chatId)?.model_context_limit ?? null; // Auto-send next queued message when streaming completes useEffect(() => { @@ -126,10 +127,7 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange, return (
- {/* v1.11.2: persistent context-usage indicator. Renders null when there - are no assistant messages yet (fresh chat). shrink-0 keeps it out of - the MessageList scroll region — bar stays pinned, list scrolls. */} - + {/* v1.11.5: ContextBar moved into ChatInput (inline, in the agent-picker row). */} {/* Queued messages */} @@ -189,22 +187,23 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
)} -
- - c.id === chatId)?.name ?? 'Chat'} - /> -
+ c.id === chatId)?.name ?? 'Chat'} + // v1.11.5: feed ContextBar (mounted inside ChatInput). messages + // drives latest-pair walk; modelContextLimit powers the zero-state. + messages={chatMessages} + modelContextLimit={modelContextLimit} + />
); } diff --git a/apps/web/src/hooks/useChatContextStats.ts b/apps/web/src/hooks/useChatContextStats.ts deleted file mode 100644 index a386584..0000000 --- a/apps/web/src/hooks/useChatContextStats.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { useMemo } from 'react'; -import type { Message } from '@/api/types'; - -export interface ChatContextStats { - used: number; - max: number; - percent: number; -} - -/** - * Returns the latest context-window usage for the given chat, derived from the - * assistant message (with both ctx_used and ctx_max populated) having the most - * recent created_at. Returns null when no such message exists. - * - * Re-evaluates whenever the `messages` reference or `chatId` changes, which - * matches the cadence of streaming updates from `useSessionStream`. - */ -export function useChatContextStats( - chatId: string, - messages: Message[], -): ChatContextStats | null { - return useMemo(() => { - let latest: Message | null = null; - for (const m of messages) { - if (m.chat_id !== chatId) continue; - if (m.role !== 'assistant') continue; - if (m.ctx_used == null || m.ctx_max == null) continue; - if (!latest || m.created_at > latest.created_at) latest = m; - } - if (!latest || latest.ctx_used == null || latest.ctx_max == null) return null; - const used = latest.ctx_used; - const max = latest.ctx_max; - if (max <= 0) return null; - const percent = Math.round((used / max) * 100); - return { used, max, percent }; - }, [chatId, messages]); -}