diff --git a/apps/server/src/routes/chats.ts b/apps/server/src/routes/chats.ts
index 5d0c45f..c7a072d 100644
--- a/apps/server/src/routes/chats.ts
+++ b/apps/server/src/routes/chats.ts
@@ -3,6 +3,7 @@ import { z } from 'zod';
 import type { Sql } from '../db.js';
 import type { Broker } from '../services/broker.js';
 import type { Chat, Message } from '../types/api.js';
+import { getModelContext } from '../services/model-context.js';
 
 const CreateBody = z.object({
   name: z.string().min(1).max(200).optional(),
@@ -60,7 +61,20 @@ export function registerChatRoutes(
         WHERE c.session_id = ${req.params.id} AND c.status = ${status}
         ORDER BY c.updated_at DESC
       `;
-      return rows;
+      // v1.11.5: enrich each chat with its model's context window so the
+      // ContextBar can render a zero-state (and the auto-compaction threshold
+      // tooltip) before the first assistant message lands. All chats in a
+      // session share the session's model, so we do ONE getModelContext
+      // lookup and apply the result to the whole list. Failed lookups
+      // (model unknown, llama-swap down) yield null and the frontend falls
+      // through to the "model context unknown" placeholder.
+      const sessRow = await sql<{ model: string | null }[]>`
+        SELECT model FROM sessions WHERE id = ${req.params.id}
+      `;
+      const sessionModel = sessRow[0]?.model ?? null;
+      const mctx = sessionModel ? await getModelContext(sessionModel) : null;
+      const modelContextLimit = mctx?.n_ctx ?? null;
+      return rows.map((r) => ({ ...r, model_context_limit: modelContextLimit }));
     }
   );
 
diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts
index 54f5a04..38cb781 100644
--- a/apps/server/src/types/api.ts
+++ b/apps/server/src/types/api.ts
@@ -89,6 +89,12 @@ export interface Chat {
   message_count?: number;
   last_message_preview?: string | null;
   effective_context_tokens?: number | null;
+  // v1.11.5: model's full context window (from llama-swap props), threaded
+  // to the frontend so ContextBar can render a zero-state + the auto-
+  // compaction threshold tooltip before any assistant message lands.
+  // Shared across all chats in a session (chats inherit session.model).
+  // null when the upstream lookup failed (model unknown, llama-swap down).
+  model_context_limit?: number | null;
 }
 
 // KEEP IN SYNC: apps/server/src/schema.sql messages_role_chk / messages_status_chk
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 9784382..cc359c9 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -80,6 +80,12 @@ export interface Chat {
   message_count?: number;
   last_message_preview?: string | null;
   effective_context_tokens?: number | null;
+  // v1.11.5: model's full context window from llama-swap /props. Used by
+  // ContextBar to render the zero-state + auto-compaction threshold tooltip
+  // before any assistant message exists in the chat. null when upstream
+  // lookup failed (model unknown, llama-swap unreachable) — UI degrades
+  // to a "model context unknown" placeholder.
+  model_context_limit?: number | null;
 }
 
 export type MessageRole = 'user' | 'assistant' | 'tool' | 'system';
diff --git a/apps/web/src/components/ChatContextPopover.tsx b/apps/web/src/components/ChatContextPopover.tsx
deleted file mode 100644
index a08cc9d..0000000
--- a/apps/web/src/components/ChatContextPopover.tsx
+++ /dev/null
@@ -1,55 +0,0 @@
-import type { ChatContextStats } from '@/hooks/useChatContextStats';
-
-interface Props {
-  stats: ChatContextStats | null;
-}
-
-/**
- * Formats a token count into a compact k/m-suffix string.
- *  - < 1_000          → raw integer (e.g. "42")
- *  - 1_000–999_999    → "Nk" or "N.Nk" (e.g. "30k", "12.5k", "100k")
- *  - >= 1_000_000     → "Nm" or "N.Nm" (e.g. "1m", "1.5m", "100m")
- *
- * Drops a trailing ".0" so we get "30k" instead of "30.0k".
- */
-function formatTokens(n: number): string {
-  if (n < 1000) return String(n);
-  if (n < 1_000_000) {
-    const k = n / 1000;
-    return k >= 100 ? `${Math.round(k)}k` : `${k.toFixed(1).replace(/\.0$/, '')}k`;
-  }
-  const m = n / 1_000_000;
-  return m >= 100 ? `${Math.round(m)}m` : `${m.toFixed(1).replace(/\.0$/, '')}m`;
-}
-
-/**
- * Color thresholds:
- *  - >  85%  → text-destructive
- *  - >= 60%  → text-amber-500
- *  - else    → text-muted-foreground
- * (85% itself falls into the amber band.)
- */
-function percentColorClass(percent: number): string {
-  if (percent > 85) return 'text-destructive';
-  if (percent >= 60) return 'text-amber-500';
-  return 'text-muted-foreground';
-}
-
-export function ChatContextPopover({ stats }: Props) {
-  if (!stats) return null;
-  return (
-    <div className="absolute bottom-full right-4 mb-4 z-20 pointer-events-none">
-      <div className="rounded-md border border-border bg-card text-card-foreground shadow-sm px-3 py-2 text-xs min-w-[140px]">
-        <div className="text-muted-foreground/80 text-[10px] uppercase tracking-wide mb-0.5">
-          Context window
-        </div>
-        <div className={`text-base font-medium ${percentColorClass(stats.percent)}`}>
-          {stats.percent}% used
-        </div>
-        <div className="text-muted-foreground text-[10px] font-mono">
-          {formatTokens(stats.used)} / {formatTokens(stats.max)} tokens
-        </div>
-      </div>
-    </div>
-  );
-}
diff --git a/apps/web/src/components/ChatInput.tsx b/apps/web/src/components/ChatInput.tsx
index a60fdbe..9a31d20 100644
--- a/apps/web/src/components/ChatInput.tsx
+++ b/apps/web/src/components/ChatInput.tsx
@@ -22,8 +22,10 @@ import { AttachmentPreviewModal } from '@/components/AttachmentPreviewModal';
 import { FileMentionPopover } from '@/components/FileMentionPopover';
 import { DropOverlay } from '@/components/DropOverlay';
 import { AgentPicker } from '@/components/AgentPicker';
+import { ContextBar } from '@/components/ContextBar';
 import { SkillSlashCommand } from '@/components/SkillSlashCommand';
 import { api } from '@/api/client';
+import type { Message } from '@/api/types';
 import { sessionEvents } from '@/hooks/sessionEvents';
 import { chatInputsRegistry, sendToChat } from '@/lib/events';
 import { useSkills } from '@/hooks/useSkills';
@@ -59,9 +61,15 @@ interface Props {
   // when non-empty) and focuses — no auto-send.
   chatId?: string;
   chatLabel?: string;
+  // v1.11.5: context-bar inputs. messages drives the latest-pair walk;
+  // modelContextLimit is the zero-state fallback (and powers the
+  // auto-compaction-threshold tooltip when no assistant message has run
+  // yet). Both are optional so older call sites still compile.
+  messages?: Message[];
+  modelContextLimit?: number | null;
 }
 
-export function ChatInput({ disabled, projectId, agentId, onAgentChange, sessionId, webSearchEnabled, onSend, onForceSend, onSlashCommand, chatId, chatLabel }: Props) {
+export function ChatInput({ disabled, projectId, agentId, onAgentChange, sessionId, webSearchEnabled, onSend, onForceSend, onSlashCommand, chatId, chatLabel, messages, modelContextLimit }: Props) {
   const { isMobile } = useViewport();
   const [value, setValue] = useState('');
   const [busy, setBusy] = useState(false);
@@ -553,10 +561,11 @@ export function ChatInput({ disabled, projectId, agentId, onAgentChange, session
           ))}
         </div>
       )}
-      {/* Batch 9 toolbar — agent picker. v1.9 adds the icon-only + menu next
-          to it for quick toggles (currently: Web search). When omitted at the
-          callsite the row stays collapsed so nothing else has to change. */}
-      {(onAgentChange || sessionId) && (
+      {/* Batch 9 toolbar — agent picker + quick-toggle menu. v1.11.5.1
+          inlines ContextBar in the same row so the bar lives next to the
+          picker rather than as a separate header above it. The row renders
+          when ANY of {picker, quick-toggle, ContextBar} is wanted. */}
+      {(onAgentChange || sessionId || messages !== undefined) && (
         <div className="px-4 pt-2 flex items-center gap-1.5">
           {onAgentChange && (
             <AgentPicker
@@ -598,6 +607,13 @@ export function ChatInput({ disabled, projectId, agentId, onAgentChange, session
               </DropdownMenuContent>
             </DropdownMenu>
           )}
+          {/* v1.11.5.1: ContextBar fills the remaining horizontal space.
+              `flex-1 min-w-0` is set inside the component. Mounts only when
+              the caller passes `messages` so older call sites (without the
+              prop) keep their original layout. */}
+          {messages !== undefined && (
+            <ContextBar messages={messages} modelContextLimit={modelContextLimit} />
+          )}
         </div>
       )}
       <div className="px-4 py-3 flex items-end gap-2">
diff --git a/apps/web/src/components/ContextBar.tsx b/apps/web/src/components/ContextBar.tsx
index d45a06d..5365128 100644
--- a/apps/web/src/components/ContextBar.tsx
+++ b/apps/web/src/components/ContextBar.tsx
@@ -2,20 +2,27 @@ import type { Message } from '@/api/types';
 
 interface Props {
   messages: Message[];
+  // v1.11.5: model's full context window from chat.model_context_limit
+  // (server-side getModelContext lookup). Lets us render a meaningful
+  // zero-state (0 / max, muted) before any assistant message has run.
+  // null/undefined/non-positive means no usable limit — bar still renders,
+  // but with a "— / —" placeholder rather than misleading 0/0 math.
+  modelContextLimit?: number | null;
 }
 
-// v1.11.2: persistent context-usage indicator above MessageList. Mirrors the
-// server-side compaction.usable() formula — color thresholds are computed
-// against (max - 20k buffer), not raw max, so the bar turns amber/orange
-// /red at the same boundaries auto-compaction will fire. The popover above
-// the input (ChatContextPopover) uses raw-% thresholds and is intentionally
-// kept separate (it's a different surface and a different signal).
+// v1.11.5.1: inline persistent context-usage indicator. Lives in the same
+// horizontal row as the agent picker (was a separate row above; user
+// pointed at the empty space next to "Code Reviewer ▾  +" and asked for
+// the bar there). Caller wraps in a flex container and ContextBar takes
+// the remaining width via `flex-1 min-w-0`. Color tiers fire against
+// (max - 20k compaction reserve) so the bar warns amber/orange/red at
+// the same boundaries the server's auto-compaction triggers.
 const COMPACTION_BUFFER = 20_000;
 
 // Walk newest-first; first message with both ctx_used and ctx_max non-null
 // AND ctx_max > 0 wins. Older messages may have ctx_used but missing ctx_max
 // (early v1 before llama-swap's n_ctx capture worked) — skip them and keep
-// walking. If nothing usable in the chat, caller renders null.
+// walking. Returns null when no usable pair exists in the chat.
 function latestPair(messages: Message[]): { used: number; max: number } | null {
   for (let i = messages.length - 1; i >= 0; i--) {
     const m = messages[i]!;
@@ -42,45 +49,68 @@ function tierFor(usablePct: number): ColorTier {
   return { text: 'text-muted-foreground', bar: 'bg-muted-foreground/40' };
 }
 
-export function ContextBar({ messages }: Props) {
+export function ContextBar({ messages, modelContextLimit }: Props) {
+  // Resolve which of the three render branches applies:
+  //   1. real pair      — usage from the newest message with a usable pair
+  //   2. zero-state     — no usage yet but we know the model's limit
+  //   3. unknown        — neither usage nor limit; render placeholder
+  // The component NEVER returns null per v1.11.5 spec — the bar is
+  // persistent so the user knows where it lives.
   const pair = latestPair(messages);
-  if (!pair) return null;
+  const used = pair?.used ?? 0;
+  const rawMax = pair?.max ?? modelContextLimit ?? null;
+  // Treat a non-positive limit as unknown so no ratio below divides by zero.
+  const max = rawMax !== null && rawMax > 0 ? rawMax : null;
+  const usable = max !== null ? Math.max(0, max - COMPACTION_BUFFER) : null;
 
-  const { used, max } = pair;
-  const usable = Math.max(0, max - COMPACTION_BUFFER);
-  const pct = used / max;
-  const usablePct = usable > 0 ? used / usable : 0;
+  // pct (share of the raw window) drives the fill width; usablePct (share
+  // of the post-reserve budget) drives the color tier. Both stay 0 when max
+  // is unknown, and usablePct stays 0 when the reserve consumes the window.
+  const pct = max !== null ? used / max : 0;
+  const usablePct = usable !== null && usable > 0 ? used / usable : 0;
   const tier = tierFor(usablePct);
 
-  // Bar fill is clamped to [0, 100] — over-budget cases (usable < used) still
+  // Bar fill clamped to [0, 100]. Over-budget cases (usable < used) still
   // show the bar at 100% red rather than overflowing the track visually.
   const fillPct = Math.min(100, Math.max(0, pct * 100));
-  const compactionThresholdPct = max > 0 ? Math.round((usable / max) * 100) : 0;
+  // Any known max yields a threshold; a window no larger than the reserve
+  // (usable === 0) reads "~0%" rather than the contradictory "unknown" text.
+  const compactionThresholdPct =
+    max !== null && usable !== null ? Math.round((usable / max) * 100) : null;
+  const tooltipText =
+    compactionThresholdPct !== null
+      ? `Auto-compaction at ~${compactionThresholdPct}%`
+      : 'Model context unknown.';
 
+  // `flex-1 min-w-0` lets the bar consume the remaining width inside the
+  // picker row's flex container while preventing the numbers (whitespace-
+  // nowrap) from pushing the bar out of bounds. Two-element row: track on
+  // the left, numbers on the right.
   return (
-    <div className="border-b px-4 py-1 shrink-0">
-      <div className="max-w-[1000px] mx-auto w-full">
-        <div className="flex items-baseline justify-between text-[10px] font-mono leading-tight">
-          {/* "Context" on >=sm, "Ctx" on phones to save horizontal space. */}
-          <span className={tier.text}>
-            <span className="hidden sm:inline">Context</span>
-            <span className="sm:hidden">Ctx</span>
-          </span>
-          <span
-            className={tier.text}
-            title={`Auto-compaction at ~${compactionThresholdPct}%`}
-          >
-            {used.toLocaleString()} / {max.toLocaleString()}{' '}
-            <span className="max-[380px]:hidden">({Math.round(pct * 100)}%)</span>
-          </span>
-        </div>
-        <div className="mt-1 h-1 rounded-full bg-muted overflow-hidden">
-          <div
-            className={`h-full ${tier.bar} transition-[width] duration-300`}
-            style={{ width: `${fillPct}%` }}
-          />
-        </div>
+    <div className="flex items-center gap-2 flex-1 min-w-0">
+      <div className="flex-1 h-2 rounded-full bg-muted overflow-hidden min-w-0">
+        <div
+          className={`h-full ${tier.bar} transition-[width] duration-300`}
+          style={{ width: `${fillPct}%` }}
+        />
       </div>
+      <span
+        className={`${tier.text} text-[10px] font-mono whitespace-nowrap shrink-0`}
+        title={tooltipText}
+      >
+        {max !== null ? (
+          <>
+            {/* Absolute counts hidden on very narrow viewports so the
+                percentage always has room. Tooltip carries full detail. */}
+            <span className="max-[480px]:hidden">
+              {used.toLocaleString()} / {max.toLocaleString()}{' '}
+            </span>
+            ({Math.round(pct * 100)}%)
+          </>
+        ) : (
+          <>— / —</>
+        )}
+      </span>
     </div>
   );
 }
diff --git a/apps/web/src/components/panes/ChatPane.tsx b/apps/web/src/components/panes/ChatPane.tsx
index 618b0d7..547245b 100644
--- a/apps/web/src/components/panes/ChatPane.tsx
+++ b/apps/web/src/components/panes/ChatPane.tsx
@@ -3,11 +3,8 @@ import { ChevronDown, Square, X } from 'lucide-react';
 import { toast } from 'sonner';
 import { api } from '@/api/client';
 import { useSessionStream } from '@/hooks/useSessionStream';
-import { useChatContextStats } from '@/hooks/useChatContextStats';
 import { MessageList } from '@/components/MessageList';
 import { ChatInput } from '@/components/ChatInput';
-import { ChatContextPopover } from '@/components/ChatContextPopover';
-import { ContextBar } from '@/components/ContextBar';
 import {
   DropdownMenu,
   DropdownMenuContent,
@@ -47,7 +44,11 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
 
   const chatMessages = stream.messages.filter((m) => m.chat_id === chatId);
   const streaming = chatMessages.some((m) => m.status === 'streaming');
-  const contextStats = useChatContextStats(chatId, chatMessages);
+  // v1.11.5: per-chat model context limit comes from chat.model_context_limit
+  // populated by GET /api/sessions/:id/chats. Threaded into ChatInput so
+  // ContextBar can render a zero-state before the first assistant message.
+  const modelContextLimit =
+    sessionChats?.find((c) => c.id === chatId)?.model_context_limit ?? null;
 
   // Auto-send next queued message when streaming completes
   useEffect(() => {
@@ -126,10 +127,7 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
 
   return (
     <div className="flex flex-col h-full min-h-0">
-      {/* v1.11.2: persistent context-usage indicator. Renders null when there
-          are no assistant messages yet (fresh chat). shrink-0 keeps it out of
-          the MessageList scroll region — bar stays pinned, list scrolls. */}
-      <ContextBar messages={chatMessages} />
+      {/* v1.11.5.1: ContextBar moved into ChatInput (inline with the agent picker row). */}
       <MessageList messages={chatMessages} sessionChats={sessionChats} />
 
       {/* Queued messages */}
@@ -189,22 +187,23 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
         </div>
       )}
 
-      <div className="relative">
-        <ChatContextPopover stats={contextStats} />
-        <ChatInput
-          disabled={false}
-          projectId={projectId}
-          sessionId={sessionId}
-          agentId={agentId}
-          onAgentChange={onAgentChange}
-          webSearchEnabled={webSearchEnabled}
-          onSend={handleSend}
-          onForceSend={streaming ? handleForceSend : undefined}
-          onSlashCommand={handleSlashCommand}
-          chatId={chatId}
-          chatLabel={sessionChats?.find((c) => c.id === chatId)?.name ?? 'Chat'}
-        />
-      </div>
+      <ChatInput
+        disabled={false}
+        projectId={projectId}
+        sessionId={sessionId}
+        agentId={agentId}
+        onAgentChange={onAgentChange}
+        webSearchEnabled={webSearchEnabled}
+        onSend={handleSend}
+        onForceSend={streaming ? handleForceSend : undefined}
+        onSlashCommand={handleSlashCommand}
+        chatId={chatId}
+        chatLabel={sessionChats?.find((c) => c.id === chatId)?.name ?? 'Chat'}
+        // v1.11.5: feed ContextBar (mounted inside ChatInput). messages
+        // drives latest-pair walk; modelContextLimit powers the zero-state.
+        messages={chatMessages}
+        modelContextLimit={modelContextLimit}
+      />
     </div>
   );
 }
diff --git a/apps/web/src/hooks/useChatContextStats.ts b/apps/web/src/hooks/useChatContextStats.ts
deleted file mode 100644
index a386584..0000000
--- a/apps/web/src/hooks/useChatContextStats.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-import { useMemo } from 'react';
-import type { Message } from '@/api/types';
-
-export interface ChatContextStats {
-  used: number;
-  max: number;
-  percent: number;
-}
-
-/**
- * Returns the latest context-window usage for the given chat, derived from the
- * assistant message (with both ctx_used and ctx_max populated) having the most
- * recent created_at. Returns null when no such message exists.
- *
- * Re-evaluates whenever the `messages` reference or `chatId` changes, which
- * matches the cadence of streaming updates from `useSessionStream`.
- */
-export function useChatContextStats(
-  chatId: string,
-  messages: Message[],
-): ChatContextStats | null {
-  return useMemo(() => {
-    let latest: Message | null = null;
-    for (const m of messages) {
-      if (m.chat_id !== chatId) continue;
-      if (m.role !== 'assistant') continue;
-      if (m.ctx_used == null || m.ctx_max == null) continue;
-      if (!latest || m.created_at > latest.created_at) latest = m;
-    }
-    if (!latest || latest.ctx_used == null || latest.ctx_max == null) return null;
-    const used = latest.ctx_used;
-    const max = latest.ctx_max;
-    if (max <= 0) return null;
-    const percent = Math.round((used / max) * 100);
-    return { used, max, percent };
-  }, [chatId, messages]);
-}