batch4: chats-in-sessions, force-send, /compact, right-rail file browser

Introduces a Session 1:N Chat data model with backfill. The workspace switches to client-side multi-tab pane management. A right-rail file browser with a float-over viewer and click-drag line selection replaces FileBrowserPane.

Adds:
- /compact streaming summarizer (compact markers are respected in the context builder)
- force-send (cancels the in-flight generation, persists the partial as 'cancelled', and awaits cancellation completion via a deferred Promise + 5s timeout)
- message queue
- stop generation
- chat auto-rename
- session archive/unarchive, with a Closed Sessions section on the repo landing page

Adds CHECK constraints on sessions.status, messages.role, and messages.status, with KEEP IN SYNC comments tying them to the MESSAGE_ROLES / MESSAGE_STATUSES const arrays. Deletes the dead pane routes/hook and the api.panes.* client block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 20:39:48 +00:00
parent 6d9515b8a5
commit c35ec65fc4
37 changed files with 3290 additions and 1012 deletions
--- a/apps/server/src/services/auto_name.ts
+++ b/apps/server/src/services/auto_name.ts
@@ -5,31 +5,12 @@ const NAMING_SYSTEM_PROMPT =

 const MAX_TITLE_CHARS = 60;

-// QWEN3 NON-STREAMING UTILITY-CALL PATTERN
-// ----------------------------------------
-// Qwen3-family chat templates default to chain-of-thought reasoning: the
-// model emits a long <think>…</think> block into `reasoning_content` and
-// only finalizes a real reply in `content`. For short utility calls
-// (naming, classification, routing, summarization) with a tight token
-// budget, the model burns the entire budget on reasoning and returns:
-//   - content: ""
-//   - reasoning_content: "Thinking Process: 1. ..." (mid-thought, truncated)
-//   - finish_reason: "length"
-// Fix: pass `chat_template_kwargs: { enable_thinking: false }` to skip the
-// thinking block, and keep `max_tokens` low (~30 is plenty for a 4-word
-// title). The kwarg is a no-op for non-Qwen chat templates, so it's safe
-// to apply unconditionally for any short non-streaming model call.
-// Apply this same pattern to: fork-message (planned), agent-routing
-// (planned), web-search summarization (planned).
-
 function cleanTitle(raw: string): string {
  let name = raw.trim();
-  // Strip surrounding straight or smart quotes (one layer).
  const quotes = ['"', "'", '`', '‘', '’', '“', '”'];
  while (name.length >= 2 && quotes.includes(name[0]!) && quotes.includes(name[name.length - 1]!)) {
    name = name.slice(1, -1).trim();
  }
-  // Drop a leading "Title:" prefix if the model added one despite instructions.
  name = name.replace(/^title\s*:\s*/i, '').trim();
  if (name.length > MAX_TITLE_CHARS) {
    name = name.slice(0, MAX_TITLE_CHARS).trim();
@@ -46,13 +27,10 @@ interface NamingResponse {
  }>;
 }

-// Some Qwen-family models emit "thinking" tokens into reasoning_content and
-// only finalize a real reply in content. Pull a sensible candidate string.
 function pickTitleSource(data: NamingResponse): string {
  const choice = data.choices?.[0]?.message;
  if (!choice) return '';
  if (choice.content && choice.content.trim().length > 0) return choice.content;
-  // Fallback: try to extract a last-line title from reasoning, if present.
  const reasoning = choice.reasoning_content ?? '';
  if (reasoning.length === 0) return '';
  const lines = reasoning
@@ -62,38 +40,44 @@ function pickTitleSource(data: NamingResponse): string {
  return lines[lines.length - 1] ?? '';
 }

-export async function maybeAutoNameSession(
+export async function maybeAutoNameChat(
  ctx: InferenceContext,
+  chatId: string,
  sessionId: string
 ): Promise<void> {
  const counts = await ctx.sql<{ n: number }[]>`
    SELECT COUNT(*)::int AS n
    FROM messages
-    WHERE session_id = ${sessionId}
+    WHERE chat_id = ${chatId}
      AND role = 'assistant'
      AND status = 'complete'
  `;
  if (counts[0]?.n !== 1) return;

-  const sessionRows = await ctx.sql<
-    { id: string; name: string; model: string }[]
+  const chatRows = await ctx.sql<
+    { id: string; name: string | null; session_id: string }[]
  >`
-    SELECT id, name, model FROM sessions WHERE id = ${sessionId}
+    SELECT id, name, session_id FROM chats WHERE id = ${chatId}
  `;
-  const session = sessionRows[0];
-  if (!session) return;
-  const existingName = session.name ?? '';
-  if (existingName !== '' && existingName !== 'New session') return;
+  const chat = chatRows[0];
+  if (!chat) return;
+  if (chat.name !== null && chat.name !== '') return;
+
+  const sessionRows = await ctx.sql<{ model: string }[]>`
+    SELECT model FROM sessions WHERE id = ${sessionId}
+  `;
+  const model = sessionRows[0]?.model;
+  if (!model) return;

  const userMsg = await ctx.sql<{ content: string }[]>`
    SELECT content FROM messages
-    WHERE session_id = ${sessionId} AND role = 'user'
+    WHERE chat_id = ${chatId} AND role = 'user'
    ORDER BY created_at ASC
    LIMIT 1
  `;
  const assistantMsg = await ctx.sql<{ content: string }[]>`
    SELECT content FROM messages
-    WHERE session_id = ${sessionId}
+    WHERE chat_id = ${chatId}
      AND role = 'assistant'
      AND status = 'complete'
    ORDER BY created_at ASC
@@ -105,7 +89,7 @@ export async function maybeAutoNameSession(
  const assistantText = assistantMsg[0].content.slice(0, 2000);

  const body = {
-    model: session.model,
+    model,
    messages: [
      { role: 'system', content: NAMING_SYSTEM_PROMPT },
      {
@@ -116,9 +100,6 @@ export async function maybeAutoNameSession(
    max_tokens: 30,
    temperature: 0.3,
    stream: false,
-    // Qwen-family models default to chain-of-thought; this template kwarg
-    // tells llama.cpp's chat template renderer to skip the thinking block.
-    // Harmless for non-Qwen models.
    chat_template_kwargs: { enable_thinking: false },
  };

@@ -135,23 +116,30 @@ export async function maybeAutoNameSession(
  const raw = pickTitleSource(data);
  const name = cleanTitle(raw);
  if (!name) {
-    ctx.log.warn({ sessionId, raw }, 'auto-name: empty title from model');
+    ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
    return;
  }

-  const updated = await ctx.sql<{ id: string; name: string }[]>`
-    UPDATE sessions
-    SET name = ${name}, updated_at = NOW()
-    WHERE id = ${sessionId}
-      AND (name IS NULL OR name = '' OR name = 'New session')
-    RETURNING id, name
+  const updated = await ctx.sql<{ id: string; name: string; session_id: string; updated_at: string }[]>`
+    UPDATE chats
+    SET name = ${name}, updated_at = clock_timestamp()
+    WHERE id = ${chatId}
+      AND (name IS NULL OR name = '')
+    RETURNING id, name, session_id, updated_at
  `;
  if (updated.length === 0) return;

  ctx.publish(sessionId, {
-    type: 'session_renamed',
-    session_id: sessionId,
+    type: 'chat_renamed',
+    chat_id: chatId,
    name,
  });
-  ctx.log.info({ sessionId, name }, 'session auto-named');
+  ctx.publishUser({
+    type: 'chat_updated',
+    chat_id: chatId,
+    session_id: sessionId,
+    name,
+    updated_at: updated[0]!.updated_at,
+  });
+  ctx.log.info({ chatId, name }, 'chat auto-named');
 }
--- a/apps/server/src/services/inference.ts
+++ b/apps/server/src/services/inference.ts
@@ -4,7 +4,7 @@ import type { Config } from '../config.js';
 import type { Message, Project, Session, ToolCall, UserStreamFrame } from '../types/api.js';
 import { ALL_TOOLS, TOOLS_BY_NAME, toolJsonSchemas } from './tools.js';
 import { PathScopeError, resolveProjectRoot } from './path_guard.js';
-import { maybeAutoNameSession } from './auto_name.js';
+import { maybeAutoNameChat } from './auto_name.js';

 const BASE_SYSTEM_PROMPT = (projectPath: string) =>
  `You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
@@ -21,9 +21,11 @@ export interface InferenceFrame {
    | 'message_complete'
    | 'messages_deleted'
    | 'session_renamed'
+    | 'chat_renamed'
    | 'error';
  message_id?: string;
  message_ids?: string[];
+  chat_id?: string;
  tool_message_id?: string;
  tool_call_id?: string;
  role?: 'assistant' | 'tool' | 'user';
@@ -101,8 +103,23 @@ export function buildMessagesPayload(
  }
  out.push({ role: 'system', content: systemPrompt });

-  for (const m of history) {
+  // Find the latest compact marker — only send messages from that point onwards
+  let startIdx = 0;
+  for (let i = history.length - 1; i >= 0; i--) {
+    if (history[i]!.kind === 'compact') {
+      startIdx = i;
+      break;
+    }
+  }
+
+  for (let i = startIdx; i < history.length; i++) {
+    const m = history[i]!;
+    if (m.kind === 'compact') {
+      out.push({ role: 'system', content: m.content });
+      continue;
+    }
    if (m.role === 'assistant' && m.status === 'streaming') continue;
+    if (m.role === 'assistant' && m.status === 'cancelled') continue;
    if (m.role === 'tool') {
      const tr = m.tool_results;
      if (!tr) continue;
@@ -140,10 +157,11 @@ export function buildMessagesPayload(

 async function loadContext(
  sql: Sql,
-  sessionId: string
+  sessionId: string,
+  chatId: string
 ): Promise<{ session: Session; project: Project; history: Message[] } | null> {
  const sessionRows = await sql<Session[]>`
-    SELECT id, project_id, name, model, system_prompt, created_at, updated_at
+    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at
    FROM sessions WHERE id = ${sessionId}
  `;
  if (sessionRows.length === 0) return null;
@@ -157,10 +175,10 @@ async function loadContext(
  const project = projectRows[0]!;

  const history = await sql<Message[]>`
-    SELECT id, session_id, role, content, tool_calls, tool_results, status, last_seq,
+    SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
           tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at
    FROM messages
-    WHERE session_id = ${sessionId}
+    WHERE chat_id = ${chatId}
    ORDER BY created_at ASC, id ASC
  `;

@@ -204,7 +222,8 @@ async function streamCompletion(
  model: string,
  messages: OpenAiMessage[],
  includeTools: boolean,
-  onDelta: (content: string) => void
+  onDelta: (content: string) => void,
+  signal?: AbortSignal
 ): Promise<StreamResult> {
  const body: Record<string, unknown> = {
    model,
@@ -221,6 +240,7 @@ async function streamCompletion(
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
+    signal,
  });
  if (!res.ok || !res.body) {
    const text = await res.text().catch(() => '');
@@ -331,8 +351,10 @@ async function executeToolCall(
 async function runAssistantTurn(
  ctx: InferenceContext,
  sessionId: string,
+  chatId: string,
  assistantMessageId: string,
-  depth: number
+  depth: number,
+  signal?: AbortSignal
 ): Promise<void> {
  if (depth > MAX_TOOL_LOOP_DEPTH) {
    await ctx.sql`
@@ -345,12 +367,13 @@ async function runAssistantTurn(
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
+      chat_id: chatId,
      error: 'tool loop depth exceeded',
    });
    return;
  }

-  const loaded = await loadContext(ctx.sql, sessionId);
+  const loaded = await loadContext(ctx.sql, sessionId, chatId);
  if (!loaded) {
    ctx.log.warn({ sessionId }, 'inference: session or project missing');
    return;
@@ -370,6 +393,7 @@ async function runAssistantTurn(
  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
+    chat_id: chatId,
    role: 'assistant',
  });

@@ -408,21 +432,25 @@ async function runAssistantTurn(
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
+          chat_id: chatId,
          content: delta,
        });
        ctx.log.debug({ sessionId, delta }, 'inference delta');
        scheduleFlush();
-      }
+      },
+      signal
    );
  } catch (err) {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
-    const errMsg = err instanceof Error ? err.message : String(err);
+    await flushPromise;
+    const isAbort = err instanceof Error && err.name === 'AbortError';
+    const finalStatus = isAbort ? 'cancelled' : 'failed';
    await ctx.sql`
      UPDATE messages
-      SET status = 'failed',
+      SET status = ${finalStatus},
          content = ${accumulated},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
@@ -433,12 +461,23 @@ async function runAssistantTurn(
      RETURNING project_id, name, updated_at
    `;
    ctx.publishUser({ type: 'session_updated', session_id: sessionId, project_id: failSessRow!.project_id, name: failSessRow!.name, updated_at: failSessRow!.updated_at });
-    ctx.publish(sessionId, {
-      type: 'error',
-      message_id: assistantMessageId,
-      error: errMsg,
-    });
-    ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
+    if (isAbort) {
+      ctx.publish(sessionId, {
+        type: 'message_complete',
+        message_id: assistantMessageId,
+        chat_id: chatId,
+      });
+      ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
+    } else {
+      const errMsg = err instanceof Error ? err.message : String(err);
+      ctx.publish(sessionId, {
+        type: 'error',
+        message_id: assistantMessageId,
+        chat_id: chatId,
+        error: errMsg,
+      });
+      ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
+    }
    return;
  }

@@ -475,12 +514,14 @@ async function runAssistantTurn(
      ctx.publish(sessionId, {
        type: 'tool_call',
        message_id: assistantMessageId,
+        chat_id: chatId,
        tool_call: tc,
      });
    }
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
+      chat_id: chatId,
      tokens_used: updated?.tokens_used ?? null,
      ctx_used: updated?.ctx_used ?? null,
      ctx_max: updated?.ctx_max ?? null,
@@ -492,8 +533,8 @@ async function runAssistantTurn(
    await Promise.all(
      toolCalls.map(async (tc) => {
        const [toolRow] = await ctx.sql<{ id: string }[]>`
-          INSERT INTO messages (session_id, role, content, status, created_at)
-          VALUES (${sessionId}, 'tool', '', 'complete', clock_timestamp())
+          INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
+          VALUES (${sessionId}, ${chatId}, 'tool', '', 'complete', clock_timestamp())
          RETURNING id
        `;
        const toolMessageId = toolRow!.id;
@@ -512,6 +553,7 @@ async function runAssistantTurn(
        ctx.publish(sessionId, {
          type: 'tool_result',
          tool_message_id: toolMessageId,
+          chat_id: chatId,
          tool_call_id: tc.id,
          output: tres.output,
          truncated: tres.truncated,
@@ -521,11 +563,11 @@ async function runAssistantTurn(
    );

    const [nextAssistant] = await ctx.sql<{ id: string }[]>`
-      INSERT INTO messages (session_id, role, content, status, created_at)
-      VALUES (${sessionId}, 'assistant', '', 'streaming', clock_timestamp())
+      INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
+      VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp())
      RETURNING id
    `;
-    await runAssistantTurn(ctx, sessionId, nextAssistant!.id, depth + 1);
+    await runAssistantTurn(ctx, sessionId, chatId, nextAssistant!.id, depth + 1, signal);
    return;
  }

@@ -551,6 +593,7 @@ async function runAssistantTurn(
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
+    chat_id: chatId,
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
@@ -561,6 +604,7 @@ async function runAssistantTurn(
  ctx.log.info(
    {
      sessionId,
+      chatId,
      assistantMessageId,
      finishReason,
      chars: content.length,
@@ -574,36 +618,153 @@ async function runAssistantTurn(
 export async function runInference(
  ctx: InferenceContext,
  sessionId: string,
-  assistantMessageId: string
+  chatId: string,
+  assistantMessageId: string,
+  signal?: AbortSignal
 ): Promise<void> {
-  return runAssistantTurn(ctx, sessionId, assistantMessageId, 0);
+  return runAssistantTurn(ctx, sessionId, chatId, assistantMessageId, 0, signal);
+}
+
+const COMPACT_SYSTEM_PROMPT =
+  'Summarize the preceding conversation into a dense but complete context paragraph. Preserve all key facts, decisions, file paths, code patterns, and action items. Do not add any new information. Output only the summary paragraph.';
+
+async function runCompact(
+  ctx: InferenceContext,
+  sessionId: string,
+  chatId: string,
+  compactMessageId: string
+): Promise<void> {
+  const loaded = await loadContext(ctx.sql, sessionId, chatId);
+  if (!loaded) return;
+  const { session, project, history } = loaded;
+
+  const messagesForSummary = buildMessagesPayload(session, project,
+    history.filter((m) => m.id !== compactMessageId)
+  );
+  messagesForSummary.push({
+    role: 'system',
+    content: COMPACT_SYSTEM_PROMPT,
+  });
+
+  ctx.publish(sessionId, {
+    type: 'message_started',
+    message_id: compactMessageId,
+    chat_id: chatId,
+    role: 'assistant',
+  });
+
+  let content = '';
+  try {
+    const result = await streamCompletion(
+      ctx,
+      session.model,
+      messagesForSummary,
+      false,
+      (delta) => {
+        content += delta;
+        ctx.publish(sessionId, {
+          type: 'delta',
+          message_id: compactMessageId,
+          chat_id: chatId,
+          content: delta,
+        });
+      }
+    );
+    content = result.content;
+  } catch (err) {
+    const errMsg = err instanceof Error ? err.message : String(err);
+    await ctx.sql`
+      UPDATE messages SET status = 'failed', content = ${content}, finished_at = clock_timestamp()
+      WHERE id = ${compactMessageId}
+    `;
+    ctx.publish(sessionId, {
+      type: 'error',
+      message_id: compactMessageId,
+      chat_id: chatId,
+      error: errMsg,
+    });
+    return;
+  }
+
+  const preCompactCount = history.filter((m) => m.id !== compactMessageId && m.kind !== 'compact').length;
+  const summary = `[Context compacted — ${preCompactCount} messages summarized]\n\n${content}`;
+
+  await ctx.sql`
+    UPDATE messages SET content = ${summary}, status = 'complete', finished_at = clock_timestamp()
+    WHERE id = ${compactMessageId}
+  `;
+  ctx.publish(sessionId, {
+    type: 'message_complete',
+    message_id: compactMessageId,
+    chat_id: chatId,
+  });
+}
+
+interface InferenceRegistration {
+  controller: AbortController;
+  completed: Promise<void>;
 }

 export function createInferenceRunner(
  ctx: Omit<InferenceContext, 'publishUser'>,
  publishUserFn: (user: string, frame: UserStreamFrame) => void
 ) {
+  const registry = new Map<string, InferenceRegistration>();
+
  return {
-    enqueue(sessionId: string, assistantMessageId: string, user: string) {
+    enqueue(sessionId: string, chatId: string, assistantMessageId: string, user: string) {
+      const callCtx: InferenceContext = {
+        ...ctx,
+        publishUser: (frame) => publishUserFn(user, frame),
+      };
+      const controller = new AbortController();
+      let resolveCompleted!: () => void;
+      const completed = new Promise<void>((res) => { resolveCompleted = res; });
+      const registration: InferenceRegistration = { controller, completed };
+      registry.set(chatId, registration);
+      void (async () => {
+        try {
+          await runInference(callCtx, sessionId, chatId, assistantMessageId, controller.signal);
+          setImmediate(() => {
+            void maybeAutoNameChat(callCtx, chatId, sessionId).catch((err: Error) => {
+              callCtx.log.warn({ err, chatId }, 'auto-name failed');
+            });
+          });
+        } catch (err) {
+          callCtx.log.error({ err }, 'unhandled inference error');
+        } finally {
+          resolveCompleted();
+          // Only clear our own registration; a force-send may have replaced it.
+          if (registry.get(chatId) === registration) {
+            registry.delete(chatId);
+          }
+        }
+      })();
+    },
+
+    enqueueCompact(sessionId: string, chatId: string, compactMessageId: string, user: string) {
      const callCtx: InferenceContext = {
        ...ctx,
        publishUser: (frame) => publishUserFn(user, frame),
      };
      void (async () => {
        try {
-          await runInference(callCtx, sessionId, assistantMessageId);
-          setImmediate(() => {
-            void maybeAutoNameSession(callCtx, sessionId).catch((err) => {
-              callCtx.log.warn({ err, sessionId }, 'auto-name failed');
-            });
-          });
+          await runCompact(callCtx, sessionId, chatId, compactMessageId);
        } catch (err) {
-          callCtx.log.error({ err }, 'unhandled inference error');
+          callCtx.log.error({ err }, 'unhandled compact error');
        }
      })();
    },
+
+    async cancel(_sessionId: string, chatId: string): Promise<boolean> {
+      const reg = registry.get(chatId);
+      if (!reg) return false;
+      reg.controller.abort();
+      // Swallow — we just need to wait for the catch/finally to persist state.
+      await reg.completed.catch(() => {});
+      return true;
+    },
  };
 }

-// Reference to keep ALL_TOOLS imported for type checks if needed
 export const _toolNames = ALL_TOOLS.map((t) => t.name);