wip: context-meter + model-label UI and provider/inference tweaks

Checkpoint of in-flight work so the orchestrator branch can rebase onto a clean main: ContextBar → ContextMeter, model-label helper, model/agent picker + provider-snapshot/registry changes, inference payload + message-columns.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:55:38 +00:00
parent 5f4c7a9050
commit 163b5b86f7
21 changed files with 471 additions and 233 deletions
--- a/apps/server/src/services/tests/inference.test.ts
+++ b/apps/server/src/services/tests/inference.test.ts
@@ -111,6 +111,42 @@ describe('buildMessagesPayload', async () => {
    expect(result[4]).toMatchObject({ role: 'assistant', content: 'great' });
  });

+  it('does NOT annotate models when the chat uses a single model', async () => {
+    const session = makeSession();
+    const project = makeProject();
+    const history: Message[] = [
+      makeMessage('user', 'hi'),
+      makeMessage('assistant', 'hello', { model: 'qwen3.6-35b-a3b-mxfp4' }),
+      makeMessage('user', 'again'),
+      makeMessage('assistant', 'world', { model: 'qwen3.6-35b-a3b-mxfp4' }),
+    ];
+    const result = await buildMessagesPayload(session, project, history);
+    // 1 system + 4 history — no extra attribution system note.
+    expect(result).toHaveLength(5);
+    expect(result.filter((m) => m.role === 'system')).toHaveLength(1);
+    expect(result[2]).toMatchObject({ role: 'assistant', content: 'hello' });
+    expect(result[4]).toMatchObject({ role: 'assistant', content: 'world' });
+  });
+
+  it('annotates each assistant turn with its model when the chat mixes models', async () => {
+    const session = makeSession();
+    const project = makeProject();
+    const history: Message[] = [
+      makeMessage('user', 'hi'),
+      makeMessage('assistant', 'opus reply', { model: 'claude-opus-4-8' }),
+      makeMessage('user', 'switch'),
+      makeMessage('assistant', 'qwen reply', { model: 'qwen3.6-35b-a3b-mxfp4' }),
+    ];
+    const result = await buildMessagesPayload(session, project, history);
+    // 1 system prompt + 1 attribution note + 4 history rows.
+    const systems = result.filter((m) => m.role === 'system');
+    expect(systems).toHaveLength(2);
+    expect(systems[1]!.content).toContain('square brackets');
+    const assistants = result.filter((m) => m.role === 'assistant');
+    expect(assistants[0]!.content).toBe('[claude-opus-4-8] opus reply');
+    expect(assistants[1]!.content).toBe('[qwen3.6-35b-a3b-mxfp4] qwen reply');
+  });
+
  it('starts from the latest compact marker, emitting it as a system message', async () => {
    const session = makeSession();
    const project = makeProject();
--- a/apps/server/src/services/inference/payload.ts
+++ b/apps/server/src/services/inference/payload.ts
@@ -91,6 +91,27 @@ export async function buildMessagesPayload(
    }
  }

+  // Per-turn model attribution. When the sent window mixes ≥2 models, prefix
+  // each prior assistant turn with its model id so the active model can answer
+  // "what did Opus say". Single-model chats are left byte-identical (no prefix,
+  // no note) so the common case sees no payload or prefix-cache change.
+  const sentModels = new Set<string>();
+  for (let i = startIdx; i < history.length; i++) {
+    const m = history[i]!;
+    if (m.role === 'assistant' && m.model && !isAnySentinel(m)) sentModels.add(m.model);
+  }
+  const annotateModels = sentModels.size >= 2;
+  if (annotateModels) {
+    out.push({
+      role: 'system',
+      content:
+        'This conversation includes replies from more than one AI model. Each prior ' +
+        'assistant turn below is prefixed with its model id in square brackets, e.g. ' +
+        '[claude-opus-4-8]. Those prefixes are metadata for your reference (so you can ' +
+        'tell which model produced which turn) — do not add such a prefix to your own replies.',
+    });
+  }
+
  for (let i = startIdx; i < history.length; i++) {
    const m = history[i]!;
    if (m.kind === 'compact') {
@@ -143,9 +164,10 @@ export async function buildMessagesPayload(
      continue;
    }
    if (m.role === 'assistant') {
+      const body = m.content && m.content.length > 0 ? m.content : null;
      const msg: OpenAiMessage = {
        role: 'assistant',
-        content: m.content && m.content.length > 0 ? m.content : null,
+        content: body != null && annotateModels && m.model ? `[${m.model}] ${body}` : body,
      };
      if (m.tool_calls && m.tool_calls.length > 0) {
        if (assistantToolCallsArePayloadComplete(history, i)) {
--- a/apps/server/src/services/message-columns.ts
+++ b/apps/server/src/services/message-columns.ts
@@ -1,8 +1,9 @@
 // Shared column projections for queries against the messages_with_parts view.
 // All sites that read the full Message wire shape for route responses use
 // MESSAGE_COLUMNS. The inference load path uses INFERENCE_MESSAGE_COLUMNS —
-// it adds reasoning_parts but omits the compaction-display fields
-// (summary, tail_start_id, compacted_at, model) that only the UI needs.
+// it adds reasoning_parts and model (per-turn attribution, used to label prior
+// turns when a chat mixes models) but omits the compaction-display fields
+// (summary, tail_start_id, compacted_at) that only the UI needs.

 export const MESSAGE_COLUMNS =
  'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
@@ -12,4 +13,4 @@ export const MESSAGE_COLUMNS =
 export const INFERENCE_MESSAGE_COLUMNS =
  'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
  'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
-  'reasoning_parts';
+  'reasoning_parts, model';
--- a/apps/server/src/types/api.ts
+++ b/apps/server/src/types/api.ts
@@ -223,6 +223,11 @@ export interface Message {
  summary?: boolean;
  tail_start_id?: string | null;
  compacted_at?: string | null;
+  // Per-assistant-turn model attribution (the chip). Read into the inference
+  // payload so the active model can attribute prior turns when a chat mixes
+  // models ("what did Opus say"). Optional — null/absent for user/tool rows
+  // and pre-attribution assistant rows.
+  model?: string | null;
 }

 export interface ModelInfo {