wip: context-meter + model-label UI and provider/inference tweaks

Checkpoint of in-flight work so the orchestrator branch can rebase onto a
clean main: ContextBar → ContextMeter, model-label helper, model/agent picker
+ provider-snapshot/registry changes, inference payload + message-columns.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-03 14:55:38 +00:00
parent 5f4c7a9050
commit 163b5b86f7
21 changed files with 471 additions and 233 deletions

View File

@@ -111,6 +111,42 @@ describe('buildMessagesPayload', async () => {
expect(result[4]).toMatchObject({ role: 'assistant', content: 'great' });
});
it('does NOT annotate models when the chat uses a single model', async () => {
  // Every assistant turn in this chat carries the same model id.
  const soleModel = 'qwen3.6-35b-a3b-mxfp4';
  const session = makeSession();
  const project = makeProject();
  const turns: Message[] = [
    makeMessage('user', 'hi'),
    makeMessage('assistant', 'hello', { model: soleModel }),
    makeMessage('user', 'again'),
    makeMessage('assistant', 'world', { model: soleModel }),
  ];

  const payload = await buildMessagesPayload(session, project, turns);

  // Expect 1 system message + 4 history rows, with no additional
  // attribution system note.
  expect(payload).toHaveLength(5);
  const systemMessages = payload.filter((entry) => entry.role === 'system');
  expect(systemMessages).toHaveLength(1);

  // Assistant content is expected verbatim, without a "[model]" prefix.
  expect(payload[2]).toMatchObject({ role: 'assistant', content: 'hello' });
  expect(payload[4]).toMatchObject({ role: 'assistant', content: 'world' });
});
it('annotates each assistant turn with its model when the chat mixes models', async () => {
  const session = makeSession();
  const project = makeProject();
  // Two assistant turns, each attributed to a different model id.
  const turns: Message[] = [
    makeMessage('user', 'hi'),
    makeMessage('assistant', 'opus reply', { model: 'claude-opus-4-8' }),
    makeMessage('user', 'switch'),
    makeMessage('assistant', 'qwen reply', { model: 'qwen3.6-35b-a3b-mxfp4' }),
  ];

  const payload = await buildMessagesPayload(session, project, turns);

  // Expected layout: 1 system prompt + 1 attribution note + 4 history rows.
  const systemMessages = payload.filter((entry) => entry.role === 'system');
  expect(systemMessages).toHaveLength(2);
  // The second system message is the attribution note describing the prefix format.
  const attributionNote = systemMessages[1]!;
  expect(attributionNote.content).toContain('square brackets');

  // Each assistant turn is expected to carry its own "[model-id] " prefix.
  const [firstReply, secondReply] = payload.filter((entry) => entry.role === 'assistant');
  expect(firstReply!.content).toBe('[claude-opus-4-8] opus reply');
  expect(secondReply!.content).toBe('[qwen3.6-35b-a3b-mxfp4] qwen reply');
});
it('starts from the latest compact marker, emitting it as a system message', async () => {
const session = makeSession();
const project = makeProject();

View File

@@ -91,6 +91,27 @@ export async function buildMessagesPayload(
}
}
// Per-turn model attribution. When the sent window mixes ≥2 models, prefix
// each prior assistant turn with its model id so the active model can answer
// "what did Opus say". Single-model chats are left byte-identical (no prefix,
// no note) so the common case sees no payload or prefix-cache change.
const sentModels = new Set<string>();
for (let i = startIdx; i < history.length; i++) {
const m = history[i]!;
if (m.role === 'assistant' && m.model && !isAnySentinel(m)) sentModels.add(m.model);
}
const annotateModels = sentModels.size >= 2;
if (annotateModels) {
out.push({
role: 'system',
content:
'This conversation includes replies from more than one AI model. Each prior ' +
'assistant turn below is prefixed with its model id in square brackets, e.g. ' +
'[claude-opus-4-8]. Those prefixes are metadata for your reference (so you can ' +
'tell which model produced which turn) — do not add such a prefix to your own replies.',
});
}
for (let i = startIdx; i < history.length; i++) {
const m = history[i]!;
if (m.kind === 'compact') {
@@ -143,9 +164,10 @@ export async function buildMessagesPayload(
continue;
}
if (m.role === 'assistant') {
const body = m.content && m.content.length > 0 ? m.content : null;
const msg: OpenAiMessage = {
role: 'assistant',
content: m.content && m.content.length > 0 ? m.content : null,
content: body != null && annotateModels && m.model ? `[${m.model}] ${body}` : body,
};
if (m.tool_calls && m.tool_calls.length > 0) {
if (assistantToolCallsArePayloadComplete(history, i)) {

View File

@@ -1,8 +1,9 @@
// Shared column projections for queries against the messages_with_parts view.
// All sites that read the full Message wire shape for route responses use
// MESSAGE_COLUMNS. The inference load path uses INFERENCE_MESSAGE_COLUMNS —
// it adds reasoning_parts but omits the compaction-display fields
// (summary, tail_start_id, compacted_at, model) that only the UI needs.
// it adds reasoning_parts and model (per-turn attribution, used to label prior
// turns when a chat mixes models) but omits the compaction-display fields
// (summary, tail_start_id, compacted_at) that only the UI needs.
export const MESSAGE_COLUMNS =
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
@@ -12,4 +13,4 @@ export const MESSAGE_COLUMNS =
export const INFERENCE_MESSAGE_COLUMNS =
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
'reasoning_parts';
'reasoning_parts, model';

View File

@@ -223,6 +223,11 @@ export interface Message {
summary?: boolean;
tail_start_id?: string | null;
compacted_at?: string | null;
// Per-assistant-turn model attribution (the chip). Read into the inference
// payload so the active model can attribute prior turns when a chat mixes
// models ("what did Opus say"). Optional — null/absent for user/tool rows
// and pre-attribution assistant rows.
model?: string | null;
}
export interface ModelInfo {