wip: context-meter + model-label UI and provider/inference tweaks
Checkpoint of in-flight work so the orchestrator branch can rebase onto a clean main: ContextBar → ContextMeter, the model-label helper, model/agent picker and provider-snapshot/registry changes, and the inference payload and message-columns changes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -111,6 +111,42 @@ describe('buildMessagesPayload', async () => {
|
||||
expect(result[4]).toMatchObject({ role: 'assistant', content: 'great' });
|
||||
});
|
||||
|
||||
it('does NOT annotate models when the chat uses a single model', async () => {
|
||||
const session = makeSession();
|
||||
const project = makeProject();
|
||||
const history: Message[] = [
|
||||
makeMessage('user', 'hi'),
|
||||
makeMessage('assistant', 'hello', { model: 'qwen3.6-35b-a3b-mxfp4' }),
|
||||
makeMessage('user', 'again'),
|
||||
makeMessage('assistant', 'world', { model: 'qwen3.6-35b-a3b-mxfp4' }),
|
||||
];
|
||||
const result = await buildMessagesPayload(session, project, history);
|
||||
// 1 system + 4 history — no extra attribution system note.
|
||||
expect(result).toHaveLength(5);
|
||||
expect(result.filter((m) => m.role === 'system')).toHaveLength(1);
|
||||
expect(result[2]).toMatchObject({ role: 'assistant', content: 'hello' });
|
||||
expect(result[4]).toMatchObject({ role: 'assistant', content: 'world' });
|
||||
});
|
||||
|
||||
it('annotates each assistant turn with its model when the chat mixes models', async () => {
|
||||
const session = makeSession();
|
||||
const project = makeProject();
|
||||
const history: Message[] = [
|
||||
makeMessage('user', 'hi'),
|
||||
makeMessage('assistant', 'opus reply', { model: 'claude-opus-4-8' }),
|
||||
makeMessage('user', 'switch'),
|
||||
makeMessage('assistant', 'qwen reply', { model: 'qwen3.6-35b-a3b-mxfp4' }),
|
||||
];
|
||||
const result = await buildMessagesPayload(session, project, history);
|
||||
// 1 system prompt + 1 attribution note + 4 history rows.
|
||||
const systems = result.filter((m) => m.role === 'system');
|
||||
expect(systems).toHaveLength(2);
|
||||
expect(systems[1]!.content).toContain('square brackets');
|
||||
const assistants = result.filter((m) => m.role === 'assistant');
|
||||
expect(assistants[0]!.content).toBe('[claude-opus-4-8] opus reply');
|
||||
expect(assistants[1]!.content).toBe('[qwen3.6-35b-a3b-mxfp4] qwen reply');
|
||||
});
|
||||
|
||||
it('starts from the latest compact marker, emitting it as a system message', async () => {
|
||||
const session = makeSession();
|
||||
const project = makeProject();
|
||||
|
||||
@@ -91,6 +91,27 @@ export async function buildMessagesPayload(
|
||||
}
|
||||
}
|
||||
|
||||
// Per-turn model attribution. When the sent window mixes ≥2 models, prefix
|
||||
// each prior assistant turn with its model id so the active model can answer
|
||||
// "what did Opus say". Single-model chats are left byte-identical (no prefix,
|
||||
// no note) so the common case sees no payload or prefix-cache change.
|
||||
const sentModels = new Set<string>();
|
||||
for (let i = startIdx; i < history.length; i++) {
|
||||
const m = history[i]!;
|
||||
if (m.role === 'assistant' && m.model && !isAnySentinel(m)) sentModels.add(m.model);
|
||||
}
|
||||
const annotateModels = sentModels.size >= 2;
|
||||
if (annotateModels) {
|
||||
out.push({
|
||||
role: 'system',
|
||||
content:
|
||||
'This conversation includes replies from more than one AI model. Each prior ' +
|
||||
'assistant turn below is prefixed with its model id in square brackets, e.g. ' +
|
||||
'[claude-opus-4-8]. Those prefixes are metadata for your reference (so you can ' +
|
||||
'tell which model produced which turn) — do not add such a prefix to your own replies.',
|
||||
});
|
||||
}
|
||||
|
||||
for (let i = startIdx; i < history.length; i++) {
|
||||
const m = history[i]!;
|
||||
if (m.kind === 'compact') {
|
||||
@@ -143,9 +164,10 @@ export async function buildMessagesPayload(
|
||||
continue;
|
||||
}
|
||||
if (m.role === 'assistant') {
|
||||
const body = m.content && m.content.length > 0 ? m.content : null;
|
||||
const msg: OpenAiMessage = {
|
||||
role: 'assistant',
|
||||
content: m.content && m.content.length > 0 ? m.content : null,
|
||||
content: body != null && annotateModels && m.model ? `[${m.model}] ${body}` : body,
|
||||
};
|
||||
if (m.tool_calls && m.tool_calls.length > 0) {
|
||||
if (assistantToolCallsArePayloadComplete(history, i)) {
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
// Shared column projections for queries against the messages_with_parts view.
|
||||
// All sites that read the full Message wire shape for route responses use
|
||||
// MESSAGE_COLUMNS. The inference load path uses INFERENCE_MESSAGE_COLUMNS —
|
||||
// it adds reasoning_parts but omits the compaction-display fields
|
||||
// (summary, tail_start_id, compacted_at, model) that only the UI needs.
|
||||
// it adds reasoning_parts and model (per-turn attribution, used to label prior
|
||||
// turns when a chat mixes models) but omits the compaction-display fields
|
||||
// (summary, tail_start_id, compacted_at) that only the UI needs.
|
||||
|
||||
export const MESSAGE_COLUMNS =
|
||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||
@@ -12,4 +13,4 @@ export const MESSAGE_COLUMNS =
|
||||
export const INFERENCE_MESSAGE_COLUMNS =
|
||||
'id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq, ' +
|
||||
'tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata, ' +
|
||||
'reasoning_parts';
|
||||
'reasoning_parts, model';
|
||||
|
||||
@@ -223,6 +223,11 @@ export interface Message {
|
||||
summary?: boolean;
|
||||
tail_start_id?: string | null;
|
||||
compacted_at?: string | null;
|
||||
// Per-assistant-turn model attribution (the chip). Read into the inference
|
||||
// payload so the active model can attribute prior turns when a chat mixes
|
||||
// models ("what did Opus say"). Optional — null/absent for user/tool rows
|
||||
// and pre-attribution assistant rows.
|
||||
model?: string | null;
|
||||
}
|
||||
|
||||
export interface ModelInfo {
|
||||
|
||||
Reference in New Issue
Block a user