v1.13.6: compaction head-assembly audit + reasoning fix

Audit traced compaction's summary path post-v1.13.1-B read flip:
- Q1: reads from messages_with_parts (view) — clean
- Q2: parts shape correctly threaded through buildHeadPayload — clean
- Q3: reasoning omitted from summary input — FIX NEEDED

v1.13.1-C wired reasoning end-to-end into inference/payload.ts but
missed this read site. Summarizer model couldn't see the reasoning
trail for tool-bearing turns, quietly degrading summary quality for
reasoning-channel models (qwen3.6).

Fix:
- CompactionMessage extended with reasoning_parts field
- SELECT pulls reasoning_parts from messages_with_parts
- buildHeadPayload (now exported for tests) prefixes assistant content
  with <reasoning>...</reasoning>\n\n<content>... when reasoning is
  present; standalone <reasoning>...</reasoning> for tool-call-only
  turns; omits the tag when reasoning is null or empty

4 new render branch tests (190 total).

Smoke deferred: forcing real compaction requires either threshold
pollution or building up a >40k-token chat with reasoning_parts.
Render branches are unit-covered; integration would only re-prove
structural correctness.
This commit is contained in:
2026-05-22 08:18:47 +00:00
parent f8fc5db929
commit 81d837c04e
3 changed files with 93 additions and 7 deletions

View File

@@ -6,6 +6,7 @@ import {
turns,
select,
buildPrompt,
buildHeadPayload,
type CompactionMessage,
} from '../compaction.js';
import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';
@@ -31,6 +32,7 @@ function mkMsg(
status: 'complete',
tool_calls: null,
tool_results: null,
reasoning_parts: null,
metadata: null,
created_at: new Date(counter * 1000).toISOString(),
...overrides,
@@ -256,3 +258,56 @@ describe('buildPrompt', () => {
expect(out.endsWith('extra-context-line')).toBe(true);
});
});
// ---- buildHeadPayload (v1.13.6) -----------------------------------------------
describe('buildHeadPayload reasoning render', () => {
it('emits reasoning as a <reasoning> tag prefixed onto the assistant content', () => {
const out = buildHeadPayload([
mkMsg('user', 'show me the file'),
mkMsg('assistant', 'reading it now', {
reasoning_parts: [{ text: 'user wants src/index.ts; I should view it' }],
}),
]);
expect(out).toHaveLength(2);
expect(out[1]!.role).toBe('assistant');
expect(out[1]!.content).toBe(
'<reasoning>user wants src/index.ts; I should view it</reasoning>\n\nreading it now',
);
});
it('emits a standalone <reasoning> tag when reasoning is present but content is empty (tool-call-only turn)', () => {
const out = buildHeadPayload([
mkMsg('assistant', '', {
reasoning_parts: [{ text: 'jumping straight to grep' }],
tool_calls: [{ id: 'c1', name: 'grep', args: { pattern: 'foo' } }],
}),
]);
expect(out).toHaveLength(1);
expect(out[0]!.content).toBe('<reasoning>jumping straight to grep</reasoning>');
expect(out[0]!.tool_calls).toHaveLength(1);
expect(out[0]!.tool_calls![0]!.function.name).toBe('grep');
});
it('joins multiple reasoning parts without separators (matches the streaming concat)', () => {
const out = buildHeadPayload([
mkMsg('assistant', 'final answer', {
reasoning_parts: [{ text: 'first thought ' }, { text: 'second thought' }],
}),
]);
expect(out[0]!.content).toBe(
'<reasoning>first thought second thought</reasoning>\n\nfinal answer',
);
});
it('omits the reasoning tag entirely when reasoning_parts is null or empty', () => {
const out = buildHeadPayload([
mkMsg('assistant', 'plain answer', { reasoning_parts: null }),
mkMsg('assistant', 'other answer', { reasoning_parts: [] }),
]);
expect(out[0]!.content).toBe('plain answer');
expect(out[1]!.content).toBe('other answer');
expect(out[0]!.content).not.toContain('<reasoning>');
expect(out[1]!.content).not.toContain('<reasoning>');
});
});

View File

@@ -39,6 +39,11 @@ export interface CompactionMessage {
status: 'streaming' | 'complete' | 'failed' | 'cancelled';
tool_calls: Array<{ id: string; name: string; args: Record<string, unknown> }> | null;
tool_results: { tool_call_id: string; output: unknown; truncated: boolean; error?: string } | null;
// v1.13.6: reasoning_parts captured by v1.13.1-C and read back through
// messages_with_parts. Embedded into the head-assembly payload as prose so
// the summarizer LLM sees what the model was reasoning through when it
// chose its tool calls.
reasoning_parts: Array<{ text: string }> | null;
metadata: { kind?: string } | null;
created_at: string;
}
@@ -197,7 +202,8 @@ export function buildPrompt(
// would silently drop pre-legacy-compact history before the LLM sees it.
// Compaction wants to send the entire head, full stop.) ===
interface OpenAiMessage {
// v1.13.6: exported for unit-test access (reasoning render coverage).
export interface OpenAiMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content: string | null;
tool_calls?: Array<{
@@ -212,7 +218,8 @@ function isCapHitSentinel(m: CompactionMessage): boolean {
return m.role === 'system' && m.metadata != null && m.metadata.kind === 'cap_hit';
}
function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
// v1.13.6: exported for unit-test access (reasoning render coverage).
export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
const out: OpenAiMessage[] = [];
for (const m of head) {
if (isCapHitSentinel(m)) continue;
@@ -243,9 +250,22 @@ function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
continue;
}
if (m.role === 'assistant') {
// v1.13.6: embed reasoning text as prose prefixed onto the assistant
// content. OpenAI wire shape doesn't carry reasoning as a structured
// field, but the summarizer is reading text — a tagged prose block
// gives it the same signal. We mirror the AI SDK ReasoningPart shape
// by using a <reasoning>...</reasoning> wrapper so the summarizer can
// distinguish reasoning from user-visible answer.
let body = m.content && m.content.length > 0 ? m.content : '';
if (m.reasoning_parts && m.reasoning_parts.length > 0) {
const reasoning = m.reasoning_parts.map((r) => r.text).join('');
body = body.length > 0
? `<reasoning>${reasoning}</reasoning>\n\n${body}`
: `<reasoning>${reasoning}</reasoning>`;
}
const msg: OpenAiMessage = {
role: 'assistant',
content: m.content && m.content.length > 0 ? m.content : null,
content: body.length > 0 ? body : null,
};
if (m.tool_calls && m.tool_calls.length > 0) {
msg.tool_calls = m.tool_calls.map((tc) => ({
@@ -344,8 +364,11 @@ export async function process(input: ProcessInput): Promise<void> {
// turns() boundary logic sees the same sequence the LLM will.
// v1.13.1-B: reads tool_calls/tool_results via the parts-merged view so
// the compaction payload matches what the LLM saw on the original turn.
// v1.13.6: also pulls reasoning_parts (added in v1.13.1-C) so summaries
// capture what the model was working through before each tool call.
const messages = await sql<CompactionMessage[]>`
SELECT id, role, content, kind, summary, status, tool_calls, tool_results, metadata, created_at
SELECT id, role, content, kind, summary, status, tool_calls, tool_results,
reasoning_parts, metadata, created_at
FROM messages_with_parts
WHERE chat_id = ${chatId} AND compacted_at IS NULL
ORDER BY created_at ASC, id ASC