v1.13.6: compaction head-assembly audit + reasoning fix
Audit traced compaction's summary path after the v1.13.1-B read flip:

- Q1: reads from messages_with_parts (view) — clean
- Q2: parts shape correctly threaded through buildHeadPayload — clean
- Q3: reasoning omitted from summary input — FIX NEEDED

v1.13.1-C wired reasoning end-to-end into inference/payload.ts but missed this read site. The summarizer model couldn't see the reasoning trail for tool-bearing turns, quietly degrading summary quality for reasoning-channel models (qwen3.6).

Fix:

- CompactionMessage extended with a reasoning_parts field
- SELECT pulls reasoning_parts from messages_with_parts
- buildHeadPayload (now exported for tests) prefixes assistant content with <reasoning>...</reasoning>\n\n<content>... when reasoning is present; emits a standalone <reasoning>...</reasoning> for tool-call-only turns; omits the tag when reasoning is null or empty

4 new render branch tests (190 total).

Smoke deferred: forcing real compaction requires either threshold pollution or building up a >40k-token chat with reasoning_parts. Render branches are unit-covered; integration would only re-prove structural correctness.
This commit is contained in:
@@ -6,6 +6,7 @@ import {
|
||||
turns,
|
||||
select,
|
||||
buildPrompt,
|
||||
buildHeadPayload,
|
||||
type CompactionMessage,
|
||||
} from '../compaction.js';
|
||||
import { SUMMARY_TEMPLATE } from '../compaction-prompt.js';
|
||||
@@ -31,6 +32,7 @@ function mkMsg(
|
||||
status: 'complete',
|
||||
tool_calls: null,
|
||||
tool_results: null,
|
||||
reasoning_parts: null,
|
||||
metadata: null,
|
||||
created_at: new Date(counter * 1000).toISOString(),
|
||||
...overrides,
|
||||
@@ -256,3 +258,56 @@ describe('buildPrompt', () => {
|
||||
expect(out.endsWith('extra-context-line')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ---- buildHeadPayload (v1.13.6) -----------------------------------------------
|
||||
|
||||
describe('buildHeadPayload reasoning render', () => {
|
||||
it('emits reasoning as a <reasoning> tag prefixed onto the assistant content', () => {
|
||||
const out = buildHeadPayload([
|
||||
mkMsg('user', 'show me the file'),
|
||||
mkMsg('assistant', 'reading it now', {
|
||||
reasoning_parts: [{ text: 'user wants src/index.ts; I should view it' }],
|
||||
}),
|
||||
]);
|
||||
expect(out).toHaveLength(2);
|
||||
expect(out[1]!.role).toBe('assistant');
|
||||
expect(out[1]!.content).toBe(
|
||||
'<reasoning>user wants src/index.ts; I should view it</reasoning>\n\nreading it now',
|
||||
);
|
||||
});
|
||||
|
||||
it('emits a standalone <reasoning> tag when reasoning is present but content is empty (tool-call-only turn)', () => {
|
||||
const out = buildHeadPayload([
|
||||
mkMsg('assistant', '', {
|
||||
reasoning_parts: [{ text: 'jumping straight to grep' }],
|
||||
tool_calls: [{ id: 'c1', name: 'grep', args: { pattern: 'foo' } }],
|
||||
}),
|
||||
]);
|
||||
expect(out).toHaveLength(1);
|
||||
expect(out[0]!.content).toBe('<reasoning>jumping straight to grep</reasoning>');
|
||||
expect(out[0]!.tool_calls).toHaveLength(1);
|
||||
expect(out[0]!.tool_calls![0]!.function.name).toBe('grep');
|
||||
});
|
||||
|
||||
it('joins multiple reasoning parts without separators (matches the streaming concat)', () => {
|
||||
const out = buildHeadPayload([
|
||||
mkMsg('assistant', 'final answer', {
|
||||
reasoning_parts: [{ text: 'first thought ' }, { text: 'second thought' }],
|
||||
}),
|
||||
]);
|
||||
expect(out[0]!.content).toBe(
|
||||
'<reasoning>first thought second thought</reasoning>\n\nfinal answer',
|
||||
);
|
||||
});
|
||||
|
||||
it('omits the reasoning tag entirely when reasoning_parts is null or empty', () => {
|
||||
const out = buildHeadPayload([
|
||||
mkMsg('assistant', 'plain answer', { reasoning_parts: null }),
|
||||
mkMsg('assistant', 'other answer', { reasoning_parts: [] }),
|
||||
]);
|
||||
expect(out[0]!.content).toBe('plain answer');
|
||||
expect(out[1]!.content).toBe('other answer');
|
||||
expect(out[0]!.content).not.toContain('<reasoning>');
|
||||
expect(out[1]!.content).not.toContain('<reasoning>');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -39,6 +39,11 @@ export interface CompactionMessage {
|
||||
status: 'streaming' | 'complete' | 'failed' | 'cancelled';
|
||||
tool_calls: Array<{ id: string; name: string; args: Record<string, unknown> }> | null;
|
||||
tool_results: { tool_call_id: string; output: unknown; truncated: boolean; error?: string } | null;
|
||||
// v1.13.6: reasoning_parts captured by v1.13.1-C and read back through
|
||||
// messages_with_parts. Embedded into the head-assembly payload as prose so
|
||||
// the summarizer LLM sees what the model was reasoning through when it
|
||||
// chose its tool calls.
|
||||
reasoning_parts: Array<{ text: string }> | null;
|
||||
metadata: { kind?: string } | null;
|
||||
created_at: string;
|
||||
}
|
||||
@@ -197,7 +202,8 @@ export function buildPrompt(
|
||||
// would silently drop pre-legacy-compact history before the LLM sees it.
|
||||
// Compaction wants to send the entire head, full stop.) ===
|
||||
|
||||
interface OpenAiMessage {
|
||||
// v1.13.6: exported for unit-test access (reasoning render coverage).
|
||||
export interface OpenAiMessage {
|
||||
role: 'system' | 'user' | 'assistant' | 'tool';
|
||||
content: string | null;
|
||||
tool_calls?: Array<{
|
||||
@@ -212,7 +218,8 @@ function isCapHitSentinel(m: CompactionMessage): boolean {
|
||||
return m.role === 'system' && m.metadata != null && m.metadata.kind === 'cap_hit';
|
||||
}
|
||||
|
||||
function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
|
||||
// v1.13.6: exported for unit-test access (reasoning render coverage).
|
||||
export function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
|
||||
const out: OpenAiMessage[] = [];
|
||||
for (const m of head) {
|
||||
if (isCapHitSentinel(m)) continue;
|
||||
@@ -243,9 +250,22 @@ function buildHeadPayload(head: CompactionMessage[]): OpenAiMessage[] {
|
||||
continue;
|
||||
}
|
||||
if (m.role === 'assistant') {
|
||||
// v1.13.6: embed reasoning text as prose prefixed onto the assistant
|
||||
// content. OpenAI wire shape doesn't carry reasoning as a structured
|
||||
// field, but the summarizer is reading text — a tagged prose block
|
||||
// gives it the same signal. We mirror the AI SDK ReasoningPart shape
|
||||
// by using a <reasoning>...</reasoning> wrapper so the summarizer can
|
||||
// distinguish reasoning from user-visible answer.
|
||||
let body = m.content && m.content.length > 0 ? m.content : '';
|
||||
if (m.reasoning_parts && m.reasoning_parts.length > 0) {
|
||||
const reasoning = m.reasoning_parts.map((r) => r.text).join('');
|
||||
body = body.length > 0
|
||||
? `<reasoning>${reasoning}</reasoning>\n\n${body}`
|
||||
: `<reasoning>${reasoning}</reasoning>`;
|
||||
}
|
||||
const msg: OpenAiMessage = {
|
||||
role: 'assistant',
|
||||
content: m.content && m.content.length > 0 ? m.content : null,
|
||||
content: body.length > 0 ? body : null,
|
||||
};
|
||||
if (m.tool_calls && m.tool_calls.length > 0) {
|
||||
msg.tool_calls = m.tool_calls.map((tc) => ({
|
||||
@@ -344,8 +364,11 @@ export async function process(input: ProcessInput): Promise<void> {
|
||||
// turns() boundary logic sees the same sequence the LLM will.
|
||||
// v1.13.1-B: reads tool_calls/tool_results via the parts-merged view so
|
||||
// the compaction payload matches what the LLM saw on the original turn.
|
||||
// v1.13.6: also pulls reasoning_parts (added in v1.13.1-C) so summaries
|
||||
// capture what the model was working through before each tool call.
|
||||
const messages = await sql<CompactionMessage[]>`
|
||||
SELECT id, role, content, kind, summary, status, tool_calls, tool_results, metadata, created_at
|
||||
SELECT id, role, content, kind, summary, status, tool_calls, tool_results,
|
||||
reasoning_parts, metadata, created_at
|
||||
FROM messages_with_parts
|
||||
WHERE chat_id = ${chatId} AND compacted_at IS NULL
|
||||
ORDER BY created_at ASC, id ASC
|
||||
|
||||
Reference in New Issue
Block a user