v1.9.7: ask_user_input elicitation tool

This commit is contained in:
2026-05-18 02:15:18 +00:00
parent adb5d7b3bb
commit d85b17081e
9 changed files with 710 additions and 4 deletions

View File

@@ -123,6 +123,9 @@ async function main() {
chat_id: chatId,
});
},
publishSessionFrame: (sessionId, frame) => {
broker.publish(sessionId, frame);
},
});
registerSkillsRoutes(app, sql, {
enqueueInference: (sessionId, chatId, assistantId, user) => {

View File

@@ -1,7 +1,7 @@
import type { FastifyInstance } from 'fastify';
import { z } from 'zod';
import type { Sql } from '../db.js';
import type { Chat, Message, Session } from '../types/api.js';
import type { Chat, Message, Session, ToolCall } from '../types/api.js';
const SendBody = z.object({
content: z.string().min(1).max(64_000),
@@ -14,6 +14,39 @@ const ContinueBody = z.object({
sentinel_message_id: z.string().uuid(),
});
// Batch 9.7: ask_user_input answer submission. Defensive shape — the question
// content is echoed back for traceability but the server does NOT trust it
// (the source of truth is the assistant message's tool_calls.args.questions).
const AnswerUserInputBody = z.object({
tool_call_id: z.string().min(1),
answers: z
.array(
z.object({
question: z.string(),
selected_options: z.array(z.string()),
free_text: z.string().nullable(),
}),
)
.min(1)
.max(3),
});
// Same shape the model declared via the tool's zod input. Re-derived here so
// the route can validate args without depending on services/tools.ts (which
// would pull in fs/path_guard for nothing).
const AskUserInputArgs = z.object({
questions: z
.array(
z.object({
question: z.string(),
type: z.enum(['single_select', 'multi_select']),
options: z.array(z.string()).min(1),
}),
)
.min(1)
.max(3),
});
interface MessageHandlers {
enqueueInference: (sessionId: string, chatId: string, assistantMessageId: string, user: string) => void;
enqueueCompact: (sessionId: string, chatId: string, compactMessageId: string, user: string) => void;
@@ -24,6 +57,13 @@ interface MessageHandlers {
content: string
) => void;
publishMessagesDeleted: (sessionId: string, chatId: string, messageIds: string[]) => void;
// Batch 9.7: lets the answer endpoint emit the tool_result frame that the
// pause path intentionally skipped. Matches SkillInvokeHandlers in
// routes/skills.ts so index.ts can pass the same broker.publish adapter.
publishSessionFrame: (
sessionId: string,
frame: Record<string, unknown> & { type: string }
) => void;
cancelInference: (sessionId: string, chatId: string) => Promise<boolean>;
hasActiveInference: (chatId: string) => boolean;
}
@@ -389,4 +429,169 @@ export function registerMessageRoutes(
return result;
}
);
// Batch 9.7: resume an ask_user_input pause. Validates the body matches the
// question shape the model declared, UPDATEs the pending tool row's
// tool_results to the AnswerSet, publishes the deferred tool_result frame,
// and enqueues the next assistant turn. Error codes per spec:
// 400 invalid_body / mismatched_answer_shape
// 404 chat_not_found / unknown_tool_call_id
// 409 tool_call_already_answered
app.post<{ Params: { id: string } }>(
'/api/chats/:id/answer_user_input',
async (req, reply) => {
const parsed = AnswerUserInputBody.safeParse(req.body);
if (!parsed.success) {
reply.code(400);
return { error: 'invalid_body', details: parsed.error.flatten() };
}
const { tool_call_id, answers } = parsed.data;
const chatRows = await sql<Chat[]>`
SELECT id, session_id FROM chats WHERE id = ${req.params.id} AND status = 'open'
`;
if (chatRows.length === 0) {
reply.code(404);
return { error: 'chat_not_found' };
}
const chat = chatRows[0]!;
const sessionId = chat.session_id;
// Find the assistant message that emitted this tool_call. Scoped by
// chat_id + role to avoid cross-chat lookups; ordered by created_at DESC
// because the most recent issuance wins when an LLM reuses call IDs
// across turns (the older, already-answered one is a different row with
// populated tool_results downstream).
const callerRows = await sql<{ id: string; tool_calls: ToolCall[] | null }[]>`
SELECT id, tool_calls FROM messages
WHERE chat_id = ${chat.id}
AND role = 'assistant'
AND tool_calls IS NOT NULL
ORDER BY created_at DESC
`;
let foundCall: ToolCall | null = null;
for (const row of callerRows) {
const match = row.tool_calls?.find((tc) => tc.id === tool_call_id);
if (match) {
foundCall = match;
break;
}
}
if (!foundCall) {
reply.code(404);
return { error: 'unknown_tool_call_id' };
}
if (foundCall.name !== 'ask_user_input') {
reply.code(400);
return { error: 'tool_call_not_ask_user_input' };
}
// Validate the args themselves — the LLM could have emitted bad JSON.
const argsParsed = AskUserInputArgs.safeParse(foundCall.args);
if (!argsParsed.success) {
reply.code(400);
return { error: 'mismatched_answer_shape', detail: 'tool_call args invalid' };
}
const questions = argsParsed.data.questions;
if (answers.length !== questions.length) {
reply.code(400);
return {
error: 'mismatched_answer_shape',
detail: `expected ${questions.length} answer(s), got ${answers.length}`,
};
}
for (let i = 0; i < questions.length; i++) {
const q = questions[i]!;
const a = answers[i]!;
for (const sel of a.selected_options) {
if (!q.options.includes(sel)) {
reply.code(400);
return {
error: 'mismatched_answer_shape',
detail: `answer ${i + 1} contains option not in question: ${sel}`,
};
}
}
if (q.type === 'single_select' && a.selected_options.length > 1) {
reply.code(400);
return {
error: 'mismatched_answer_shape',
detail: `answer ${i + 1} has multiple selections on single_select`,
};
}
const hasOpt = a.selected_options.length > 0;
const hasText = a.free_text !== null && a.free_text.trim().length > 0;
if (!hasOpt && !hasText) {
reply.code(400);
return { error: 'mismatched_answer_shape', detail: `answer ${i + 1} is empty` };
}
}
// Find the pending tool row. ORDER BY created_at DESC + LIMIT 1 picks
// the most recent row with this tool_call_id; the already-answered
// check below guards against UPDATE-ing a stale answer.
const toolRows = await sql<{
id: string;
tool_results: { tool_call_id: string; output: unknown } | null;
}[]>`
SELECT id, tool_results FROM messages
WHERE chat_id = ${chat.id}
AND role = 'tool'
AND tool_results->>'tool_call_id' = ${tool_call_id}
ORDER BY created_at DESC
LIMIT 1
`;
const toolRow = toolRows[0];
if (!toolRow) {
reply.code(404);
return { error: 'unknown_tool_call_id', detail: 'tool message not found' };
}
if (toolRow.tool_results && toolRow.tool_results.output !== null) {
reply.code(409);
return { error: 'tool_call_already_answered' };
}
const answerSet = { answers };
const newToolResults = {
tool_call_id,
output: answerSet,
truncated: false,
};
const result = await sql.begin(async (tx) => {
await tx`
UPDATE messages
SET tool_results = ${tx.json(newToolResults as never)}
WHERE id = ${toolRow.id}
`;
const [assistantMsg] = await tx<{ id: string }[]>`
INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
VALUES (${sessionId}, ${chat.id}, 'assistant', '', 'streaming', clock_timestamp())
RETURNING id
`;
await tx`UPDATE sessions SET updated_at = clock_timestamp() WHERE id = ${sessionId}`;
await tx`UPDATE chats SET updated_at = clock_timestamp() WHERE id = ${chat.id}`;
return {
tool_message_id: toolRow.id,
assistant_message_id: assistantMsg!.id,
};
});
// Publish the deferred tool_result frame. useSessionStream's reducer
// updates the matching tool_run.result so AskUserInputCard flips into
// its read-only "answered" mode without a refetch.
handlers.publishSessionFrame(sessionId, {
type: 'tool_result',
tool_message_id: result.tool_message_id,
tool_call_id,
chat_id: chat.id,
output: answerSet,
truncated: false,
});
handlers.enqueueInference(sessionId, chat.id, result.assistant_message_id, 'default');
reply.code(202);
return result;
},
);
}

View File

@@ -15,9 +15,12 @@ const CACHE_TTL_MS = 60_000;
// explicit `tools:` field inherit the full default set (which now includes
// the skill tools); agents with an explicit `tools:` array must list any
// skill tool they want to use — strict opt-in.
// Batch 9.7: ask_user_input added — same opt-in semantics. Agents with an
// explicit tools list that omits it cannot trigger the interactive picker.
const ALL_TOOL_NAMES = [
'view_file', 'list_dir', 'grep', 'find_files', 'git_status',
'skill_find', 'skill_use', 'skill_resource',
'ask_user_input',
] as const;
const DEFAULT_TOOLS: string[] = [...ALL_TOOL_NAMES];
const DEFAULT_TEMPERATURE = 0.7;

View File

@@ -665,6 +665,12 @@ async function executeToolPhase(
model: session.model,
});
// Batch 9.7: ask_user_input pauses the loop. The tool row is still inserted
// (the answer endpoint needs a target row to UPDATE), but tool_results is
// pre-stamped with output=null as a "pending" sentinel and no tool_result
// frame goes out — the card renders from the tool_call frame alone. Mixed
// batches still execute the other tools normally.
let pausingForUserInput = false;
await Promise.all(
toolCalls.map(async (tc) => {
const [toolRow] = await ctx.sql<{ id: string }[]>`
@@ -673,6 +679,16 @@ async function executeToolPhase(
RETURNING id
`;
const toolMessageId = toolRow!.id;
if (tc.name === 'ask_user_input') {
pausingForUserInput = true;
const sentinel = { tool_call_id: tc.id, output: null, truncated: false };
await ctx.sql`
UPDATE messages
SET tool_results = ${ctx.sql.json(sentinel as never)}
WHERE id = ${toolMessageId}
`;
return;
}
const tres = await executeToolCall(projectRoot, tc);
const stored = {
tool_call_id: tc.id,
@@ -697,6 +713,23 @@ async function executeToolPhase(
})
);
if (pausingForUserInput) {
// Drop the dot back to idle — the card is the actionable surface now.
// The next inference turn fires from POST /api/chats/:id/answer_user_input
// once the user submits their answers.
ctx.publishUser({
type: 'chat_status',
chat_id: chatId,
status: 'idle',
at: new Date().toISOString(),
});
ctx.log.info(
{ sessionId, chatId, assistantMessageId },
'inference paused awaiting user input',
);
return;
}
const [nextAssistant] = await ctx.sql<{ id: string }[]>`
INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp())

View File

@@ -405,6 +405,81 @@ export const skillResource: ToolDef<SkillResourceInputT> = {
},
};
// Batch 9.7: ask_user_input. Interactive elicitation. The model emits a tool
// call with 1-3 structured questions; the inference loop PAUSES (does not
// execute the tool server-side, does not recurse) and waits for the frontend
// to POST /api/chats/:id/answer_user_input with the user's selections. See
// routes/messages.ts for the resume path and services/inference.ts for the
// pause branch in executeToolPhase.
const AskUserInputInput = z.object({
questions: z
.array(
z.object({
question: z.string().min(1).max(200),
type: z.enum(['single_select', 'multi_select']),
options: z.array(z.string().min(1).max(80)).min(2).max(6),
}),
)
.min(1)
.max(3),
});
type AskUserInputInputT = z.infer<typeof AskUserInputInput>;
export const askUserInput: ToolDef<AskUserInputInputT> = {
name: 'ask_user_input',
description:
"Ask the user 1-3 structured questions through an inline picker UI. Use when you genuinely need a choice the user must make (e.g. scope, options, preferences) before continuing. Each question has 2-6 options and accepts free-text answers in addition. The tool call pauses the conversation until the user submits — the next assistant turn sees their answers as the tool result. Do not use for trivial yes/no clarifications you could infer; prefer it over multi-paragraph speculation about what the user might want.",
inputSchema: AskUserInputInput,
jsonSchema: {
type: 'function',
function: {
name: 'ask_user_input',
description:
'Ask the user 1-3 structured questions through an inline picker. Pauses the conversation until the user answers; the next turn sees their selections.',
parameters: {
type: 'object',
properties: {
questions: {
type: 'array',
minItems: 1,
maxItems: 3,
items: {
type: 'object',
properties: {
question: { type: 'string', description: '<=200 chars, shown to the user' },
type: {
type: 'string',
enum: ['single_select', 'multi_select'],
description: 'single_select = at most one option; multi_select = any subset',
},
options: {
type: 'array',
minItems: 2,
maxItems: 6,
items: { type: 'string' },
description: '2-6 strings, each <=80 chars; free-text input is always available alongside',
},
},
required: ['question', 'type', 'options'],
additionalProperties: false,
},
},
},
required: ['questions'],
additionalProperties: false,
},
},
},
// Server-side no-op. The "execution" of ask_user_input is the user's
// response, captured client-side and posted to /api/chats/:id/answer_user_input.
// The inference loop detects this tool by name and pauses before reaching
// executeToolCall — this fallback only runs if something bypasses that
// branch, in which case the pending sentinel matches the pause-path shape.
async execute(input) {
return { _pending: true, questions: input.questions };
},
};
export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
viewFile as ToolDef<unknown>,
listDir as ToolDef<unknown>,
@@ -414,6 +489,7 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
skillFind as ToolDef<unknown>,
skillUse as ToolDef<unknown>,
skillResource as ToolDef<unknown>,
askUserInput as ToolDef<unknown>,
];
// v1.8.2: forward-compatible read-only whitelist. An agent whose `tools` is
@@ -422,6 +498,8 @@ export const ALL_TOOLS: ReadonlyArray<ToolDef<unknown>> = [
// default (10). Every tool in v1.8.2 happens to be read-only, so the
// non-RO branch only takes effect once BooCoder lands write tools.
// Batch 9.6: skill_* added; all still read-only.
// Batch 9.7: ask_user_input added — it pauses execution but doesn't mutate
// project state, so it belongs in the read-only set for budget purposes.
export const READ_ONLY_TOOL_NAMES = [
'view_file',
'list_dir',
@@ -431,6 +509,7 @@ export const READ_ONLY_TOOL_NAMES = [
'skill_find',
'skill_use',
'skill_resource',
'ask_user_input',
] as const;
export const TOOLS_BY_NAME: Record<string, ToolDef<unknown>> = Object.fromEntries(