diff --git a/apps/server/src/schema.sql b/apps/server/src/schema.sql index f8274d2..68ee801 100644 --- a/apps/server/src/schema.sql +++ b/apps/server/src/schema.sql @@ -120,6 +120,19 @@ BEGIN END IF; END $$; +-- v1.12.1: drop stale inline CHECK constraints that were superseded by the +-- named *_chk variants above. messages_status_check missed 'cancelled' and +-- messages_role_check missed 'system' — both narrower than what's in use. +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'messages_status_check') THEN + ALTER TABLE messages DROP CONSTRAINT messages_status_check; + END IF; + IF EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'messages_role_check') THEN + ALTER TABLE messages DROP CONSTRAINT messages_role_check; + END IF; +END $$; + -- v1.2-project-ux: projects.status + projects.gitea_remote -- KEEP IN SYNC: apps/server/src/types/api.ts PROJECT_STATUSES ALTER TABLE projects ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'open'; diff --git a/apps/server/src/services/inference.ts b/apps/server/src/services/inference.ts index 9e57595..27dd24b 100644 --- a/apps/server/src/services/inference.ts +++ b/apps/server/src/services/inference.ts @@ -62,7 +62,6 @@ const CAP_HIT_SUMMARY_NOTE = (limit: number) => // session/processor.ts). Threshold of 3 is the smallest value that doesn't // false-positive on a model that retries once after a transient error. export const DOOM_LOOP_THRESHOLD = 3; -export const DOOM_LOOP_SAME_NAME_THRESHOLD = 5; const DOOM_LOOP_NOTE = (name: string) => `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`; @@ -86,18 +85,6 @@ export function detectDoomLoop( return { name: ref.name, args: ref.args }; } -export function detectSameNameLoop( - recentToolCalls: ToolCall[], -): { name: string } | null { - if (recentToolCalls.length < DOOM_LOOP_SAME_NAME_THRESHOLD) return null; - const last = recentToolCalls.slice(-DOOM_LOOP_SAME_NAME_THRESHOLD); - const name = last[0]!.name; - for (let i = 1; i < last.length; i++) { - if (last[i]!.name !== name) return null; - } - return { name }; -} - function isCapHitSentinel(m: Message): boolean { return ( m.role === 'system' && @@ -814,6 +801,17 @@ async function handleAbortOrError( // genuine errors flip the dot red. v1.8.2: error path also carries a // machine-readable `reason` so the UI can render specifics inline. if (isAbort) { + // v1.12.1: defensive cancellation write. The status=${finalStatus} UPDATE + // above already sets 'cancelled' for the AbortError case, but a row can + // leak as 'streaming' when the abort fires between the post-tool-phase + // INSERT (executeToolPhase) and the next runAssistantTurn's stream setup, + // bypassing the try/catch around executeStreamPhase. The status guard + // makes this a no-op when the earlier write already landed. + await ctx.sql` + UPDATE messages + SET status = 'cancelled', content = ${accumulated}, finished_at = clock_timestamp() + WHERE id = ${args.assistantMessageId} AND status = 'streaming' + `; ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() }); ctx.publish(sessionId, { type: 'message_complete', @@ -907,6 +905,7 @@ async function executeToolPhase( // pre-stamped with output=null as a "pending" sentinel and no tool_result // frame goes out — the card renders from the tool_call frame alone. Mixed // batches still execute the other tools normally. + ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'tool_running', at: new Date().toISOString() }); let pausingForUserInput = false; await Promise.all( toolCalls.map(async (tc) => { @@ -951,13 +950,10 @@ async function executeToolPhase( ); if (pausingForUserInput) { - // Drop the dot back to idle — the card is the actionable surface now. - // The next inference turn fires from POST /api/chats/:id/answer_user_input - // once the user submits their answers. ctx.publishUser({ type: 'chat_status', chat_id: chatId, - status: 'idle', + status: 'waiting_for_input', at: new Date().toISOString(), }); ctx.log.info( @@ -972,7 +968,6 @@ async function executeToolPhase( VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp()) RETURNING id `; - ctx.log.info({ chatId, nextToolsUsed: toolsUsed + result.toolCalls.length, phase: 'executeToolPhase:before_recurse' }, 'recursing into next turn'); await runAssistantTurn(ctx, { sessionId, chatId, @@ -1057,7 +1052,6 @@ async function runAssistantTurn( args: TurnArgs, ): Promise { const { sessionId, chatId } = args; - ctx.log.info({ chatId, sessionId, toolsUsed: args.toolsUsed, recentToolCallsLen: args.recentToolCalls?.length ?? 0, phase: 'runAssistantTurn:enter' }, 'turn enter'); // v1.11: if the prior turn flagged this chat for compaction, run it first // so loadContext below reads the post-compaction history. We swallow @@ -1088,7 +1082,6 @@ async function runAssistantTurn( return; } const { session, project, history } = loaded; - ctx.log.info({ chatId, historyLen: history.length, phase: 'runAssistantTurn:loaded' }, 'context loaded'); const projectRoot = await resolveProjectRoot(project.path); // Agent resolution is per-turn so PATCH agent_id mid-conversation takes // effect on the next message. Unknown agent_id returns null silently — @@ -1106,7 +1099,6 @@ async function runAssistantTurn( await runCapHitSummary(ctx, args, session, project, history, agent, budget); return; } - ctx.log.info({ chatId, budget, toolsUsed: args.toolsUsed, phase: 'runAssistantTurn:budget_ok' }, 'budget ok'); // v1.11.6: doom-loop guard. Detected BEFORE the budget cap (the model can // burn through 3 identical calls long before the 15-call budget fires). @@ -1118,17 +1110,6 @@ async function runAssistantTurn( await runDoomLoopSummary(ctx, args, session, project, history, agent, loop); return; } - ctx.log.info({ chatId, phase: 'runAssistantTurn:no_doom_loop' }, 'no doom loop'); - - const sameNameLoop = detectSameNameLoop(args.recentToolCalls); - if (sameNameLoop) { - await runDoomLoopSummary(ctx, args, session, project, history, agent, { - name: sameNameLoop.name, - args: {}, - }); - return; - } - ctx.log.info({ chatId, phase: 'runAssistantTurn:no_same_name_loop' }, 'no same-name loop'); const messages = await buildMessagesPayload(session, project, history, agent); @@ -1142,24 +1123,17 @@ async function runAssistantTurn( const webToolsEnabled = session.web_search_enabled ?? project.default_web_search_enabled ?? false; - ctx.log.info({ chatId, msgCount: messages.length, phase: 'runAssistantTurn:payload_built' }, 'payload built'); - const state: StreamPhaseState = { accumulated: '', startedAt: null }; let result: StreamResult; try { - ctx.log.info({ chatId, model: session.model, phase: 'runAssistantTurn:before_stream' }, 'calling upstream'); result = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled); } catch (err) { await handleAbortOrError(ctx, args, state.accumulated, err); return; } - ctx.log.info({ chatId, toolCallsLen: result.toolCalls.length, finishReason: result.finishReason, contentLen: result.content?.length ?? 0, phase: 'runAssistantTurn:after_stream' }, 'upstream returned'); - if (result.toolCalls.length > 0) { - ctx.log.info({ chatId, toolNames: result.toolCalls.map(tc => tc.name), phase: 'runAssistantTurn:before_tools' }, 'executing tools'); await executeToolPhase(ctx, args, result, state.startedAt, session, projectRoot); - ctx.log.info({ chatId, phase: 'runAssistantTurn:after_tools' }, 'tools complete, returning'); return; } @@ -1465,7 +1439,6 @@ async function runDoomLoopSummary( loop: { name: string; args: Record }, ): Promise { const { sessionId, chatId, assistantMessageId, signal } = args; - ctx.log.info({ chatId, loopName: loop.name, phase: 'runDoomLoopSummary:enter' }, 'doom-loop summary firing'); const messages = await buildMessagesPayload(session, project, history, agent); messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) }); @@ -1713,7 +1686,7 @@ export function createInferenceRunner( }; // v1.8 mobile-tabs: announce working before the async loop starts so // every device subscribed to the user channel sees the amber dot. - callCtx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'working', at: new Date().toISOString() }); + callCtx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'streaming', at: new Date().toISOString() }); const controller = new AbortController(); let resolveCompleted!: () => void; const completed = new Promise((res) => { resolveCompleted = res; });