Compare commits
9 Commits
fce8c06932
...
v1.12.4-in
| Author | SHA1 | Date | |
|---|---|---|---|
| 9ef00c0268 | |||
| c87df6981a | |||
| 8fa7b7fce9 | |||
| ea468ca7fb | |||
| eef4782383 | |||
| a7104691aa | |||
| 1a0a3b1673 | |||
| 48ee63a286 | |||
| d58d553503 |
@@ -16,7 +16,7 @@ import { registerWebSocket } from './routes/ws.js';
|
|||||||
import { registerModelRoutes } from './routes/models.js';
|
import { registerModelRoutes } from './routes/models.js';
|
||||||
import { registerAgentRoutes } from './routes/agents.js';
|
import { registerAgentRoutes } from './routes/agents.js';
|
||||||
import { registerSkillsRoutes } from './routes/skills.js';
|
import { registerSkillsRoutes } from './routes/skills.js';
|
||||||
import { createInferenceRunner } from './services/inference.js';
|
import { createInferenceRunner } from './services/inference/index.js';
|
||||||
import { createBroker } from './services/broker.js';
|
import { createBroker } from './services/broker.js';
|
||||||
import { listSkills } from './services/skills.js';
|
import { listSkills } from './services/skills.js';
|
||||||
import * as compaction from './services/compaction.js';
|
import * as compaction from './services/compaction.js';
|
||||||
@@ -49,6 +49,18 @@ async function main() {
|
|||||||
await applySchema(sql);
|
await applySchema(sql);
|
||||||
app.log.info('database schema applied');
|
app.log.info('database schema applied');
|
||||||
|
|
||||||
|
// Startup sweep: any assistant row still marked 'streaming' more than five
// minutes after creation is flipped to 'failed' so the UI does not show a
// spinner forever. Like the other terminal transitions in this codebase
// (discard_stale, the inference error handler) the sweep also stamps
// finished_at and normalises a NULL content to '' so swept rows look like
// any other failed message.
const swept = await sql<{ count: string }[]>`
  WITH swept AS (
    UPDATE messages
    SET status = 'failed',
        content = COALESCE(content, ''),
        finished_at = clock_timestamp()
    WHERE status = 'streaming' AND created_at < NOW() - INTERVAL '5 minutes'
    RETURNING id
  ) SELECT count(*)::text AS count FROM swept
`;
// count(*) is cast to text in SQL, so convert it back to a number here.
const sweptCount = Number(swept[0]?.count ?? 0);
if (sweptCount > 0) {
  app.log.info({ sweptCount }, 'swept stale streaming messages to failed');
}
|
||||||
|
|
||||||
// v1.11.3: tell the model-context cache where llama-swap lives. Cache
|
// v1.11.3: tell the model-context cache where llama-swap lives. Cache
|
||||||
// lookups go to ${LLAMA_SWAP_URL}/upstream/<model>/props to read
|
// lookups go to ${LLAMA_SWAP_URL}/upstream/<model>/props to read
|
||||||
// default_generation_settings.n_ctx — the value persisted as messages.ctx_max.
|
// default_generation_settings.n_ctx — the value persisted as messages.ctx_max.
|
||||||
|
|||||||
@@ -18,6 +18,12 @@ const ForkBody = z.object({
|
|||||||
name: z.string().min(1).max(200).optional(),
|
name: z.string().min(1).max(200).optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** Request body for POST /api/chats/:id/discard_stale. */
const DiscardStaleBody = z.object({
  // The assistant message the client wants to give up on.
  message_id: z.string().uuid(),
});

/**
 * Minimum age, in seconds since the row's created_at, before the server
 * agrees to discard a message that is still marked as streaming.
 */
const STALE_MIN_AGE_SECONDS = 60;
|
||||||
|
|
||||||
export function registerChatRoutes(
|
export function registerChatRoutes(
|
||||||
app: FastifyInstance,
|
app: FastifyInstance,
|
||||||
sql: Sql,
|
sql: Sql,
|
||||||
@@ -320,6 +326,73 @@ export function registerChatRoutes(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// v1.12.3: lets the client explicitly recover from an assistant row that is
// stuck in 'streaming'. The frontend only offers this after 60s without token
// activity; the server independently re-checks the row's status and its age
// (measured from created_at). Anything that is not an old-enough streaming
// row is answered with 409, which the frontend can treat as a benign race.
app.post<{ Params: { id: string } }>(
  '/api/chats/:id/discard_stale',
  async (req, reply) => {
    const body = DiscardStaleBody.safeParse(req.body ?? {});
    if (!body.success) {
      reply.code(400);
      return { error: 'invalid body', details: body.error.flatten() };
    }

    // Small helper so every conflict answer sets the status code the same way.
    const conflict = <T extends object>(payload: T): T => {
      reply.code(409);
      return payload;
    };

    // The message must belong to the chat named in the URL.
    const [candidate] = await sql<{
      id: string;
      session_id: string;
      chat_id: string;
      status: string;
      age_seconds: number;
    }[]>`
      SELECT id, session_id, chat_id, status,
             EXTRACT(EPOCH FROM (clock_timestamp() - created_at))::int AS age_seconds
      FROM messages
      WHERE id = ${body.data.message_id} AND chat_id = ${req.params.id}
    `;
    if (candidate === undefined) {
      reply.code(404);
      return { error: 'message not found in chat' };
    }
    if (candidate.status !== 'streaming') {
      return conflict({ error: 'message is no longer streaming', current_status: candidate.status });
    }
    if (candidate.age_seconds < STALE_MIN_AGE_SECONDS) {
      return conflict({ error: 'message is not stale yet', age_seconds: candidate.age_seconds });
    }

    // The status guard in the WHERE clause makes the flip safe against a
    // concurrent writer that finished the message after the SELECT above.
    const [discarded] = await sql<Message[]>`
      UPDATE messages
      SET status = 'failed',
          content = COALESCE(content, ''),
          finished_at = clock_timestamp()
      WHERE id = ${candidate.id} AND status = 'streaming'
      RETURNING id, session_id, chat_id, role, content, kind, tool_calls, tool_results,
                status, last_seq, tokens_used, ctx_used, ctx_max, started_at, finished_at,
                created_at, metadata, summary, tail_start_id, compacted_at
    `;
    if (discarded === undefined) {
      // Race: the row left 'streaming' between the SELECT and the UPDATE.
      return conflict({ error: 'message status changed mid-request' });
    }

    // Tell listeners the chat is idle again and the message reached a
    // terminal state.
    broker.publishUser('default', {
      type: 'chat_status',
      chat_id: candidate.chat_id,
      status: 'idle',
      at: new Date().toISOString(),
    });
    broker.publish(candidate.session_id, {
      type: 'message_complete',
      message_id: candidate.id,
      chat_id: candidate.chat_id,
    });
    return discarded;
  }
);
|
||||||
|
|
||||||
app.get<{ Params: { id: string } }>(
|
app.get<{ Params: { id: string } }>(
|
||||||
'/api/chats/:id/messages',
|
'/api/chats/:id/messages',
|
||||||
async (req, reply) => {
|
async (req, reply) => {
|
||||||
|
|||||||
@@ -13,6 +13,18 @@ const CreateBody = z.object({
|
|||||||
agent_id: z.string().min(1).max(200).nullable().optional(),
|
agent_id: z.string().min(1).max(200).nullable().optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * One pane of a session's workspace layout as accepted from the client.
 * String ids are limited to 1–200 characters and a pane may list at most
 * 50 chat ids.
 */
const WorkspacePaneZ = z.object({
  id: z.string().min(1).max(200),
  kind: z.enum(['chat', 'terminal', 'agent', 'empty', 'settings']),
  chatId: z.string().min(1).max(200).optional(),
  chatIds: z.array(z.string().min(1).max(200)).max(50),
  activeChatIdx: z.number().int(),
});

/** Request body for PATCH /api/sessions/:id/workspace: at most 10 panes. */
const WorkspacePanesBody = z.object({
  workspace_panes: z.array(WorkspacePaneZ).max(10),
});
|
||||||
|
|
||||||
const PatchBody = z.object({
|
const PatchBody = z.object({
|
||||||
name: z.string().min(1).max(200).optional(),
|
name: z.string().min(1).max(200).optional(),
|
||||||
model: z.string().min(1).max(200).optional(),
|
model: z.string().min(1).max(200).optional(),
|
||||||
@@ -44,7 +56,7 @@ export function registerSessionRoutes(
|
|||||||
}
|
}
|
||||||
const status = req.query.status === 'archived' ? 'archived' : 'open';
|
const status = req.query.status === 'archived' ? 'archived' : 'open';
|
||||||
const rows = await sql<Session[]>`
|
const rows = await sql<Session[]>`
|
||||||
SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled
|
SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled, workspace_panes
|
||||||
FROM sessions
|
FROM sessions
|
||||||
WHERE project_id = ${req.params.id} AND status = ${status}
|
WHERE project_id = ${req.params.id} AND status = ${status}
|
||||||
ORDER BY updated_at DESC
|
ORDER BY updated_at DESC
|
||||||
@@ -92,7 +104,7 @@ export function registerSessionRoutes(
|
|||||||
const [session] = await tx<Session[]>`
|
const [session] = await tx<Session[]>`
|
||||||
INSERT INTO sessions (project_id, name, model, system_prompt, agent_id)
|
INSERT INTO sessions (project_id, name, model, system_prompt, agent_id)
|
||||||
VALUES (${req.params.id}, ${name}, ${model}, ${systemPrompt}, ${agentId})
|
VALUES (${req.params.id}, ${name}, ${model}, ${systemPrompt}, ${agentId})
|
||||||
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled
|
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled, workspace_panes
|
||||||
`;
|
`;
|
||||||
await tx`
|
await tx`
|
||||||
INSERT INTO chats (session_id, name, status)
|
INSERT INTO chats (session_id, name, status)
|
||||||
@@ -112,7 +124,7 @@ export function registerSessionRoutes(
|
|||||||
|
|
||||||
app.get<{ Params: { id: string } }>('/api/sessions/:id', async (req, reply) => {
|
app.get<{ Params: { id: string } }>('/api/sessions/:id', async (req, reply) => {
|
||||||
const rows = await sql<Session[]>`
|
const rows = await sql<Session[]>`
|
||||||
SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled
|
SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled, workspace_panes
|
||||||
FROM sessions WHERE id = ${req.params.id}
|
FROM sessions WHERE id = ${req.params.id}
|
||||||
`;
|
`;
|
||||||
if (rows.length === 0) {
|
if (rows.length === 0) {
|
||||||
@@ -158,7 +170,7 @@ export function registerSessionRoutes(
|
|||||||
updated_at = clock_timestamp()
|
updated_at = clock_timestamp()
|
||||||
WHERE id = ${req.params.id}
|
WHERE id = ${req.params.id}
|
||||||
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at,
|
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at,
|
||||||
agent_id, web_search_enabled
|
agent_id, web_search_enabled, workspace_panes
|
||||||
`;
|
`;
|
||||||
if (rows.length === 0) {
|
if (rows.length === 0) {
|
||||||
reply.code(404);
|
reply.code(404);
|
||||||
@@ -187,6 +199,36 @@ export function registerSessionRoutes(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Replaces a session's stored workspace pane layout, bumps updated_at, and
// broadcasts the saved layout on the 'default' user channel.
// Responds 400 on an invalid body and 404 when the session does not exist.
app.patch<{ Params: { id: string } }>(
  '/api/sessions/:id/workspace',
  async (req, reply) => {
    const body = WorkspacePanesBody.safeParse(req.body);
    if (!body.success) {
      reply.code(400);
      return { error: 'invalid body', details: body.error.flatten() };
    }

    const panes = body.data.workspace_panes;
    const [saved] = await sql<Session[]>`
      UPDATE sessions
      SET workspace_panes = ${sql.json(panes as never)},
          updated_at = clock_timestamp()
      WHERE id = ${req.params.id}
      RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at,
                agent_id, web_search_enabled, workspace_panes
    `;
    if (saved === undefined) {
      reply.code(404);
      return { error: 'session not found' };
    }

    // Publish what the database actually stored, not the raw request body.
    broker.publishUser('default', {
      type: 'session_workspace_updated',
      session_id: saved.id,
      workspace_panes: saved.workspace_panes,
    });
    return saved;
  }
);
|
||||||
|
|
||||||
// v1.9: bulk-archive every open session in a project. Mirrors the
|
// v1.9: bulk-archive every open session in a project. Mirrors the
|
||||||
// single-archive shape (same broker frame type) so the existing useSidebar
|
// single-archive shape (same broker frame type) so the existing useSidebar
|
||||||
// reducer cases handle it without changes — just N frames instead of 1.
|
// reducer cases handle it without changes — just N frames instead of 1.
|
||||||
@@ -263,7 +305,7 @@ export function registerSessionRoutes(
|
|||||||
const rows = await sql<Session[]>`
|
const rows = await sql<Session[]>`
|
||||||
UPDATE sessions SET status = 'open', updated_at = clock_timestamp()
|
UPDATE sessions SET status = 'open', updated_at = clock_timestamp()
|
||||||
WHERE id = ${req.params.id} AND status = 'archived'
|
WHERE id = ${req.params.id} AND status = 'archived'
|
||||||
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled
|
RETURNING id, project_id, name, model, system_prompt, status, created_at, updated_at, agent_id, web_search_enabled, workspace_panes
|
||||||
`;
|
`;
|
||||||
if (rows.length === 0) {
|
if (rows.length === 0) {
|
||||||
reply.code(404);
|
reply.code(404);
|
||||||
|
|||||||
@@ -47,22 +47,14 @@ CREATE TABLE IF NOT EXISTS settings (
|
|||||||
|
|
||||||
INSERT INTO settings (key, value) VALUES ('default_model', '"qwen3.6-35b-a3b-mxfp4"') ON CONFLICT (key) DO NOTHING;
|
INSERT INTO settings (key, value) VALUES ('default_model', '"qwen3.6-35b-a3b-mxfp4"') ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
-- DEPRECATED: client-side pane state as of v1.2-batch4. Table retained per
|
-- v1.12.1: deprecated session_panes table removed. Workspace pane state now
|
||||||
-- additive schema rule; no writes. Drop in a future destructive migration.
|
-- lives in sessions.workspace_panes (jsonb), see below.
|
||||||
CREATE TABLE IF NOT EXISTS session_panes (
|
DROP TABLE IF EXISTS session_panes;
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
||||||
session_id UUID NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
||||||
position INTEGER NOT NULL,
|
|
||||||
kind TEXT NOT NULL CHECK (kind IN ('chat', 'file_browser', 'terminal')),
|
|
||||||
state JSONB NOT NULL DEFAULT '{}',
|
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp(),
|
|
||||||
UNIQUE (session_id, position)
|
|
||||||
);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_session_panes_session ON session_panes (session_id);
|
|
||||||
|
|
||||||
-- v1.4: backfill removed. Pane layout is client-side (localStorage) since v1.2-batch4.
|
-- v1.12.1: server-side workspace pane layout, replaces localStorage so every
|
||||||
-- The CREATE TABLE above is retained for additive-schema discipline; drop is a
|
-- device sees the same panes for a given session. Shape matches
|
||||||
-- future destructive migration.
|
-- WorkspacePane[] from apps/server/src/types/api.ts.
|
||||||
|
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS workspace_panes JSONB NOT NULL DEFAULT '[]'::jsonb;
|
||||||
|
|
||||||
-- v1.2: sessions.status (open | archived)
|
-- v1.2: sessions.status (open | archived)
|
||||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'open';
|
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'open';
|
||||||
@@ -128,6 +120,19 @@ BEGIN
|
|||||||
END IF;
|
END IF;
|
||||||
END $$;
|
END $$;
|
||||||
|
|
||||||
|
-- v1.12.1: drop stale inline CHECK constraints that were superseded by the
-- named *_chk variants above. messages_status_check missed 'cancelled' and
-- messages_role_check missed 'system' — both narrower than what's in use.
-- DROP CONSTRAINT IF EXISTS is scoped to the messages table and is a no-op
-- (NOTICE only) when the constraint is already gone, so re-running the schema
-- stays safe. A name-only lookup in pg_constraint could match a constraint of
-- the same name on a different table and then fail on the ALTER.
ALTER TABLE messages DROP CONSTRAINT IF EXISTS messages_status_check;
ALTER TABLE messages DROP CONSTRAINT IF EXISTS messages_role_check;
|
||||||
|
|
||||||
-- v1.2-project-ux: projects.status + projects.gitea_remote
|
-- v1.2-project-ux: projects.status + projects.gitea_remote
|
||||||
-- KEEP IN SYNC: apps/server/src/types/api.ts PROJECT_STATUSES
|
-- KEEP IN SYNC: apps/server/src/types/api.ts PROJECT_STATUSES
|
||||||
ALTER TABLE projects ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'open';
|
ALTER TABLE projects ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'open';
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect } from 'vitest';
|
import { describe, it, expect } from 'vitest';
|
||||||
import { DOOM_LOOP_THRESHOLD, detectDoomLoop } from '../inference.js';
|
import { DOOM_LOOP_THRESHOLD, detectDoomLoop } from '../inference/index.js';
|
||||||
import type { ToolCall } from '../../types/api.js';
|
import type { ToolCall } from '../../types/api.js';
|
||||||
|
|
||||||
// ---- fixture ----------------------------------------------------------------
|
// ---- fixture ----------------------------------------------------------------
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect } from 'vitest';
|
import { describe, it, expect } from 'vitest';
|
||||||
import { buildMessagesPayload } from '../inference.js';
|
import { buildMessagesPayload } from '../inference/index.js';
|
||||||
import type {
|
import type {
|
||||||
Message,
|
Message,
|
||||||
MessageRole,
|
MessageRole,
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import type { InferenceContext } from './inference.js';
|
import type { InferenceContext } from './inference/index.js';
|
||||||
|
|
||||||
const NAMING_SYSTEM_PROMPT =
|
const NAMING_SYSTEM_PROMPT =
|
||||||
'You name chat sessions. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
|
'You name chat sessions. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
20
apps/server/src/services/inference/budget.ts
Normal file
20
apps/server/src/services/inference/budget.ts
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import type { Agent } from '../../types/api.js';
|
||||||
|
import { READ_ONLY_TOOL_NAMES } from '../tools.js';
|
||||||
|
|
||||||
|
// v1.8.2: default tool-call budgets. resolveToolBudget picks one per turn:
//   explicit agent.max_tool_calls  -> that value
//   agent with only read-only tools -> BUDGET_READ_ONLY
//   agent with any other tool       -> BUDGET_NON_READ_ONLY
//   no agent (raw chat)             -> BUDGET_NO_AGENT

/** Budget for agents whose tools are all read-only. */
export const BUDGET_READ_ONLY = 30;

/** Budget for agents that have at least one non-read-only tool. */
export const BUDGET_NON_READ_ONLY = 10;

/** Budget for a raw chat with no agent attached. */
export const BUDGET_NO_AGENT = 15;

// Set form of READ_ONLY_TOOL_NAMES for constant-time membership checks.
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||||
|
|
||||||
|
/**
 * Resolves the tool-call budget for one turn.
 *
 * @param agent - The agent driving the chat, or null for a raw chat.
 * @returns The agent's explicit max_tool_calls when set; otherwise
 *   BUDGET_NO_AGENT without an agent, BUDGET_READ_ONLY when every agent tool
 *   is in the read-only set, and BUDGET_NON_READ_ONLY in all other cases.
 */
export function resolveToolBudget(agent: Agent | null): number {
  if (!agent) return BUDGET_NO_AGENT;

  // An explicit per-agent limit always wins over the defaults.
  const explicitLimit = agent.max_tool_calls;
  if (explicitLimit != null) return explicitLimit;

  const hasNonReadOnlyTool = agent.tools.some((tool) => !READ_ONLY_SET.has(tool));
  if (hasNonReadOnlyTool) return BUDGET_NON_READ_ONLY;
  return BUDGET_READ_ONLY;
}
|
||||||
148
apps/server/src/services/inference/error-handler.ts
Normal file
148
apps/server/src/services/inference/error-handler.ts
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
import type { MessageMetadata, Session } from '../../types/api.js';
|
||||||
|
import * as modelContext from '../model-context.js';
|
||||||
|
import { maybeFlagForCompaction } from './payload.js';
|
||||||
|
import type { InferenceContext, StreamResult, TurnArgs } from './turn.js';
|
||||||
|
|
||||||
|
/**
 * Finalizes an assistant turn that ended with an abort or an error.
 *
 * Persists the text accumulated so far, marks the message 'cancelled' (when
 * `err` is an Error named 'AbortError') or 'failed' (anything else), bumps
 * the session's updated_at, and publishes the matching status frames.
 *
 * @param ctx - Inference context providing sql, publish, publishUser and log.
 * @param args - Identifiers of the turn (session, chat, assistant message).
 * @param accumulated - Assistant text received before the turn stopped.
 * @param err - The value that was thrown.
 */
export async function handleAbortOrError(
  ctx: InferenceContext,
  args: TurnArgs,
  accumulated: string,
  err: unknown
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const isAbort = err instanceof Error && err.name === 'AbortError';
  const finalStatus = isAbort ? 'cancelled' : 'failed';
  const errMsg = err instanceof Error ? err.message : String(err);
  // v1.8.2: persist a structured error metadata blob on genuine failures so
  // the bubble can render the reason on reload without re-deriving from the
  // (one-shot) WS error frame. User-initiated abort skips this — there's no
  // "reason" to surface for a stop the user already explicitly chose.
  const errorMetadata: MessageMetadata | null = isAbort
    ? null
    : { kind: 'error', error_reason: 'llm_provider_error', error_text: errMsg };
  if (errorMetadata) {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${accumulated},
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errorMetadata as never)}
      WHERE id = ${assistantMessageId}
    `;
  } else {
    await ctx.sql`
      UPDATE messages
      SET status = ${finalStatus},
          content = ${accumulated},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
  }
  const [failSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  // The UPDATE returns no row when the session no longer exists. Skip the
  // session_updated frame in that case instead of throwing, so the chat
  // status / error frames below are still delivered.
  if (failSessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: failSessRow.project_id,
      name: failSessRow.name,
      updated_at: failSessRow.updated_at,
    });
  } else {
    ctx.log.error(
      { sessionId, chatId, assistantMessageId },
      'session row missing while finalizing aborted/failed turn'
    );
  }
  // v1.8 mobile-tabs: cancellation is a user-initiated stop, treat as idle;
  // genuine errors flip the dot red. v1.8.2: error path also carries a
  // machine-readable `reason` so the UI can render specifics inline.
  if (isAbort) {
    // v1.12.1: defensive cancellation write. The status=${finalStatus} UPDATE
    // above already sets 'cancelled' for the AbortError case, but a row can
    // leak as 'streaming' when the abort fires between the post-tool-phase
    // INSERT (executeToolPhase) and the next runAssistantTurn's stream setup,
    // bypassing the try/catch around executeStreamPhase. The status guard
    // makes this a no-op when the earlier write already landed.
    await ctx.sql`
      UPDATE messages
      SET status = 'cancelled', content = ${accumulated}, finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId} AND status = 'streaming'
    `;
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
    ctx.log.info({ sessionId, chatId, assistantMessageId }, 'inference cancelled');
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: 'llm_provider_error',
    });
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: errMsg,
      reason: 'llm_provider_error',
    });
    ctx.log.error({ err, sessionId, assistantMessageId }, 'inference failed');
  }
}
|
||||||
|
|
||||||
|
/**
 * Finalizes an assistant turn that completed normally.
 *
 * Writes the final content and token counters to the message row, flags the
 * chat for compaction when the counters call for it, bumps the session's
 * updated_at, and publishes the session_updated, chat_status and
 * message_complete frames.
 *
 * @param ctx - Inference context providing sql, publish, publishUser and log.
 * @param args - Identifiers of the turn (session, chat, assistant message).
 * @param result - Outcome of the stream (content, finish reason, token counts).
 * @param startedAt - Start timestamp echoed in the message_complete frame.
 * @param session - Session whose model is used for the context-size lookup.
 */
export async function finalizeCompletion(
  ctx: InferenceContext,
  args: TurnArgs,
  result: StreamResult,
  startedAt: string | null,
  session: Session
): Promise<void> {
  const { sessionId, chatId, assistantMessageId } = args;
  const { content, finishReason, promptTokens, completionTokens } = result;

  // v1.11.3: see executeToolPhase for the rationale.
  const mctx = await modelContext.getModelContext(session.model);
  const nCtx = mctx?.n_ctx ?? null;

  const [updated] = await ctx.sql<
    { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
  >`
    UPDATE messages
    SET content = ${content},
        status = 'complete',
        tokens_used = ${completionTokens},
        ctx_used = ${promptTokens},
        ctx_max = ${nCtx},
        finished_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING tokens_used, ctx_used, ctx_max, finished_at
  `;
  // v1.11: flag for compaction on the terminal turn too. Catches the common
  // case of a turn that hit the limit without invoking tools.
  await maybeFlagForCompaction(ctx, chatId, updated);
  const [completeSessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  // The UPDATE returns no row when the session no longer exists. Skip the
  // session_updated frame in that case instead of throwing, so the idle and
  // message_complete frames below are still delivered.
  if (completeSessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: completeSessRow.project_id,
      name: completeSessRow.name,
      updated_at: completeSessRow.updated_at,
    });
  } else {
    ctx.log.error(
      { sessionId, chatId, assistantMessageId },
      'session row missing while finalizing completed turn'
    );
  }
  ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
    chat_id: chatId,
    tokens_used: updated?.tokens_used ?? null,
    ctx_used: updated?.ctx_used ?? null,
    ctx_max: updated?.ctx_max ?? null,
    started_at: startedAt,
    finished_at: updated?.finished_at ?? null,
    model: session.model,
  });
  ctx.log.info(
    {
      sessionId,
      chatId,
      assistantMessageId,
      finishReason,
      chars: content.length,
      tokens_used: updated?.tokens_used,
      ctx_used: updated?.ctx_used,
    },
    'inference complete'
  );
}
|
||||||
20
apps/server/src/services/inference/index.ts
Normal file
20
apps/server/src/services/inference/index.ts
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
// v1.12.4: re-export shim. Outside callers (apps/server/src/index.ts and the
|
||||||
|
// vitest inference tests) import from './services/inference/index.js'. The
|
||||||
|
// directory is now the public surface; turn.ts holds runAssistantTurn /
|
||||||
|
// runInference / createInferenceRunner while the other inference/*.ts files
|
||||||
|
// stay implementation-private.
|
||||||
|
|
||||||
|
export {
|
||||||
|
createInferenceRunner,
|
||||||
|
runAssistantTurn,
|
||||||
|
runInference,
|
||||||
|
} from './turn.js';
|
||||||
|
export type {
|
||||||
|
FramePublisher,
|
||||||
|
InferenceContext,
|
||||||
|
InferenceFrame,
|
||||||
|
StreamResult,
|
||||||
|
TurnArgs,
|
||||||
|
} from './turn.js';
|
||||||
|
export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js';
|
||||||
|
export { buildMessagesPayload } from './payload.js';
|
||||||
155
apps/server/src/services/inference/payload.ts
Normal file
155
apps/server/src/services/inference/payload.ts
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
import type { Sql } from '../../db.js';
|
||||||
|
import type {
|
||||||
|
Agent,
|
||||||
|
Message,
|
||||||
|
Project,
|
||||||
|
Session,
|
||||||
|
} from '../../types/api.js';
|
||||||
|
import * as compaction from '../compaction.js';
|
||||||
|
import { buildSystemPrompt } from '../system-prompt.js';
|
||||||
|
import { isAnySentinel } from './sentinels.js';
|
||||||
|
import type { InferenceContext } from './turn.js';
|
||||||
|
|
||||||
|
/** A function-style tool call attached to an assistant message. */
interface OpenAiToolCall {
  id: string;
  type: 'function';
  // `arguments` carries the call arguments as a string.
  function: { name: string; arguments: string };
}

/** One entry of the chat-completions style `messages` array. */
export interface OpenAiMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
  /** Tool calls requested by the message, when it has any. */
  tool_calls?: Array<OpenAiToolCall>;
  /** Id of the tool call a `tool` message answers. */
  tool_call_id?: string;
}
|
||||||
|
|
||||||
|
// v1.12: buildSystemPrompt lives in services/system-prompt.ts and is async,
// which is why this function is async as well and callers must await it.
/**
 * Converts stored chat history into the message array sent to the model.
 *
 * The system prompt always comes first. History is replayed starting at the
 * most recent 'compact' row (or from the beginning when there is none).
 *
 * @param session - Session the turn belongs to.
 * @param project - Project the session belongs to.
 * @param history - Stored messages, oldest first.
 * @param agent - Agent driving the chat, or null for a raw chat.
 * @returns Messages in the order they should be sent.
 */
export async function buildMessagesPayload(
  session: Session,
  project: Project,
  history: Message[],
  agent: Agent | null = null
): Promise<OpenAiMessage[]> {
  const payload: OpenAiMessage[] = [
    { role: 'system', content: await buildSystemPrompt(project, session, agent) },
  ];

  // Walk backwards to the latest compact marker; fall back to index 0.
  let firstIncluded = history.length - 1;
  while (firstIncluded >= 0 && history[firstIncluded]!.kind !== 'compact') {
    firstIncluded--;
  }
  if (firstIncluded < 0) firstIncluded = 0;

  for (const m of history.slice(firstIncluded)) {
    // A compact row is replayed as an extra system message.
    if (m.kind === 'compact') {
      payload.push({ role: 'system', content: m.content });
      continue;
    }
    // v1.8.2 / v1.11.6: cap-hit and doom-loop sentinels are UI-only and are
    // never sent to the LLM, so a follow-up turn resumes with a clean context.
    if (isAnySentinel(m)) continue;

    const fromAssistant = m.role === 'assistant';
    // Unfinished or user-cancelled assistant rows are left out.
    if (fromAssistant && (m.status === 'streaming' || m.status === 'cancelled')) continue;

    if (m.role === 'tool') {
      const result = m.tool_results;
      if (!result) continue;
      let text: string;
      if (result.error) {
        text = `error: ${result.error}`;
      } else if (typeof result.output === 'string') {
        text = result.output;
      } else {
        text = JSON.stringify(result.output);
      }
      payload.push({ role: 'tool', content: text, tool_call_id: result.tool_call_id });
    } else if (fromAssistant) {
      // Empty assistant text is sent as null rather than ''.
      const entry: OpenAiMessage = { role: 'assistant', content: m.content || null };
      if (m.tool_calls?.length) {
        entry.tool_calls = m.tool_calls.map((call) => ({
          id: call.id,
          type: 'function' as const,
          function: { name: call.name, arguments: JSON.stringify(call.args) },
        }));
      }
      payload.push(entry);
    } else {
      // Every remaining role is forwarded as a user message.
      payload.push({ role: 'user', content: m.content });
    }
  }
  return payload;
}
|
||||||
|
|
||||||
|
/**
 * Loads everything needed to assemble an inference payload for a chat.
 *
 * @param sql - Database handle.
 * @param sessionId - Session to load.
 * @param chatId - Chat whose message history is loaded.
 * @returns The session, its project and the chat's non-compacted history
 *   (oldest first), or null when the session or its project does not exist.
 */
export async function loadContext(
  sql: Sql,
  sessionId: string,
  chatId: string
): Promise<{ session: Session; project: Project; history: Message[] } | null> {
  // Select the same session columns as the session routes (including
  // workspace_panes) so the row really has the shape its Session typing claims.
  const [session] = await sql<Session[]>`
    SELECT id, project_id, name, model, system_prompt, status, created_at, updated_at,
           agent_id, web_search_enabled, workspace_panes
    FROM sessions WHERE id = ${sessionId}
  `;
  if (!session) return null;

  const [project] = await sql<Project[]>`
    SELECT id, name, path, added_at, last_session_id, status, gitea_remote,
           default_system_prompt, default_web_search_enabled
    FROM projects WHERE id = ${session.project_id}
  `;
  if (!project) return null;

  // v1.11: filter compacted messages out of the inference assembly. The GET
  // /api/sessions/:id/messages endpoint still returns everything (so the UI
  // can show history with the summary card inline); only LLM payloads skip
  // compacted rows. compacted_at IS NULL keeps the active summary + tail.
  const history = await sql<Message[]>`
    SELECT id, session_id, chat_id, role, content, kind, tool_calls, tool_results, status, last_seq,
           tokens_used, ctx_used, ctx_max, started_at, finished_at, created_at, metadata
    FROM messages
    WHERE chat_id = ${chatId} AND compacted_at IS NULL
    ORDER BY created_at ASC, id ASC
  `;

  return { session, project, history };
}
|
||||||
|
|
||||||
|
/**
 * v1.11: shared helper run after both finalizeCompletion and executeToolPhase
 * have persisted their token counts.
 *
 * Takes the token columns of the just-UPDATEd message row (the caller passes
 * what its RETURNING clause produced), asks `compaction.isOverflow` whether
 * the context is over its limit, and if so sets `chats.needs_compaction`.
 * The next runAssistantTurn invocation acts on that flag.
 *
 * Silent when any count is missing: llama-swap occasionally omits usage on
 * truncated streams, and missing one overflow is preferable to crashing the
 * inference path.
 *
 * @param ctx     inference context; only `sql` and `log` are used here
 * @param chatId  chat whose `needs_compaction` flag may be raised
 * @param updated token columns of the updated row, or `undefined` if no row
 *                came back
 */
export async function maybeFlagForCompaction(
  ctx: InferenceContext,
  chatId: string,
  updated: { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null } | undefined,
): Promise<void> {
  if (!updated) return;

  const {
    ctx_used: promptTokens,
    tokens_used: completionTokens,
    ctx_max: contextLimit,
  } = updated;

  // All three numbers are required; bail out quietly if any is absent.
  if (
    typeof promptTokens !== 'number' ||
    typeof completionTokens !== 'number' ||
    typeof contextLimit !== 'number'
  ) {
    return;
  }

  const usage = { prompt_tokens: promptTokens, completion_tokens: completionTokens };
  if (!compaction.isOverflow(usage, contextLimit)) return;

  await ctx.sql`UPDATE chats SET needs_compaction = true WHERE id = ${chatId}`;
  ctx.log.info({ chatId, promptTokens, completionTokens, contextLimit }, 'inference: flagged for compaction');
}
|
||||||
523
apps/server/src/services/inference/sentinel-summaries.ts
Normal file
523
apps/server/src/services/inference/sentinel-summaries.ts
Normal file
@@ -0,0 +1,523 @@
|
|||||||
|
import type {
|
||||||
|
Agent,
|
||||||
|
Message,
|
||||||
|
MessageMetadata,
|
||||||
|
Project,
|
||||||
|
Session,
|
||||||
|
} from '../../types/api.js';
|
||||||
|
import * as modelContext from '../model-context.js';
|
||||||
|
import { buildMessagesPayload } from './payload.js';
|
||||||
|
import { DOOM_LOOP_THRESHOLD } from './sentinels.js';
|
||||||
|
import { streamCompletion } from './stream-phase.js';
|
||||||
|
import { DB_FLUSH_INTERVAL_MS } from './types.js';
|
||||||
|
import type {
|
||||||
|
InferenceContext,
|
||||||
|
StreamResult,
|
||||||
|
TurnArgs,
|
||||||
|
} from './turn.js';
|
||||||
|
|
||||||
|
/**
 * Builds the synthetic system note appended to the cap-hit summary call.
 *
 * The wording is verbatim from the v1.8.2 spec — do not paraphrase it: the
 * model is more reliable when the instruction is short, declarative, and
 * identical across calls.
 *
 * @param limit tool-call budget that was reached
 */
const CAP_HIT_SUMMARY_NOTE = (limit: number): string => {
  return `You've reached the tool budget (${limit} calls). Produce the best answer you can with what you have. Do not call more tools.`;
};
|
||||||
|
|
||||||
|
/**
 * Builds the synthetic system note appended to the doom-loop summary call.
 * It names the tool that was called repeatedly and tells the model to stop.
 *
 * @param name name of the looping tool
 */
const DOOM_LOOP_NOTE = (name: string): string => {
  return `You called ${name} with the same arguments ${DOOM_LOOP_THRESHOLD} times in a row. Stop calling it. Produce the best answer you can with what you have.`;
};
|
||||||
|
|
||||||
|
/**
 * Runs the tools-disabled wrap-up completion after a turn has used up its
 * tool budget, then records the outcome.
 *
 * Steps, in order:
 *  1. Build the normal payload and append the CAP_HIT_SUMMARY_NOTE system note.
 *  2. Stamp `started_at` on the assistant message and publish `message_started`.
 *  3. Stream the completion with `tools: null`, publishing each delta and
 *     periodically persisting the accumulated text.
 *  4. Finalize the assistant message as complete / cancelled / failed.
 *  5. Bump the session's `updated_at`, insert the cap-hit sentinel, and
 *     publish the terminal `chat_status` frame.
 *
 * @param ctx     inference context (sql, publish, publishUser, log)
 * @param args    identifiers for this turn plus the abort signal
 * @param session session row; supplies the model name
 * @param project project row, forwarded to the payload builder
 * @param history message history, forwarded to the payload builder
 * @param agent   active agent, or `null`; supplies temperature and sentinel name
 * @param budget  tool-call limit that was reached
 */
export async function runCapHitSummary(
  ctx: InferenceContext,
  args: TurnArgs,
  session: Session,
  project: Project,
  history: Message[],
  agent: Agent | null,
  budget: number,
): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

  const messages = await buildMessagesPayload(session, project, history, agent);
  messages.push({ role: 'system', content: CAP_HIT_SUMMARY_NOTE(budget) });

  const startedRow = await ctx.sql<{ started_at: string }[]>`
    UPDATE messages
    SET started_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING started_at
  `;
  const startedAt = startedRow[0]?.started_at ?? null;

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  let accumulated = '';
  let pendingFlushTimer: NodeJS.Timeout | null = null;
  let flushPromise: Promise<unknown> = Promise.resolve();

  // Interim flushes are serialized through flushPromise so writes land in
  // order. Each link swallows (and logs) its own failure: a rejected link
  // would otherwise poison every later flush, surface as an unhandled
  // rejection mid-stream, and make `await flushPromise` below throw before
  // the message is finalized. Losing an interim write is harmless because the
  // finalize UPDATE rewrites `content` in full.
  const flushNow = () => {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    const snapshot = accumulated;
    const previous = flushPromise;
    flushPromise = (async () => {
      await previous;
      try {
        await ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`;
      } catch (err: unknown) {
        ctx.log.warn({ err, assistantMessageId }, 'inference cap-hit summary: interim content flush failed');
      }
    })();
  };
  const scheduleFlush = () => {
    if (pendingFlushTimer) return;
    pendingFlushTimer = setTimeout(() => {
      pendingFlushTimer = null;
      flushNow();
    }, DB_FLUSH_INTERVAL_MS);
  };

  let summaryOk = false;
  let summarySoftCancelled = false;
  let summaryError: string | null = null;
  let result: StreamResult | null = null;
  try {
    result = await streamCompletion(
      ctx,
      session.model,
      messages,
      { tools: null, temperature: agent?.temperature },
      (delta) => {
        accumulated += delta;
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
          chat_id: chatId,
          content: delta,
        });
        scheduleFlush();
      },
      undefined,
      signal,
    );
    summaryOk = true;
  } catch (err: unknown) {
    if (err instanceof Error && err.name === 'AbortError') {
      summarySoftCancelled = true;
    } else {
      summaryError = err instanceof Error ? err.message : String(err);
    }
  } finally {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    // Let any in-flight interim write finish so it cannot land after (and
    // overwrite) the finalize UPDATE below. Never rejects — see flushNow.
    await flushPromise;
  }

  // Finalize the summary message based on the three outcomes. The sentinel
  // is inserted regardless so the user always has the Continue affordance —
  // even on a partial / failed summary the chat history shows where the
  // budget was hit.
  if (summaryOk && result) {
    // v1.11.3: see executeToolPhase for the rationale.
    const mctx = await modelContext.getModelContext(session.model);
    const nCtx = mctx?.n_ctx ?? null;
    const [updated] = await ctx.sql<
      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
    >`
      UPDATE messages
      SET content = ${result.content},
          status = 'complete',
          tokens_used = ${result.completionTokens},
          ctx_used = ${result.promptTokens},
          ctx_max = ${nCtx},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
      RETURNING tokens_used, ctx_used, ctx_max, finished_at
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
      tokens_used: updated?.tokens_used ?? null,
      ctx_used: updated?.ctx_used ?? null,
      ctx_max: updated?.ctx_max ?? null,
      started_at: startedAt,
      finished_at: updated?.finished_at ?? null,
      model: session.model,
    });
  } else if (summarySoftCancelled) {
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'cancelled',
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
  } else {
    const errMeta: MessageMetadata = {
      kind: 'error',
      error_reason: 'summary_after_cap_failed',
      error_text: summaryError ?? 'summary failed',
    };
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'failed',
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errMeta as never)}
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: summaryError ?? 'summary failed',
      reason: 'summary_after_cap_failed',
    });
  }

  // Bump session/chat updated_at exactly once for this turn.
  const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  // No row comes back if the session no longer exists; skip the frame rather
  // than dereferencing undefined.
  if (sessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: sessRow.project_id,
      name: sessRow.name,
      updated_at: sessRow.updated_at,
    });
  }

  await insertCapHitSentinel(ctx, sessionId, chatId, agent, budget);

  // Status frame fires last so the dot color reflects the terminal state.
  // Success → idle, abort → idle (user-driven stop), error → error+reason.
  if (summaryOk || summarySoftCancelled) {
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: 'summary_after_cap_failed',
    });
  }

  ctx.log.info(
    { sessionId, chatId, assistantMessageId, budget, summaryOk, summaryCancelled: summarySoftCancelled },
    'inference cap-hit summary finished',
  );
}
|
||||||
|
|
||||||
|
/**
 * Inserts the system "cap hit" sentinel row for a chat and publishes it to
 * the session stream.
 *
 * Hard ceiling: the number of cap_hit sentinels already in the chat decides
 * `can_continue`. Once two exist, the new sentinel reports
 * `can_continue: false` and the UI disables the Continue button.
 *
 * @param ctx       inference context (sql, publish)
 * @param sessionId session the sentinel row belongs to
 * @param chatId    chat the sentinel row belongs to
 * @param agent     active agent, or `null`; its name is stored in the metadata
 * @param budget    tool budget that was reached; stored as both `used` and `limit`
 */
async function insertCapHitSentinel(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  agent: Agent | null,
  budget: number,
): Promise<void> {
  const [prior] = await ctx.sql<{ count: number }[]>`
    SELECT COUNT(*)::int AS count
    FROM messages
    WHERE chat_id = ${chatId}
      AND role = 'system'
      AND metadata->>'kind' = 'cap_hit'
  `;
  const sentinelsSoFar = prior?.count ?? 0;

  const metadata: MessageMetadata = {
    kind: 'cap_hit',
    used: budget,
    limit: budget,
    agent_name: agent?.name ?? null,
    can_continue: sentinelsSoFar < 2,
  };
  const content = `Reached tool budget (${budget}/${budget}). Continue to extend.`;

  const [inserted] = await ctx.sql<{ id: string }[]>`
    INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
    VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)})
    RETURNING id
  `;
  const sentinelId = inserted!.id;

  // The sentinel content is static, but the standard frame sequence
  // (started → delta → complete) is still walked so useSessionStream's
  // reducer appends it via the same path it uses for streaming assistant
  // messages. The single delta carries the full text.
  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: sentinelId,
    chat_id: chatId,
    role: 'system',
  });
  ctx.publish(sessionId, {
    type: 'delta',
    message_id: sentinelId,
    chat_id: chatId,
    content,
  });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: sentinelId,
    chat_id: chatId,
    metadata,
  });
}
|
||||||
|
|
||||||
|
/**
 * v1.11.6: doom-loop wrap-up. Mirrors runCapHitSummary structurally — same
 * in-flight-slot reuse, same tools-disabled streaming-summary call, same
 * post-finalize sentinel insert + chat_status drop. Differences:
 *   - synthetic note text comes from DOOM_LOOP_NOTE (names the looping tool)
 *   - sentinel metadata is { kind: 'doom_loop', tool_name, args, threshold }
 *     and has no Continue affordance (manual retry would just re-loop)
 *   - the failure path reuses reason 'summary_after_cap_failed' (no dedicated
 *     doom-loop reason exists); only the fallback error text differs
 *
 * Kept as a clone rather than refactored into a shared helper because the
 * two summary paths still differ in sentinel shape; a third sentinel would
 * justify factoring out runWrapUpSummary(opts).
 *
 * @param ctx     inference context (sql, publish, publishUser, log)
 * @param args    identifiers for this turn plus the abort signal
 * @param session session row; supplies the model name
 * @param project project row, forwarded to the payload builder
 * @param history message history, forwarded to the payload builder
 * @param agent   active agent, or `null`; supplies the temperature
 * @param loop    name and arguments of the tool call that was repeated
 */
export async function runDoomLoopSummary(
  ctx: InferenceContext,
  args: TurnArgs,
  session: Session,
  project: Project,
  history: Message[],
  agent: Agent | null,
  loop: { name: string; args: Record<string, unknown> },
): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

  const messages = await buildMessagesPayload(session, project, history, agent);
  messages.push({ role: 'system', content: DOOM_LOOP_NOTE(loop.name) });

  const startedRow = await ctx.sql<{ started_at: string }[]>`
    UPDATE messages
    SET started_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING started_at
  `;
  const startedAt = startedRow[0]?.started_at ?? null;

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  let accumulated = '';
  let pendingFlushTimer: NodeJS.Timeout | null = null;
  let flushPromise: Promise<unknown> = Promise.resolve();

  // Interim flushes are serialized through flushPromise so writes land in
  // order. Each link swallows (and logs) its own failure: a rejected link
  // would otherwise poison every later flush, surface as an unhandled
  // rejection mid-stream, and make `await flushPromise` below throw before
  // the message is finalized. Losing an interim write is harmless because the
  // finalize UPDATE rewrites `content` in full.
  const flushNow = () => {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    const snapshot = accumulated;
    const previous = flushPromise;
    flushPromise = (async () => {
      await previous;
      try {
        await ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`;
      } catch (err: unknown) {
        ctx.log.warn({ err, assistantMessageId }, 'inference doom-loop summary: interim content flush failed');
      }
    })();
  };
  const scheduleFlush = () => {
    if (pendingFlushTimer) return;
    pendingFlushTimer = setTimeout(() => {
      pendingFlushTimer = null;
      flushNow();
    }, DB_FLUSH_INTERVAL_MS);
  };

  let summaryOk = false;
  let summarySoftCancelled = false;
  let summaryError: string | null = null;
  let result: StreamResult | null = null;
  try {
    result = await streamCompletion(
      ctx,
      session.model,
      messages,
      { tools: null, temperature: agent?.temperature },
      (delta) => {
        accumulated += delta;
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
          chat_id: chatId,
          content: delta,
        });
        scheduleFlush();
      },
      undefined,
      signal,
    );
    summaryOk = true;
  } catch (err: unknown) {
    if (err instanceof Error && err.name === 'AbortError') {
      summarySoftCancelled = true;
    } else {
      summaryError = err instanceof Error ? err.message : String(err);
    }
  } finally {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    // Let any in-flight interim write finish so it cannot land after (and
    // overwrite) the finalize UPDATE below. Never rejects — see flushNow.
    await flushPromise;
  }

  if (summaryOk && result) {
    const mctx = await modelContext.getModelContext(session.model);
    const nCtx = mctx?.n_ctx ?? null;
    const [updated] = await ctx.sql<
      { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
    >`
      UPDATE messages
      SET content = ${result.content},
          status = 'complete',
          tokens_used = ${result.completionTokens},
          ctx_used = ${result.promptTokens},
          ctx_max = ${nCtx},
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
      RETURNING tokens_used, ctx_used, ctx_max, finished_at
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
      tokens_used: updated?.tokens_used ?? null,
      ctx_used: updated?.ctx_used ?? null,
      ctx_max: updated?.ctx_max ?? null,
      started_at: startedAt,
      finished_at: updated?.finished_at ?? null,
      model: session.model,
    });
  } else if (summarySoftCancelled) {
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'cancelled',
          finished_at = clock_timestamp()
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'message_complete',
      message_id: assistantMessageId,
      chat_id: chatId,
    });
  } else {
    // Doom-loop summary failure reuses the existing summary_after_cap_failed
    // error reason — the ErrorReason union is shared between sentinel paths
    // and the UI surfaces a generic "summary failed" line for both. We don't
    // add a new reason code because the user-visible failure mode is the
    // same (model gave up mid-summary). Sentinel below still fires.
    const errMeta: MessageMetadata = {
      kind: 'error',
      error_reason: 'summary_after_cap_failed',
      error_text: summaryError ?? 'doom-loop summary failed',
    };
    await ctx.sql`
      UPDATE messages
      SET content = ${accumulated},
          status = 'failed',
          finished_at = clock_timestamp(),
          metadata = ${ctx.sql.json(errMeta as never)}
      WHERE id = ${assistantMessageId}
    `;
    ctx.publish(sessionId, {
      type: 'error',
      message_id: assistantMessageId,
      chat_id: chatId,
      error: summaryError ?? 'doom-loop summary failed',
      reason: 'summary_after_cap_failed',
    });
  }

  // Bump the session's updated_at once for this turn.
  const [sessRow] = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  // No row comes back if the session no longer exists; skip the frame rather
  // than dereferencing undefined.
  if (sessRow) {
    ctx.publishUser({
      type: 'session_updated',
      session_id: sessionId,
      project_id: sessRow.project_id,
      name: sessRow.name,
      updated_at: sessRow.updated_at,
    });
  }

  await insertDoomLoopSentinel(ctx, sessionId, chatId, loop);

  // Status frame fires last: success and user abort both end idle, anything
  // else ends in error with the shared reason code.
  if (summaryOk || summarySoftCancelled) {
    ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'idle', at: new Date().toISOString() });
  } else {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'error',
      at: new Date().toISOString(),
      reason: 'summary_after_cap_failed',
    });
  }

  ctx.log.info(
    { sessionId, chatId, assistantMessageId, loopedTool: loop.name, summaryOk, summaryCancelled: summarySoftCancelled },
    'inference doom-loop summary finished',
  );
}
|
||||||
|
|
||||||
|
/**
 * Inserts the system "doom loop" sentinel row for a chat and publishes it to
 * the session stream.
 *
 * There is no hard-ceiling / can-continue logic here — doom-loop is a
 * different failure mode from cap-hit. Continuing would re-trigger the loop
 * with the same tools available; the user needs to restate their question or
 * switch agents instead.
 *
 * @param ctx       inference context (sql, publish)
 * @param sessionId session the sentinel row belongs to
 * @param chatId    chat the sentinel row belongs to
 * @param loop      name and arguments of the repeated tool call, stored in
 *                  the sentinel metadata
 */
async function insertDoomLoopSentinel(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  loop: { name: string; args: Record<string, unknown> },
): Promise<void> {
  const { name: toolName, args: toolArgs } = loop;

  const metadata: MessageMetadata = {
    kind: 'doom_loop',
    tool_name: toolName,
    args: toolArgs,
    threshold: DOOM_LOOP_THRESHOLD,
  };
  const content = `Detected ${DOOM_LOOP_THRESHOLD} identical calls to ${toolName}. Stopping the tool-call loop. Produce the best answer you can with what you have.`;

  const [inserted] = await ctx.sql<{ id: string }[]>`
    INSERT INTO messages (session_id, chat_id, role, content, status, created_at, metadata)
    VALUES (${sessionId}, ${chatId}, 'system', ${content}, 'complete', clock_timestamp(), ${ctx.sql.json(metadata as never)})
    RETURNING id
  `;
  const sentinelId = inserted!.id;

  // Standard frame sequence (started → delta → complete), same as the
  // cap-hit sentinel, so useSessionStream's reducer appends the row via the
  // existing path.
  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: sentinelId,
    chat_id: chatId,
    role: 'system',
  });
  ctx.publish(sessionId, {
    type: 'delta',
    message_id: sentinelId,
    chat_id: chatId,
    content,
  });
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: sentinelId,
    chat_id: chatId,
    metadata,
  });
}
|
||||||
53
apps/server/src/services/inference/sentinels.ts
Normal file
53
apps/server/src/services/inference/sentinels.ts
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import type { Message, ToolCall } from '../../types/api.js';
|
||||||
|
|
||||||
|
// v1.11.6: doom-loop guard. When the model calls the same tool with the
// same arguments DOOM_LOOP_THRESHOLD times in a row within one user-message
// turn, abort the recursion and run the same wrap-up summary path as the
// cap-hit case. Ported from opencode (DOOM_LOOP_THRESHOLD in
// session/processor.ts). Threshold of 3 is the smallest value that doesn't
// false-positive on a model that retries once after a transient error.
export const DOOM_LOOP_THRESHOLD = 3;

/**
 * Serializes a value to JSON with object keys sorted at every depth, so two
 * structurally equal values produce the same string regardless of the order
 * their keys were inserted in. Array element order is preserved.
 */
function canonicalArgsJson(value: unknown): string | undefined {
  return JSON.stringify(value, (_key: string, val: unknown) => {
    if (val === null || typeof val !== 'object' || Array.isArray(val)) return val;
    const source = val as Record<string, unknown>;
    const sorted: Record<string, unknown> = {};
    for (const key of Object.keys(source).sort()) {
      sorted[key] = source[key];
    }
    return sorted;
  });
}

/**
 * Detects a doom loop at the end of a tool-call sequence.
 *
 * Looks only at the LAST DOOM_LOOP_THRESHOLD entries of `recentToolCalls`.
 * They count as identical when they share a name AND their args are
 * deep-equal. Args are compared through a key-order-insensitive JSON form,
 * so `{ a: 1, b: 2 }` and `{ b: 2, a: 1 }` are the same call; array order
 * still matters.
 *
 * Pure; exported for unit-test access.
 *
 * @param recentToolCalls tool calls in the order they were made
 * @returns the name + args of the first call in the matching window, or
 *          `null` when there are too few calls or they are not all identical
 */
export function detectDoomLoop(
  recentToolCalls: ToolCall[],
): { name: string; args: Record<string, unknown> } | null {
  if (recentToolCalls.length < DOOM_LOOP_THRESHOLD) return null;

  const tail = recentToolCalls.slice(-DOOM_LOOP_THRESHOLD);
  const ref = tail[0]!;
  const refArgs = canonicalArgsJson(ref.args);

  const allIdentical = tail.every(
    (tc) => tc.name === ref.name && canonicalArgsJson(tc.args) === refArgs,
  );
  return allIdentical ? { name: ref.name, args: ref.args } : null;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a message is a cap-hit sentinel: a `system` message whose
 * metadata is a non-null object with `kind === 'cap_hit'`.
 */
export function isCapHitSentinel(m: Message): boolean {
  if (m.role !== 'system') return false;

  const meta: unknown = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;

  return (meta as { kind?: unknown }).kind === 'cap_hit';
}
|
||||||
|
|
||||||
|
/**
 * v1.11.6: parallel predicate to isCapHitSentinel. Matches a `system` message
 * whose metadata is a non-null object with `kind === 'doom_loop'`.
 *
 * Same UI-only semantics as cap-hit sentinels — never sent to the LLM
 * (filtered by buildMessagesPayload through the isAnySentinel check).
 */
export function isDoomLoopSentinel(m: Message): boolean {
  if (m.role !== 'system') return false;

  const meta: unknown = m.metadata;
  if (meta === null || typeof meta !== 'object') return false;

  return (meta as { kind?: unknown }).kind === 'doom_loop';
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a message is any kind of sentinel (cap-hit or doom-loop).
 * The doom-loop check only runs when the cap-hit check fails.
 */
export function isAnySentinel(m: Message): boolean {
  if (isCapHitSentinel(m)) return true;
  return isDoomLoopSentinel(m);
}
|
||||||
380
apps/server/src/services/inference/stream-phase.ts
Normal file
380
apps/server/src/services/inference/stream-phase.ts
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
import type {
|
||||||
|
Agent,
|
||||||
|
Session,
|
||||||
|
ToolCall,
|
||||||
|
} from '../../types/api.js';
|
||||||
|
import * as modelContext from '../model-context.js';
|
||||||
|
import { toolJsonSchemas, type ToolJsonSchema } from '../tools.js';
|
||||||
|
import type { OpenAiMessage } from './payload.js';
|
||||||
|
import {
|
||||||
|
XML_TOOL_CLOSE,
|
||||||
|
XML_TOOL_OPEN,
|
||||||
|
parseXmlToolCall,
|
||||||
|
partialXmlOpenerStart,
|
||||||
|
} from './xml-parser.js';
|
||||||
|
import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
|
||||||
|
import type {
|
||||||
|
InferenceContext,
|
||||||
|
StreamResult,
|
||||||
|
TurnArgs,
|
||||||
|
} from './turn.js';
|
||||||
|
|
||||||
|
/**
 * Incremental `delta` object carried by one choice of a streamed
 * chat-completion chunk. Every field is optional; a chunk may carry any
 * subset of them.
 */
interface ChatCompletionDelta {
  role?: string;
  /** Text fragment for this chunk; may be `null` or absent. */
  content?: string | null;
  /**
   * Structured tool-call fragments. NOTE(review): presumably `index`
   * identifies which call a fragment belongs to — confirm against the
   * consumer, which is outside this view.
   */
  tool_calls?: {
    index: number;
    id?: string;
    type?: 'function';
    function?: { name?: string; arguments?: string };
  }[];
}
|
||||||
|
|
||||||
|
/**
 * Shape of one parsed `data:` payload from the streaming chat-completions
 * response. Both top-level fields are optional.
 */
interface ChatCompletionChunk {
  /** Streamed choices; each carries a delta and a finish reason (or `null`). */
  choices?: {
    delta: ChatCompletionDelta;
    finish_reason: string | null;
  }[];
  /** Token accounting; each count may be missing individually. */
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
  };
}
|
||||||
|
|
||||||
|
/** Per-call options for a streamed completion request. */
interface StreamOptions {
  /**
   * Tool schemas to offer the model.
   * `null` = omit tools entirely (compact phase); `[]` = caller stripped all
   * tools (rare; still omitted from the request body to avoid an OpenAI 400).
   */
  tools: ToolJsonSchema[] | null;
  /** Sampling temperature; left out of the request when not a number. */
  temperature?: number;
}
|
||||||
|
|
||||||
|
/**
 * Splits a UTF-8 byte stream into text lines and yields each non-empty line.
 *
 * Lines end at `\n`; one trailing `\r` is stripped so CRLF input yields the
 * same lines as LF input. Empty lines are skipped. Bytes are decoded
 * incrementally, so a multi-byte character split across chunks is
 * reassembled. A final line with no terminating newline is yielded too, with
 * the same `\r` stripping and empty-line skipping as every other line.
 *
 * If the consumer stops iterating before the stream ends (or reading
 * throws), the reader is cancelled so the underlying source is released
 * rather than left open, and the lock is always released.
 *
 * @param stream byte stream to read; it is locked for the duration of iteration
 */
async function* sseLines(stream: ReadableStream<Uint8Array>): AsyncGenerator<string> {
  const reader = stream.getReader();
  const decoder = new TextDecoder('utf-8');
  let buffer = '';
  let exhausted = false;
  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }
      buffer += decoder.decode(value, { stream: true });
      let idx: number;
      while ((idx = buffer.indexOf('\n')) >= 0) {
        const line = buffer.slice(0, idx).replace(/\r$/, '');
        buffer = buffer.slice(idx + 1);
        if (line.length === 0) continue;
        yield line;
      }
    }
    // Flush whatever the decoder still holds (an incomplete trailing byte
    // sequence), then treat the remainder like any other line.
    buffer += decoder.decode();
    const lastLine = buffer.replace(/\r$/, '');
    if (lastLine.length > 0) yield lastLine;
  } finally {
    if (!exhausted) {
      // Early exit or read failure: tell the source we are done with it.
      // cancel() rejects when the stream has already errored; that error is
      // either already propagating or irrelevant, so it is ignored here.
      try {
        await reader.cancel();
      } catch {
        // intentionally ignored
      }
    }
    reader.releaseLock();
  }
}
|
||||||
|
|
||||||
|
// v1.10.5 Qwen-coder XML fallback. Some local models (notably qwen3-coder via
|
||||||
|
// llama-swap) emit tool calls as inline XML inside delta.content rather than
|
||||||
|
// the structured delta.tool_calls field. The XML shape is:
|
||||||
|
// <tool_call>
|
||||||
|
// <function=NAME>
|
||||||
|
// <parameter=KEY>
|
||||||
|
// VALUE
|
||||||
|
// </parameter>
|
||||||
|
// ...more parameters...
|
||||||
|
// </function>
|
||||||
|
// </tool_call>
|
||||||
|
// Multiple <tool_call> blocks may appear back-to-back; they never nest.
|
||||||
|
// streamCompletion buffers delta.content, extracts complete blocks, parses
|
||||||
|
// them via parseXmlToolCall, and pushes synthetic entries into the existing
|
||||||
|
// toolCallsBuffer alongside any native JSON-format tool calls.
|
||||||
|
export async function streamCompletion(
|
||||||
|
ctx: InferenceContext,
|
||||||
|
model: string,
|
||||||
|
messages: OpenAiMessage[],
|
||||||
|
opts: StreamOptions,
|
||||||
|
onDelta: (content: string) => void,
|
||||||
|
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
||||||
|
signal?: AbortSignal
|
||||||
|
): Promise<StreamResult> {
|
||||||
|
const body: Record<string, unknown> = {
|
||||||
|
model,
|
||||||
|
messages,
|
||||||
|
stream: true,
|
||||||
|
stream_options: { include_usage: true },
|
||||||
|
};
|
||||||
|
if (opts.tools && opts.tools.length > 0) {
|
||||||
|
body['tools'] = opts.tools;
|
||||||
|
body['tool_choice'] = 'auto';
|
||||||
|
}
|
||||||
|
if (typeof opts.temperature === 'number') {
|
||||||
|
body['temperature'] = opts.temperature;
|
||||||
|
}
|
||||||
|
|
||||||
|
const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify(body),
|
||||||
|
signal,
|
||||||
|
});
|
||||||
|
if (!res.ok || !res.body) {
|
||||||
|
const text = await res.text().catch(() => '');
|
||||||
|
throw new Error(`llama-swap returned ${res.status}: ${text.slice(0, 200)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
let content = '';
|
||||||
|
// v1.10.5: holds delta.content bytes that may contain a partial XML tool
|
||||||
|
// call. Anything not part of a (possibly forming) <tool_call>…</tool_call>
|
||||||
|
// pair is flushed to content + onDelta as soon as we know it's safe.
|
||||||
|
let pendingBuffer = '';
|
||||||
|
let finishReason: string | null = null;
|
||||||
|
let promptTokens: number | null = null;
|
||||||
|
let completionTokens: number | null = null;
|
||||||
|
const toolCallsBuffer = new Map<number, { id: string; name: string; argsText: string }>();
|
||||||
|
|
||||||
|
for await (const line of sseLines(res.body)) {
|
||||||
|
if (!line.startsWith('data:')) continue;
|
||||||
|
const payload = line.slice(5).trim();
|
||||||
|
if (payload === '[DONE]') break;
|
||||||
|
let parsed: ChatCompletionChunk;
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(payload);
|
||||||
|
} catch {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parsed.usage) {
|
||||||
|
if (typeof parsed.usage.prompt_tokens === 'number') {
|
||||||
|
promptTokens = parsed.usage.prompt_tokens;
|
||||||
|
}
|
||||||
|
if (typeof parsed.usage.completion_tokens === 'number') {
|
||||||
|
completionTokens = parsed.usage.completion_tokens;
|
||||||
|
}
|
||||||
|
onUsage?.(promptTokens, completionTokens);
|
||||||
|
}
|
||||||
|
// v1.11.3: removed dead `parsed.timings.n_ctx` read. llama-server's
|
||||||
|
// streaming completion does NOT emit n_ctx in timings (verified
|
||||||
|
// empirically); the authoritative source is llama-swap's
|
||||||
|
// /upstream/<model>/props endpoint, fetched per-turn via
|
||||||
|
// model-context.getModelContext() at the finalization sites below.
|
||||||
|
|
||||||
|
const choice = parsed.choices?.[0];
|
||||||
|
if (!choice) continue;
|
||||||
|
const delta = choice.delta ?? {};
|
||||||
|
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
||||||
|
// v1.10.5 XML fallback. Append, then extract any complete tool_call
|
||||||
|
// blocks before deciding what's safe to flush as visible content.
|
||||||
|
pendingBuffer += delta.content;
|
||||||
|
while (true) {
|
||||||
|
const startIdx = pendingBuffer.indexOf(XML_TOOL_OPEN);
|
||||||
|
if (startIdx === -1) break;
|
||||||
|
const closeIdx = pendingBuffer.indexOf(XML_TOOL_CLOSE, startIdx);
|
||||||
|
if (closeIdx === -1) break;
|
||||||
|
const blockEnd = closeIdx + XML_TOOL_CLOSE.length;
|
||||||
|
const block = pendingBuffer.slice(startIdx, blockEnd);
|
||||||
|
// Any text before the opener is plain content — flush it now.
|
||||||
|
if (startIdx > 0) {
|
||||||
|
const before = pendingBuffer.slice(0, startIdx);
|
||||||
|
content += before;
|
||||||
|
onDelta(before);
|
||||||
|
}
|
||||||
|
const parsedCall = parseXmlToolCall(block);
|
||||||
|
if (parsedCall) {
|
||||||
|
const synthIdx = toolCallsBuffer.size;
|
||||||
|
toolCallsBuffer.set(synthIdx, {
|
||||||
|
id: `xml_call_${synthIdx}`,
|
||||||
|
name: parsedCall.name,
|
||||||
|
argsText: JSON.stringify(parsedCall.args),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// If parsing failed we still drop the block — emitting unparseable
|
||||||
|
// XML to the chat would look worse than silently swallowing it.
|
||||||
|
pendingBuffer = pendingBuffer.slice(blockEnd);
|
||||||
|
}
|
||||||
|
// After all complete blocks are out, hold back any (partial or full)
|
||||||
|
// unclosed opener; flush the rest.
|
||||||
|
const partialIdx = partialXmlOpenerStart(pendingBuffer);
|
||||||
|
if (partialIdx >= 0) {
|
||||||
|
if (partialIdx > 0) {
|
||||||
|
const flush = pendingBuffer.slice(0, partialIdx);
|
||||||
|
content += flush;
|
||||||
|
onDelta(flush);
|
||||||
|
}
|
||||||
|
pendingBuffer = pendingBuffer.slice(partialIdx);
|
||||||
|
} else if (pendingBuffer.length > 0) {
|
||||||
|
content += pendingBuffer;
|
||||||
|
onDelta(pendingBuffer);
|
||||||
|
pendingBuffer = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Array.isArray(delta.tool_calls)) {
|
||||||
|
for (const tc of delta.tool_calls) {
|
||||||
|
const idx = tc.index;
|
||||||
|
const existing = toolCallsBuffer.get(idx) ?? { id: '', name: '', argsText: '' };
|
||||||
|
if (tc.id) existing.id = tc.id;
|
||||||
|
if (tc.function?.name) existing.name = tc.function.name;
|
||||||
|
if (typeof tc.function?.arguments === 'string') existing.argsText += tc.function.arguments;
|
||||||
|
toolCallsBuffer.set(idx, existing);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (choice.finish_reason) finishReason = choice.finish_reason;
|
||||||
|
}
|
||||||
|
|
||||||
|
// v1.10.5: if the stream ended mid-XML (e.g. model truncated, no closer
|
||||||
|
// ever arrived), flush whatever was buffered as plain content so it isn't
|
||||||
|
// silently dropped. Better to show a stray `<tool_call>` than vanish text.
|
||||||
|
if (pendingBuffer.length > 0) {
|
||||||
|
content += pendingBuffer;
|
||||||
|
onDelta(pendingBuffer);
|
||||||
|
pendingBuffer = '';
|
||||||
|
}
|
||||||
|
|
||||||
|
const toolCalls: ToolCall[] = [];
|
||||||
|
for (const [, t] of [...toolCallsBuffer.entries()].sort(([a], [b]) => a - b)) {
|
||||||
|
let args: Record<string, unknown> = {};
|
||||||
|
if (t.argsText.length > 0) {
|
||||||
|
try {
|
||||||
|
args = JSON.parse(t.argsText);
|
||||||
|
} catch {
|
||||||
|
args = { _raw: t.argsText };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
toolCalls.push({ id: t.id || `call_${toolCalls.length}`, name: t.name, args });
|
||||||
|
}
|
||||||
|
|
||||||
|
return { finishReason, content, toolCalls, promptTokens, completionTokens };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Streams one completion into the in-flight assistant message.
 *
 * Stamps `started_at` on the message row, announces `message_started` on the
 * session channel, then delegates to `streamCompletion`. Each content delta is
 * appended to `state.accumulated`, published as a `delta` frame, and persisted
 * to the message row on a debounce of `DB_FLUSH_INTERVAL_MS`. Usage callbacks
 * are republished as `usage` frames, throttled to roughly one per 500ms.
 *
 * @param ctx - Shared inference dependencies (sql, logger, publishers).
 * @param args - Turn identifiers plus the optional abort signal.
 * @param session - Session whose `model` is used for the completion.
 * @param messages - Chat payload forwarded to `streamCompletion`.
 * @param state - Mutated in place: `startedAt` is set once and `accumulated`
 *   grows with every delta.
 * @param agent - Optional agent; restricts the tool list to `agent.tools` and
 *   supplies the sampling temperature.
 * @param webToolsEnabled - When false, `web_search` and `web_fetch` are
 *   removed from the tool list sent to the model.
 * @returns The result produced by `streamCompletion`.
 * @throws Whatever the initial `started_at` update, the model-context lookup
 *   or `streamCompletion` throws. Incremental flush failures are logged and
 *   never thrown.
 */
export async function executeStreamPhase(
  ctx: InferenceContext,
  args: TurnArgs,
  session: Session,
  messages: OpenAiMessage[],
  state: StreamPhaseState,
  agent: Agent | null,
  // v1.11.8: when false, web_search and web_fetch are stripped from the
  // tool list sent to the LLM, so the model can't even attempt them.
  webToolsEnabled: boolean,
): Promise<StreamResult> {
  const { sessionId, chatId, assistantMessageId, signal } = args;

  const startedRow = await ctx.sql<{ started_at: string }[]>`
    UPDATE messages
    SET started_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING started_at
  `;
  state.startedAt = startedRow[0]?.started_at ?? null;

  ctx.publish(sessionId, {
    type: 'message_started',
    message_id: assistantMessageId,
    chat_id: chatId,
    role: 'assistant',
  });

  let pendingFlushTimer: NodeJS.Timeout | null = null;
  let flushPromise: Promise<unknown> = Promise.resolve();

  // Queue a write of the current accumulated text behind any in-flight write,
  // so snapshots land in order.
  const flushNow = () => {
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    const snapshot = state.accumulated;
    flushPromise = flushPromise
      .then(() =>
        ctx.sql`UPDATE messages SET content = ${snapshot} WHERE id = ${assistantMessageId}`
      )
      // Incremental flushes are best-effort. Without this handler a single
      // failed write would reject the whole chain (skipping every later
      // flush), could surface as an unhandled rejection mid-stream, and would
      // make `await flushPromise` in the finally block replace a successful
      // stream result with a DB error.
      .catch((err: unknown) => {
        ctx.log.warn(
          { err, sessionId, assistantMessageId },
          'incremental content flush failed'
        );
      });
  };

  // Debounce: at most one pending timer; deltas arriving meanwhile ride along
  // with the snapshot taken when the timer fires.
  const scheduleFlush = () => {
    if (pendingFlushTimer) return;
    pendingFlushTimer = setTimeout(() => {
      pendingFlushTimer = null;
      flushNow();
    }, DB_FLUSH_INTERVAL_MS);
  };

  // Tool whitelist: if an agent is set, filter the global tool list to only the
  // tool names it allows. Unknown names in agent.tools are dropped silently
  // (handled here by intersection). When no agent: send all tools.
  // v1.11.8: a second filter strips web_search + web_fetch unless the chat
  // has them explicitly enabled. Counts as an opt-in security boundary: the
  // model can't summon a tool that wasn't offered to it.
  const WEB_TOOL_NAMES: ReadonlySet<string> = new Set(['web_search', 'web_fetch']);
  const effectiveTools: ToolJsonSchema[] = (agent
    ? toolJsonSchemas().filter((t) => agent.tools.includes(t.function.name))
    : toolJsonSchemas()
  ).filter((t) => webToolsEnabled || !WEB_TOOL_NAMES.has(t.function.name));
  const effectiveTemperature = agent?.temperature;

  // v1.12.2: ctx_max lookup is cached after the first hit per model, so this
  // is a Map probe in steady state. We capture nCtx once at the top of the
  // stream so the throttled usage publish doesn't refetch each tick.
  const mctxForStream = await modelContext.getModelContext(session.model);
  const nCtxForStream = mctxForStream?.n_ctx ?? null;

  // v1.12.2: throttle live usage publishes to ~500ms. The model can land
  // dozens of usage frames per second; without a throttle the WS turns into
  // a firehose for a few KB savings on each render.
  const USAGE_THROTTLE_MS = 500;
  let lastUsageAt = 0;
  let pendingUsage: { p: number | null; c: number | null } | null = null;
  let usageTimer: NodeJS.Timeout | null = null;
  const flushUsage = () => {
    if (!pendingUsage) return;
    const { p, c } = pendingUsage;
    pendingUsage = null;
    lastUsageAt = Date.now();
    ctx.publish(sessionId, {
      type: 'usage',
      message_id: assistantMessageId,
      chat_id: chatId,
      completion_tokens: c,
      ctx_used: p,
      ctx_max: nCtxForStream,
    });
  };

  try {
    return await streamCompletion(
      ctx,
      session.model,
      messages,
      { tools: effectiveTools, temperature: effectiveTemperature },
      (delta) => {
        state.accumulated += delta;
        ctx.publish(sessionId, {
          type: 'delta',
          message_id: assistantMessageId,
          chat_id: chatId,
          content: delta,
        });
        ctx.log.debug({ sessionId, delta }, 'inference delta');
        scheduleFlush();
      },
      (prompt, completion) => {
        // Always keep the latest numbers; publish now if the throttle window
        // has elapsed, otherwise arm a single timer for the remainder.
        pendingUsage = { p: prompt, c: completion };
        const elapsed = Date.now() - lastUsageAt;
        if (elapsed >= USAGE_THROTTLE_MS) {
          flushUsage();
        } else if (!usageTimer) {
          usageTimer = setTimeout(() => {
            usageTimer = null;
            flushUsage();
          }, USAGE_THROTTLE_MS - elapsed);
        }
      },
      signal
    );
  } finally {
    // Cancel timers so nothing fires after the stream is over, then wait for
    // any queued content write to settle before handing control back.
    if (pendingFlushTimer) {
      clearTimeout(pendingFlushTimer);
      pendingFlushTimer = null;
    }
    if (usageTimer) {
      clearTimeout(usageTimer);
      usageTimer = null;
    }
    await flushPromise;
  }
}
|
||||||
213
apps/server/src/services/inference/tool-phase.ts
Normal file
213
apps/server/src/services/inference/tool-phase.ts
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
import type { Session, ToolCall } from '../../types/api.js';
|
||||||
|
import * as modelContext from '../model-context.js';
|
||||||
|
import { PathScopeError } from '../path_guard.js';
|
||||||
|
import { TOOLS_BY_NAME } from '../tools.js';
|
||||||
|
import { maybeFlagForCompaction } from './payload.js';
|
||||||
|
import type {
|
||||||
|
InferenceContext,
|
||||||
|
StreamResult,
|
||||||
|
TurnArgs,
|
||||||
|
} from './turn.js';
|
||||||
|
// v1.12.4: ESM value-import cycle. executeToolPhase recurses into
|
||||||
|
// runAssistantTurn which lives in turn.ts. The cycle is safe because
|
||||||
|
// the reference is read at call time (inside an async function body), not
|
||||||
|
// at module top-level. Node + tsc resolve this cleanly.
|
||||||
|
import { runAssistantTurn } from './turn.js';
|
||||||
|
|
||||||
|
/**
 * Validates and runs a single tool call, never throwing for tool-level
 * failures.
 *
 * @param projectRoot - Project root passed to the tool's `execute`.
 * @param toolCall - Tool name and arguments as emitted by the model.
 * @returns `output` plus a `truncated` flag on success. On an unknown tool,
 *   a schema rejection, or an error thrown by `execute`, `output` is `null`
 *   and `error` carries a one-line message.
 */
async function executeToolCall(
  projectRoot: string,
  toolCall: ToolCall
): Promise<{ output: unknown; truncated: boolean; error?: string }> {
  // The tool name is model-supplied. Restrict the lookup to own properties so
  // names such as "constructor" or "toString" can't resolve to inherited
  // Object.prototype members and then crash on `tool.inputSchema` below.
  const tool = Object.prototype.hasOwnProperty.call(TOOLS_BY_NAME, toolCall.name)
    ? TOOLS_BY_NAME[toolCall.name]
    : undefined;
  if (!tool) {
    return { output: null, truncated: false, error: `unknown tool: ${toolCall.name}` };
  }
  const parsed = tool.inputSchema.safeParse(toolCall.args);
  if (!parsed.success) {
    // v1.12 Track B.2: enrich the zod-reject path so the model sees a
    // one-line, tool-named hint ("tool 'search_symbols' rejected — query:
    // Required") instead of a JSON blob of flatten output. Higher recovery
    // rate on the next turn; doom-loop guard still bounds infinite retries.
    // The cast is because tool.inputSchema is ZodType<unknown>, so zod can't
    // statically narrow flatten()'s fieldErrors key set — but the runtime
    // shape is the standard { formErrors: string[]; fieldErrors: Record<...> }.
    const flatten = parsed.error.flatten() as {
      formErrors: string[];
      fieldErrors: Record<string, string[] | undefined>;
    };
    const fieldErrors = Object.entries(flatten.fieldErrors)
      .map(([field, errs]) => `${field}: ${errs?.[0] ?? 'invalid'}`)
      .join('; ');
    const formError = flatten.formErrors[0];
    // Prefer per-field messages, then the first form-level message.
    const hint = fieldErrors || formError || 'unknown validation error';
    return {
      output: null,
      truncated: false,
      error: `tool '${toolCall.name}' rejected — ${hint}`,
    };
  }
  try {
    const output = await tool.execute(parsed.data, projectRoot);
    // Tools may report truncation through a `truncated` property on their
    // output object; anything else counts as not truncated.
    const truncated =
      typeof output === 'object' && output !== null && 'truncated' in output
        ? Boolean((output as { truncated: unknown }).truncated)
        : false;
    return { output, truncated };
  } catch (err) {
    if (err instanceof PathScopeError) {
      return { output: null, truncated: false, error: err.message };
    }
    return {
      output: null,
      truncated: false,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Finishes an assistant message that requested tools, runs those tools, and
 * recurses into the next assistant turn.
 *
 * Sequence: mark the assistant message complete (content, tool calls, token
 * counts), flag the chat for compaction if needed, bump the session's
 * `updated_at`, publish `tool_call` and `message_complete` frames, execute
 * every tool call concurrently (one `tool` message row each), then either
 * pause for user input or insert the next streaming assistant row and call
 * `runAssistantTurn` again.
 *
 * @param ctx - Shared inference dependencies.
 * @param args - Identifiers, running tool counters and abort signal for the turn.
 * @param result - Stream output whose `toolCalls` are executed here.
 * @param startedAt - `started_at` of the assistant message, echoed in `message_complete`.
 * @param session - Session whose `model` is reported and used for the ctx_max lookup.
 * @param projectRoot - Project root handed to each tool.
 */
export async function executeToolPhase(
  ctx: InferenceContext,
  args: TurnArgs,
  result: StreamResult,
  startedAt: string | null,
  session: Session,
  projectRoot: string
): Promise<void> {
  const { sessionId, chatId, assistantMessageId, signal } = args;
  const { toolCalls } = result;

  // v1.11.3: ctx_max is read from the model-context service rather than the
  // streaming completion, which does not report n_ctx.
  const modelInfo = await modelContext.getModelContext(session.model);
  const ctxMax = modelInfo?.n_ctx ?? null;

  const finalizedRows = await ctx.sql<
    { tokens_used: number | null; ctx_used: number | null; ctx_max: number | null; finished_at: string | null }[]
  >`
    UPDATE messages
    SET content = ${result.content},
        status = 'complete',
        tool_calls = ${ctx.sql.json(toolCalls as never)},
        tokens_used = ${result.completionTokens},
        ctx_used = ${result.promptTokens},
        ctx_max = ${ctxMax},
        finished_at = clock_timestamp()
    WHERE id = ${assistantMessageId}
    RETURNING tokens_used, ctx_used, ctx_max, finished_at
  `;
  const finalized = finalizedRows[0];

  // v1.11: only a flag is raised here; compaction itself is never run in the
  // middle of a tool loop.
  await maybeFlagForCompaction(ctx, chatId, finalized);

  const touchedSessions = await ctx.sql<{ project_id: string; name: string; updated_at: string }[]>`
    UPDATE sessions SET updated_at = clock_timestamp()
    WHERE id = ${sessionId}
    RETURNING project_id, name, updated_at
  `;
  const sessionRow = touchedSessions[0]!;
  ctx.publishUser({
    type: 'session_updated',
    session_id: sessionId,
    project_id: sessionRow.project_id,
    name: sessionRow.name,
    updated_at: sessionRow.updated_at,
  });

  for (const call of toolCalls) {
    ctx.publish(sessionId, {
      type: 'tool_call',
      message_id: assistantMessageId,
      chat_id: chatId,
      tool_call: call,
    });
  }
  ctx.publish(sessionId, {
    type: 'message_complete',
    message_id: assistantMessageId,
    chat_id: chatId,
    tokens_used: finalized?.tokens_used ?? null,
    ctx_used: finalized?.ctx_used ?? null,
    ctx_max: finalized?.ctx_max ?? null,
    started_at: startedAt,
    finished_at: finalized?.finished_at ?? null,
    model: session.model,
  });

  ctx.publishUser({ type: 'chat_status', chat_id: chatId, status: 'tool_running', at: new Date().toISOString() });

  // Persists the tool_results JSON on a tool message row.
  const saveToolResults = (toolMessageId: string, payload: unknown) => ctx.sql`
    UPDATE messages
    SET tool_results = ${ctx.sql.json(payload as never)}
    WHERE id = ${toolMessageId}
  `;

  // Batch 9.7: ask_user_input pauses the loop. Its tool row is still created,
  // but tool_results is stamped with output=null as a pending marker and no
  // tool_result frame is published. Other calls in the same batch run normally.
  let awaitingUser = false;

  const runOne = async (call: ToolCall): Promise<void> => {
    const [inserted] = await ctx.sql<{ id: string }[]>`
      INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
      VALUES (${sessionId}, ${chatId}, 'tool', '', 'complete', clock_timestamp())
      RETURNING id
    `;
    const toolMessageId = inserted!.id;

    if (call.name === 'ask_user_input') {
      awaitingUser = true;
      await saveToolResults(toolMessageId, { tool_call_id: call.id, output: null, truncated: false });
      return;
    }

    const outcome = await executeToolCall(projectRoot, call);
    await saveToolResults(toolMessageId, {
      tool_call_id: call.id,
      output: outcome.output,
      truncated: outcome.truncated,
      ...(outcome.error ? { error: outcome.error } : {}),
    });
    ctx.publish(sessionId, {
      type: 'tool_result',
      tool_message_id: toolMessageId,
      chat_id: chatId,
      tool_call_id: call.id,
      output: outcome.output,
      truncated: outcome.truncated,
      ...(outcome.error ? { error: outcome.error } : {}),
    });
  };

  await Promise.all(toolCalls.map((call) => runOne(call)));

  if (awaitingUser) {
    ctx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'waiting_for_input',
      at: new Date().toISOString(),
    });
    ctx.log.info(
      { sessionId, chatId, assistantMessageId },
      'inference paused awaiting user input',
    );
    return;
  }

  const [followUp] = await ctx.sql<{ id: string }[]>`
    INSERT INTO messages (session_id, chat_id, role, content, status, created_at)
    VALUES (${sessionId}, ${chatId}, 'assistant', '', 'streaming', clock_timestamp())
    RETURNING id
  `;

  await runAssistantTurn(ctx, {
    sessionId,
    chatId,
    assistantMessageId: followUp!.id,
    // v1.8.2: the budget is charged per tool invocation, so add the number of
    // calls this message made rather than 1.
    toolsUsed: args.toolsUsed + toolCalls.length,
    // v1.11.6: extend the per-turn call history so the next turn's doom-loop
    // check can see these calls. No length cap is applied here.
    recentToolCalls: [...args.recentToolCalls, ...toolCalls],
    signal,
  });
}
|
||||||
326
apps/server/src/services/inference/turn.ts
Normal file
326
apps/server/src/services/inference/turn.ts
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
import type { FastifyBaseLogger } from 'fastify';
|
||||||
|
import type { Sql } from '../../db.js';
|
||||||
|
import type { Config } from '../../config.js';
|
||||||
|
import type {
|
||||||
|
Agent,
|
||||||
|
ErrorReason,
|
||||||
|
Message,
|
||||||
|
MessageMetadata,
|
||||||
|
Project,
|
||||||
|
Session,
|
||||||
|
ToolCall,
|
||||||
|
UserStreamFrame,
|
||||||
|
} from '../../types/api.js';
|
||||||
|
import { ALL_TOOLS } from '../tools.js';
|
||||||
|
import { resolveProjectRoot } from '../path_guard.js';
|
||||||
|
import { maybeAutoNameChat } from '../auto_name.js';
|
||||||
|
import { getAgentById } from '../agents.js';
|
||||||
|
import * as compaction from '../compaction.js';
|
||||||
|
import * as modelContext from '../model-context.js';
|
||||||
|
import type { Broker } from '../broker.js';
|
||||||
|
import { resolveToolBudget } from './budget.js';
|
||||||
|
import {
|
||||||
|
DOOM_LOOP_THRESHOLD,
|
||||||
|
detectDoomLoop,
|
||||||
|
} from './sentinels.js';
|
||||||
|
import {
|
||||||
|
buildMessagesPayload,
|
||||||
|
loadContext,
|
||||||
|
} from './payload.js';
|
||||||
|
import {
|
||||||
|
finalizeCompletion,
|
||||||
|
handleAbortOrError,
|
||||||
|
} from './error-handler.js';
|
||||||
|
import {
|
||||||
|
executeStreamPhase,
|
||||||
|
streamCompletion,
|
||||||
|
} from './stream-phase.js';
|
||||||
|
import { executeToolPhase } from './tool-phase.js';
|
||||||
|
import { DB_FLUSH_INTERVAL_MS, type StreamPhaseState } from './types.js';
|
||||||
|
import {
|
||||||
|
runCapHitSummary,
|
||||||
|
runDoomLoopSummary,
|
||||||
|
} from './sentinel-summaries.js';
|
||||||
|
|
||||||
|
// v1.12.4: re-exported so external callers (tests, future consumers) keep
|
||||||
|
// importing from services/inference.js as the public surface.
|
||||||
|
export { detectDoomLoop, DOOM_LOOP_THRESHOLD } from './sentinels.js';
|
||||||
|
export { buildMessagesPayload } from './payload.js';
|
||||||
|
|
||||||
|
/**
 * A single frame published on a session's stream. `type` selects the frame
 * kind; every other field is optional and only populated for the kinds that
 * use it.
 */
export interface InferenceFrame {
  /** Frame kind. */
  type:
    | 'message_started'
    | 'delta'
    | 'tool_call'
    | 'tool_result'
    | 'message_complete'
    | 'usage'
    | 'messages_deleted'
    | 'session_renamed'
    | 'chat_renamed'
    | 'error';

  // Identifiers
  message_id?: string;
  message_ids?: string[];
  chat_id?: string;
  session_id?: string;
  tool_message_id?: string;
  tool_call_id?: string;

  /**
   * v1.8.2: 'system' is included so cap-hit sentinel messages can go through
   * the normal message_started → delta → message_complete sequence.
   */
  role?: 'assistant' | 'tool' | 'user' | 'system';

  // Content and tool payloads
  content?: string;
  tool_call?: ToolCall;
  output?: unknown;
  truncated?: boolean;
  name?: string;
  model?: string;

  /** Human-readable error text. */
  error?: string;
  /**
   * v1.8.2: structured error reason, set on `type: 'error'` so the UI can
   * show a specific message.
   */
  reason?: ErrorReason;

  /**
   * v1.8.2: rides on `message_complete` so static or terminally-resolved
   * messages can deliver their persisted metadata without a refetch
   * (sentinels carry { kind: 'cap_hit', ... }; failed messages carry
   * { kind: 'error', ... }).
   */
  metadata?: MessageMetadata | null;

  // Token accounting and timing
  tokens_used?: number | null;
  ctx_used?: number | null;
  ctx_max?: number | null;
  completion_tokens?: number | null;
  started_at?: string | null;
  finished_at?: string | null;
}
|
||||||
|
|
||||||
|
/** Publishes one inference frame on the stream of the given session. */
export type FramePublisher = (
  sessionId: string,
  frame: InferenceFrame,
) => void;
|
||||||
|
|
||||||
|
/** Dependencies threaded through every inference turn. */
export interface InferenceContext {
  /** Database handle used for all message/session/chat queries. */
  sql: Sql;
  /** Server configuration. */
  config: Config;
  /** Logger. */
  log: FastifyBaseLogger;
  /** Publishes a frame on a session's stream. */
  publish: FramePublisher;
  /** Publishes a frame on the user-level stream. */
  publishUser: (frame: UserStreamFrame) => void;
  /**
   * v1.11: raw broker handle, passed through so compaction.process can publish
   * 'compacted' frames on the session WS channel that useSessionStream
   * subscribes to. Compaction is the only path that needs it (regular
   * inference goes through `publish`); keeping it a separate field avoids
   * tempting other code paths into bypassing the session-id binding.
   */
  broker: Broker;
}
|
||||||
|
|
||||||
|
// v1.12.4: payload assembly extracted to ./payload.ts (tests import
// buildMessagesPayload from this module, so the re-export above preserves the
// public surface). Stream + tool phases extracted to ./stream-phase.ts and
// ./tool-phase.ts.
|
||||||
|
|
||||||
|
/** Outcome of one streamed completion. */
export interface StreamResult {
  /** Last `finish_reason` reported by the stream, or null if none was seen. */
  finishReason: string | null;
  /** Visible assistant text accumulated from the stream. */
  content: string;
  /** Tool calls the model requested; empty when it requested none. */
  toolCalls: ToolCall[];
  /** Prompt token count from the usage data, or null when not reported. */
  promptTokens: number | null;
  /** Completion token count from the usage data, or null when not reported. */
  completionTokens: number | null;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Per-turn arguments carried through recursive `runAssistantTurn` calls. */
export interface TurnArgs {
  sessionId: string;
  chatId: string;
  /** Message row that the current turn streams into. */
  assistantMessageId: string;
  /**
   * v1.8.2: cumulative tool calls executed this run, compared against the
   * resolved budget at the top of each turn. Replaces the older `depth`
   * counter (which counted iterations, not invocations).
   */
  toolsUsed: number;
  /**
   * v1.11.6: ordered tool calls executed in this user-message turn (across
   * recursive runAssistantTurn invocations). runInference resets it to [] at
   * user-message boundaries, same as toolsUsed. The doom-loop check at the top
   * of runAssistantTurn slices the last DOOM_LOOP_THRESHOLD entries.
   */
  recentToolCalls: ToolCall[];
  /** Abort signal for the run; undefined when the run is not cancellable. */
  signal: AbortSignal | undefined;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Runs one assistant turn: optional compaction, context load, budget and
 * doom-loop guards, the stream phase, then either the tool phase or final
 * completion.
 *
 * @param ctx - Shared inference dependencies.
 * @param args - Identifiers, tool counters and abort signal for this turn.
 * @returns Resolves once the turn (including any recursive tool turns
 *   started by the tool phase) has finished. Stream-phase errors are routed
 *   to `handleAbortOrError` rather than thrown.
 */
export async function runAssistantTurn(
  ctx: InferenceContext,
  args: TurnArgs,
): Promise<void> {
  const { sessionId, chatId } = args;

  // v1.11: a previous turn may have flagged this chat for compaction. Run it
  // before loading context so the history read below is the compacted one.
  // A compaction failure is logged and the flag cleared (to avoid retrying
  // every turn); the turn then proceeds with the un-compacted history.
  const [flagRow] = await ctx.sql<{ needs_compaction: boolean }[]>`
    SELECT needs_compaction FROM chats WHERE id = ${chatId}
  `;
  if (flagRow?.needs_compaction) {
    const { sql, config, log, broker } = ctx;
    try {
      await compaction.process({ sql, config, log, broker, chatId });
    } catch (err) {
      ctx.log.warn({ err, chatId }, 'auto-compaction failed; clearing flag and proceeding');
      await ctx.sql`UPDATE chats SET needs_compaction = false WHERE id = ${chatId}`;
    }
  }

  const context = await loadContext(ctx.sql, sessionId, chatId);
  if (!context) {
    ctx.log.warn({ sessionId }, 'inference: session or project missing');
    return;
  }
  const { session, project, history } = context;
  const projectRoot = await resolveProjectRoot(project.path);

  // The agent is resolved on every turn, so a changed agent_id applies from
  // the next message on. Without an agent the turn uses the base prompt, all
  // tools and the default temperature.
  let agent: Agent | null = null;
  if (session.agent_id) {
    agent = await getAgentById(project.path, session.agent_id);
  }

  // v1.8.2: when the tool budget is already spent before this turn starts,
  // hand off to the cap-hit summary flow instead of streaming a normal turn.
  const budget = resolveToolBudget(agent);
  if (args.toolsUsed >= budget) {
    await runCapHitSummary(ctx, args, session, project, history, agent, budget);
    return;
  }

  // v1.11.6: doom-loop guard. It is evaluated after the budget check above,
  // but a run of identical calls trips it well before the budget is spent.
  // On detection the doom-loop summary flow takes over this turn.
  const loop = detectDoomLoop(args.recentToolCalls);
  if (loop) {
    await runDoomLoopSummary(ctx, args, session, project, history, agent, loop);
    return;
  }

  const messages = await buildMessagesPayload(session, project, history, agent);

  // v1.11.8: per-chat web-tools opt-in. session.web_search_enabled is
  // tri-state: null inherits the project default, true/false is explicit.
  // The single flag gates both web_search and web_fetch, and resolves to
  // false when neither the session nor the project sets it.
  const webToolsEnabled =
    session.web_search_enabled ?? project.default_web_search_enabled ?? false;

  const state: StreamPhaseState = { accumulated: '', startedAt: null };
  let streamed: StreamResult;
  try {
    streamed = await executeStreamPhase(ctx, args, session, messages, state, agent, webToolsEnabled);
  } catch (err) {
    // Pass along whatever text had been accumulated before the failure.
    await handleAbortOrError(ctx, args, state.accumulated, err);
    return;
  }

  if (streamed.toolCalls.length > 0) {
    await executeToolPhase(ctx, args, streamed, state.startedAt, session, projectRoot);
  } else {
    await finalizeCompletion(ctx, args, streamed, state.startedAt, session);
  }
}
|
||||||
|
|
||||||
|
/**
 * Entry point for a fresh inference run on a chat.
 *
 * Starts `runAssistantTurn` with zeroed per-run state: `toolsUsed` is 0 and
 * `recentToolCalls` is empty, so neither the tool budget nor doom-loop
 * detection carries over from an earlier run.
 *
 * @param ctx - Shared inference dependencies.
 * @param sessionId - Session the chat belongs to.
 * @param chatId - Chat being answered.
 * @param assistantMessageId - Message row the first turn streams into.
 * @param signal - Optional abort signal for the whole run.
 */
export async function runInference(
  ctx: InferenceContext,
  sessionId: string,
  chatId: string,
  assistantMessageId: string,
  signal?: AbortSignal
): Promise<void> {
  // v1.8.2: every fresh inference (initial send, regenerate, force_send,
  // continue) gets a clean tool budget.
  // v1.11.6: the tool-call history used for doom-loop detection is likewise
  // scoped to one user-message turn, so it starts empty here.
  const freshTurn: TurnArgs = {
    sessionId,
    chatId,
    assistantMessageId,
    toolsUsed: 0,
    recentToolCalls: [],
    signal,
  };
  await runAssistantTurn(ctx, freshTurn);
}
|
||||||
|
|
||||||
|
// v1.8.2: cap-hit summary flow, now implemented in ./sentinel-summaries.ts
// (runCapHitSummary). Called instead of erroring when the loop hits its
// budget. Reuses the in-flight assistant message slot to stream a short
// wrap-up reply with the synthetic note prepended and tools disabled, then
// always inserts a cap_hit sentinel afterward (regardless of summary outcome)
// so the UI can show a Continue affordance.
|
||||||
|
/** Bookkeeping for one in-flight inference run, keyed by chat in the runner. */
interface InferenceRegistration {
  /** Controller whose signal is handed to the run. */
  controller: AbortController;
  /** Resolves when the run has finished, whether it succeeded or failed. */
  completed: Promise<void>;
}
|
||||||
|
|
||||||
|
/**
 * Build the runner that launches, tracks, and cancels background inference
 * runs. At most one registration is tracked per chat; enqueueing again for
 * the same chat replaces the tracked registration.
 *
 * @param ctx Shared inference context minus `publishUser`, which is bound
 *   per call to the enqueuing user.
 * @param publishUserFn Publishes a frame on the given user's channel.
 * @returns `{ enqueue, cancel, hasActive }`.
 */
export function createInferenceRunner(
  ctx: Omit<InferenceContext, 'publishUser'>,
  publishUserFn: (user: string, frame: UserStreamFrame) => void
) {
  // chatId -> registration of the run currently tracked for that chat.
  const registry = new Map<string, InferenceRegistration>();

  /** Fire-and-forget: start an inference run for the given assistant message. */
  function enqueue(sessionId: string, chatId: string, assistantMessageId: string, user: string) {
    const callCtx: InferenceContext = {
      ...ctx,
      publishUser: (frame) => publishUserFn(user, frame),
      // v1.11: broker comes in via ctx (set at registration time). Repeated
      // here so the destructure carries it onto the per-call ctx without
      // having to add it to every enqueue/cancel signature individually.
      broker: ctx.broker,
    };

    // v1.8 mobile-tabs: announce the chat as busy before the async loop
    // starts so every device subscribed to the user channel sees it.
    callCtx.publishUser({
      type: 'chat_status',
      chat_id: chatId,
      status: 'streaming',
      at: new Date().toISOString(),
    });

    const controller = new AbortController();
    // Deferred promise: settled from the background task's `finally`.
    let markCompleted!: () => void;
    const completed = new Promise<void>((resolve) => {
      markCompleted = resolve;
    });
    const entry: InferenceRegistration = { controller, completed };
    registry.set(chatId, entry);

    const runInBackground = async (): Promise<void> => {
      try {
        await runInference(callCtx, sessionId, chatId, assistantMessageId, controller.signal);
        // Auto-naming is scheduled after the run and never fails it.
        setImmediate(() => {
          void maybeAutoNameChat(callCtx, chatId, sessionId).catch((err: Error) => {
            callCtx.log.warn({ err, chatId }, 'auto-name failed');
          });
        });
      } catch (err) {
        callCtx.log.error({ err }, 'unhandled inference error');
      } finally {
        markCompleted();
        // Only clear our own registration; a force-send may have replaced it.
        const current = registry.get(chatId);
        if (current === entry) {
          registry.delete(chatId);
        }
      }
    };
    void runInBackground();
  }

  /**
   * Abort the run tracked for `chatId` and wait for it to wind down.
   * @returns `false` when nothing is tracked for the chat, `true` otherwise.
   */
  async function cancel(_sessionId: string, chatId: string): Promise<boolean> {
    const active = registry.get(chatId);
    if (active === undefined) return false;
    active.controller.abort();
    // Swallow — we just need to wait for the catch/finally to persist state.
    await active.completed.catch(() => {});
    return true;
  }

  /** Whether a run is currently tracked for `chatId`. */
  function hasActive(chatId: string): boolean {
    return registry.has(chatId);
  }

  return { enqueue, cancel, hasActive };
}
|
||||||
|
|
||||||
|
// Names of every entry in ALL_TOOLS, in declaration order.
export const _toolNames = ALL_TOOLS.map(({ name }) => name);
|
||||||
13
apps/server/src/services/inference/types.ts
Normal file
13
apps/server/src/services/inference/types.ts
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
// v1.12.4: shared inter-phase types/constants for the extracted phase files.
|
||||||
|
// Lives here so stream-phase, tool-phase, and the summary functions still in
|
||||||
|
// inference.ts can all reference the same definitions without circular imports.
|
||||||
|
|
||||||
|
/** Mutable state shared between the extracted inference phase files. */
export interface StreamPhaseState {
  // Text collected so far — presumably the streamed assistant content;
  // confirm against the stream phase.
  accumulated: string;
  // Start marker for the turn, or null when not yet set. NOTE(review): looks
  // like a timestamp string — confirm the format with the code that writes it.
  startedAt: string | null;
}
|
||||||
|
|
||||||
|
// 500ms keeps the DB UPDATE rate bounded under heavy streaming. Used by
|
||||||
|
// executeStreamPhase, runCapHitSummary, and runDoomLoopSummary — every site
|
||||||
|
// that does a debounced content flush during streaming.
|
||||||
|
export const DB_FLUSH_INTERVAL_MS = 500;
|
||||||
53
apps/server/src/services/inference/xml-parser.ts
Normal file
53
apps/server/src/services/inference/xml-parser.ts
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// v1.10.5: XML-tag tool-call fallback. Some models emit
|
||||||
|
// <tool_call><function=foo><parameter=key>value</parameter></function></tool_call>
|
||||||
|
// in plain content instead of using the OpenAI tool_calls JSON channel.
|
||||||
|
// The streaming loop in inference.ts extracts these blocks via these helpers.
|
||||||
|
|
||||||
|
export const XML_TOOL_OPEN = '<tool_call>';
|
||||||
|
export const XML_TOOL_CLOSE = '</tool_call>';
|
||||||
|
|
||||||
|
/**
 * Parse one XML-style tool-call block of the form
 * `<function=NAME><parameter=KEY>VALUE</parameter>…</function>`.
 *
 * Each parameter value is trimmed and JSON-decoded when it is valid JSON
 * (so `10` becomes a number and `{"a":1}` an object); anything else is kept
 * as the trimmed string. When a key repeats, the last occurrence wins.
 *
 * @param block Text of a single tool-call block.
 * @returns The function name and its arguments, or `null` when the block has
 *   no `<function=…>` tag or the name is blank.
 */
export function parseXmlToolCall(
  block: string,
): { name: string; args: Record<string, unknown> } | null {
  const nameMatch = block.match(/<function=([^>]+)>/);
  const name = nameMatch?.[1]?.trim();
  if (!name) return null;

  const entries: Array<[string, unknown]> = [];
  // Non-greedy body so each <parameter=…>…</parameter> pair is matched
  // independently even when multiple appear in the same block.
  const paramRe = /<parameter=([^>]+)>([\s\S]*?)<\/parameter>/g;
  for (const m of block.matchAll(paramRe)) {
    const key = (m[1] ?? '').trim();
    if (!key) continue;
    const raw = (m[2] ?? '').trim();
    let value: unknown;
    try {
      value = JSON.parse(raw);
    } catch {
      // Not JSON — keep the literal text.
      value = raw;
    }
    entries.push([key, value]);
  }

  // Keys come from model output. Object.fromEntries defines own data
  // properties, so a parameter literally named `__proto__` is stored like any
  // other key instead of invoking the prototype setter (which would either
  // re-parent `args` or silently drop the parameter).
  return { name, args: Object.fromEntries(entries) };
}
|
||||||
|
|
||||||
|
// Locate the first character that begins (or completely contains) an
|
||||||
|
// unfinished <tool_call> opener in `s`. Returns -1 when `s` can be flushed
|
||||||
|
// to the client in full without risking a partial tag leak.
|
||||||
|
// Case 1: a full `<tool_call>` opener with no matching closer — caller
|
||||||
|
// must keep everything from that index forward until the next
|
||||||
|
// chunk arrives with the closer.
|
||||||
|
// Case 2: `s` ends with a strict prefix of `<tool_call>` (e.g. `<tool_c`).
|
||||||
|
// Caller must keep just that suffix in the buffer.
|
||||||
|
// Note: case 1 assumes the calling loop already extracted every complete
|
||||||
|
// <tool_call>…</tool_call> pair before reaching this check.
|
||||||
|
export function partialXmlOpenerStart(s: string): number {
  // Case 1: a complete opener is present — hold back everything from there.
  const openerAt = s.indexOf(XML_TOOL_OPEN);
  if (openerAt >= 0) return openerAt;

  // Case 2: the text after the last '<' may be the beginning of an opener.
  const tailAt = s.lastIndexOf('<');
  if (tailAt < 0) return -1;

  const tail = s.slice(tailAt);
  const isStrictPrefix =
    tail.length < XML_TOOL_OPEN.length && XML_TOOL_OPEN.startsWith(tail);
  return isStrictPrefix ? tailAt : -1;
}
|
||||||
@@ -39,6 +39,19 @@ export interface Session {
|
|||||||
// project.default_web_search_enabled. Plumbed but inert in v1.9 — the
|
// project.default_web_search_enabled. Plumbed but inert in v1.9 — the
|
||||||
// actual web_search tool ships in Batch 8.
|
// actual web_search tool ships in Batch 8.
|
||||||
web_search_enabled: boolean | null;
|
web_search_enabled: boolean | null;
|
||||||
|
// v1.12.1: server-side workspace pane layout. Replaces per-device
|
||||||
|
// localStorage so all devices viewing the session see the same panes.
|
||||||
|
workspace_panes: WorkspacePane[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export type WorkspacePaneKind = 'chat' | 'terminal' | 'agent' | 'empty' | 'settings';
|
||||||
|
|
||||||
|
export interface WorkspacePane {
|
||||||
|
id: string;
|
||||||
|
kind: WorkspacePaneKind;
|
||||||
|
chatId?: string;
|
||||||
|
chatIds: string[];
|
||||||
|
activeChatIdx: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
// v1.8.1: agents come from two sources. 'global' = /data/AGENTS.md (always
|
// v1.8.1: agents come from two sources. 'global' = /data/AGENTS.md (always
|
||||||
@@ -273,6 +286,11 @@ export interface SessionRenamedFrame {
|
|||||||
session_id: string;
|
session_id: string;
|
||||||
name: string;
|
name: string;
|
||||||
}
|
}
|
||||||
|
export interface SessionWorkspaceUpdatedFrame {
|
||||||
|
type: 'session_workspace_updated';
|
||||||
|
session_id: string;
|
||||||
|
workspace_panes: WorkspacePane[];
|
||||||
|
}
|
||||||
export interface SessionArchivedFrame {
|
export interface SessionArchivedFrame {
|
||||||
type: 'session_archived';
|
type: 'session_archived';
|
||||||
session_id: string;
|
session_id: string;
|
||||||
@@ -324,7 +342,7 @@ export interface ProjectUpdatedFrame {
|
|||||||
export interface ChatStatusFrame {
|
export interface ChatStatusFrame {
|
||||||
type: 'chat_status';
|
type: 'chat_status';
|
||||||
chat_id: string;
|
chat_id: string;
|
||||||
status: 'working' | 'idle' | 'error';
|
status: 'streaming' | 'tool_running' | 'waiting_for_input' | 'idle' | 'error';
|
||||||
at: string;
|
at: string;
|
||||||
reason?: ErrorReason;
|
reason?: ErrorReason;
|
||||||
}
|
}
|
||||||
@@ -335,6 +353,7 @@ export type UserStreamFrame =
|
|||||||
| SessionDeletedFrame
|
| SessionDeletedFrame
|
||||||
| SessionUpdatedFrame
|
| SessionUpdatedFrame
|
||||||
| SessionRenamedFrame
|
| SessionRenamedFrame
|
||||||
|
| SessionWorkspaceUpdatedFrame
|
||||||
| SessionArchivedFrame
|
| SessionArchivedFrame
|
||||||
| ChatCreatedFrame
|
| ChatCreatedFrame
|
||||||
| ChatUpdatedFrame
|
| ChatUpdatedFrame
|
||||||
|
|||||||
@@ -143,6 +143,11 @@ export const api = {
|
|||||||
),
|
),
|
||||||
openChatsCount: (id: string) =>
|
openChatsCount: (id: string) =>
|
||||||
request<{ count: number }>(`/api/sessions/${id}/chats/open-count`),
|
request<{ count: number }>(`/api/sessions/${id}/chats/open-count`),
|
||||||
|
updateWorkspacePanes: (id: string, panes: Session['workspace_panes']) =>
|
||||||
|
request<Session>(`/api/sessions/${id}/workspace`, {
|
||||||
|
method: 'PATCH',
|
||||||
|
body: JSON.stringify({ workspace_panes: panes }),
|
||||||
|
}),
|
||||||
},
|
},
|
||||||
|
|
||||||
chats: {
|
chats: {
|
||||||
@@ -175,6 +180,11 @@ export const api = {
|
|||||||
request<{ ok: true }>(`/api/chats/${chatId}/compact`, { method: 'POST' }),
|
request<{ ok: true }>(`/api/chats/${chatId}/compact`, { method: 'POST' }),
|
||||||
stop: (chatId: string) =>
|
stop: (chatId: string) =>
|
||||||
request<{ stopped: boolean }>(`/api/chats/${chatId}/stop`, { method: 'POST' }),
|
request<{ stopped: boolean }>(`/api/chats/${chatId}/stop`, { method: 'POST' }),
|
||||||
|
discardStale: (chatId: string, messageId: string) =>
|
||||||
|
request<Message>(`/api/chats/${chatId}/discard_stale`, {
|
||||||
|
method: 'POST',
|
||||||
|
body: JSON.stringify({ message_id: messageId }),
|
||||||
|
}),
|
||||||
forceSend: (chatId: string, content: string) =>
|
forceSend: (chatId: string, content: string) =>
|
||||||
request<{ user_message_id: string; assistant_message_id: string }>(
|
request<{ user_message_id: string; assistant_message_id: string }>(
|
||||||
`/api/chats/${chatId}/force_send`,
|
`/api/chats/${chatId}/force_send`,
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ export interface Session {
|
|||||||
agent_id: string | null;
|
agent_id: string | null;
|
||||||
// v1.9: null = inherit from project.default_web_search_enabled.
|
// v1.9: null = inherit from project.default_web_search_enabled.
|
||||||
web_search_enabled: boolean | null;
|
web_search_enabled: boolean | null;
|
||||||
|
// v1.12.1: server-authoritative pane layout, replaces localStorage.
|
||||||
|
workspace_panes: WorkspacePane[];
|
||||||
}
|
}
|
||||||
|
|
||||||
// v1.8.1: 'global' = /data/AGENTS.md (always-on), 'project' = per-project
|
// v1.8.1: 'global' = /data/AGENTS.md (always-on), 'project' = per-project
|
||||||
@@ -330,6 +332,17 @@ export type WsFrame =
|
|||||||
// to the client without a refetch.
|
// to the client without a refetch.
|
||||||
metadata?: MessageMetadata | null;
|
metadata?: MessageMetadata | null;
|
||||||
}
|
}
|
||||||
|
// v1.12.2: live throughput frame, published mid-stream every ~500ms with
|
||||||
|
// the latest token + ctx counts so ChatThroughput can render tok/s and
|
||||||
|
// ctx_used while the model is still generating.
|
||||||
|
| {
|
||||||
|
type: 'usage';
|
||||||
|
message_id: string;
|
||||||
|
chat_id?: string;
|
||||||
|
completion_tokens: number | null;
|
||||||
|
ctx_used: number | null;
|
||||||
|
ctx_max: number | null;
|
||||||
|
}
|
||||||
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
|
| { type: 'messages_deleted'; message_ids: string[]; chat_id?: string }
|
||||||
| { type: 'chat_renamed'; chat_id: string; name: string }
|
| { type: 'chat_renamed'; chat_id: string; name: string }
|
||||||
// v1.11: published by services/compaction.ts after the new anchored
|
// v1.11: published by services/compaction.ts after the new anchored
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import { useState } from 'react';
|
|||||||
import { Bot, History, MessageSquare, Plus, Terminal, X } from 'lucide-react';
|
import { Bot, History, MessageSquare, Plus, Terminal, X } from 'lucide-react';
|
||||||
import type { Chat, WorkspacePane } from '@/api/types';
|
import type { Chat, WorkspacePane } from '@/api/types';
|
||||||
import { StatusDot } from '@/components/StatusDot';
|
import { StatusDot } from '@/components/StatusDot';
|
||||||
|
import { ChatThroughput } from '@/components/ChatThroughput';
|
||||||
import {
|
import {
|
||||||
ContextMenu,
|
ContextMenu,
|
||||||
ContextMenuContent,
|
ContextMenuContent,
|
||||||
@@ -99,6 +100,7 @@ export function ChatTabBar({
|
|||||||
>
|
>
|
||||||
<MessageSquare size={12} className="shrink-0" />
|
<MessageSquare size={12} className="shrink-0" />
|
||||||
<StatusDot chatId={chat.id} />
|
<StatusDot chatId={chat.id} />
|
||||||
|
<ChatThroughput chatId={chat.id} />
|
||||||
{renamingId === chat.id ? (
|
{renamingId === chat.id ? (
|
||||||
<input
|
<input
|
||||||
autoFocus
|
autoFocus
|
||||||
|
|||||||
28
apps/web/src/components/ChatThroughput.tsx
Normal file
28
apps/web/src/components/ChatThroughput.tsx
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import { useChatStatus } from '@/hooks/useChatStatus';
|
||||||
|
import { useChatThroughput } from '@/hooks/useChatThroughput';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
chatId: string | null | undefined;
|
||||||
|
className?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// v1.12.2: inline throughput readout. Renders next to StatusDot while the
|
||||||
|
// chat is streaming or running a tool. Hidden in idle/error/waiting states
|
||||||
|
// — the dot already communicates those.
|
||||||
|
export function ChatThroughput({ chatId, className }: Props) {
  const status = useChatStatus(chatId);
  const sample = useChatThroughput(chatId);

  if (!chatId || !sample) return null;

  // Only shown while the chat is actively streaming or running a tool.
  const active = status === 'streaming' || status === 'tool_running';
  if (!active) return null;

  // Rate is shown only for a positive reading, rounded to a whole number.
  const rateLabel =
    sample.tps != null && sample.tps > 0 ? `${Math.round(sample.tps)} tok/s` : null;
  // Context usage needs both the used and max counts.
  const ctxLabel =
    sample.ctx_used != null && sample.ctx_max != null
      ? `${sample.ctx_used.toLocaleString()}/${sample.ctx_max.toLocaleString()}`
      : null;

  if (rateLabel === null && ctxLabel === null) return null;

  return (
    <span className={cn('text-xs text-muted-foreground tabular-nums', className)}>
      {rateLabel}
      {rateLabel !== null && ctxLabel !== null && ' · '}
      {ctxLabel}
    </span>
  );
}
|
||||||
@@ -13,6 +13,7 @@ import { toast } from 'sonner';
|
|||||||
import type { Chat, WorkspacePane } from '@/api/types';
|
import type { Chat, WorkspacePane } from '@/api/types';
|
||||||
import { BottomSheet } from '@/components/BottomSheet';
|
import { BottomSheet } from '@/components/BottomSheet';
|
||||||
import { StatusDot } from '@/components/StatusDot';
|
import { StatusDot } from '@/components/StatusDot';
|
||||||
|
import { ChatThroughput } from '@/components/ChatThroughput';
|
||||||
import {
|
import {
|
||||||
DropdownMenu,
|
DropdownMenu,
|
||||||
DropdownMenuContent,
|
DropdownMenuContent,
|
||||||
@@ -206,6 +207,7 @@ export function MobileTabSwitcher({
|
|||||||
>
|
>
|
||||||
<span className="shrink-0 text-muted-foreground">{paneIcon(active?.kind ?? 'chat')}</span>
|
<span className="shrink-0 text-muted-foreground">{paneIcon(active?.kind ?? 'chat')}</span>
|
||||||
<StatusDot chatId={activeChatId} />
|
<StatusDot chatId={activeChatId} />
|
||||||
|
<ChatThroughput chatId={activeChatId} />
|
||||||
<span className="truncate flex-1 text-left">{activeLabel}</span>
|
<span className="truncate flex-1 text-left">{activeLabel}</span>
|
||||||
<ChevronDown size={14} className="opacity-60 shrink-0" />
|
<ChevronDown size={14} className="opacity-60 shrink-0" />
|
||||||
</button>
|
</button>
|
||||||
@@ -237,6 +239,7 @@ export function MobileTabSwitcher({
|
|||||||
>
|
>
|
||||||
<span className="shrink-0 text-muted-foreground">{paneIcon(pane.kind)}</span>
|
<span className="shrink-0 text-muted-foreground">{paneIcon(pane.kind)}</span>
|
||||||
<StatusDot chatId={cid ?? null} />
|
<StatusDot chatId={cid ?? null} />
|
||||||
|
<ChatThroughput chatId={cid ?? null} />
|
||||||
{renamingChatId === cid && cid ? (
|
{renamingChatId === cid && cid ? (
|
||||||
<input
|
<input
|
||||||
autoFocus
|
autoFocus
|
||||||
|
|||||||
34
apps/web/src/components/StaleStreamBanner.tsx
Normal file
34
apps/web/src/components/StaleStreamBanner.tsx
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
interface Props {
|
||||||
|
onRetry: () => void;
|
||||||
|
onDiscard: () => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
// v1.12.3: shown when an assistant message has been 'streaming' for 60+
|
||||||
|
// seconds without new tokens. Lives above ChatInput in ChatPane. Retry
|
||||||
|
// discards the stuck row then resends the last user message; Discard just
|
||||||
|
// clears the row and drops the dot to idle.
|
||||||
|
export function StaleStreamBanner({ onRetry, onDiscard }: Props) {
  // Retry and Discard share identical button styling.
  const actionClass =
    'text-xs px-2 py-1 rounded border border-border hover:bg-accent max-md:min-h-[44px] max-md:px-3';

  return (
    <div className="border border-amber-500/30 bg-amber-500/5 rounded-md p-3 mb-2 mx-4 flex items-center justify-between gap-2">
      <span className="text-sm text-muted-foreground">
        Previous response didn't complete.
      </span>
      <div className="flex gap-2">
        <button type="button" onClick={onRetry} className={actionClass}>
          Retry
        </button>
        <button type="button" onClick={onDiscard} className={actionClass}>
          Discard
        </button>
      </div>
    </div>
  );
}
|
||||||
@@ -6,15 +6,10 @@ interface Props {
|
|||||||
className?: string;
|
className?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const STATUS_CLASS: Record<DerivedStatus, string> = {
|
|
||||||
working: 'bg-amber-500 animate-pulse',
|
|
||||||
idle_warm: 'bg-emerald-500',
|
|
||||||
idle_cold: 'bg-muted-foreground/40',
|
|
||||||
error: 'bg-destructive',
|
|
||||||
};
|
|
||||||
|
|
||||||
const STATUS_LABEL: Record<DerivedStatus, string> = {
|
const STATUS_LABEL: Record<DerivedStatus, string> = {
|
||||||
working: 'working',
|
streaming: 'streaming',
|
||||||
|
tool_running: 'running tool',
|
||||||
|
waiting_for_input: 'waiting for input',
|
||||||
idle_warm: 'idle',
|
idle_warm: 'idle',
|
||||||
idle_cold: 'idle',
|
idle_cold: 'idle',
|
||||||
error: 'error',
|
error: 'error',
|
||||||
@@ -22,15 +17,58 @@ const STATUS_LABEL: Record<DerivedStatus, string> = {
|
|||||||
|
|
||||||
export function StatusDot({ chatId, className }: Props) {
|
export function StatusDot({ chatId, className }: Props) {
|
||||||
const status = useChatStatus(chatId);
|
const status = useChatStatus(chatId);
|
||||||
|
|
||||||
|
if (status === 'streaming') {
|
||||||
|
return (
|
||||||
|
<span
|
||||||
|
aria-label="Status: streaming"
|
||||||
|
title="streaming"
|
||||||
|
className={cn('inline-block relative w-3 h-3 shrink-0', className)}
|
||||||
|
>
|
||||||
|
<span className="absolute inset-0 animate-spin-slow">
|
||||||
|
<span className="absolute top-0 left-1/2 -translate-x-1/2 w-1 h-1 rounded-full bg-amber-500" />
|
||||||
|
<span className="absolute bottom-0 left-1/2 -translate-x-1/2 w-1 h-1 rounded-full bg-amber-500/60" />
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status === 'tool_running') {
|
||||||
|
return (
|
||||||
|
<span
|
||||||
|
aria-label="Status: running tool"
|
||||||
|
title="running tool"
|
||||||
|
className={cn(
|
||||||
|
'inline-block w-3 h-3 rounded-full border-2 border-sky-500 border-t-transparent animate-spin shrink-0',
|
||||||
|
className,
|
||||||
|
)}
|
||||||
|
/>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (status === 'waiting_for_input') {
|
||||||
|
return (
|
||||||
|
<span
|
||||||
|
aria-label="Status: waiting for input"
|
||||||
|
title="waiting for input"
|
||||||
|
className={cn(
|
||||||
|
'inline-block w-1.5 h-1.5 rounded-full shrink-0 bg-violet-500',
|
||||||
|
className,
|
||||||
|
)}
|
||||||
|
/>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const bg =
|
||||||
|
status === 'idle_warm' ? 'bg-emerald-500'
|
||||||
|
: status === 'error' ? 'bg-destructive'
|
||||||
|
: 'bg-muted-foreground/40';
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<span
|
<span
|
||||||
aria-label={`Status: ${STATUS_LABEL[status]}`}
|
aria-label={`Status: ${STATUS_LABEL[status]}`}
|
||||||
title={STATUS_LABEL[status]}
|
title={STATUS_LABEL[status]}
|
||||||
className={cn(
|
className={cn('inline-block w-1.5 h-1.5 rounded-full shrink-0', bg, className)}
|
||||||
'inline-block w-1.5 h-1.5 rounded-full shrink-0',
|
|
||||||
STATUS_CLASS[status],
|
|
||||||
className,
|
|
||||||
)}
|
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import { api } from '@/api/client';
|
|||||||
import { useSessionStream } from '@/hooks/useSessionStream';
|
import { useSessionStream } from '@/hooks/useSessionStream';
|
||||||
import { MessageList } from '@/components/MessageList';
|
import { MessageList } from '@/components/MessageList';
|
||||||
import { ChatInput } from '@/components/ChatInput';
|
import { ChatInput } from '@/components/ChatInput';
|
||||||
|
import { StaleStreamBanner } from '@/components/StaleStreamBanner';
|
||||||
import {
|
import {
|
||||||
DropdownMenu,
|
DropdownMenu,
|
||||||
DropdownMenuContent,
|
DropdownMenuContent,
|
||||||
@@ -44,6 +45,38 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
|
|||||||
|
|
||||||
const chatMessages = stream.messages.filter((m) => m.chat_id === chatId);
|
const chatMessages = stream.messages.filter((m) => m.chat_id === chatId);
|
||||||
const streaming = chatMessages.some((m) => m.status === 'streaming');
|
const streaming = chatMessages.some((m) => m.status === 'streaming');
|
||||||
|
|
||||||
|
// v1.12.3: stale-stream detection. Watches the (at most one) streaming
|
||||||
|
// assistant row. If its content length doesn't grow for STALE_THRESHOLD_MS,
|
||||||
|
// assume the upstream call is dead and surface the recovery banner. We use
|
||||||
|
// content length as the activity signal because every token delta extends
|
||||||
|
// it; last_seq isn't currently bumped per delta.
|
||||||
|
const STALE_THRESHOLD_MS = 60_000;
|
||||||
|
const streamingMsg = chatMessages.find((m) => m.status === 'streaming' && m.role === 'assistant');
|
||||||
|
const streamingId = streamingMsg?.id ?? null;
|
||||||
|
const streamingLen = streamingMsg?.content.length ?? 0;
|
||||||
|
const lastActivityRef = useRef<{ id: string; len: number; at: number } | null>(null);
|
||||||
|
const [stale, setStale] = useState(false);
|
||||||
|
useEffect(() => {
|
||||||
|
if (!streamingId) {
|
||||||
|
lastActivityRef.current = null;
|
||||||
|
setStale(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const prev = lastActivityRef.current;
|
||||||
|
if (!prev || prev.id !== streamingId || prev.len !== streamingLen) {
|
||||||
|
lastActivityRef.current = { id: streamingId, len: streamingLen, at: Date.now() };
|
||||||
|
setStale(false);
|
||||||
|
}
|
||||||
|
const interval = setInterval(() => {
|
||||||
|
const a = lastActivityRef.current;
|
||||||
|
if (!a) return;
|
||||||
|
if (Date.now() - a.at >= STALE_THRESHOLD_MS) {
|
||||||
|
setStale(true);
|
||||||
|
}
|
||||||
|
}, 5_000);
|
||||||
|
return () => clearInterval(interval);
|
||||||
|
}, [streamingId, streamingLen]);
|
||||||
// v1.11.5: per-chat model context limit comes from chat.model_context_limit
|
// v1.11.5: per-chat model context limit comes from chat.model_context_limit
|
||||||
// populated by GET /api/sessions/:id/chats. Threaded into ChatInput so
|
// populated by GET /api/sessions/:id/chats. Threaded into ChatInput so
|
||||||
// ContextBar can render a zero-state before the first assistant message.
|
// ContextBar can render a zero-state before the first assistant message.
|
||||||
@@ -87,6 +120,45 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleDiscardStale = useCallback(async () => {
|
||||||
|
if (!streamingId) return;
|
||||||
|
try {
|
||||||
|
await api.chats.discardStale(chatId, streamingId);
|
||||||
|
setStale(false);
|
||||||
|
lastActivityRef.current = null;
|
||||||
|
} catch (err) {
|
||||||
|
// 409 (race) is benign — the row already terminated some other way.
|
||||||
|
const msg = err instanceof Error ? err.message : 'discard failed';
|
||||||
|
if (!msg.includes('409')) toast.error(msg);
|
||||||
|
setStale(false);
|
||||||
|
}
|
||||||
|
}, [chatId, streamingId]);
|
||||||
|
|
||||||
|
const handleRetryStale = useCallback(async () => {
|
||||||
|
if (!streamingId) return;
|
||||||
|
const lastUser = [...chatMessages].reverse().find((m) => m.role === 'user' && m.kind === 'message');
|
||||||
|
if (!lastUser) {
|
||||||
|
toast.error('no prior user message to retry');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
await api.chats.discardStale(chatId, streamingId);
|
||||||
|
} catch (err) {
|
||||||
|
const msg = err instanceof Error ? err.message : 'discard failed';
|
||||||
|
if (!msg.includes('409')) {
|
||||||
|
toast.error(msg);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setStale(false);
|
||||||
|
lastActivityRef.current = null;
|
||||||
|
try {
|
||||||
|
await api.messages.send(chatId, lastUser.content);
|
||||||
|
} catch (err) {
|
||||||
|
toast.error(err instanceof Error ? err.message : 'retry send failed');
|
||||||
|
}
|
||||||
|
}, [chatId, streamingId, chatMessages]);
|
||||||
|
|
||||||
const handleForceSend = useCallback(async (content: string) => {
|
const handleForceSend = useCallback(async (content: string) => {
|
||||||
const trimmed = content.trim();
|
const trimmed = content.trim();
|
||||||
if (!trimmed) return;
|
if (!trimmed) return;
|
||||||
@@ -187,6 +259,13 @@ export function ChatPane({ sessionId, chatId, projectId, agentId, onAgentChange,
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{stale && streamingId && (
|
||||||
|
<StaleStreamBanner
|
||||||
|
onRetry={() => void handleRetryStale()}
|
||||||
|
onDiscard={() => void handleDiscardStale()}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
<ChatInput
|
<ChatInput
|
||||||
disabled={false}
|
disabled={false}
|
||||||
projectId={projectId}
|
projectId={projectId}
|
||||||
|
|||||||
@@ -41,6 +41,12 @@ export interface SessionUpdatedEvent {
|
|||||||
updated_at: string;
|
updated_at: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface SessionWorkspaceUpdatedEvent {
|
||||||
|
type: 'session_workspace_updated';
|
||||||
|
session_id: string;
|
||||||
|
workspace_panes: import('@/api/types').WorkspacePane[];
|
||||||
|
}
|
||||||
|
|
||||||
export interface SessionLoadedEvent {
|
export interface SessionLoadedEvent {
|
||||||
type: 'session_loaded';
|
type: 'session_loaded';
|
||||||
session_id: string;
|
session_id: string;
|
||||||
@@ -131,7 +137,7 @@ export interface ProjectUpdatedEvent {
|
|||||||
export interface ChatStatusEvent {
|
export interface ChatStatusEvent {
|
||||||
type: 'chat_status';
|
type: 'chat_status';
|
||||||
chat_id: string;
|
chat_id: string;
|
||||||
status: 'working' | 'idle' | 'error';
|
status: 'streaming' | 'tool_running' | 'waiting_for_input' | 'idle' | 'error';
|
||||||
at: string;
|
at: string;
|
||||||
reason?: ErrorReason;
|
reason?: ErrorReason;
|
||||||
}
|
}
|
||||||
@@ -143,6 +149,7 @@ export type SessionEvent =
|
|||||||
| SessionCreatedEvent
|
| SessionCreatedEvent
|
||||||
| SessionDeletedEvent
|
| SessionDeletedEvent
|
||||||
| SessionUpdatedEvent
|
| SessionUpdatedEvent
|
||||||
|
| SessionWorkspaceUpdatedEvent
|
||||||
| SessionLoadedEvent
|
| SessionLoadedEvent
|
||||||
| OpenFileInBrowserEvent
|
| OpenFileInBrowserEvent
|
||||||
| AttachChatFileEvent
|
| AttachChatFileEvent
|
||||||
|
|||||||
@@ -1,8 +1,14 @@
|
|||||||
import { useEffect, useState } from 'react';
|
import { useEffect, useState } from 'react';
|
||||||
import { sessionEvents } from './sessionEvents';
|
import { sessionEvents } from './sessionEvents';
|
||||||
|
|
||||||
export type RawStatus = 'working' | 'idle' | 'error';
|
export type RawStatus = 'streaming' | 'tool_running' | 'waiting_for_input' | 'idle' | 'error';
|
||||||
export type DerivedStatus = 'working' | 'idle_warm' | 'idle_cold' | 'error';
|
export type DerivedStatus =
|
||||||
|
| 'streaming'
|
||||||
|
| 'tool_running'
|
||||||
|
| 'waiting_for_input'
|
||||||
|
| 'idle_warm'
|
||||||
|
| 'idle_cold'
|
||||||
|
| 'error';
|
||||||
|
|
||||||
// Window during which an idle dot stays green; after this, it fades to gray.
|
// Window during which an idle dot stays green; after this, it fades to gray.
|
||||||
const WARM_WINDOW_MS = 30_000;
|
const WARM_WINDOW_MS = 30_000;
|
||||||
@@ -53,7 +59,9 @@ if (!G.__boocode_chat_status_subscribed) {
|
|||||||
|
|
||||||
function derive(entry: Entry | undefined): DerivedStatus {
|
function derive(entry: Entry | undefined): DerivedStatus {
|
||||||
if (!entry) return 'idle_cold';
|
if (!entry) return 'idle_cold';
|
||||||
if (entry.status === 'working') return 'working';
|
if (entry.status === 'streaming') return 'streaming';
|
||||||
|
if (entry.status === 'tool_running') return 'tool_running';
|
||||||
|
if (entry.status === 'waiting_for_input') return 'waiting_for_input';
|
||||||
if (entry.status === 'error') return 'error';
|
if (entry.status === 'error') return 'error';
|
||||||
const age = Date.now() - new Date(entry.at).getTime();
|
const age = Date.now() - new Date(entry.at).getTime();
|
||||||
return age < WARM_WINDOW_MS ? 'idle_warm' : 'idle_cold';
|
return age < WARM_WINDOW_MS ? 'idle_warm' : 'idle_cold';
|
||||||
|
|||||||
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
106
apps/web/src/hooks/useChatThroughput.ts
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { useEffect, useState } from 'react';
|
||||||
|
|
||||||
|
// v1.12.2: live throughput stream consumer. Fed by useSessionStream when a
|
||||||
|
// 'usage' WS frame lands. Renders next to StatusDot via ChatThroughput.
|
||||||
|
//
|
||||||
|
// Singleton + Set<setState> pattern mirrors useChatStatus so any component
|
||||||
|
// can subscribe to any chatId without prop drilling.
|
||||||
|
|
||||||
|
/** Public throughput reading for one chat; every field may be unknown (null). */
export interface ThroughputSample {
  /** Tokens per second, or null when no rate has been computed yet. */
  tps: number | null;
  /** Context tokens in use, as reported by the latest usage frame. */
  ctx_used: number | null;
  /** Context size limit, as reported by the latest usage frame. */
  ctx_max: number | null;
}
|
||||||
|
|
||||||
|
/** Per-chat record kept in the module-level `entries` map by recordUsage. */
interface Entry {
  // Values copied from the most recent usage frame.
  ctx_used: number | null;
  ctx_max: number | null;
  completion_tokens: number | null;
  // Date.now() at the moment the most recent frame was recorded.
  recorded_at: number;
  // completion_tokens / recorded_at of the entry this one replaced
  // (null when there was no earlier entry).
  prev_completion_tokens: number | null;
  prev_recorded_at: number | null;
  // Last computed tokens-per-second; carried over unchanged when a frame
  // does not yield a new rate.
  tps: number | null;
}
|
||||||
|
|
||||||
|
// Stale window. After this, useChatThroughput returns null — clears the
|
||||||
|
// indicator after the stream ends without the next inference turn.
|
||||||
|
const STALE_MS = 10_000;
|
||||||
|
|
||||||
|
const entries = new Map<string, Entry>();
|
||||||
|
const subscribers = new Set<() => void>();
|
||||||
|
|
||||||
|
/** Invoke every subscriber; one throwing must not stop the rest. */
function notify(): void {
  subscribers.forEach((listener) => {
    try {
      listener();
    } catch {
      /* swallow */
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * v1.12.2: record one usage sample for a chat and notify subscribers.
 *
 * The rate is derived from the gap between this sample and the previously
 * stored one, so the first sample for a chat leaves `tps` at null. A sample
 * whose completion_tokens did not grow (e.g. a duplicate usage frame), or that
 * arrives in the same millisecond, keeps the previously computed `tps` instead
 * of forcing it to 0.
 *
 * NOTE(review): a sample whose completion_tokens is *lower* than the stored
 * one also keeps the old `tps` — presumably that happens when a new stream
 * starts; confirm that carrying the old rate over is intended.
 *
 * @param chatId Chat the sample belongs to.
 * @param data   Values taken from the usage frame; any of them may be null.
 */
export function recordUsage(
  chatId: string,
  data: { completion_tokens: number | null; ctx_used: number | null; ctx_max: number | null },
): void {
  const now = Date.now();
  const previous = entries.get(chatId);
  const current = data.completion_tokens;

  let tps: number | null = null;
  if (previous) {
    // Default to the last known rate; only overwrite on real forward progress.
    tps = previous.tps;
    const before = previous.completion_tokens;
    const elapsedMs = now - previous.recorded_at;
    if (current != null && before != null && current > before && elapsedMs > 0) {
      tps = (current - before) / (elapsedMs / 1000);
    }
  }

  entries.set(chatId, {
    ctx_used: data.ctx_used,
    ctx_max: data.ctx_max,
    completion_tokens: current,
    recorded_at: now,
    prev_completion_tokens: previous ? previous.completion_tokens : null,
    prev_recorded_at: previous ? previous.recorded_at : null,
    tps,
  });
  notify();
}
|
||||||
|
|
||||||
|
/**
 * Remove the stored sample for `chatId`.
 * Subscribers are notified only when an entry was actually removed.
 */
export function clearThroughput(chatId: string): void {
  const removed = entries.delete(chatId);
  if (removed) {
    notify();
  }
}
|
||||||
|
|
||||||
|
// Periodic sweep: every 2 s, drop entries whose last sample is older than
// STALE_MS and notify subscribers if anything was dropped, so the indicator
// disappears when a stream ends without a follow-up frame. The flag on
// globalThis keeps a repeated evaluation of this module from installing a
// second timer — one timer for the whole app.
const G = globalThis as Record<string, unknown>;
if (!G.__boocode_throughput_ticker) {
  G.__boocode_throughput_ticker = true;
  setInterval(() => {
    const now = Date.now();
    const expiredIds = [...entries]
      .filter(([, entry]) => now - entry.recorded_at > STALE_MS)
      .map(([id]) => id);
    for (const id of expiredIds) {
      entries.delete(id);
    }
    if (expiredIds.length > 0) {
      notify();
    }
  }, 2_000);
}
|
||||||
|
|
||||||
|
/**
 * React hook returning the latest throughput sample for a chat.
 *
 * The component re-renders whenever the module-level store notifies (any chat,
 * not just `chatId`), and the current entry is read during render.
 *
 * @param chatId Chat to read; a null, undefined or empty id yields null.
 * @returns The sample, or null when there is no entry or the entry's last
 *          update is older than STALE_MS.
 */
export function useChatThroughput(chatId: string | null | undefined): ThroughputSample | null {
  // The state value itself is unused; setting a fresh object forces a render.
  const [, rerender] = useState({});

  useEffect(() => {
    const listener = (): void => {
      rerender({});
    };
    subscribers.add(listener);
    return () => {
      subscribers.delete(listener);
    };
  }, []);

  const entry = chatId ? entries.get(chatId) : undefined;
  if (entry === undefined) {
    return null;
  }
  const isFresh = Date.now() - entry.recorded_at <= STALE_MS;
  return isFresh
    ? { tps: entry.tps, ctx_used: entry.ctx_used, ctx_max: entry.ctx_max }
    : null;
}
|
||||||
@@ -12,6 +12,7 @@ export interface UseSessionChatsOpts {
|
|||||||
// about pane indexing.
|
// about pane indexing.
|
||||||
openChatInActivePane: (chatId: string) => void;
|
openChatInActivePane: (chatId: string) => void;
|
||||||
initializeFirstChatIfEmpty: (chatId: string) => void;
|
initializeFirstChatIfEmpty: (chatId: string) => void;
|
||||||
|
validatePanes: (validChatIds: Set<string>) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface UseSessionChatsResult {
|
export interface UseSessionChatsResult {
|
||||||
@@ -44,12 +45,15 @@ export function useSessionChats(
|
|||||||
openChatInActivePaneRef.current = opts.openChatInActivePane;
|
openChatInActivePaneRef.current = opts.openChatInActivePane;
|
||||||
const initializeFirstChatIfEmptyRef = useRef(opts.initializeFirstChatIfEmpty);
|
const initializeFirstChatIfEmptyRef = useRef(opts.initializeFirstChatIfEmpty);
|
||||||
initializeFirstChatIfEmptyRef.current = opts.initializeFirstChatIfEmpty;
|
initializeFirstChatIfEmptyRef.current = opts.initializeFirstChatIfEmpty;
|
||||||
|
const validatePanesRef = useRef(opts.validatePanes);
|
||||||
|
validatePanesRef.current = opts.validatePanes;
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
let cancelled = false;
|
let cancelled = false;
|
||||||
api.chats.listForSession(sessionId).then((list) => {
|
api.chats.listForSession(sessionId).then((list) => {
|
||||||
if (cancelled) return;
|
if (cancelled) return;
|
||||||
setChats(list);
|
setChats(list);
|
||||||
|
validatePanesRef.current(new Set(list.map((c) => c.id)));
|
||||||
const openChat = list.find((c) => c.status === 'open');
|
const openChat = list.find((c) => c.status === 'open');
|
||||||
if (openChat) {
|
if (openChat) {
|
||||||
initializeFirstChatIfEmptyRef.current(openChat.id);
|
initializeFirstChatIfEmptyRef.current(openChat.id);
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { toast } from 'sonner';
|
|||||||
import type { Message, WsFrame } from '@/api/types';
|
import type { Message, WsFrame } from '@/api/types';
|
||||||
import { api } from '@/api/client';
|
import { api } from '@/api/client';
|
||||||
import { sessionEvents } from './sessionEvents';
|
import { sessionEvents } from './sessionEvents';
|
||||||
|
import { recordUsage } from './useChatThroughput';
|
||||||
|
|
||||||
// session_renamed frame removed from WsFrame — it was declared but never
|
// session_renamed frame removed from WsFrame — it was declared but never
|
||||||
// published on the per-session WS channel (server publishes via broker.publishUser
|
// published on the per-session WS channel (server publishes via broker.publishUser
|
||||||
@@ -125,6 +126,19 @@ function applyFrame(state: State, frame: WsFrame): State {
|
|||||||
);
|
);
|
||||||
return { ...state, messages: next };
|
return { ...state, messages: next };
|
||||||
}
|
}
|
||||||
|
case 'usage': {
|
||||||
|
// v1.12.2: live throughput. Side-effects into the module-level
|
||||||
|
// singleton consumed by ChatThroughput; no message-state mutation.
|
||||||
|
// chat_id is the optional ws-frame field; usage frames always include it.
|
||||||
|
if (frame.chat_id) {
|
||||||
|
recordUsage(frame.chat_id, {
|
||||||
|
completion_tokens: frame.completion_tokens,
|
||||||
|
ctx_used: frame.ctx_used,
|
||||||
|
ctx_max: frame.ctx_max,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return state;
|
||||||
|
}
|
||||||
case 'messages_deleted': {
|
case 'messages_deleted': {
|
||||||
const removeSet = new Set(frame.message_ids);
|
const removeSet = new Set(frame.message_ids);
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -143,6 +143,9 @@ function applyEvent(prev: SidebarResponse, event: import('./sessionEvents').Sess
|
|||||||
case 'session_loaded':
|
case 'session_loaded':
|
||||||
// activeSessionProjectId is updated in the subscribe callback; no data change here.
|
// activeSessionProjectId is updated in the subscribe callback; no data change here.
|
||||||
return prev;
|
return prev;
|
||||||
|
case 'session_workspace_updated':
|
||||||
|
// Pane layout is consumed by useWorkspacePanes; sidebar has no stake.
|
||||||
|
return prev;
|
||||||
case 'open_file_in_browser':
|
case 'open_file_in_browser':
|
||||||
// Consumed by Workspace (T7); no sidebar state change needed.
|
// Consumed by Workspace (T7); no sidebar state change needed.
|
||||||
return prev;
|
return prev;
|
||||||
|
|||||||
@@ -4,9 +4,14 @@ import { toast } from 'sonner';
|
|||||||
import { api } from '@/api/client';
|
import { api } from '@/api/client';
|
||||||
import type { WorkspacePane } from '@/api/types';
|
import type { WorkspacePane } from '@/api/types';
|
||||||
import { setActivePaneInfo, clearActivePane } from '@/hooks/useActivePane';
|
import { setActivePaneInfo, clearActivePane } from '@/hooks/useActivePane';
|
||||||
|
import { sessionEvents } from '@/hooks/sessionEvents';
|
||||||
|
|
||||||
export const MAX_PANES = 5;
|
export const MAX_PANES = 5;
|
||||||
const STORAGE_KEY = 'boocode.workspace.panes';
|
// v1.12.1: legacy localStorage key. Read once on mount to seed the server
|
||||||
|
// for sessions still on per-device state, then deleted. Server is now
|
||||||
|
// authoritative via sessions.workspace_panes.
|
||||||
|
const LEGACY_STORAGE_KEY = 'boocode.workspace.panes';
|
||||||
|
const SAVE_DEBOUNCE_MS = 300;
|
||||||
|
|
||||||
function generateId(): string {
|
function generateId(): string {
|
||||||
return crypto.randomUUID();
|
return crypto.randomUUID();
|
||||||
@@ -51,9 +56,11 @@ function nonSettingsCount(panes: WorkspacePane[]): number {
|
|||||||
return panes.reduce((n, p) => n + (p.kind === 'settings' ? 0 : 1), 0);
|
return panes.reduce((n, p) => n + (p.kind === 'settings' ? 0 : 1), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
function loadPanes(sessionId: string): WorkspacePane[] | null {
|
// v1.12.1: read legacy per-device localStorage. If present, the caller seeds
|
||||||
|
// the server then deletes the key. One-time migration per session.
|
||||||
|
function readLegacyPanes(sessionId: string): WorkspacePane[] | null {
|
||||||
try {
|
try {
|
||||||
const raw = localStorage.getItem(`${STORAGE_KEY}.${sessionId}`);
|
const raw = localStorage.getItem(`${LEGACY_STORAGE_KEY}.${sessionId}`);
|
||||||
if (!raw) return null;
|
if (!raw) return null;
|
||||||
const parsed = JSON.parse(raw) as WorkspacePane[];
|
const parsed = JSON.parse(raw) as WorkspacePane[];
|
||||||
if (!Array.isArray(parsed) || parsed.length === 0) return null;
|
if (!Array.isArray(parsed) || parsed.length === 0) return null;
|
||||||
@@ -63,15 +70,6 @@ function loadPanes(sessionId: string): WorkspacePane[] | null {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function savePanes(sessionId: string, panes: WorkspacePane[]): void {
|
|
||||||
try {
|
|
||||||
localStorage.setItem(
|
|
||||||
`${STORAGE_KEY}.${sessionId}`,
|
|
||||||
JSON.stringify(persistablePanes(panes)),
|
|
||||||
);
|
|
||||||
} catch { /* quota or disabled */ }
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface UseWorkspacePanesResult {
|
export interface UseWorkspacePanesResult {
|
||||||
panes: WorkspacePane[];
|
panes: WorkspacePane[];
|
||||||
activePaneIdx: number;
|
activePaneIdx: number;
|
||||||
@@ -96,6 +94,7 @@ export interface UseWorkspacePanesResult {
|
|||||||
removePane: (idx: number) => void;
|
removePane: (idx: number) => void;
|
||||||
removeChatFromPanes: (chatId: string) => void;
|
removeChatFromPanes: (chatId: string) => void;
|
||||||
initializeFirstChatIfEmpty: (chatId: string) => void;
|
initializeFirstChatIfEmpty: (chatId: string) => void;
|
||||||
|
validatePanes: (validChatIds: Set<string>) => void;
|
||||||
handlePaneDragStart: (idx: number) => (e: DragEvent<HTMLDivElement>) => void;
|
handlePaneDragStart: (idx: number) => (e: DragEvent<HTMLDivElement>) => void;
|
||||||
handlePaneDragOver: (idx: number) => (e: DragEvent<HTMLDivElement>) => void;
|
handlePaneDragOver: (idx: number) => (e: DragEvent<HTMLDivElement>) => void;
|
||||||
handlePaneDragLeave: () => void;
|
handlePaneDragLeave: () => void;
|
||||||
@@ -106,15 +105,85 @@ export interface UseWorkspacePanesResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
||||||
const [panes, setPanes] = useState<WorkspacePane[]>(() => {
|
const [panes, setPanes] = useState<WorkspacePane[]>(() => [emptyPane()]);
|
||||||
return loadPanes(sessionId) ?? [emptyPane()];
|
|
||||||
});
|
|
||||||
const [activePaneIdx, setActivePaneIdx] = useState(0);
|
const [activePaneIdx, setActivePaneIdx] = useState(0);
|
||||||
const draggingIdxRef = useRef<number | null>(null);
|
const draggingIdxRef = useRef<number | null>(null);
|
||||||
const [dragOverIdx, setDragOverIdx] = useState<number | null>(null);
|
const [dragOverIdx, setDragOverIdx] = useState<number | null>(null);
|
||||||
|
// v1.12.1: skip PATCH while hydrating from the server. Without this, the
|
||||||
|
// initial [emptyPane()] would be saved over the server's real state before
|
||||||
|
// the GET resolves.
|
||||||
|
const hydratedRef = useRef(false);
|
||||||
|
// Tracks the last value broadcast by another device (or this one's own
|
||||||
|
// round-trip). If a PATCH would echo this exact payload, we skip the call.
|
||||||
|
const lastRemoteJsonRef = useRef<string>('[]');
|
||||||
|
|
||||||
|
// v1.12.1: hydrate from server on mount, then subscribe to remote updates.
|
||||||
useEffect(() => {
  // Block the debounced PATCH until this session's layout has been loaded, so
  // the placeholder state is never written over the server's real state.
  hydratedRef.current = false;
  let cancelled = false;
  void (async () => {
    try {
      const session = await api.sessions.get(sessionId);
      if (cancelled) return;
      let initial: WorkspacePane[] = Array.isArray(session.workspace_panes)
        ? session.workspace_panes
        : [];
      // One-time migration: if server is empty but legacy localStorage has
      // a layout, seed the server and delete the local key.
      if (initial.length === 0) {
        const legacy = readLegacyPanes(sessionId);
        if (legacy && legacy.length > 0) {
          try {
            const updated = await api.sessions.updateWorkspacePanes(sessionId, legacy);
            if (cancelled) return;
            initial = updated.workspace_panes;
            localStorage.removeItem(`${LEGACY_STORAGE_KEY}.${sessionId}`);
          } catch {
            // Seeding failed: still show the legacy layout locally. The
            // localStorage key is kept, so a later load can retry the migration.
            initial = legacy;
          }
        }
      }
      const next = initial.length > 0 ? initial : [emptyPane()];
      // Remember what the server holds so the PATCH effect does not echo it back.
      lastRemoteJsonRef.current = JSON.stringify(persistablePanes(next));
      setPanes(next);
      setActivePaneIdx(0);
    } catch (err: unknown) {
      // The initial GET failed. Without this handler the rejection would escape
      // the `void`-ed async function as an unhandled promise rejection. Keep the
      // current panes; the finally block still enables saving, as before.
      console.warn('workspace panes: hydration failed', err);
    } finally {
      if (!cancelled) hydratedRef.current = true;
    }
  })();
  return () => { cancelled = true; };
}, [sessionId]);
|
||||||
|
|
||||||
|
// v1.12.1: live cross-device sync. Replace local state when another device
|
||||||
|
// (or our own write echo) lands a session_workspace_updated frame.
|
||||||
|
useEffect(() => {
  // Replace local panes whenever a session_workspace_updated event for this
  // session carries a payload different from the last one seen or sent.
  const unsubscribe = sessionEvents.subscribe((ev) => {
    if (ev.type !== 'session_workspace_updated') return;
    if (ev.session_id !== sessionId) return;

    const remotePanes = Array.isArray(ev.workspace_panes) ? ev.workspace_panes : [];
    const serialized = JSON.stringify(remotePanes);
    // Identical to the last known payload (e.g. the echo of our own write): ignore.
    if (serialized === lastRemoteJsonRef.current) return;

    lastRemoteJsonRef.current = serialized;
    const lastValidIdx = Math.max(0, remotePanes.length - 1);
    setPanes(remotePanes.length > 0 ? remotePanes : [emptyPane()]);
    // Keep the active index inside the new pane list.
    setActivePaneIdx((current) => Math.min(current, lastValidIdx));
  });
  return unsubscribe;
}, [sessionId]);
|
||||||
|
|
||||||
|
// v1.12.1: debounced PATCH on every change. Settings panes are stripped
|
||||||
|
// before saving (ephemeral per v1.9).
|
||||||
|
useEffect(() => {
  // Nothing is saved until hydration for this session has finished.
  if (!hydratedRef.current) return;

  const outgoing = persistablePanes(panes);
  const serialized = JSON.stringify(outgoing);
  // Skip when the payload matches what was last received from / sent to the server.
  if (serialized === lastRemoteJsonRef.current) return;

  const handle = setTimeout(() => {
    // Recorded before the request resolves, so the resulting echo event is ignored.
    lastRemoteJsonRef.current = serialized;
    api.sessions.updateWorkspacePanes(sessionId, outgoing).catch(() => {
      // Non-fatal: next change retries. Persistent failures surface via
      // the network layer's existing reconnect toast.
    });
  }, SAVE_DEBOUNCE_MS);

  // A newer change (or unmount) cancels the pending save.
  return () => {
    clearTimeout(handle);
  };
}, [sessionId, panes]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -328,6 +397,23 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
|||||||
});
|
});
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
// Remove chat ids that no longer exist from every chat pane.
//  - Panes that are not chat panes, or whose ids are all still valid, are
//    returned by reference; if no pane changed, the previous array is returned
//    so React skips the state update.
//  - A pane that loses all of its chats becomes an empty pane.
//  - Otherwise the previously active chat stays active when it survived the
//    filter. Clamping the old index alone would silently switch tabs whenever
//    a chat positioned before the active one was removed.
const validatePanes = useCallback((validChatIds: Set<string>) => {
  setPanes((prev) => {
    const cleaned = prev.map((pane) => {
      if (pane.kind !== 'chat' || pane.chatIds.length === 0) return pane;
      const nextIds = pane.chatIds.filter((id) => validChatIds.has(id));
      if (nextIds.length === pane.chatIds.length) return pane;
      if (nextIds.length === 0) {
        return { ...pane, kind: 'empty' as const, chatId: undefined, chatIds: [], activeChatIdx: -1 };
      }
      const activeId = pane.chatIds[pane.activeChatIdx];
      const survivingIdx = activeId === undefined ? -1 : nextIds.indexOf(activeId);
      // Fall back to the clamped old index (never below 0) only when the
      // active chat itself was removed or the old index was out of range.
      const nextActiveIdx = survivingIdx >= 0
        ? survivingIdx
        : Math.max(0, Math.min(pane.activeChatIdx, nextIds.length - 1));
      return { ...pane, chatIds: nextIds, activeChatIdx: nextActiveIdx, chatId: nextIds[nextActiveIdx] };
    });
    const unchanged = cleaned.every((p, i) => p === prev[i]);
    return unchanged ? prev : cleaned;
  });
}, []);
|
||||||
|
|
||||||
const removeChatFromPanes = useCallback((chatId: string) => {
|
const removeChatFromPanes = useCallback((chatId: string) => {
|
||||||
setPanes((prev) => prev.map((p) => {
|
setPanes((prev) => prev.map((p) => {
|
||||||
const idx = p.chatIds.indexOf(chatId);
|
const idx = p.chatIds.indexOf(chatId);
|
||||||
@@ -411,6 +497,7 @@ export function useWorkspacePanes(sessionId: string): UseWorkspacePanesResult {
|
|||||||
removePane,
|
removePane,
|
||||||
removeChatFromPanes,
|
removeChatFromPanes,
|
||||||
initializeFirstChatIfEmpty,
|
initializeFirstChatIfEmpty,
|
||||||
|
validatePanes,
|
||||||
handlePaneDragStart,
|
handlePaneDragStart,
|
||||||
handlePaneDragOver,
|
handlePaneDragOver,
|
||||||
handlePaneDragLeave,
|
handlePaneDragLeave,
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ function SessionInner({ sessionId }: { sessionId: string }) {
|
|||||||
removePane,
|
removePane,
|
||||||
removeChatFromPanes,
|
removeChatFromPanes,
|
||||||
initializeFirstChatIfEmpty,
|
initializeFirstChatIfEmpty,
|
||||||
|
validatePanes,
|
||||||
} = panesHook;
|
} = panesHook;
|
||||||
|
|
||||||
const openChatInActivePane = useCallback(
|
const openChatInActivePane = useCallback(
|
||||||
@@ -70,6 +71,7 @@ function SessionInner({ sessionId }: { sessionId: string }) {
|
|||||||
openChatInPane,
|
openChatInPane,
|
||||||
openChatInActivePane,
|
openChatInActivePane,
|
||||||
initializeFirstChatIfEmpty,
|
initializeFirstChatIfEmpty,
|
||||||
|
validatePanes,
|
||||||
});
|
});
|
||||||
const { chats, renameChat } = chatsHook;
|
const { chats, renameChat } = chatsHook;
|
||||||
|
|
||||||
|
|||||||
@@ -138,6 +138,7 @@
|
|||||||
--radius-xl: calc(var(--radius) + 4px);
|
--radius-xl: calc(var(--radius) + 4px);
|
||||||
--font-sans: "Inter Variable", "Inter", system-ui, sans-serif;
|
--font-sans: "Inter Variable", "Inter", system-ui, sans-serif;
|
||||||
--font-mono: "JetBrains Mono Variable", ui-monospace, SFMono-Regular, monospace;
|
--font-mono: "JetBrains Mono Variable", ui-monospace, SFMono-Regular, monospace;
|
||||||
|
--animate-spin-slow: spin 1.2s linear infinite;
|
||||||
}
|
}
|
||||||
|
|
||||||
@layer base {
|
@layer base {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# BooCode v1.x — Roadmap
|
# BooCode v1.x — Roadmap
|
||||||
|
|
||||||
Last updated: 2026-05-20
|
Last updated: 2026-05-21
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
@@ -10,7 +10,7 @@ Live at `https://code.indifferentketchup.com` (Caddy → Authelia → Tailscale
|
|||||||
|
|
||||||
**Architectural commitments:**
|
**Architectural commitments:**
|
||||||
|
|
||||||
- No embeddings. The model uses file-view tools (`view_file`, `list_dir`, `grep`, `find_files`) + sidecar analyzers (codecontext, codesight). Walked away from the RAG pipeline May 2026.
|
- No embeddings. Model uses file-view tools (`view_file`, `list_dir`, `grep`, `find_files`) + sidecar analyzers (codecontext, codesight) + codecontext MCP tools. Walked away from the RAG pipeline May 2026.
|
||||||
- Read-only in v1.x. Write tools land in BooCoder (separate container, post-v1.x).
|
- Read-only in v1.x. Write tools land in BooCoder (separate container, post-v1.x).
|
||||||
- One Postgres (`boocode_db`), one frontend SPA, container-per-service for new capabilities.
|
- One Postgres (`boocode_db`), one frontend SPA, container-per-service for new capabilities.
|
||||||
|
|
||||||
@@ -18,136 +18,87 @@ External code lifted from / referenced in: see `boocode_code_review.md` for full
|
|||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## Shipped (status as of 2026-05-20)
|
## Shipped (status as of 2026-05-21)
|
||||||
|
|
||||||
| Version | Theme | Notes |
|
| Version | Theme | Tag |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| v1.0 | Initial scaffold | live |
|
| v1.0 | Initial scaffold | — |
|
||||||
| Batches 1–4.4 | Markdown, sidebar, panes, chats-inside-sessions, archive, fork/delete, header polish, settings drawer | merged |
|
| Batches 1–4.4 | Markdown, sidebar, panes, chats-inside-sessions, archive, fork/delete, header polish, settings drawer | — |
|
||||||
| v1.5 | resolveProjectPath, BOOTSTRAP_ROOT, vitest pin | merged |
|
| v1.5 | resolveProjectPath, BOOTSTRAP_ROOT, vitest pin | — |
|
||||||
| v1.6, v1.6.1, v1.6.2 | Mobile pass + RightRail mobile drawer | merged |
|
| v1.6, v1.6.1, v1.6.2 | Mobile pass + RightRail mobile drawer | — |
|
||||||
| v1.7 | Drag-drop file + paste-as-attachment | merged |
|
| v1.7 | Drag-drop file + paste-as-attachment | — |
|
||||||
| v1.8, v1.8.1, v1.8.2 | Settings drawer, git_status tool, WS reconnect, **per-turn budget reset + Continue affordance + CapHitSentinel** | merged |
|
| v1.8, v1.8.1, v1.8.2 | Settings drawer, git_status tool, WS reconnect, per-turn budget reset + Continue affordance + CapHitSentinel | — |
|
||||||
| v1.9.1 | Skills system (`/opt/skills/` + `skill_find`/`skill_use`/`skill_resource` tools + `/skill` slash command) | merged |
|
| v1.9.1 | Skills system (`/opt/skills/` + `skill_find` / `skill_use` / `skill_resource` + `/skill` slash command) | `v1.9.1` |
|
||||||
| v1.9.7 | `ask_user_input` elicitation tool | merged |
|
| v1.9.7 | `ask_user_input` elicitation tool | `v1.9.7` |
|
||||||
| **Batch 9 (Agents Tier 2)** | `AGENTS.md` + 6 builtin agents + AgentPicker in ChatInput toolbar + `sessions.agent_id` | **merged in `92bd3b1`**, included in v1.9.1/v1.9.7/v1.10.x tags |
|
| Batch 9 (Agents Tier 2) | `AGENTS.md` + 6 builtin agents + AgentPicker in ChatInput toolbar + `sessions.agent_id` | folded into `v1.9.1`/`v1.9.7` |
|
||||||
| v1.10.0 | BooTerm: separate container, xterm.js + node-pty + tmux | merged |
|
| v1.10.0 | BooTerm: separate container, xterm.js + node-pty + tmux | `v1.10.0` |
|
||||||
| v1.10.1 | BooTerm-user (spawn as samkintop, login bash, Claude Code/opencode PATH) | merged |
|
| v1.10.1 | BooTerm-user (spawn as samkintop, login bash, Claude Code/opencode PATH) | `v1.10.1` |
|
||||||
| v1.10.4, v1.10.5 | Mobile terminal + XML tool-call fallback parser | merged |
|
| v1.10.4, v1.10.5 | Mobile terminal + XML tool-call fallback parser | — |
|
||||||
| **v1.11.0** | **opencode-style compaction port** (auto-overflow, anchored summary, tail preservation) | merged |
|
| v1.11.0 | opencode-style compaction port (auto-overflow, anchored summary, tail preservation) | — |
|
||||||
| v1.11.1 | Compaction follow-up (working indicator during compaction, unit tests, .bak cleanup) | merged |
|
| v1.11.1 | Compaction follow-up (working indicator during compaction, unit tests, .bak cleanup) | — |
|
||||||
| v1.11.2 | ContextBar (persistent context-usage indicator) | merged |
|
| v1.11.2 | ContextBar (persistent context-usage indicator above MessageList) | — |
|
||||||
| v1.11.3 | `ctx_max` capture via `/upstream/<model>/props` (replaces dead `timings.n_ctx` read) | merged |
|
| v1.11.3 | `ctx_max` capture via `/upstream/<model>/props` (replaces dead `timings.n_ctx` read) | `v1.11.3` |
|
||||||
|
| v1.11.5 | ContextBar inline next to agent picker; remove ChatContextPopover; default new sessions to no agent | — |
|
||||||
|
| v1.11.6 | Doom-loop guard from opencode (3 identical tool calls → sentinel, abort recursion) | — |
|
||||||
|
| v1.11.7 | pathGuard secrets filter (continue.dev `DEFAULT_SECURITY_IGNORE_FILETYPES`) | — |
|
||||||
|
| v1.11.8 | web_search + web_fetch tools via SearXNG | — |
|
||||||
|
| v1.11.9 | Manual redirect handling — re-run URL guard on each hop (SSRF hardening) | — |
|
||||||
|
| v1.11.10 | Stream-cap response body at 5MB, abort on overflow | `v1.11.x` |
|
||||||
|
| **v1.12.0** | **codecontext sidecar (Go HTTP shim, NDJSON MCP framing, child.Wait supervisor) + container guidance (BOOCHAT.md/BOOCODER.md) + 7 vendored skills + system-prompt.ts extraction + mtime-watch cache + 8 codecontext tool wrappers + per-agent tool whitelists + .codecontextignore template + agents.ts ALL_TOOL_NAMES single-source-of-truth fix** | `v1.12.0` |
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## In flight / queued
|
## In flight (uncommitted on disk, 2026-05-21)
|
||||||
|
|
||||||
| Version | Theme | Status |
|
v1.12.1 work — landed today, not yet committed:
|
||||||
|
|
||||||
|
| Item | Status | Notes |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| ~~v1.11.4~~ | ~~Per-turn budget + Continue affordance~~ | **CANCELLED** — already shipped in v1.8.2 |
|
| Server-side workspace pane sync | Done | `sessions.workspace_panes jsonb` column; PATCH endpoint; `session_workspace_updated` WS frame; localStorage migration on first load; deprecated `session_panes` table dropped |
|
||||||
| **v1.11.5** | ContextBar relocate (above agent-picker row), thicker, always-visible, remove ChatContextPopover | **dispatched** |
|
| Richer status indicators | Done | Five states (`streaming` / `tool_running` / `waiting_for_input` / `idle` / `error`) with distinct visuals: amber orbiting dots for streaming, amber spinning ring for tool execution, blue static for waiting on user, emerald/gray/red for idle/error |
|
||||||
| v1.11.6 | Doom-loop guard from opencode (3 identical tool calls → sentinel, abort recursion) | drafted |
|
| Startup hung-row sweep | Done | `UPDATE messages SET status='failed' WHERE status='streaming' AND created_at < NOW() - INTERVAL '5 minutes'` on server boot |
|
||||||
| v1.11.7 | pathGuard secrets filter (continue.dev's `DEFAULT_SECURITY_IGNORE_FILETYPES`) | drafted |
|
| One stuck row from v1.12.0 smoke | Cleared | Manual UPDATE (`d63c25b1`) |
|
||||||
| v1.11.x | Tag consolidation point (everything since v1.11.0) | queued |
|
| `detectSameNameLoop` code path | Added, never fired | Candidate for revert in next batch — dead code |
|
||||||
|
| Diagnostic logging in inference.ts | Added for debugging | Must come out before commit |
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## Major work after v1.11.x
|
## v1.12.x cleanup (NEXT — small, immediate)
|
||||||
|
|
||||||
| Version | Theme | LoC est. |
|
Five items. Group them or split them — your call.
|
||||||
|---|---|---|
|
|
||||||
| **v1.12** | codecontext sidecar + tool output truncation + repair tool call (Integration 1 + 3 from May review, fused) | ~600 |
|
|
||||||
| v1.13 | Phase B groundwork — parts table + AI SDK adoption + per-tool `read_only`/`write` tagging | ~1500 |
|
|
||||||
| v1.14 | Phase C — outer agent loop (multi-step until non-tool finish, AGENTS.md `steps` field, reasoning as part type) | ~800 |
|
|
||||||
| v1.15 | Phase D — permission ruleset + MCP client (lays foundation for BooCoder) | ~600 |
|
|
||||||
| v1.16 | Batch 11b — codesight repo_health (call graph, circular deps, dead code) | ~400 |
|
|
||||||
| **v2.0** | Batch 14 — BooCoder pending changes (new container, write tools, plandex pattern) | ~1200 |
|
|
||||||
| v2.1 | Batch 15 — BooCoder runtime isolation (per-session Docker sandbox, OpenHands pattern) | ~600 |
|
|
||||||
| v2.x | Batch 16/17 — Multi-provider LLM (optional, pi-ai) and Workflow graphs (far future, agent-framework concepts) | tbd |
|
|
||||||
|
|
||||||
-----
|
### v1.12.1 — commit consolidation
|
||||||
|
|
||||||
## Roadmap doc deviations and corrections
|
**Action items, in order:**
|
||||||
|
|
||||||
This roadmap was significantly out of sync with reality until 2026-05-20. Key corrections folded in:
|
1. **Remove diagnostic logging** from `apps/server/src/services/inference.ts`. The 12 `ctx.log.info` calls added today proved the inference loop was functioning correctly; the prompts were just slow. The calls are too verbose for production: strip them and keep the file clean.
|
||||||
|
|
||||||
1. **Batch 9 (Agents Tier 2) is done**, not "next up." Shipped as commit `92bd3b1`, included in v1.9.1 forward. The original "Track A: Batch 9 next" recommendation was correct but the doc never got updated.
|
2. **Revert `detectSameNameLoop`.** Three additions in inference.ts:
|
||||||
2. **v1.6.2 merged.** No longer "in flight."
|
- `DOOM_LOOP_SAME_NAME_THRESHOLD = 5` constant
|
||||||
3. **Batch 5 (fork/delete), Batch 6 (drag-drop), Batch 7 (settings drawer), Batch 8 (web search), Batch 10 (BooTerm) all shipped**, scattered across the v1.6–v1.10 version line. Original "Track A polish then agents" plan was abandoned; work happened opportunistically.
|
- `detectSameNameLoop()` function
|
||||||
4. **v1.11.0 was a major unplanned addition** — opencode-style compaction (auto-overflow detection + anchored rolling summary + tail preservation). This is NOT a batch from the old roadmap. It opened a new patch line (v1.11.x) of small follow-ups in front of the original Batches 11–17.
|
- Call site in `runAssistantTurn` immediately after the existing `detectDoomLoop` check
|
||||||
5. **Batch 11 (codecontext sidecar) moves to v1.12.** Bundles with truncation and repair-tool-call lift (both from opencode) since they share concerns and the `tool_choice='required'` confirmation makes repair-tool-call viable.
|
|
||||||
6. **Phase B (parts table + AI SDK + tool-call lifecycle) becomes v1.13.** This absorbs the old Batch 13 (append-only event log) — same outcome (typed message parts), different mental framing.
|
Never fired in any real run today. Dead code. The existing `detectDoomLoop` (identical args, threshold 3) is sufficient.
|
||||||
7. **Phase C and Phase D are new** (numbered v1.14/v1.15). They originate from the opencode integration analysis, not from the original 17-batch plan. Phase C delivers the outer agent loop with explicit step boundaries. Phase D delivers the permission ruleset + MCP client needed for codecontext to be useful and for BooCoder to gate writes.
|
|
||||||
8. **BooCoder (v2.0/v2.1)** is the second-major-version line. New container, new safety story (pending changes + per-session Docker sandbox). Maps to original Batches 14/15.
|
|
||||||
|
|
||||||
-----
|
3. **Drop the stale `messages_status_check` CHECK constraint** in `apps/server/src/schema.sql`. Two constraints exist on the table:
|
||||||
|
- `messages_status_check` allows `streaming|complete|failed` (old, stale)
|
||||||
|
- `messages_status_chk` allows `streaming|complete|failed|cancelled` (new)
|
||||||
|
|
||||||
|
The old one prevents `cancelled` from being written. Drop it with `ALTER TABLE messages DROP CONSTRAINT IF EXISTS messages_status_check;`.
|
||||||
|
|
||||||
## v1.11.x patches in detail
|
4. **Stop-handler writes terminal status.** When user clicks stop mid-stream, the abort path must `UPDATE messages SET status='cancelled' WHERE id = $assistantMessageId AND status='streaming'`. Currently rows just sit `streaming` forever. The startup sweep catches them on restart, but they should be written immediately. Edit `apps/server/src/services/inference.ts` `handleAbortOrError` to add the UPDATE.
|
||||||
|
|
||||||
### v1.11.0 — opencode-style compaction port ✅
|
5. **Commit + tag v1.12.1.** Include the workspace pane sync, status indicator overhaul, startup sweep, and items 1–4 above. Single commit per item is fine; tag at end.
|
||||||
|
|
||||||
**What shipped:** Auto-detection of context overflow (`isOverflow(usage, model)`) triggers compaction on the *next* user turn. Compaction preserves the last 2 turns verbatim and produces an anchored Markdown summary (8-section template lifted verbatim from opencode `compaction.ts`) that replaces older head messages. Summary is rolling — each new compaction updates the prior summary, not stacks. Schema additions: `messages.compacted_at`, `messages.summary`, `messages.tail_start_id`, `chats.needs_compaction`. WS `compacted` frame fires sonner toast on completion.
|
**Estimated:** ~150 LoC net (deletions dominate).
|
||||||
|
|
||||||
**Key divergences from opencode:** Per-chat (not per-session) compaction state because BooCode history is per-chat. UUID `tail_start_id` not BIGINT. No `parent_id` on messages. Context limit comes from `messages.ctx_max` (last-known `n_ctx`), not a `model.context_limit` field.
|
### v1.12.2 — live throughput display (small UX win)
|
||||||
|
|
||||||
### v1.11.1 — Compaction follow-up ✅
|
Surface `tokens_per_second` and `ctx_used` next to the status indicator while streaming. Backend already emits these in the `usage` frame; just consume them in the StatusDot wrapper or a sibling component. ~80 LoC, frontend-only.
|
||||||
|
|
||||||
Working-state `chat_status: working/idle` frames around the LLM call inside `compaction.process()`. 24 new vitest cases for the six pure functions (`usable`, `isOverflow`, `estimate`, `turns`, `select`, `buildPrompt`). 7 `.bak-v1.11` files deleted.
|
### v1.12.3 — stale-stream frontend banner
|
||||||
|
|
||||||
### v1.11.2 — ContextBar ✅
|
When a chat has a `streaming` row older than ~60s with no new tokens, the UI should surface a "Previous response didn't complete. [Retry] [Discard]" banner instead of silently queueing new sends. Today's debugging session lost four hours to misreading slow streams as dead; this is the UX fix that prevents that. ~150 LoC, frontend + small backend endpoint for the discard action.
|
||||||
|
|
||||||
New `ContextBar.tsx` rendering above MessageList. Shows `{used} / {max} ({pct}%)` with color tiers computed against `max - 20k` reserve (matches `compaction.usable()`): muted <60%, amber 60-80%, orange 80-95%, red ≥95%. Tooltip shows "Auto-compaction at ~N%". Mobile breakpoints: `< 380px` shows "Ctx" + numbers; `380-639px` adds parenthetical %; `≥ 640px` shows full "Context" label.
|
|
||||||
|
|
||||||
### v1.11.3 — ctx_max capture fix ✅
|
|
||||||
|
|
||||||
Discovered that the code at `inference.ts:479-481` and `compaction.ts:300` reading `parsed.timings.n_ctx` was dead — it never fired, because llama-server emits `prompt_n / predicted_n / *_ms / *_per_second` in timings but NOT `n_ctx`. New `model-context.ts` module fetches `GET /upstream/<model>/props` with 3s timeout, positive cache (no TTL), 60s negative cache. Wired into all 4 ctx_max write sites (3 in inference.ts, 1 in compaction.ts). 12 new vitest cases. 7 historical rows backfilled to `ctx_max = 262144` (single-day backfill, only qwen3.6-35b-a3b-mxfp4 in use).
|
|
||||||
|
|
||||||
### v1.11.4 — CANCELLED
|
|
||||||
|
|
||||||
Original scope: per-turn budget reset + Continue affordance + CapHitSentinel card. Recon revealed all three are already shipped (v1.8.2 timestamps in inference.ts comments). Dead version slot.
|
|
||||||
|
|
||||||
### v1.11.5 — ContextBar relocate (DISPATCHED)
|
|
||||||
|
|
||||||
Relocate ContextBar from above MessageList to above the agent-picker row. Bump height from ~4px bar to ~10-12px. Always-visible (zero-state when no assistant messages + use `model_context_limit` from v1.11.3 cache). Remove `ChatContextPopover` entirely (redundant signal; mobile-hostile).
|
|
||||||
|
|
||||||
### v1.11.6 — Doom-loop guard (QUEUED)
|
|
||||||
|
|
||||||
Detect 3 identical tool calls in a row within one turn (same name + same args via JSON.stringify). On detection: abort tool-call recursion, insert `metadata.kind='doom_loop'` sentinel, trigger summary turn via existing `runCapHitSummary` path. New `DoomLoopSentinel.tsx` component (no Continue button — looping shouldn't be retried with same tools). Per-turn sliding window, scoped to current turn's tool-call accumulator.
|
|
||||||
|
|
||||||
**Lift source:** opencode `processor.ts`, `DOOM_LOOP_THRESHOLD = 3` constant.
|
|
||||||
|
|
||||||
### v1.11.7 — pathGuard secrets filter (QUEUED)
|
|
||||||
|
|
||||||
Extend pathGuard with `DEFAULT_SECURITY_IGNORE_FILETYPES` from continue.dev `core/indexing/ignore.ts`. Three-tier matcher: exact basenames (`credentials`, `secrets.yml`), extensions (`.env`, `.pem`, `.key`, `.crt`, etc.), prefix patterns (`id_rsa`, `id_dsa`, `id_ecdsa`, `id_ed25519`). Blocked files appear in `list_dir` and `find_files` results with `(blocked)` annotation. `view_file` returns `{ error: 'blocked_secret_file', ... }`. `grep` cannot read blocked file contents. No override mechanism in v1.x (use host shell).
|
|
||||||
|
|
||||||
**Why it matters:** `/opt:/opt:ro` mount currently exposes `boolab/.env`, `dubdrive/users.json`, `authelia/state`, every other service's secrets to any tool past path validation. Cheap close on that surface area.
|
|
||||||
|
|
||||||
-----
|
|
||||||
|
|
||||||
## v1.12 — codecontext sidecar + truncation + repair tool call
|
|
||||||
|
|
||||||
Three lifts fused because they share concerns:
|
|
||||||
|
|
||||||
1. **codecontext sidecar** — new container, single-instance, path-addressed multi-project. Mount `/opt/projects:/workspace:ro`. 8 tools wired as static `ToolDef` wrappers in `apps/server/src/services/tools/codecontext/` (one file per tool). HTTP client to `http://codecontext:8765`. New module `apps/server/src/services/codecontext_bridge.ts` translates `project_id` → `/workspace/<relative>/` paths.
|
|
||||||
|
|
||||||
2. **Tool output truncation** — opencode `truncate.ts` pattern. Cap at 2000 lines / 50KB. Larger outputs: write full content server-side, return preview + opaque `id`. New tool `view_truncated_output(id)` retrieves full content by server-mapped id. **No pathGuard exception** for `/tmp` directory — the opaque-id approach avoids exposing a writable filesystem location to the model. Only codecontext outputs need truncation; native tools (view_file 200 lines, grep 200 results, list_dir 500 entries, find_files 200 results) already cap reasonably.
|
|
||||||
|
|
||||||
3. **`experimental_repairToolCall` equivalent** — when model emits malformed tool call (JSON parse fails or Zod validation fails), return a synthetic tool result instead of an error: `{ error, raw_args, tool_name, hint: 'Retry with valid JSON arguments.' }`. Model self-corrects on next step. Add one line to system prompt instructing self-correction on malformed-args results. Confirmed working precondition: `tool_choice: "required"` accepted by llama-swap (verified 2026-05-20 against qwen3.6-35b-a3b-mxfp4).
|
|
||||||
|
|
||||||
**Hand-roll, not AI SDK adoption.** AI SDK migration deferred to v1.13.
|
|
||||||
|
|
||||||
**AGENTS.md updates:** Each of the 6 builtin agents gets a curated codecontext tool whitelist:
|
|
||||||
- Architect: all 8
|
|
||||||
- Debugger: `search_symbols`, `get_dependencies`
|
|
||||||
- Code Reviewer: `get_file_analysis`
|
|
||||||
- Refactorer: `get_semantic_neighborhoods`, `get_dependencies`
|
|
||||||
- Security Auditor: `get_file_analysis`, `search_symbols`, `get_dependencies`
|
|
||||||
- Prompt Builder: none (no structural reasoning relevance)
|
|
||||||
|
|
||||||
**Dependencies:** v1.11.x merged. No others.
|
|
||||||
|
|
||||||
**Estimated:** 600 LoC across 3-4 dispatches under the v1.12 umbrella.
|
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
@@ -162,11 +113,15 @@ Three lifts fused because they share concerns:
|
|||||||
3. Tool registry: `ToolDef<T>` gains `category: 'read_only' | 'write'` field. BooCode v1.x rejects any `write` tool at registry time (defense in depth for the BooCoder split). Alpha-sort tool list before sending to model (prompt-cache stability).
|
3. Tool registry: `ToolDef<T>` gains `category: 'read_only' | 'write'` field. BooCode v1.x rejects any `write` tool at registry time (defense in depth for the BooCoder split). Alpha-sort tool list before sending to model (prompt-cache stability).
|
||||||
4. Reasoning content (`reasoning_content` from Qwen3.6) captured as its own part type instead of dropped or inlined.
|
4. Reasoning content (`reasoning_content` from Qwen3.6) captured as its own part type instead of dropped or inlined.
|
||||||
|
|
||||||
**Migration risk:** non-trivial. inference.ts is ~1400 lines with custom XML fallback, SSE parsing, compaction integration. Plan dedicated cutover window. Compaction.ts must update to assemble head from parts.
|
**Migration risk:** non-trivial. `inference.ts` is ~1700 lines with custom XML fallback, SSE parsing, compaction integration. Plan dedicated cutover window. `compaction.ts` must update to assemble head from parts.
|
||||||
|
|
||||||
**Replaces:** Original Batch 13 (append-only event log) — same outcome, different vocabulary.
|
**Replaces:** Original Batch 13 (append-only event log) — same outcome, different vocabulary.
|
||||||
|
|
||||||
**Dependencies:** v1.12 merged.
|
**Today's debugging spike validates this work.** Four hours of confusion came from JSON-blob `tool_calls` / `tool_results` columns hiding state from logs and from the inference state machine being invisible. Typed parts + per-part status would have shown the slow-stream-vs-dead distinction in seconds.
|
||||||
|
|
||||||
|
**Dependencies:** v1.12.x cleanup merged.
|
||||||
|
|
||||||
|
**Estimated:** ~1500 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
@@ -179,10 +134,12 @@ Three lifts fused because they share concerns:
|
|||||||
1. Outer loop continues until model returns non-tool finish OR step cap hit. Step ≠ tool call: one step can contain multiple tool calls in parallel.
|
1. Outer loop continues until model returns non-tool finish OR step cap hit. Step ≠ tool call: one step can contain multiple tool calls in parallel.
|
||||||
2. `agent.steps ?? Infinity` per-agent step cap. AGENTS.md gains `steps:` field. Refactorer `steps: 5`, Architect `steps: 20`, etc.
|
2. `agent.steps ?? Infinity` per-agent step cap. AGENTS.md gains `steps:` field. Refactorer `steps: 5`, Architect `steps: 20`, etc.
|
||||||
3. Step-boundary events (`step_start`, `step_finish`) explicit in the parts stream. Per-step snapshot for revert (planned for BooCoder; backend-only in v1.14).
|
3. Step-boundary events (`step_start`, `step_finish`) explicit in the parts stream. Per-step snapshot for revert (planned for BooCoder; backend-only in v1.14).
|
||||||
4. Doom-loop guard (v1.11.6) migrates from "abort recursion" to "raise within loop iteration." Same predicate, different control flow.
|
4. Doom-loop guards (v1.11.6) migrate from "abort recursion" to "raise within loop iteration." Same predicate, different control flow.
|
||||||
|
|
||||||
**Dependencies:** v1.13 merged.
|
**Dependencies:** v1.13 merged.
|
||||||
|
|
||||||
|
**Estimated:** ~800 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## v1.15 — Phase D: permission ruleset + MCP client
|
## v1.15 — Phase D: permission ruleset + MCP client
|
||||||
@@ -200,6 +157,8 @@ Three lifts fused because they share concerns:
|
|||||||
|
|
||||||
**Dependencies:** v1.13 merged (parts table for permission events). Independent of v1.14.
|
**Dependencies:** v1.13 merged (parts table for permission events). Independent of v1.14.
|
||||||
|
|
||||||
|
**Estimated:** ~600 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## v1.16 — Batch 11b: codesight repo_health
|
## v1.16 — Batch 11b: codesight repo_health
|
||||||
@@ -208,6 +167,8 @@ Call graph, circular dependency detection, dead code flagging. Port `analyze.mjs
|
|||||||
|
|
||||||
**Dependencies:** v1.12 merged (can reuse codecontext parse output where overlapping).
|
**Dependencies:** v1.12 merged (can reuse codecontext parse output where overlapping).
|
||||||
|
|
||||||
|
**Estimated:** ~400 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## v2.0 — BooCoder pending changes
|
## v2.0 — BooCoder pending changes
|
||||||
@@ -218,6 +179,8 @@ New container `boocoder` at `100.114.205.53:9502`. Owns write tools (`edit_file`
|
|||||||
|
|
||||||
**Dependencies:** v1.13 (parts) + v1.15 (permissions).
|
**Dependencies:** v1.13 (parts) + v1.15 (permissions).
|
||||||
|
|
||||||
|
**Estimated:** ~1200 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## v2.1 — BooCoder runtime isolation
|
## v2.1 — BooCoder runtime isolation
|
||||||
@@ -228,6 +191,8 @@ Per-session Docker sandbox spawned by BooCoder on first write. Only project path
|
|||||||
|
|
||||||
**Dependencies:** v2.0.
|
**Dependencies:** v2.0.
|
||||||
|
|
||||||
|
**Estimated:** ~600 LoC.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## v2.x — Optional / far future
|
## v2.x — Optional / far future
|
||||||
@@ -243,17 +208,18 @@ Per-session Docker sandbox spawned by BooCoder on first write. Only project path
|
|||||||
|
|
||||||
| Container | Port | Mount | Purpose | Status |
|
| Container | Port | Mount | Purpose | Status |
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| `boocode` | `100.114.205.53:9500` | `/opt:/opt:ro` | Chat + read-only tools + SPA | Live |
|
| `boocode` | `100.114.205.53:9500` | `/opt:/opt` | Chat + read-only tools + SPA | Live |
|
||||||
| `boocode_db` | `127.0.0.1:5500` | `boocode_pgdata` volume | Postgres 16-alpine | Live |
|
| `boocode_db` | `127.0.0.1:5500` | `boocode_pgdata` volume | Postgres 16-alpine | Live |
|
||||||
| `booterm` | `100.114.205.53:9501` | `/opt/repos:/opt/repos:rw` | Terminals (tmux + node-pty) | Live (v1.10.0) |
|
| `booterm` | `100.114.205.53:9501` | `/opt/repos:/opt/repos:rw` | Terminals (tmux + node-pty) | Live (v1.10.0) |
|
||||||
| `codecontext` | `:8765` (internal) | `/opt/projects:/workspace:ro` | MCP server for architect tools | v1.12 |
|
| **`codecontext`** | **`:8765` (internal)** | **`/opt/projects:/workspace:ro`** | **MCP server for architect tools** | **Live (v1.12.0)** |
|
||||||
| `boocoder` | `100.114.205.53:9502` | per-session sandbox | Write tools | v2.0 |
|
| `boocoder` | `100.114.205.53:9502` | per-session sandbox | Write tools | v2.0 |
|
||||||
|
|
||||||
### Schema additions by version
|
### Schema additions by version
|
||||||
|
|
||||||
- **v1.11.0:** `messages.compacted_at`, `messages.summary`, `messages.tail_start_id`, `chats.needs_compaction`
|
- **v1.11.0:** `messages.compacted_at`, `messages.summary`, `messages.tail_start_id`, `chats.needs_compaction`
|
||||||
- **v1.11.7:** none (pathGuard logic, no DB)
|
- **v1.11.7:** none (pathGuard logic, no DB)
|
||||||
- **v1.12:** none (codecontext is stateless on disk; truncation uses in-memory id→path map with TTL cleanup)
|
- **v1.12.0:** none (codecontext stateless; truncation in-memory id-map with TTL cleanup)
|
||||||
|
- **v1.12.1:** `sessions.workspace_panes jsonb` (workspace sync); drop deprecated `session_panes` table; drop stale `messages_status_check` constraint
|
||||||
- **v1.13:** `message_parts` table; `messages` becomes header-only
|
- **v1.13:** `message_parts` table; `messages` becomes header-only
|
||||||
- **v1.14:** `agents.steps` column (or AGENTS.md parser extension; no DB if file-only)
|
- **v1.14:** `agents.steps` column (or AGENTS.md parser extension; no DB if file-only)
|
||||||
- **v1.15:** `permissions` table, `agent_permissions` join, `session_permissions` join
|
- **v1.15:** `permissions` table, `agent_permissions` join, `session_permissions` join
|
||||||
@@ -268,11 +234,11 @@ Full inventory in `boocode_code_review.md`. Headline items:
|
|||||||
|
|
||||||
| Source | Used for | Where |
|
| Source | Used for | Where |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| **`sst/opencode`** (MIT, TS) | **Compaction algorithms** | **v1.11.0 (shipped)** |
|
| `sst/opencode` (MIT, TS) | Compaction algorithms | v1.11.0 (shipped) |
|
||||||
| `sst/opencode` (MIT, TS) | Doom-loop guard | v1.11.6 |
|
| `sst/opencode` (MIT, TS) | Doom-loop guard | v1.11.6 (shipped) |
|
||||||
| `sst/opencode` (MIT, TS) | `repairToolCall`, truncate.ts, MCP client, permission evaluate, runLoop | v1.12/v1.13/v1.14/v1.15 |
|
| `sst/opencode` (MIT, TS) | `repairToolCall`, truncate.ts, MCP client, permission evaluate, runLoop | v1.12 (shipped) / v1.13 / v1.14 / v1.15 |
|
||||||
| `continuedev/continue` (Apache-2.0) | `DEFAULT_SECURITY_IGNORE_FILETYPES` | v1.11.7 |
|
| `continuedev/continue` (Apache-2.0) | `DEFAULT_SECURITY_IGNORE_FILETYPES` | v1.11.7 (shipped) |
|
||||||
| `nmakod/codecontext` (MIT, Go) | Architect: codebase map sidecar | v1.12 |
|
| `nmakod/codecontext` (MIT, Go) | Architect: codebase map sidecar | v1.12.0 (shipped) |
|
||||||
| `spirituslab/codesight` (MIT-ish, TS) | Architect: repo health analyzer | v1.16 |
|
| `spirituslab/codesight` (MIT-ish, TS) | Architect: repo health analyzer | v1.16 |
|
||||||
| `Aider-AI/aider` (Apache-2.0) | Fallback `.scm` grammars | v1.12 (fallback) |
|
| `Aider-AI/aider` (Apache-2.0) | Fallback `.scm` grammars | v1.12 (fallback) |
|
||||||
| `cline/cline` (Apache-2.0) | Plan/Act pattern (absorbed into v1.15 permissions) | v1.15 |
|
| `cline/cline` (Apache-2.0) | Plan/Act pattern (absorbed into v1.15 permissions) | v1.15 |
|
||||||
@@ -281,8 +247,6 @@ Full inventory in `boocode_code_review.md`. Headline items:
|
|||||||
| `aimasteracc/tree-sitter-analyzer` (MIT) | Outline-first patterns | v1.12 (alt) |
|
| `aimasteracc/tree-sitter-analyzer` (MIT) | Outline-first patterns | v1.12 (alt) |
|
||||||
| `earendil-works/pi` (MIT) | Multi-provider LLM | v2.x (optional) |
|
| `earendil-works/pi` (MIT) | Multi-provider LLM | v2.x (optional) |
|
||||||
|
|
||||||
**Original Batch 13 (event log from OpenHands) replaced** by v1.13 (parts table). Same outcome, different framing.
|
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
## Decisions log
|
## Decisions log
|
||||||
@@ -293,10 +257,15 @@ Full inventory in `boocode_code_review.md`. Headline items:
|
|||||||
- **Globstar parked** — not an architect tool. Future verify-before-commit candidate only.
|
- **Globstar parked** — not an architect tool. Future verify-before-commit candidate only.
|
||||||
- **codeprysm rejected** — embedding-based. Node/edge taxonomy noted as reference if we ever build our own graph.
|
- **codeprysm rejected** — embedding-based. Node/edge taxonomy noted as reference if we ever build our own graph.
|
||||||
- **Batch 9 decoupled from Batch 7 (2026-05-16); shipped in `92bd3b1`.** Builtin defaults: six agents (Code Reviewer, Debugger, Refactorer, Architect, Security Auditor, Prompt Builder) with no `model` field. Session model wins by default.
|
- **Batch 9 decoupled from Batch 7 (2026-05-16); shipped in `92bd3b1`.** Builtin defaults: six agents (Code Reviewer, Debugger, Refactorer, Architect, Security Auditor, Prompt Builder) with no `model` field. Session model wins by default.
|
||||||
- **opencode lift opened** (2026-05-20). Started with compaction (v1.11.0). Continuing through v1.15. Five distinct algorithms: compaction, doom-loop guard, repairToolCall, runLoop, permission evaluate. Plus `truncate.ts` and `MCP client`. Each lifts the algorithm, not the Effect-TS plumbing.
|
- **opencode lift opened** (2026-05-20). Started with compaction (v1.11.0). Continuing through v1.15. Five distinct algorithms: compaction, doom-loop guard, repairToolCall, runLoop, permission evaluate. Plus `truncate.ts` and MCP client. Each lifts the algorithm, not the Effect-TS plumbing.
|
||||||
- **AI SDK adoption deferred to v1.13.** Hand-roll repairToolCall in v1.12 first. Migrate everything together when parts table lands.
|
- **AI SDK adoption deferred to v1.13.** The plan to hand-roll repairToolCall in v1.12 was not carried out in v1.12.0; truncation was also deferred. v1.12.0 shipped codecontext + container guidance + skills only.
|
||||||
- **`tool_choice='required'` confirmed supported** by llama-swap (qwen3.6-35b-a3b-mxfp4, 2026-05-20). Unblocks repair tool call viability.
|
- **`tool_choice='required'` confirmed supported** by llama-swap (qwen3.6-35b-a3b-mxfp4, 2026-05-20).
|
||||||
- **v1.11.4 cancelled** (2026-05-20). Per-turn budget reset + Continue affordance + CapHitSentinel were already shipped in v1.8.2. Roadmap was 14 versions stale at time of recon.
|
- **v1.11.4 cancelled** (2026-05-20). Per-turn budget reset + Continue affordance + CapHitSentinel were already shipped in v1.8.2.
|
||||||
|
- **v1.12.0 shipped** (2026-05-21). codecontext sidecar Track B + container guidance Track A. v1.12 truncation and repairToolCall were deferred into v1.13's AI SDK migration, where they come for free.
|
||||||
|
- **v1.12.1 workspace pane sync** (2026-05-21). Moved pane state from per-device localStorage to `sessions.workspace_panes jsonb` with WS broadcast for cross-device sync. Deprecated `session_panes` table dropped. Legacy localStorage migrates on first load.
|
||||||
|
- **v1.12.1 status indicator overhaul** (2026-05-21). ChatStatusFrame expanded from `working|idle|error` to `streaming|tool_running|waiting_for_input|idle|error`. StatusDot rewritten with distinct animations per state. Added `executeToolPhase`-entry `tool_running` publish.
|
||||||
|
- **detectSameNameLoop reverted** (planned v1.12.1). Added during the 2026-05-21 debugging spike to catch same-tool-name-with-different-args loops. Never fired in any real run because the existing `detectDoomLoop` covers the actual failure modes. Dead code, reverting.
|
||||||
|
- **The 2026-05-21 "freeze" debugging spike taught one lesson**: BooCode has no UI signal for the difference between a slow stream and a dead stream. Diagnostic logging (added today, reverted in v1.12.1) revealed the inference loop was working correctly throughout — what looked like four hours of deterministic hang was multiple instances of qwen3.6 generating 8k tokens of self-doubt at temperature 0.2 on a "find the bug" prompt with no real bug. v1.12.2 (live tok/s display) and v1.12.3 (stale-stream banner) directly address this gap.
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user