feat: Paseo-like orchestrator Phase 1-2 — trace system, session persistence, timeline, run_command, auto-fix loop

Phase 1: Trace System + Observability
- tool_traces DB table + insert/update service
- tool_trace_start/tool_trace_finish WS frames (contracts + FE types)
- Instrumented tool-phase.ts with timing around every tool call
- GET /api/chats/:id/traces paginated endpoint
- Trace viewer frontend (collapsible panel with timing bars + token breakdown)

Phase 2: Session Persistence + Resume
- agent_snapshots table (UPSERT per chat, persisted on turn boundaries)
- save/load/delete service functions
- Agent snapshot sent on WS reconnect
- Session timeline view (vertical timeline with scroll-to + restore)

Tooling:
- run_command tool (execFile, 30s timeout, 32KB cap, path-guarded)
- Auto-fix loop: after write tools, runs pnpm build, injects errors into next turn
This commit is contained in:
2026-06-08 02:26:47 +00:00
parent 8f061c8d43
commit 9ef8f1948a
22 changed files with 2231 additions and 101 deletions

View File

@@ -20,6 +20,7 @@ import { resolveGrantRoot } from '../grant_resolver.js';
import { stripToolMarkup } from './tool-call-parser.js';
import { repairToolInput } from './tool-input-repair.js';
import type { FailureKind } from './mistake-tracker.js';
import { insertToolTrace, updateToolTrace } from '../tool-traces.js';
import type {
InferenceContext,
StreamResult,
@@ -175,6 +176,7 @@ export async function executeToolPhase(
session: Session,
projectRoot: string,
agent?: Agent | null,
turnNumber?: number,
): Promise<ToolPhaseResult> {
const { sessionId, chatId, assistantMessageId } = args;
const content = stripToolMarkup(result.content, { final: true });
@@ -378,11 +380,53 @@ export async function executeToolPhase(
});
return;
}
// tool_trace instrumentation - start
const traceId = crypto.randomUUID();
const traceStartTime = Date.now();
const startedAtIso = new Date().toISOString();
insertToolTrace(ctx.sql, {
session_id: sessionId,
chat_id: chatId,
message_id: assistantMessageId,
turn_number: turnNumber ?? 0,
tool_name: tc.name,
tool_input: tc.args as Record<string, unknown>,
}).catch(() => {});
ctx.publish(sessionId, {
type: 'tool_trace_start',
trace_id: traceId,
message_id: assistantMessageId,
chat_id: chatId,
tool_name: tc.name,
tool_input: tc.args as Record<string, unknown>,
started_at: startedAtIso,
});
const tres = await executeToolCall(
projectRoot, tc, session.allowed_read_paths,
{ sql: ctx.sql, sessionId },
ctx.hooks, sessionId,
);
// tool_trace instrumentation - finish
const finishedAtIso = new Date().toISOString();
const latencyMs = Date.now() - traceStartTime;
updateToolTrace(ctx.sql, traceId, {
finished_at: finishedAtIso,
...(tres.outcome === 'success' && tres.output != null ? { tool_output: JSON.stringify(tres.output) } : {}),
latency_ms: latencyMs,
outcome: tres.outcome,
...(tres.error ? { error: tres.error } : {}),
}).catch(() => {});
ctx.publish(sessionId, {
type: 'tool_trace_finish',
trace_id: traceId,
message_id: assistantMessageId,
chat_id: chatId,
tool_name: tc.name,
finished_at: finishedAtIso,
outcome: tres.outcome,
latency_ms: latencyMs,
...(tres.error ? { error: tres.error } : {}),
});
// vWhale: PostToolUse hook (best-effort, non-blocking).
if (ctx.hooks) {
ctx.hooks.run('PostToolUse', {

View File

@@ -37,6 +37,12 @@ import type {
StreamResult,
TurnArgs,
} from './types.js';
import { saveAgentSnapshot } from '../session-snapshots.js';
// vWhale: auto-fix loop — after write tools, build the project and inject
// errors. Uses execFile (no shell) against the project root.
import { execFile } from 'node:child_process';
import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import {
runCapHitSummary,
runDoomLoopSummary,
@@ -44,6 +50,71 @@ import {
insertMistakeRecoverySentinel,
} from './sentinel-summaries.js';
// vWhale: auto-fix — detect build command from package.json, run it, return
// error text for injection into next iteration. Best-effort, never throws.
const BUILD_TIMEOUT_MS = 60_000;
const BUILD_OUTPUT_CAP = 8_000;
async function detectAndRunBuild(
ctx: InferenceContext,
projectRoot: string,
sessionId: string,
chatId: string,
model: string,
existingNote: string | undefined,
): Promise<string | undefined> {
// Only run for DeepSeek models (local Qwen models don't benefit from build loop).
if (!model.startsWith('deepseek-')) return undefined;
// Detect build command from package.json in project root.
const pkgPath = join(projectRoot, 'package.json');
if (!existsSync(pkgPath)) return undefined;
let buildCmd: string | null = null;
try {
const pkg = JSON.parse(readFileSync(pkgPath, 'utf8')) as { scripts?: Record<string, string> };
if (pkg.scripts?.build) buildCmd = 'build';
else if (pkg.scripts?.compile) buildCmd = 'compile';
else if (pkg.scripts?.typecheck) buildCmd = 'typecheck';
} catch {
return undefined;
}
if (!buildCmd) return undefined;
// Detect package manager.
const hasPnpm = existsSync(join(projectRoot, 'pnpm-lock.yaml'));
const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
// Run the build.
try {
const out = await new Promise<string>((resolve, reject) => {
execFile(pm, ['run', buildCmd!], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
(err, stdout, stderr) => {
if (err && (err as NodeJS.ErrnoException).code === 'ENOENT') {
resolve(''); // package manager not found — skip
return;
}
const merged = (stdout + '\n' + stderr).trim();
resolve(merged.slice(0, BUILD_OUTPUT_CAP));
},
);
});
if (!out) return undefined; // build succeeded or no output
ctx.log.info({ sessionId, chatId, buildCmd, outputLen: out.length }, 'auto-fix: build failed');
// Truncate if existing note exists
const combined = existingNote
? existingNote + '\n\n--- Build error ---\n' + out.slice(0, BUILD_OUTPUT_CAP - existingNote.length)
: '--- Build error ---\n' + out.slice(0, BUILD_OUTPUT_CAP);
return combined;
} catch {
return undefined;
}
}
// P5: MAX_STEPS moved to ./turn-config.ts (with resolveTurnConfig). Re-exported
// here so the public surface (index.ts → './turn.js') is unchanged.
export { MAX_STEPS } from './turn-config.js';
@@ -240,7 +311,7 @@ export async function runAssistantTurn(
// ---- tool phase ----
let toolPhaseResult: ToolPhaseResult;
try {
toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent);
toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
} catch (err) {
// Tool phase errors are unexpected (individual tool failures are
// caught inside executeToolPhase). Log and break.
@@ -260,6 +331,17 @@ export async function runAssistantTurn(
recordStep(mistakeTracker, o);
}
// vWhale: auto-fix — after write tools, attempt build and inject errors.
const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
const hasWriteTools = toolPhaseResult.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
if (hasWriteTools) {
detectAndRunBuild(ctx, projectRoot, sessionId, chatId, iterSession.model, pendingRecoveryNote)
.then((buildError) => {
if (buildError) pendingRecoveryNote = buildError;
})
.catch(() => {});
}
// v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
// returned a non-'continue' action ('paused' for user input, or
// 'synthesis_done') — neither a nudge nor an escalate would change the
@@ -336,6 +418,19 @@ export async function runAssistantTurn(
}).catch(() => {});
}
// ---- persist agent snapshot (best-effort, never blocks inference) ----
const snapLoaded = await loadContext(ctx.sql, sessionId, chatId).catch(() => null);
if (snapLoaded) {
await saveAgentSnapshot(ctx.sql, chatId, {
session_id: sessionId,
model: snapLoaded.session.model,
agent: agent?.name ?? null,
mode: null,
turn_number: stepNumber,
messages: snapLoaded.history.map((m) => ({ role: m.role, content: m.content })),
}).catch(() => {});
}
// ---- post-loop: step-cap sentinel ----
// When the loop exits because stepNumber reached effectiveCap, the last
// iteration's tool phase returned 'continue' with a nextAssistantId that

View File

@@ -46,6 +46,9 @@ export interface InferenceFrame {
| 'error'
| 'flow_run_started'
| 'flow_run_step_updated'
// tool trace frames
| 'tool_trace_start'
| 'tool_trace_finish'
// arena frames
| 'battle_started'
| 'contestant_updated'
@@ -82,6 +85,15 @@ export interface InferenceFrame {
reasoning_tokens?: number | null;
session_id?: string;
name?: string;
// tool trace frames
trace_id?: string;
tool_name?: string;
tool_input?: Record<string, unknown>;
tool_output?: string | null;
latency_ms?: number;
outcome?: string;
// agent snapshot restore
agent?: string | null;
// orchestrator frames ([D-6])
run_id?: string;
flow_name?: string;