feat: Paseo-like orchestrator Phase 1-2 — trace system, session persistence, timeline, run_command, auto-fix loop

Phase 1: Trace System + Observability - tool_traces DB table + insert/update service - tool_trace_start/tool_trace_finish WS frames (contracts + FE types) - Instrumented tool-phase.ts with timing around every tool call - GET /api/chats/:id/traces paginated endpoint - Trace viewer frontend (collapsible panel with timing bars + token breakdown) Phase 2: Session Persistence + Resume - agent_snapshots table (UPSERT per chat, persisted on turn boundaries) - save/load/delete service functions - Agent snapshot sent on WS reconnect - Session timeline view (vertical timeline with scroll-to + restore) Tooling: - run_command tool (execFile, 30s timeout, 32KB cap, path-guarded) - Auto-fix loop: after write tools, runs the project's build/compile/typecheck script via the detected package manager (pnpm, yarn, or npm; DeepSeek models only), injects errors into next turn
2026-06-08 02:26:47 +00:00
parent 8f061c8d43
commit 9ef8f1948a
22 changed files with 2231 additions and 101 deletions
--- a/apps/server/src/services/inference/tool-phase.ts
+++ b/apps/server/src/services/inference/tool-phase.ts
@@ -20,6 +20,7 @@ import { resolveGrantRoot } from '../grant_resolver.js';
 import { stripToolMarkup } from './tool-call-parser.js';
 import { repairToolInput } from './tool-input-repair.js';
 import type { FailureKind } from './mistake-tracker.js';
+import { insertToolTrace, updateToolTrace } from '../tool-traces.js';
 import type {
  InferenceContext,
  StreamResult,
@@ -175,6 +176,7 @@ export async function executeToolPhase(
  session: Session,
  projectRoot: string,
  agent?: Agent | null,
+  turnNumber?: number,
 ): Promise<ToolPhaseResult> {
  const { sessionId, chatId, assistantMessageId } = args;
  const content = stripToolMarkup(result.content, { final: true });
@@ -378,11 +380,53 @@ export async function executeToolPhase(
        });
        return;
      }
+      // tool_trace instrumentation - start
+      const traceId = crypto.randomUUID();
+      const traceStartTime = Date.now();
+      const startedAtIso = new Date().toISOString();
+      insertToolTrace(ctx.sql, {
+        session_id: sessionId,
+        chat_id: chatId,
+        message_id: assistantMessageId,
+        turn_number: turnNumber ?? 0,
+        tool_name: tc.name,
+        tool_input: tc.args as Record<string, unknown>,
+      }).catch(() => {});
+      ctx.publish(sessionId, {
+        type: 'tool_trace_start',
+        trace_id: traceId,
+        message_id: assistantMessageId,
+        chat_id: chatId,
+        tool_name: tc.name,
+        tool_input: tc.args as Record<string, unknown>,
+        started_at: startedAtIso,
+      });
      const tres = await executeToolCall(
        projectRoot, tc, session.allowed_read_paths,
        { sql: ctx.sql, sessionId },
        ctx.hooks, sessionId,
      );
+      // tool_trace instrumentation - finish
+      const finishedAtIso = new Date().toISOString();
+      const latencyMs = Date.now() - traceStartTime;
+      updateToolTrace(ctx.sql, traceId, {
+        finished_at: finishedAtIso,
+        ...(tres.outcome === 'success' && tres.output != null ? { tool_output: JSON.stringify(tres.output) } : {}),
+        latency_ms: latencyMs,
+        outcome: tres.outcome,
+        ...(tres.error ? { error: tres.error } : {}),
+      }).catch(() => {});
+      ctx.publish(sessionId, {
+        type: 'tool_trace_finish',
+        trace_id: traceId,
+        message_id: assistantMessageId,
+        chat_id: chatId,
+        tool_name: tc.name,
+        finished_at: finishedAtIso,
+        outcome: tres.outcome,
+        latency_ms: latencyMs,
+        ...(tres.error ? { error: tres.error } : {}),
+      });
      // vWhale: PostToolUse hook (best-effort, non-blocking).
      if (ctx.hooks) {
        ctx.hooks.run('PostToolUse', {
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -37,6 +37,12 @@ import type {
  StreamResult,
  TurnArgs,
 } from './types.js';
+import { saveAgentSnapshot } from '../session-snapshots.js';
+// vWhale: auto-fix loop — after write tools, build the project and inject
+// errors. Uses execFile (no shell) against the project root.
+import { execFile } from 'node:child_process';
+import { readFileSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
 import {
  runCapHitSummary,
  runDoomLoopSummary,
@@ -44,6 +50,71 @@ import {
  insertMistakeRecoverySentinel,
 } from './sentinel-summaries.js';

+// vWhale: auto-fix — detect build command from package.json, run it, return
+// error text for injection into next iteration. Best-effort, never throws.
+const BUILD_TIMEOUT_MS = 60_000;
+const BUILD_OUTPUT_CAP = 8_000;
+
+/**
+ * Auto-fix helper: runs the project's build script and returns its output so
+ * the caller can inject it into the next iteration. Resolves to undefined when
+ * there is nothing to inject: non-DeepSeek model, no package.json or matching
+ * script, package manager not installed, or a build that exited cleanly.
+ * Best-effort: every failure path resolves to undefined instead of throwing.
+ * @param existingNote Recovery note already pending; build output is appended to it.
+ * @returns The (possibly combined) note text, or undefined.
+ */
+async function detectAndRunBuild(
+  ctx: InferenceContext,
+  projectRoot: string,
+  sessionId: string,
+  chatId: string,
+  model: string,
+  existingNote: string | undefined,
+): Promise<string | undefined> {
+  // Only run for DeepSeek models (local Qwen models don't benefit from build loop).
+  if (!model.startsWith('deepseek-')) return undefined;
+  const pkgPath = join(projectRoot, 'package.json');
+  if (!existsSync(pkgPath)) return undefined;
+  try {
+    // First script present wins: build, then compile, then typecheck.
+    const pkg = JSON.parse(readFileSync(pkgPath, 'utf8')) as { scripts?: Record<string, string> };
+    const buildCmd = ['build', 'compile', 'typecheck'].find((name) => pkg.scripts?.[name]);
+    if (!buildCmd) return undefined;
+    // Pick the package manager from whichever lockfile is present.
+    const hasPnpm = existsSync(join(projectRoot, 'pnpm-lock.yaml'));
+    const hasYarn = existsSync(join(projectRoot, 'yarn.lock'));
+    const pm = hasPnpm ? 'pnpm' : hasYarn ? 'yarn' : 'npm';
+
+    const out = await new Promise<string>((resolve) => {
+      execFile(pm, ['run', buildCmd], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_OUTPUT_CAP * 2 },
+        (err, stdout, stderr) => {
+          // err === null means exit code 0: the build passed, so nothing is
+          // injected even if the script printed progress output. ENOENT means
+          // the package manager is not installed; skip silently.
+          if (!err || (err as NodeJS.ErrnoException).code === 'ENOENT') {
+            resolve('');
+            return;
+          }
+          // Non-zero exit, timeout, or maxBuffer overflow: report what we have.
+          const merged = (stdout + '\n' + stderr).trim() || err.message;
+          resolve(merged.slice(0, BUILD_OUTPUT_CAP));
+        },
+      );
+    });
+    if (!out) return undefined;
+    ctx.log.info({ sessionId, chatId, buildCmd, outputLen: out.length }, 'auto-fix: build failed');
+
+    const header = '--- Build error ---\n';
+    if (!existingNote) return header + out;
+    // Clamp at zero: a negative end index makes slice() count from the end.
+    const budget = Math.max(0, BUILD_OUTPUT_CAP - existingNote.length);
+    return existingNote + '\n\n' + header + out.slice(0, budget);
+  } catch {
+    return undefined;
+  }
+}
+
 // P5: MAX_STEPS moved to ./turn-config.ts (with resolveTurnConfig). Re-exported
 // here so the public surface (index.ts → './turn.js') is unchanged.
 export { MAX_STEPS } from './turn-config.js';
@@ -240,7 +311,7 @@ export async function runAssistantTurn(
    // ---- tool phase ----
    let toolPhaseResult: ToolPhaseResult;
    try {
-      toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent);
+      toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
    } catch (err) {
      // Tool phase errors are unexpected (individual tool failures are
      // caught inside executeToolPhase). Log and break.
@@ -260,6 +331,17 @@ export async function runAssistantTurn(
      recordStep(mistakeTracker, o);
    }

+    // vWhale: auto-fix — after write tools, attempt build and inject errors.
+    const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
+    const hasWriteTools = toolPhaseResult.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
+    if (hasWriteTools) {
+      detectAndRunBuild(ctx, projectRoot, sessionId, chatId, iterSession.model, pendingRecoveryNote)
+        .then((buildError) => {
+          if (buildError) pendingRecoveryNote = buildError;
+        })
+        .catch(() => {});
+    }
+
    // v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
    // returned a non-'continue' action ('paused' for user input, or
    // 'synthesis_done') — neither a nudge nor an escalate would change the
@@ -336,6 +418,19 @@ export async function runAssistantTurn(
    }).catch(() => {});
  }

+  // ---- persist agent snapshot (best-effort, never blocks inference) ----
+  const snapLoaded = await loadContext(ctx.sql, sessionId, chatId).catch(() => null);
+  if (snapLoaded) {
+    await saveAgentSnapshot(ctx.sql, chatId, {
+      session_id: sessionId,
+      model: snapLoaded.session.model,
+      agent: agent?.name ?? null,
+      mode: null,
+      turn_number: stepNumber,
+      messages: snapLoaded.history.map((m) => ({ role: m.role, content: m.content })),
+    }).catch(() => {});
+  }
+
  // ---- post-loop: step-cap sentinel ----
  // When the loop exits because stepNumber reached effectiveCap, the last
  // iteration's tool phase returned 'continue' with a nextAssistantId that
--- a/apps/server/src/services/inference/types.ts
+++ b/apps/server/src/services/inference/types.ts
@@ -46,6 +46,9 @@ export interface InferenceFrame {
    | 'error'
    | 'flow_run_started'
    | 'flow_run_step_updated'
+    // tool trace frames
+    | 'tool_trace_start'
+    | 'tool_trace_finish'
    // arena frames
    | 'battle_started'
    | 'contestant_updated'
@@ -82,6 +85,15 @@ export interface InferenceFrame {
  reasoning_tokens?: number | null;
  session_id?: string;
  name?: string;
+  // tool trace frames
+  trace_id?: string;
+  tool_name?: string;
+  tool_input?: Record<string, unknown>;
+  tool_output?: string | null;
+  latency_ms?: number;
+  outcome?: string;
+  // agent snapshot restore
+  agent?: string | null;
  // orchestrator frames ([D-6])
  run_id?: string;
  flow_name?: string;