feat: Paseo-like orchestrator Phase 1-2 — trace system, session persistence, timeline, run_command, auto-fix loop

Phase 1: Trace System + Observability
- tool_traces DB table + insert/update service
- tool_trace_start/tool_trace_finish WS frames (contracts + FE types)
- Instrumented tool-phase.ts with timing around every tool call
- GET /api/chats/:id/traces paginated endpoint
- Trace viewer frontend (collapsible panel with timing bars + token breakdown)

Phase 2: Session Persistence + Resume
- agent_snapshots table (UPSERT per chat, persisted on turn boundaries)
- save/load/delete service functions
- Agent snapshot sent on WS reconnect
- Session timeline view (vertical timeline with scroll-to + restore)

Tooling:
- run_command tool (execFile, 30s timeout, 32KB cap, path-guarded)
- Auto-fix loop: after write tools, runs the project's build script (pnpm/yarn/npm), injects errors into the next turn
2026-06-08 02:26:47 +00:00
parent 7cb692d8be
commit abe9c5a3a8
22 changed files with 2231 additions and 101 deletions
--- a/apps/server/src/services/inference/turn.ts
+++ b/apps/server/src/services/inference/turn.ts
@@ -37,6 +37,12 @@ import type {
  StreamResult,
  TurnArgs,
 } from './types.js';
+import { saveAgentSnapshot } from '../session-snapshots.js';
+// vWhale: auto-fix loop — after write tools, build the project and inject
+// errors. Uses execFile (no shell) against the project root.
+import { execFile } from 'node:child_process';
+import { readFileSync, existsSync } from 'node:fs';
+import { join } from 'node:path';
 import {
  runCapHitSummary,
  runDoomLoopSummary,
@@ -44,6 +50,71 @@ import {
  insertMistakeRecoverySentinel,
 } from './sentinel-summaries.js';

+// vWhale: auto-fix — limits for the build check that runs after write tools.
+const BUILD_TIMEOUT_MS = 60_000;
+const BUILD_OUTPUT_CAP = 8_000;
+// execFile kills the child once stdout/stderr exceed maxBuffer, so keep this
+// far above BUILD_OUTPUT_CAP: a chatty but passing build must not be killed.
+const BUILD_MAX_BUFFER = 16 * 1024 * 1024;
+
+/**
+ * Runs the project's build-like script and returns its failure output as a
+ * note for injection into the next iteration. Best-effort: never throws.
+ * @param existingNote - already-pending note; build output is appended to it.
+ * @returns the note, or undefined when the model is not DeepSeek, no script or
+ *   package manager is found, the build exits 0, or it fails without output.
+ */
+async function detectAndRunBuild(
+  ctx: InferenceContext,
+  projectRoot: string,
+  sessionId: string,
+  chatId: string,
+  model: string,
+  existingNote: string | undefined,
+): Promise<string | undefined> {
+  // Only run for DeepSeek models (local Qwen models don't benefit from build loop).
+  if (!model.startsWith('deepseek-')) return undefined;
+
+  try {
+    const pkgPath = join(projectRoot, 'package.json');
+    if (!existsSync(pkgPath)) return undefined;
+    const pkg = JSON.parse(readFileSync(pkgPath, 'utf8')) as { scripts?: Record<string, string> };
+    // First script that exists wins, in order of preference.
+    const buildCmd = ['build', 'compile', 'typecheck'].find((name) => pkg.scripts?.[name]);
+    if (!buildCmd) return undefined;
+
+    // Pick the package manager from whichever lockfile is present.
+    const pm = existsSync(join(projectRoot, 'pnpm-lock.yaml'))
+      ? 'pnpm'
+      : existsSync(join(projectRoot, 'yarn.lock')) ? 'yarn' : 'npm';
+
+    const out = await new Promise<string>((resolve) => {
+      execFile(pm, ['run', buildCmd], { cwd: projectRoot, timeout: BUILD_TIMEOUT_MS, maxBuffer: BUILD_MAX_BUFFER },
+        (err, stdout, stderr) => {
+          // No error means exit code 0: the build passed, whatever it printed.
+          // ENOENT means the package manager is not installed — skip.
+          if (!err || (err as NodeJS.ErrnoException).code === 'ENOENT') {
+            resolve('');
+            return;
+          }
+          resolve((stdout + '\n' + stderr).trim().slice(0, BUILD_OUTPUT_CAP));
+        },
+      );
+    });
+    if (!out) return undefined;  // build passed, was skipped, or failed silently
+    ctx.log.info({ sessionId, chatId, buildCmd, outputLen: out.length }, 'auto-fix: build failed');
+
+    const header = '--- Build error ---\n';
+    if (!existingNote) return header + out;
+    // Clamp at 0: a negative slice end would count from the end of `out`
+    // instead of truncating when the existing note alone exceeds the cap.
+    const room = Math.max(0, BUILD_OUTPUT_CAP - existingNote.length);
+    return existingNote + '\n\n' + header + out.slice(0, room);
+  } catch {
+    return undefined;
+  }
+}
+
 // P5: MAX_STEPS moved to ./turn-config.ts (with resolveTurnConfig). Re-exported
 // here so the public surface (index.ts → './turn.js') is unchanged.
 export { MAX_STEPS } from './turn-config.js';
@@ -240,7 +311,7 @@ export async function runAssistantTurn(
    // ---- tool phase ----
    let toolPhaseResult: ToolPhaseResult;
    try {
-      toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent);
+      toolPhaseResult = await executeToolPhase(ctx, iterArgs, result, state.startedAt, iterSession, projectRoot, agent, stepNumber);
    } catch (err) {
      // Tool phase errors are unexpected (individual tool failures are
      // caught inside executeToolPhase). Log and break.
@@ -260,6 +331,17 @@ export async function runAssistantTurn(
      recordStep(mistakeTracker, o);
    }

+    // vWhale: auto-fix — after write tools, attempt build and inject errors.
+    const WRITE_TOOLS = new Set(['edit_file', 'create_file', 'delete_file', 'apply_pending']);
+    const hasWriteTools = toolPhaseResult.toolCalls.some((tc) => WRITE_TOOLS.has(tc.name));
+    if (hasWriteTools) {
+      detectAndRunBuild(ctx, projectRoot, sessionId, chatId, iterSession.model, pendingRecoveryNote)
+        .then((buildError) => {
+          if (buildError) pendingRecoveryNote = buildError;
+        })
+        .catch(() => {});
+    }
+
    // v#12 MistakeTracker: post-tool decision (pure). 'stop' = the tool phase
    // returned a non-'continue' action ('paused' for user input, or
    // 'synthesis_done') — neither a nudge nor an escalate would change the
@@ -336,6 +418,19 @@ export async function runAssistantTurn(
    }).catch(() => {});
  }

+  // ---- persist agent snapshot (best-effort: failures are swallowed, but both calls are awaited) ----
+  const snapLoaded = await loadContext(ctx.sql, sessionId, chatId).catch(() => null);
+  if (snapLoaded) {
+    await saveAgentSnapshot(ctx.sql, chatId, {
+      session_id: sessionId,
+      model: snapLoaded.session.model,
+      agent: agent?.name ?? null,
+      mode: null,
+      turn_number: stepNumber,
+      messages: snapLoaded.history.map((m) => ({ role: m.role, content: m.content })),
+    }).catch(() => {});
+  }
+
  // ---- post-loop: step-cap sentinel ----
  // When the loop exits because stepNumber reached effectiveCap, the last
  // iteration's tool phase returned 'continue' with a nextAssistantId that