From 12d91c9a12c453cb35e4a6cb7bdea0fafef77251 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Sat, 16 May 2026 23:16:02 +0000 Subject: [PATCH] v1.8.1: global agents + parser robustness + WS reconnect toast Builtins move out of code into /data/AGENTS.md (always-on, mounted ro into the container); per-project AGENTS.md is now an optional override. agents.ts merges global + project entries with project-wins-by-name and caches per-source mtimes (60s TTL). Parser switches to per-block try/catch and returns AgentsResponse { agents, errors[] } so one malformed block no longer fails the file. AgentPicker shows a non-blocking amber chip listing skipped blocks and only fires a gray toast when zero agents loaded. WS reconnect UX (useUserEvents + useSessionStream) now silent on the first disconnect; createWsReconnectToast escalates to gray after 3 failures or 15 s, then to red with a Retry Now action after 60 s. useSessionStream also gained the exponential-backoff reconnect it was missing. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/server/src/services/agents.ts | 453 ++++++++---------------- apps/server/src/types/api.ts | 17 +- apps/web/src/api/types.ts | 12 +- apps/web/src/components/AgentPicker.tsx | 25 +- apps/web/src/hooks/useSessionStream.ts | 67 ++-- apps/web/src/hooks/useUserEvents.ts | 19 +- apps/web/src/hooks/wsReconnectToast.ts | 95 +++++ docker-compose.yml | 3 + 8 files changed, 352 insertions(+), 339 deletions(-) create mode 100644 apps/web/src/hooks/wsReconnectToast.ts diff --git a/apps/server/src/services/agents.ts b/apps/server/src/services/agents.ts index 631b160..aba55a8 100644 --- a/apps/server/src/services/agents.ts +++ b/apps/server/src/services/agents.ts @@ -1,9 +1,17 @@ import { promises as fs } from 'node:fs'; import { join } from 'node:path'; -import type { Agent, AgentsResponse } from '../types/api.js'; +import type { Agent, AgentsResponse, AgentParseError } from '../types/api.js'; + +// v1.8.1: global agents live at /data/AGENTS.md 
inside the container +// (./data:/data:ro mount on the host). Per-project AGENTS.md at the project +// root overrides global by name. In-code builtins are gone — the seed file is +// the contents of the previous BUILTIN_AGENTS list, copied into /data/AGENTS.md +// once on first deploy. +const GLOBAL_AGENTS_PATH = '/data/AGENTS.md'; +const CACHE_TTL_MS = 60_000; // Tools whitelist universe matches services/tools.ts ALL_TOOLS. Keep in sync. -const ALL_TOOL_NAMES = ['view_file', 'list_dir', 'grep', 'find_files'] as const; +const ALL_TOOL_NAMES = ['view_file', 'list_dir', 'grep', 'find_files', 'git_status'] as const; const DEFAULT_TOOLS: string[] = [...ALL_TOOL_NAMES]; const DEFAULT_TEMPERATURE = 0.7; @@ -14,214 +22,6 @@ export function slugify(name: string): string { .replace(/^-+|-+$/g, ''); } -// Six builtin defaults. model is intentionally null — session.model wins. -// Match AGENTS.md format; system prompts are verbatim. -const BUILTIN_AGENTS: Agent[] = [ - { - id: 'code-reviewer', - name: 'Code Reviewer', - description: 'Reviews code for bugs, security issues, and maintainability. Read-only.', - temperature: 0.3, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You review code. Find real problems, not style nits. - -Process: -1. Read the file(s) in question with view_file. If a diff is provided, read surrounding context too. -2. Use grep/find_files to check how changed symbols are used elsewhere. -3. Cite every finding as file:line. - -Prioritize in order: -1. Bugs and logic errors -2. Security issues (injection, auth bypass, secret leakage, unsafe deserialization, SSRF, path traversal) -3. Race conditions, error handling, resource leaks -4. Performance issues with measurable impact -5. Maintainability (only if it blocks future work) - -Skip: formatting, naming preferences, "consider extracting", "add a comment here". The user has a linter. 
- -Output format: -- Critical: -- Major: -- Minor: - -If nothing critical or major, say so in one line. Do not pad.`, - }, - { - id: 'debugger', - name: 'Debugger', - description: 'Diagnoses bugs from error messages, logs, or described symptoms.', - temperature: 0.2, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You diagnose bugs. Form a hypothesis, prove it with evidence from the code. - -Process: -1. Restate the symptom in one line. Confirm you understand it. -2. Read the error/stacktrace. Identify the exact frame where things go wrong. -3. view_file on that frame. Read 50 lines around it. -4. grep for callers, related state, recent changes that could explain it. -5. State the root cause with file:line evidence. -6. Propose the minimal fix. Note any side effects. - -Rules: -- Never guess. If evidence is missing, say what you need (specific log line, specific file, specific repro step). -- Distinguish symptom from cause. A null check fixes the symptom; missing init causes it. -- Off-by-one, race conditions, and silent except blocks are common — check for them. -- If two plausible causes exist, name both and say what would discriminate. - -Output: -- Symptom: -- Root cause: -- Fix: -- Risk: `, - }, - { - id: 'refactorer', - name: 'Refactorer', - description: 'Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.', - temperature: 0.3, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code. - -Process: -1. Read the target file(s). -2. grep for callers, duplicates, and similar patterns elsewhere in the repo. -3. Identify the smallest refactor that delivers the goal. - -Prioritize: -1. Deduplication where 3+ sites have near-identical logic -2. Extracting a function/module when one is doing two unrelated jobs -3. 
Decoupling when a change in A forces a change in B unnecessarily -4. Renaming when a name actively misleads - -Reject: -- Refactors that touch 10+ files for marginal gain -- "Modernization" with no concrete benefit -- Abstraction for future flexibility that may never come -- Style-only changes - -Output: -- Goal: -- Scope: -- Plan: numbered steps, each one self-contained -- Risk: -- Skip if: `, - }, - { - id: 'architect', - name: 'Architect', - description: 'Designs new features, modules, or architectural changes. Outputs a build plan.', - temperature: 0.5, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You design. You produce build plans, not code. - -Process: -1. Restate the goal in your own words. Confirm constraints (perf, deploy, deps). -2. list_dir the relevant areas. Read existing patterns — match them unless there's a reason not to. -3. Decide: extend existing code or add new module. Justify. -4. Sketch the data flow: inputs → transforms → outputs → side effects. -5. Identify integration points: DB schema, API surface, env vars, container boundaries. -6. List failure modes and how the design handles them. - -Rules: -- Reuse before inventing. If a service/lib in the repo already does this, say so. -- Prefer boring tech. New deps require justification. -- Tailscale IPs for internal routing. No 0.0.0.0 binds. -- Least privilege: separate read/write paths, explicit auth gates. -- State assumptions inline. Do not ask clarifying questions mid-design unless blocked. - -Output: -- Goal -- Existing code to reuse: -- New code: -- Data model changes: -- API surface: -- Failure modes: -- Build order: numbered, each step 30-90 min`, - }, - { - id: 'security-auditor', - name: 'Security Auditor', - description: 'Audits code for security vulnerabilities. Read-only.', - temperature: 0.2, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You audit for security issues. 
Concrete findings only, no generic warnings. - -Process: -1. Identify the trust boundary: where does untrusted input enter? Where does it leave? -2. Trace input flow with grep. Mark every transformation. -3. Check each finding against a real attack scenario. - -Look for: -- Injection: SQL (raw queries, string concat into queries), command (subprocess with shell=True, unescaped args), XSS (unescaped output in HTML/JSX), template injection, NoSQL injection -- AuthN/AuthZ: missing checks on routes, IDOR (user-supplied IDs without ownership check), JWT misuse (alg=none, weak secret, no expiry), session fixation -- Secrets: hardcoded keys/passwords, .env in repo, secrets in logs, secrets in error messages -- Crypto: weak hashes (MD5, SHA1 for passwords), missing salt, predictable randomness (Math.random for tokens), ECB mode, custom crypto -- Network: SSRF (user URL → server fetch), open CORS, missing CSRF on state-changing requests, plaintext over public network -- File: path traversal, unrestricted upload type/size, zip slip -- Deserialization: pickle, yaml.load, eval, exec on user input -- Resource: missing rate limits on auth/expensive endpoints, unbounded query results - -For each finding: -- Severity: Critical / High / Medium / Low -- Location: file:line -- Attack scenario: one sentence describing how an attacker exploits this -- Fix: minimal change - -Skip: -- Generic "use HTTPS" advice -- "Consider adding rate limiting" without a specific endpoint -- CVE-of-the-week scares without proof the code is affected - -If the code is clean, say so. Do not invent findings.`, - }, - { - id: 'prompt-builder', - name: 'Prompt Builder', - description: 'Builds prompts for OpenCode, Claude Code, or BooCode dispatch.', - temperature: 0.4, - tools: [...DEFAULT_TOOLS], - model: null, - source: 'builtin', - system_prompt: `You write prompts that another coding agent will execute. Your output is the prompt, not the work. - -Process: -1. 
Ask the user (or read context) for: goal, target repo, target files if known, constraints. -2. list_dir and view_file the target area. Confirm files exist and are roughly the shape you think. -3. Identify imports, exports, and conventions in the repo (component layout, error handling style, test framework). -4. Write the prompt. - -Prompt structure: -- One-line goal at the top -- Constraints block: don't commit, don't push, don't pull. Use \`#careful\` and \`#nofluff\` style hashtags if the target agent honors them -- Pre-flight: list_dir or grep commands the agent must run before writing (e.g. "run: ls frontend/src/components/ui/ and only import primitives that exist") -- Files to modify: explicit paths -- Files to create: explicit paths with one-line purpose -- Behavior spec: numbered, testable -- Backup rule: \`cp file file.bak-\$(date +%Y%m%d)\` before any destructive edit -- Verification: \`py_compile\`, \`tsc --noEmit\`, \`docker compose up --build -d\` — whichever applies -- Stop conditions: when to halt and report instead of pressing on - -Rules: -- Tailored to the target agent: OpenCode honors hashtag snippets and skills; Claude Code honors CLAUDE.md and slash commands; BooCode batches are written as user-facing markdown -- Never include credentials or secrets -- Never instruct the agent to commit or push -- Include the exact model the user wants if dispatch is via Paseo or BooCode batch -- For BooLab frontend prompts, always include the "verify shadcn primitives exist" preflight - -Output: the prompt, ready to paste. 
Nothing else.`, - }, -]; - // ---- AGENTS.md parser ------------------------------------------------------ interface ParsedFrontmatter { @@ -296,18 +96,14 @@ function parseFrontmatter(yaml: string): { data: ParsedFrontmatter; errors: stri return { data, errors }; } -interface ParseResult { - agents: Agent[]; - error: string | null; +interface RawSection { + name: string; + body: string; } -export function parseAgentsMd(content: string): ParseResult { - const errors: string[] = []; - const agents: Agent[] = []; - - // Split into per-agent sections by lines that exactly match "## ". - // Lines starting with "### " (level-3 headings) are not section boundaries. - const sections: { name: string; body: string }[] = []; +function splitSections(content: string): RawSection[] { + // Split by lines matching exactly "## Name". Level-3+ headings are body content. + const sections: RawSection[] = []; let currentName: string | null = null; let currentLines: string[] = []; @@ -329,74 +125,101 @@ export function parseAgentsMd(content: string): ParseResult { if (currentName !== null) { sections.push({ name: currentName, body: currentLines.join('\n') }); } + return sections; +} - for (const section of sections) { - const lines = section.body.split('\n'); - // Opening "---" fence must be the first non-empty line (blank lines allowed). - let openIdx = -1; - for (let i = 0; i < lines.length; i++) { - const t = lines[i]!.trim(); - if (t === '') continue; - if (t === '---') { - openIdx = i; - } +// Throws on malformed section — caller handles per-block error collection. +function parseAgentSection(section: RawSection): Omit { + const lines = section.body.split('\n'); + + // Opening "---" fence must be the first non-empty line.
+ let openIdx = -1; + for (let i = 0; i < lines.length; i++) { + const t = lines[i]!.trim(); + if (t === '') continue; + if (t === '---') { + openIdx = i; + } + break; + } + if (openIdx < 0) { + throw new Error('missing opening --- fence after heading'); + } + let closeIdx = -1; + for (let i = openIdx + 1; i < lines.length; i++) { + if (lines[i]!.trim() === '---') { + closeIdx = i; break; } - if (openIdx < 0) { - errors.push(`agent "${section.name}": missing opening --- fence after heading`); - continue; - } - let closeIdx = -1; - for (let i = openIdx + 1; i < lines.length; i++) { - if (lines[i]!.trim() === '---') { - closeIdx = i; - break; - } - } - if (closeIdx < 0) { - errors.push(`agent "${section.name}": missing closing --- fence`); - continue; - } - const yamlText = lines.slice(openIdx + 1, closeIdx).join('\n'); - const systemPrompt = lines.slice(closeIdx + 1).join('\n').trim(); + } + if (closeIdx < 0) { + throw new Error('missing closing --- fence'); + } + const yamlText = lines.slice(openIdx + 1, closeIdx).join('\n'); + const systemPrompt = lines.slice(closeIdx + 1).join('\n').trim(); - const { data: fm, errors: fmErrors } = parseFrontmatter(yamlText); - if (fmErrors.length > 0) { - errors.push(`agent "${section.name}": ${fmErrors.join('; ')}`); - continue; - } - - const filteredTools = Array.isArray(fm.tools) - ? fm.tools.filter((t): t is string => - (ALL_TOOL_NAMES as readonly string[]).includes(t) - ) - : DEFAULT_TOOLS; - - agents.push({ - id: slugify(section.name), - name: section.name, - description: fm.description ?? '', - system_prompt: systemPrompt, - temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE, - tools: filteredTools, - model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null, - source: 'file', - }); + const { data: fm, errors: fmErrors } = parseFrontmatter(yamlText); + if (fmErrors.length > 0) { + throw new Error(fmErrors.join('; ')); } - return { agents, error: errors.length > 0 ? 
errors.join('; ') : null }; + const filteredTools = Array.isArray(fm.tools) + ? fm.tools.filter((t): t is string => + (ALL_TOOL_NAMES as readonly string[]).includes(t), + ) + : DEFAULT_TOOLS; + + return { + id: slugify(section.name), + name: section.name, + description: fm.description ?? '', + system_prompt: systemPrompt, + temperature: typeof fm.temperature === 'number' ? fm.temperature : DEFAULT_TEMPERATURE, + tools: filteredTools, + model: typeof fm.model === 'string' && fm.model.length > 0 ? fm.model : null, + }; +} + +interface ParseResult { + agents: Omit[]; + errors: AgentParseError[]; +} + +// v1.8.1: parse each `## Name` block independently. A failure in one block +// does not abort the rest of the file — we collect a per-agent error and +// keep parsing. Server logs a console.warn for each skipped agent. +export function parseAgentsMd(content: string): ParseResult { + const sections = splitSections(content); + const agents: Omit[] = []; + const errors: AgentParseError[] = []; + + for (const section of sections) { + try { + agents.push(parseAgentSection(section)); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + console.warn(`agents: skipped "${section.name}" — ${reason}`); + errors.push({ agent_name: section.name, reason }); + } + } + + return { agents, errors }; } // ---- mtime-keyed cache + public API ---------------------------------------- interface CacheEntry { - mtimeMs: number; + globalMtime: number | null; + projectMtime: number | null; + cachedAt: number; result: AgentsResponse; } +// Keyed by projectPath (an empty path is stored under '__none__' — the no-project case, e.g. tests). +// Each entry records the mtimes of two files (global + project); editing either changes its +// mtime so the next read misses without a watcher. Entries also expire after CACHE_TTL_MS. const cache = new Map(); -// Test/admin: force re-parse on next call for a project (or all projects).
export function invalidateAgentsCache(projectPath?: string): void { if (projectPath === undefined) { cache.clear(); @@ -405,54 +228,74 @@ export function invalidateAgentsCache(projectPath?: string): void { } } -export async function getAgentsForProject(projectPath: string): Promise { - const agentsPath = join(projectPath, 'AGENTS.md'); - let mtimeMs: number; +async function safeStat(path: string): Promise { try { - const s = await fs.stat(agentsPath); - mtimeMs = s.mtimeMs; + const s = await fs.stat(path); + return s.mtimeMs; } catch { - // No AGENTS.md → builtins, no parse error - cache.delete(projectPath); - return { agents: BUILTIN_AGENTS, parse_error: null }; + return null; } +} - const cached = cache.get(projectPath); - if (cached && cached.mtimeMs === mtimeMs) { +async function safeRead(path: string): Promise { + try { + return await fs.readFile(path, 'utf8'); + } catch { + return null; + } +} + +export async function getAgentsForProject(projectPath: string): Promise { + const projectAgentsPath = projectPath ? join(projectPath, 'AGENTS.md') : null; + + const [globalMtime, projectMtime] = await Promise.all([ + safeStat(GLOBAL_AGENTS_PATH), + projectAgentsPath ? safeStat(projectAgentsPath) : Promise.resolve(null), + ]); + + const cacheKey = projectPath || '__none__'; + const cached = cache.get(cacheKey); + const now = Date.now(); + if ( + cached && + cached.globalMtime === globalMtime && + cached.projectMtime === projectMtime && + now - cached.cachedAt < CACHE_TTL_MS + ) { return cached.result; } - let content: string; - try { - content = await fs.readFile(agentsPath, 'utf8'); - } catch { - cache.delete(projectPath); - return { agents: BUILTIN_AGENTS, parse_error: null }; + const [globalContent, projectContent] = await Promise.all([ + globalMtime !== null ? safeRead(GLOBAL_AGENTS_PATH) : Promise.resolve(null), + projectAgentsPath && projectMtime !== null ? 
safeRead(projectAgentsPath) : Promise.resolve(null), + ]); + + const errors: AgentParseError[] = []; + const byName = new Map(); + + if (globalContent !== null) { + const r = parseAgentsMd(globalContent); + for (const a of r.agents) byName.set(a.name, { ...a, source: 'global' }); + errors.push(...r.errors); + } + if (projectContent !== null) { + const r = parseAgentsMd(projectContent); + for (const a of r.agents) byName.set(a.name, { ...a, source: 'project' }); + errors.push(...r.errors); } - const parsed = parseAgentsMd(content); - let result: AgentsResponse; - if (parsed.error) { - // Parse error: surface in API, fall back to builtins - result = { agents: BUILTIN_AGENTS, parse_error: parsed.error }; - } else if (parsed.agents.length === 0) { - // Empty / no headings → builtins - result = { agents: BUILTIN_AGENTS, parse_error: null }; - } else { - // At least one valid agent → file-defined agents win, builtins hidden - result = { agents: parsed.agents, parse_error: null }; - } - - cache.set(projectPath, { mtimeMs, result }); + const result: AgentsResponse = { + agents: Array.from(byName.values()), + errors, + }; + cache.set(cacheKey, { globalMtime, projectMtime, cachedAt: now, result }); return result; } export async function getAgentById( projectPath: string, - agentId: string + agentId: string, ): Promise { const { agents } = await getAgentsForProject(projectPath); return agents.find((a) => a.id === agentId) ?? null; } - -export { BUILTIN_AGENTS }; diff --git a/apps/server/src/types/api.ts b/apps/server/src/types/api.ts index c99bd04..c6fe7d6 100644 --- a/apps/server/src/types/api.ts +++ b/apps/server/src/types/api.ts @@ -31,9 +31,10 @@ export interface Session { agent_id: string | null; } -// Agent sources: 'builtin' = baked-in default (services/agents.ts), -// 'file' = parsed from project's AGENTS.md. -export type AgentSource = 'builtin' | 'file'; +// v1.8.1: agents come from two sources. 
'global' = /data/AGENTS.md (always +// loaded inside the container), 'project' = per-project override at +// {project root}/AGENTS.md. Project entries override global by name (case-sensitive). +export type AgentSource = 'global' | 'project'; export interface Agent { id: string; // slug of name; stable handle stored in sessions.agent_id @@ -46,9 +47,17 @@ export interface Agent { source: AgentSource; } +// One entry per malformed `## Name` block. Per-block errors don't fail the +// whole file — the loader returns parsed-successfully agents AND the list of +// skipped ones so the UI can show a non-blocking warning chip. +export interface AgentParseError { + agent_name: string; + reason: string; +} + export interface AgentsResponse { agents: Agent[]; - parse_error: string | null; // present (non-null) when AGENTS.md exists but failed to parse + errors: AgentParseError[]; } // KEEP IN SYNC: apps/server/src/schema.sql chats_status_chk diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index 020c582..a6554ba 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -30,7 +30,10 @@ export interface Session { agent_id: string | null; } -export type AgentSource = 'builtin' | 'file'; +// v1.8.1: 'global' = /data/AGENTS.md (always-on), 'project' = per-project +// override at {project root}/AGENTS.md. In-code builtins were retired; the seed file +// lives at /data/AGENTS.md.
+export type AgentSource = 'global' | 'project'; export interface Agent { id: string; @@ -43,9 +46,14 @@ export interface Agent { source: AgentSource; } +export interface AgentParseError { + agent_name: string; + reason: string; +} + export interface AgentsResponse { agents: Agent[]; - parse_error: string | null; + errors: AgentParseError[]; } export const CHAT_STATUSES = ['open', 'archived'] as const; diff --git a/apps/web/src/components/AgentPicker.tsx b/apps/web/src/components/AgentPicker.tsx index d62c239..78181cd 100644 --- a/apps/web/src/components/AgentPicker.tsx +++ b/apps/web/src/components/AgentPicker.tsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react'; import { Check, ChevronDown } from 'lucide-react'; import { toast } from 'sonner'; import { api } from '@/api/client'; -import type { Agent } from '@/api/types'; +import type { Agent, AgentParseError } from '@/api/types'; import { DropdownMenu, DropdownMenuContent, @@ -19,23 +19,28 @@ interface Props { export function AgentPicker({ projectId, value, onChange }: Props) { const [agents, setAgents] = useState(null); + const [parseErrors, setParseErrors] = useState([]); const [error, setError] = useState(null); const [open, setOpen] = useState(false); - // Load on mount (and on projectId change) so the trigger shows the agent - // name immediately, not the raw id. AGENTS.md parse errors surface as a - // toast once per load. + // v1.8.1: per-agent parse errors are non-blocking. Silent if any agents + // loaded successfully; a gray warning toast fires only when EVERY agent + // in AGENTS.md failed to parse. Server logs a console.warn either way. 
useEffect(() => { let cancelled = false; setAgents(null); + setParseErrors([]); setError(null); api.agents .list(projectId) .then((res) => { if (cancelled) return; setAgents(res.agents); - if (res.parse_error) { - toast.error(`AGENTS.md parse error: ${res.parse_error}`); + setParseErrors(res.errors); + if (res.errors.length > 0 && res.agents.length === 0) { + toast.warning( + `AGENTS.md: ${res.errors.length} agent${res.errors.length === 1 ? '' : 's'} failed to parse, none loaded`, + ); } }) .catch((err) => { @@ -100,6 +105,14 @@ export function AgentPicker({ projectId, value, onChange }: Props) { )} ))} + {parseErrors.length > 0 && ( +
`${e.agent_name}: ${e.reason}`).join('\n')} + > + {parseErrors.length} agent{parseErrors.length === 1 ? '' : 's'} skipped +
+ )} )} diff --git a/apps/web/src/hooks/useSessionStream.ts b/apps/web/src/hooks/useSessionStream.ts index 78d9afd..42d9b1f 100644 --- a/apps/web/src/hooks/useSessionStream.ts +++ b/apps/web/src/hooks/useSessionStream.ts @@ -143,6 +143,11 @@ function applyFrame(state: State, frame: WsFrame): State { } } +// Matches useUserEvents — exponential backoff with the same ceiling so the +// two channels reconnect on the same cadence after a network handoff. +const RECONNECT_INITIAL_MS = 1000; +const RECONNECT_MAX_MS = 30_000; + export function useSessionStream(sessionId: string | undefined) { const [state, setState] = useState({ messages: [], connected: false, error: null }); const wsRef = useRef(null); @@ -152,32 +157,52 @@ export function useSessionStream(sessionId: string | undefined) { setState({ messages: [], connected: false, error: null }); - const proto = window.location.protocol === 'https:' ? 'wss' : 'ws'; - const url = `${proto}://${window.location.host}/api/ws/sessions/${sessionId}`; - const ws = new WebSocket(url); - wsRef.current = ws; + let unmounted = false; + let reconnectTimer: ReturnType | null = null; + let reconnectDelay = RECONNECT_INITIAL_MS; - ws.onopen = () => { - setState((s) => ({ ...s, connected: true, error: null })); - }; - ws.onmessage = (ev) => { - try { - const frame = JSON.parse(typeof ev.data === 'string' ? ev.data : '') as WsFrame; - setState((s) => applyFrame(s, frame)); - } catch (err) { - console.warn('bad ws frame', err); - } - }; - ws.onerror = () => { - setState((s) => ({ ...s, error: 'websocket error' })); - }; - ws.onclose = () => { - setState((s) => ({ ...s, connected: false })); + const connect = () => { + if (unmounted) return; + const proto = window.location.protocol === 'https:' ? 
'wss' : 'ws'; + const url = `${proto}://${window.location.host}/api/ws/sessions/${sessionId}`; + const ws = new WebSocket(url); + wsRef.current = ws; + + ws.onopen = () => { + reconnectDelay = RECONNECT_INITIAL_MS; + setState((s) => ({ ...s, connected: true, error: null })); + }; + ws.onmessage = (ev) => { + try { + const frame = JSON.parse(typeof ev.data === 'string' ? ev.data : '') as WsFrame; + setState((s) => applyFrame(s, frame)); + } catch (err) { + console.warn('bad ws frame', err); + } + }; + // v1.8.1: WS errors no longer surface as user-facing toasts here. The + // user-channel hook (useUserEvents) owns the debounced "reconnecting…" + // UI; this channel just reconnects silently on the same backoff. + ws.onerror = () => { + try { ws.close(); } catch {} + }; + ws.onclose = () => { + if (unmounted) return; + setState((s) => ({ ...s, connected: false })); + const delay = reconnectDelay; + reconnectDelay = Math.min(reconnectDelay * 2, RECONNECT_MAX_MS); + reconnectTimer = setTimeout(connect, delay); + }; }; + connect(); + return () => { + unmounted = true; + if (reconnectTimer) clearTimeout(reconnectTimer); + const ws = wsRef.current; wsRef.current = null; - ws.close(); + if (ws) try { ws.close(); } catch {} }; }, [sessionId]); diff --git a/apps/web/src/hooks/useUserEvents.ts b/apps/web/src/hooks/useUserEvents.ts index 2fd5ae4..04b346a 100644 --- a/apps/web/src/hooks/useUserEvents.ts +++ b/apps/web/src/hooks/useUserEvents.ts @@ -1,5 +1,6 @@ import { useEffect } from 'react'; import { sessionEvents } from './sessionEvents'; +import { createWsReconnectToast } from './wsReconnectToast'; const RECONNECT_INITIAL_MS = 1000; const RECONNECT_MAX_MS = 30000; @@ -11,6 +12,20 @@ export function useUserEvents(): void { let reconnectDelay = RECONNECT_INITIAL_MS; let unmounted = false; + // v1.8.1: silent on the first disconnect; gray "reconnecting…" after 3 + // fails / 15 s; red "connection lost" with a Retry Now action after 60 s. 
+ const reconnectToast = createWsReconnectToast({ + label: 'Live updates', + onRetryNow: () => { + if (reconnectTimer) { + clearTimeout(reconnectTimer); + reconnectTimer = null; + reconnectDelay = RECONNECT_INITIAL_MS; + connect(); + } + }, + }); + const connect = () => { if (unmounted) return; const url = new URL('/api/ws/user', window.location.href); @@ -19,6 +34,7 @@ export function useUserEvents(): void { ws.onopen = () => { reconnectDelay = RECONNECT_INITIAL_MS; + reconnectToast.onConnected(); }; ws.onmessage = (ev) => { @@ -34,6 +50,7 @@ export function useUserEvents(): void { ws.onclose = () => { if (unmounted) return; + reconnectToast.onFailure(); const delay = reconnectDelay; reconnectDelay = Math.min(reconnectDelay * 2, RECONNECT_MAX_MS); reconnectTimer = setTimeout(connect, delay); @@ -50,8 +67,8 @@ export function useUserEvents(): void { return () => { unmounted = true; + reconnectToast.dispose(); if (reconnectTimer) clearTimeout(reconnectTimer); - // best-effort cleanup; ignore failure because the socket may already be closed if (ws) try { ws.close(); } catch {} }; }, []); diff --git a/apps/web/src/hooks/wsReconnectToast.ts b/apps/web/src/hooks/wsReconnectToast.ts new file mode 100644 index 0000000..285150a --- /dev/null +++ b/apps/web/src/hooks/wsReconnectToast.ts @@ -0,0 +1,95 @@ +import { toast } from 'sonner'; + +// v1.8.1 thresholds. First disconnect is silent — mobile Authelia idle timeouts +// and tab suspensions trip reconnects constantly and the old red "websocket +// error" toast made the app feel broken. Only escalate once the failure is +// sustained. +const TOAST_AFTER_FAILS = 3; +const TOAST_AFTER_MS = 15_000; +const PERSISTENT_AFTER_MS = 60_000; + +export interface WsReconnectToast { + onFailure(): void; + onConnected(): void; + dispose(): void; +} + +interface Options { + label: string; // shown in the toast (e.g. "Live updates") + onRetryNow: () => void; // user clicked the "Retry now" action +} + +// Per-connection toast wrapper. 
Caller drives it from the WS lifecycle: +// onFailure — after each failed connection attempt +// onConnected — after a successful onopen +// dispose — on hook unmount +// The wrapper itself runs no timers and does not change the caller's reconnect +// cadence; it only decides when to show / dismiss the toast. +export function createWsReconnectToast(opts: Options): WsReconnectToast { + let firstFailureAt: number | null = null; + let failureCount = 0; + let reconnectingId: string | number | null = null; + let persistentId: string | number | null = null; + + function dismissReconnecting(): void { + if (reconnectingId !== null) { + toast.dismiss(reconnectingId); + reconnectingId = null; + } + } + function dismissPersistent(): void { + if (persistentId !== null) { + toast.dismiss(persistentId); + persistentId = null; + } + } + + return { + onFailure() { + if (firstFailureAt === null) firstFailureAt = Date.now(); + failureCount += 1; + const elapsed = Date.now() - firstFailureAt; + + // Escalate to red error + Retry button after PERSISTENT_AFTER_MS. Replaces + // the gray toast if it's still showing. + if (persistentId === null && elapsed >= PERSISTENT_AFTER_MS) { + dismissReconnecting(); + persistentId = toast.error(`${opts.label}: connection lost`, { + duration: Infinity, + action: { + label: 'Retry now', + onClick: () => { + dismissReconnecting(); + dismissPersistent(); + opts.onRetryNow(); + }, + }, + }); + return; + } + + // Gray "reconnecting…" toast once we've crossed either threshold. 
+ if ( + reconnectingId === null && + persistentId === null && + (failureCount >= TOAST_AFTER_FAILS || elapsed >= TOAST_AFTER_MS) + ) { + reconnectingId = toast.warning(`${opts.label}: reconnecting…`, { + duration: Infinity, + }); + } + }, + onConnected() { + firstFailureAt = null; + failureCount = 0; + dismissReconnecting(); + dismissPersistent(); + }, + dispose() { + firstFailureAt = null; + failureCount = 0; + dismissReconnecting(); + dismissPersistent(); + }, + }; +} diff --git a/docker-compose.yml b/docker-compose.yml index e813045..b36035b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,9 @@ services: # Host must `mkdir -p /opt/projects` before container start. - /opt/projects:/opt/projects:rw - ./secrets/boocode_gitea:/root/.ssh/id_ed25519:ro + # v1.8.1: global agents file. Host seeds it once before deploy: + # cp /opt/boocode/AGENTS.md /opt/boocode/data/AGENTS.md + - ./data:/data:ro depends_on: - boocode_db networks: