From d2108b2f8d3f36cbd2ceac8e6cdccc2d4d456b19 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Mon, 25 May 2026 02:52:49 +0000 Subject: [PATCH] verification discipline rules + chat naming from assistant response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BOOCHAT.md + BOOCODER.md: 4 verification rules added to both — verify against running container not source files, never count dist/, run commands before claiming success, derive counts from commands. auto_name.ts: chat titles now derived from the assistant's first response only (user message dropped from naming input). System prompt updated to "summarize the topic or outcome — do NOT copy the first few words verbatim." Produces titles like "Fastify Route Setup" instead of echoing the assistant's opening sentence. Co-Authored-By: Claude Opus 4.7 (1M context) --- BOOCHAT.md | 7 +++++++ BOOCODER.md | 7 +++++++ apps/server/src/services/auto_name.ts | 13 +++---------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/BOOCHAT.md b/BOOCHAT.md index 022e976..ce26d45 100644 --- a/BOOCHAT.md +++ b/BOOCHAT.md @@ -39,6 +39,13 @@ Always-true rules (process discipline, refusals, behavior contracts) live here in `BOOCHAT.md` — and in `BOOCODER.md` / `CLAUDE.md` per their scopes — where they are 100% present in every turn. On-demand recipes (specific procedures, scaffolds, checklists) live in `/data/skills/` and invoke roughly 6% of the time in clean multi-turn flow (Codeminer42 measurement, 2026). Don't file workflow rules as skills — they silently misfire. See Anthropic agent-skills best-practices (platform.claude.com/docs/en/agents-and-tools/agent-skills/best-practices) for the canonical conventions. +## Verification discipline + +- When assessing implementation status, verify against the running container (`curl http://<host>:<port>/api/health`) and latest git commit (`git log --oneline -3`), not just source file contents. Source files can be mid-edit. 
The deployed state is the truth. +- Never count files under `dist/` toward source line counts. Only count `src/**/*.ts` files. Compiled output is inflated by inlined types and transpilation artifacts. +- Before claiming a feature works, run the actual command and show the output. "Should work" is not verification. Acceptable evidence: test output (`pnpm test`), build output (`pnpm build`), curl response, docker logs, `\d tablename` output. If you can't run it, say so explicitly — don't assert success without evidence. +- When reporting counts (tools, tests, files, routes, lines), derive the number from a command (`grep -c`, `wc -l`, test runner output) — not from memory or approximation. + ## Known limitations - Codecontext re-analyzes the project graph on each call against a different target_dir. First call to a new project may take 1-3 seconds; subsequent calls to the same project return in ~10ms. diff --git a/BOOCODER.md b/BOOCODER.md index 3638be0..96432a7 100644 --- a/BOOCODER.md +++ b/BOOCODER.md @@ -30,3 +30,10 @@ Every file modification queues in `pending_changes` before touching disk. The us - If uncertain about scope, use smaller edits and verify between steps. - Cite file paths + line numbers for context. - Verify before reporting work complete: run the relevant test/build/smoke and confirm output matches the claim. Evidence first, assertion second. + +## Verification discipline + +- When assessing implementation status, verify against the running container (`curl http://<host>:<port>/api/health`) and latest git commit (`git log --oneline -3`), not just source file contents. Source files can be mid-edit. The deployed state is the truth. +- Never count files under `dist/` toward source line counts. Only count `src/**/*.ts` files. Compiled output is inflated by inlined types and transpilation artifacts. +- Before claiming a feature works, run the actual command and show the output. "Should work" is not verification. 
Acceptable evidence: test output (`pnpm test`), build output (`pnpm build`), curl response, docker logs, `\d tablename` output. If you can't run it, say so explicitly — don't assert success without evidence. +- When reporting counts (tools, tests, files, routes, lines), derive the number from a command (`grep -c`, `wc -l`, test runner output) — not from memory or approximation. diff --git a/apps/server/src/services/auto_name.ts b/apps/server/src/services/auto_name.ts index 746f25b..4022d6a 100644 --- a/apps/server/src/services/auto_name.ts +++ b/apps/server/src/services/auto_name.ts @@ -1,7 +1,7 @@ import type { InferenceContext } from './inference/index.js'; const NAMING_SYSTEM_PROMPT = - 'You name chat sessions. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".'; + 'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. 
Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".'; const MAX_TITLE_CHARS = 60; @@ -70,12 +70,6 @@ export async function maybeAutoNameChat( const model = sessionRows[0]?.model; if (!model) return; - const userMsg = await ctx.sql<{ content: string }[]>` - SELECT content FROM messages - WHERE chat_id = ${chatId} AND role = 'user' - ORDER BY created_at ASC - LIMIT 1 - `; const assistantMsg = await ctx.sql<{ content: string }[]>` SELECT content FROM messages WHERE chat_id = ${chatId} @@ -85,9 +79,8 @@ export async function maybeAutoNameChat( ORDER BY created_at ASC LIMIT 1 `; - if (!userMsg[0] || !assistantMsg[0]) return; + if (!assistantMsg[0]) return; - const userText = userMsg[0].content.slice(0, 2000); const assistantText = assistantMsg[0].content.slice(0, 2000); const body = { @@ -96,7 +89,7 @@ export async function maybeAutoNameChat( { role: 'system', content: NAMING_SYSTEM_PROMPT }, { role: 'user', - content: `First user message: ${userText}\nFirst assistant reply: ${assistantText}`, + content: assistantText, }, ], max_tokens: 30,