From d2108b2f8d3f36cbd2ceac8e6cdccc2d4d456b19 Mon Sep 17 00:00:00 2001
From: indifferentketchup <samkintop@gmail.com>
Date: Mon, 25 May 2026 02:52:49 +0000
Subject: [PATCH] verification discipline rules + chat naming from assistant
 response
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BOOCHAT.md + BOOCODER.md: 4 verification rules added to both —
verify against running container not source files, never count dist/,
run commands before claiming success, derive counts from commands.

auto_name.ts: chat titles now derived from the assistant's first
response only (user message dropped from naming input). System prompt
updated to "summarize the topic or outcome — do NOT copy the first
few words verbatim." Produces titles like "Fastify Route Setup"
instead of echoing the assistant's opening sentence.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 BOOCHAT.md                            |  7 +++++++
 BOOCODER.md                           |  7 +++++++
 apps/server/src/services/auto_name.ts | 13 +++----------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/BOOCHAT.md b/BOOCHAT.md
index 022e976..ce26d45 100644
--- a/BOOCHAT.md
+++ b/BOOCHAT.md
@@ -39,6 +39,13 @@
 
 Always-true rules (process discipline, refusals, behavior contracts) live here in `BOOCHAT.md` — and in `BOOCODER.md` / `CLAUDE.md` per their scopes — where they are 100% present in every turn. On-demand recipes (specific procedures, scaffolds, checklists) live in `/data/skills/` and invoke roughly 6% of the time in clean multi-turn flow (Codeminer42 measurement, 2026). Don't file workflow rules as skills — they silently misfire. See Anthropic agent-skills best-practices (platform.claude.com/docs/en/agents-and-tools/agent-skills/best-practices) for the canonical conventions.
 
+## Verification discipline
+
+- When assessing implementation status, verify against the running container (`curl /api/health`) and the latest git commit (`git log --oneline -3`), not just source file contents. Source files can be mid-edit. The deployed state is the truth.
+- Never count the contents of `dist/` as source lines. When counting source lines, count only `src/**/*.ts` files. Compiled output is inflated by inlined types and transpilation artifacts.
+- Before claiming a feature works, run the actual command and show the output. "Should work" is not verification. Acceptable evidence: test output (`pnpm test`), build output (`pnpm build`), curl response, docker logs, `\d tablename` output. If you can't run it, say so explicitly — don't assert success without evidence.
+- When reporting counts (tools, tests, files, routes, lines), derive the number from a command (`grep -c`, `wc -l`, test runner output) — not from memory or approximation.
+
 ## Known limitations
 
 - Codecontext re-analyzes the project graph on each call against a different target_dir. First call to a new project may take 1-3 seconds; subsequent calls to the same project return in ~10ms.
diff --git a/BOOCODER.md b/BOOCODER.md
index 3638be0..96432a7 100644
--- a/BOOCODER.md
+++ b/BOOCODER.md
@@ -30,3 +30,10 @@ Every file modification queues in `pending_changes` before touching disk. The us
 - If uncertain about scope, use smaller edits and verify between steps.
 - Cite file paths + line numbers for context.
 - Verify before reporting work complete: run the relevant test/build/smoke and confirm output matches the claim. Evidence first, assertion second.
+
+## Verification discipline
+
+- When assessing implementation status, verify against the running container (`curl /api/health`) and the latest git commit (`git log --oneline -3`), not just source file contents. Source files can be mid-edit. The deployed state is the truth.
+- Never count the contents of `dist/` as source lines. When counting source lines, count only `src/**/*.ts` files. Compiled output is inflated by inlined types and transpilation artifacts.
+- Before claiming a feature works, run the actual command and show the output. "Should work" is not verification. Acceptable evidence: test output (`pnpm test`), build output (`pnpm build`), curl response, docker logs, `\d tablename` output. If you can't run it, say so explicitly — don't assert success without evidence.
+- When reporting counts (tools, tests, files, routes, lines), derive the number from a command (`grep -c`, `wc -l`, test runner output) — not from memory or approximation.
diff --git a/apps/server/src/services/auto_name.ts b/apps/server/src/services/auto_name.ts
index 746f25b..4022d6a 100644
--- a/apps/server/src/services/auto_name.ts
+++ b/apps/server/src/services/auto_name.ts
@@ -1,7 +1,7 @@
 import type { InferenceContext } from './inference/index.js';
 
 const NAMING_SYSTEM_PROMPT =
-  'You name chat sessions. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
+  'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
 
 const MAX_TITLE_CHARS = 60;
 
@@ -70,12 +70,6 @@ export async function maybeAutoNameChat(
   const model = sessionRows[0]?.model;
   if (!model) return;
 
-  const userMsg = await ctx.sql<{ content: string }[]>`
-    SELECT content FROM messages
-    WHERE chat_id = ${chatId} AND role = 'user'
-    ORDER BY created_at ASC
-    LIMIT 1
-  `;
   const assistantMsg = await ctx.sql<{ content: string }[]>`
     SELECT content FROM messages
     WHERE chat_id = ${chatId}
@@ -85,9 +79,8 @@ export async function maybeAutoNameChat(
     ORDER BY created_at ASC
     LIMIT 1
   `;
-  if (!userMsg[0] || !assistantMsg[0]) return;
+  if (!assistantMsg[0]) return;
 
-  const userText = userMsg[0].content.slice(0, 2000);
   const assistantText = assistantMsg[0].content.slice(0, 2000);
 
   const body = {
@@ -96,7 +89,7 @@ export async function maybeAutoNameChat(
       { role: 'system', content: NAMING_SYSTEM_PROMPT },
       {
         role: 'user',
-        content: `First user message: ${userText}\nFirst assistant reply: ${assistantText}`,
+        content: assistantText,
       },
     ],
     max_tokens: 30,