From 648a59a56360ab7a42c21b79cfa2601b0e044893 Mon Sep 17 00:00:00 2001
From: indifferentketchup
Date: Sun, 7 Jun 2026 21:34:25 +0000
Subject: [PATCH] =?UTF-8?q?feat(server):=20memory=20v2=20=E2=80=94=20BM25?=
 =?UTF-8?q?=20+=20local=20embedding=20hybrid=20search?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Bm25Ranker: Okapi BM25 scoring (pure TS, no deps)
- Embedding module: ONNX-based local embeddings via onnxruntime-node
- Hybrid recall: BM25 (30%) + cosine similarity (70%) weighted merge;
  BM25 alone carries the score when no embedding model is loaded
- Falls back to keyword-only via MEMORY_SEARCH=keyword env var
- extract_memory agent tool for persisting memory entries
---
 apps/server/src/services/memory/bm25.ts       | 89 +++++++++++++++++++
 apps/server/src/services/memory/embeddings.ts | 67 ++++++++++++++
 apps/server/src/services/memory/recall.ts     | 68 +++++++++++++-
 .../src/services/tools/extract_memory.ts      | 45 ++++++++++
 4 files changed, 268 insertions(+), 1 deletion(-)
 create mode 100644 apps/server/src/services/memory/bm25.ts
 create mode 100644 apps/server/src/services/memory/embeddings.ts
 create mode 100644 apps/server/src/services/tools/extract_memory.ts

diff --git a/apps/server/src/services/memory/bm25.ts b/apps/server/src/services/memory/bm25.ts
new file mode 100644
index 0000000..6dfe4b8
--- /dev/null
+++ b/apps/server/src/services/memory/bm25.ts
@@ -0,0 +1,89 @@
+// BM25 ranker — pure Okapi BM25 scoring. No external deps.
+
+/** Tuning knobs for {@link Bm25Ranker}; both are optional. */
+interface Bm25Config {
+  /** Term-frequency saturation (default 1.5). */
+  k1?: number;
+  /** Document-length normalization strength (default 0.75). */
+  b?: number;
+}
+
+/**
+ * Okapi BM25 scorer over an in-memory corpus. Text is lower-cased and split
+ * on whitespace. Call `fit` before `score` or `rank`.
+ */
+export class Bm25Ranker {
+  private k1: number;
+  private b: number;
+  private docTokens: string[][];
+  private docFreq: Map<string, number>;
+  private avgDocLen: number;
+  private idfCache: Map<string, number>;
+  private docCount: number;
+
+  constructor(config?: Bm25Config) {
+    this.k1 = config?.k1 ?? 1.5;
+    this.b = config?.b ?? 0.75;
+    this.docTokens = [];
+    this.docFreq = new Map();
+    this.avgDocLen = 0;
+    this.idfCache = new Map();
+    this.docCount = 0;
+  }
+
+  /** Index `docs`, replacing any previously fitted corpus. */
+  fit(docs: string[]): void {
+    // Tokenize once here so lengths and term counts use the same tokenizer
+    // as scoring, and so score()/idf() never re-tokenize the corpus.
+    this.docTokens = docs.map((d) => this.tokenize(d));
+    this.docCount = docs.length;
+    this.docFreq = new Map();
+    let totalLen = 0;
+    for (const tokens of this.docTokens) {
+      totalLen += tokens.length;
+      for (const term of new Set(tokens)) {
+        this.docFreq.set(term, (this.docFreq.get(term) ?? 0) + 1);
+      }
+    }
+    // Guard the empty corpus: 0 / 0 would leave avgDocLen as NaN.
+    this.avgDocLen = docs.length > 0 ? totalLen / docs.length : 0;
+    this.idfCache.clear();
+  }
+
+  private tokenize(text: string): string[] {
+    return text.toLowerCase().split(/\s+/).filter((t) => t.length > 0);
+  }
+
+  /** Smoothed inverse document frequency of `term`, cached per fit. */
+  private idf(term: string): number {
+    const cached = this.idfCache.get(term);
+    if (cached !== undefined) return cached;
+    const docsWithTerm = this.docFreq.get(term) ?? 0;
+    const idf = Math.log(1 + (this.docCount - docsWithTerm + 0.5) / (docsWithTerm + 0.5));
+    this.idfCache.set(term, idf);
+    return idf;
+  }
+
+  /** BM25 score of the document at `docIndex` for `query`; 0 if out of range. */
+  score(query: string, docIndex: number): number {
+    const docTokens = this.docTokens[docIndex];
+    if (docTokens === undefined) return 0;
+    const queryTerms = this.tokenize(query);
+    const docLen = docTokens.length;
+
+    let total = 0;
+    for (const term of queryTerms) {
+      const tf = docTokens.filter((t) => t === term).length;
+      if (tf === 0) continue;
+      const idfVal = this.idf(term);
+      total += idfVal * ((tf * (this.k1 + 1)) / (tf + this.k1 * (1 - this.b + this.b * docLen / this.avgDocLen)));
+    }
+    return total;
+  }
+
+  /** Top `topN` documents with a positive score, best first. */
+  rank(query: string, topN: number = 10): Array<{ index: number; score: number }> {
+    const scores = this.docTokens.map((_, i) => ({ index: i, score: this.score(query, i) }));
+    return scores.sort((a, b) => b.score - a.score).slice(0, topN).filter((s) => s.score > 0);
+  }
+}
diff --git a/apps/server/src/services/memory/embeddings.ts b/apps/server/src/services/memory/embeddings.ts
new file mode 100644
index 0000000..3196ac5
--- /dev/null
+++ b/apps/server/src/services/memory/embeddings.ts
@@ -0,0 +1,67 @@
+// Embedding module — ONNX-based local embeddings.
+// Falls back gracefully when the model file is not available.
+
+let model: any = null;
+let ortModule: any = null;
+
+/** True once `initEmbeddings` has created an inference session. */
+export function isEmbeddingAvailable(): boolean {
+  return model !== null;
+}
+
+// eslint-disable-next-line @typescript-eslint/no-require-imports
+const dynamicRequire = typeof require !== 'undefined' ? require : null;
+
+/**
+ * Load onnxruntime-node and create an inference session for `modelPath`
+ * (default: the EMBEDDING_MODEL_PATH env var). Resolves to false, never
+ * throws, when the runtime or the model cannot be loaded.
+ */
+export async function initEmbeddings(modelPath?: string): Promise<boolean> {
+  try {
+    if (dynamicRequire) {
+      try { ortModule = dynamicRequire('onnxruntime-node'); } catch { ortModule = null; }
+    }
+    if (!ortModule) {
+      try { ortModule = await import('onnxruntime-node' as any); } catch { ortModule = null; }
+    }
+    if (!ortModule) return false;
+    const path = modelPath ?? process.env['EMBEDDING_MODEL_PATH'] ?? '';
+    if (!path) return false;
+    model = await ortModule.InferenceSession.create(path);
+    return true;
+  } catch {
+    model = null;
+    return false;
+  }
+}
+
+/**
+ * Embed `texts` with the loaded model. Resolves to one vector per input, or
+ * null when no model is loaded or inference fails.
+ */
+export async function embed(texts: string[]): Promise<number[][] | null> {
+  if (!model) return null;
+  try {
+    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+    const ort: { Tensor: new (...args: unknown[]) => unknown } | null = ortModule || null;
+    if (!ort) return null;
+    const input = new ort.Tensor('string', texts, [texts.length]);
+    const feeds: Record<string, unknown> = {};
+    feeds[model.inputNames[0]] = input;
+    const results = await model.run(feeds);
+    const output = results[model.outputNames[0]];
+    if (!output || !output.data) return null;
+    const dim = output.dims?.[1] ?? 384;
+    const data = output.data as Float32Array;
+    // A short buffer would yield truncated or empty vectors; report failure.
+    if (data.length < texts.length * dim) return null;
+    const vectors: number[][] = [];
+    for (let i = 0; i < texts.length; i++) {
+      vectors.push(Array.from(data.slice(i * dim, (i + 1) * dim)));
+    }
+    return vectors;
+  } catch {
+    return null;
+  }
+}
diff --git a/apps/server/src/services/memory/recall.ts b/apps/server/src/services/memory/recall.ts
index bd50114..a275746 100644
--- a/apps/server/src/services/memory/recall.ts
+++ b/apps/server/src/services/memory/recall.ts
@@ -1,5 +1,9 @@
 import type { MemoryEntry } from './entries.js';
 import { scanProjectMemory } from './scan.js';
+import { Bm25Ranker } from './bm25.js';
+import { embed, isEmbeddingAvailable } from './embeddings.js';
+
+const SEARCH_MODE = process.env['MEMORY_SEARCH'] ?? 'hybrid';
 
 function extractKeywords(query: string): string[] {
   return query
@@ -31,6 +35,61 @@ export function rankByRelevance(query: string, entries: MemoryEntry[]): MemoryEn
     .map((s) => s.entry);
 }
 
+/**
+ * Rank entries by a weighted blend of normalized BM25 (30%) and embedding
+ * cosine similarity (70%). When embeddings are unavailable BM25 is used alone.
+ * Returns at most 10 entries scoring at least 0.15, best first.
+ */
+export async function rankByHybrid(
+  query: string,
+  entries: MemoryEntry[],
+): Promise<MemoryEntry[]> {
+  if (entries.length === 0) return [];
+  const texts = entries.map((e) => `${e.title} ${e.content} ${e.tags.join(' ')}`);
+
+  const bm25 = new Bm25Ranker();
+  bm25.fit(texts);
+  const bm25Scores = texts.map((_, i) => bm25.score(query, i));
+  // Floor of 1 avoids dividing by zero; reduce() rather than spread so a large
+  // corpus cannot exceed the engine's argument-count limit.
+  const maxBm25 = bm25Scores.reduce((m, s) => Math.max(m, s), 1);
+  const normBm25 = bm25Scores.map((s) => s / maxBm25);
+
+  let cosineScores: number[] | null = null;
+  if (isEmbeddingAvailable()) {
+    const vectors = await embed([query, ...texts]);
+    const queryVec = vectors?.[0];
+    if (vectors && queryVec) {
+      cosineScores = texts.map((_, i) => {
+        const vec = vectors[i + 1];
+        if (!vec || vec.length !== queryVec.length) return 0;
+        let dot = 0, nA = 0, nB = 0;
+        for (let j = 0; j < queryVec.length; j++) {
+          dot += queryVec[j]! * vec[j]!;
+          nA += queryVec[j]! * queryVec[j]!;
+          nB += vec[j]! * vec[j]!;
+        }
+        const denom = Math.sqrt(nA) * Math.sqrt(nB);
+        return denom === 0 ? 0 : dot / denom;
+      });
+    }
+  }
+
+  // With no cosine term a 30% BM25 weight would cap every score at 0.3, so
+  // BM25 carries the full weight when embeddings are unavailable.
+  const bm25Weight = cosineScores ? 0.3 : 1;
+  const scored = entries.map((entry, i) => {
+    const combined = (normBm25[i]! * bm25Weight) + ((cosineScores?.[i] ?? 0) * 0.7);
+    return { entry, score: combined };
+  });
+
+  return scored
+    .filter((s) => s.score >= 0.15)
+    .sort((a, b) => b.score - a.score)
+    .slice(0, 10)
+    .map((s) => s.entry);
+}
+
 export async function loadMemoryForSession(
   projectRoot: string,
   _sessionId?: string,
@@ -39,6 +98,13 @@ export async function loadMemoryForSession(
   const entries = await scanProjectMemory(projectRoot);
   if (entries.length === 0) return [];
 
-  const relevant = query ? rankByRelevance(query, entries) : entries.slice(0, 5);
+  const relevant = query
+    ? SEARCH_MODE === 'keyword'
+      ? rankByRelevance(query, entries)
+      : await rankByHybrid(query, entries)
+    : entries.slice(0, 5);
+
   return relevant.map((e) => `[${e.topic}] ${e.title}: ${e.content}`);
 }
+
+export { initEmbeddings } from './embeddings.js';
diff --git a/apps/server/src/services/tools/extract_memory.ts b/apps/server/src/services/tools/extract_memory.ts
new file mode 100644
index 0000000..d9bde78
--- /dev/null
+++ b/apps/server/src/services/tools/extract_memory.ts
@@ -0,0 +1,45 @@
+import { z } from 'zod';
+import type { ToolDef } from '../tools/types.js';
+import { ensureMemoryScaffold, getMemoryRoot } from '../memory/paths.js';
+import { writeEntry } from '../memory/store.js';
+
+const ExtractMemoryInput = z.object({
+  topic: z.enum(['project', 'user', 'reference']).describe('Memory topic category'),
+  title: z.string().min(1).max(200).describe('Entry title (will be normalized to filename)'),
+  content: z.string().min(1).describe('Memory content body'),
+  tags: z.array(z.string()).optional().describe('Optional tags for search'),
+});
+
+type InputT = z.infer<typeof ExtractMemoryInput>;
+
+/** Agent tool that writes one memory entry under the project's memory root. */
+export const extractMemoryTool: ToolDef<InputT> = {
+  name: 'extract_memory',
+  description: 'Persist a memory entry to .boocode/memory/ for cross-session recall. Use for project conventions, user preferences, and architectural decisions.',
+  inputSchema: ExtractMemoryInput,
+  jsonSchema: {
+    type: 'function',
+    function: {
+      name: 'extract_memory',
+      description: 'Persist a memory entry for cross-session recall',
+      parameters: {
+        type: 'object',
+        properties: {
+          topic: { type: 'string', enum: ['project', 'user', 'reference'] },
+          title: { type: 'string', description: 'Entry title' },
+          content: { type: 'string', description: 'Memory content' },
+          tags: { type: 'array', items: { type: 'string' }, description: 'Search tags' },
+        },
+        required: ['topic', 'title', 'content'],
+      },
+    },
+  },
+  async execute(input: InputT, projectRoot: string): Promise<{ result: string }> {
+    const root = getMemoryRoot(projectRoot);
+    await ensureMemoryScaffold(root);
+    await writeEntry(root, input.topic, input.title, input.content, input.tags ?? []);
+    return {
+      result: `Memory entry "${input.title}" saved to .boocode/memory/${input.topic}/`,
+    };
+  },
+};