feat(server): memory v2 — BM25 + local embedding hybrid search
- Bm25Ranker: Okapi BM25 scoring (pure TS, no deps)
- Embedding module: ONNX-based local embeddings via onnxruntime-node
- Hybrid recall: weighted merge of BM25 (30%) and cosine similarity (70%)
- Falls back to keyword-only search via the MEMORY_SEARCH=keyword env var
- extract_memory agent tool for persisting memory entries
This commit is contained in:
67
apps/server/src/services/memory/bm25.ts
Normal file
67
apps/server/src/services/memory/bm25.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
// BM25 ranker — pure Okapi BM25 scoring. No external deps.
|
||||
|
||||
/** Optional tuning parameters for {@link Bm25Ranker}. */
interface Bm25Config {
  /** Term-frequency saturation parameter; defaults to 1.5. */
  k1?: number;
  /** Document-length normalization strength; defaults to 0.75. */
  b?: number;
}

/**
 * Okapi BM25 scorer over an in-memory corpus of strings.
 *
 * Call {@link Bm25Ranker.fit} once with the corpus, then {@link Bm25Ranker.score}
 * or {@link Bm25Ranker.rank} with a query. Tokenization is lowercase +
 * whitespace splitting, applied identically to documents and queries.
 */
export class Bm25Ranker {
  private k1: number;
  private b: number;
  private corpus: string[];
  private avgDocLen: number;
  private idfCache: Map<string, number>;
  private docCount: number;
  /** Per-document term -> occurrence count, built once in `fit`. */
  private docTermFreqs: Array<Map<string, number>>;
  /** Per-document token count, using the same tokenizer as scoring. */
  private docLengths: number[];
  /** Term -> number of documents containing it. */
  private docFreq: Map<string, number>;

  /**
   * @param config Optional `k1` / `b` overrides; missing values fall back to
   *   1.5 and 0.75 respectively.
   */
  constructor(config?: Bm25Config) {
    this.k1 = config?.k1 ?? 1.5;
    this.b = config?.b ?? 0.75;
    this.corpus = [];
    this.avgDocLen = 0;
    this.idfCache = new Map();
    this.docCount = 0;
    this.docTermFreqs = [];
    this.docLengths = [];
    this.docFreq = new Map();
  }

  /**
   * Indexes the corpus. Replaces any previously fitted corpus.
   *
   * Each document is tokenized exactly once here; document lengths are
   * measured with the same tokenizer used for scoring so that surrounding
   * whitespace or empty documents do not skew the average length.
   *
   * @param docs Documents to index; an empty array is allowed.
   */
  fit(docs: string[]): void {
    this.corpus = docs;
    this.docCount = docs.length;
    this.docTermFreqs = [];
    this.docLengths = [];
    this.docFreq = new Map();

    let totalTokens = 0;
    for (const doc of docs) {
      const tokens = this.tokenize(doc);
      const freqs = new Map<string, number>();
      for (const token of tokens) {
        freqs.set(token, (freqs.get(token) ?? 0) + 1);
      }
      // Each distinct term counts once per document towards document frequency.
      for (const term of freqs.keys()) {
        this.docFreq.set(term, (this.docFreq.get(term) ?? 0) + 1);
      }
      this.docTermFreqs.push(freqs);
      this.docLengths.push(tokens.length);
      totalTokens += tokens.length;
    }

    // Avoid 0/0 = NaN for an empty corpus.
    this.avgDocLen = docs.length > 0 ? totalTokens / docs.length : 0;
    this.idfCache.clear();
  }

  /** Lowercases and splits on whitespace, dropping empty tokens. */
  private tokenize(text: string): string[] {
    return text.toLowerCase().split(/\s+/).filter((t) => t.length > 0);
  }

  /** Smoothed inverse document frequency: ln(1 + (N - n + 0.5) / (n + 0.5)). */
  private idf(term: string): number {
    const cached = this.idfCache.get(term);
    if (cached !== undefined) return cached;
    const docsWithTerm = this.docFreq.get(term) ?? 0;
    const idf = Math.log(1 + (this.docCount - docsWithTerm + 0.5) / (docsWithTerm + 0.5));
    this.idfCache.set(term, idf);
    return idf;
  }

  /** Scores one indexed document against already-tokenized query terms. */
  private scoreTokens(queryTerms: string[], docIndex: number): number {
    const freqs = this.docTermFreqs[docIndex];
    if (freqs === undefined) return 0;
    const docLen = this.docLengths[docIndex] ?? 0;
    // avgDocLen is 0 only when every document is empty, in which case no term matches.
    const lengthRatio = this.avgDocLen > 0 ? docLen / this.avgDocLen : 0;
    const lengthNorm = this.k1 * (1 - this.b + this.b * lengthRatio);

    let total = 0;
    // Repeated query terms contribute once per occurrence.
    for (const term of queryTerms) {
      const tf = freqs.get(term) ?? 0;
      if (tf === 0) continue;
      total += this.idf(term) * ((tf * (this.k1 + 1)) / (tf + lengthNorm));
    }
    return total;
  }

  /**
   * BM25 score of the document at `docIndex` for `query`.
   *
   * @returns The score, or 0 when `docIndex` does not address a fitted
   *   document or no query term occurs in it.
   */
  score(query: string, docIndex: number): number {
    return this.scoreTokens(this.tokenize(query), docIndex);
  }

  /**
   * Ranks all fitted documents for `query`.
   *
   * @param topN Maximum number of results (default 10).
   * @returns Up to `topN` `{ index, score }` pairs in descending score order;
   *   documents scoring 0 are omitted.
   */
  rank(query: string, topN: number = 10): Array<{ index: number; score: number }> {
    // Tokenize the query once instead of once per document.
    const queryTerms = this.tokenize(query);
    const scores = this.docTermFreqs.map((_, i) => ({
      index: i,
      score: this.scoreTokens(queryTerms, i),
    }));
    return scores
      .sort((a, b) => b.score - a.score)
      .slice(0, topN)
      .filter((s) => s.score > 0);
  }
}
|
||||
55
apps/server/src/services/memory/embeddings.ts
Normal file
55
apps/server/src/services/memory/embeddings.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
// Embedding module — ONNX-based local embeddings.
|
||||
// Falls back gracefully when the model file is not available.
|
||||
|
||||
// Module-level state shared by the embedding functions in this file.
// `model` holds the loaded inference session; `ortModule` holds the runtime
// module it was created from. Both stay null until initialization succeeds.
let model: any = null;
let ortModule: any = null;

/**
 * Reports whether an embedding model is currently loaded.
 *
 * @returns `true` when `model` is anything other than `null`.
 */
export function isEmbeddingAvailable(): boolean {
  const sessionLoaded = model !== null;
  return sessionLoaded;
}
|
||||
|
||||
// `require` only exists when this module is evaluated as CommonJS; under ESM
// it is undefined and the dynamic `import()` path below is used instead.
// eslint-disable-next-line @typescript-eslint/no-require-imports
const dynamicRequire = typeof require !== 'undefined' ? require : null;

/**
 * Loads `onnxruntime-node` and creates the inference session used by `embed`.
 *
 * Best-effort by design: every failure is reported through the return value
 * rather than thrown, so callers can fall back to non-embedding search.
 *
 * @param modelPath Path to the ONNX model. When omitted, the
 *   `EMBEDDING_MODEL_PATH` environment variable is used.
 * @returns `true` when a session was created; `false` when the runtime could
 *   not be loaded, no model path was provided, or session creation threw
 *   (in which case `model` is reset to `null`).
 */
export async function initEmbeddings(modelPath?: string): Promise<boolean> {
  try {
    if (dynamicRequire) {
      try {
        ortModule = dynamicRequire('onnxruntime-node');
      } catch {
        ortModule = null;
      }
    }
    if (!ortModule) {
      try {
        const ns = await import('onnxruntime-node' as any);
        // When a CommonJS package is loaded through `import()`, its exports are
        // always available on `default`; named exports are only present when
        // static analysis detects them. Fall back to `default` so the session
        // factory is found either way.
        ortModule = ns?.InferenceSession ? ns : (ns?.default ?? null);
      } catch {
        ortModule = null;
      }
    }
    if (!ortModule) return false;

    const path = modelPath ?? process.env['EMBEDDING_MODEL_PATH'] ?? '';
    if (!path) return false;

    model = await ortModule.InferenceSession.create(path);
    return true;
  } catch {
    // Session creation failed (e.g. unreadable or invalid model file).
    model = null;
    return false;
  }
}
|
||||
|
||||
/**
 * Embeds a batch of texts with the loaded inference session.
 *
 * The texts are fed as a single string tensor of shape `[texts.length]` to the
 * session's first input, and the first output is split into one row per text.
 *
 * @param texts Texts to embed.
 * @returns One vector per input text, in input order; `[]` for an empty batch;
 *   `null` when no model is loaded, inference throws, or the output cannot be
 *   split into exactly `texts.length` equally sized rows.
 */
export async function embed(texts: string[]): Promise<number[][] | null> {
  if (!model) return null;
  // Nothing to embed; skip building a zero-length tensor.
  if (texts.length === 0) return [];
  try {
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    const ort: { Tensor: new (...args: unknown[]) => unknown } | null = ortModule || null;
    if (!ort) return null;

    const input = new ort.Tensor('string', texts, [texts.length]);
    const feeds: Record<string, any> = {};
    feeds[model.inputNames[0]] = input;
    const results = await model.run(feeds);
    const output = results[model.outputNames[0]];
    if (!output || !output.data) return null;

    const data = output.data as Float32Array;
    // Prefer the reported row width; when dims are missing, infer it from the
    // buffer size instead of assuming a fixed embedding dimension.
    const dim: number = output.dims?.[1] ?? data.length / texts.length;

    // Refuse outputs that do not hold exactly one `dim`-wide row per text
    // (e.g. an unexpected rank). Slicing such a buffer would yield truncated
    // or empty vectors that turn into NaN similarity scores downstream.
    if (!Number.isInteger(dim) || dim <= 0 || data.length !== dim * texts.length) {
      return null;
    }

    const vectors: number[][] = [];
    for (let i = 0; i < texts.length; i++) {
      vectors.push(Array.from(data.slice(i * dim, (i + 1) * dim)));
    }
    return vectors;
  } catch {
    // Best-effort: any inference failure is reported as "no embeddings".
    return null;
  }
}
|
||||
@@ -1,5 +1,9 @@
|
||||
import type { MemoryEntry } from './entries.js';
|
||||
import { scanProjectMemory } from './scan.js';
|
||||
import { Bm25Ranker } from './bm25.js';
|
||||
import { embed, isEmbeddingAvailable } from './embeddings.js';
|
||||
|
||||
// Search strategy, read once from the MEMORY_SEARCH environment variable when
// this module is loaded. Defaults to 'hybrid' when the variable is unset.
const SEARCH_MODE = process.env['MEMORY_SEARCH'] ?? 'hybrid';
|
||||
|
||||
function extractKeywords(query: string): string[] {
|
||||
return query
|
||||
@@ -31,6 +35,51 @@ export function rankByRelevance(query: string, entries: MemoryEntry[]): MemoryEn
|
||||
.map((s) => s.entry);
|
||||
}
|
||||
|
||||
/**
 * Ranks memory entries by a weighted blend of BM25 (30%) and embedding cosine
 * similarity (70%).
 *
 * Each entry is scored on the text `title + content + tags`. BM25 scores are
 * normalized by the best BM25 score in the batch so the top lexical match
 * contributes the full BM25 weight. When embeddings are unavailable, or an
 * entry's vector is unusable, its cosine contribution is 0.
 *
 * @param query Free-text query.
 * @param entries Candidate entries.
 * @returns Up to 10 entries whose combined score is at least 0.15, best first.
 */
export async function rankByHybrid(
  query: string,
  entries: MemoryEntry[],
): Promise<MemoryEntry[]> {
  const BM25_WEIGHT = 0.3;
  const COSINE_WEIGHT = 0.7;
  const MIN_SCORE = 0.15;
  const MAX_RESULTS = 10;

  if (entries.length === 0) return [];
  const texts = entries.map((e) => `${e.title} ${e.content} ${e.tags.join(' ')}`);

  const bm25 = new Bm25Ranker();
  bm25.fit(texts);
  const bm25Scores = texts.map((_, i) => bm25.score(query, i));
  // Normalize by the actual maximum. Raw BM25 scores are often below 1 for
  // small corpora, so a floor of 1 would leave them unnormalized and push even
  // a perfect lexical match under the score threshold. A reduce also avoids
  // spreading a large array into Math.max.
  const maxBm25 = bm25Scores.reduce((best, s) => (s > best ? s : best), 0);
  const normBm25 = bm25Scores.map((s) => (maxBm25 > 0 ? s / maxBm25 : 0));

  // Cosine similarity that degrades to 0 instead of NaN for unusable vectors.
  const cosine = (a: number[], b: number[]): number => {
    if (a.length === 0 || a.length !== b.length) return 0;
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let j = 0; j < a.length; j++) {
      const x = a[j] ?? 0;
      const y = b[j] ?? 0;
      dot += x * y;
      normA += x * x;
      normB += y * y;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    const similarity = denom === 0 ? 0 : dot / denom;
    return Number.isFinite(similarity) ? similarity : 0;
  };

  let cosineScores: number[] = [];
  if (isEmbeddingAvailable()) {
    // The query is embedded in the same batch; its vector comes back first.
    const vectors = await embed([query, ...texts]);
    const queryVec = vectors?.[0];
    if (vectors && queryVec) {
      cosineScores = texts.map((_, i) => {
        const vec = vectors[i + 1];
        return vec ? cosine(queryVec, vec) : 0;
      });
    }
  }

  // NOTE(review): without embeddings the combined score tops out at
  // BM25_WEIGHT, so MIN_SCORE effectively requires half of the best BM25
  // score. Confirm whether the weights should be rescaled in that case.
  const scored = entries.map((entry, i) => {
    const combined = (normBm25[i] ?? 0) * BM25_WEIGHT + (cosineScores[i] ?? 0) * COSINE_WEIGHT;
    return { entry, score: combined };
  });

  return scored
    .filter((s) => s.score >= MIN_SCORE)
    .sort((a, b) => b.score - a.score)
    .slice(0, MAX_RESULTS)
    .map((s) => s.entry);
}
|
||||
|
||||
export async function loadMemoryForSession(
|
||||
projectRoot: string,
|
||||
_sessionId?: string,
|
||||
@@ -39,6 +88,13 @@ export async function loadMemoryForSession(
|
||||
const entries = await scanProjectMemory(projectRoot);
|
||||
if (entries.length === 0) return [];
|
||||
|
||||
const relevant = query ? rankByRelevance(query, entries) : entries.slice(0, 5);
|
||||
const relevant = query
|
||||
? SEARCH_MODE === 'keyword'
|
||||
? rankByRelevance(query, entries)
|
||||
: await rankByHybrid(query, entries)
|
||||
: entries.slice(0, 5);
|
||||
|
||||
return relevant.map((e) => `[${e.topic}] ${e.title}: ${e.content}`);
|
||||
}
|
||||
|
||||
export { initEmbeddings } from './embeddings.js';
|
||||
|
||||
44
apps/server/src/services/tools/extract_memory.ts
Normal file
44
apps/server/src/services/tools/extract_memory.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
import { z } from 'zod';
|
||||
import type { ToolDef } from '../tools/types.js';
|
||||
import { ensureMemoryScaffold, getMemoryRoot } from '../memory/paths.js';
|
||||
import { writeEntry } from '../memory/store.js';
|
||||
|
||||
// Field validators for the extract_memory tool input, kept as a plain shape so
// each field reads on its own before being wrapped into the object schema.
const extractMemoryShape = {
  // One of the three fixed memory categories.
  topic: z.enum(['project', 'user', 'reference']).describe('Memory topic category'),
  // Non-empty, at most 200 characters.
  title: z.string().min(1).max(200).describe('Entry title (will be normalized to filename)'),
  // Non-empty body text.
  content: z.string().min(1).describe('Memory content body'),
  // May be omitted entirely.
  tags: z.array(z.string()).optional().describe('Optional tags for search'),
};

/** Runtime schema for the extract_memory tool input. */
const ExtractMemoryInput = z.object(extractMemoryShape);

/** Static input type inferred from {@link ExtractMemoryInput}. */
type InputT = z.infer<typeof ExtractMemoryInput>;
|
||||
|
||||
/**
 * Agent tool definition for `extract_memory`.
 *
 * `inputSchema` is the runtime validator; `jsonSchema` is the function-calling
 * description of the same fields. `execute` resolves the memory root for the
 * project, ensures its scaffold exists, writes the entry, and returns a
 * confirmation message.
 */
export const extractMemoryTool: ToolDef<InputT> = {
  name: 'extract_memory',
  description: 'Persist a memory entry to .boocode/memory/ for cross-session recall. Use for project conventions, user preferences, and architectural decisions.',
  inputSchema: ExtractMemoryInput,
  jsonSchema: {
    type: 'function',
    function: {
      name: 'extract_memory',
      description: 'Persist a memory entry for cross-session recall',
      parameters: {
        type: 'object',
        properties: {
          topic: { type: 'string', enum: ['project', 'user', 'reference'] },
          title: { type: 'string', description: 'Entry title' },
          content: { type: 'string', description: 'Memory content' },
          tags: { type: 'array', items: { type: 'string' }, description: 'Search tags' },
        },
        required: ['topic', 'title', 'content'],
      },
    },
  },
  async execute(input: InputT, projectRoot: string): Promise<unknown> {
    const { topic, title, content, tags } = input;
    const memoryRoot = getMemoryRoot(projectRoot);

    // The scaffold must exist before the entry is written.
    await ensureMemoryScaffold(memoryRoot);
    // Omitted tags are stored as an empty list.
    await writeEntry(memoryRoot, topic, title, content, tags ?? []);

    const result = `Memory entry "${title}" saved to .boocode/memory/${topic}/`;
    return { result };
  },
};
|
||||
Reference in New Issue
Block a user