import { describe, it, expect } from 'vitest';
import { locateMatch, SIMILARITY_THRESHOLD } from '../fuzzy-match.js';

/**
 * Narrows a `locateMatch` result to a located span and returns its offsets.
 *
 * Throws when the result kind is anything other than `exact` or `fuzzy`, so a
 * test fails loudly at the point of use. Callers slice `content` with the
 * returned offsets to pin the EXACT file text that would be replaced.
 */
function span(result: ReturnType<typeof locateMatch>): { start: number; end: number } {
  if (result.kind !== 'exact' && result.kind !== 'fuzzy') {
    throw new Error(`expected a located span, got ${result.kind}`);
  }
  return { start: result.start, end: result.end };
}

describe('locateMatch — strategy 1: exact', () => {
  it('returns an exact unique span', () => {
    const content = 'alpha\nbeta\ngamma\n';
    const result = locateMatch(content, 'beta');
    expect(result.kind).toBe('exact');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe('beta');
  });

  it('returns the right offsets for a multi-line exact needle', () => {
    const content = 'one\ntwo\nthree\nfour\n';
    const needle = 'two\nthree';
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('exact');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe(needle);
  });

  it('refuses when the exact needle occurs more than once', () => {
    const content = 'foo\nbar\nfoo\nbar\nfoo\n';
    const result = locateMatch(content, 'foo');
    expect(result).toEqual({ kind: 'ambiguous', count: 3 });
  });
});

describe('locateMatch — strategy 2: per-line whitespace', () => {
  it('matches across trailing-whitespace drift at the real span', () => {
    // The file has trailing whitespace that the model dropped from a TWO-line
    // copy. A single-line needle would still be an exact substring of its
    // line, so two lines are used: line 1's trailing whitespace sits between
    // the two needle lines and breaks the exact substring run.
    const content = 'function f() {\n setup(); \n return 1;\n}\n';
    const needle = ' setup();\n return 1;'; // line 1 missing trailing whitespace
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    // The returned span covers the ORIGINAL lines, including the trailing
    // whitespace the needle lacks.
    expect(content.slice(start, end)).toBe(' setup(); \n return 1;');
  });

  it('matches across indentation drift (multi-line block)', () => {
    // Leading indentation is kept identical between file and needle here; only
    // the trailing whitespace of the first line drifts. Leading-indent drift is
    // covered in the Levenshtein suite.
    const content = ['if (x) {', ' doThing(); ', ' doOther();', '}'].join('\n');
    const needle = [' doThing();', ' doOther();'].join('\n');
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe(' doThing(); \n doOther();');
  });

  it('ignores leading/trailing blank needle lines', () => {
    const content = 'header\nbody line\nfooter\n';
    const needle = '\n\nbody line\n\n';
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe('body line');
  });

  it('reports ambiguous when a whitespace-window matches twice', () => {
    // A one-line needle such as 'x = 1;' is an exact substring of both
    // 'x = 1; ' and 'x = 1;\t', so it would never get past the exact tier.
    // A TWO-line needle is used instead: at both occurrences the first line
    // carries trailing whitespace, so the needle is not an exact substring
    // anywhere, while both two-line windows differ from it only by trailing
    // whitespace.
    // NOTE(review): expects the whitespace tier to report the number of
    // matching windows as `count` — confirm against the implementation.
    const content = 'x = 1; \ny = 2;\nz = 3;\nx = 1;\t\ny = 2;\n';
    const needle = 'x = 1;\ny = 2;';
    const result = locateMatch(content, needle);
    expect(result).toEqual({ kind: 'ambiguous', count: 2 });
  });
});

describe('locateMatch — strategy 3: unicode canonicalization', () => {
  // Typographic characters are written as \u escapes so that editors,
  // formatters, or copy/paste cannot silently normalize them and turn these
  // needles into exact matches.

  it('matches across curly quotes', () => {
    const content = "const s = 'hello';\n";
    const needle = 'const s = \u2018hello\u2019;'; // ‘hello’ (U+2018 / U+2019)
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    // Span maps back to ORIGINAL (straight-quote) text.
    expect(content.slice(start, end)).toBe("const s = 'hello';");
  });

  it('matches across curly double-quotes', () => {
    const content = 'log("done");\n';
    const needle = 'log(\u201Cdone\u201D);'; // “done” (U+201C / U+201D)
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe('log("done");');
  });

  it('matches across an em-dash drift', () => {
    const content = 'range 1-10 inclusive\n';
    const needle = 'range 1\u201410 inclusive'; // em-dash (U+2014)
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe('range 1-10 inclusive');
  });

  it('matches across a non-breaking space drift', () => {
    const content = 'a b c\n'; // plain spaces
    const needle = 'a\u00A0b\u00A0c'; // nbsp (U+00A0) between words
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    expect(content.slice(start, end)).toBe('a b c');
  });
});

describe('locateMatch — strategy 4: Levenshtein', () => {
  it('matches a >= threshold near-miss (small typo drift)', () => {
    // The needle has a one-char typo ('totals' vs 'total'), so it is NOT an
    // exact substring, and tiers that require equality after normalization
    // cannot match it. A single-character edit on a 25-character line should
    // stay well above SIMILARITY_THRESHOLD (0.66 when this test was written).
    const content = 'const total = sum + tax;\n';
    const needle = 'const totals = sum + tax;';
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    // Span maps to the real (correctly-spelled) file line.
    expect(content.slice(start, end)).toBe('const total = sum + tax;');
  });

  it('matches a multi-line block with indentation drift via Levenshtein', () => {
    const content = ['function g() {', '  return compute(a, b);', '}'].join('\n');
    // 6-space indent vs the file's 2-space indent. The needle is therefore not
    // an exact substring, and trailing-whitespace normalization does not fix
    // leading indent, so this is expected to land on the Levenshtein tier
    // (presumably comparing trimmed text, which makes the lines identical).
    const needle = ['      return compute(a, b);'].join('\n');
    const result = locateMatch(content, needle);
    expect(result.kind).toBe('fuzzy');
    const { start, end } = span(result);
    // The span covers the file's own line, with the file's own indentation.
    expect(content.slice(start, end)).toBe('  return compute(a, b);');
  });

  it('returns not_found for a below-threshold miss', () => {
    const content = 'the quick brown fox jumps over the lazy dog\n';
    const needle = 'completely unrelated string of text here xyz';
    const result = locateMatch(content, needle);
    expect(result).toEqual({ kind: 'not_found' });
  });

  it('returns not_found for a genuinely-absent needle', () => {
    const content = 'alpha\nbeta\ngamma\n';
    const needle = 'this content does not exist anywhere at all';
    const result = locateMatch(content, needle);
    expect(result).toEqual({ kind: 'not_found' });
  });
});

describe('locateMatch — edge cases', () => {
  it('returns not_found for an empty needle', () => {
    expect(locateMatch('anything', '')).toEqual({ kind: 'not_found' });
  });

  it('exposes a sane similarity threshold', () => {
    expect(SIMILARITY_THRESHOLD).toBeGreaterThan(0);
    expect(SIMILARITY_THRESHOLD).toBeLessThanOrEqual(1);
  });
});