Files
boocode/apps/coder/src/services/__tests__/fuzzy-match.test.ts
indifferentketchup 59f07e8cb8 feat: write/edit robustness — fuzzy patch applier + worktree checkpoints (v2.7.1)
#3 Fuzzy patch applier: new pure fuzzy-match.ts (locateMatch, exact→trim→
unicode-canon→Levenshtein≥0.66, refuse-on-ambiguous) wired into pending_changes
applyOne/rewindOne so local-model whitespace/unicode drift in old_string no
longer loses the edit.

#4 Worktree checkpoint + conversation-trim: checkpoints table + checkpoints.ts
(shadow-commit of tracked+untracked into refs/boocode/checkpoints, hooked into
the 3 external-agent dispatcher paths) + POST restore route (reset --hard +
clean -fd -> transcript trim -> backend-session reset) + "Restore to here" UI.

Built by 3 parallel agents; DB-integration testing caught a created_at
self-deletion bug. Coder suite 234 passing; server+coder build + web tsc clean.
Builds on v2.7.0-mit. openspec write-edit-robustness.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 12:01:57 +00:00

174 lines
7.5 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { describe, it, expect } from 'vitest';
import { locateMatch, SIMILARITY_THRESHOLD } from '../fuzzy-match.js';
// Helper: assert a resolved span and slice it back out of the content so the
// test pins the EXACT file text the caller would replace.
function span(result: ReturnType<typeof locateMatch>): { start: number; end: number } {
if (result.kind !== 'exact' && result.kind !== 'fuzzy') {
throw new Error(`expected a located span, got ${result.kind}`);
}
return { start: result.start, end: result.end };
}
describe('locateMatch — strategy 1: exact', () => {
it('returns an exact unique span', () => {
const content = 'alpha\nbeta\ngamma\n';
const result = locateMatch(content, 'beta');
expect(result.kind).toBe('exact');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe('beta');
});
it('returns the right offsets for a multi-line exact needle', () => {
const content = 'one\ntwo\nthree\nfour\n';
const needle = 'two\nthree';
const result = locateMatch(content, needle);
expect(result.kind).toBe('exact');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe(needle);
});
it('refuses when the exact needle occurs more than once', () => {
const content = 'foo\nbar\nfoo\nbar\nfoo\n';
const result = locateMatch(content, 'foo');
expect(result).toEqual({ kind: 'ambiguous', count: 3 });
});
});
describe('locateMatch — strategy 2: per-line whitespace', () => {
it('matches across trailing-whitespace drift at the real span', () => {
// File has trailing spaces the model dropped from a TWO-line copy. A
// single-line needle would be located by exact indexOf (it's a substring),
// so use two lines where line 1's trailing ws breaks an exact substring run.
const content = 'function f() {\n setup(); \n return 1;\n}\n';
const needle = ' setup();\n return 1;'; // line 1 missing trailing spaces
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
// The returned span covers the ORIGINAL lines including the trailing spaces.
expect(content.slice(start, end)).toBe(' setup(); \n return 1;');
});
it('matches across indentation drift (multi-line block)', () => {
// File indents with 4 spaces; model emitted 2-space indentation. trimEnd
// alone does not normalize LEADING whitespace, so this exercises... actually
// leading-indent drift is a Levenshtein-tier fallback. Here we keep the
// leading indent identical and drift only trailing whitespace per line.
const content = ['if (x) {', ' doThing(); ', ' doOther();', '}'].join('\n');
const needle = [' doThing();', ' doOther();'].join('\n');
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe(' doThing(); \n doOther();');
});
it('ignores leading/trailing blank needle lines', () => {
const content = 'header\nbody line\nfooter\n';
const needle = '\n\nbody line\n\n';
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe('body line');
});
it('reports ambiguous when a whitespace-window matches twice', () => {
// Both line 1 and line 4 differ from the needle only by trailing whitespace,
// so exact indexOf fails (no exact substring) and the whitespace tier finds
// two equivalent windows → ambiguous.
const content = 'x = 1; \ny = 2;\nz = 3;\nx = 1;\t\n';
const needle = 'x = 1;'; // no trailing ws → not an exact substring of either line
const result = locateMatch(content, needle);
expect(result).toEqual({ kind: 'ambiguous', count: 2 });
});
});
describe('locateMatch — strategy 3: unicode canonicalization', () => {
it('matches across curly quotes', () => {
const content = "const s = 'hello';\n";
const needle = 'const s = hello;'; // hello
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
// Span maps back to ORIGINAL (straight-quote) text.
expect(content.slice(start, end)).toBe("const s = 'hello';");
});
it('matches across curly double-quotes', () => {
const content = 'log("done");\n';
const needle = 'log(“done”);'; // “done”
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe('log("done");');
});
it('matches across an em-dash drift', () => {
const content = 'range 1-10 inclusive\n';
const needle = 'range 1—10 inclusive'; // em-dash
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe('range 1-10 inclusive');
});
it('matches across a non-breaking space drift', () => {
const content = 'a b c\n'; // plain spaces
const needle = 'a b c'; // nbsp between words
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe('a b c');
});
});
describe('locateMatch — strategy 4: Levenshtein', () => {
it('matches a >= threshold near-miss (small typo drift)', () => {
// Needle has a one-char typo ('totals' vs 'total') so it is NOT an exact
// substring and the whitespace/canonical tiers (which require equality) both
// miss; Levenshtein similarity stays well above the 0.66 floor.
const content = 'const total = sum + tax;\n';
const needle = 'const totals = sum + tax;';
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
// Span maps to the real (correctly-spelled) file line.
expect(content.slice(start, end)).toBe('const total = sum + tax;');
});
it('matches a multi-line block with indentation drift via Levenshtein', () => {
const content = ['function g() {', ' return compute(a, b);', '}'].join('\n');
// 6-space indent vs file's 2-space; trimEnd does not fix leading indent, so
// this lands on the Levenshtein tier (joined-trim makes it identical → ~1.0).
const needle = [' return compute(a, b);'].join('\n');
const result = locateMatch(content, needle);
expect(result.kind).toBe('fuzzy');
const { start, end } = span(result);
expect(content.slice(start, end)).toBe(' return compute(a, b);');
});
it('returns not_found for a below-threshold miss', () => {
const content = 'the quick brown fox jumps over the lazy dog\n';
const needle = 'completely unrelated string of text here xyz';
const result = locateMatch(content, needle);
expect(result).toEqual({ kind: 'not_found' });
});
it('returns not_found for a genuinely-absent needle', () => {
const content = 'alpha\nbeta\ngamma\n';
const needle = 'this content does not exist anywhere at all';
const result = locateMatch(content, needle);
expect(result).toEqual({ kind: 'not_found' });
});
});
describe('locateMatch — edge cases', () => {
it('returns not_found for an empty needle', () => {
expect(locateMatch('anything', '')).toEqual({ kind: 'not_found' });
});
it('exposes a sane similarity threshold', () => {
expect(SIMILARITY_THRESHOLD).toBeGreaterThan(0);
expect(SIMILARITY_THRESHOLD).toBeLessThanOrEqual(1);
});
});