/**
 * Pure utilities for parsing unified diff text and building display structures
 * for both unified and side-by-side (split) diff views.
 */

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** Classification of a single line inside a hunk. */
export type DiffLineType = 'add' | 'remove' | 'context' | 'header';

/** One line of a hunk; `content` has the leading `+`/`-`/space marker removed. */
export interface DiffLine {
  type: DiffLineType;
  content: string;
}

/**
 * One `@@ ... @@` hunk. `lines[0]` is always the `header` line produced by
 * `parseHunkHeader`; the remaining entries are the hunk body in diff order.
 */
export interface DiffHunk {
  oldStart: number;
  oldCount: number;
  newStart: number;
  newCount: number;
  lines: DiffLine[];
}

/** All hunks belonging to one file of the diff. */
export interface ParsedDiffFile {
  path: string;
  hunks: DiffHunk[];
}

/** A single cell in the split (side-by-side) view */
export interface SplitDisplayLine {
  type: DiffLineType;
  content: string;
  lineNumber: number | null;
}

/** A row in the split view — either a hunk header or a left/right pair */
export type SplitRow =
  | { kind: 'header'; content: string }
  | { kind: 'pair'; left: SplitDisplayLine | null; right: SplitDisplayLine | null };

// ---------------------------------------------------------------------------
// parseDiff
// ---------------------------------------------------------------------------

/**
 * Parse unified diff text into an array of ParsedDiffFile objects.
 *
 * Splits on `diff --git` headers, extracts file paths from `+++ b/`
 * (falling back to `--- a/`), and classifies each line within hunks.
 *
 * Text that precedes the first `diff --git` header (for example the commit
 * message printed by `git show`) is only reported as a file when it contains
 * at least one hunk, i.e. when the input is a plain `---`/`+++` unified diff
 * or a bare list of hunks.
 *
 * @param diffBody Raw unified diff text; may be empty or whitespace only.
 * @returns One entry per file section, in input order. Empty for blank input.
 */
export function parseDiff(diffBody: string): ParsedDiffFile[] {
  if (!diffBody || diffBody.trim().length === 0) {
    return [];
  }

  const files: ParsedDiffFile[] = [];
  const sections = diffBody.split(/^diff --git /m);

  sections.forEach((section, index) => {
    if (section.length === 0) return;

    const lines = section.split('\n');
    const hunks = parseSectionBody(lines);

    // Element 0 of the split is whatever came before the first `diff --git`
    // header. Without hunks it is a preamble (commit message, blank lines),
    // not a file, so it must not surface as an "unknown" entry.
    if (index === 0 && hunks.length === 0) return;

    files.push({ path: extractPath(lines), hunks });
  });

  return files;
}

// ---------------------------------------------------------------------------
// buildSplitRows
// ---------------------------------------------------------------------------

/**
 * Build side-by-side (split) display rows from a parsed diff file.
 *
 * For each hunk:
 * - Emits a header row (`@@ -... +... @@`)
 * - Buffers consecutive removals and additions
 * - On a context line (or hunk end), flushes buffered removals/additions as
 *   paired rows (left = removal or null, right = addition or null)
 * - Context lines become paired rows with identical content on both sides
 *
 * Line numbers start at the hunk's `oldStart` / `newStart` and advance once
 * per removal/context line (left) and once per addition/context line (right).
 *
 * @param file Parsed file whose hunks should be laid out.
 * @returns Rows in display order.
 */
export function buildSplitRows(file: ParsedDiffFile): SplitRow[] {
  const rows: SplitRow[] = [];

  for (const hunk of file.hunks) {
    // Header row
    const headerLine = hunk.lines.find((l) => l.type === 'header');
    rows.push({ kind: 'header', content: headerLine?.content ?? '@@' });

    let oldLineNo = hunk.oldStart;
    let newLineNo = hunk.newStart;
    let pendingRemovals: SplitDisplayLine[] = [];
    let pendingAdditions: SplitDisplayLine[] = [];

    // Pair the i-th buffered removal with the i-th buffered addition; the
    // longer side is padded with null cells.
    const flushPending = (): void => {
      const pairCount = Math.max(pendingRemovals.length, pendingAdditions.length);
      for (let i = 0; i < pairCount; i++) {
        rows.push({
          kind: 'pair',
          left: pendingRemovals[i] ?? null,
          right: pendingAdditions[i] ?? null,
        });
      }
      pendingRemovals = [];
      pendingAdditions = [];
    };

    for (const line of hunk.lines) {
      if (line.type === 'header') continue;

      if (line.type === 'remove') {
        pendingRemovals.push({
          type: 'remove',
          content: line.content,
          lineNumber: oldLineNo++,
        });
        continue;
      }

      if (line.type === 'add') {
        pendingAdditions.push({
          type: 'add',
          content: line.content,
          lineNumber: newLineNo++,
        });
        continue;
      }

      // Context line — flush any pending changes first
      flushPending();
      rows.push({
        kind: 'pair',
        left: { type: 'context', content: line.content, lineNumber: oldLineNo++ },
        right: { type: 'context', content: line.content, lineNumber: newLineNo++ },
      });
    }

    // Flush any trailing removals/additions at hunk end
    flushPending();
  }

  return rows;
}

// ---------------------------------------------------------------------------
// reconstructNewContent
// ---------------------------------------------------------------------------

/**
 * Reconstruct the "new" file content from diff hunks by concatenating
 * addition and context lines. Useful for syntax-highlighting the split
 * view's right column.
 *
 * Only lines present in the hunks are included; unchanged regions between
 * hunks are not part of the diff and therefore not part of the result.
 *
 * @param hunks Hunks of a single file, in order.
 * @returns The add/context line contents joined with `\n`.
 */
export function reconstructNewContent(hunks: DiffHunk[]): string {
  const lines: string[] = [];
  for (const hunk of hunks) {
    for (const line of hunk.lines) {
      if (line.type === 'add' || line.type === 'context') {
        lines.push(line.content);
      }
    }
  }
  return lines.join('\n');
}

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/**
 * Read the path from the first line starting with `marker` (`'+++ '` or
 * `'--- '`). Anything after a tab (timestamp column) and trailing whitespace
 * is discarded. Returns `null` when no such line exists or it names
 * `/dev/null`.
 */
function pathFromMarker(lines: string[], marker: string): string | null {
  const found = lines.find((l) => l.startsWith(marker));
  if (found === undefined) return null;

  const raw = found.slice(marker.length).replace(/\t.*$/, '').trimEnd();
  return raw === '/dev/null' ? null : stripPrefix(raw);
}

/**
 * Extract file path from `+++ b/` or `--- a/` metadata lines.
 *
 * Order of preference: the `+++` line, then the `---` line (used when the
 * `+++` side is `/dev/null`), then the `a/path b/path` remainder of the
 * `diff --git` line, and finally the literal `'unknown'`.
 */
function extractPath(lines: string[]): string {
  // Try +++ b/ first (most reliable for the "new" side), then --- a/
  const fromMarkers = pathFromMarker(lines, '+++ ') ?? pathFromMarker(lines, '--- ');
  if (fromMarkers !== null) return fromMarkers;

  // Last resort: parse the first line (e.g. "a/path b/path")
  const firstLine = lines[0] ?? '';
  const match = firstLine.match(/^a\/(.+)\s+b\/(.+)$/);
  return match?.[2] ?? 'unknown';
}

/** Strip the `a/` or `b/` prefix that git adds to diff paths. */
function stripPrefix(path: string): string {
  if (path.startsWith('b/') || path.startsWith('a/')) {
    return path.slice(2);
  }
  return path;
}

/**
 * Parse hunk headers and line content from a diff section body.
 *
 * The old/new line counts from each hunk header are tracked while the hunk
 * body is read. As long as the hunk is still owed lines, every line is
 * treated as content — so a removed line whose text begins with `-- `
 * (rendered as `--- ...`) or an added line beginning with `++ ` is not
 * mistaken for file metadata. Once the counts are satisfied, metadata lines
 * are skipped again.
 *
 * @param lines The section split on `\n`. Lines before the first hunk header
 *   (including the `a/path b/path` remainder of `diff --git`) are ignored.
 * @returns The hunks in order of appearance.
 */
function parseSectionBody(lines: string[]): DiffHunk[] {
  const hunks: DiffHunk[] = [];
  let currentHunk: DiffHunk | null = null;
  // Lines the current hunk header still promises on each side.
  let oldRemaining = 0;
  let newRemaining = 0;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? '';

    // A real content line always carries a +/-/space marker, so a line that
    // parses as a hunk header always starts a new hunk.
    const header = parseHunkHeader(line);
    if (header) {
      if (currentHunk) hunks.push(currentHunk);
      currentHunk = header;
      oldRemaining = header.oldCount;
      newRemaining = header.newCount;
      continue;
    }

    // Everything before the first hunk header is file-level metadata.
    if (!currentHunk) continue;

    const expectingContent = oldRemaining > 0 || newRemaining > 0;
    if (!expectingContent && isMetadataLine(line)) continue;

    if (line.startsWith('+')) {
      currentHunk.lines.push({ type: 'add', content: line.slice(1) });
      newRemaining--;
    } else if (line.startsWith('-')) {
      currentHunk.lines.push({ type: 'remove', content: line.slice(1) });
      oldRemaining--;
    } else if (line.startsWith(' ')) {
      currentHunk.lines.push({ type: 'context', content: line.slice(1) });
      oldRemaining--;
      newRemaining--;
    } else if (line.startsWith('\\')) {
      // "\ No newline at end of file" marker — not a content line.
    } else if (line.length > 0) {
      // Unprefixed text inside a hunk is kept verbatim as context.
      currentHunk.lines.push({ type: 'context', content: line });
      oldRemaining--;
      newRemaining--;
    } else if (expectingContent && i < lines.length - 1) {
      // A blank context line whose single leading space was stripped. The
      // final element is excluded: it is the artifact of the trailing newline.
      currentHunk.lines.push({ type: 'context', content: '' });
      oldRemaining--;
      newRemaining--;
    }
  }

  if (currentHunk) hunks.push(currentHunk);
  return hunks;
}

/**
 * Parse a `@@ -oldStart,oldCount +newStart,newCount @@` header line.
 *
 * A missing count defaults to 1, as in the unified diff format. Any text
 * after the closing `@@` (the section heading) is not included in the
 * header line's `content`.
 *
 * @returns A new hunk containing only its header line, or `null` when `line`
 *   is not a hunk header.
 */
function parseHunkHeader(line: string): DiffHunk | null {
  const match = line.match(/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/);
  if (!match) return null;

  return {
    oldStart: parseInt(match[1]!, 10),
    oldCount: parseInt(match[2] ?? '1', 10),
    newStart: parseInt(match[3]!, 10),
    newCount: parseInt(match[4] ?? '1', 10),
    // match[0] is exactly the `@@ ... @@` part of the line.
    lines: [{ type: 'header', content: match[0] }],
  };
}

/** Check if a line is diff metadata (not content). */
function isMetadataLine(line: string): boolean {
  return (
    line.startsWith('index ') ||
    line.startsWith('--- ') ||
    line.startsWith('+++ ') ||
    line.startsWith('new file mode') ||
    line.startsWith('deleted file mode')
  );
}