Files
boocode/apps/server/src/services/git_diff.ts
indifferentketchup 31d8efe66a feat(web): enhanced file panel — side-by-side diff, hide whitespace, inline review
Adds DiffSplitView component for side-by-side diff mode, whitespace-only
change filtering, inline review comments with thread/gutter cell UI, diff
preferences persistence, and write-file API support for in-browser editing.

Backend: hideWhitespace param on git diff endpoint, write_file route.
2026-06-07 22:16:20 +00:00

559 lines
19 KiB
TypeScript

import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { stat, realpath } from 'node:fs/promises';
import { isAbsolute, join, resolve, sep } from 'node:path';
// Promise-returning form of `child_process.execFile`; runGit and runGitWrite spawn git through it.
const execFileAsync = promisify(execFile);
// `timeout` option (milliseconds) passed to every git child process: 30 seconds.
const GIT_TIMEOUT_MS = 30_000;
// `maxBuffer` option passed by runGit so large diff payloads fit in the captured output.
const GIT_MAX_BUFFER = 10 * 1024 * 1024; // 10MB
// Default cap used by classifyDiffBody; it is compared against the diff body's string length.
const FILE_DIFF_CAP = 512 * 1024; // 512KB per-file display cap
/**
 * Which comparison a diff request asks for: 'uncommitted' diffs the working
 * tree/index against HEAD (plus untracked files); 'committed' diffs a resolved
 * base ref against HEAD.
 */
export type GitDiffMode = 'uncommitted' | 'committed';
/**
 * Per-file change classification stored in `GitDiffFile.change_type`.
 * Name-status codes R and C both map to 'renamed'; M, T and U map to
 * 'modified'; 'untracked' comes from `git ls-files --others`.
 */
export type ChangeType = 'added' | 'modified' | 'deleted' | 'renamed' | 'untracked';
/** One changed or untracked file inside a GitDiffResult. */
export interface GitDiffFile {
// Current path as reported by git; for rename/copy entries this is the new path.
path: string;
// Previous path for entries parsed as rename/copy; null for every other change type.
old_path: string | null;
// Classification derived from the name-status code, or 'untracked'.
change_type: ChangeType;
// Line counts taken from `git diff --numstat`; 0 when no numstat entry matched the path,
// and always 0 for binary and untracked files.
added_lines: number;
removed_lines: number;
// True when the path appears in the `--cached` name-status listing.
// Always false in committed mode and for untracked files.
staged: boolean;
diff_body: string | null; // null when is_binary or is_too_large, when no diff section matched the path, and for untracked files
// True when the diff section contains a "Binary files " line, or (without a section) numstat reported "-\t-".
is_binary: boolean;
// True when the diff section is longer than the display cap.
is_too_large: boolean;
}
/** Structured diff returned by getGitDiff. */
export interface GitDiffResult {
// Mode that actually produced `files`. When committed mode finds no base ref the
// result is produced by the uncommitted path and this is 'uncommitted'.
mode: GitDiffMode;
// Committed mode: the resolved base ref. Uncommitted mode: null, except for the
// no-base fallback, which carries the label 'uncommitted (no base found)'.
base_label: string | null;
// Value from detectInProgress: 'merge', 'cherry-pick', 'bisect', 'rebase', or null.
in_progress_op: string | null;
// Tracked changes first, followed (uncommitted mode only) by untracked files.
files: GitDiffFile[];
}
/**
 * Runs `git <args>` in `cwd` and returns its stdout as a string.
 *
 * The child gets the shared timeout and the enlarged output buffer. Every
 * failure (spawn error, non-zero exit, timeout, buffer overflow) collapses to
 * `null`, so read-side callers can degrade gracefully instead of surfacing
 * git errors.
 */
async function runGit(args: string[], cwd: string): Promise<string | null> {
  const spawnOptions = {
    cwd,
    timeout: GIT_TIMEOUT_MS,
    windowsHide: true,
    maxBuffer: GIT_MAX_BUFFER,
  };
  try {
    const completed = await execFileAsync('git', args, spawnOptions);
    return completed.stdout.toString();
  } catch {
    // Deliberate best-effort: the caller only needs to know "no answer".
    return null;
  }
}
// ── Pure helpers (unit-testable without spawning git) ──────────────────────
/**
 * Parses one line of `git diff --name-status` output.
 *
 * Expected shapes are `<code>\t<path>` and, for renames/copies,
 * `R<score>\t<old>\t<new>` / `C<score>\t<old>\t<new>`. Copies are reported
 * as 'renamed'; type changes (T) and unmerged entries (U) as 'modified'.
 *
 * @returns the parsed entry, or null for blank lines, unknown status codes
 *          and lines with too few tab-separated fields.
 */
function parseNameStatusLine(line: string): {
  path: string;
  old_path: string | null;
  change_type: ChangeType;
} | null {
  const fields = line.trim().split('\t');
  // A blank line splits into a single empty field, so this also rejects it.
  if (fields.length < 2) return null;

  const status = fields[0] ?? '';
  if (/^[RC]/.test(status)) {
    // Rename/copy lines carry both the old and the new path.
    return fields.length >= 3
      ? { path: fields[2] ?? '', old_path: fields[1] ?? null, change_type: 'renamed' }
      : null;
  }

  const target = fields[1] ?? '';
  if (target === '') return null;

  const letter = status.charAt(0);
  if (letter === 'A') return { path: target, old_path: null, change_type: 'added' };
  if (letter === 'D') return { path: target, old_path: null, change_type: 'deleted' };
  if (letter === 'M' || letter === 'T' || letter === 'U') {
    return { path: target, old_path: null, change_type: 'modified' };
  }
  return null;
}
/**
 * Parses the full output of `git diff --name-status` into a list of entries,
 * in output order. Lines that do not parse (blank lines, unknown codes) are
 * dropped silently.
 */
export function parseNameStatus(output: string): {
  path: string;
  old_path: string | null;
  change_type: ChangeType;
}[] {
  const entries: {
    path: string;
    old_path: string | null;
    change_type: ChangeType;
  }[] = [];
  for (const rawLine of output.split('\n')) {
    const entry = parseNameStatusLine(rawLine);
    if (entry !== null) entries.push(entry);
  }
  return entries;
}
/**
 * Reduces the path column of a `git diff --numstat` line to the file's
 * current path.
 *
 * For renames/copies git prints the column in a compact form rather than as a
 * plain path: `old => new`, or `prefix/{old => new}/suffix` when the two
 * paths share leading/trailing directories. Either side inside the braces may
 * be empty (`a/{b => }/c` means `a/b/c` became `a/c`).
 *
 * Limitation: a file whose real name contains " => " cannot be told apart
 * from rename notation and is treated as a rename.
 */
function numstatCurrentPath(rawPath: string): string {
  const braced = /^(.*?)\{(.*?) => (.*?)\}(.*)$/.exec(rawPath);
  if (braced !== null) {
    const prefix = braced[1] ?? '';
    const renamedTo = braced[3] ?? '';
    const suffix = braced[4] ?? '';
    // An empty new-side segment would otherwise leave "dir//file".
    if (renamedTo === '' && prefix.endsWith('/') && suffix.startsWith('/')) {
      return prefix + suffix.slice(1);
    }
    return prefix + renamedTo + suffix;
  }
  const arrow = rawPath.indexOf(' => ');
  return arrow === -1 ? rawPath : rawPath.slice(arrow + ' => '.length);
}

/**
 * Parses a single `git diff --numstat` output line
 * (`<added>\t<removed>\t<path>`).
 *
 * Binary files are reported by git as `-\t-\t<path>` and come back with
 * `binary: true` and zero counts. For renamed files the returned `path` is the
 * new path, so it matches the path reported by `--name-status`.
 *
 * @returns the parsed entry, or null when the line has fewer than three
 *          fields, an empty path, or non-numeric counts.
 */
export function parseNumStatLine(line: string): {
  path: string;
  added: number;
  removed: number;
  binary: boolean;
} | null {
  const parts = line.trim().split('\t');
  if (parts.length < 3) return null;
  const [added, removed, rawPath] = parts;
  if (!rawPath) return null;
  const path = numstatCurrentPath(rawPath);
  if (!path) return null;
  if (added === '-' && removed === '-') {
    return { path, added: 0, removed: 0, binary: true };
  }
  const a = parseInt(added ?? '', 10);
  const r = parseInt(removed ?? '', 10);
  if (Number.isNaN(a) || Number.isNaN(r)) return null;
  return { path, added: a, removed: r, binary: false };
}
/**
 * Splits unified diff text into per-file sections keyed by the file's current
 * path. Each section starts at its `diff --git` header and is stored verbatim.
 *
 * The key is chosen from the first source that applies:
 *   1. the `+++ b/<path>` line (present for added/modified files);
 *   2. the `--- a/<path>` line (deleted files, whose `+++` side is /dev/null);
 *   3. the text after the last " b/" on the `diff --git` line (pure renames,
 *      mode-only changes and binary files carry no `---`/`+++` lines).
 * Sections for which none of these match are dropped.
 */
export function splitDiffByFile(diffText: string): Map<string, string> {
  const bodies = new Map<string, string>();
  if (diffText.trim() === '') return bodies;

  // Returns the key for one section, or undefined when no path can be found.
  const pathOf = (chunk: string): string | undefined => {
    const plusSide = /^\+{3} b\/(.+)$/m.exec(chunk);
    if (plusSide !== null) return (plusSide[1] ?? '').trim();

    const minusSide = /^-{3} a\/(.+)$/m.exec(chunk);
    const oldPath = minusSide === null ? '' : (minusSide[1] ?? '').trim();
    if (oldPath !== '' && oldPath !== '/dev/null') return oldPath;

    const header = /^diff --git a\/.+ b\/(.+)$/m.exec(chunk);
    return header === null ? undefined : (header[1] ?? '').trim();
  };

  // The lookahead keeps every "diff --git" header attached to its own section.
  for (const chunk of diffText.split(/(?=^diff --git )/m)) {
    if (chunk.trim() === '') continue;
    const key = pathOf(chunk);
    if (key !== undefined) bodies.set(key, chunk);
  }
  return bodies;
}
/**
 * Decides how a single file's diff section should be presented.
 *
 * @param body one per-file section of unified diff text
 * @param cap  maximum `body.length` that is still shown inline
 * @returns 'binary' when the section contains a line starting with
 *          "Binary files " (checked first, regardless of size), 'too_large'
 *          when it exceeds `cap`, otherwise 'diff'.
 */
export function classifyDiffBody(body: string, cap = FILE_DIFF_CAP): 'diff' | 'binary' | 'too_large' {
  const hasBinaryMarker = /^Binary files /m.test(body);
  if (hasBinaryMarker) return 'binary';
  return body.length > cap ? 'too_large' : 'diff';
}
/**
 * Picks the default diff mode: a dirty working tree shows 'uncommitted'
 * changes, a clean one shows 'committed' changes.
 */
export function autoSelectMode(isDirty: boolean): GitDiffMode {
  if (isDirty) {
    return 'uncommitted';
  }
  return 'committed';
}
/** Reports whether a commit is possible, i.e. whether any listed file is staged. */
export function canCommit(files: GitDiffFile[]): boolean {
  for (const file of files) {
    if (file.staged) return true;
  }
  return false;
}
/**
 * Reports whether the repository containing `cwd` has uncommitted changes
 * (anything listed by `git status --porcelain`).
 *
 * Returns false when `cwd` is not inside a git work tree. When the status
 * command itself fails the state is unknown and the repo is assumed dirty.
 */
export async function isRepoDirty(cwd: string): Promise<boolean> {
  const root = await resolveGitRoot(cwd);
  if (!root) return false;
  const porcelain = await runGit(['status', '--porcelain'], root);
  // null means "could not determine": err on the side of dirty.
  return porcelain === null || porcelain.trim().length > 0;
}
/**
 * Async per-file symlink-escape guard (FIX 3 / D-4). Rejects `filePath` when
 * following symlinks from it leads outside the repository root.
 *
 * Both sides of the comparison are canonicalised with `realpath`: comparing a
 * resolved target against an unresolved root would reject every file of a
 * repository that is itself reached through a symlink.
 *
 * When the target does not exist yet (a new file), the nearest existing
 * ancestor directory is checked instead, so a new file below a symlinked
 * directory that points outside the repo is rejected as well. Callers are
 * expected to have run `validateWritePath` first; a path that lexically leaves
 * the root is rejected here too, but with the symlink wording.
 *
 * @param repoRoot repository root directory
 * @param filePath path relative to `repoRoot`
 * @throws GitWriteError (busy = false) when the path resolves outside the root
 */
export async function checkSymlinkEscape(repoRoot: string, filePath: string): Promise<void> {
  let realRoot: string;
  try {
    realRoot = await realpath(repoRoot);
  } catch {
    // The root itself cannot be resolved, so nothing below it exists on disk
    // and there is no symlink to follow.
    return;
  }

  let probe = resolve(repoRoot, filePath);
  let real: string | null = null;
  while (real === null) {
    try {
      real = await realpath(probe);
    } catch {
      // Not resolvable (typically: does not exist yet), so step up one level.
      const parent = resolve(probe, '..');
      if (parent === probe) return; // reached the filesystem root; nothing to check
      probe = parent;
    }
  }

  // Avoid a doubled separator when the repository is the filesystem root.
  const rootPrefix = realRoot.endsWith(sep) ? realRoot : realRoot + sep;
  if (real !== realRoot && !real.startsWith(rootPrefix)) {
    throw new GitWriteError(`path escapes repository root via symlink: ${filePath}`, false);
  }
}
// ── Async helpers ──────────────────────────────────────────────────────────
/**
 * Resolves the base ref for Committed mode, trying in order:
 *   1. the current branch's tracking branch (`@{upstream}`);
 *   2. `origin/HEAD` (the remote's default branch).
 *
 * A candidate counts as resolved when git exits successfully and prints a
 * non-empty name. Failures are detected solely through `runGit` returning
 * null. Git writes its "fatal: ..." diagnostics to stderr and exits non-zero,
 * so inspecting stdout for words like "fatal" would only ever reject
 * legitimate refs (e.g. `origin/fix-fatal-crash`).
 *
 * @returns `{ base, label }` with both set to the resolved ref, or
 *          `{ base: null, label: 'uncommitted (no base found)' }` when neither
 *          candidate resolves.
 */
export async function resolveCommittedBase(
  cwd: string,
): Promise<{ base: string | null; label: string }> {
  const candidates: string[][] = [
    ['rev-parse', '--abbrev-ref', '--symbolic-full-name', '@{upstream}'],
    ['rev-parse', '--abbrev-ref', 'origin/HEAD'],
  ];
  // Sequential on purpose: the fallback is only queried when the tracking branch is missing.
  for (const args of candidates) {
    const ref = (await runGit(args, cwd))?.trim();
    if (ref) return { base: ref, label: ref };
  }
  return { base: null, label: 'uncommitted (no base found)' };
}
/**
 * Detects an in-progress git operation by probing for sentinel entries under
 * `<repoRoot>/.git`.
 *
 * Sentinels are checked in a fixed order and the first one found wins:
 * MERGE_HEAD, CHERRY_PICK_HEAD, BISECT_LOG, then the `rebase-merge` and
 * `rebase-apply` directories (both reported as 'rebase').
 *
 * @returns 'merge' | 'cherry-pick' | 'bisect' | 'rebase', or null when no
 *          sentinel exists.
 */
export async function detectInProgress(repoRoot: string): Promise<string | null> {
  const sentinels: ReadonlyArray<readonly [string, string]> = [
    ['MERGE_HEAD', 'merge'],
    ['CHERRY_PICK_HEAD', 'cherry-pick'],
    ['BISECT_LOG', 'bisect'],
    ['rebase-merge', 'rebase'],
    ['rebase-apply', 'rebase'],
  ];
  for (const [entry, operation] of sentinels) {
    let present = true;
    try {
      await stat(join(repoRoot, '.git', entry));
    } catch {
      // Sentinel missing (or unreadable), so move on to the next one.
      present = false;
    }
    if (present) return operation;
  }
  return null;
}
// ── Read logic ─────────────────────────────────────────────────────────────
/**
 * Resolves the work-tree root of the repository containing `cwd` via
 * `git rev-parse --show-toplevel`. Returns null when the command fails, which
 * is how "not a git repository" shows up here.
 */
async function resolveGitRoot(cwd: string): Promise<string | null> {
  const toplevel = await runGit(['rev-parse', '--show-toplevel'], cwd);
  if (toplevel === null) return null;
  return toplevel.trim();
}
/**
 * Indexes `git diff --numstat` output by path. Lines that do not parse are
 * skipped; when a path occurs more than once the last line wins.
 */
function buildNumstatMap(
  output: string,
): Map<string, { added: number; removed: number; binary: boolean }> {
  const stats = new Map<string, { added: number; removed: number; binary: boolean }>();
  output.split('\n').forEach((row) => {
    const parsed = parseNumStatLine(row);
    if (!parsed) return;
    const { path, added, removed, binary } = parsed;
    stats.set(path, { added, removed, binary });
  });
  return stats;
}
/**
 * Builds the 'uncommitted' view of a repository: every tracked file that
 * differs from HEAD (staged or not), followed by untracked files.
 *
 * Repositories without any commit (unborn branch) have no HEAD to diff
 * against. There the staged listing (`git diff --cached`, which compares the
 * index with the empty tree) is used as the list of changed files and as the
 * numstat source, so files staged for the very first commit are reported
 * instead of silently disappearing.
 *
 * @param gitRoot          work-tree root; used as cwd for every git call
 * @param inProgress       in-progress operation label, copied into the result
 * @param ignoreWhitespace when true, diff bodies are produced with `-w`; the
 *                         name-status and numstat listings are not affected
 * @returns result with `mode: 'uncommitted'` and `base_label: null`
 */
async function getUncommittedDiff(
  gitRoot: string,
  inProgress: string | null,
  ignoreWhitespace = false,
): Promise<GitDiffResult> {
  const ws = ignoreWhitespace ? ['-w'] : [];
  const hasCommits = (await runGit(['rev-parse', '--verify', 'HEAD'], gitRoot)) !== null;

  const [nameStatusOut, cachedNameStatusOut, untrackedOut, numstatOut, diffOut, cachedDiffOut] =
    await Promise.all([
      hasCommits
        ? runGit(['diff', '--name-status', 'HEAD'], gitRoot)
        : Promise.resolve(null),
      hasCommits
        ? runGit(['diff', '--cached', '--name-status', 'HEAD'], gitRoot)
        : runGit(['diff', '--cached', '--name-status'], gitRoot),
      runGit(['ls-files', '--others', '--exclude-standard'], gitRoot),
      hasCommits
        ? runGit(['diff', '--numstat', 'HEAD'], gitRoot)
        : runGit(['diff', '--cached', '--numstat'], gitRoot),
      hasCommits ? runGit(['diff', ...ws, 'HEAD'], gitRoot) : Promise.resolve(''),
      hasCommits
        ? runGit(['diff', ...ws, '--cached', 'HEAD'], gitRoot)
        : runGit(['diff', ...ws, '--cached'], gitRoot),
    ]);

  // Unborn branch: the staged listing is the only listing of tracked changes.
  const changedOut = hasCommits ? nameStatusOut : cachedNameStatusOut;
  const allChanged = parseNameStatus(changedOut ?? '');
  const stagedSet = new Set(
    parseNameStatus(cachedNameStatusOut ?? '').map((f) => f.path),
  );
  const untracked = (untrackedOut ?? '').split('\n').filter(Boolean);
  const numstatMap = buildNumstatMap(numstatOut ?? '');

  // Prefer the section from the working-tree diff; fall back to the staged
  // diff for paths that only have a section there.
  const diffMap = splitDiffByFile(diffOut ?? '');
  for (const [path, section] of splitDiffByFile(cachedDiffOut ?? '')) {
    if (!diffMap.has(path)) diffMap.set(path, section);
  }

  const files: GitDiffFile[] = [];
  for (const entry of allChanged) {
    const ns = numstatMap.get(entry.path);
    const body = diffMap.get(entry.path) ?? null;
    // Without a diff section, numstat's "-\t-" marker is the only binary signal.
    const kind = body !== null ? classifyDiffBody(body) : ns?.binary ? 'binary' : 'diff';
    files.push({
      path: entry.path,
      old_path: entry.old_path,
      change_type: entry.change_type,
      added_lines: ns?.added ?? 0,
      removed_lines: ns?.removed ?? 0,
      staged: stagedSet.has(entry.path),
      diff_body: kind === 'diff' ? body : null,
      is_binary: kind === 'binary',
      is_too_large: kind === 'too_large',
    });
  }
  // Untracked files have no diff against anything: listed with empty stats.
  for (const p of untracked) {
    files.push({
      path: p,
      old_path: null,
      change_type: 'untracked',
      added_lines: 0,
      removed_lines: 0,
      staged: false,
      diff_body: null,
      is_binary: false,
      is_too_large: false,
    });
  }
  return { mode: 'uncommitted', base_label: null, in_progress_op: inProgress, files };
}
/**
 * Builds the 'committed' view: the diff between `base` and HEAD.
 *
 * @param gitRoot          work-tree root; used as cwd for every git call
 * @param base             ref to diff against
 * @param label            human-readable base label, copied into `base_label`
 * @param inProgress       in-progress operation label, copied into the result
 * @param ignoreWhitespace when true, diff bodies are produced with `-w`; the
 *                         name-status and numstat listings are not affected
 */
async function getCommittedDiff(
  gitRoot: string,
  base: string,
  label: string,
  inProgress: string | null,
  ignoreWhitespace = false,
): Promise<GitDiffResult> {
  const whitespaceFlag = ignoreWhitespace ? ['-w'] : [];
  const [statusText, numstatText, patchText] = await Promise.all([
    runGit(['diff', '--name-status', base, 'HEAD'], gitRoot),
    runGit(['diff', '--numstat', base, 'HEAD'], gitRoot),
    runGit(['diff', ...whitespaceFlag, base, 'HEAD'], gitRoot),
  ]);

  const statsByPath = buildNumstatMap(numstatText ?? '');
  const sectionsByPath = splitDiffByFile(patchText ?? '');

  const files: GitDiffFile[] = [];
  for (const change of parseNameStatus(statusText ?? '')) {
    const stats = statsByPath.get(change.path);
    const section = sectionsByPath.get(change.path) ?? null;
    let kind: 'diff' | 'binary' | 'too_large';
    if (section !== null) {
      kind = classifyDiffBody(section);
    } else {
      // No diff section: numstat's binary marker is the only signal left.
      kind = stats?.binary ? 'binary' : 'diff';
    }
    files.push({
      path: change.path,
      old_path: change.old_path,
      change_type: change.change_type,
      added_lines: stats?.added ?? 0,
      removed_lines: stats?.removed ?? 0,
      staged: false, // staged concept does not apply in committed mode
      diff_body: kind === 'diff' ? section : null,
      is_binary: kind === 'binary',
      is_too_large: kind === 'too_large',
    });
  }
  return { mode: 'committed', base_label: label, in_progress_op: inProgress, files };
}
/**
 * Entry point: returns the structured git diff for `cwd` in the requested
 * mode, or null when `cwd` is not inside a git repository.
 *
 * In committed mode without a resolvable base ref the uncommitted diff is
 * returned instead, with `base_label` set to the descriptive fallback label.
 *
 * @param ignoreWhitespace optional; treated as false when omitted
 */
export async function getGitDiff(cwd: string, mode: GitDiffMode, ignoreWhitespace?: boolean): Promise<GitDiffResult | null> {
  const root = await resolveGitRoot(cwd);
  if (!root) return null;

  const operation = await detectInProgress(root);
  const hideWhitespace = ignoreWhitespace ?? false;

  if (mode !== 'uncommitted') {
    const { base, label } = await resolveCommittedBase(root);
    if (base) {
      return getCommittedDiff(root, base, label, operation, hideWhitespace);
    }
    // No base to compare against: show uncommitted changes under the fallback label.
    const fallback = await getUncommittedDiff(root, operation, hideWhitespace);
    return { ...fallback, base_label: label };
  }
  return getUncommittedDiff(root, operation, hideWhitespace);
}
// ── Phase 2: Write helpers ─────────────────────────────────────────────────
// Fallback commit identity, used by deriveCommitIdentity when `git config user.name` /
// `git config user.email` yield nothing. Intended to match the constants in
// project_bootstrap.ts (that file is not visible from here; keep the two in sync).
const GIT_USER_NAME = 'indifferentketchup';
const GIT_USER_EMAIL = 'samkintop@gmail.com';
/**
 * Error raised by the git write helpers (path validation, stage, unstage,
 * commit, discard).
 */
export class GitWriteError extends Error {
  /** True when the failure was classified as "repository is busy" (index lock held by another git process). */
  public readonly busy: boolean;

  constructor(message: string, busy: boolean) {
    super(message);
    this.busy = busy;
    this.name = 'GitWriteError';
  }
}
/**
 * Validates a per-file path argument for write operations, purely lexically
 * (the file does not have to exist on disk).
 *
 * Rejected: empty/blank or non-string values, flag injection (leading `-`),
 * the repository root itself (`.` or anything resolving to it), absolute
 * paths, and `..` traversal out of the root.
 *
 * `repoRoot` is normalised before the containment check, so a root written
 * with a trailing separator or `..` segments, or a repository located at the
 * filesystem root, no longer causes every valid path to be rejected.
 *
 * @throws GitWriteError (busy = false) describing the first violated rule
 */
export function validateWritePath(repoRoot: string, filePath: string): void {
  if (typeof filePath !== 'string' || filePath.trim() === '') {
    throw new GitWriteError('path is required', false);
  }
  if (filePath.startsWith('-')) {
    throw new GitWriteError(`invalid path (flag injection): ${filePath}`, false);
  }
  if (filePath === '.') {
    throw new GitWriteError('cannot operate on repository root (.)', false);
  }
  if (isAbsolute(filePath)) {
    throw new GitWriteError(`path must be relative: ${filePath}`, false);
  }
  // `resolve` output is normalised; compare it against a root normalised the same way.
  const root = resolve(repoRoot);
  const resolved = resolve(root, filePath);
  const rootPrefix = root.endsWith(sep) ? root : root + sep;
  if (resolved === root || !resolved.startsWith(rootPrefix)) {
    throw new GitWriteError(`path escapes repository root: ${filePath}`, false);
  }
}
/**
 * Determines the author identity for server-side commits: the repository's
 * effective `user.name` / `user.email` from git config, each falling back
 * independently to the bootstrap constant when unset, empty, or unreadable.
 */
export async function deriveCommitIdentity(
  repoRoot: string,
): Promise<{ name: string; email: string }> {
  const readConfig = (key: string): Promise<string | null> => runGit(['config', key], repoRoot);
  const [configuredName, configuredEmail] = await Promise.all([
    readConfig('user.name'),
    readConfig('user.email'),
  ]);
  // `||` rather than `??`: an empty string must fall back as well.
  const name = configuredName?.trim() || GIT_USER_NAME;
  const email = configuredEmail?.trim() || GIT_USER_EMAIL;
  return { name, email };
}
/**
 * Runs a mutating git command and propagates failures as GitWriteError.
 *
 * The command is run with git's global `--literal-pathspecs` option. The
 * paths handed to the write helpers originate from API callers, and git would
 * otherwise interpret them as pathspecs: `*` is a glob and a leading `:`
 * introduces pathspec magic (`:/` is the whole repository). Without the
 * option, a "path" such as `*` passes lexical validation yet makes
 * `git checkout HEAD -- *` or `git clean -f -- *` act on every file.
 *
 * Lock contention ("index.lock" / "Another git process") is reported with
 * `busy = true` and a generic retry message; every other failure keeps the
 * underlying error message.
 *
 * @throws GitWriteError on any failure of the git process
 */
async function runGitWrite(args: string[], cwd: string): Promise<void> {
  try {
    await execFileAsync('git', ['--literal-pathspecs', ...args], {
      cwd,
      timeout: GIT_TIMEOUT_MS,
      windowsHide: true,
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    const busy = msg.includes('index.lock') || msg.includes('Another git process');
    throw new GitWriteError(busy ? 'repository is busy, try again' : msg, busy);
  }
}
/**
 * Stages the given files (`git add -- <files>`).
 *
 * Every path is validated lexically and checked for symlink escape before git
 * is invoked, so one bad path aborts the whole batch. An empty list is a
 * no-op: there is nothing to validate and no reason to spawn git.
 *
 * @throws GitWriteError when a path is invalid or the git command fails
 */
export async function stageFiles(repoRoot: string, files: string[]): Promise<void> {
  if (files.length === 0) return;
  for (const f of files) {
    validateWritePath(repoRoot, f);
    await checkSymlinkEscape(repoRoot, f);
  }
  await runGitWrite(['add', '--', ...files], repoRoot);
}
/**
 * Unstages the given files (`git restore --staged -- <files>`).
 *
 * Every path is validated lexically and checked for symlink escape before git
 * is invoked, so one bad path aborts the whole batch. An empty list is a
 * no-op: `git restore` refuses to run without at least one path, which would
 * otherwise surface as a raw git error for a request that asked for nothing.
 *
 * @throws GitWriteError when a path is invalid or the git command fails
 */
export async function unstageFiles(repoRoot: string, files: string[]): Promise<void> {
  if (files.length === 0) return;
  for (const f of files) {
    validateWritePath(repoRoot, f);
    await checkSymlinkEscape(repoRoot, f);
  }
  await runGitWrite(['restore', '--staged', '--', ...files], repoRoot);
}
/**
 * Creates a commit with a server-derived author identity.
 *
 * @param repoRoot repository root; used as cwd for git
 * @param message  commit message, passed via `-m`
 * @param files    optional paths; when non-empty they are validated and
 *                 appended after `--`, otherwise a plain `git commit` is run
 * @throws GitWriteError when a path is invalid or the git command fails
 */
export async function commitFiles(
  repoRoot: string,
  message: string,
  files?: string[],
): Promise<void> {
  const paths = files ?? [];
  for (const candidate of paths) {
    validateWritePath(repoRoot, candidate);
    await checkSymlinkEscape(repoRoot, candidate);
  }

  const { name, email } = await deriveCommitIdentity(repoRoot);
  // Identity is injected per invocation with `-c`, leaving git config untouched.
  const identityFlags = ['-c', `user.name=${name}`, '-c', `user.email=${email}`];
  const pathTail = paths.length > 0 ? ['--', ...paths] : [];
  await runGitWrite([...identityFlags, 'commit', '-m', message, ...pathTail], repoRoot);
}
/** Describes one file to discard; discardFiles picks the git command from `change_type` and `staged`. */
export interface DiscardFileInfo {
// Path relative to the repository root; validated before any git command runs.
path: string;
// Compared against 'untracked' and 'added' by discardFiles; any other value is handled as a tracked file.
change_type: string;
// Only consulted together with change_type 'added' (staged new file).
staged: boolean;
}
/**
 * Discards changes for the given files. All paths are validated up front, so
 * an invalid path aborts before anything is touched. Files are then handled
 * in three groups, in this order:
 *
 * 1. Tracked files (everything that is neither untracked nor a staged
 *    addition): one `git checkout HEAD -- <paths>`, which resets both index
 *    and working tree.
 * 2. Staged additions (change_type 'added' and staged): per file,
 *    `git restore --staged -- <path>` followed by `git clean -f -- <path>`.
 * 3. Untracked files: one `git clean -f -- <paths>` (hard delete).
 *
 * @throws GitWriteError when a path is invalid or any git command fails;
 *         commands that already ran are not rolled back
 */
export async function discardFiles(repoRoot: string, files: DiscardFileInfo[]): Promise<void> {
  for (const entry of files) {
    validateWritePath(repoRoot, entry.path);
    await checkSymlinkEscape(repoRoot, entry.path);
  }

  const isUntracked = (f: DiscardFileInfo): boolean => f.change_type === 'untracked';
  const isStagedAddition = (f: DiscardFileInfo): boolean => f.change_type === 'added' && f.staged;

  const trackedPaths = files
    .filter((f) => !isUntracked(f) && !isStagedAddition(f))
    .map((f) => f.path);
  const stagedAdditionPaths = files.filter(isStagedAddition).map((f) => f.path);
  const untrackedPaths = files.filter(isUntracked).map((f) => f.path);

  // `git checkout HEAD -- <file>` is the most portable way to reset index + worktree.
  if (trackedPaths.length > 0) {
    await runGitWrite(['checkout', 'HEAD', '--', ...trackedPaths], repoRoot);
  }

  // A staged new file has no HEAD version: drop it from the index, then delete it.
  for (const addition of stagedAdditionPaths) {
    await runGitWrite(['restore', '--staged', '--', addition], repoRoot);
    await runGitWrite(['clean', '-f', '--', addition], repoRoot);
  }

  if (untrackedPaths.length > 0) {
    await runGitWrite(['clean', '-f', '--', ...untrackedPaths], repoRoot);
  }
}