import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { stat, realpath } from 'node:fs/promises';
import { dirname, isAbsolute, join, resolve, sep } from 'node:path';

const execFileAsync = promisify(execFile);

const GIT_TIMEOUT_MS = 30_000;
const GIT_MAX_BUFFER = 10 * 1024 * 1024; // 10MB
const FILE_DIFF_CAP = 512 * 1024; // 512KB per-file display cap

/**
 * Config overrides applied to every read-side git invocation so that the text
 * parsers below see a predictable format regardless of user configuration:
 * - core.quotepath=off      → non-ASCII paths are printed verbatim, not octal-quoted
 * - diff.noprefix=false     → diff headers keep the a/ and b/ prefixes
 * - diff.mnemonicPrefix=false → prefixes stay a/ b/ (not c/ i/ w/ …)
 */
const GIT_READ_CONFIG: readonly string[] = [
  '-c',
  'core.quotepath=off',
  '-c',
  'diff.noprefix=false',
  '-c',
  'diff.mnemonicPrefix=false',
];

export type GitDiffMode = 'uncommitted' | 'committed';

export type ChangeType = 'added' | 'modified' | 'deleted' | 'renamed' | 'untracked';

export interface GitDiffFile {
  path: string;
  old_path: string | null;
  change_type: ChangeType;
  added_lines: number;
  removed_lines: number;
  staged: boolean;
  diff_body: string | null; // null when is_binary or is_too_large
  is_binary: boolean;
  is_too_large: boolean;
}

export interface GitDiffResult {
  mode: GitDiffMode;
  base_label: string | null;
  in_progress_op: string | null;
  files: GitDiffFile[];
}

/** One parsed `git diff --name-status` row. */
export interface NameStatusEntry {
  path: string;
  old_path: string | null;
  change_type: ChangeType;
}

/** One parsed `git diff --numstat` row. */
export interface NumStatEntry {
  path: string;
  added: number;
  removed: number;
  binary: boolean;
}

/**
 * Runs a read-only git command with a 30s deadline and a 10MB stdout buffer.
 *
 * @param args git arguments (sub-command first); GIT_READ_CONFIG is prepended.
 * @param cwd  directory to run git in.
 * @returns stdout as a string, or null on ANY failure (non-zero exit, timeout,
 *          buffer overflow, git missing) so callers can degrade gracefully
 *          without surfacing git errors.
 */
async function runGit(args: string[], cwd: string): Promise<string | null> {
  try {
    const { stdout } = await execFileAsync('git', [...GIT_READ_CONFIG, ...args], {
      cwd,
      timeout: GIT_TIMEOUT_MS,
      windowsHide: true,
      maxBuffer: GIT_MAX_BUFFER,
    });
    return stdout.toString();
  } catch {
    return null;
  }
}

// ── Pure helpers (unit-testable without spawning git) ──────────────────────

/**
 * Parses a single `git diff --name-status` output line. Returns null on garbage.
 *
 * Rename (`R<score>\told\tnew`) and copy (`C<score>\told\tnew`) rows are both
 * reported as `renamed` with `old_path` set. `T` (type change) and `U`
 * (unmerged) are reported as `modified`. Unknown status codes yield null.
 */
function parseNameStatusLine(line: string): NameStatusEntry | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  const parts = trimmed.split('\t');
  if (parts.length < 2) return null;

  const code = parts[0] ?? '';

  // Rename: R<score>\told\tnew   Copy: C<score>\told\tnew
  if (code.startsWith('R') || code.startsWith('C')) {
    if (parts.length < 3) return null;
    return { path: parts[2] ?? '', old_path: parts[1] ?? null, change_type: 'renamed' };
  }

  const path = parts[1] ?? '';
  if (!path) return null;

  switch (code[0]) {
    case 'A':
      return { path, old_path: null, change_type: 'added' };
    case 'M':
    case 'T': // type changed
    case 'U': // unmerged
      return { path, old_path: null, change_type: 'modified' };
    case 'D':
      return { path, old_path: null, change_type: 'deleted' };
    default:
      return null;
  }
}

/** Parses multi-line `git diff --name-status` output into a file list, skipping unparseable lines. */
export function parseNameStatus(output: string): NameStatusEntry[] {
  return output
    .split('\n')
    .map((l) => parseNameStatusLine(l))
    .filter((x): x is NameStatusEntry => x !== null);
}

/**
 * Parses a single `git diff --numstat` output line (`added\tremoved\tpath`).
 *
 * A `-\t-\tpath` row marks a binary file and is returned with zero counts and
 * `binary: true`. Returns null when the line has fewer than three fields, an
 * empty path, or non-numeric counts. The path is returned exactly as printed
 * (rename rows keep their `old => new` notation).
 */
export function parseNumStatLine(line: string): NumStatEntry | null {
  const parts = line.trim().split('\t');
  if (parts.length < 3) return null;

  const [added, removed, path] = parts;
  if (!path) return null;

  if (added === '-' && removed === '-') {
    return { path, added: 0, removed: 0, binary: true };
  }

  const a = parseInt(added ?? '', 10);
  const r = parseInt(removed ?? '', 10);
  if (Number.isNaN(a) || Number.isNaN(r)) return null;

  return { path, added: a, removed: r, binary: false };
}

/**
 * Splits a unified diff text into per-file bodies keyed by current path.
 *
 * Each section starts at a `diff --git ` header line. The key is taken from,
 * in order of preference: the `+++ b/<path>` line, the `--- a/<path>` line
 * (deleted files, whose `+++` side is /dev/null), or the text after the last
 * ` b/` on the `diff --git` line (pure renames and binary files, which have
 * neither). Sections for which no path can be extracted are dropped.
 */
export function splitDiffByFile(diffText: string): Map<string, string> {
  const result = new Map<string, string>();
  if (!diffText.trim()) return result;

  // Split at each "diff --git" header (lookahead keeps the header with its section)
  const sections = diffText.split(/(?=^diff --git )/m);

  for (const section of sections) {
    if (!section.trim()) continue;

    // Current path: prefer "+++ b/<path>" (absent for pure renames / deleted files)
    const pppMatch = section.match(/^\+{3} b\/(.+)$/m);
    if (pppMatch) {
      result.set((pppMatch[1] ?? '').trim(), section);
      continue;
    }

    // Deleted file: "--- a/<path>" with "+++ /dev/null"
    const mmmMatch = section.match(/^-{3} a\/(.+)$/m);
    if (mmmMatch) {
      const p = (mmmMatch[1] ?? '').trim();
      if (p && p !== '/dev/null') {
        result.set(p, section);
        continue;
      }
    }

    // Pure rename with no content change: extract from "diff --git a/... b/..."
    // Take everything after the last " b/" on that line.
    const gitLineMatch = section.match(/^diff --git a\/.+ b\/(.+)$/m);
    if (gitLineMatch) {
      result.set((gitLineMatch[1] ?? '').trim(), section);
    }
  }

  return result;
}

/**
 * Classifies a diff body segment as diff | binary | too_large.
 *
 * @param body one per-file section of unified diff text.
 * @param cap  maximum `body.length` (UTF-16 code units) still shown as a diff.
 */
export function classifyDiffBody(body: string, cap = FILE_DIFF_CAP): 'diff' | 'binary' | 'too_large' {
  if (/^Binary files /m.test(body)) return 'binary';
  if (body.length > cap) return 'too_large';
  return 'diff';
}

/** Returns the auto-selected diff mode based on dirty state. */
export function autoSelectMode(isDirty: boolean): GitDiffMode {
  return isDirty ? 'uncommitted' : 'committed';
}

/** Returns true when at least one file is staged (commit is possible). */
export function canCommit(files: GitDiffFile[]): boolean {
  return files.some((f) => f.staged);
}

/**
 * Returns true when the working tree has uncommitted changes (staged or unstaged).
 * Returns false when `cwd` is not inside a git work tree, and true when
 * `git status` itself fails (state unknown — assume dirty).
 */
export async function isRepoDirty(cwd: string): Promise<boolean> {
  const gitRoot = await resolveGitRoot(cwd);
  if (!gitRoot) return false;

  const out = await runGit(['status', '--porcelain'], gitRoot);
  if (out === null) return true; // can't determine — assume dirty
  return out.trim().length > 0;
}

/** True when `candidate` is `root` itself or lies underneath it (plain prefix test). */
function isInside(candidate: string, root: string): boolean {
  return candidate === root || candidate.startsWith(root + sep);
}

/**
 * Async per-file symlink-escape guard (FIX 3 / D-4).
 *
 * Resolves the real path of the target and rejects when it falls outside the
 * real path of the repo root. When the target does not exist yet (new files
 * being staged), the nearest existing ancestor *below* the repo root is checked
 * instead, so a new file underneath a symlinked directory is still caught. If
 * nothing below the root exists there is no symlink to follow and the path is
 * allowed.
 *
 * The root itself is also passed through realpath so that a repository reached
 * via a symlinked location does not reject its own files.
 *
 * @throws GitWriteError (busy=false) when the resolved path escapes the root.
 */
export async function checkSymlinkEscape(repoRoot: string, filePath: string): Promise<void> {
  const lexicalRoot = resolve(repoRoot);
  // If the root cannot be resolved (e.g. it does not exist) fall back to the
  // lexical form; nothing below it will resolve either, so the guard passes.
  const realRoot = await realpath(lexicalRoot).catch(() => lexicalRoot);

  let candidate = resolve(lexicalRoot, filePath);
  let real: string | null = null;

  for (;;) {
    try {
      real = await realpath(candidate);
      break;
    } catch {
      // Not resolvable (typically: doesn't exist yet) — try the parent directory.
    }
    const parent = dirname(candidate);
    // Stop at the repo root (or the filesystem root): nothing left to follow.
    if (parent === candidate || !parent.startsWith(lexicalRoot + sep)) break;
    candidate = parent;
  }

  if (real === null) return;

  if (!isInside(real, realRoot)) {
    throw new GitWriteError(`path escapes repository root via symlink: ${filePath}`, false);
  }
}

// ── Async helpers ──────────────────────────────────────────────────────────

/**
 * Resolves the base ref for Committed mode with fallback chain:
 *   1. the tracking branch (`@{upstream}`),
 *   2. `origin/HEAD` (the remote's default branch).
 *
 * A candidate is accepted when git exits successfully and prints a non-empty
 * name; failures surface as null from runGit, so the ref name itself is never
 * inspected for error text.
 *
 * @returns `{ base, label }` where both are the resolved ref name, or
 *          `{ base: null, label: 'uncommitted (no base found)' }`.
 */
export async function resolveCommittedBase(
  cwd: string,
): Promise<{ base: string | null; label: string }> {
  const candidates: string[][] = [
    // 1. Tracking branch (@{upstream})
    ['rev-parse', '--abbrev-ref', '--symbolic-full-name', '@{upstream}'],
    // 2. origin/HEAD (default branch)
    ['rev-parse', '--abbrev-ref', 'origin/HEAD'],
  ];

  for (const args of candidates) {
    const ref = (await runGit(args, cwd))?.trim();
    if (ref) return { base: ref, label: ref };
  }

  return { base: null, label: 'uncommitted (no base found)' };
}

/**
 * Locates the git directory holding the operation sentinels for `repoRoot`.
 * Uses `<repoRoot>/.git` when that is a directory; otherwise (linked worktree
 * or submodule, where `.git` is a file) asks git, falling back to
 * `<repoRoot>/.git` if git cannot answer.
 */
async function resolveGitDir(repoRoot: string): Promise<string> {
  const dotGit = join(repoRoot, '.git');
  try {
    if ((await stat(dotGit)).isDirectory()) return dotGit;
  } catch {
    // .git missing — fall through and ask git
  }
  const reported = (await runGit(['rev-parse', '--git-dir'], repoRoot))?.trim();
  // --git-dir may print a path relative to cwd; anchor it at repoRoot.
  return reported ? resolve(repoRoot, reported) : dotGit;
}

/**
 * Detects in-progress git operations via sentinel files/dirs in the git dir.
 *
 * @returns 'merge', 'cherry-pick', 'bisect' or 'rebase' for the first sentinel
 *          found (checked in that order), or null when none is present.
 */
export async function detectInProgress(repoRoot: string): Promise<string | null> {
  const gitDir = await resolveGitDir(repoRoot);

  const sentinels: [string, string][] = [
    ['MERGE_HEAD', 'merge'],
    ['CHERRY_PICK_HEAD', 'cherry-pick'],
    ['BISECT_LOG', 'bisect'],
    ['rebase-merge', 'rebase'],
    ['rebase-apply', 'rebase'],
  ];

  for (const [entry, op] of sentinels) {
    try {
      await stat(join(gitDir, entry));
      return op;
    } catch {
      // sentinel not present — continue
    }
  }

  return null;
}

// ── Read logic ─────────────────────────────────────────────────────────────

/** Resolves the git work-tree root for the given path. Returns null if not a repo. */
async function resolveGitRoot(cwd: string): Promise<string | null> {
  const out = await runGit(['rev-parse', '--show-toplevel'], cwd);
  return out !== null ? out.trim() : null;
}

/**
 * Extracts the destination path from a numstat rename notation:
 * `old => new` → `new`, `dir/{old => new}/file` → `dir/new/file`.
 * Paths without rename notation are returned unchanged.
 */
function numstatTargetPath(raw: string): string {
  const braced = raw.match(/^(.*)\{(.*) => (.*)\}(.*)$/);
  if (braced) {
    const prefix = braced[1] ?? '';
    const to = braced[3] ?? '';
    const suffix = braced[4] ?? '';
    // An empty destination part (`a/{b => }/c`) leaves a doubled slash behind.
    return (prefix + to + suffix).replace(/\/\//g, '/');
  }
  const arrow = raw.indexOf(' => ');
  return arrow >= 0 ? raw.slice(arrow + ' => '.length) : raw;
}

/**
 * Indexes `git diff --numstat` output by path. Rename rows are stored under
 * the path exactly as printed AND under their destination path, so lookups by
 * the name-status `path` of a renamed file find their counts.
 */
function buildNumstatMap(
  output: string,
): Map<string, { added: number; removed: number; binary: boolean }> {
  const map = new Map<string, { added: number; removed: number; binary: boolean }>();
  for (const line of output.split('\n')) {
    const parsed = parseNumStatLine(line);
    if (!parsed) continue;

    const stats = { added: parsed.added, removed: parsed.removed, binary: parsed.binary };
    map.set(parsed.path, stats);

    const target = numstatTargetPath(parsed.path);
    // Never let a derived key shadow a row that was printed under that exact path.
    if (target !== parsed.path && !map.has(target)) map.set(target, stats);
  }
  return map;
}

/** Combines name-status, numstat and diff-body data for one file into a GitDiffFile. */
function toDiffFile(
  entry: NameStatusEntry,
  numstatMap: Map<string, { added: number; removed: number; binary: boolean }>,
  diffMap: Map<string, string>,
  staged: boolean,
): GitDiffFile {
  const ns = numstatMap.get(entry.path);
  const body = diffMap.get(entry.path) ?? null;
  // Without a body, fall back to numstat's binary marker; otherwise classify the text.
  const kind = body !== null ? classifyDiffBody(body) : ns?.binary ? 'binary' : 'diff';

  return {
    path: entry.path,
    old_path: entry.old_path,
    change_type: entry.change_type,
    added_lines: ns?.added ?? 0,
    removed_lines: ns?.removed ?? 0,
    staged,
    diff_body: kind === 'diff' ? body : null,
    is_binary: kind === 'binary',
    is_too_large: kind === 'too_large',
  };
}

/**
 * Builds the Uncommitted-mode result: every change relative to HEAD (staged or
 * not) followed by untracked files.
 *
 * In a repository without commits there is no HEAD to diff against, so the
 * index (`--cached`) is used as the source of tracked changes instead.
 */
async function getUncommittedDiff(
  gitRoot: string,
  inProgress: string | null,
  ignoreWhitespace = false,
): Promise<GitDiffResult> {
  const ws = ignoreWhitespace ? ['-w'] : [];
  const hasCommits = (await runGit(['rev-parse', '--verify', 'HEAD'], gitRoot)) !== null;

  // "all tracked changes" vs "staged changes only"
  const allScope = hasCommits ? ['HEAD'] : ['--cached'];
  const stagedScope = hasCommits ? ['--cached', 'HEAD'] : ['--cached'];

  const [nameStatusOut, cachedNameStatusOut, untrackedOut, numstatOut, diffOut, cachedDiffOut] =
    await Promise.all([
      runGit(['diff', '--name-status', ...allScope], gitRoot),
      runGit(['diff', '--name-status', ...stagedScope], gitRoot),
      runGit(['ls-files', '--others', '--exclude-standard'], gitRoot),
      runGit(['diff', '--numstat', ...allScope], gitRoot),
      runGit(['diff', ...ws, ...allScope], gitRoot),
      runGit(['diff', ...ws, ...stagedScope], gitRoot),
    ]);

  const allChanged = parseNameStatus(nameStatusOut ?? '');
  const stagedSet = new Set<string>(
    parseNameStatus(cachedNameStatusOut ?? '').map((f) => f.path),
  );
  const untracked = (untrackedOut ?? '').split('\n').filter(Boolean);
  const numstatMap = buildNumstatMap(numstatOut ?? '');

  // Merge unstaged and staged diff maps
  const diffMap = splitDiffByFile(diffOut ?? '');
  const cachedDiffMap = splitDiffByFile(cachedDiffOut ?? '');
  // Supplement with staged bodies for any path the main diff did not yield
  for (const [k, v] of cachedDiffMap) {
    if (!diffMap.has(k)) diffMap.set(k, v);
  }

  const files: GitDiffFile[] = allChanged.map((entry) =>
    toDiffFile(entry, numstatMap, diffMap, stagedSet.has(entry.path)),
  );

  for (const p of untracked) {
    files.push({
      path: p,
      old_path: null,
      change_type: 'untracked',
      added_lines: 0,
      removed_lines: 0,
      staged: false,
      diff_body: null,
      is_binary: false,
      is_too_large: false,
    });
  }

  return { mode: 'uncommitted', base_label: null, in_progress_op: inProgress, files };
}

/** Builds the Committed-mode result: `git diff <base> HEAD`, labelled with `label`. */
async function getCommittedDiff(
  gitRoot: string,
  base: string,
  label: string,
  inProgress: string | null,
  ignoreWhitespace = false,
): Promise<GitDiffResult> {
  const ws = ignoreWhitespace ? ['-w'] : [];

  const [nameStatusOut, numstatOut, diffOut] = await Promise.all([
    runGit(['diff', '--name-status', base, 'HEAD'], gitRoot),
    runGit(['diff', '--numstat', base, 'HEAD'], gitRoot),
    runGit(['diff', ...ws, base, 'HEAD'], gitRoot),
  ]);

  const numstatMap = buildNumstatMap(numstatOut ?? '');
  const diffMap = splitDiffByFile(diffOut ?? '');

  // staged concept does not apply in committed mode
  const files = parseNameStatus(nameStatusOut ?? '').map((entry) =>
    toDiffFile(entry, numstatMap, diffMap, false),
  );

  return { mode: 'committed', base_label: label, in_progress_op: inProgress, files };
}

/**
 * Returns the structured git diff for the given directory and mode, or null if
 * the directory is not a git repository. On a null committed-mode base, falls
 * back to uncommitted and labels the result accordingly.
 *
 * @param cwd              any directory inside the work tree.
 * @param mode             'uncommitted' or 'committed'.
 * @param ignoreWhitespace when true, diff bodies are produced with `-w`.
 */
export async function getGitDiff(
  cwd: string,
  mode: GitDiffMode,
  ignoreWhitespace?: boolean,
): Promise<GitDiffResult | null> {
  const gitRoot = await resolveGitRoot(cwd);
  if (!gitRoot) return null;

  const inProgress = await detectInProgress(gitRoot);
  const noWs = ignoreWhitespace ?? false;

  if (mode === 'uncommitted') {
    return getUncommittedDiff(gitRoot, inProgress, noWs);
  }

  const { base, label } = await resolveCommittedBase(gitRoot);
  if (!base) {
    // Fall back to uncommitted with a descriptive label
    const result = await getUncommittedDiff(gitRoot, inProgress, noWs);
    return { ...result, base_label: label };
  }

  return getCommittedDiff(gitRoot, base, label, inProgress, noWs);
}

// ── Phase 2: Write helpers ─────────────────────────────────────────────────

// Fallback identity matching project_bootstrap.ts constants.
const GIT_USER_NAME = 'indifferentketchup';
const GIT_USER_EMAIL = 'samkintop@gmail.com';

/**
 * Error thrown by all write-side helpers. `busy` is true when the failure was
 * caused by another git process holding the repository (caller may retry).
 */
export class GitWriteError extends Error {
  constructor(
    message: string,
    public readonly busy: boolean,
  ) {
    super(message);
    this.name = 'GitWriteError';
  }
}

/**
 * Validates a per-file path argument for write operations.
 * Rejects flag injection (leading `-`), repo-root discard (`.`), absolute
 * paths, and `..` traversal without requiring the file to exist on disk.
 *
 * `repoRoot` is normalised before comparison, so a trailing separator or `.`
 * segment in it does not cause valid paths to be rejected.
 *
 * @throws GitWriteError (busy=false) describing the first violated rule.
 */
export function validateWritePath(repoRoot: string, filePath: string): void {
  if (!filePath || typeof filePath !== 'string' || filePath.trim() === '') {
    throw new GitWriteError('path is required', false);
  }
  if (filePath.startsWith('-')) {
    throw new GitWriteError(`invalid path (flag injection): ${filePath}`, false);
  }
  if (filePath === '.') {
    throw new GitWriteError('cannot operate on repository root (.)', false);
  }
  if (isAbsolute(filePath)) {
    throw new GitWriteError(`path must be relative: ${filePath}`, false);
  }

  const root = resolve(repoRoot);
  const resolved = resolve(root, filePath);
  // Must land strictly below the root: the root itself is not a valid target.
  if (resolved === root || !resolved.startsWith(root + sep)) {
    throw new GitWriteError(`path escapes repository root: ${filePath}`, false);
  }
}

/** Runs validateWritePath and checkSymlinkEscape on each path, in order; throws on the first failure. */
async function assertWritablePaths(repoRoot: string, paths: readonly string[]): Promise<void> {
  for (const p of paths) {
    validateWritePath(repoRoot, p);
    await checkSymlinkEscape(repoRoot, p);
  }
}

/** Reads git config user.name/email, falling back to bootstrap constants when unset or empty. */
export async function deriveCommitIdentity(
  repoRoot: string,
): Promise<{ name: string; email: string }> {
  const [nameOut, emailOut] = await Promise.all([
    runGit(['config', 'user.name'], repoRoot),
    runGit(['config', 'user.email'], repoRoot),
  ]);

  return {
    name: nameOut?.trim() || GIT_USER_NAME,
    email: emailOut?.trim() || GIT_USER_EMAIL,
  };
}

/**
 * Runs a git write operation, propagating errors.
 *
 * @throws GitWriteError — with busy=true and a fixed retry message when the
 *         failure text mentions `index.lock` or `Another git process`,
 *         otherwise with busy=false and the underlying error message.
 */
async function runGitWrite(args: string[], cwd: string): Promise<void> {
  try {
    await execFileAsync('git', args, { cwd, timeout: GIT_TIMEOUT_MS, windowsHide: true });
  } catch (err: unknown) {
    const msg = err instanceof Error ? err.message : String(err);
    const busy = msg.includes('index.lock') || msg.includes('Another git process');
    throw new GitWriteError(busy ? 'repository is busy, try again' : msg, busy);
  }
}

/** Stages the given files (`git add -- <files>`) after validating every path. */
export async function stageFiles(repoRoot: string, files: string[]): Promise<void> {
  await assertWritablePaths(repoRoot, files);
  await runGitWrite(['add', '--', ...files], repoRoot);
}

/** Unstages the given files (`git restore --staged -- <files>`) after validating every path. */
export async function unstageFiles(repoRoot: string, files: string[]): Promise<void> {
  await assertWritablePaths(repoRoot, files);
  await runGitWrite(['restore', '--staged', '--', ...files], repoRoot);
}

/**
 * Commits with a server-derived identity (see deriveCommitIdentity).
 *
 * @param message commit message, passed via `-m`.
 * @param files   optional path list; when non-empty each path is validated and
 *                the commit is restricted to them (`git commit … -- <files>`).
 *                When omitted or empty, whatever is staged is committed.
 */
export async function commitFiles(
  repoRoot: string,
  message: string,
  files?: string[],
): Promise<void> {
  const paths = files ?? [];
  await assertWritablePaths(repoRoot, paths);

  const id = await deriveCommitIdentity(repoRoot);
  const args = ['-c', `user.name=${id.name}`, '-c', `user.email=${id.email}`, 'commit', '-m', message];
  if (paths.length > 0) args.push('--', ...paths);

  await runGitWrite(args, repoRoot);
}

export interface DiscardFileInfo {
  path: string;
  change_type: string;
  staged: boolean;
}

/**
 * Discards changes for the given files. All paths are validated before any
 * git command runs.
 * - Untracked files: `git clean -f -- <files>`
 * - Staged additions (new file staged, no HEAD version): unstage then clean
 * - All other tracked files: `git checkout HEAD -- <files>` (undoes staged + unstaged)
 */
export async function discardFiles(repoRoot: string, files: DiscardFileInfo[]): Promise<void> {
  await assertWritablePaths(
    repoRoot,
    files.map((f) => f.path),
  );

  const untracked: string[] = [];
  const stagedAdditions: string[] = [];
  const tracked: string[] = [];

  for (const f of files) {
    if (f.change_type === 'untracked') {
      untracked.push(f.path);
    } else if (f.change_type === 'added' && f.staged) {
      stagedAdditions.push(f.path);
    } else {
      tracked.push(f.path);
    }
  }

  // Restore tracked files from HEAD (handles staged + unstaged modifications/deletions).
  // git checkout HEAD -- <files> is the most portable form: resets index + worktree.
  if (tracked.length > 0) {
    await runGitWrite(['checkout', 'HEAD', '--', ...tracked], repoRoot);
  }

  // Staged additions: unstage first, then remove from working tree.
  for (const p of stagedAdditions) {
    await runGitWrite(['restore', '--staged', '--', p], repoRoot);
    await runGitWrite(['clean', '-f', '--', p], repoRoot);
  }

  // Untracked files: clean (hard delete).
  if (untracked.length > 0) {
    await runGitWrite(['clean', '-f', '--', ...untracked], repoRoot);
  }
}