// boocode/apps/server/src/services/file_ops.ts

import { readFile, readdir, stat } from 'node:fs/promises';
import { resolve, relative } from 'node:path';
import { spawn } from 'node:child_process';
import { pathGuard, PathScopeError } from './path_guard.js';

// Size and count limits applied by the file operations in this module.

/** Files larger than this many bytes (5 MiB) are rejected by `viewFile`. */
const MAX_FILE_BYTES = 5 * 1024 ** 2;
/** Number of leading lines `viewFile` returns at most. */
const DEFAULT_VIEW_LINES = 200;
/** Number of directory entries `listDir` describes at most. */
const MAX_DIR_ENTRIES = 500;

/** Match limit `grep` applies when the caller does not supply one. */
const DEFAULT_GREP_RESULTS = 100;
/** Ceiling for the match limit a caller may request from `grep`. */
const MAX_GREP_RESULTS = 200;

/** Result limit `findFiles` applies when the caller does not supply one. */
const DEFAULT_FIND_RESULTS = 100;
/** Ceiling for the result limit a caller may request from `findFiles`. */
const MAX_FIND_RESULTS = 200;

/** One child of a directory, as described by `listDir`. */
export interface FileEntry {
  /** Entry name as returned by `readdir`, without any directory prefix. */
  name: string;
  /** `'dir'` for directories; every other kind of entry is reported as `'file'`. */
  kind: 'file' | 'dir';
  /** Size in bytes. Present only for regular files whose `stat` call succeeded. */
  size?: number;
}

/** Result of `listDir`. */
export interface ListDirResult {
  /** Described entries, capped at `MAX_DIR_ENTRIES`. */
  entries: FileEntry[];
  /** True when the directory holds more entries than `entries` contains. */
  truncated: boolean;
  /** Number of entries the directory holds, counted before the cap is applied. */
  total: number;
}

/** Result of `viewFile`. */
export interface ViewFileResult {
  /** Leading part of the file decoded as UTF-8, cut on a line boundary. */
  content: string;
  /** Whether `content` stops short of the end of the file. */
  truncated: boolean;
  /** Size of the whole file in bytes, as reported by `stat`. */
  total_bytes: number;
  /** UTF-8 byte length of `content`. */
  bytes_returned: number;
}

/** One matching line reported by `grep`. */
export interface GrepMatch {
  /** File containing the match, normally expressed relative to the project root. */
  path: string;
  /** Line number reported by ripgrep; 0 when the record carried none. */
  line: number;
  /** Text of the matching line with a single trailing newline removed. */
  text: string;
}

/** Result of `grep`. */
export interface GrepResult {
  /** Collected matches, never more than the effective match limit. */
  matches: GrepMatch[];
  /** True when the match limit was reached, so further matches may exist. */
  truncated: boolean;
}

/** Result of `findFiles`. */
export interface FindFilesResult {
  /** Returned paths, never more than the effective result limit. */
  files: string[];
  /** Number of paths ripgrep reported, including those left out of `files`. */
  total: number;
  /** True when `total` exceeds the number of paths in `files`. */
  truncated: boolean;
}

/**
 * Lists the immediate children of a directory.
 *
 * The requested path is first resolved through `pathGuard`. At most
 * `MAX_DIR_ENTRIES` children are described, while `total` still reports how
 * many entries the directory holds.
 *
 * @param projectRoot - Project root handed to `pathGuard`.
 * @param relPath - Directory to list, in whatever form `pathGuard` accepts.
 * @param opts - `extra_roots` is forwarded unchanged to `pathGuard`.
 * @returns The described entries, the untruncated entry count and a
 *   truncation flag.
 * @throws PathScopeError when the resolved path is not a directory. Failures
 *   of `pathGuard`, `stat` on the directory, or `readdir` propagate as-is.
 */
export async function listDir(
  projectRoot: string,
  relPath: string,
  opts?: { extra_roots?: readonly string[] },
): Promise<ListDirResult> {
  const dirPath = await pathGuard(projectRoot, relPath, opts?.extra_roots);
  const dirStat = await stat(dirPath);
  if (!dirStat.isDirectory()) {
    throw new PathScopeError(`not a directory: ${relPath}`);
  }

  const dirents = await readdir(dirPath, { withFileTypes: true });

  // Builds the public description of a single directory entry.
  const describe = async (dirent: (typeof dirents)[number]): Promise<FileEntry> => {
    if (dirent.isDirectory()) {
      return { name: dirent.name, kind: 'dir' };
    }
    // Everything that is not a directory is labelled 'file'.
    const entry: FileEntry = { name: dirent.name, kind: 'file' };
    if (dirent.isFile()) {
      try {
        entry.size = (await stat(resolve(dirPath, dirent.name))).size;
      } catch {
        // Size is best effort: an entry that cannot be stat'ed is still listed.
      }
    }
    return entry;
  };

  const entries = await Promise.all(dirents.slice(0, MAX_DIR_ENTRIES).map(describe));
  return {
    entries,
    total: dirents.length,
    truncated: dirents.length > MAX_DIR_ENTRIES,
  };
}

/**
 * Returns the leading lines of a text file.
 *
 * The path is resolved through `pathGuard`, the file is read in full as UTF-8
 * and at most `DEFAULT_VIEW_LINES` lines are returned. A file with no more
 * lines than that is returned verbatim, including its final newline.
 *
 * @param projectRoot - Project root handed to `pathGuard`.
 * @param relPath - File to read, in whatever form `pathGuard` accepts.
 * @param opts - `extra_roots` is forwarded unchanged to `pathGuard`.
 * @returns The returned text, whether lines were cut off, the on-disk size
 *   and the UTF-8 byte length of the returned text.
 * @throws PathScopeError when the resolved path is not a regular file.
 * @throws Error when the file is larger than `MAX_FILE_BYTES`. Failures of
 *   `pathGuard`, `stat` or `readFile` propagate as-is.
 */
export async function viewFile(
  projectRoot: string,
  relPath: string,
  opts?: { extra_roots?: readonly string[] },
): Promise<ViewFileResult> {
  const real = await pathGuard(projectRoot, relPath, opts?.extra_roots);
  const s = await stat(real);
  if (!s.isFile()) {
    throw new PathScopeError(`not a file: ${relPath}`);
  }
  if (s.size > MAX_FILE_BYTES) {
    throw new Error(`file too large (${s.size} bytes, max ${MAX_FILE_BYTES})`);
  }
  const raw = await readFile(real, 'utf8');
  const lines = raw.split('\n');
  // A final newline terminates the last line; split() reports it as one extra
  // empty element, which must not be counted as a line of its own. Otherwise a
  // file of exactly DEFAULT_VIEW_LINES newline-terminated lines would be
  // flagged as truncated although every line was returned.
  const lineCount = raw.endsWith('\n') ? lines.length - 1 : lines.length;
  const truncated = lineCount > DEFAULT_VIEW_LINES;
  const content = truncated
    ? lines.slice(0, DEFAULT_VIEW_LINES).join('\n')
    : raw;
  return {
    content,
    truncated,
    total_bytes: s.size,
    bytes_returned: Buffer.byteLength(content, 'utf8'),
  };
}

/** The subset of one ripgrep `--json` output record that `grep` reads. */
interface RipgrepMatch {
  /** Record type; `grep` only consumes records whose type is `'match'`. */
  type: string;
  /** Record payload; records without it are skipped by `grep`. */
  data?: {
    /** Path of the file the record refers to. */
    path?: { text?: string };
    /** Line number of the matching line. */
    line_number?: number;
    /** Text of the matching line. */
    lines?: { text?: string };
  };
}

/**
 * Searches file contents by spawning ripgrep (`rg --json`) and collecting the
 * matching lines.
 *
 * The search target (`opts.path`, or the project root when omitted) is
 * resolved through `pathGuard`. The search is case-insensitive unless
 * `opts.case_sensitive` is set, and skips hidden files unless `opts.hidden`
 * is set. Once the match limit is reached the ripgrep process is killed, so
 * no total count is available.
 *
 * @param projectRoot - Project root; also the working directory of ripgrep
 *   and the base that returned paths are made relative to.
 * @param pattern - Pattern handed to ripgrep after `--`, so it is never
 *   parsed as an option.
 * @param opts - `max_matches` is truncated to a whole number and clamped to
 *   `1..MAX_GREP_RESULTS`; when omitted or NaN, `DEFAULT_GREP_RESULTS` is
 *   used. `extra_roots` is forwarded unchanged to `pathGuard`.
 * @returns The collected matches; `truncated` is true when the limit was
 *   reached.
 * @throws Error when ripgrep exits with status 2 and produced no match, or
 *   when the process cannot be spawned. Failures of `pathGuard` propagate.
 */
export async function grep(
  projectRoot: string,
  pattern: string,
  opts?: { path?: string; max_matches?: number; case_sensitive?: boolean; hidden?: boolean; extra_roots?: readonly string[] }
): Promise<GrepResult> {
  const targetPath = opts?.path ?? projectRoot;
  const target = await pathGuard(projectRoot, targetPath, opts?.extra_roots);

  // The limit is passed to ripgrep as text, so it has to be a whole number:
  // a fractional or NaN value would make ripgrep reject its arguments.
  const requested = opts?.max_matches ?? DEFAULT_GREP_RESULTS;
  const wholeRequested = Number.isNaN(requested)
    ? DEFAULT_GREP_RESULTS
    : Math.trunc(requested);
  const limit = Math.min(Math.max(wholeRequested, 1), MAX_GREP_RESULTS);

  const args = [
    '--json',
    // ripgrep applies --max-count per file; the overall cap is enforced below.
    '--max-count',
    String(limit),
    '--max-columns',
    '300',
  ];
  if (!opts?.case_sensitive) args.push('--ignore-case');
  if (opts?.hidden) args.push('--hidden');
  args.push('--', pattern, target);

  // Only this many characters of stderr ever reach the error message.
  const stderrKeep = 300;

  return new Promise((resolveP, rejectP) => {
    const child = spawn('rg', args, { cwd: projectRoot });
    const matches: GrepMatch[] = [];
    let buf = '';
    let stderr = '';
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk: string) => {
      buf += chunk;
      let idx: number;
      // Consume every complete line; a partial tail stays in `buf`.
      while ((idx = buf.indexOf('\n')) >= 0) {
        const line = buf.slice(0, idx);
        buf = buf.slice(idx + 1);
        if (!line) continue;
        // Keep draining the buffer after the limit, but stop recording.
        if (matches.length >= limit) continue;
        try {
          const parsed = JSON.parse(line) as RipgrepMatch;
          if (parsed.type !== 'match' || !parsed.data) continue;
          const filePath = parsed.data.path?.text ?? '';
          const lineNumber = parsed.data.line_number ?? 0;
          const content = parsed.data.lines?.text ?? '';
          matches.push({
            path: relative(projectRoot, filePath) || filePath,
            line: lineNumber,
            text: content.replace(/\n$/, ''),
          });
        } catch {
          /* ignore non-json */
        }
      }
      if (matches.length >= limit) {
        // Enough results: stop ripgrep instead of letting it scan the rest.
        child.kill();
      }
    });
    child.stderr.on('data', (chunk: string) => {
      // Stop accumulating once the reportable prefix is complete.
      if (stderr.length < stderrKeep) stderr += chunk;
    });
    child.on('error', (err) => rejectP(err));
    child.on('close', (code) => {
      // Status 2 is ripgrep's error status; partial results are still returned
      // when some matches were collected before the error.
      if (code === 2 && matches.length === 0) {
        rejectP(new Error(`ripgrep failed: ${stderr.slice(0, stderrKeep)}`));
        return;
      }
      resolveP({
        matches,
        truncated: matches.length >= limit,
      });
    });
  });
}

/**
 * Lists files by spawning ripgrep (`rg --files`), optionally filtered by a
 * glob.
 *
 * When `opts.path` is given it is resolved through `pathGuard`; otherwise the
 * project root is searched. ripgrep is left running after the result limit is
 * reached so that `total` reflects every path it reported.
 *
 * @param projectRoot - Project root; also the working directory of ripgrep
 *   and the base that returned paths are made relative to.
 * @param pattern - Optional glob passed to ripgrep via `--glob`.
 * @param opts - `max_results` is truncated to a whole number and clamped to
 *   `1..MAX_FIND_RESULTS`; when omitted or NaN, `DEFAULT_FIND_RESULTS` is
 *   used. `extra_roots` is forwarded unchanged to `pathGuard`. NOTE: `type`
 *   is accepted but not read by this function; `rg --files` lists files only.
 * @returns Up to the limit of paths, the number of paths ripgrep reported
 *   and whether some were left out.
 * @throws Error when ripgrep exits with status 2 without reporting any path,
 *   or when the process cannot be spawned. Failures of `pathGuard` propagate.
 */
export async function findFiles(
  projectRoot: string,
  pattern?: string,
  opts?: { type?: 'file' | 'dir'; max_results?: number; path?: string; extra_roots?: readonly string[] }
): Promise<FindFilesResult> {
  // A NaN limit would make every `files.length < limit` test false and return
  // nothing; a fractional one would admit one path too many.
  const requested = opts?.max_results ?? DEFAULT_FIND_RESULTS;
  const wholeRequested = Number.isNaN(requested)
    ? DEFAULT_FIND_RESULTS
    : Math.trunc(requested);
  const limit = Math.min(Math.max(wholeRequested, 1), MAX_FIND_RESULTS);

  const target = opts?.path != null
    ? await pathGuard(projectRoot, opts.path, opts?.extra_roots)
    : projectRoot;
  const args = ['--files'];
  if (pattern) args.push('--glob', pattern);
  args.push(target);

  // Only this many characters of stderr ever reach the error message.
  const stderrKeep = 300;

  return new Promise((resolveP, rejectP) => {
    const child = spawn('rg', args, { cwd: projectRoot });
    const files: string[] = [];
    let total = 0;
    let buf = '';
    let stderr = '';

    // Counts one reported path and keeps it while there is room.
    const record = (path: string): void => {
      total++;
      if (files.length < limit) {
        files.push(relative(projectRoot, path) || path);
      }
    };

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk: string) => {
      buf += chunk;
      let idx: number;
      while ((idx = buf.indexOf('\n')) >= 0) {
        const line = buf.slice(0, idx);
        buf = buf.slice(idx + 1);
        if (!line) continue;
        // Keep counting after limit to report accurate `total` to the caller.
        // grep kills early since the LLM doesn't need a total; this differs intentionally.
        record(line);
      }
    });
    child.stderr.on('data', (chunk: string) => {
      // Stop accumulating once the reportable prefix is complete.
      if (stderr.length < stderrKeep) stderr += chunk;
    });
    child.on('error', (err) => rejectP(err));
    child.on('close', (code) => {
      // A final path without a terminating newline is still a result.
      if (buf.length > 0) {
        record(buf);
        buf = '';
      }
      // Status 2 is ripgrep's error status, which it also uses for soft errors
      // such as an unreadable subdirectory. As in grep, fail only when nothing
      // at all was produced; otherwise return what was listed.
      if (code === 2 && total === 0) {
        rejectP(new Error(`ripgrep failed: ${stderr.slice(0, stderrKeep)}`));
        return;
      }
      resolveP({
        files,
        total,
        truncated: total > files.length,
      });
    });
  });
}