import { NodeHtmlMarkdown } from 'node-html-markdown';
import type { NodeHtmlMarkdownOptions } from 'node-html-markdown';

// MIT-licensed HTML→Markdown rendering for the web_fetch tool. Output feeds an
// LLM, so structural fidelity matters more than exact whitespace.

// Declared against the library's option type so that a misspelled or
// mistyped option is a compile error instead of being silently ignored.
const OPTIONS: Partial<NodeHtmlMarkdownOptions> = {
  // GFM-style emphasis markers (matches what most models expect).
  emDelimiter: '*',
  strongDelimiter: '**',
  bulletMarker: '*',
  codeFence: '```',
  codeBlockStyle: 'fenced',
  // Always use []() syntax for links rather than autolinks.
  useInlineLinks: false,
  // Collapse runs of blank lines to a single separator.
  // NOTE(review): with a limit of 1 the library presumably emits at most one
  // consecutive newline, i.e. no blank line between blocks — confirm this is
  // intended rather than 2 (one blank line).
  maxConsecutiveNewlines: 1,
  // Strip non-content elements entirely (script/style are skipped by default,
  // but listing them here is explicit; head/nav/footer/etc. drop their text).
  ignore: ['script', 'style', 'head', 'noscript', 'svg', 'math', 'nav', 'footer'],
};

// One shared converter: the static NodeHtmlMarkdown.translate() helper builds
// a new instance on every call, which is wasted work for a tool that is
// invoked repeatedly with the same options.
const converter = new NodeHtmlMarkdown(OPTIONS);

/**
 * Converts an HTML document or fragment to Markdown using the module's fixed
 * rendering options.
 *
 * @param sourceHtml - Raw HTML text. An empty string (or any other falsy
 *   value) short-circuits without invoking the converter.
 * @returns The Markdown rendering with leading and trailing whitespace
 *   trimmed, or `''` when `sourceHtml` is empty.
 */
export function htmlToMarkdown(sourceHtml: string): string {
  if (!sourceHtml) return '';
  return converter.translate(sourceHtml).trim();
}