import { NodeHtmlMarkdown } from 'node-html-markdown';
// MIT-licensed HTML→Markdown rendering for the web_fetch tool. Output feeds an
// LLM, so structural fidelity matters more than exact whitespace.
// Elements removed from the output together with everything inside them.
// script/style are reportedly skipped by the library already; they are listed
// anyway so the full set of dropped tags is visible in one place. The rest
// (head, nav, footer, ...) are page chrome rather than content.
const IGNORED_TAGS = ['script', 'style', 'head', 'noscript', 'svg', 'math', 'nav', 'footer'];

// Converter settings passed to NodeHtmlMarkdown by htmlToMarkdown().
const OPTIONS = {
  // --- Block-level output ---
  // Code blocks are emitted as fenced blocks delimited by triple backticks.
  codeBlockStyle: 'fenced' as const,
  codeFence: '```',
  bulletMarker: '*',
  // Upper bound on consecutive newlines in the result, used to squeeze out
  // runs of blank lines.
  // NOTE(review): a limit of 1 looks like it leaves no blank line at all
  // between blocks (a single blank line would need 2) — confirm this is the
  // intended trade-off for the LLM consumer.
  maxConsecutiveNewlines: 1,

  // --- Inline output ---
  // GFM-style emphasis markers (the form most models expect to see).
  emDelimiter: '*',
  strongDelimiter: '**',
  // Turn off the <url> autolink form so every link is written as [text](url).
  useInlineLinks: false,

  // --- Filtering ---
  ignore: IGNORED_TAGS,
};
/**
 * Converts an HTML document or fragment to Markdown using the module-level
 * OPTIONS.
 *
 * @param sourceHtml - Raw HTML to convert.
 * @returns The Markdown text with leading and trailing whitespace trimmed, or
 *   an empty string when `sourceHtml` is empty (the converter is not invoked
 *   in that case).
 */
export function htmlToMarkdown(sourceHtml: string): string {
  if (sourceHtml) {
    const markdown = NodeHtmlMarkdown.translate(sourceHtml, OPTIONS);
    return markdown.trim();
  }
  // Nothing to convert.
  return '';
}