import { describe, expect, it } from 'vitest'; import { htmlToMarkdown } from '../web/html-to-md.js'; describe('htmlToMarkdown', () => { it('converts h1 heading', () => { expect(htmlToMarkdown('
| Name | Age | City |
|---|---|---|
| Alice | 30 | NYC |
| Bob | 25 | LA |
| A |
|---|
| x | y |
'; const md = htmlToMarkdown(html); expect(md).toContain('> quoted text'); }); it('converts multi-line blockquote', () => { const html = 'quoted text
'; const md = htmlToMarkdown(html); expect(md).toContain('> line one'); expect(md).toContain('> line two'); }); it('converts fenced code block', () => { const html = 'line one
line two
const x = 1;';
const md = htmlToMarkdown(html);
expect(md).toContain('```\nconst x = 1;\n```');
});
it('preserves language hint from code class', () => {
  // The expected fence below is tagged `py`, so the fixture must be a
  // <pre><code> pair whose class carries the language; bare text cannot
  // produce a fenced block at all.
  // NOTE(review): the `language-` class prefix is assumed (the common
  // highlighter convention) — confirm against what html-to-md.js parses.
  const html = '<pre><code class="language-py">print("hello")</code></pre>';
  const md = htmlToMarkdown(html);
  expect(md).toContain('```py\nprint("hello")\n```');
});
it('converts inline code', () => {
  // The <code> element is what should become a backtick span; without the
  // tags the input is plain prose and the assertion could never hold.
  const md = htmlToMarkdown('use <code>npm install</code> to install');
  expect(md).toContain('`npm install`');
});
it('decodes HTML entities', () => {
  // Feed the named entities themselves. Passing the already-decoded
  // characters would succeed even if entity decoding were broken.
  expect(htmlToMarkdown('&amp; &lt; &gt; &quot;')).toBe('& < > "');
});
it('decodes numeric character references', () => {
  // &#39; is the decimal character reference for the apostrophe (U+0027).
  // The previous literal (three bare single quotes) did not even parse.
  expect(htmlToMarkdown('&#39;')).toBe("'");
});
it('decodes &nbsp; as space', () => {
  const md = htmlToMarkdown('hello&nbsp;world');
  // \s matches both a regular space and U+00A0, so either decoding is
  // accepted; an undecoded "&nbsp;" left in the output would fail.
  expect(md).toMatch(/hello\s+world/);
});
it('skips script content', () => {
const html = 'before
after
'; const md = htmlToMarkdown(html); expect(md).not.toContain('alert'); expect(md).toContain('before'); expect(md).toContain('after'); }); it('skips style content', () => { const html = 'text
'; const md = htmlToMarkdown(html); expect(md).not.toContain('color'); expect(md).toContain('text'); }); it('does not throw on malformed HTML', () => { expect(() => htmlToMarkdown('unclosed bold italic')).not.toThrow(); const md = htmlToMarkdown('
unclosed bold italic'); expect(md).toContain('bold'); expect(md).toContain('italic'); }); it('returns empty string for empty input', () => { expect(htmlToMarkdown('')).toBe(''); }); it('returns empty string for whitespace-only input', () => { expect(htmlToMarkdown(' \n\n ')).toBe(''); }); it('converts hr to horizontal rule', () => { const md = htmlToMarkdown('
above
below
'); expect(md).toContain('---'); }); it('converts br to newline', () => { const md = htmlToMarkdown('line oneone
two
'; const md = htmlToMarkdown(html); const blankRuns = md.match(/\n{3,}/g); expect(blankRuns).toBeNull(); }); // Golden test: small Hacker News-style snippet it('golden: HN-style snippet produces structured markdown', () => { const html = `This is a test page with a link.
| Metric | Value |
|---|---|
| Uptime | 99.9% |
| Latency | 42ms |
This tool is amazing.
console.log("hello");
`;
const md = htmlToMarkdown(html);
expect(md).toContain('# Welcome');
expect(md).toContain('**test**');
expect(md).toContain('[a link](https://example.com)');
expect(md).toContain('## Features');
expect(md).toContain('* Fast');
expect(md).toContain('| Metric | Value |');
expect(md).toContain('| --- | --- |');
expect(md).toContain('| Uptime | 99.9% |');
expect(md).toContain('> This tool is amazing.');
expect(md).toContain('```js\nconsole.log("hello");\n```');
expect(md).not.toContain('evil');
expect(md).not.toContain('