import { describe, expect, it } from 'vitest';
import { htmlToMarkdown } from '../web/html-to-md.js';

/**
 * Behavioural tests for `htmlToMarkdown`: each case feeds a small HTML
 * fragment to the converter and checks the Markdown that comes back.
 *
 * NOTE(review): the HTML fixtures in this file had their tags and entities
 * stripped at some point and were reconstructed from the test titles and the
 * expected Markdown. The assertions are unchanged; confirm the fixtures
 * against version control if the exact original markup matters.
 */
describe('htmlToMarkdown', () => {
  it('converts h1 heading', () => {
    expect(htmlToMarkdown('<h1>Title</h1>')).toBe('# Title');
  });

  it('converts h1 through h6', () => {
    const html =
      '<h1>One</h1><h2>Two</h2><h3>Three</h3><h4>Four</h4><h5>Five</h5><h6>Six</h6>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('# One');
    expect(md).toContain('## Two');
    expect(md).toContain('### Three');
    expect(md).toContain('#### Four');
    expect(md).toContain('##### Five');
    expect(md).toContain('###### Six');
  });

  it('converts anchor with href', () => {
    expect(htmlToMarkdown('<a href="https://example.com">click here</a>'))
      .toBe('[click here](https://example.com)');
  });

  it('converts anchor without href to plain text', () => {
    expect(htmlToMarkdown('<a>just text</a>')).toBe('just text');
  });

  it('converts bold and italic', () => {
    // Both the semantic and the presentational tag must map to the same output.
    expect(htmlToMarkdown('<strong>bold</strong>')).toBe('**bold**');
    expect(htmlToMarkdown('<b>bold</b>')).toBe('**bold**');
    expect(htmlToMarkdown('<em>italic</em>')).toBe('*italic*');
    expect(htmlToMarkdown('<i>italic</i>')).toBe('*italic*');
  });

  it('handles combined bold+italic', () => {
    const md = htmlToMarkdown('<strong><em>bold italic</em></strong>');
    expect(md).toBe('***bold italic***');
  });

  it('converts unordered list', () => {
    const html = '<ul><li>one</li><li>two</li><li>three</li></ul>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('* one');
    expect(md).toContain('* two');
    expect(md).toContain('* three');
  });

  it('converts ordered list', () => {
    const html = '<ol><li>first</li><li>second</li></ol>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('1. first');
    expect(md).toContain('2. second');
  });

  it('handles nested lists', () => {
    const html = '<ul><li>outer<ul><li>inner</li></ul></li></ul>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('* outer');
    // The leading space asserts that the inner item is indented.
    expect(md).toContain(' * inner');
  });

  it('converts 3-column GFM table with header', () => {
    const html = `
      <table>
        <tr><th>Name</th><th>Age</th><th>City</th></tr>
        <tr><td>Alice</td><td>30</td><td>NYC</td></tr>
        <tr><td>Bob</td><td>25</td><td>LA</td></tr>
      </table>
    `;
    const md = htmlToMarkdown(html);
    expect(md).toContain('| Name | Age | City |');
    expect(md).toContain('| --- | --- | --- |');
    expect(md).toContain('| Alice | 30 | NYC |');
    expect(md).toContain('| Bob | 25 | LA |');
  });

  it('escapes pipe characters in table cells', () => {
    const html = '<table><tr><th>A</th></tr><tr><td>x | y</td></tr></table>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('x \\| y');
  });

  it('converts blockquote', () => {
    const html = '<blockquote>quoted text</blockquote>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('> quoted text');
  });

  it('converts multi-line blockquote', () => {
    const html = '<blockquote><p>line one</p><p>line two</p></blockquote>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('> line one');
    expect(md).toContain('> line two');
  });

  it('converts fenced code block', () => {
    const html = '<pre><code>const x = 1;</code></pre>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('```\nconst x = 1;\n```');
  });

  it('preserves language hint from code class', () => {
    const html = '<pre><code class="language-py">print("hello")</code></pre>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('```py\nprint("hello")\n```');
  });

  it('converts inline code', () => {
    expect(htmlToMarkdown('use <code>npm install</code> to install'))
      .toContain('`npm install`');
  });

  it('decodes HTML entities', () => {
    expect(htmlToMarkdown('&amp; &lt; &gt; &quot;')).toBe('& < > "');
  });

  it('decodes numeric character references', () => {
    expect(htmlToMarkdown('&#39;')).toBe("'");
  });

  it('decodes &nbsp; as space', () => {
    const md = htmlToMarkdown('hello&nbsp;world');
    expect(md).toMatch(/hello\s+world/);
  });

  it('skips script content', () => {
    const html = '<p>before</p><script>alert("xss")</script><p>after</p>';
    const md = htmlToMarkdown(html);
    expect(md).not.toContain('alert');
    expect(md).toContain('before');
    expect(md).toContain('after');
  });

  it('skips style content', () => {
    const html = '<style>body { color: red; }</style><p>text</p>';
    const md = htmlToMarkdown(html);
    expect(md).not.toContain('color');
    expect(md).toContain('text');
  });

  it('does not throw on malformed HTML', () => {
    // None of the opened tags is ever closed.
    const html = '<p>unclosed <b>bold <i>italic';
    expect(() => htmlToMarkdown(html)).not.toThrow();
    const md = htmlToMarkdown(html);
    expect(md).toContain('bold');
    expect(md).toContain('italic');
  });

  it('returns empty string for empty input', () => {
    expect(htmlToMarkdown('')).toBe('');
  });

  it('returns empty string for whitespace-only input', () => {
    expect(htmlToMarkdown(' \n\n ')).toBe('');
  });

  it('converts hr to horizontal rule', () => {
    const md = htmlToMarkdown('<p>above</p><hr><p>below</p>');
    expect(md).toContain('---');
  });

  it('converts br to newline', () => {
    const md = htmlToMarkdown('line one<br>line two');
    expect(md).toContain('line one\nline two');
  });

  it('handles ol with start attribute', () => {
    const html = '<ol start="5"><li>five</li><li>six</li></ol>';
    const md = htmlToMarkdown(html);
    expect(md).toContain('5. five');
    expect(md).toContain('6. six');
  });

  it('collapses excessive blank lines', () => {
    // Nested block elements are a likely source of stacked blank lines.
    const html = '<div><p>one</p></div><div><p>two</p></div>';
    const md = htmlToMarkdown(html);
    const blankRuns = md.match(/\n{3,}/g);
    expect(blankRuns).toBeNull();
  });

  // Golden test: small Hacker News-style snippet
  it('golden: HN-style snippet produces structured markdown', () => {
    const html = `
      <html>
        <head>
          <title>Test Page</title>
          <script>var evil = true;</script>
        </head>
        <body>
          <h1>Welcome</h1>
          <p>This is a <strong>test</strong> page with <a href="https://example.com">a link</a>.</p>
          <h2>Features</h2>
          <ul>
            <li>Fast</li>
            <li>Reliable</li>
            <li>Secure</li>
          </ul>
          <h2>Data</h2>
          <table>
            <tr><th>Metric</th><th>Value</th></tr>
            <tr><td>Uptime</td><td>99.9%</td></tr>
            <tr><td>Latency</td><td>42ms</td></tr>
          </table>
          <blockquote>This tool is amazing.</blockquote>
          <pre><code class="language-js">console.log("hello");</code></pre>
        </body>
      </html>
    `;
    const md = htmlToMarkdown(html);
    expect(md).toContain('# Welcome');
    expect(md).toContain('**test**');
    expect(md).toContain('[a link](https://example.com)');
    expect(md).toContain('## Features');
    expect(md).toContain('* Fast');
    expect(md).toContain('| Metric | Value |');
    expect(md).toContain('| --- | --- |');
    expect(md).toContain('| Uptime | 99.9% |');
    expect(md).toContain('> This tool is amazing.');
    expect(md).toContain('```js\nconsole.log("hello");\n```');
    // Neither the script body nor the tag itself may leak into the output.
    expect(md).not.toContain('evil');
    expect(md).not.toContain('<script>');
  });
});