Clear the 3 Unsloth-Studio-derived AGPL files and flip LICENSE + 5 package.json from AGPL-3.0-only to MIT.

- html-to-md.ts → MIT node-html-markdown (parse5 dropped)
- llama-args-validator.ts → clean-room (flag denylist = facts)
- tool-call-parser.ts → delete dead Unsloth-ported code; keep extractToolCallBlocks/stripToolMarkup byte-identical (no behavior change)
- LICENSE → MIT (Copyright (c) 2026 indifferentketchup); 5 package.json → MIT; AGPL SPDX headers removed; README License section; license-mit guard test
- roadmap License-debt batch marked shipped; openspec/changes/license-debt-mit

Decouples the relicense from the native-parsing retirement (the ported parser was dead code). Server suite 519 passing; build + coder typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
25 lines
994 B
TypeScript
25 lines
994 B
TypeScript
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
|
|
|
// MIT-licensed HTML→Markdown rendering for the web_fetch tool. Output feeds an
// LLM, so structural fidelity matters more than exact whitespace.
|
|
/**
 * Conversion options passed to `NodeHtmlMarkdown.translate` by `htmlToMarkdown`.
 */
const OPTIONS = {
  // GFM-style emphasis markers (matches what most models expect).
  emDelimiter: '*',
  strongDelimiter: '**',
  bulletMarker: '*',

  // Emit code blocks as triple-backtick fences.
  codeFence: '```',
  codeBlockStyle: 'fenced' as const,

  // Always use []() syntax for links rather than <url> autolinks.
  useInlineLinks: false,

  // Collapse runs of newlines down to a single one.
  // NOTE(review): a cap of 1 presumably leaves no blank line between blocks
  // (a blank line needs two consecutive newlines) — confirm this is intended.
  maxConsecutiveNewlines: 1,

  // Strip non-content elements entirely (script/style are skipped by default,
  // but listing them here is explicit; head/nav/footer/etc. drop their text).
  ignore: ['script', 'style', 'head', 'noscript', 'svg', 'math', 'nav', 'footer'],
};
|
|
|
|
/**
 * Convert an HTML string to Markdown using the module-level `OPTIONS`.
 *
 * @param sourceHtml - Raw HTML markup to convert.
 * @returns The Markdown produced by `NodeHtmlMarkdown.translate`, with leading
 *   and trailing whitespace trimmed; `''` when `sourceHtml` is empty.
 */
export function htmlToMarkdown(sourceHtml: string): string {
  // Empty input short-circuits, so the converter is never invoked for it.
  if (!sourceHtml) return '';
  return NodeHtmlMarkdown.translate(sourceHtml, OPTIONS).trim();
}
|