#!/usr/bin/env python3 """Analyze MTP on/off benchmark results → CSV + SUMMARY.md + recommendations.""" import csv import json import os import re import statistics from pathlib import Path RESULTS_DIR = Path(__file__).parent / "results" CSV_PATH = Path(__file__).parent / "results.csv" SUMMARY_PATH = Path(__file__).parent / "SUMMARY.md" RECO_PATH = Path(__file__).parent / "llama-swap-recommendations.md" FNAME_RE = re.compile( r"^(?P.+?)__mtp-(?Pon|off)__len(?P\d+)__run(?P\d+)\.json$" ) def parse_result(path: Path) -> dict | None: m = FNAME_RE.match(path.name) if not m: return None try: data = json.loads(path.read_text()) except (json.JSONDecodeError, OSError): return None t = data.get("timings", {}) return { "gguf": m.group("stem"), "mtp": m.group("mtp"), "prompt_len": int(m.group("len")), "run": int(m.group("run")), "prompt_tps": t.get("prompt_per_second"), "predicted_tps": t.get("predicted_per_second"), "cache_n": t.get("cache_n"), "draft_n": t.get("draft_n"), "accepted_n": t.get("draft_n_accepted"), "total_ms": (t.get("prompt_ms", 0) or 0) + (t.get("predicted_ms", 0) or 0), } def load_all() -> list[dict]: rows = [] for f in sorted(RESULTS_DIR.glob("*.json")): r = parse_result(f) if r: rows.append(r) return rows def write_csv(rows: list[dict]) -> None: fields = ["gguf", "mtp", "prompt_len", "run", "prompt_tps", "predicted_tps", "cache_n", "draft_n", "accepted_n", "total_ms"] with open(CSV_PATH, "w", newline="") as f: w = csv.DictWriter(f, fieldnames=fields) w.writeheader() w.writerows(rows) print(f"Wrote {len(rows)} rows to {CSV_PATH}") def median_of(values: list[float]) -> float: return statistics.median(values) if values else 0.0 def write_summary(rows: list[dict]) -> None: ggufs = sorted(set(r["gguf"] for r in rows)) lens = sorted(set(r["prompt_len"] for r in rows)) lines = ["# MTP On/Off Benchmark Results\n"] lines.append(f"**{len(rows)} measurements across {len(ggufs)} GGUFs.**\n") lines.append(f"Runs 2 & 3 used for median (run 1 = warmup, discarded).\n") verdicts = [] for gguf in ggufs: lines.append(f"\n## {gguf}\n") header_parts = ["prompt_len"] for state in ["off", "on"]: header_parts.append(f"MTP-{state} tok/s") header_parts.extend(["delta %", "accept %"]) lines.append("| " + " | ".join(header_parts) + " |") lines.append("|" + "|".join("---" for _ in header_parts) + "|") any_above_10 = False for pl in lens: off_vals = [r["predicted_tps"] for r in rows if r["gguf"] == gguf and r["mtp"] == "off" and r["prompt_len"] == pl and r["run"] >= 2 and r["predicted_tps"] is not None] on_vals = [r["predicted_tps"] for r in rows if r["gguf"] == gguf and r["mtp"] == "on" and r["prompt_len"] == pl and r["run"] >= 2 and r["predicted_tps"] is not None] off_med = median_of(off_vals) on_med = median_of(on_vals) if off_med > 0: delta = ((on_med - off_med) / off_med) * 100 else: delta = 0.0 if abs(delta) >= 10: any_above_10 = True draft_rows = [r for r in rows if r["gguf"] == gguf and r["mtp"] == "on" and r["prompt_len"] == pl and r["run"] >= 2 and r.get("draft_n")] total_draft = sum(r.get("draft_n", 0) for r in draft_rows) total_accepted = sum(r.get("accepted_n", 0) for r in draft_rows) accept_pct = f"{(total_accepted / total_draft * 100):.0f}%" if total_draft > 0 else "—" lines.append( f"| {pl} | {off_med:.1f} | {on_med:.1f} | {delta:+.1f}% | {accept_pct} |" ) if any_above_10: verdict = "KEEP MTP" else: verdict = "DROP MTP" verdicts.append((gguf, verdict)) lines.append(f"\n**Verdict: {verdict}**\n") lines.append("\n---\n") lines.append("## Verdict Summary\n") lines.append("| GGUF | Verdict |") lines.append("|------|---------|") for gguf, verdict in verdicts: lines.append(f"| {gguf} | {verdict} |") summary = "\n".join(lines) + "\n" SUMMARY_PATH.write_text(summary) print(f"Wrote {SUMMARY_PATH}") print(summary) def write_recommendations(rows: list[dict]) -> None: ggufs = sorted(set(r["gguf"] for r in rows)) lens = sorted(set(r["prompt_len"] for r in rows)) lines = ["# llama-swap Config Recommendations\n"] lines.append("Based on MTP on/off benchmark results.\n") lines.append("**Read-only reference** — do NOT edit D:\\llama-swap\\config.yaml directly.\n") lines.append("```yaml") lines.append("# Commented diff against current config.yaml") lines.append("# Lines starting with + should be added, - should be removed") lines.append("") model_map = { "Qwen3.6-35B-A3B-MXFP4_MOE": "qwen3.6-35b-a3b-mxfp4", "Qwen3.6-27B-Q6_K": "qwen3.6-27b-mtp", "Qwopus3.5-4B-v3-MTP-Q8_0": "qwopus3.5-4b-mtp", "Qwen3.5-9B-DeepSeek-V4-Flash-MTP-Q8_0": "qwen3.5-9b-deepseek-v4-mtp", "Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M": "qwopus3.6-35b-a3b-v1-mtp", "Qwopus3.6-35B-A3B-v1-MTP-MXFP4_MOE_BF16": "qwopus3.6-35b-a3b-mxfp4-mtp", "Qwopus3.6-27B-v2-MTP-Q6_K": "qwopus3.6-27b-v2-mtp", "Qwopus3.5-9B-Coder-MTP-Q8_0": "qwopus3.5-9b-coder-mtp", } currently_mtp = { "Qwen3.6-35B-A3B-MXFP4_MOE": False, "Qwen3.6-27B-Q6_K": True, "Qwopus3.5-4B-v3-MTP-Q8_0": True, "Qwen3.5-9B-DeepSeek-V4-Flash-MTP-Q8_0": True, "Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M": True, "Qwopus3.6-35B-A3B-v1-MTP-MXFP4_MOE_BF16": True, "Qwopus3.6-27B-v2-MTP-Q6_K": True, "Qwopus3.5-9B-Coder-MTP-Q8_0": True, } for gguf in ggufs: model_id = model_map.get(gguf, gguf) is_mtp_now = currently_mtp.get(gguf, False) off_vals = [r["predicted_tps"] for r in rows if r["gguf"] == gguf and r["mtp"] == "off" and r["run"] >= 2 and r["predicted_tps"] is not None] on_vals = [r["predicted_tps"] for r in rows if r["gguf"] == gguf and r["mtp"] == "on" and r["run"] >= 2 and r["predicted_tps"] is not None] off_med = median_of(off_vals) on_med = median_of(on_vals) delta = ((on_med - off_med) / off_med * 100) if off_med > 0 else 0 should_mtp = delta >= 10 lines.append(f" # {model_id}: MTP {'on' if is_mtp_now else 'off'} → {'on' if should_mtp else 'off'} (delta {delta:+.1f}%)") if should_mtp and not is_mtp_now: lines.append(f" # + --spec-type draft-mtp --spec-draft-n-max 2") elif not should_mtp and is_mtp_now: lines.append(f" # - --spec-type draft-mtp --spec-draft-n-max 2") else: lines.append(f" # (no change)") lines.append("") lines.append("```\n") reco = "\n".join(lines) RECO_PATH.write_text(reco) print(f"Wrote {RECO_PATH}") def main() -> None: rows = load_all() if not rows: print("No results found in", RESULTS_DIR) return write_csv(rows) write_summary(rows) write_recommendations(rows) if __name__ == "__main__": main()