#!/usr/bin/env python3
"""Analyze MTP n_max sweep results and produce summary.md."""

import json
from pathlib import Path
from typing import Optional

RESULTS_PATH = Path(__file__).parent / "results.json"
SUMMARY_PATH = Path(__file__).parent / "summary.md"


def load_results() -> list[dict]:
    """Return the usable measurements stored in ``RESULTS_PATH``.

    The file is parsed as JSON and iterated as a sequence of dict-like
    records. A record is kept only when its ``eval_tok_s`` value is not
    ``None`` and its ``error`` value is ``None`` (or absent).

    Raises:
        FileNotFoundError: if ``RESULTS_PATH`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    data = json.loads(RESULTS_PATH.read_text(encoding="utf-8"))
    return [
        r
        for r in data
        if r.get("eval_tok_s") is not None and r.get("error") is None
    ]


def _acceptance_note(rows: list[dict]) -> str:
    """Return ``" (accept NN%)"`` for rows that report draft counts, else ``""``.

    Only rows with a truthy ``draft_n`` take part. A ``draft_n_accepted``
    that is missing *or* ``None`` counts as zero, so a JSON ``null`` cannot
    break the sum.
    """
    drafted = [r for r in rows if r.get("draft_n")]
    total_draft = sum(r["draft_n"] for r in drafted)
    total_accepted = sum(r.get("draft_n_accepted") or 0 for r in drafted)
    if total_draft <= 0:
        return ""
    accept_pct = (total_accepted / total_draft) * 100
    return f" (accept {accept_pct:.0f}%)"


def _model_section(
    model: str, model_rows: list[dict]
) -> tuple[list[str], Optional[tuple]]:
    """Render the markdown section for one model.

    Args:
        model: model name used as the section heading.
        model_rows: the valid measurements belonging to that model; each
            must provide ``n_max``, ``prompt`` and ``eval_tok_s``.

    Returns:
        ``(lines, recommendation)``. ``lines`` are the markdown lines of the
        section. ``recommendation`` is
        ``(model, best_n, best_avg, improvement_pct)`` when a positive
        ``n_max == 0`` baseline average exists, otherwise ``None``.
    """
    n_max_values = sorted({r["n_max"] for r in model_rows})
    prompt_names = sorted({r["prompt"] for r in model_rows})

    # Index the rows once instead of rescanning the list for every cell.
    # ``setdefault`` keeps the first measurement seen for a given
    # (n_max, prompt) pair when there are duplicates.
    first_by_cell: dict = {}
    rows_by_n: dict = {}
    for r in model_rows:
        first_by_cell.setdefault((r["n_max"], r["prompt"]), r)
        rows_by_n.setdefault(r["n_max"], []).append(r)

    header = (
        "| n_max | "
        + " | ".join(f"{p} tok/s" for p in prompt_names)
        + " | avg tok/s | vs n_max=0 |"
    )
    sep = (
        "|-------|"
        + "|".join("-" * (len(p) + 7) for p in prompt_names)
        + "|-----------|------------|"
    )
    lines = [f"\n## {model}\n", header, sep]

    baseline_avg = None
    best_avg = 0
    best_n = 0
    for n in n_max_values:
        cells = []
        vals = []
        for p in prompt_names:
            row = first_by_cell.get((n, p))
            if row is None:
                cells.append("—")
                continue
            v = row["eval_tok_s"]
            cells.append(f"{v:.1f}")
            vals.append(v)

        # The average covers only the prompts that were actually measured.
        avg = sum(vals) / len(vals) if vals else 0

        # n_max values are visited in ascending order, so the n_max == 0
        # baseline (when present) is known before any larger n_max.
        if n == 0:
            baseline_avg = avg
            delta = "baseline"
        elif baseline_avg is not None and baseline_avg > 0:
            pct = ((avg - baseline_avg) / baseline_avg) * 100
            delta = f"{pct:+.1f}%"
        else:
            delta = "—"

        # Strict comparison: on a tie the smaller n_max stays the winner.
        if avg > best_avg:
            best_avg = avg
            best_n = n

        draft_info = _acceptance_note(rows_by_n[n])
        lines.append(
            f"| {n} | " + " | ".join(cells) + f" | {avg:.1f} | {delta}{draft_info} |"
        )

    if baseline_avg is not None and baseline_avg > 0 and best_avg > 0:
        improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
        lines.append(
            f"\n**Optimal n_max: {best_n}** "
            f"(avg {best_avg:.1f} tok/s, {improvement:+.1f}% vs baseline)\n"
        )
        return lines, (model, best_n, best_avg, improvement)

    # Without a usable baseline the model gets no recommendation row.
    lines.append(f"\n**Optimal n_max: {best_n}** (avg {best_avg:.1f} tok/s)\n")
    return lines, None


def _recommendation_table(recommendations: list[tuple]) -> list[str]:
    """Render the closing per-model table of suggested flags.

    Each entry is ``(model, n_max, avg_tok_s, improvement_pct)``. The list
    ends with an empty string so the joined summary ends with a newline.
    """
    lines = [
        "\n---\n",
        "## Recommended `llama_extra_args` per model\n",
        "| Model | n_max | avg tok/s | vs baseline | suggested flags |",
        "|-------|-------|-----------|-------------|-----------------|",
    ]
    for model, n, avg, imp in recommendations:
        if n > 0:
            flags = f'`["--spec-type", "draft-mtp", "--spec-draft-n-max", "{n}"]`'
        else:
            flags = "_(none — MTP not beneficial)_"
        lines.append(f"| {model} | {n} | {avg:.1f} | {imp:+.1f}% | {flags} |")
    lines.append("")
    return lines


def main() -> None:
    """Build the markdown summary, write it to ``SUMMARY_PATH`` and print it.

    Prints a notice and returns without writing anything when
    ``load_results`` yields no valid measurements.
    """
    rows = load_results()
    if not rows:
        print("No valid results found.")
        return

    # Group the rows per model in a single pass.
    rows_by_model: dict = {}
    for r in rows:
        rows_by_model.setdefault(r["model"], []).append(r)
    models = sorted(rows_by_model)

    lines = ["# MTP n_max Sweep Results\n"]
    lines.append(
        f"**{len(rows)} valid measurements across {len(models)} models.**\n"
    )

    recommendations = []
    for model in models:
        section, recommendation = _model_section(model, rows_by_model[model])
        lines.extend(section)
        if recommendation is not None:
            recommendations.append(recommendation)

    lines.extend(_recommendation_table(recommendations))

    summary = "\n".join(lines)
    # The summary contains non-ASCII characters ("—"); write it as UTF-8
    # regardless of the platform's default encoding.
    SUMMARY_PATH.write_text(summary, encoding="utf-8")
    print(summary)
    print(f"\nWritten to: {SUMMARY_PATH}")


if __name__ == "__main__":
    main()