feat: deterministic PZ log parser module + unit tests

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 15:18:41 +00:00
parent 511583035b
commit 4fec3a58f6
21 changed files with 1217 additions and 0 deletions
--- a/tools/pz-analyzer/tests/test_attribution.py
+++ b/tools/pz-analyzer/tests/test_attribution.py
@@ -0,0 +1,95 @@
+"""Tests for pz_parser phase 3 — mod attribution."""
+from __future__ import annotations
+
+import pathlib
+import sys
+import unittest
+
+sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1]))
+
+import pz_parser  # noqa: E402
+
+FIXTURE_DIR = pathlib.Path(__file__).resolve().parent / "fixtures"
+
+
+def fixture(name: str) -> pathlib.Path:
+    return FIXTURE_DIR / name
+
+
+class AttributionBucketTests(unittest.TestCase):
+    """Three confidence buckets: direct (high), inferred (medium),
+    unattributed (low)."""
+
+    def test_direct_attribution_when_lua_marker_on_entry(self) -> None:
+        entries = pz_parser.parse_file(fixture("fixture_lua_attributed.txt"))
+        records = pz_parser.classify_entries(entries, source_file="la.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        self.assertEqual(rec.attribution, "direct")
+        self.assertEqual(rec.confidence, "high")
+        # mod_id is normalised: lowercase, no spaces / apostrophes / hyphens.
+        self.assertEqual(rec.mod_id, "testmodalpha")
+        self.assertEqual(rec.mod_name, "Test Mod Alpha")
+
+    def test_inferred_attribution_within_lookback_window(self) -> None:
+        entries = pz_parser.parse_file(fixture("fixture_inferred.txt"))
+        records = pz_parser.classify_entries(entries, source_file="in.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        self.assertEqual(rec.attribution, "inferred")
+        self.assertEqual(rec.confidence, "medium")
+        self.assertEqual(rec.mod_id, "spongiesclothing")
+
+    def test_unattributed_when_no_marker_and_not_lua_shaped(self) -> None:
+        entries = pz_parser.parse_file(fixture("fixture_unattributed.txt"))
+        records = pz_parser.classify_entries(entries, source_file="ua.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        self.assertEqual(rec.attribution, "unattributed")
+        self.assertEqual(rec.confidence, "low")
+        self.assertEqual(rec.mod_id, "__unattributed__")
+
+
+class LookbackBoundaryTests(unittest.TestCase):
+    """Phase 3 — 40-line inferred-attribution window boundary."""
+
+    def test_lua_marker_beyond_lookback_does_not_attribute(self) -> None:
+        # Fixture places the Lua((MOD:...)) >40 lines before the ERROR.
+        entries = pz_parser.parse_file(fixture("fixture_lookback_boundary.txt"))
+        records = pz_parser.classify_entries(entries, source_file="lb.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        # The Lua-shaped ERROR is far enough back to be unattributed.
+        self.assertEqual(rec.attribution, "unattributed")
+        self.assertEqual(rec.mod_id, "__unattributed__")
+
+    def test_non_lua_shaped_body_rejects_inferred_attribution(self) -> None:
+        # Recent Lua((MOD:Spongies Clothing)) emitted, but the ERROR body
+        # ("Disk full while writing chunk data") isn't Lua-shaped.
+        entries = pz_parser.parse_file(fixture("fixture_non_lua_no_inferred.txt"))
+        records = pz_parser.classify_entries(entries, source_file="nl.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        self.assertEqual(rec.attribution, "unattributed")
+
+
+class NeededByTests(unittest.TestCase):
+    """Phase 3 — direct attribution via "needed by <mod>" hint."""
+
+    def test_needed_by_extracts_dependent_mod(self) -> None:
+        entries = pz_parser.parse_file(fixture("fixture_require_failed.txt"))
+        records = pz_parser.classify_entries(entries, source_file="rf.txt")
+        self.assertEqual(len(records), 1)
+        rec = records[0]
+        # "needed by Test Mod Alpha" should set the mod to Test Mod Alpha
+        # (preferred over the require("...") side which would mention
+        # DependencyMod). Either way we want direct/high.
+        self.assertEqual(rec.attribution, "direct")
+        self.assertEqual(rec.confidence, "high")
+        # The "needed by" branch is checked before the require() branch in
+        # the priority order; mod_id should reflect Test Mod Alpha.
+        self.assertEqual(rec.mod_id, "testmodalpha")
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script