From fdf70a0c0642c7846ac3fb287e3ae914e8ca61f4 Mon Sep 17 00:00:00 2001 From: indifferentketchup Date: Mon, 4 May 2026 15:39:44 +0000 Subject: [PATCH] docs: align lookback test purpose and spec normalization list Honest test docstring (old/new semantics equivalent on contiguous entries; test locks post-fix behavior against future regressions), and add severity-prefix strip to the spec's normalization list. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...5-04-pz-deterministic-classifier-design.md | 2 ++ tools/pz-analyzer/tests/test_attribution.py | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/superpowers/specs/2026-05-04-pz-deterministic-classifier-design.md b/docs/superpowers/specs/2026-05-04-pz-deterministic-classifier-design.md index c8754b4..dabbdb2 100644 --- a/docs/superpowers/specs/2026-05-04-pz-deterministic-classifier-design.md +++ b/docs/superpowers/specs/2026-05-04-pz-deterministic-classifier-design.md @@ -126,6 +126,7 @@ signature = sha256(pattern_id + mod_id)[:16] Normalization for `pattern_id`: - Strip session metadata prefix (`General f:N, t:N, st:N,N,N,N>` shape) +- Strip body-prefix severity token (`ERROR:` / `SEVERE:` / `WARN:` / `FATAL:`, case-insensitive) so a body that opens with the severity word still hashes the same as one that doesn't. - Flatten double- and single-quoted strings to `""` / `''` - Flatten ≥2-digit numeric runs to `` - Collapse whitespace @@ -240,5 +241,6 @@ Test invocation: `python -m unittest discover tools/pz-analyzer/tests/` should b - Editing `pz_error_analysis.py` or `pz_redact_all.sh`. - Modifying any file in `/opt/ik-codex/src/`, `/opt/ik-codex/test/`, or `/opt/iblogs/`. - AI / LLM integration of any kind in the new tool. +- LLM inference at runtime in iblogs / bosslogs production. The Qwen analyzer (`pz_error_analysis.py`) is a developer-only discovery tool used to expand the deterministic ruleset in `pz_parser.py` (and its future PHP port). Production rendering is deterministic-only, forever. - iblogs front-end rendering of the classification output. - Filesystem mod-scan reattribution (pzmm's symbol/vehicle indexes). diff --git a/tools/pz-analyzer/tests/test_attribution.py b/tools/pz-analyzer/tests/test_attribution.py index 89e4732..fdb89f6 100644 --- a/tools/pz-analyzer/tests/test_attribution.py +++ b/tools/pz-analyzer/tests/test_attribution.py @@ -169,12 +169,15 @@ class RawLineLookbackTests(unittest.TestCase): path.unlink() def test_multiline_entry_does_not_shrink_practical_lookback(self) -> None: + """Multi-line entries inside the lookback window do not break + attribution. (Old body-line-budget and new raw-line-distance semantics + happen to be equivalent on contiguous PZ entries; this test locks the + post-fix semantic against future regression to a budget that *would* + differ — e.g. a body-line cap with a smaller value.) + """ # Layout the file so a multi-line entry sits between marker and ERROR. - # Under the OLD body-line-budget semantics the multi-line entry's 5 - # continuation lines would consume the budget and push the marker - # outside the window. Under raw-line semantics the marker on line 1 is - # still within 40 raw lines of the ERROR even though the file has a - # 6-line multi-line entry in between. + # The marker on line 1 is within 40 raw lines of the ERROR even though + # the file has a 6-line multi-line entry in between. lines = [_make_marker_line(0)] # raw line 1: marker entry # Single-line fillers on raw lines 2..30 (29 entries). for i in range(1, 30): @@ -207,9 +210,11 @@ class RawLineLookbackTests(unittest.TestCase): self.assertEqual(entries[-1].line_start, 41) records = pz_parser.classify_entries(entries, source_file="ml.txt") self.assertEqual(len(records), 1) - # Under the OLD body-line-budget rule, the 5 stack-frame lines - # plus the surrounding fillers would have pushed the marker out - # of the budget. Under raw-line semantics it survives. + # Raw-line-distance semantics: the marker on line 1 is 40 raw + # lines from the ERROR on line 41, so attribution holds. (Old + # body-line-budget would also pass here on contiguous entries; + # this assertion locks the post-fix behavior against future + # regression to a tighter cap.) self.assertEqual(records[0].attribution, "inferred") self.assertEqual(records[0].mod_id, "testmodalpha") finally: