feat: deterministic PZ log parser module + unit tests

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 15:18:41 +00:00
parent 511583035b
commit 4fec3a58f6
21 changed files with 1217 additions and 0 deletions

View File

@@ -0,0 +1,738 @@
"""
pz_parser.py — Deterministic Project Zomboid log parser.
Pure module (no I/O beyond reading the path it is handed). Walks a redacted
DebugLog-server*.txt file, extracts errors/warnings, attributes each to a mod
where evidence allows, classifies by kind, and computes deterministic
signatures. Output records are designed to be `dataclasses.asdict()`-ready
for direct JSON serialisation.
Pipeline phases (per design spec at
docs/superpowers/specs/2026-05-04-pz-deterministic-classifier-design.md):
1. Severity-prefix recognition (ERROR|SEVERE|WARN)
2. Bidirectional stack collection (pre-stack walk back, post-stack walk forward)
3. Mod attribution (direct, inferred, unattributed)
4. File:line extraction (five fallbacks)
5. Cause-chain extraction (Caused by: chains + standalone exception lines)
6. Java exception kind detection
7. Engine-noise tagging
8. Signature computation (pattern_id + signature)
9. Aggregation (dedup on signature)
Style notes mirror sibling tool pz_error_analysis.py: type hints with built-in
generics, `from __future__ import annotations`, regex precompilation as
module-level constants, stdlib-only.
"""
from __future__ import annotations
import hashlib
import pathlib
import re
from dataclasses import dataclass
# ---------------------------------------------------------------------------
# Tunable constants
# ---------------------------------------------------------------------------
#: Lookback window (in raw file lines) for inferred mod attribution.
INFERRED_LOOKBACK_LINES: int = 40
#: Maximum frames retained per record after pre+post stack merge.
MAX_STACK_FRAMES: int = 8
#: Maximum lines walked in each direction during bidirectional stack collection.
STACK_WALK_LINES: int = 25
#: Maximum cause-chain depth retained.
MAX_CAUSE_CHAIN_LEVELS: int = 6
#: Truncation length for the normalised first line that feeds pattern_id.
PATTERN_ID_FIRST_LINE_MAX: int = 200
# ---------------------------------------------------------------------------
# Line-shape regexes (parsing)
# ---------------------------------------------------------------------------
#: PZ DebugLog entry header.
#: Example: ``[16-04-26 00:01:19.080] ERROR: General f:0, t:1, st:1,2,3,4> body``
ENTRY_RE = re.compile(
r"^\[(?P<ts>\d{2}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3})\]\s+"
r"(?P<level>[A-Z]+)\s*:\s*(?P<rest>.*)$"
)
#: Strips the "General f:N, t:N, st:N,N,N,N>" prefix from a body line.
SESSION_META_RE = re.compile(
r"^[A-Za-z][A-Za-z0-9]*\s+f:\d+,?\s*(?:t:\d+,?\s*)?st:[\d,]+>\s*"
)
# ---------------------------------------------------------------------------
# Severity-prefix recognition (phase 1)
# ---------------------------------------------------------------------------
#: Severity tokens that flag a body line as an error/warning event when they
#: appear at the start of body text. Per spec: broader than the existing
#: pz_error_analysis.py regex (adds SEVERE for Java util-logging).
SEVERITY_BODY_RE = re.compile(r"^\s*(ERROR|SEVERE|WARN)\s*[:\s]")
#: Bracketed-level tokens that map to severity events.
SEVERITY_LEVELS: tuple[str, ...] = ("ERROR", "WARN", "SEVERE", "FATAL")
# ---------------------------------------------------------------------------
# Stack-frame recognition (phase 2)
# ---------------------------------------------------------------------------
#: Markers that identify a line as stack-shaped. Used to gate pre/post stack
#: collection so we don't latch onto non-stack continuation text.
STACK_HINT_RE = re.compile(
r"(?:\bat\s+\S+|\[string\s+\"|function:\s|file:\s|\.lua\b)",
re.IGNORECASE,
)
# ---------------------------------------------------------------------------
# Mod attribution (phase 3)
# ---------------------------------------------------------------------------
#: Direct attribution marker: ``Lua((MOD:<name>))``.
LUA_MOD_MARKER_RE = re.compile(r"Lua\(\(MOD:([^)]+)\)\)")
#: Direct attribution: ``require("X") failed`` shape.
REQUIRE_FAILED_RE = re.compile(
r"""require\s*\(\s*["']([^"']+)["']\s*\)\s+failed""",
re.IGNORECASE,
)
#: Direct attribution: explicit ``needed by <mod>`` hint.
NEEDED_BY_RE = re.compile(r"needed\s+by\s+([A-Za-z0-9_'\- ]+?)(?:[,.]|$)", re.IGNORECASE)
#: Patterns that flag a body as "Lua-shaped" — gating filter for inferred
#: attribution. Mirrors the spec's enumeration.
LUA_SHAPED_PATTERNS: tuple[re.Pattern[str], ...] = (
re.compile(r"luamanager\.getfunctionobject", re.IGNORECASE),
re.compile(r"no\s+such\s+function", re.IGNORECASE),
re.compile(r"exception\s+thrown", re.IGNORECASE),
re.compile(r"runtimeexception", re.IGNORECASE),
re.compile(r"illegalstateexception", re.IGNORECASE),
re.compile(r"\blua\b", re.IGNORECASE),
)
# ---------------------------------------------------------------------------
# File:line extraction (phase 4) — five fallbacks tried in order
# ---------------------------------------------------------------------------
#: 1. ``at <path>.lua:<n>`` — typical Lua stack frame.
FILE_LINE_AT_RE = re.compile(r"\bat\s+([^\s:]+\.lua):(\d+)")
#: 2. ``function: ... file: <path>.lua line #<n>`` (or `: <n>`).
FILE_LINE_FUNCTION_RE = re.compile(
r"function:\s*[^,]*?file:\s*([^\s,]+\.lua)\s+line\s*(?:#|:)\s*(\d+)",
re.IGNORECASE,
)
#: 3. ``[string "<path>.lua"]:<n>`` — Lua VM source string.
FILE_LINE_STRING_RE = re.compile(r"""\[string\s+["']([^"']+\.lua)["']\]:(\d+)""")
#: 4. quoted path ending in a known extension; line # optional.
FILE_LINE_QUOTED_RE = re.compile(
r"""["']([^"']+\.(?:lua|txt|xml|json|ini|cfg|bin))["'](?::(\d+))?"""
)
#: 5. unquoted path segment beginning with a recognised root.
FILE_LINE_UNQUOTED_RE = re.compile(
r"\b((?:media|maps|lua|scripts)/[\w./\-]+\.(?:lua|txt|xml|json|ini|cfg|bin))(?::(\d+))?"
)
# ---------------------------------------------------------------------------
# Cause-chain extraction (phase 5)
# ---------------------------------------------------------------------------
#: ``Caused by: <ExceptionClass>: <msg>`` (msg optional).
CAUSED_BY_RE = re.compile(
r"Caused\s+by:\s+((?:\w+\.)+\w+(?:Exception|Error))(?::\s*(.+?))?\s*$",
re.IGNORECASE,
)
#: Standalone Java exception line: ``com.foo.BarException: msg``.
EXCEPTION_LINE_RE = re.compile(
r"((?:\w+\.)+\w+(?:Exception|Error))(?::\s*(.+?))?(?=\s+at\s|\s*$)"
)
# ---------------------------------------------------------------------------
# Engine-noise tagging (phase 7)
# ---------------------------------------------------------------------------
ENGINE_NOISE_PATTERNS: tuple[re.Pattern[str], ...] = (
re.compile(r"kahluathread\.flusherrormessage", re.IGNORECASE),
re.compile(r"dumping\s+lua\s+stack\s+trace", re.IGNORECASE),
)
# ---------------------------------------------------------------------------
# Signature normalisation (phase 8)
# ---------------------------------------------------------------------------
DOUBLE_QUOTED_RE = re.compile(r'"[^"]*"')
SINGLE_QUOTED_RE = re.compile(r"'[^']*'")
NUMERIC_RUN_RE = re.compile(r"\d{2,}")
WS_RUN_RE = re.compile(r"\s+")
# ---------------------------------------------------------------------------
# Dataclasses — match the JSON keys the spec mandates so consumers can
# `dataclasses.asdict(record)` straight to JSON.
# ---------------------------------------------------------------------------
@dataclass
class Entry:
"""One parsed log entry. Continuation lines (TAB-indented or otherwise
non-header lines) are folded into ``body``. Phase-2 stack collection
walks neighbouring entries (not raw lines), so no extra context is
stored here.
"""
timestamp: str
level: str
body: list[str]
line_start: int
line_end: int
@dataclass
class FirstSeen:
"""Provenance for the first occurrence of a deduped record."""
file: str
line: int
timestamp: str
@dataclass
class Record:
"""One classified, deduplicated error/warning record. Field names mirror
the JSON output schema in the spec verbatim — this object is intended to
be `dataclasses.asdict()`-ed straight into the output document.
"""
signature: str
pattern_id: str
level: str
kind: str
mod_id: str
mod_name: str
attribution: str
confidence: str
attribution_reason: str
file: str
line: int
cause_chain: str
stack: list[str]
first_seen: FirstSeen
occurrence_count: int
files: list[str]
excerpt: str
# ---------------------------------------------------------------------------
# Phase 0: file parse
# ---------------------------------------------------------------------------
def parse_file(path: pathlib.Path) -> list[Entry]:
"""Parse a DebugLog-server file into a list of multi-line entries.
Continuation lines (those not matching ENTRY_RE) append to the previous
entry's body, mirroring codex's PatternParser behaviour for multi-line
Java stack traces under an ERROR header.
"""
entries: list[Entry] = []
current: Entry | None = None
with path.open("r", encoding="utf-8", errors="replace") as f:
for lineno, raw in enumerate(f, start=1):
line = raw.rstrip("\n")
m = ENTRY_RE.match(line)
if m:
if current is not None:
entries.append(current)
current = Entry(
timestamp=m.group("ts"),
level=m.group("level"),
body=[m.group("rest")],
line_start=lineno,
line_end=lineno,
)
elif current is not None:
current.body.append(line)
current.line_end = lineno
# else: orphan line at start of file (no preceding entry); ignore.
if current is not None:
entries.append(current)
return entries
# ---------------------------------------------------------------------------
# Phase 1: severity-prefix recognition
# ---------------------------------------------------------------------------
def is_severity_entry(entry: Entry) -> bool:
"""True if this entry is an ERROR/WARN/SEVERE/FATAL — either by the
bracketed level or a leading SEVERE/ERROR/WARN token in the body (after
stripping the session-meta prefix)."""
if entry.level in SEVERITY_LEVELS:
return True
if entry.body and SEVERITY_BODY_RE.match(_strip_session_meta(entry.body[0])):
return True
return False
def effective_level(entry: Entry) -> str:
"""Return the effective severity for an entry. Body-prefix takes
precedence — covers the SEVERE-in-body case where bracketed level is LOG
*and* the case where bracketed level is ERROR but body says SEVERE.
"""
if entry.body:
m = SEVERITY_BODY_RE.match(_strip_session_meta(entry.body[0]))
if m:
return m.group(1).upper()
return entry.level
# ---------------------------------------------------------------------------
# Phase 2: bidirectional stack collection
# ---------------------------------------------------------------------------
def _is_stack_shaped(line: str) -> bool:
return bool(STACK_HINT_RE.search(line))
def _strip_session_meta(body_line: str) -> str:
"""Strip the ``General f:N, t:N, st:...> `` session-metadata prefix from
a body's first line so pattern matching can run against the meaningful tail.
"""
return SESSION_META_RE.sub("", body_line)
def _collect_pre_stack(entries: list[Entry], hit_idx: int) -> list[str]:
"""Walk back through prior entries; collect stack-shaped lines from each
entry's body. Stop at the previous severity-flagged entry. Cap collection
at MAX_STACK_FRAMES and at STACK_WALK_LINES of body lines examined.
Per spec, only return the block if at least one line looks stack-shaped.
"""
collected: list[str] = []
lines_examined = 0
for j in range(hit_idx - 1, -1, -1):
prior = entries[j]
# Stop at another severity line (the previous error's boundary).
if is_severity_entry(prior):
break
# Walk this entry's body in reverse; for body[0] the session-meta
# prefix is part of the line — strip it before stack-shape check.
for k in range(len(prior.body) - 1, -1, -1):
line = prior.body[k]
stripped = _strip_session_meta(line) if k == 0 else line
lines_examined += 1
if _is_stack_shaped(stripped):
collected.append(stripped.strip())
if len(collected) >= MAX_STACK_FRAMES:
break
if lines_examined >= STACK_WALK_LINES:
break
if len(collected) >= MAX_STACK_FRAMES or lines_examined >= STACK_WALK_LINES:
break
if not collected:
return []
collected.reverse() # restore source order
return collected
def _collect_post_stack(entries: list[Entry], hit_idx: int) -> list[str]:
"""Look at the entry's own body continuation lines first (stack frames
attached to the ERROR header become continuation lines after parsing),
then walk forward through subsequent entries. Stop at the next severity
entry. Cap at MAX_STACK_FRAMES and at STACK_WALK_LINES of body lines."""
entry = entries[hit_idx]
collected: list[str] = []
lines_examined = 0
# Body continuations (skip body[0] which is the headline itself).
for line in entry.body[1:]:
lines_examined += 1
if _is_stack_shaped(line):
collected.append(line.strip())
if len(collected) >= MAX_STACK_FRAMES:
return collected
if lines_examined >= STACK_WALK_LINES:
return collected
for j in range(hit_idx + 1, len(entries)):
next_entry = entries[j]
if is_severity_entry(next_entry):
break
for k, line in enumerate(next_entry.body):
stripped = _strip_session_meta(line) if k == 0 else line
lines_examined += 1
if _is_stack_shaped(stripped):
collected.append(stripped.strip())
if len(collected) >= MAX_STACK_FRAMES:
return collected
if lines_examined >= STACK_WALK_LINES:
return collected
return collected
def collect_stack(entries: list[Entry], hit_idx: int) -> list[str]:
"""Merge pre + post stack, dedup preserving order, cap at MAX_STACK_FRAMES."""
pre = _collect_pre_stack(entries, hit_idx)
post = _collect_post_stack(entries, hit_idx)
seen: set[str] = set()
merged: list[str] = []
for frame in pre + post:
if frame in seen:
continue
seen.add(frame)
merged.append(frame)
if len(merged) >= MAX_STACK_FRAMES:
break
return merged
# ---------------------------------------------------------------------------
# Phase 3: mod attribution
# ---------------------------------------------------------------------------
def _norm_mod_key(raw_name: str) -> str:
"""Lowercase, strip spaces / apostrophes / hyphens. Used as mod_id."""
s = raw_name.lower()
for ch in (" ", "'", "-"):
s = s.replace(ch, "")
return s
def _entry_text(entry: Entry) -> str:
"""Whole-entry text (body + collected stack) for marker scanning."""
return "\n".join(entry.body)
def attribute_entry(entry: Entry, prior_lookback_lines: list[str]) -> tuple[str, str, str, str, str]:
"""Determine ``(mod_id, mod_name, attribution, confidence, reason)``.
``prior_lookback_lines`` is the up-to-INFERRED_LOOKBACK_LINES raw file
lines preceding this entry (used to look up a recent ``Lua((MOD:Y))``
marker for inferred attribution).
"""
text = _entry_text(entry)
# 1. Direct via Lua((MOD:X))
m = LUA_MOD_MARKER_RE.search(text)
if m:
raw = m.group(1).strip()
return (
_norm_mod_key(raw),
raw,
"direct",
"high",
"Lua((MOD:...)) marker on the entry itself",
)
# 2. Direct via "needed by <mod>"
m = NEEDED_BY_RE.search(text)
if m:
raw = m.group(1).strip().rstrip(".,;")
return (
_norm_mod_key(raw),
raw,
"direct",
"high",
"needed by <mod> hint",
)
# 3. Direct via require("X") failed — attribute to required module name.
m = REQUIRE_FAILED_RE.search(text)
if m:
raw = m.group(1).strip()
# Mod-name first segment (PZ paths often look like Mod/Foo/Bar).
mod_name = raw.split("/")[0] if "/" in raw else raw
return (
_norm_mod_key(mod_name),
mod_name,
"direct",
"high",
'require("...") failed shape',
)
# 4. Inferred — Lua-shaped body + recent Lua((MOD:Y)) within lookback.
if any(p.search(text) for p in LUA_SHAPED_PATTERNS):
for line in reversed(prior_lookback_lines):
mm = LUA_MOD_MARKER_RE.search(line)
if mm:
raw = mm.group(1).strip()
return (
_norm_mod_key(raw),
raw,
"inferred",
"medium",
f"Lua-shaped body; nearest Lua((MOD:{raw})) within "
f"{INFERRED_LOOKBACK_LINES}-line lookback",
)
return (
"__unattributed__",
"",
"unattributed",
"low",
"no marker; body not Lua-shaped or no recent Lua((MOD:...))",
)
# ---------------------------------------------------------------------------
# Phase 4: file:line extraction (five fallbacks, in order)
# ---------------------------------------------------------------------------
def extract_file_line(text: str) -> tuple[str, int]:
"""Run the five fallbacks in order. Returns ``(file, line)`` with line=0
when only a path was matched."""
m = FILE_LINE_AT_RE.search(text)
if m:
return m.group(1), int(m.group(2))
m = FILE_LINE_FUNCTION_RE.search(text)
if m:
return m.group(1), int(m.group(2))
m = FILE_LINE_STRING_RE.search(text)
if m:
return m.group(1), int(m.group(2))
m = FILE_LINE_QUOTED_RE.search(text)
if m:
return m.group(1), int(m.group(2)) if m.group(2) else 0
m = FILE_LINE_UNQUOTED_RE.search(text)
if m:
return m.group(1), int(m.group(2)) if m.group(2) else 0
return "", 0
# ---------------------------------------------------------------------------
# Phase 5: cause-chain extraction
# ---------------------------------------------------------------------------
def extract_cause_chain(text: str) -> str:
"""Return ``ExceptionA: msg -> ExceptionB: msg`` joined chain, deduped,
capped at MAX_CAUSE_CHAIN_LEVELS levels.
"""
tokens: list[str] = []
seen: set[str] = set()
for line in text.splitlines():
cb = CAUSED_BY_RE.search(line)
if cb:
cls = cb.group(1)
msg = cb.group(2) or ""
tok = f"{cls}: {msg.strip()}".rstrip(": ").strip()
if tok not in seen:
seen.add(tok)
tokens.append(tok)
continue
ex = EXCEPTION_LINE_RE.search(line)
if ex:
cls = ex.group(1)
msg = ex.group(2) or ""
tok = f"{cls}: {msg.strip()}".rstrip(": ").strip()
if tok not in seen:
seen.add(tok)
tokens.append(tok)
if len(tokens) >= MAX_CAUSE_CHAIN_LEVELS:
break
return " -> ".join(tokens[:MAX_CAUSE_CHAIN_LEVELS])
# ---------------------------------------------------------------------------
# Phase 6: Java exception kind detection
# ---------------------------------------------------------------------------
JAVA_EXCEPTION_RE = re.compile(r"(?:\w+\.)+\w+(?:Exception|Error)\b")
def detect_kind(entry: Entry, attribution: str, body_text: str) -> str:
"""Determine the ``kind`` field. Order: engine_noise > require_failed >
java_exception > lua_runtime > runtime."""
# Phase 7 short-circuit (engine noise outranks others per spec — engine
# noise is PZ's own diagnostic chatter regardless of class).
if any(p.search(body_text) for p in ENGINE_NOISE_PATTERNS):
return "engine_noise"
if REQUIRE_FAILED_RE.search(body_text):
return "require_failed"
has_java = bool(JAVA_EXCEPTION_RE.search(body_text))
has_lua_marker = bool(LUA_MOD_MARKER_RE.search(body_text))
if has_java and not has_lua_marker:
return "java_exception"
# Lua-attributed runtime / inferred
if has_lua_marker or attribution in ("direct", "inferred"):
return "lua_runtime"
return "runtime"
# ---------------------------------------------------------------------------
# Phase 8: signature computation
# ---------------------------------------------------------------------------
def normalize_first_line(first: str) -> str:
"""Per spec: strip session metadata prefix, flatten quoted strings to
``"<S>"`` / ``'<S>'``, flatten ≥2-digit numeric runs to ``<N>``, collapse
whitespace, truncate to 200 chars.
"""
s = first.strip()
s = SESSION_META_RE.sub("", s)
s = DOUBLE_QUOTED_RE.sub('"<S>"', s)
s = SINGLE_QUOTED_RE.sub("'<S>'", s)
s = NUMERIC_RUN_RE.sub("<N>", s)
s = WS_RUN_RE.sub(" ", s)
return s[:PATTERN_ID_FIRST_LINE_MAX]
def compute_pattern_id(level: str, first_line: str) -> str:
"""``sha256(level + normalized_first_line)[:16]``, prefixed ``sha256:``."""
norm = normalize_first_line(first_line)
h = hashlib.sha256(f"{level}\n{norm}".encode("utf-8")).hexdigest()
return f"sha256:{h[:16]}"
def compute_signature(pattern_id: str, mod_id: str) -> str:
"""``sha256(pattern_id + mod_id)[:16]``, prefixed ``sha256:``."""
h = hashlib.sha256(f"{pattern_id}\n{mod_id}".encode("utf-8")).hexdigest()
return f"sha256:{h[:16]}"
# ---------------------------------------------------------------------------
# Aggregation (phase 9) and the public classify_entries entry point
# ---------------------------------------------------------------------------
_CONFIDENCE_RANK: dict[str, int] = {"low": 0, "medium": 1, "high": 2}
_ATTRIBUTION_RANK: dict[str, int] = {
"unattributed": 0,
"inferred": 1,
"direct": 2,
}
def _build_excerpt(entry: Entry, max_chars: int = 1000) -> str:
"""Best-effort one-block excerpt of the entry (header + continuations)."""
lines: list[str] = []
header = f'[{entry.timestamp}] {entry.level}: '
if entry.body:
lines.append(header + entry.body[0])
for cont in entry.body[1:]:
lines.append(cont)
text = "\n".join(lines)
if len(text) > max_chars:
text = text[:max_chars] + "\n... [truncated]"
return text
def _build_lookback_window(entries: list[Entry], hit_idx: int) -> list[str]:
"""Flatten body lines from prior entries (most recent first; up to
INFERRED_LOOKBACK_LINES total) for inferred attribution lookback.
We walk backwards through ``entries`` accumulating each entry's body lines
until we've gathered INFERRED_LOOKBACK_LINES lines or run out of entries.
Returned list is in source order (oldest first) so callers can call
``reversed()`` on it.
"""
collected: list[str] = []
i = hit_idx - 1
while i >= 0 and len(collected) < INFERRED_LOOKBACK_LINES:
for line in reversed(entries[i].body):
collected.append(line)
if len(collected) >= INFERRED_LOOKBACK_LINES:
break
i -= 1
collected.reverse()
return collected
def classify_entries(entries: list[Entry], source_file: str = "") -> list[Record]:
"""Apply phases 1-9 to a parsed-file entry list. Returns one Record per
unique (mod_id, error_shape) pair after dedup on signature.
"""
by_signature: dict[str, Record] = {}
for hit_idx, entry in enumerate(entries):
if not is_severity_entry(entry):
continue
level = effective_level(entry)
body_text = _entry_text(entry)
# Phase 2: stack collection
stack = collect_stack(entries, hit_idx)
# Phase 3: attribution (with INFERRED_LOOKBACK_LINES lookback)
prior_window = _build_lookback_window(entries, hit_idx)
mod_id, mod_name, attribution, confidence, attribution_reason = attribute_entry(
entry, prior_window
)
# Phase 4: file:line extraction (search body + stack frames)
search_text = body_text + "\n" + "\n".join(stack)
file_path, line_no = extract_file_line(search_text)
# Phase 5: cause-chain extraction
cause_chain = extract_cause_chain(search_text)
# Phase 6 & 7: kind detection (engine_noise short-circuits)
kind = detect_kind(entry, attribution, body_text)
# Phase 8: signature computation
pattern_id = compute_pattern_id(level, entry.body[0] if entry.body else "")
signature = compute_signature(pattern_id, mod_id)
# Phase 9: dedup & aggregate
if signature not in by_signature:
by_signature[signature] = Record(
signature=signature,
pattern_id=pattern_id,
level=level,
kind=kind,
mod_id=mod_id,
mod_name=mod_name,
attribution=attribution,
confidence=confidence,
attribution_reason=attribution_reason,
file=file_path,
line=line_no,
cause_chain=cause_chain,
stack=list(stack),
first_seen=FirstSeen(
file=source_file,
line=entry.line_start,
timestamp=entry.timestamp,
),
occurrence_count=1,
files=[source_file] if source_file else [],
excerpt=_build_excerpt(entry),
)
else:
rec = by_signature[signature]
rec.occurrence_count += 1
if source_file and source_file not in rec.files:
rec.files.append(source_file)
# Promote attribution / confidence if this hit is stronger.
if _ATTRIBUTION_RANK[attribution] > _ATTRIBUTION_RANK[rec.attribution]:
rec.attribution = attribution
rec.attribution_reason = attribution_reason
if mod_name:
rec.mod_name = mod_name
if _CONFIDENCE_RANK[confidence] > _CONFIDENCE_RANK[rec.confidence]:
rec.confidence = confidence
# Merge stack frames (preserving order, capped).
for frame in stack:
if frame not in rec.stack and len(rec.stack) < MAX_STACK_FRAMES:
rec.stack.append(frame)
# Extend cause chain if the new hit has additional segments.
if cause_chain and cause_chain != rec.cause_chain:
# Concatenate unseen tokens.
old = rec.cause_chain.split(" -> ") if rec.cause_chain else []
new = cause_chain.split(" -> ")
merged = list(old)
for tok in new:
if tok and tok not in merged:
merged.append(tok)
rec.cause_chain = " -> ".join(merged[:MAX_CAUSE_CHAIN_LEVELS])
return list(by_signature.values())
__all__ = [
"Entry",
"FirstSeen",
"Record",
"parse_file",
"classify_entries",
"is_severity_entry",
"effective_level",
"collect_stack",
"attribute_entry",
"extract_file_line",
"extract_cause_chain",
"detect_kind",
"normalize_first_line",
"compute_pattern_id",
"compute_signature",
"INFERRED_LOOKBACK_LINES",
"MAX_STACK_FRAMES",
"STACK_WALK_LINES",
"MAX_CAUSE_CHAIN_LEVELS",
]

View File

View File

@@ -0,0 +1,7 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:04:00.000] ERROR: General f:0, t:1776297840000, st:48,648,355,178> Lua((MOD:Test Mod Alpha)) wrapper failure
java.lang.RuntimeException: outer wrapper at zombie.Foo(Foo.java:10)
Caused by: java.lang.IllegalStateException: middle layer
Caused by: java.lang.NullPointerException: deepest cause
at zombie.Bar(Bar.java:99)
[16-04-26 00:04:01.000] LOG : General f:0, t:1776297841000, st:48,648,356,178> after.

View File

@@ -0,0 +1,8 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] ERROR: General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Test Mod Alpha)) crash 1
at media/lua/client/A.lua:11
[16-04-26 00:01:01.000] ERROR: General f:0, t:1776297661000, st:48,648,176,178> Lua((MOD:Test Mod Alpha)) crash 1
at media/lua/client/A.lua:11
[16-04-26 00:01:02.000] ERROR: General f:0, t:1776297662000, st:48,648,177,178> Lua((MOD:Test Mod Alpha)) crash 1
at media/lua/client/A.lua:11
[16-04-26 00:01:03.000] LOG : General f:0, t:1776297663000, st:48,648,178,178> ok.

View File

View File

@@ -0,0 +1,4 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:03:00.000] ERROR: General f:0, t:1776297780000, st:48,648,295,178> KahluaThread.flusherrormessage> dumping lua stack trace
at media/lua/client/Foo.lua:1
[16-04-26 00:03:01.000] LOG : General f:0, t:1776297781000, st:48,648,296,178> after.

View File

@@ -0,0 +1,10 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] ERROR: General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Test Mod A)) format1
at media/lua/client/F1.lua:11
[16-04-26 00:01:01.000] ERROR: General f:0, t:1776297661000, st:48,648,176,178> Lua((MOD:Test Mod B)) format2
function: doStuff -- file: media/lua/client/F2.lua line # 22
[16-04-26 00:01:02.000] ERROR: General f:0, t:1776297662000, st:48,648,177,178> Lua((MOD:Test Mod C)) format3
[string "media/lua/client/F3.lua"]:33: bang
[16-04-26 00:01:03.000] ERROR: General f:0, t:1776297663000, st:48,648,178,178> Lua((MOD:Test Mod D)) format4 about "media/lua/client/F4.lua" failure
[16-04-26 00:01:04.000] ERROR: General f:0, t:1776297664000, st:48,648,179,178> Lua((MOD:Test Mod E)) format5 path media/lua/client/F5.lua mention
[16-04-26 00:01:05.000] LOG : General f:0, t:1776297665000, st:48,648,180,178> ok.

View File

@@ -0,0 +1,7 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] LOG : General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Spongies Clothing)) initialised.
[16-04-26 00:01:01.000] LOG : General f:0, t:1776297661000, st:48,648,176,178> ordinary log line.
[16-04-26 00:01:02.000] LOG : General f:0, t:1776297662000, st:48,648,177,178> another log line.
[16-04-26 00:01:03.000] ERROR: General f:0, t:1776297663000, st:48,648,178,178> LuaManager.GetFunctionObject> no such function: doStuff
at media/lua/client/Spongie.lua:7
[16-04-26 00:01:04.000] LOG : General f:0, t:1776297664000, st:48,648,179,178> ok.

View File

@@ -0,0 +1,8 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:19.080] ERROR: General f:0, t:1776297679080, st:48,648,194,258> DebugFileWatcher.registerDir> Exception thrown
java.nio.file.NoSuchFileException: /placeholder/config/mods at UnixException.translateToIOException(null:-1).
Stack trace:
at java.base/sun.nio.fs.UnixException.translateToIOException(Unknown Source)
at java.base/sun.nio.fs.UnixException.asIOException(Unknown Source)
at java.base/sun.nio.fs.LinuxWatchService$Poller.implRegister(Unknown Source)
[16-04-26 00:01:19.090] LOG : General f:0, t:1776297679090, st:48,648,194,268> after.

View File

@@ -0,0 +1,45 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] LOG : General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Test Mod Distant)) initialised.
[16-04-26 00:01:01.000] LOG : General f:0, t:1776297661000, st:48,648,176,178> filler 1.
[16-04-26 00:01:02.000] LOG : General f:0, t:1776297662000, st:48,648,177,178> filler 2.
[16-04-26 00:01:03.000] LOG : General f:0, t:1776297663000, st:48,648,178,178> filler 3.
[16-04-26 00:01:04.000] LOG : General f:0, t:1776297664000, st:48,648,179,178> filler 4.
[16-04-26 00:01:05.000] LOG : General f:0, t:1776297665000, st:48,648,180,178> filler 5.
[16-04-26 00:01:06.000] LOG : General f:0, t:1776297666000, st:48,648,181,178> filler 6.
[16-04-26 00:01:07.000] LOG : General f:0, t:1776297667000, st:48,648,182,178> filler 7.
[16-04-26 00:01:08.000] LOG : General f:0, t:1776297668000, st:48,648,183,178> filler 8.
[16-04-26 00:01:09.000] LOG : General f:0, t:1776297669000, st:48,648,184,178> filler 9.
[16-04-26 00:01:10.000] LOG : General f:0, t:1776297670000, st:48,648,185,178> filler 10.
[16-04-26 00:01:11.000] LOG : General f:0, t:1776297671000, st:48,648,186,178> filler 11.
[16-04-26 00:01:12.000] LOG : General f:0, t:1776297672000, st:48,648,187,178> filler 12.
[16-04-26 00:01:13.000] LOG : General f:0, t:1776297673000, st:48,648,188,178> filler 13.
[16-04-26 00:01:14.000] LOG : General f:0, t:1776297674000, st:48,648,189,178> filler 14.
[16-04-26 00:01:15.000] LOG : General f:0, t:1776297675000, st:48,648,190,178> filler 15.
[16-04-26 00:01:16.000] LOG : General f:0, t:1776297676000, st:48,648,191,178> filler 16.
[16-04-26 00:01:17.000] LOG : General f:0, t:1776297677000, st:48,648,192,178> filler 17.
[16-04-26 00:01:18.000] LOG : General f:0, t:1776297678000, st:48,648,193,178> filler 18.
[16-04-26 00:01:19.000] LOG : General f:0, t:1776297679000, st:48,648,194,178> filler 19.
[16-04-26 00:01:20.000] LOG : General f:0, t:1776297680000, st:48,648,195,178> filler 20.
[16-04-26 00:01:21.000] LOG : General f:0, t:1776297681000, st:48,648,196,178> filler 21.
[16-04-26 00:01:22.000] LOG : General f:0, t:1776297682000, st:48,648,197,178> filler 22.
[16-04-26 00:01:23.000] LOG : General f:0, t:1776297683000, st:48,648,198,178> filler 23.
[16-04-26 00:01:24.000] LOG : General f:0, t:1776297684000, st:48,648,199,178> filler 24.
[16-04-26 00:01:25.000] LOG : General f:0, t:1776297685000, st:48,648,200,178> filler 25.
[16-04-26 00:01:26.000] LOG : General f:0, t:1776297686000, st:48,648,201,178> filler 26.
[16-04-26 00:01:27.000] LOG : General f:0, t:1776297687000, st:48,648,202,178> filler 27.
[16-04-26 00:01:28.000] LOG : General f:0, t:1776297688000, st:48,648,203,178> filler 28.
[16-04-26 00:01:29.000] LOG : General f:0, t:1776297689000, st:48,648,204,178> filler 29.
[16-04-26 00:01:30.000] LOG : General f:0, t:1776297690000, st:48,648,205,178> filler 30.
[16-04-26 00:01:31.000] LOG : General f:0, t:1776297691000, st:48,648,206,178> filler 31.
[16-04-26 00:01:32.000] LOG : General f:0, t:1776297692000, st:48,648,207,178> filler 32.
[16-04-26 00:01:33.000] LOG : General f:0, t:1776297693000, st:48,648,208,178> filler 33.
[16-04-26 00:01:34.000] LOG : General f:0, t:1776297694000, st:48,648,209,178> filler 34.
[16-04-26 00:01:35.000] LOG : General f:0, t:1776297695000, st:48,648,210,178> filler 35.
[16-04-26 00:01:36.000] LOG : General f:0, t:1776297696000, st:48,648,211,178> filler 36.
[16-04-26 00:01:37.000] LOG : General f:0, t:1776297697000, st:48,648,212,178> filler 37.
[16-04-26 00:01:38.000] LOG : General f:0, t:1776297698000, st:48,648,213,178> filler 38.
[16-04-26 00:01:39.000] LOG : General f:0, t:1776297699000, st:48,648,214,178> filler 39.
[16-04-26 00:01:40.000] LOG : General f:0, t:1776297700000, st:48,648,215,178> filler 40.
[16-04-26 00:01:41.000] LOG : General f:0, t:1776297701000, st:48,648,216,178> filler 41.
[16-04-26 00:01:42.000] ERROR: General f:0, t:1776297702000, st:48,648,217,178> LuaManager.GetFunctionObject> no such function (way past lookback)
[16-04-26 00:01:43.000] LOG : General f:0, t:1776297703000, st:48,648,218,178> ok.

View File

@@ -0,0 +1,6 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:19.131] LOG : Mod f:0, t:1776297679131, st:48,648,194,309> loading example_mod_alpha.
[16-04-26 00:05:00.000] ERROR: General f:0, t:1776297900000, st:48,648,415,178> Lua((MOD:Test Mod Alpha)) something broke
at media/lua/client/Foo.lua:42
function: doStuff -- file: media/lua/client/Foo.lua line # 42
[16-04-26 00:05:01.000] LOG : General f:0, t:1776297901000, st:48,648,416,178> after the error.

View File

@@ -0,0 +1,3 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] LOG : General f:0, t:1776297660000, st:48,648,175,178> ordinary line.
[16-04-26 00:02:00.000] LOG : General f:0, t:1776297720000, st:48,648,235,178> nothing wrong.

View File

@@ -0,0 +1,5 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] LOG : General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Spongies Clothing)) initialised.
[16-04-26 00:01:01.000] LOG : General f:0, t:1776297661000, st:48,648,176,178> ordinary log line.
[16-04-26 00:01:03.000] ERROR: General f:0, t:1776297663000, st:48,648,178,178> Disk full while writing chunk data
[16-04-26 00:01:04.000] LOG : General f:0, t:1776297664000, st:48,648,179,178> ok.

View File

@@ -0,0 +1,6 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] ERROR: General f:0, t:1776297660000, st:48,648,175,178> Lua((MOD:Test Mod Alpha)) crash now
at media/lua/client/X.lua:11
at media/lua/client/Y.lua:22
[string "media/lua/client/Z.lua"]:33: oops
[16-04-26 00:01:04.000] LOG : General f:0, t:1776297664000, st:48,648,179,178> ok.

View File

@@ -0,0 +1,6 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] LOG : General f:0, t:1776297660000, st:48,648,175,178> at media/lua/client/A.lua:11
[16-04-26 00:01:01.000] LOG : General f:0, t:1776297661000, st:48,648,176,178> at media/lua/client/B.lua:22
[16-04-26 00:01:02.000] LOG : General f:0, t:1776297662000, st:48,648,177,178> [string "media/lua/client/C.lua"]:33: oops
[16-04-26 00:01:03.000] ERROR: General f:0, t:1776297663000, st:48,648,178,178> Lua((MOD:Test Mod Alpha)) crash
[16-04-26 00:01:04.000] LOG : General f:0, t:1776297664000, st:48,648,179,178> ok.

View File

@@ -0,0 +1,3 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] ERROR: General f:0, t:1776297660000, st:48,648,175,178> require("DependencyMod/Foo") failed: needed by Test Mod Alpha
[16-04-26 00:01:01.000] LOG : General f:0, t:1776297661000, st:48,648,176,178> ok.

View File

@@ -0,0 +1,5 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:01:00.000] ERROR: General f:0, t:1776297660000, st:48,648,175,178> ERROR: top-level error message
[16-04-26 00:01:01.000] WARN : General f:0, t:1776297661000, st:48,648,176,178> WARN: top-level warn message
[16-04-26 00:01:02.000] ERROR: General f:0, t:1776297662000, st:48,648,177,178> SEVERE: java-style severe message at zombie.Foo(Foo.java:5)
[16-04-26 00:01:03.000] LOG : General f:0, t:1776297663000, st:48,648,178,178> ok.

View File

@@ -0,0 +1,3 @@
[16-04-26 00:00:42.314] LOG : General f:0, t:1776297642254, st:48,648,157,434> server starting.
[16-04-26 00:02:00.000] WARN : General f:0, t:1776297720000, st:48,648,235,178> ZomboidFileSystem.loadModAndRequired> required mod "absent_mod" not found.
[16-04-26 00:02:01.000] LOG : General f:0, t:1776297721000, st:48,648,236,178> after.

View File

@@ -0,0 +1,95 @@
"""Tests for pz_parser phase 3 — mod attribution."""
from __future__ import annotations
import pathlib
import sys
import unittest
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1]))
import pz_parser # noqa: E402
FIXTURE_DIR = pathlib.Path(__file__).resolve().parent / "fixtures"
def fixture(name: str) -> pathlib.Path:
return FIXTURE_DIR / name
class AttributionBucketTests(unittest.TestCase):
"""Three confidence buckets: direct (high), inferred (medium),
unattributed (low)."""
def test_direct_attribution_when_lua_marker_on_entry(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_lua_attributed.txt"))
records = pz_parser.classify_entries(entries, source_file="la.txt")
self.assertEqual(len(records), 1)
rec = records[0]
self.assertEqual(rec.attribution, "direct")
self.assertEqual(rec.confidence, "high")
# mod_id is normalised: lowercase, no spaces / apostrophes / hyphens.
self.assertEqual(rec.mod_id, "testmodalpha")
self.assertEqual(rec.mod_name, "Test Mod Alpha")
def test_inferred_attribution_within_lookback_window(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_inferred.txt"))
records = pz_parser.classify_entries(entries, source_file="in.txt")
self.assertEqual(len(records), 1)
rec = records[0]
self.assertEqual(rec.attribution, "inferred")
self.assertEqual(rec.confidence, "medium")
self.assertEqual(rec.mod_id, "spongiesclothing")
def test_unattributed_when_no_marker_and_not_lua_shaped(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_unattributed.txt"))
records = pz_parser.classify_entries(entries, source_file="ua.txt")
self.assertEqual(len(records), 1)
rec = records[0]
self.assertEqual(rec.attribution, "unattributed")
self.assertEqual(rec.confidence, "low")
self.assertEqual(rec.mod_id, "__unattributed__")
class LookbackBoundaryTests(unittest.TestCase):
"""Phase 3 — 40-line inferred-attribution window boundary."""
def test_lua_marker_beyond_lookback_does_not_attribute(self) -> None:
# Fixture places the Lua((MOD:...)) >40 lines before the ERROR.
entries = pz_parser.parse_file(fixture("fixture_lookback_boundary.txt"))
records = pz_parser.classify_entries(entries, source_file="lb.txt")
self.assertEqual(len(records), 1)
rec = records[0]
# The Lua-shaped ERROR is far enough back to be unattributed.
self.assertEqual(rec.attribution, "unattributed")
self.assertEqual(rec.mod_id, "__unattributed__")
def test_non_lua_shaped_body_rejects_inferred_attribution(self) -> None:
# Recent Lua((MOD:Spongies Clothing)) emitted, but the ERROR body
# ("Disk full while writing chunk data") isn't Lua-shaped.
entries = pz_parser.parse_file(fixture("fixture_non_lua_no_inferred.txt"))
records = pz_parser.classify_entries(entries, source_file="nl.txt")
self.assertEqual(len(records), 1)
rec = records[0]
self.assertEqual(rec.attribution, "unattributed")
class NeededByTests(unittest.TestCase):
"""Phase 3 — direct attribution via "needed by <mod>" hint."""
def test_needed_by_extracts_dependent_mod(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_require_failed.txt"))
records = pz_parser.classify_entries(entries, source_file="rf.txt")
self.assertEqual(len(records), 1)
rec = records[0]
# "needed by Test Mod Alpha" should set the mod to Test Mod Alpha
# (preferred over the require("...") side which would mention
# DependencyMod). Either way we want direct/high.
self.assertEqual(rec.attribution, "direct")
self.assertEqual(rec.confidence, "high")
# The "needed by" branch is checked before the require() branch in
# the priority order; mod_id should reflect Test Mod Alpha.
self.assertEqual(rec.mod_id, "testmodalpha")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,199 @@
"""Tests for pz_parser parsing pipeline (phases 1, 2, 4-7, 9)."""
from __future__ import annotations
import pathlib
import sys
import unittest
# Make the parser module importable when running via `python -m unittest
# discover -s tools/pz-analyzer/tests`.
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1]))
import pz_parser # noqa: E402
FIXTURE_DIR = pathlib.Path(__file__).resolve().parent / "fixtures"
def fixture(name: str) -> pathlib.Path:
return FIXTURE_DIR / name
class ParseFileTests(unittest.TestCase):
"""Phase 0 — basic line-shape recognition and continuation folding."""
def test_parse_file_groups_continuations_under_entry(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_java_exception.txt"))
# 3 bracketed entries; the ERROR has 4 continuation lines.
self.assertEqual(len(entries), 3)
error_entry = entries[1]
self.assertEqual(error_entry.level, "ERROR")
self.assertGreater(len(error_entry.body), 1)
# First continuation should be the java exception line.
self.assertIn("NoSuchFileException", error_entry.body[1])
def test_parse_file_handles_empty_file(self) -> None:
self.assertEqual(pz_parser.parse_file(fixture("fixture_empty.txt")), [])
def test_parse_file_handles_no_errors(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_no_errors.txt"))
self.assertEqual(len(entries), 3)
self.assertTrue(all(e.level == "LOG" for e in entries))
class SeverityRecognitionTests(unittest.TestCase):
"""Phase 1 — ERROR / WARN / SEVERE recognition."""
def test_classify_picks_up_error_warn_and_severe(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_severity_variants.txt"))
records = pz_parser.classify_entries(entries, source_file="severity.txt")
levels = sorted({r.level for r in records})
# Spec accepts ERROR / WARN / SEVERE. The third entry has bracketed
# ERROR but body starts with SEVERE: ; effective_level should be SEVERE.
self.assertIn("ERROR", levels)
self.assertIn("WARN", levels)
self.assertIn("SEVERE", levels)
def test_log_lines_are_ignored(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_no_errors.txt"))
records = pz_parser.classify_entries(entries, source_file="x.txt")
self.assertEqual(records, [])
class StackCollectionTests(unittest.TestCase):
"""Phase 2 — bidirectional stack collection."""
def test_pre_stack_walk_picks_up_preceding_lua_frames(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_pre_stack.txt"))
# The ERROR entry is the 5th LOG-bracketed line; its predecessors are
# LOG-bracketed entries whose bodies are stack-shaped lines.
records = pz_parser.classify_entries(entries, source_file="pre.txt")
self.assertEqual(len(records), 1)
rec = records[0]
# Pre-stack walk should pick up at least the "at media/lua/.../A.lua:11" frame.
self.assertTrue(any("A.lua:11" in f for f in rec.stack))
def test_post_stack_collected_from_entry_body_continuations(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_post_stack.txt"))
records = pz_parser.classify_entries(entries, source_file="post.txt")
self.assertEqual(len(records), 1)
rec = records[0]
self.assertTrue(any("X.lua:11" in f for f in rec.stack))
self.assertTrue(any("Y.lua:22" in f for f in rec.stack))
# Lua [string "..."]:N form preserves quoting in the captured frame.
self.assertTrue(any("Z.lua" in f and ":33" in f for f in rec.stack))
def test_stack_capped_at_eight_frames(self) -> None:
# Synthesise an ERROR with many continuation frames.
lines = ["[16-04-26 00:00:42.314] ERROR: General f:0, t:1, st:1,2,3,4> Lua((MOD:Test Mod Alpha)) crash"]
for i in range(20):
lines.append(f"\tat media/lua/client/F{i}.lua:{i + 1}")
path = FIXTURE_DIR / "_runtime_stack_cap.txt"
path.write_text("\n".join(lines) + "\n")
try:
entries = pz_parser.parse_file(path)
records = pz_parser.classify_entries(entries, source_file="cap.txt")
self.assertEqual(len(records), 1)
self.assertLessEqual(len(records[0].stack), pz_parser.MAX_STACK_FRAMES)
# And it should be exactly MAX_STACK_FRAMES given >MAX inputs.
self.assertEqual(len(records[0].stack), pz_parser.MAX_STACK_FRAMES)
finally:
path.unlink()
class FileLineExtractionTests(unittest.TestCase):
"""Phase 4 — five-fallback file:line extraction."""
def test_each_fallback_form_extracts_path(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_file_line_fallbacks.txt"))
records = pz_parser.classify_entries(entries, source_file="ff.txt")
# 5 distinct ERRORs, distinct mods — should produce 5 records.
files = sorted(r.file for r in records)
self.assertEqual(
files,
sorted([
"media/lua/client/F1.lua",
"media/lua/client/F2.lua",
"media/lua/client/F3.lua",
"media/lua/client/F4.lua",
"media/lua/client/F5.lua",
]),
)
def test_quoted_path_without_line_number_yields_zero(self) -> None:
# Format 4 fixture line lacks a :NN suffix on the quoted path.
file_path, line_no = pz_parser.extract_file_line(
'failure about "media/lua/client/F4.lua" tail'
)
self.assertEqual(file_path, "media/lua/client/F4.lua")
self.assertEqual(line_no, 0)
class CauseChainTests(unittest.TestCase):
"""Phase 5 — Caused-by chain unwinding."""
def test_caused_by_chain_renders_with_arrow_separator(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_cause_chain.txt"))
records = pz_parser.classify_entries(entries, source_file="cc.txt")
self.assertEqual(len(records), 1)
chain = records[0].cause_chain
self.assertIn("RuntimeException", chain)
self.assertIn("IllegalStateException", chain)
self.assertIn("NullPointerException", chain)
# Order preserved (outer -> inner).
idx_runtime = chain.index("RuntimeException")
idx_illegal = chain.index("IllegalStateException")
idx_null = chain.index("NullPointerException")
self.assertLess(idx_runtime, idx_illegal)
self.assertLess(idx_illegal, idx_null)
def test_no_cause_chain_when_no_exceptions(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_unattributed.txt"))
records = pz_parser.classify_entries(entries, source_file="u.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].cause_chain, "")
class KindDetectionTests(unittest.TestCase):
"""Phases 6 & 7 — kind classification."""
def test_java_exception_kind_when_no_lua_marker(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_java_exception.txt"))
records = pz_parser.classify_entries(entries, source_file="je.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].kind, "java_exception")
# Java engine errors should resolve to __unattributed__.
self.assertEqual(records[0].mod_id, "__unattributed__")
def test_engine_noise_kind_for_kahluathread(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_engine_noise.txt"))
records = pz_parser.classify_entries(entries, source_file="en.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].kind, "engine_noise")
def test_lua_runtime_kind_for_attributed_lua_error(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_lua_attributed.txt"))
records = pz_parser.classify_entries(entries, source_file="la.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].kind, "lua_runtime")
def test_require_failed_kind(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_require_failed.txt"))
records = pz_parser.classify_entries(entries, source_file="rf.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].kind, "require_failed")
class AggregationTests(unittest.TestCase):
"""Phase 9 — dedup, occurrence_count, files-set growth."""
def test_three_identical_errors_dedup_to_one_record(self) -> None:
entries = pz_parser.parse_file(fixture("fixture_dedup.txt"))
records = pz_parser.classify_entries(entries, source_file="dd.txt")
self.assertEqual(len(records), 1)
self.assertEqual(records[0].occurrence_count, 3)
# files list shouldn't duplicate "dd.txt".
self.assertEqual(records[0].files, ["dd.txt"])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,59 @@
"""Tests for pz_parser phase 8 — signature computation."""
from __future__ import annotations
import pathlib
import sys
import unittest
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1]))
import pz_parser # noqa: E402
class PatternIdStabilityTests(unittest.TestCase):
"""pattern_id should be invariant under formatting variations."""
def test_pattern_id_collapses_numeric_runs(self) -> None:
a = pz_parser.compute_pattern_id(
"ERROR",
"General f:0, t:1776297642, st:48,648,157,434> failed at offset 12345",
)
b = pz_parser.compute_pattern_id(
"ERROR",
"General f:0, t:9999999999, st:99,99,99,99> failed at offset 99999",
)
self.assertEqual(a, b)
def test_pattern_id_collapses_quoted_strings_and_whitespace(self) -> None:
a = pz_parser.compute_pattern_id(
"ERROR",
'no such function "doStuff" in module',
)
b = pz_parser.compute_pattern_id(
"ERROR",
'no such function "fooBarBaz" in module',
)
# Whitespace-collapse plus quoted-string-flatten => same pattern_id.
self.assertEqual(a, b)
def test_pattern_id_changes_with_level(self) -> None:
a = pz_parser.compute_pattern_id("ERROR", "exception thrown")
b = pz_parser.compute_pattern_id("WARN", "exception thrown")
self.assertNotEqual(a, b)
class SignatureUniquenessTests(unittest.TestCase):
"""signature should fan out across mods sharing a pattern_id."""
def test_signature_unique_per_mod_for_shared_pattern(self) -> None:
# Same first line, different mod_ids — different signatures, same pattern_id.
pat = pz_parser.compute_pattern_id("ERROR", "Lua((MOD:X)) crash")
sig_a = pz_parser.compute_signature(pat, "spongiesclothing")
sig_b = pz_parser.compute_signature(pat, "testmodalpha")
self.assertNotEqual(sig_a, sig_b)
# Both should share their pattern_id (consumer's pattern-fanout view).
self.assertEqual(pat[:7], "sha256:")
if __name__ == "__main__":
unittest.main()