"""File-level conflict detection from cached manifests. Port of pzmm core/scanner.py:scan_file_conflicts adapted to read from the mod_files table (populated by worker.build_manifest_and_types) instead of walking on-disk media trees. See docs/plans/2026-05-04-pzmm-conflict-and-typing.md. """ from __future__ import annotations from collections import defaultdict from dataclasses import dataclass from typing import Dict, List, Tuple from mlos_sort import ModInfo # Filenames that legitimately appear across multiple mods because they're # intended merge points — either PZ engine-merged at runtime, or framework # extension hooks where multiple client mods coexist by design. Multiple # distinct sha1s here is by-design, not a conflict, so we skip them in # scan_file_conflicts. Sourced from HellDrinx Mod Manager's known false- # positive list (backend/services/workshop.cjs); add more here as we # encounter them. _IGNORED_FILENAMES = { # PZ engine-merged at load time "sandbox-options.txt", "fileguidtable.xml", # Framework extension hooks (designed for many mods to override) "mf_ismoodle.lua", # MoodleFramework hook "kp_extrabodylocations.lua", # KP body-locations framework hook "registries.lua", # damnlib-style registry framework # Specific mod-side merge points "hat_gasmask.xml", "hat_gasmask_nofilter.xml", "sounds_tmrremovemumble.txt", "null.wav", # silent placeholder, deliberately shared } @dataclass class FileConflict: rel_path: str providers: List[str] # mod_ids in input load order winner: str # mod_id (last in load order) _FETCH_MANIFEST = """ WITH inputs AS ( SELECT unnest($1::text[]) AS workshop_id, unnest($2::text[]) AS mod_id ) SELECT mf.workshop_id, mf.mod_id, mf.rel_path, mf.sha1 FROM mod_files mf JOIN inputs i ON mf.workshop_id = i.workshop_id AND mf.mod_id = i.mod_id """ async def scan_file_conflicts(conn, mods: List[ModInfo]) -> List[FileConflict]: """For the given (already-loaded) ModInfos, report rel_paths claimed by ≥2 mods with non-equal sha1. Returns list ordered by rel_path. Mods without manifest rows (`files_manifest_built=false`) silently contribute nothing to the conflict scan; the caller is responsible for surfacing them as `missing_manifests` in any user-facing payload. """ if len(mods) < 2: return [] wsids = [m.workshop_id or "" for m in mods] mod_ids = [m.id for m in mods] rows = await conn.fetch(_FETCH_MANIFEST, wsids, mod_ids) # mod_id → load-order index (input order = load order, mirroring pzmm) order_index: Dict[str, int] = {m.id: i for i, m in enumerate(mods)} # rel_path → list of (load_order_index, mod_id, sha1) by_path: Dict[str, List[Tuple[int, str, str]]] = defaultdict(list) for r in rows: mod_id = r["mod_id"] idx = order_index.get(mod_id) if idx is None: continue rel_path = r["rel_path"] # Skip known intended-merge-point filenames (engine-concatenated or # framework hooks). These produce noisy false positives because # multiple mods adding sandbox vars / framework hooks is by design. basename = rel_path.rsplit("/", 1)[-1] if basename in _IGNORED_FILENAMES: continue by_path[rel_path].append((idx, mod_id, r["sha1"])) conflicts: List[FileConflict] = [] for rel, entries in by_path.items(): # Need ≥2 distinct providers AND ≥2 distinct sha1s. If every # provider ships byte-identical content (same sha1), it's a # duplicate, not a conflict — pzmm scanner.py:55–66. unique_providers = {mod_id for _, mod_id, _ in entries} if len(unique_providers) < 2: continue unique_hashes = {sha for _, _, sha in entries} if len(unique_hashes) < 2: continue # Order providers by input load-order index. Winner = last loaded. ordered = sorted(entries, key=lambda e: e[0]) providers = [mod_id for _, mod_id, _ in ordered] # De-dup providers preserving order (a mod could ship the same # rel_path under both B41 and B42 layouts → seen twice). seen: set = set() dedup_providers: List[str] = [] for p in providers: if p not in seen: seen.add(p) dedup_providers.append(p) conflicts.append(FileConflict( rel_path=rel, providers=dedup_providers, winner=dedup_providers[-1], )) conflicts.sort(key=lambda c: c.rel_path) return conflicts