feat: pzmm conflict detection + content-type categorization

- mod_files manifest table populated at parse time
- POST /api/conflicts endpoint
- mod_types fingerprinting feeds derive_category
- DD filelist regex broadened to cover conflict-eligible exts
- media/maps/<*>/* excluded from manifest (per-mod namespaced,
  no conflict value, can be tens of MB per mod)

Plan: docs/plans/2026-05-04-pzmm-conflict-and-typing.md
This commit is contained in:
2026-05-04 15:22:35 +00:00
parent a15d35214e
commit b73325882e
9 changed files with 936 additions and 18 deletions

View File

@@ -130,6 +130,11 @@ class ModInfo:
# signal for build / multiplayer / category detection. Distinct from
# `tags` which is mod.info-side (freeform).
workshop_tags: List[str] = field(default_factory=list)
# pzmm-style content fingerprint (Maps, Vehicles, Weapons, Traits, …)
# populated by worker.build_manifest_and_types at parse time. Empty when
# files_manifest_built=false (older cached rows); derive_category falls
# through to the existing cascade in that case.
mod_types: List[str] = field(default_factory=list)
warnings: Dict[str, List[str]] = field(default_factory=dict)
@@ -347,8 +352,15 @@ def load_mods_from_dir(root: Path) -> List[ModInfo]:
# Whole-word, case-insensitive match for "patch" / "compat" / "compatibility"
# in a mod name; derive_category returns "patch" when this pattern hits.
_PATCH_NAME_RE = re.compile(r"\b(patch|compat|compatibility)\b", re.IGNORECASE)
# Substring-based category hints (kept in sync with api/mlos_sort.py)
# NOTE(review): this name is rebound a few lines below with identical
# contents — presumably the pre-change line of the diff; confirm that only
# one assignment survives in the real file.
_LIB_NAME_HINTS = ["library", "libraries", "framework"]
# Substring lists used for derive_category name heuristics. Plain substring
# matching (vs. \b regex) survives PZ's mishmash of camelCase + underscore
# + version-suffix mod names (TrueActions_1.09, TrueMusic, TMMumble, …)
# that strict word boundaries fail on. False positives are accepted in
# exchange — names containing "music" without being music-related are rare
# in PZ.
# Hints for library-style mods; derive_category checks them against the mod
# name via _name_has.
_LIB_NAME_HINTS = [
"library", "libraries", "framework",
]
_LIB_NAME_RE = re.compile(
r'(?<![A-Za-z])(?:lib|api|core)(?![A-Za-z])'
r'|(?<=[a-z])(?:Lib|API|Core)(?![A-Za-z])',
@@ -381,6 +393,43 @@ def _name_has(name: str, hints: List[str]) -> bool:
return any(h in n for h in hints)
# pzmm content-type → sortof CATEGORY_ORDER mapping. Types that are absent
# from this table are "skip" types: they produce no category, so the caller
# falls through to the existing derive_category cascade. Items/Animations/Lua/
# Unknown are skipped because they are too generic; Maps/Sounds/Patch/Vehicles/
# Clothing duplicate signals already captured by the cascade but stay here as
# fallbacks for poorly-tagged mods.
_TYPE_TO_CAT: Dict[str, str] = {
    "Maps": "map",
    "Vehicles": "vehicle",
    "Weapons": "weapon",
    "Clothing": "wearable",
    "Traits": "code",
    "Professions": "profession",
    "Recipes": "crafting",
    "Tiles": "tile",
    "Textures": "texture",
    "Sounds": "sound",
    "UI": "ui",
    "Translations": "translation",
    "Patch": "patch",
    "Dependency": "tweaks",
    "Framework": "tweaks",
}


def _types_to_category(mod_types: List[str], name: str) -> Optional[str]:
    """Return the category for the first mapped entry of *mod_types*.

    Entries are tried in list order; the first one with a mapping in
    ``_TYPE_TO_CAT`` decides the result. A "vehicle" result is refined to
    "vehicle_spawn" when *name* contains "spawn zone" (case-insensitive).

    Args:
        mod_types: Content-type fingerprint of the mod. ``None`` or an empty
            list is treated as "no fingerprint".
        name: Mod display name; may be empty or ``None``.

    Returns:
        The mapped category, or ``None`` when no entry maps (only skip-types
        such as Items/Animations/Lua/Unknown, or nothing at all) so the
        caller can fall through to the existing cascade.
    """
    # `or ()` keeps a missing/None fingerprint from raising TypeError.
    for mod_type in mod_types or ():
        # Non-string entries can never be a table key; skipping them also
        # avoids TypeError from unhashable values (e.g. nested lists).
        if not isinstance(mod_type, str):
            continue
        cat = _TYPE_TO_CAT.get(mod_type)
        if not cat:
            continue
        # vehicle_spawn refinement matches the downstream ws_tag check.
        if cat == "vehicle" and name and "spawn zone" in name.lower():
            return "vehicle_spawn"
        return cat
    return None
def derive_category(mod: ModInfo) -> str:
"""Best-effort category from mod.info + workshop_meta.tags + name.
Mirrors api/mlos_sort.py; keep both copies in sync.
@@ -389,6 +438,15 @@ def derive_category(mod: ModInfo) -> str:
return mod.category
name = mod.name or ""
# pzmm-style content fingerprint takes precedence over name regex when
# available. Empty mod_types means files_manifest_built=false (older
# cached row); fall through to existing cascade.
if mod.mod_types:
cat = _types_to_category(mod.mod_types, name)
if cat:
return cat
if name and _PATCH_NAME_RE.search(name):
return "patch"
if _name_has(name, _LIB_NAME_HINTS) or (name and _LIB_NAME_RE.search(name)):