feat: pzmm conflict detection + content-type categorization
- mod_files manifest table populated at parse time
- POST /api/conflicts endpoint
- mod_types fingerprinting feeds derive_category
- DD filelist regex broadened to cover conflict-eligible exts
- media/maps/<*>/* excluded from manifest (per-mod namespaced, no conflict value, can be tens of MB per mod)

Plan: docs/plans/2026-05-04-pzmm-conflict-and-typing.md
This commit is contained in:
73
api/app.py
73
api/app.py
@@ -26,6 +26,7 @@ from pydantic import BaseModel, Field
|
||||
|
||||
import adapters
|
||||
import db
|
||||
import diagnostics
|
||||
import expansion
|
||||
import jobs
|
||||
import steam
|
||||
@@ -191,6 +192,7 @@ def _row_to_modinfo(r) -> ModInfo:
|
||||
maps=list(r["maps"] or []),
|
||||
is_addon=bool(r["is_addon"]) if "is_addon" in r else False,
|
||||
workshop_tags=list(r["workshop_tags"] or []) if "workshop_tags" in r else [],
|
||||
mod_types=list(r["mod_types"] or []) if "mod_types" in r else [],
|
||||
)
|
||||
|
||||
|
||||
@@ -684,7 +686,7 @@ async def _build_result_for_job(
|
||||
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
|
||||
mp.requirements, mp.load_after, mp.load_before,
|
||||
mp.incompatible_mods, mp.load_first, mp.load_last,
|
||||
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
|
||||
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
|
||||
FROM mod_parsed mp
|
||||
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
|
||||
WHERE mp.workshop_id = ANY($1::text[])
|
||||
@@ -1001,7 +1003,7 @@ async def sort_endpoint(req: SortRequest, request: Request) -> Dict[str, Any]:
|
||||
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
|
||||
mp.requirements, mp.load_after, mp.load_before,
|
||||
mp.incompatible_mods, mp.load_first, mp.load_last,
|
||||
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
|
||||
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
|
||||
FROM mod_parsed mp
|
||||
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
|
||||
WHERE mp.workshop_id = ANY($1::text[])
|
||||
@@ -1215,7 +1217,7 @@ async def resort_endpoint(req: ResortRequest, request: Request) -> Dict[str, Any
|
||||
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
|
||||
mp.requirements, mp.load_after, mp.load_before,
|
||||
mp.incompatible_mods, mp.load_first, mp.load_last,
|
||||
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
|
||||
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
|
||||
FROM mod_parsed mp
|
||||
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
|
||||
WHERE mp.workshop_id IN (SELECT workshop_id FROM selected_wsids)
|
||||
@@ -1469,6 +1471,71 @@ async def vote_broken_mod(
|
||||
return {"upvotes": int(row["upvotes"]), "downvotes": int(row["downvotes"])}
|
||||
|
||||
|
||||
@app.post("/api/conflicts")
async def conflicts_endpoint(req: SortRequest, request: Request) -> Dict[str, Any]:
    """Detect rel_paths claimed by ≥2 input mods with non-equal sha1.

    v1: bare wsids only. Collection input returns 400 so the caller can
    resolve via /api/sort first (where the async-job + drain-progress
    plumbing already lives). Mods whose `files_manifest_built` is false
    cannot be analyzed and are reported in `missing_manifests` instead of
    silently ignored.

    Mods are handed to the conflict scanner in the order their workshop ids
    appear in the request input, because the scanner treats list order as
    load order when it picks each conflict's `winner`.

    Returns:
        {"conflicts": [{"rel_path", "providers", "winner"}, ...],
         "missing_manifests": [wsid, ...]}

    Raises:
        HTTPException: 400 for collection input or when no workshop ids are
            found; 413 when more than MAX_IDS ids are supplied.
    """
    bare_wsids, collection_ids = parse_with_collections(req.input or "")
    if collection_ids:
        raise HTTPException(
            status_code=400,
            detail="conflict scan does not support collection input; resolve via /api/sort first",
        )
    if not bare_wsids:
        raise HTTPException(status_code=400, detail="no workshop ids found in input")
    if len(bare_wsids) > MAX_IDS:
        raise HTTPException(
            status_code=413,
            detail=f"too many workshop ids ({len(bare_wsids)} > {MAX_IDS})",
        )

    pool = request.app.state.db
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
                   mp.requirements, mp.load_after, mp.load_before,
                   mp.incompatible_mods, mp.load_first, mp.load_last,
                   mp.tags, mp.maps, mp.is_addon, mp.mod_types,
                   mp.files_manifest_built, wm.tags AS workshop_tags
            FROM mod_parsed mp
            JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
            WHERE mp.workshop_id = ANY($1::text[])
              AND mp.parsed_at_time_updated = wm.time_updated
            ORDER BY mp.workshop_id, mp.mod_id
            """,
            bare_wsids,
        )

        # The query returns rows in workshop_id order, which has nothing to
        # do with the order the caller listed the ids in. Restore input
        # order (first occurrence wins for repeated ids); the sort is
        # stable, so the mod_id ordering inside one workshop item is kept.
        input_pos: Dict[str, int] = {}
        for pos, wsid in enumerate(bare_wsids):
            input_pos.setdefault(wsid, pos)
        rows = sorted(
            rows,
            key=lambda r: input_pos.get(r["workshop_id"], len(input_pos)),
        )

        mods: List[ModInfo] = [_row_to_modinfo(r) for r in rows]

        # Missing-manifest wsids: input wsids that have no mod_parsed rows
        # OR whose rows all have files_manifest_built=false. Any single
        # built row in a multi-mod wsid counts as "manifest available".
        wsid_has_manifest: Dict[str, bool] = {}
        for r in rows:
            w = r["workshop_id"]
            built = bool(r["files_manifest_built"])
            wsid_has_manifest[w] = wsid_has_manifest.get(w, False) or built
        # dict.fromkeys de-duplicates while preserving input order, so an id
        # repeated in the input is reported only once.
        missing_manifests = list(
            dict.fromkeys(
                w for w in bare_wsids if not wsid_has_manifest.get(w, False)
            )
        )

        # Must run while the connection is still checked out of the pool.
        conflicts = await diagnostics.scan_file_conflicts(conn, mods)

    return {
        "conflicts": [
            {"rel_path": c.rel_path, "providers": c.providers, "winner": c.winner}
            for c in conflicts
        ],
        "missing_manifests": missing_manifests,
    }
|
||||
|
||||
|
||||
# ── static frontend ────────────────────────────────────────────────────────
|
||||
# Mount LAST so all API routes win path resolution.
|
||||
_FRONTEND_DIR = Path(__file__).resolve().parent.parent / "frontend"
|
||||
|
||||
58
api/categorize.py
Normal file
58
api/categorize.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Public helper for mapping pzmm content-type tags to sortof CATEGORY_ORDER.
|
||||
|
||||
The same mapping is also inlined in `mlos_sort.py` (both api/ and worker/
|
||||
copies, deliberately — worker uses a separate venv with no FastAPI deps,
|
||||
so it cannot import from api/). This module exposes the helper for
|
||||
non-mlos consumers (e.g. /api/conflicts diagnostics output) without
|
||||
forcing them to drag in the whole sorter module.
|
||||
|
||||
Source: pzmm core/mods.py:detect_mod_types ordering, mapped to sortof's
|
||||
CATEGORY_ORDER buckets per docs/plans/2026-05-04-pzmm-conflict-and-typing.md
|
||||
§3.4.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Content-type tag → sortof bucket. Items, Animations, Lua and Unknown are
# deliberately left out: they are too generic to decide a category, so a
# lookup miss tells the caller to try its other heuristics.
_TYPE_TO_CAT: Dict[str, str] = {
    "Maps": "map",
    "Vehicles": "vehicle",
    "Weapons": "weapon",
    "Clothing": "wearable",
    "Traits": "code",
    "Professions": "profession",
    "Recipes": "crafting",
    "Tiles": "tile",
    "Textures": "texture",
    "Sounds": "sound",
    "UI": "ui",
    "Translations": "translation",
    "Patch": "patch",
    "Dependency": "tweaks",
    "Framework": "tweaks",
}


def types_to_category(mod_types: List[str], name: str = "") -> Optional[str]:
    """Return the bucket of the first entry in `mod_types` that has a mapping.

    The result is None when `mod_types` is empty or when none of its entries
    appears in `_TYPE_TO_CAT` (only unmapped tags such as Items / Animations /
    Lua / Unknown).

    `name` only matters for the vehicle refinement: if the first mapped
    bucket is "vehicle" and the lower-cased name contains "spawn zone", the
    more specific "vehicle_spawn" is returned instead.
    """
    mapped = (_TYPE_TO_CAT.get(tag) for tag in mod_types or ())
    bucket = next((hit for hit in mapped if hit), None)
    if bucket is None:
        return None
    if bucket == "vehicle" and name and "spawn zone" in name.lower():
        return "vehicle_spawn"
    return bucket
|
||||
93
api/diagnostics.py
Normal file
93
api/diagnostics.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""File-level conflict detection from cached manifests.
|
||||
|
||||
Port of pzmm core/scanner.py:scan_file_conflicts adapted to read from the
|
||||
mod_files table (populated by worker.build_manifest_and_types) instead of
|
||||
walking on-disk media trees. See docs/plans/2026-05-04-pzmm-conflict-and-typing.md.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from mlos_sort import ModInfo
|
||||
|
||||
|
||||
@dataclass
class FileConflict:
    """One rel_path shipped with differing content by two or more mods."""

    rel_path: str
    # mod_ids in input load order, one entry per providing
    # (workshop_id, mod_id) pair; a mod_id can repeat when two workshop
    # items ship the same mod_id.
    providers: List[str]
    winner: str  # mod_id (last in load order)


# $1 and $2 are parallel arrays: element i of each forms one
# (workshop_id, mod_id) pair to fetch manifest rows for.
_FETCH_MANIFEST = """
WITH inputs AS (
    SELECT unnest($1::text[]) AS workshop_id,
           unnest($2::text[]) AS mod_id
)
SELECT mf.workshop_id, mf.mod_id, mf.rel_path, mf.sha1
FROM mod_files mf
JOIN inputs i
  ON mf.workshop_id = i.workshop_id
 AND mf.mod_id = i.mod_id
"""


async def scan_file_conflicts(conn, mods: List[ModInfo]) -> List[FileConflict]:
    """For the given (already-loaded) ModInfos, report rel_paths claimed by
    ≥2 mods with non-equal sha1. Returns list ordered by rel_path.

    The position of a mod in `mods` is its load order; the last provider in
    that order is reported as the winner. A provider is identified by its
    (workshop_id, mod_id) pair, matching the join in `_FETCH_MANIFEST`, so
    two workshop items that reuse one mod_id are kept apart.

    Mods without manifest rows (`files_manifest_built=false`) silently
    contribute nothing to the conflict scan; the caller is responsible for
    surfacing them as `missing_manifests` in any user-facing payload.

    Args:
        conn: connection object exposing an awaitable
            `fetch(sql, wsids, mod_ids)` that yields rows indexable by
            "workshop_id", "mod_id", "rel_path" and "sha1".
        mods: mods to compare, in load order.
    """
    if len(mods) < 2:
        return []

    provider_keys: List[Tuple[str, str]] = [
        (m.workshop_id or "", m.id) for m in mods
    ]
    rows = await conn.fetch(
        _FETCH_MANIFEST,
        [wsid for wsid, _ in provider_keys],
        [mod_id for _, mod_id in provider_keys],
    )

    # (workshop_id, mod_id) → load-order index. Keying on mod_id alone would
    # let a later workshop item with the same mod_id overwrite an earlier
    # one, corrupting both the ordering and the provider count.
    order_index: Dict[Tuple[str, str], int] = {
        key: i for i, key in enumerate(provider_keys)
    }

    # rel_path → list of (load_order_index, mod_id, sha1)
    by_path: Dict[str, List[Tuple[int, str, str]]] = defaultdict(list)
    for r in rows:
        idx = order_index.get((r["workshop_id"], r["mod_id"]))
        if idx is None:
            continue
        by_path[r["rel_path"]].append((idx, r["mod_id"], r["sha1"]))

    conflicts: List[FileConflict] = []
    for rel in sorted(by_path):
        entries = by_path[rel]
        # Need ≥2 distinct providers AND ≥2 distinct sha1s. If every
        # provider ships byte-identical content (same sha1), it's a
        # duplicate, not a conflict.
        if len({sha for _, _, sha in entries}) < 2:
            continue
        # Collapsing on the load-order index also de-dups a mod that lists
        # the same rel_path more than once (e.g. under two build layouts).
        provider_by_idx: Dict[int, str] = {
            idx: mod_id for idx, mod_id, _ in entries
        }
        if len(provider_by_idx) < 2:
            continue
        providers = [provider_by_idx[i] for i in sorted(provider_by_idx)]
        conflicts.append(FileConflict(
            rel_path=rel,
            providers=providers,
            winner=providers[-1],
        ))

    return conflicts
|
||||
@@ -130,6 +130,11 @@ class ModInfo:
|
||||
# signal for build / multiplayer / category detection. Distinct from
|
||||
# `tags` which is mod.info-side (freeform).
|
||||
workshop_tags: List[str] = field(default_factory=list)
|
||||
# pzmm-style content fingerprint (Maps, Vehicles, Weapons, Traits, …)
|
||||
# populated by worker.build_manifest_and_types at parse time. Empty when
|
||||
# files_manifest_built=false (older cached rows); derive_category falls
|
||||
# through to the existing cascade in that case.
|
||||
mod_types: List[str] = field(default_factory=list)
|
||||
warnings: Dict[str, List[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
@@ -389,30 +394,77 @@ def _name_has(name: str, hints: List[str]) -> bool:
|
||||
return any(h in n for h in hints)
|
||||
|
||||
|
||||
# Maps pzmm content-type tags onto sortof CATEGORY_ORDER buckets. Tags with
# no entry here (Items / Animations / Lua / Unknown — too generic) make the
# lookup miss, so derive_category carries on with its existing cascade.
# Maps / Sounds / Patch / Vehicles / Clothing overlap with signals the
# cascade already reads; they are kept as fallbacks for poorly-tagged mods.
_TYPE_TO_CAT: Dict[str, str] = {
    "Maps": "map",
    "Vehicles": "vehicle",
    "Weapons": "weapon",
    "Clothing": "wearable",
    "Traits": "code",
    "Professions": "profession",
    "Recipes": "crafting",
    "Tiles": "tile",
    "Textures": "texture",
    "Sounds": "sound",
    "UI": "ui",
    "Translations": "translation",
    "Patch": "patch",
    "Dependency": "tweaks",
    "Framework": "tweaks",
}


def _types_to_category(mod_types: List[str], name: str) -> Optional[str]:
    """Resolve a bucket from the first `mod_types` entry with a mapping.

    Gives back None when no entry is present in `_TYPE_TO_CAT`, which lets
    the caller fall through to its remaining heuristics. A "vehicle" hit is
    upgraded to "vehicle_spawn" when the lower-cased `name` contains
    "spawn zone".
    """
    for type_tag in mod_types:
        bucket = _TYPE_TO_CAT.get(type_tag)
        if not bucket:
            continue
        # Only the first mapped tag is ever considered, so decide here.
        if bucket == "vehicle" and name and "spawn zone" in name.lower():
            return "vehicle_spawn"
        return bucket
    return None
|
||||
|
||||
|
||||
def derive_category(mod: ModInfo) -> str:
|
||||
"""Best-effort category from mod.info + workshop_meta.tags + name.
|
||||
|
||||
Detection order (most specific → least):
|
||||
1. mod.info `category=` if explicit and recognized.
|
||||
2. patch / fix name regex (Spec G-patch).
|
||||
3. library/framework name regex (extends FRAMEWORK_KEYS).
|
||||
4. mod.maps non-empty → map.
|
||||
5. moodle / profession / movement / specific gameplay axes by name.
|
||||
6. Workshop tags (canonical Steam controlled vocab): Audio + 'music' →
|
||||
2. pzmm-style mod_types fingerprint (when files_manifest_built=true).
|
||||
3. patch / fix name regex (Spec G-patch).
|
||||
4. library/framework name regex (extends FRAMEWORK_KEYS).
|
||||
5. mod.maps non-empty → map.
|
||||
6. moodle / profession / movement / specific gameplay axes by name.
|
||||
7. Workshop tags (canonical Steam controlled vocab): Audio + 'music' →
|
||||
music; Audio → sound; Weapons → weapon; Vehicles → vehicle;
|
||||
Clothing/Armor + 'armor' → armor, else wearable; Building →
|
||||
building; Farming → farming; Food → food; Skills → profession
|
||||
(or moodle); Interface → ui; Textures → texture;
|
||||
Language/Translation → translation; QOL → qol; Multiplayer alone
|
||||
→ multiplayer.
|
||||
7. mod.info tags (freeform fallback).
|
||||
8. FRAMEWORK_KEYS substring match → tweaks.
|
||||
9. Default → other.
|
||||
8. mod.info tags (freeform fallback).
|
||||
9. FRAMEWORK_KEYS substring match → tweaks.
|
||||
10. Default → other.
|
||||
"""
|
||||
if mod.category in CATEGORY_ORDER and mod.category != "undefined":
|
||||
return mod.category
|
||||
|
||||
name = mod.name or ""
|
||||
|
||||
# pzmm-style content fingerprint takes precedence over name regex when
|
||||
# available. Empty mod_types means files_manifest_built=false (older
|
||||
# cached row); fall through to existing cascade.
|
||||
if mod.mod_types:
|
||||
cat = _types_to_category(mod.mod_types, name)
|
||||
if cat:
|
||||
return cat
|
||||
|
||||
if name and _PATCH_NAME_RE.search(name):
|
||||
return "patch"
|
||||
if _name_has(name, _LIB_NAME_HINTS) or (name and _LIB_NAME_RE.search(name)):
|
||||
|
||||
Reference in New Issue
Block a user