feat: pzmm conflict detection + content-type categorization

- mod_files manifest table populated at parse time
- POST /api/conflicts endpoint
- mod_types fingerprinting feeds derive_category
- DD filelist regex broadened to cover conflict-eligible exts
- media/maps/<*>/* excluded from manifest (per-mod namespaced,
  no conflict value, can be tens of MB per mod)

Plan: docs/plans/2026-05-04-pzmm-conflict-and-typing.md
This commit is contained in:
2026-05-04 15:22:35 +00:00
parent a15d35214e
commit b73325882e
9 changed files with 936 additions and 18 deletions

View File

@@ -26,6 +26,7 @@ from pydantic import BaseModel, Field
import adapters
import db
import diagnostics
import expansion
import jobs
import steam
@@ -191,6 +192,7 @@ def _row_to_modinfo(r) -> ModInfo:
maps=list(r["maps"] or []),
is_addon=bool(r["is_addon"]) if "is_addon" in r else False,
workshop_tags=list(r["workshop_tags"] or []) if "workshop_tags" in r else [],
mod_types=list(r["mod_types"] or []) if "mod_types" in r else [],
)
@@ -684,7 +686,7 @@ async def _build_result_for_job(
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
mp.requirements, mp.load_after, mp.load_before,
mp.incompatible_mods, mp.load_first, mp.load_last,
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
FROM mod_parsed mp
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
WHERE mp.workshop_id = ANY($1::text[])
@@ -1001,7 +1003,7 @@ async def sort_endpoint(req: SortRequest, request: Request) -> Dict[str, Any]:
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
mp.requirements, mp.load_after, mp.load_before,
mp.incompatible_mods, mp.load_first, mp.load_last,
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
FROM mod_parsed mp
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
WHERE mp.workshop_id = ANY($1::text[])
@@ -1215,7 +1217,7 @@ async def resort_endpoint(req: ResortRequest, request: Request) -> Dict[str, Any
SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
mp.requirements, mp.load_after, mp.load_before,
mp.incompatible_mods, mp.load_first, mp.load_last,
mp.tags, mp.maps, mp.is_addon, wm.tags AS workshop_tags
mp.tags, mp.maps, mp.is_addon, mp.mod_types, wm.tags AS workshop_tags
FROM mod_parsed mp
JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
WHERE mp.workshop_id IN (SELECT workshop_id FROM selected_wsids)
@@ -1469,6 +1471,71 @@ async def vote_broken_mod(
return {"upvotes": int(row["upvotes"]), "downvotes": int(row["downvotes"])}
@app.post("/api/conflicts")
async def conflicts_endpoint(req: SortRequest, request: Request) -> Dict[str, Any]:
    """Report file-path conflicts between the workshop mods named in the request.

    A conflict is a rel_path claimed by ≥2 input mods with non-equal sha1;
    the detection itself is delegated to ``diagnostics.scan_file_conflicts``.

    v1 accepts bare workshop ids only. Input that contains a collection is
    rejected with 400 so the caller can expand it through /api/sort first
    (where the async-job + drain-progress plumbing already lives).

    Returns a dict with two keys:
      * ``conflicts`` - one ``{"rel_path", "providers", "winner"}`` entry
        per conflict returned by the scan.
      * ``missing_manifests`` - input wsids for which no fetched row has
        ``files_manifest_built`` set; they are listed explicitly rather
        than silently ignored.

    Raises:
        HTTPException: 400 for collection input or when no workshop ids
            are found; 413 when more than ``MAX_IDS`` ids are supplied.
    """
    wsids, collections = parse_with_collections(req.input or "")

    # Validate the request up front, before touching the database.
    if collections:
        raise HTTPException(
            status_code=400,
            detail="conflict scan does not support collection input; resolve via /api/sort first",
        )
    if not wsids:
        raise HTTPException(status_code=400, detail="no workshop ids found in input")
    wsid_count = len(wsids)
    if wsid_count > MAX_IDS:
        raise HTTPException(
            status_code=413,
            detail=f"too many workshop ids ({wsid_count} > {MAX_IDS})",
        )

    # Only rows whose parse matches the current workshop revision are
    # selected (parsed_at_time_updated = time_updated); a wsid with no such
    # row yields nothing here and therefore ends up in missing_manifests.
    query = """
        SELECT mp.workshop_id, mp.mod_id, mp.name, mp.category,
               mp.requirements, mp.load_after, mp.load_before,
               mp.incompatible_mods, mp.load_first, mp.load_last,
               mp.tags, mp.maps, mp.is_addon, mp.mod_types,
               mp.files_manifest_built, wm.tags AS workshop_tags
        FROM mod_parsed mp
        JOIN workshop_meta wm ON wm.workshop_id = mp.workshop_id
        WHERE mp.workshop_id = ANY($1::text[])
          AND mp.parsed_at_time_updated = wm.time_updated
        ORDER BY mp.workshop_id, mp.mod_id
    """

    async with request.app.state.db.acquire() as conn:
        records = await conn.fetch(query, wsids)
        mods: List[ModInfo] = [_row_to_modinfo(rec) for rec in records]

        # A wsid counts as "manifest available" as soon as any one of its
        # rows has files_manifest_built set, even in a multi-mod wsid.
        wsids_with_manifest = {
            rec["workshop_id"] for rec in records if rec["files_manifest_built"]
        }
        missing_manifests = [w for w in wsids if w not in wsids_with_manifest]

        # The scan reuses the same connection, so it must run before release.
        found = await diagnostics.scan_file_conflicts(conn, mods)

    conflict_payload = [
        {"rel_path": item.rel_path, "providers": item.providers, "winner": item.winner}
        for item in found
    ]
    return {
        "conflicts": conflict_payload,
        "missing_manifests": missing_manifests,
    }
# ── static frontend ────────────────────────────────────────────────────────
# Mount LAST so all API routes win path resolution.
_FRONTEND_DIR = Path(__file__).resolve().parent.parent / "frontend"