Bring in two narrow additions previously sitting on a feature branch:
- api/diagnostics.py: add an _IGNORED_FILENAMES skip-list for /api/conflicts. PZ engine-concatenated and framework-hook files (sandbox-options.txt etc.) ship with intentionally distinct sha1s across mods, so they are not real conflicts. The live cache had 33 providers of sandbox-options.txt with 31 distinct hashes, generating false-positive conflict rows.
- worker/build_manifest_and_types: extend the path-based Vehicles signal to include models_x/vehicles/ and models/vehicles/, catching mods that ship 3D vehicle assets without scripts. Existing mods need their manifest rebuilt before mod_types reflects the new signal.
737 lines
28 KiB
Python
737 lines
28 KiB
Python
"""
|
|
worker.py - pzsort cache filler
|
|
|
|
Single-shot CLI that takes Steam Workshop IDs on argv, refreshes metadata
|
|
from Steam's anonymous API, and only runs DepotDownloader for cache misses
|
|
(where workshop_meta.time_updated has changed since last parse).
|
|
|
|
Usage:
|
|
python3 worker.py <workshop_id> [<workshop_id> ...]
|
|
python3 worker.py --force <workshop_id> ... # ignore cache, re-download
|
|
|
|
Env (or .env file):
|
|
DATABASE_URL postgresql://pzsort:<pw>@127.0.0.1:5439/pzsort
|
|
DD_PATH path to DepotDownloader executable
|
|
PZ_APP_ID 108600 (default)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
import asyncpg
|
|
import httpx
|
|
|
|
# Reuse the parser from the sorter
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
from mlos_sort import parse_mod_info, ModInfo # noqa: E402
|
|
|
|
# Steam app id the worker operates on; the PZ_APP_ID env var overrides the
# built-in default of 108600.
PZ_APP_ID = int(os.getenv("PZ_APP_ID", "108600"))

# Default DepotDownloader executable location (env override: DD_PATH).
DEFAULT_DD_PATH = os.getenv("DD_PATH", "./DepotDownloader")

# Anonymous legacy endpoint used for bulk metadata lookups.
STEAM_API = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"

# Key-authenticated endpoint used to read an item's `children` array.
STEAM_AUTHED_DETAILS = "https://api.steampowered.com/IPublishedFileService/GetDetails/v1/"
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Steam API
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
def fetch_workshop_details(workshop_ids: List[str]) -> Dict[str, dict]:
    """Look up workshop metadata through the legacy GetPublishedFileDetails API.

    The request is an anonymous form POST (no API key). All ids are sent in
    a single call.

    Args:
        workshop_ids: Workshop ids to look up. An empty list short-circuits
            without touching the network.

    Returns:
        Mapping of ``publishedfileid`` -> raw detail dict, one entry per item
        present in the response.

    Raises:
        httpx.HTTPStatusError: on a non-2xx response (via raise_for_status).
    """
    if not workshop_ids:
        return {}

    # Form layout: itemcount=N, publishedfileids[0]=..., publishedfileids[1]=...
    form: Dict[str, str] = {
        "itemcount": str(len(workshop_ids)),
        **{f"publishedfileids[{idx}]": wsid for idx, wsid in enumerate(workshop_ids)},
    }

    with httpx.Client(timeout=30.0) as client:
        response = client.post(STEAM_API, data=form)
        response.raise_for_status()
        payload = response.json()

    entries = payload.get("response", {}).get("publishedfiledetails", [])
    return {entry["publishedfileid"]: entry for entry in entries}
|
|
|
|
|
|
def flatten_tags(detail: dict) -> List[str]:
    """Return the non-empty tag names from a workshop detail dict.

    Reads ``detail["tags"]``, expected to be a list of ``{"tag": name}``
    dicts, and keeps each truthy ``tag`` value in order.

    A missing key, an explicit ``None`` value, and entries that are not
    dicts are all tolerated, so a sparse or odd payload yields an empty or
    shorter list instead of raising.
    """
    names: List[str] = []
    # `or []` covers the key being present but null, which .get's default
    # argument alone does not.
    for entry in detail.get("tags") or []:
        if not isinstance(entry, dict):
            continue
        name = entry.get("tag")
        if name:
            names.append(name)
    return names
|
|
|
|
|
|
def fetch_required_wsids(
    workshop_id: str,
    timeout: int = 15,
) -> Optional[List[str]]:
    """Fetch the Required Items wsids for a Workshop item via the
    authenticated `IPublishedFileService/GetDetails` endpoint, which
    returns the same `children` array Steam renders into the public
    page's Required Items sidebar.

    Args:
        workshop_id: The item whose required items are wanted.
        timeout: httpx client timeout in seconds.

    Returns
        None — missing/invalid `STEAM_WEB_API_KEY`, network error,
               unexpected response shape, or non-success result. Caller
               MUST NOT overwrite the existing cached value.
        []   — item exists but has no Required Items.
        list — required item wsids in `sortorder` order, deduped, with
               the item's own id removed.

    Replaces the previous HTML-scrape path (Steam 429'd anonymous
    /sharedfiles/filedetails/ requests aggressively, requiring throttle
    + 1h cooldown after a 429 storm). The authenticated API has a far
    more generous quota and stays well clear of those limits at our
    drain rate.
    """
    key = os.environ.get("STEAM_WEB_API_KEY")
    if not key:
        return None
    params = {
        "key": key,
        "publishedfileids[0]": workshop_id,
        "includechildren": "true",
    }
    try:
        with httpx.Client(timeout=timeout) as client:
            r = client.get(STEAM_AUTHED_DETAILS, params=params)
            r.raise_for_status()
            body = r.json()
    except (httpx.HTTPError, httpx.TimeoutException, ValueError) as e:
        # httpx error messages embed the request URL, and the API key is
        # part of the query string, so scrub it before logging.
        reason = str(e).replace(key, "<redacted>")
        print(f" ! required_wsids fetch failed for {workshop_id}: {reason}",
              file=sys.stderr)
        return None
    if not isinstance(body, dict):
        # Valid JSON of an unexpected shape: soft failure, same as a
        # transport error, so a cached value is never clobbered.
        return None
    items = body.get("response", {}).get("publishedfiledetails") or []
    if not items:
        return None
    item = items[0]
    # Steam returns result=1 on success; 9 = file not found, etc. Treat
    # anything else as a soft failure so we don't clobber a previously
    # cached value with [] on a transient lookup miss.
    if item.get("result") != 1:
        return None
    seen: set = set()
    out: List[str] = []
    children = sorted(
        item.get("children") or [],
        key=lambda c: c.get("sortorder", 0),
    )
    for c in children:
        wid = c.get("publishedfileid")
        # Drop blanks, repeats, and a self-reference.
        if wid and wid not in seen and wid != workshop_id:
            seen.add(wid)
            out.append(wid)
    return out
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# DepotDownloader
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
def run_depot_downloader(
    workshop_id: str,
    output_dir: Path,
    dd_path: Path,
    # Default filter: mod.info / map.info for the parse path, plus the
    # conflict-eligible text extensions (lua/txt/xml/json/ini) consumed by
    # build_manifest_and_types. Binary assets (.png/.dds/.bank/.ogg/.wav/.X)
    # are deliberately left out; their type signals fall back to
    # workshop_meta.tags in mlos_sort.derive_category.
    filelist_regex: str = r"regex:.*\.(info|lua|txt|xml|json|ini)$",
    timeout: int = 300,
    max_attempts: int = 3,
    backoff_s: float = 2.0,
) -> bool:
    """Download one workshop item with DepotDownloader, filtered by regex.

    Writes the filter to ``<output_dir>/_filelist.txt`` and invokes the
    executable with ``-app/-pubfile/-filelist/-dir``. The extracted tree
    (``mods/<mod_id>/mod.info`` plus text assets under ``media/``) lands
    in ``output_dir``.

    A non-zero exit code or a timeout triggers another attempt, up to
    ``max_attempts`` in total, sleeping ``backoff_s`` seconds between
    attempts. Retrying in-process spares the caller a full re-claim cycle
    for the common transient CDN failure; callers may still retry at a
    higher level.

    Returns:
        True as soon as one attempt exits with code 0, otherwise False.
    """
    import time as _time

    output_dir.mkdir(parents=True, exist_ok=True)
    pattern_file = output_dir / "_filelist.txt"
    pattern_file.write_text(filelist_regex + "\n", encoding="utf-8")

    argv = [str(dd_path)]
    argv += ["-app", str(PZ_APP_ID)]
    argv += ["-pubfile", workshop_id]
    argv += ["-filelist", str(pattern_file)]
    argv += ["-dir", str(output_dir)]

    failure = ""
    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        completed = None
        try:
            completed = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=timeout,
                check=False,
            )
        except subprocess.TimeoutExpired:
            failure = "timeout"
            print(f" ! DepotDownloader timeout for {workshop_id} (attempt {attempt}/{max_attempts})",
                  file=sys.stderr)

        if completed is not None:
            if completed.returncode == 0:
                if attempt > 1:
                    print(f" ✓ DepotDownloader recovered for {workshop_id} on attempt {attempt}",
                          file=sys.stderr)
                return True
            failure = f"rc={completed.returncode}"
            print(f" ! DepotDownloader rc={completed.returncode} for {workshop_id} "
                  f"(attempt {attempt}/{max_attempts})", file=sys.stderr)
            # Show the tail of stderr, or of stdout when stderr is empty.
            tail = completed.stderr[-500:] if completed.stderr else completed.stdout[-500:]
            print(tail, file=sys.stderr)

        # No sleep after the final attempt.
        if attempt < max_attempts:
            _time.sleep(backoff_s)

    print(f" !! DepotDownloader gave up on {workshop_id} after {max_attempts} attempts (last: {failure})",
          file=sys.stderr)
    return False
|
|
|
|
|
|
def discover_mod_infos(output_dir: Path) -> List[Path]:
    """Locate every mod.info one or two levels below ``output_dir/mods``.

    Both layouts are collected:
      B41: mods/<mod_id>/mod.info
      B42: mods/<mod_id>/<gameVersion>/mod.info   e.g. mods/Foo/42/mod.info

    One mod may ship both. The result is sorted, and callers UPSERT on
    (workshop_id, mod_id), so the entry that sorts last for a given mod is
    the one that sticks: the root-level B41 file when present, otherwise
    the highest-sorting B42 branch.
    """
    found = [
        *output_dir.glob("mods/*/mod.info"),
        *output_dir.glob("mods/*/*/mod.info"),
    ]
    found.sort()
    return found
|
|
|
|
|
|
def discover_map_folders(mip_parent: Path) -> List[str]:
    """List map folder names for the mod whose mod.info sits in `mip_parent`.

    Layouts handled:
      B41:        mods/<modId>/mod.info
                  mods/<modId>/media/maps/<x>/map.info
      B42:        mods/<modId>/<branch>/mod.info   (branch e.g. '42', '42.13')
                  mods/<modId>/<branch>/media/maps/<x>/map.info
      B42 split:  mod.info under '42/' while the map data lives under a
                  sibling 'common/' branch (observed in Project RV Interior).

    Because of the split layout, the search always starts from the mod-id
    root and looks at the root itself plus every branch directory.

    Returns:
        Unique folder names (the directory containing each map.info), in
        the order their map.info paths sort.
    """
    # mip_parent is the mod-id root itself when it sits directly under mods/.
    sits_under_mods = mip_parent.parent.name == "mods"
    modid_root = mip_parent if sits_under_mods else mip_parent.parent

    map_infos = [
        *modid_root.glob("media/maps/*/map.info"),
        *modid_root.glob("*/media/maps/*/map.info"),
    ]
    map_infos.sort()

    # dict.fromkeys keeps the first occurrence of each name in sorted order.
    return list(dict.fromkeys(info.parent.name for info in map_infos))
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Single-pass manifest + content-type detection (Plan: 2026-05-04 pzmm port)
|
|
#
|
|
# Both Integration A (file-conflict manifest) and Integration B (mod_types
|
|
# fingerprinting) read the same files from the temp DD extraction. We walk
|
|
# the mod_id root once, hashing conflict-eligible files into a manifest map
|
|
# AND collecting pzmm-style content signals into a tag set in the same loop.
|
|
# No two-pass implementations.
|
|
# -----------------------------------------------------------------------------
|
|
|
|
_CONFLICT_EXTS = {".lua", ".txt", ".xml", ".json", ".ini"}
|
|
|
|
# map subtrees are namespaced per-mod by directory; conflict surface is ~zero,
|
|
# and worldmap.xml can be tens of MB. Skipping anything under maps/<MapName>/
|
|
# from manifest insertion AND mod_types content sniffing. map.info itself
|
|
# still drives `mod.maps` (via discover_map_folders) which feeds the existing
|
|
# `mod.maps non-empty → map` rule in derive_category.
|
|
_RE_MAP_SUBTREE = re.compile(r"^maps/[^/]+/")
|
|
|
|
# Ordered by pzmm preference: first match wins when types_to_category maps
|
|
# this list down to a single sortof CATEGORY_ORDER bucket.
|
|
_TYPE_PREFERRED = [
|
|
"Maps", "Vehicles", "Weapons", "Items", "Clothing", "Traits",
|
|
"Professions", "Recipes", "Tiles", "Textures", "Sounds",
|
|
"Animations", "UI", "Translations", "Lua", "Patch", "Dependency",
|
|
"Framework",
|
|
]
|
|
|
|
|
|
def _sha1(path: Path) -> str:
|
|
h = hashlib.sha1()
|
|
with path.open("rb") as f:
|
|
for chunk in iter(lambda: f.read(1024 * 128), b""):
|
|
h.update(chunk)
|
|
return h.hexdigest()
|
|
|
|
|
|
def _read_small_text(path: Path, limit: int = 256_000) -> str:
|
|
try:
|
|
with path.open("rb") as f:
|
|
data = f.read(limit)
|
|
return data.decode("utf-8", errors="ignore").lower()
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def build_manifest_and_types(
    mip_parent: Path, mod_id: str, raw: str
) -> Tuple[List[Tuple[str, str, int]], List[str]]:
    """Single-pass walk under the mod_id root.

    Every directory named ``media`` below the mod-id root is visited, and
    every file below it contributes to the manifest, the type tags, or both.
    Directories and files are visited in sorted path order so the result
    does not depend on filesystem enumeration order. This matters twice:
    duplicate rel_paths from multi-branch mods resolve to the
    lexicographically last branch (root-level ``media/`` sorts after
    ``42/media``), and the capped content sniffing always samples the same
    files.

    Args:
        mip_parent: Directory containing the mod.info being processed;
            either the mod-id root itself or a branch directory below it.
        mod_id: Parsed mod id. Not used by the walk itself.
        raw: Raw mod.info text, used for keyword and ``require=`` signals.

    Returns:
        manifest_rows: list of (rel_path, sha1, size_bytes) for conflict-eligible
                       files (lua/txt/xml/json/ini). rel_path is lowercased,
                       posix-style, prefixed with "media/" so it's comparable
                       across mods regardless of B41/B42/split branch layout.
                       Last-wins on duplicate rel_paths from multi-branch mods.
                       Rows are sorted by rel_path.
        mod_types:     pzmm-ordered content tag list (Maps, Vehicles, …).
                       Falls back to ["Dependency"] or ["Unknown"] when no
                       media/ subtree exists.
    """
    if mip_parent.parent.name == "mods":
        modid_root = mip_parent
    else:
        modid_root = mip_parent.parent

    tags: set = set()

    # Keyword signals are substring matches over the whole mod.info text.
    name_blob = (raw or "").lower()
    if any(w in name_blob for w in ("compat", "compatibility", "patch", "fix")):
        tags.add("Patch")
    if any(w in name_blob for w in ("api", "core", "dependency", "framework", "library", "required")):
        tags.add("Dependency")

    manifest_map: Dict[str, Tuple[str, int]] = {}
    script_text_parts: List[str] = []
    lua_text_parts: List[str] = []
    has_media = False

    # sorted(): rglob yields in filesystem order, which is not stable.
    for media_dir in sorted(modid_root.rglob("media")):
        if not media_dir.is_dir():
            continue
        has_media = True
        for path in sorted(media_dir.rglob("*")):
            if not path.is_file():
                continue
            try:
                rel_below = path.relative_to(media_dir).as_posix().lower()
            except ValueError:
                continue
            suffix = path.suffix.lower()
            in_map_subtree = bool(_RE_MAP_SUBTREE.match(rel_below))

            # Manifest: skip per-map subtree (see _RE_MAP_SUBTREE comment).
            if suffix in _CONFLICT_EXTS and not in_map_subtree:
                rel = "media/" + rel_below
                try:
                    size = path.stat().st_size
                    sha = _sha1(path)
                    manifest_map[rel] = (sha, size)
                except OSError:
                    # Best-effort: an unreadable file is left out of the
                    # manifest instead of failing the whole mod.
                    pass

            # Path-based mod_type signals — always fire, even for files inside
            # the per-map subtree. The "Maps" tag is the canonical signal that
            # this mod ships a map and shouldn't be lost when we exclude the
            # heavy worldmap.xml / objects.lua content from the manifest.
            if rel_below.startswith("maps/"):
                tags.add("Maps")
            if rel_below.startswith("texturepacks/") or "tiledefinitions" in rel_below:
                tags.add("Tiles")
            if (rel_below.startswith(("textures/", "models_x/", "models/"))
                    or suffix in {".png", ".dds"}):
                # The workshop poster image alone does not make a texture mod.
                if path.name.lower() != "poster.png":
                    tags.add("Textures")
            # Path-based vehicle detection. scripts/vehicles is the universal
            # signal; models[_x]/vehicles/ catches mods that ship 3D assets
            # without scripts (rarer, but real — borrowed from HellDrinx Mod
            # Manager's heuristic). rel_below is already lowercased so the
            # capital "X" in the original PZ "models_X" path lands as "models_x".
            if rel_below.startswith(("scripts/vehicles/", "scripts/vehicle",
                                     "models_x/vehicles/", "models/vehicles/")):
                tags.add("Vehicles")
            if rel_below.startswith(("clothing/", "scripts/clothing/")):
                tags.add("Clothing")
            if (rel_below.startswith(("sound/", "sounds/", "fmod/"))
                    or suffix in {".bank", ".ogg", ".wav"}):
                tags.add("Sounds")
            if rel_below.startswith(("ui/", "lua/client/ui/")):
                tags.add("UI")
            if rel_below.startswith(("anims/", "animsets/", "actiongroups/")):
                tags.add("Animations")
            if rel_below.startswith("lua/shared/translate/") or "/translate/" in rel_below:
                tags.add("Translations")

            # Content-blob accumulation: skip per-map subtree. Map-internal lua
            # / scripts (rare but possible) wouldn't normally collide with
            # other mods anyway, and reading them just costs memory for no
            # detection upside.
            if not in_map_subtree:
                if rel_below.startswith("lua/"):
                    tags.add("Lua")
                # Caps (60 lua / 80 script files, truncated reads) bound
                # the memory spent on sniffing.
                if suffix == ".lua" and len(lua_text_parts) < 60:
                    lua_text_parts.append(_read_small_text(path, 64_000))
                if (rel_below.startswith("scripts/") and suffix in {".txt", ".xml"}
                        and len(script_text_parts) < 80):
                    script_text_parts.append(_read_small_text(path, 96_000))

    if not has_media:
        return [], (["Dependency"] if "Dependency" in tags else ["Unknown"])

    # Both blobs are already lowercased by _read_small_text.
    script_blob = "\n".join(script_text_parts)
    lua_blob = "\n".join(lua_text_parts)

    if " type = weapon" in script_blob or "displaycategory = weapon" in script_blob:
        tags.add("Weapons")
    if " vehicle " in script_blob or "module vehicles" in script_blob:
        tags.add("Vehicles")
    if "item " in script_blob:
        tags.add("Items")
    if "recipe " in script_blob or " evolvedrecipe " in script_blob:
        tags.add("Recipes")
    if "bodylocation" in script_blob or "clothingitem" in script_blob:
        tags.add("Clothing")
    if "traitfactory.addtrait" in lua_blob:
        tags.add("Traits")
    if "professionfactory.addprofession" in lua_blob:
        tags.add("Professions")

    # Framework is a last resort: no other tag at all, a require= line in
    # mod.info, and no script content.
    has_require = bool(re.search(r"^\s*require\s*=", raw or "", re.IGNORECASE | re.MULTILINE))
    if not tags and has_require and not script_blob:
        tags.add("Framework")

    ordered = [t for t in _TYPE_PREFERRED if t in tags] or ["Unknown"]
    manifest_rows = [(rel, sha, size) for rel, (sha, size) in sorted(manifest_map.items())]
    return manifest_rows, ordered
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# DB upserts
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
# SQL: insert-or-update one workshop_meta row keyed on workshop_id.
# Takes 12 positional parameters ($1..$12) in the column order listed in the
# INSERT; last_checked_at is set to now() on both the insert and update paths.
UPSERT_WORKSHOP_META = """
|
|
INSERT INTO workshop_meta (
|
|
workshop_id, title, description, tags, creator_steamid,
|
|
time_created, time_updated, file_size, preview_url,
|
|
consumer_app_id, visibility, banned, last_checked_at
|
|
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12, now())
|
|
ON CONFLICT (workshop_id) DO UPDATE SET
|
|
title = EXCLUDED.title,
|
|
description = EXCLUDED.description,
|
|
tags = EXCLUDED.tags,
|
|
creator_steamid = EXCLUDED.creator_steamid,
|
|
time_created = EXCLUDED.time_created,
|
|
time_updated = EXCLUDED.time_updated,
|
|
file_size = EXCLUDED.file_size,
|
|
preview_url = EXCLUDED.preview_url,
|
|
consumer_app_id = EXCLUDED.consumer_app_id,
|
|
visibility = EXCLUDED.visibility,
|
|
banned = EXCLUDED.banned,
|
|
last_checked_at = now();
|
|
"""
|
|
|
|
# SQL: parameters are $1 = workshop_id being installed, $2 = mod_id.
# Deletes mod_parsed rows for the same mod_id owned by any other workshop_id
# and records each displaced workshop_id in mod_id_conflicts (refreshing
# recorded_at when the same triple was already recorded).
EVICT_AND_RECORD_CONFLICT = """
|
|
-- Per the cache invariant: a mod_id is owned by exactly one wsid at a time.
|
|
-- When we're about to UPSERT (wsid, mod_id), evict any (other_wsid, mod_id)
|
|
-- claims so the new pull becomes canonical, and record the eviction in
|
|
-- mod_id_conflicts so /api/sort can warn users who paste the displaced wsid.
|
|
WITH evicted AS (
|
|
DELETE FROM mod_parsed
|
|
WHERE mod_id = $2 AND workshop_id <> $1
|
|
RETURNING workshop_id
|
|
)
|
|
INSERT INTO mod_id_conflicts (mod_id, evicting_wsid, evicted_wsid)
|
|
SELECT $2, $1, workshop_id FROM evicted
|
|
ON CONFLICT (mod_id, evicting_wsid, evicted_wsid)
|
|
DO UPDATE SET recorded_at = now();
|
|
"""
|
|
|
|
# SQL: insert-or-update one mod_parsed row keyed on (workshop_id, mod_id).
# Takes 18 positional parameters ($1..$18) in the column order listed in the
# INSERT; parsed_at is set to now() on both the insert and update paths.
UPSERT_MOD_PARSED = """
|
|
INSERT INTO mod_parsed (
|
|
workshop_id, mod_id, name, category,
|
|
requirements, load_after, load_before, incompatible_mods,
|
|
load_first, load_last, tags, maps,
|
|
raw_mod_info, version_min, is_addon,
|
|
mod_types, files_manifest_built,
|
|
parsed_at_time_updated, parsed_at
|
|
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18, now())
|
|
ON CONFLICT (workshop_id, mod_id) DO UPDATE SET
|
|
name = EXCLUDED.name,
|
|
category = EXCLUDED.category,
|
|
requirements = EXCLUDED.requirements,
|
|
load_after = EXCLUDED.load_after,
|
|
load_before = EXCLUDED.load_before,
|
|
incompatible_mods = EXCLUDED.incompatible_mods,
|
|
load_first = EXCLUDED.load_first,
|
|
load_last = EXCLUDED.load_last,
|
|
tags = EXCLUDED.tags,
|
|
maps = EXCLUDED.maps,
|
|
raw_mod_info = EXCLUDED.raw_mod_info,
|
|
version_min = EXCLUDED.version_min,
|
|
is_addon = EXCLUDED.is_addon,
|
|
mod_types = EXCLUDED.mod_types,
|
|
files_manifest_built = EXCLUDED.files_manifest_built,
|
|
parsed_at_time_updated = EXCLUDED.parsed_at_time_updated,
|
|
parsed_at = now();
|
|
"""
|
|
|
|
# SQL: remove every mod_files row for one (workshop_id = $1, mod_id = $2).
DELETE_MOD_FILES = """
|
|
DELETE FROM mod_files WHERE workshop_id = $1 AND mod_id = $2;
|
|
"""
|
|
|
|
# SQL: insert one manifest row ($1 workshop_id, $2 mod_id, $3 rel_path,
# $4 sha1, $5 size_bytes); on a duplicate (workshop_id, mod_id, rel_path)
# the stored sha1 and size_bytes are overwritten.
INSERT_MOD_FILE = """
|
|
INSERT INTO mod_files (workshop_id, mod_id, rel_path, sha1, size_bytes)
|
|
VALUES ($1, $2, $3, $4, $5)
|
|
ON CONFLICT (workshop_id, mod_id, rel_path) DO UPDATE SET
|
|
sha1 = EXCLUDED.sha1,
|
|
size_bytes = EXCLUDED.size_bytes;
|
|
"""
|
|
|
|
# Description-text heuristic for "this mod is an optional add-on to the
|
|
# primary mod published by the same wsid". Matches:
|
|
# "Optional add-on: removes ..." (TMMumble)
|
|
# "optional addon ..."
|
|
# "Optional add on ..."
|
|
# Strict "optional + add-on" keyword pair to avoid false positives on
|
|
# generic "addon" naming. Author-driven signal — set via the description=
|
|
# field of mod.info.
|
|
# Case-insensitive: a `description =` field whose same-line value contains
# "Optional add-on" / "Optional addon" / "Optional add on".
_RE_OPTIONAL_ADDON = re.compile(
    r"description\s*=\s*[^\r\n]*\bOptional\s+Add[- ]?on\b",
    flags=re.IGNORECASE,
)


def detect_is_addon(raw: str) -> bool:
    """Report whether mod.info text declares itself an optional add-on.

    True when the ``description=`` line carries the "Optional add-on"
    phrase (see _RE_OPTIONAL_ADDON); a falsy `raw` is treated as empty
    text and yields False.
    """
    text = raw if raw else ""
    return _RE_OPTIONAL_ADDON.search(text) is not None
|
|
|
|
# SQL: for workshop_id = $1, delete mod_parsed rows whose mod_id is not in
# the text[] array passed as $2.
DELETE_STALE_MOD_PARSED = """
|
|
DELETE FROM mod_parsed
|
|
WHERE workshop_id = $1 AND mod_id <> ALL($2::text[]);
|
|
"""
|
|
|
|
# SQL: select the mod_ids already parsed for workshop_id = $1 at
# parsed_at_time_updated = $2; any returned row is treated as a cache hit.
CHECK_PARSED_FRESH = """
|
|
SELECT mod_id FROM mod_parsed
|
|
WHERE workshop_id = $1 AND parsed_at_time_updated = $2;
|
|
"""
|
|
|
|
|
|
def extract_version_min(raw: str) -> Optional[str]:
    """Pull the ``versionMin`` value out of raw mod.info text.

    Only the first line whose stripped, lowercased form starts with
    ``versionmin`` is considered. The text after its first ``=`` is
    returned stripped, or None when that text is empty. None is also
    returned when no such line exists.
    """
    candidates = (
        line for line in raw.splitlines()
        if line.strip().lower().startswith("versionmin")
    )
    first = next(candidates, None)
    if first is None:
        return None
    value = first.partition("=")[2].strip()
    return value if value else None
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Main flow
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
async def process_one(
    conn: "asyncpg.Connection",
    workshop_id: str,
    detail: dict,
    dd_path: Path,
    force: bool,
) -> str:
    """Refresh the cache for one workshop item.

    Returns 'hit' | 'refreshed' | 'banned' | 'missing' | 'no_mod_info' | 'failed'.

    'no_mod_info' = DepotDownloader succeeded but the workshop item contained
    no mod.info file (typical for collections, art-only items, and other
    non-mod uploads that share the PZ consumer_app_id). Distinct from
    'failed' (DD itself errored, or the item belongs to another app), so the
    API can surface "this isn't a mod" differently from "we couldn't fetch
    this."

    Args:
        conn: Open asyncpg connection used for all reads and writes.
        workshop_id: Id of the item being processed.
        detail: Steam detail dict for this item.
        dd_path: DepotDownloader executable.
        force: When true, skip the freshness check and always re-download.

    All per-mod writes for the item (eviction, mod_parsed upsert, manifest
    replacement, stale-row cleanup) run in a single transaction. A failure
    part-way through therefore cannot leave a row that claims
    files_manifest_built with a missing or partial manifest, which the
    freshness check would then treat as a permanent cache hit.
    """
    # Pre-flight: bad results
    if detail.get("result") != 1:
        return "missing"
    if detail.get("banned"):
        return "banned"
    if detail.get("consumer_app_id") != PZ_APP_ID:
        return "failed"  # wrong app

    time_updated = int(detail.get("time_updated", 0))

    # Always refresh meta (cheap)
    await conn.execute(
        UPSERT_WORKSHOP_META,
        workshop_id,
        detail.get("title", ""),
        detail.get("description", "") or "",
        flatten_tags(detail),
        str(detail.get("creator", "")) or None,
        int(detail.get("time_created", 0)) or None,
        time_updated,
        int(detail.get("file_size", 0)) or None,
        detail.get("preview_url"),
        detail.get("consumer_app_id"),
        detail.get("visibility"),
        bool(detail.get("banned", False)),
    )

    # Cache check
    if not force:
        rows = await conn.fetch(CHECK_PARSED_FRESH, workshop_id, time_updated)
        if rows:
            return "hit"

    # Cache miss → download + parse
    with tempfile.TemporaryDirectory(prefix=f"pzsort_{workshop_id}_") as tmpdir:
        tmp = Path(tmpdir)
        ok = run_depot_downloader(workshop_id, tmp, dd_path)
        if not ok:
            return "failed"

        mod_info_paths = discover_mod_infos(tmp)
        if not mod_info_paths:
            print(f" ! no mod.info found in {workshop_id}", file=sys.stderr)
            return "no_mod_info"

        # Phase 1: parse and hash everything before touching the database,
        # so the transaction below stays short and a parse error cannot
        # leave a half-written item behind.
        parsed = []
        for mip in mod_info_paths:
            raw = mip.read_text(encoding="utf-8", errors="replace")
            mod = parse_mod_info(raw, workshop_id=workshop_id)
            if mod is None:
                continue
            maps = discover_map_folders(mip.parent)
            # Single-pass walk under the mod_id root: produces both the
            # conflict manifest and the pzmm-style mod_types list. See
            # build_manifest_and_types.
            manifest_rows, mod_types = build_manifest_and_types(mip.parent, mod.id, raw)
            parsed.append((mod, raw, maps, manifest_rows, mod_types))

        # NOTE(review): if every mod.info failed to parse, `parsed` is empty,
        # nothing is written, and the status is still "refreshed". Confirm
        # whether "no_mod_info" would be the better status here.

        # Phase 2: apply all writes atomically, in discovery order so the
        # last mod.info for a given mod_id still wins.
        seen_mod_ids: List[str] = []
        async with conn.transaction():
            for mod, raw, maps, manifest_rows, mod_types in parsed:
                # Evict any other wsid's claim on this mod_id before we install
                # ours. Cache invariant: at most one wsid per mod_id, with the
                # most-recent pull winning.
                await conn.execute(EVICT_AND_RECORD_CONFLICT, workshop_id, mod.id)
                await conn.execute(
                    UPSERT_MOD_PARSED,
                    workshop_id,
                    mod.id,
                    mod.name,
                    mod.category,
                    mod.requirements,
                    mod.loadAfter,
                    mod.loadBefore,
                    mod.incompatibleMods,
                    mod.loadFirst,
                    mod.loadLast,
                    mod.tags,
                    maps,
                    raw,
                    extract_version_min(raw),
                    detect_is_addon(raw),
                    mod_types,
                    True,  # files_manifest_built
                    time_updated,
                )
                # Replace any stale manifest rows for this (workshop_id, mod_id)
                # so a re-parse can't leave behind orphans from a prior layout.
                await conn.execute(DELETE_MOD_FILES, workshop_id, mod.id)
                if manifest_rows:
                    await conn.executemany(
                        INSERT_MOD_FILE,
                        [(workshop_id, mod.id, rel, sha, size)
                         for rel, sha, size in manifest_rows],
                    )
                seen_mod_ids.append(mod.id)

            # Drop rows for mods that no longer exist in this workshop item
            if seen_mod_ids:
                await conn.execute(DELETE_STALE_MOD_PARSED, workshop_id, seen_mod_ids)

    # Fetch the item's Required Items through fetch_required_wsids so the
    # API can auto-resolve missing-dep warnings against this mod's declared
    # Steam-side dependencies. Best-effort: None on fetch error → leave the
    # existing cached value; [] or list → overwrite. The call is blocking,
    # so it runs in a worker thread.
    required = await asyncio.to_thread(fetch_required_wsids, workshop_id)
    if required is not None:
        await conn.execute(
            """
            UPDATE workshop_meta
            SET required_wsids = $1, required_scraped_at = now()
            WHERE workshop_id = $2
            """,
            required, workshop_id,
        )

    return "refreshed"
|
|
|
|
|
|
async def main_async(workshop_ids: List[str], dd_path: Path, force: bool, dsn: str) -> int:
    """Process every requested workshop id and print a status summary.

    Metadata for all ids is fetched up front in one call. Each id is then
    handled sequentially over a single database connection, which is always
    closed on exit.

    Args:
        workshop_ids: Ids given on the command line.
        dd_path: DepotDownloader executable.
        force: Passed through to process_one to bypass the cache check.
        dsn: PostgreSQL connection string.

    Returns:
        Process exit code: 1 when at least one item ended as 'failed',
        otherwise 0.
    """
    print(f"[steam] fetching metadata for {len(workshop_ids)} item(s)")
    details = fetch_workshop_details(workshop_ids)
    missing_from_steam = [w for w in workshop_ids if w not in details]
    if missing_from_steam:
        print(f"[steam] no detail returned for: {missing_from_steam}", file=sys.stderr)

    # One counter per status process_one can return. 'no_mod_info' must be
    # present: without it the tally below raised KeyError on the first
    # non-mod item and aborted the rest of the batch.
    summary: Dict[str, int] = {
        "hit": 0,
        "refreshed": 0,
        "banned": 0,
        "missing": 0,
        "no_mod_info": 0,
        "failed": 0,
    }

    conn = await asyncpg.connect(dsn=dsn)
    try:
        for wid in workshop_ids:
            detail = details.get(wid)
            if detail is None:
                summary["missing"] += 1
                print(f" - {wid} -> missing (no Steam response)")
                continue
            status = await process_one(conn, wid, detail, dd_path, force)
            # .get keeps the tally alive even if a new status is added later.
            summary[status] = summary.get(status, 0) + 1
            print(f" - {wid} -> {status}")
    finally:
        await conn.close()

    print(f"[done] {summary}")
    return 0 if summary["failed"] == 0 else 1
|
|
|
|
|
|
def _exit_with_config_error(message: str) -> None:
    """Print `message` to stderr and terminate with exit status 2."""
    print(message, file=sys.stderr)
    sys.exit(2)


def main():
    """CLI entry point: parse arguments, validate them, run the async worker.

    Exits with status 2 when no DSN is available or the DepotDownloader
    path is not a file. Otherwise exits with main_async's return code.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("workshop_ids", nargs="+")
    parser.add_argument("--force", action="store_true", help="ignore cache, always re-download")
    parser.add_argument("--dd-path", default=DEFAULT_DD_PATH)
    parser.add_argument("--dsn", default=os.environ.get("DATABASE_URL"))
    opts = parser.parse_args()

    if not opts.dsn:
        _exit_with_config_error("ERROR: --dsn or DATABASE_URL required")

    downloader = Path(opts.dd_path)
    if not downloader.is_file():
        _exit_with_config_error(f"ERROR: DepotDownloader not found at {downloader}")

    exit_code = asyncio.run(
        main_async(opts.workshop_ids, downloader, opts.force, opts.dsn)
    )
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|