Add full sortof codebase: API, drain workers, frontend, schema, specs
This commit is contained in:
229
worker/drain.py
Normal file
229
worker/drain.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""sortof download_jobs drainer.
|
||||
|
||||
Long-running asyncio loop that claims queued jobs from download_jobs,
|
||||
calls worker.process_one() to materialize mod_parsed rows via
|
||||
DepotDownloader, and updates job status. Single connection per process;
|
||||
multiple instances are safe because claims use FOR UPDATE SKIP LOCKED.
|
||||
|
||||
Manual requeue (after a transient failure):
|
||||
UPDATE download_jobs
|
||||
SET status='queued', attempts=0, error=NULL
|
||||
WHERE id='<uuid>';
|
||||
|
||||
Bulk requeue everything that hit MAX_ATTEMPTS:
|
||||
UPDATE download_jobs
|
||||
SET status='queued', attempts=0, error=NULL
|
||||
WHERE status='failed';
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import urllib.parse
|
||||
from pathlib import Path
|
||||
|
||||
import asyncpg
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from worker import (
|
||||
DEFAULT_DD_PATH,
|
||||
fetch_workshop_details,
|
||||
process_one,
|
||||
)
|
||||
|
||||
ENV_PATH = Path(__file__).resolve().parent.parent / ".env"
|
||||
|
||||
IDLE_SLEEP_S = 5
|
||||
HEARTBEAT_S = 60
|
||||
BATCH_SIZE = 1
|
||||
MAX_ATTEMPTS = 5
|
||||
STALE_RECLAIM_MIN = 30
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)s %(name)s %(message)s",
|
||||
)
|
||||
log = logging.getLogger("sortof.drain")
|
||||
|
||||
|
||||
CLAIM_SQL = """
|
||||
UPDATE download_jobs
|
||||
SET status='downloading', attempts=attempts+1, updated_at=now()
|
||||
WHERE id IN (
|
||||
SELECT id FROM download_jobs
|
||||
WHERE status='queued' AND attempts < $1
|
||||
ORDER BY priority DESC, created_at ASC
|
||||
LIMIT $2
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING id, workshop_id, attempts
|
||||
"""
|
||||
|
||||
RECLAIM_SQL = f"""
|
||||
UPDATE download_jobs
|
||||
SET status='queued', updated_at=now()
|
||||
WHERE status='downloading'
|
||||
AND updated_at < now() - interval '{STALE_RECLAIM_MIN} minutes'
|
||||
RETURNING id
|
||||
"""
|
||||
|
||||
DEPTH_SQL = """
|
||||
SELECT COUNT(*) FROM download_jobs
|
||||
WHERE status='queued' AND attempts < $1
|
||||
"""
|
||||
|
||||
DONE_SQL = """
|
||||
UPDATE download_jobs
|
||||
SET status='done', completed_at=now(), updated_at=now(), error=NULL
|
||||
WHERE id=$1
|
||||
"""
|
||||
|
||||
FAIL_SQL = """
|
||||
UPDATE download_jobs
|
||||
SET status='failed', updated_at=now(), error=$2
|
||||
WHERE id=$1
|
||||
"""
|
||||
|
||||
|
||||
def build_dsn() -> str:
    """Build the PostgreSQL connection URL used by the drainer.

    Loads ``ENV_PATH`` into the process environment via ``load_dotenv``, then
    returns ``DATABASE_URL`` verbatim when it is set and non-empty. Otherwise
    the URL is assembled from the ``POSTGRES_*`` variables, with host
    defaulting to ``127.0.0.1`` and port to ``5439``.

    Returns:
        A ``postgresql://user:password@host:port/dbname`` URL.

    Raises:
        KeyError: if ``DATABASE_URL`` is unset/empty and ``POSTGRES_USER``,
            ``POSTGRES_PASSWORD`` or ``POSTGRES_DB`` is missing.
    """
    load_dotenv(ENV_PATH)
    explicit = os.environ.get("DATABASE_URL")
    if explicit:
        return explicit

    # Percent-encode every free-form URL component, not just the password:
    # a user or database name containing '@', ':', '/', '?' or '#' would
    # otherwise shift the URL's field boundaries. Plain alphanumeric names
    # are unchanged by quoting.
    user = urllib.parse.quote(os.environ["POSTGRES_USER"], safe="")
    pw = urllib.parse.quote(os.environ["POSTGRES_PASSWORD"], safe="")
    name = urllib.parse.quote(os.environ["POSTGRES_DB"], safe="")
    host = os.environ.get("POSTGRES_HOST", "127.0.0.1")
    port = os.environ.get("POSTGRES_PORT", "5439")
    return f"postgresql://{user}:{pw}@{host}:{port}/{name}"
|
||||
|
||||
|
||||
def resolve_dd_path() -> Path:
    """Locate the DepotDownloader binary, or raise if it cannot be found.

    Candidates are tried in order: the ``DD_PATH`` environment variable (when
    set and non-empty), then ``DEFAULT_DD_PATH``. The first candidate that is
    an existing regular file is returned.

    Raises:
        RuntimeError: if no candidate is a file; the message lists every
            path that was tried.
    """
    override = os.environ.get("DD_PATH")
    search: list[Path] = [Path(override)] if override else []
    search.append(Path(DEFAULT_DD_PATH))

    match = next((candidate for candidate in search if candidate.is_file()), None)
    if match is not None:
        return match

    tried = ", ".join(str(c) for c in search)
    raise RuntimeError(
        "DepotDownloader not found. Tried: "
        + tried
        + ". Set DD_PATH or place the binary at the default path."
    )
|
||||
|
||||
|
||||
async def reclaim_stale(conn: asyncpg.Connection) -> int:
    """Run ``RECLAIM_SQL`` and return how many job rows it returned.

    ``RECLAIM_SQL`` flips 'downloading' jobs whose ``updated_at`` is older
    than ``STALE_RECLAIM_MIN`` minutes back to 'queued' and returns their ids.
    """
    reclaimed = await conn.fetch(RECLAIM_SQL)
    count = len(reclaimed)
    return count
|
||||
|
||||
|
||||
async def claim_batch(conn: asyncpg.Connection, n: int):
    """Claim up to ``n`` queued jobs and return the claimed rows.

    Executes ``CLAIM_SQL`` with ``MAX_ATTEMPTS`` as the attempts ceiling and
    ``n`` as the row limit; each returned row carries ``id``,
    ``workshop_id`` and the already-incremented ``attempts``.
    """
    claimed = await conn.fetch(CLAIM_SQL, MAX_ATTEMPTS, n)
    return claimed
|
||||
|
||||
|
||||
async def queue_depth(conn: asyncpg.Connection) -> int:
    """Return the count of queued jobs still below ``MAX_ATTEMPTS``.

    This is the single value produced by ``DEPTH_SQL``.
    """
    depth = await conn.fetchval(DEPTH_SQL, MAX_ATTEMPTS)
    return depth
|
||||
|
||||
|
||||
async def mark_done(conn: asyncpg.Connection, job_id) -> None:
    """Mark job ``job_id`` as done by executing ``DONE_SQL``.

    ``DONE_SQL`` sets status 'done', stamps the completion/update times and
    clears the stored error.
    """
    await conn.execute(
        DONE_SQL,
        job_id,
    )
|
||||
|
||||
|
||||
async def mark_failed(conn: asyncpg.Connection, job_id, msg: str) -> None:
    """Mark job ``job_id`` as failed, storing at most 500 characters of ``msg``.

    Executes ``FAIL_SQL``, which sets status 'failed' and records the
    (truncated) message in the ``error`` column.
    """
    stored = msg[:500]  # cap the message length before it is written
    await conn.execute(FAIL_SQL, job_id, stored)
|
||||
|
||||
|
||||
async def run() -> int:
    """Run the drain loop until SIGTERM/SIGINT arrives; return exit code 0.

    Flow:
      1. Resolve the DepotDownloader path (fails before any DB connection).
      2. Open one asyncpg connection and requeue stale 'downloading' jobs.
      3. Loop: claim up to ``BATCH_SIZE`` jobs. When none are available, log
         the queue depth at most every ``HEARTBEAT_S`` seconds and sleep up to
         ``IDLE_SLEEP_S`` seconds (waking early on a stop signal). Otherwise
         fetch Steam details for the claimed workshop ids in a worker thread
         and hand each job to ``process_one``, marking it done or failed.

    A stop signal is only observed between batches / during the idle sleep;
    a job that is already being processed is allowed to finish.

    Returns:
        0 after a clean shutdown.

    Raises:
        RuntimeError: from ``resolve_dd_path`` when the binary is missing.
        Exception: database errors (including those raised while recording
            a failure) propagate after the connection has been closed.
    """
    dd_path = resolve_dd_path()  # raises before opening DB if missing

    dsn = build_dsn()
    conn = await asyncpg.connect(dsn=dsn)

    stop = asyncio.Event()
    loop = asyncio.get_running_loop()

    def _handle(sig: signal.Signals) -> None:
        log.info("drain shutting down (signal=%s)", sig.name)
        stop.set()

    try:
        # Registered inside the try block so the connection is still closed
        # if registration raises (add_signal_handler is Unix-only).
        for s in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(s, _handle, s)

        n_reclaimed = await reclaim_stale(conn)
        log.info(
            "drain starting, reclaimed %d stale, dd_path=%s",
            n_reclaimed, dd_path,
        )

        # time.monotonic() has an undefined reference point, so 0.0 is not a
        # reliable "long ago"; -inf guarantees the first idle pass logs.
        last_heartbeat = float("-inf")
        while not stop.is_set():
            rows = await claim_batch(conn, BATCH_SIZE)

            if not rows:
                now = time.monotonic()
                if now - last_heartbeat >= HEARTBEAT_S:
                    depth = await queue_depth(conn)
                    log.info("idle, queue depth=%d", depth)
                    last_heartbeat = now
                try:
                    # Sleep, but wake immediately when a stop signal arrives.
                    await asyncio.wait_for(stop.wait(), timeout=IDLE_SLEEP_S)
                except asyncio.TimeoutError:
                    pass
                continue

            ids = [r["workshop_id"] for r in rows]
            try:
                # fetch_workshop_details is called in a worker thread so it
                # does not block the event loop.
                details = await asyncio.to_thread(fetch_workshop_details, ids)
            except Exception as e:
                log.warning("steam fetch failed: %s", e)
                # Keep the cause in the stored error; the exception type is
                # included because str(e) can be empty (e.g. timeouts).
                reason = f"steam fetch error: {type(e).__name__}: {e}"
                for r in rows:
                    await mark_failed(conn, r["id"], reason)
                continue

            for r in rows:
                wid = r["workshop_id"]
                attempt = r["attempts"]
                log.info("claimed wsid=%s attempt=%d", wid, attempt)
                detail = details.get(wid)
                if not detail or detail.get("result") != 1:
                    reason = (
                        f"steam result={detail.get('result') if detail else 'none'}"
                    )
                    log.info("failed wsid=%s reason=%s", wid, reason)
                    await mark_failed(conn, r["id"], reason)
                    continue
                try:
                    outcome = await process_one(conn, wid, detail, dd_path, False)
                except Exception as e:
                    log.exception("drain exception wsid=%s", wid)
                    # mark_failed truncates; fall back to the exception type
                    # when the message itself is empty.
                    await mark_failed(conn, r["id"], str(e) or type(e).__name__)
                    continue
                if outcome in ("hit", "refreshed"):
                    log.info("done wsid=%s outcome=%s", wid, outcome)
                    await mark_done(conn, r["id"])
                else:
                    reason = f"process_one={outcome}"
                    log.info("failed wsid=%s reason=%s", wid, reason)
                    await mark_failed(conn, r["id"], reason)
    finally:
        await conn.close()
        log.info("drain stopped")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(asyncio.run(run()))
|
||||
682
worker/mlos_sort.py
Normal file
682
worker/mlos_sort.py
Normal file
@@ -0,0 +1,682 @@
|
||||
"""
|
||||
mlos_sort.py
|
||||
Python port of MLOS_sorting.lua (Mod Load Order Sorter, by REfRigERatoR).
|
||||
|
||||
Faithful to the Lua algorithm:
|
||||
- preorder: ModManager, ModManagerServer, modoptions
|
||||
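- category buckets: coreRequirement -> tweaks -> resource -> map -> vehicle ->
  code -> clothes -> ui -> other -> translation -> undefined
  NOTE: this port actually orders buckets by RAW_CATEGORY_ORDER (defined
  below), which is finer-grained and has no "clothes" bucket.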
|
||||
- loadFirst / loadLast: on (0) | category (1) | off (2)
|
||||
- topological sort by `require` + `loadAfter` with cycle detection
|
||||
- sorting_rules.txt overrides supported (loadAfter/loadBefore/incompatibleMods/
|
||||
loadFirst/loadLast/category)
|
||||
|
||||
Limitations vs in-game Lua:
|
||||
- mod.info-only input. We do NOT walk /media/* folders for category detection.
|
||||
We rely on mod.info `category=` if present, then `frameworkKeys` name
|
||||
heuristic, then default "other" (or "undefined" if uncategorizable).
|
||||
- `loadBefore` is converted into corresponding `loadAfter` edges on other mods,
|
||||
matching the Lua mod's behavior.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Constants (mirrors MLOS_sorting.lua)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
PREORDER: Dict[str, int] = {"ModManager": 1, "ModManagerServer": 2, "modoptions": 3}
|
||||
|
||||
RAW_CATEGORY_ORDER: List[str] = [
|
||||
"coreRequirement",
|
||||
"tweaks", # libraries / frameworks / APIs (matches existing 'lib' pill)
|
||||
"tile", # tile asset packs
|
||||
"debug", # error logger, cheat menus
|
||||
"resource", # other generic resources
|
||||
"map",
|
||||
"qol", # QOL changes
|
||||
"moodle", # moodles / moodlets
|
||||
"tweak_minor", # tiny tweaks (working aircon, ear protection, …)
|
||||
"music", # music + music addons (load before vehicles per user spec)
|
||||
"wearable", # clothing, hair, tattoos (NOT armor)
|
||||
"profession", # profession mods
|
||||
"movement", # drop-and-roll, crawl, jump, ladders
|
||||
"building", # building menus, barricades, light switches
|
||||
"farming",
|
||||
"zombie", # zombie behaviour mods (OccultZed, HordeNight, ReactiveZombies)
|
||||
"zone", # hazardous zones, spore zones
|
||||
"armor", # armor mods (separate from wearables)
|
||||
"food",
|
||||
"health", # first aid, medical
|
||||
"weapon", # weapons (load before vehicles per user spec)
|
||||
"crafting",
|
||||
"container", # backpacks, boxes, tubs
|
||||
"vehicle",
|
||||
"vehicle_spawn", # vehicle spawn zones
|
||||
"loot", # loot tables
|
||||
"code", # generic gameplay code (legacy fallback)
|
||||
"ui", # interface
|
||||
"sound", # audio (non-music)
|
||||
"texture",
|
||||
"translation",
|
||||
"multiplayer", # MP-specific utilities
|
||||
"server_only", # admin tools, server logs
|
||||
"fix", # bug-fix overlays (distinct from 'patch' loadLast tier)
|
||||
"other",
|
||||
"undefined",
|
||||
# Spec G-patch: "patch" is a category like any other, but `_initial_sort_key`
|
||||
# routes patches above all sub-axes via a leading is_patch tuple element so
|
||||
# they sort strictly LAST (after every loadLast=on map mod).
|
||||
"patch",
|
||||
]
|
||||
CATEGORY_ORDER: Dict[str, int] = {c: i for i, c in enumerate(RAW_CATEGORY_ORDER)}
|
||||
|
||||
LOAD_CATEGORIES: Dict[str, int] = {"on": 0, "category": 1, "off": 2}
|
||||
|
||||
# from MLOS_sorting.lua: frameworkKeys for name-based tweak detection.
|
||||
# Lua uses string.find on lowercased name (substring match, no regex anchors).
|
||||
FRAMEWORK_KEYS: List[str] = [
|
||||
"framework",
|
||||
" api",
|
||||
"_api",
|
||||
"tweak",
|
||||
"interface",
|
||||
"utilit", # matches utility, utilities
|
||||
"bugfix",
|
||||
"librar", # matches library, libraries — covers damnlib/tsarslib/StarlitLibrary/etc.
|
||||
# `derive_category` checks mod.maps before FRAMEWORK_KEYS, so a map
|
||||
# mod whose name contains "library" still classifies as `map` first.
|
||||
]
|
||||
|
||||
# Multi-key list fields in mod.info (lowercased keys)
|
||||
LIST_KEYS_MAP = {
|
||||
"require": "requirements",
|
||||
"loadafter": "loadAfter",
|
||||
"loadbefore": "loadBefore",
|
||||
"incompatiblemods": "incompatibleMods",
|
||||
"tags": "tags",
|
||||
}
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Dataclasses
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class ModInfo:
    """One mod as seen by the sorter: identity, ordering hints and metadata.

    Only ``id`` is required; every other field has a default. List and dict
    fields use ``default_factory`` so instances never share containers.
    """

    # --- identity -----------------------------------------------------------
    id: str
    name: str = ""
    workshop_id: Optional[str] = None

    # --- ordering inputs ----------------------------------------------------
    category: str = "undefined"
    requirements: List[str] = field(default_factory=list)
    loadAfter: List[str] = field(default_factory=list)
    loadBefore: List[str] = field(default_factory=list)
    incompatibleMods: List[str] = field(default_factory=list)
    loadFirst: str = "off"  # 'on' | 'category' | 'off'
    loadLast: str = "off"   # 'on' | 'category' | 'off'

    # --- descriptive metadata -----------------------------------------------
    tags: List[str] = field(default_factory=list)   # mod.info-side, freeform
    maps: List[str] = field(default_factory=list)   # map folder names from media/maps/
    flags: List[str] = field(default_factory=list)
    is_addon: bool = False  # Spec A addon: default-off in multi-mod wsids
    # Steam Workshop's controlled-vocabulary tags (workshop_meta.tags): the
    # canonical signal for build / multiplayer / category detection, as
    # opposed to the freeform `tags` above.
    workshop_tags: List[str] = field(default_factory=list)
    warnings: Dict[str, List[str]] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class SortingRule:
    """Override entry for one mod, as parsed from a sorting_rules.txt section.

    The list fields are merged into the mod's own lists; ``loadFirst`` /
    ``loadLast`` default to 'off' and ``category`` to ``None`` (no override).
    """

    # Extra ordering edges and conflicts.
    loadAfter: List[str] = field(default_factory=list)
    loadBefore: List[str] = field(default_factory=list)
    incompatibleMods: List[str] = field(default_factory=list)

    # Placement overrides: 'on' | 'category' | 'off'.
    loadFirst: str = "off"
    loadLast: str = "off"

    # Category override; None leaves the mod's category untouched.
    category: Optional[str] = None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _split_csv(value: str) -> List[str]:
    """
    Split a comma-separated mod.info value into clean mod ids.

    Processing, in order:
      1. ``None`` yields an empty list.
      2. Any embedded ``word=`` prefix is removed (malformed lines sometimes
         repeat the key inside the value).
      3. The value is split on commas; each piece is whitespace-trimmed and
         has leading backslashes removed (path-style ``\\ModId`` entries).
      4. A leading ``<digits>/`` workshop-id prefix is dropped
         (e.g. ``2256623447/firearmmod`` -> ``firearmmod``).
      5. Pieces that end up empty are discarded.
    """
    if value is None:
        return []

    without_keys = re.sub(r"\w+\s*=", "", value)
    entries = (
        re.sub(r"^\d+/", "", chunk.strip().lstrip("\\"))
        for chunk in without_keys.split(",")
    )
    return [entry for entry in entries if entry]
|
||||
|
||||
|
||||
def _convert_load_category(value) -> str:
    """Normalize a loadFirst/loadLast setting to 'on' | 'category' | 'off'.

    Mirrors convertToLoadCategoryString. Accepted spellings:
      - booleans: True -> 'on', False -> 'off'
      - numeric codes 0 / 1 / 2 (int, float, or the strings "0" / "1" / "2")
        -> 'on' / 'category' / 'off', matching the module's code table
      - the strings "true" / "false" and the canonical names themselves
      - None, "" and anything unrecognized -> 'off'

    Booleans are tested before numbers because ``bool`` is an ``int``
    subclass: with a plain membership test ``False`` equals the code 0 ('on')
    and the code 1 ('category') equals ``True``, which mis-mapped both.
    """
    if value is None:
        return "off"
    if isinstance(value, bool):
        return "on" if value else "off"
    if isinstance(value, (int, float)):
        # Numeric codes, same table as LOAD_CATEGORIES.
        return {0: "on", 1: "category", 2: "off"}.get(value, "off")
    if isinstance(value, str):
        return {
            "true": "on",
            "0": "on",
            "on": "on",
            "1": "category",
            "category": "category",
        }.get(value, "off")
    # Any other type (lists, dicts, ...) is not a valid setting.
    return "off"
|
||||
|
||||
|
||||
def _str_contains_any(haystack: str, needles: List[str]) -> bool:
    """Return True when the lowercased ``haystack`` contains any needle.

    Empty needles are ignored and an empty/None haystack never matches.
    Needles are compared as given (they are not lowercased here).
    """
    if not haystack:
        return False
    lowered = haystack.lower()
    for needle in needles:
        if needle and needle in lowered:
            return True
    return False
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Parsers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def parse_mod_info(text: str, workshop_id: Optional[str] = None) -> Optional[ModInfo]:
    """
    Parse a mod.info file body into a ModInfo.

    Lines have the form `key=value`. Keys are matched case-insensitively,
    blank lines and lines starting with `#` are skipped, and lines without
    an `=` are ignored. Keys listed in LIST_KEYS_MAP are comma-split via
    `_split_csv`; `loadFirst` / `loadLast` are normalized; `category` is kept
    only when it is a known category (otherwise "undefined"). When a key is
    repeated, the last occurrence wins.

    Args:
        text: decoded contents of the mod.info file.
        workshop_id: workshop item the mod belongs to, stored on the result.

    Returns:
        The parsed ModInfo, or None when no non-empty `id=` line was found.
    """
    # A UTF-8 byte-order mark survives decoding with encoding="utf-8" as
    # U+FEFF, and str.strip() does not treat it as whitespace. Left in place
    # it becomes part of the first key, so a file starting with `id=` would
    # be reported as having no id at all.
    text = text.lstrip("\ufeff")

    fields: Dict[str, object] = {}
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        m = re.match(r"^\s*(.+?)\s*=\s*(.*?)\s*$", line)
        if not m:
            continue
        key = m.group(1).strip().lower()
        value = m.group(2).strip()
        if key in LIST_KEYS_MAP:
            fields[LIST_KEYS_MAP[key]] = _split_csv(value)
        elif key == "loadfirst":
            fields["loadFirst"] = _convert_load_category(value)
        elif key == "loadlast":
            fields["loadLast"] = _convert_load_category(value)
        elif key == "category":
            fields["category"] = value if value in CATEGORY_ORDER else "undefined"
        elif key == "name":
            fields["name"] = value
        elif key == "id":
            # Some authors prefix the wsid into the id (e.g. id=2256623447/firearmmod).
            # Strip a leading "<digits>/" so the canonical mod_id is the clean form.
            fields["id"] = re.sub(r"^\d+/", "", value)

    # An empty `id=` is as unusable as a missing one: every lookup in the
    # sorter is keyed by mod id.
    if not fields.get("id"):
        return None

    return ModInfo(
        id=fields["id"],
        name=fields.get("name", ""),
        workshop_id=workshop_id,
        category=fields.get("category", "undefined"),
        requirements=fields.get("requirements", []),
        loadAfter=fields.get("loadAfter", []),
        loadBefore=fields.get("loadBefore", []),
        incompatibleMods=fields.get("incompatibleMods", []),
        loadFirst=fields.get("loadFirst", "off"),
        loadLast=fields.get("loadLast", "off"),
        tags=fields.get("tags", []),
    )
|
||||
|
||||
|
||||
def parse_sorting_rules(text: str) -> Dict[str, SortingRule]:
    """
    Parse a sorting_rules.txt file into per-mod SortingRule objects.

    Format:
        [modId]
        loadAfter=mod1,mod2
        loadBefore=mod3
        incompatibleMods=mod4
        loadFirst=on
        loadLast=off
        category=tweaks

    Keys are case-insensitive; `loadModAfter`, `loadModBefore` and
    `incompatible` are accepted as aliases. Repeated list keys within (or
    across repeated) sections accumulate, de-duplicated in first-seen order.
    Key lines before the first `[section]` header are ignored, and an
    unknown `category` value resets the rule's category to None.

    Returns:
        Mapping of mod id -> SortingRule, one entry per section header seen.
    """
    # Drop a leading UTF-8 BOM (U+FEFF): str.strip() does not remove it, so
    # it would stop the first `[modId]` header from matching and silently
    # discard that whole section.
    text = text.lstrip("\ufeff")

    rules: Dict[str, SortingRule] = {}
    current: Optional[str] = None
    for raw in text.splitlines():
        line = raw.strip()
        if not line:
            continue
        m = re.match(r"^\s*\[\s*(.+?)\s*\]\s*$", line)
        if m:
            current = m.group(1)
            rules.setdefault(current, SortingRule())
            continue
        if current is None:
            continue
        kv = re.match(r"^\s*(.+?)\s*=\s*(.*?)\s*$", line)
        if not kv:
            continue
        key, value = kv.group(1).lower(), kv.group(2)
        rule = rules[current]
        if key in ("loadafter", "loadmodafter"):
            # dict.fromkeys de-duplicates while preserving first-seen order.
            rule.loadAfter = list(dict.fromkeys(rule.loadAfter + _split_csv(value)))
        elif key in ("loadbefore", "loadmodbefore"):
            rule.loadBefore = list(dict.fromkeys(rule.loadBefore + _split_csv(value)))
        elif key in ("incompatiblemods", "incompatible"):
            rule.incompatibleMods = list(dict.fromkeys(rule.incompatibleMods + _split_csv(value)))
        elif key == "loadfirst":
            rule.loadFirst = _convert_load_category(value)
        elif key == "loadlast":
            rule.loadLast = _convert_load_category(value)
        elif key == "category":
            rule.category = value if value in CATEGORY_ORDER else None
    return rules
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Filesystem ingestion (DepotDownloader output layout)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def load_mods_from_dir(root: Path) -> List[ModInfo]:
    """
    Collect every mod found under a DepotDownloader output tree.

    Expected layout: `<root>/<workshop_id>/mods/<mod_id>/mod.info`. When a
    workshop directory has no `mods/` child, its own subdirectories are
    scanned instead. A workshop directory name is recorded as the mod's
    workshop id only when it is all digits. Each
    `<mod_id>/media/maps/<map_folder>/` that contains a `map.info` adds
    `<map_folder>` to the mod's `maps`.

    Directories are visited in sorted order. Unreadable mod.info files and
    files without an `id=` line are skipped with a warning on stderr.

    Raises:
        FileNotFoundError: if `root` does not exist.
    """
    if not root.exists():
        raise FileNotFoundError(f"Mods root does not exist: {root}")

    collected: List[ModInfo] = []
    for ws_dir in sorted(root.iterdir()):
        if not ws_dir.is_dir():
            continue

        ws_id = ws_dir.name if ws_dir.name.isdigit() else None

        container = ws_dir / "mods"
        if not container.exists():
            # also support: some layouts put mods/ at root level directly
            container = ws_dir
        if not (container.exists() and container.is_dir()):
            continue

        for candidate in sorted(container.iterdir()):
            if not candidate.is_dir():
                continue
            info_file = candidate / "mod.info"
            if not info_file.exists():
                continue

            try:
                body = info_file.read_text(encoding="utf-8", errors="replace")
            except OSError as e:
                print(f"WARN: cannot read {info_file}: {e}", file=sys.stderr)
                continue

            parsed = parse_mod_info(body, workshop_id=ws_id)
            if parsed is None:
                print(f"WARN: no `id=` in {info_file}, skipping", file=sys.stderr)
                continue

            # A map folder only counts when it ships a map.info.
            maps_dir = candidate / "media" / "maps"
            if maps_dir.exists() and maps_dir.is_dir():
                parsed.maps.extend(
                    entry.name
                    for entry in sorted(maps_dir.iterdir())
                    if entry.is_dir() and (entry / "map.info").exists()
                )

            collected.append(parsed)

    return collected
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Category derivation (degraded vs in-game; no folder walk)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
_PATCH_NAME_RE = re.compile(r"\b(patch|compat|compatibility)\b", re.IGNORECASE)
|
||||
|
||||
|
||||
# Substring-based category hints (kept in sync with api/mlos_sort.py)
|
||||
_LIB_NAME_HINTS = ["library", "libraries", "framework"]
|
||||
_LIB_NAME_RE = re.compile(
|
||||
r'(?<![A-Za-z])(?:lib|api|core)(?![A-Za-z])'
|
||||
r'|(?<=[a-z])(?:Lib|API|Core)(?![A-Za-z])',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_MUSIC_NAME_HINTS = ["music", "moozic", "jukebox"]
|
||||
_MOODLE_NAME_HINTS = ["moodle", "moodlet"]
|
||||
_PROFESSION_HINTS = ["profession"]
|
||||
_MOVEMENT_HINTS = [
|
||||
"true action", "trueaction", "true_action",
|
||||
"drop and roll", "dropandroll", "drop_and_roll",
|
||||
"crawl", "ladder",
|
||||
]
|
||||
_ARMOR_NAME_HINTS = ["armor", "armour"]
|
||||
_HEALTH_NAME_HINTS = ["first aid", "firstaid", "medical", "injur", "disease", "sickness"]
|
||||
_CRAFTING_HINTS = ["craft"]
|
||||
_CONTAINER_HINTS = ["backpack", "container", "storage"]
|
||||
_LOOT_NAME_HINTS = ["loot"]
|
||||
_TILE_NAME_HINTS = ["tiles", "tileset", "tilepack"]
|
||||
_DEBUG_NAME_HINTS = ["debug menu", "cheat menu", "error log", "errormagnifier"]
|
||||
_ZONE_NAME_HINTS = ["hazard zone", "spore zone", "spore zones"]
|
||||
_ZOMBIE_NAME_HINTS = ["zombie", "horde", "undead"]
|
||||
_FIX_NAME_HINTS = [" fix", "_fix", "bugfix", "hotfix"]
|
||||
|
||||
|
||||
def _name_has(name: str, hints: List[str]) -> bool:
    """Return True when any hint is a substring of the lowercased ``name``.

    An empty/None name never matches. Hints are compared as given, so they
    are expected to be lowercase already.
    """
    if not name:
        return False
    lowered = name.lower()
    for hint in hints:
        if hint in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def derive_category(mod: ModInfo) -> str:
    """Best-effort category from mod.info + workshop_meta.tags + name.

    Mirrors api/mlos_sort.py; keep both copies in sync.

    Signals are consulted in this order; the first match wins:
      1. an explicit, known `mod.category` other than "undefined";
      2. patch-like name -> "patch"; library-like name -> "tweaks";
      3. the mod ships map folders -> "map";
      4. early name hints (music, movement, moodle, debug, tile);
      5. Steam Workshop tags (`mod.workshop_tags`);
      6. late name hints (zombie, health, crafting, container, loot, fix, zone);
      7. the Workshop "Audio" tag -> "sound";
      8. freeform mod.info tags (`mod.tags`), matched case-insensitively;
      9. FRAMEWORK_KEYS in the name -> "tweaks";
     10. the Workshop "Multiplayer" tag -> "multiplayer";
     11. otherwise "other".

    NOTE(review): the library-name check (step 2) runs before the map check
    (step 3), so a map mod whose name contains "library" is classified as
    "tweaks". The comment on FRAMEWORK_KEYS says such a mod classifies as
    "map" first; confirm which behavior is intended.
    """
    if mod.category in CATEGORY_ORDER and mod.category != "undefined":
        return mod.category

    name = mod.name or ""
    if name and _PATCH_NAME_RE.search(name):
        return "patch"
    if _name_has(name, _LIB_NAME_HINTS) or (name and _LIB_NAME_RE.search(name)):
        return "tweaks"

    if mod.maps:
        return "map"

    early_name_hints = (
        (_MUSIC_NAME_HINTS, "music"),
        (_MOVEMENT_HINTS, "movement"),
        (_MOODLE_NAME_HINTS, "moodle"),
        (_DEBUG_NAME_HINTS, "debug"),
        (_TILE_NAME_HINTS, "tile"),
    )
    for hints, category in early_name_hints:
        if _name_has(name, hints):
            return category

    ws_tags = set(mod.workshop_tags or [])
    has_audio = "Audio" in ws_tags

    if "Weapons" in ws_tags:
        return "weapon"
    if "Vehicles" in ws_tags:
        if name and "spawn zone" in name.lower():
            return "vehicle_spawn"
        return "vehicle"
    if "Clothing/Armor" in ws_tags:
        if _name_has(name, _ARMOR_NAME_HINTS):
            return "armor"
        return "wearable"
    if "Food" in ws_tags:
        return "food"
    if "building" in {t.lower() for t in ws_tags}:
        return "building"
    if "Farming" in ws_tags:
        return "farming"
    if "Skills" in ws_tags:
        if _name_has(name, _PROFESSION_HINTS):
            return "profession"
        return "code"
    if "Interface" in ws_tags:
        return "ui"
    if "Textures" in ws_tags:
        return "texture"
    if "Language/Translation" in ws_tags:
        return "translation"
    if "QOL" in ws_tags:
        return "qol"
    if "Map" in ws_tags:
        return "map"

    # The tile hints were already tested above, so a second tile check at
    # this point could never match and is omitted.
    late_name_hints = (
        (_ZOMBIE_NAME_HINTS, "zombie"),
        (_HEALTH_NAME_HINTS, "health"),
        (_CRAFTING_HINTS, "crafting"),
        (_CONTAINER_HINTS, "container"),
        (_LOOT_NAME_HINTS, "loot"),
        (_FIX_NAME_HINTS, "fix"),
        (_ZONE_NAME_HINTS, "zone"),
    )
    for hints, category in late_name_hints:
        if _name_has(name, hints):
            return category

    if has_audio:
        return "sound"

    tags_lc = [t.lower() for t in mod.tags]
    if any("translation" in t for t in tags_lc):
        return "translation"
    if any("vehicle" in t for t in tags_lc):
        return "vehicle"
    # "ui" must match as a whole word: as a bare substring it also hits
    # unrelated tags such as "build 41", "building" or "fruit".
    if any("interface" in t or re.search(r"\bg?ui\b", t) for t in tags_lc):
        return "ui"
    if any("clothing" in t or "skin" in t for t in tags_lc):
        return "wearable"
    if any("armor" in t for t in tags_lc):
        return "armor"
    if any("map" in t for t in tags_lc):
        return "map"

    if _str_contains_any(name, FRAMEWORK_KEYS):
        return "tweaks"

    if "Multiplayer" in ws_tags:
        return "multiplayer"

    return "other"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sort
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _apply_overrides(mods: List[ModInfo], rules: Dict[str, SortingRule]) -> None:
    """Merge sorting rules into ``mods`` in place and resolve categories.

    Pass 1, per mod: when a rule exists for its id, the rule's list fields
    are appended (de-duplicated, first-seen order), a rule's loadFirst /
    loadLast wins unless it is "off", and a rule category replaces the mod's.
    Any mod still without a known category gets one from ``derive_category``.

    Pass 2: every `X.loadBefore = [Y]` becomes `Y.loadAfter += [X]` for
    targets present in ``mods`` (mirrors updateSortingRulesLoadAfter).
    """
    index = {m.id: m for m in mods}

    def _union(base: List[str], extra: List[str]) -> List[str]:
        # Order-preserving de-duplication of base followed by extra.
        return list(dict.fromkeys(base + extra))

    for mod in mods:
        rule = rules.get(mod.id)
        if rule:
            mod.loadAfter = _union(mod.loadAfter, rule.loadAfter)
            mod.loadBefore = _union(mod.loadBefore, rule.loadBefore)
            mod.incompatibleMods = _union(mod.incompatibleMods, rule.incompatibleMods)

            first = mod.loadFirst if rule.loadFirst == "off" else rule.loadFirst
            last = mod.loadLast if rule.loadLast == "off" else rule.loadLast
            mod.loadFirst = _convert_load_category(first)
            mod.loadLast = _convert_load_category(last)

            if rule.category:
                mod.category = rule.category

        # Derive category if still undefined
        uncategorized = mod.category == "undefined" or mod.category not in CATEGORY_ORDER
        if uncategorized:
            mod.category = derive_category(mod)

    # Translate loadBefore into reverse loadAfter on the target mod
    for mod in mods:
        for target_id in mod.loadBefore:
            target = index.get(target_id)
            if target and mod.id not in target.loadAfter:
                target.loadAfter.append(mod.id)
|
||||
|
||||
|
||||
def _initial_sort_key(mod: ModInfo):
    """Mirrors initialSortMods comparator. Returns sortable tuple.

    Tuple elements, most significant first (smaller sorts earlier):
      0. is_patch - Spec G-patch: patches sort strictly last, after every
         loadLast=on map mod. Within the patch tier the remaining axes
         still apply.
      1. PREORDER rank (10000 for mods not listed in PREORDER).
      2. global loadFirst - only "on" lifts a mod ahead of all categories.
      3. global loadLast  - only "on" pushes a mod behind all categories.
      4. category rank from CATEGORY_ORDER (unknown -> rank of "undefined").
      5. in-category loadFirst - "category" lifts a mod to the front of its
         own category bucket.
      6. in-category loadLast  - "category" pushes a mod to the back of its
         own category bucket.
      7. lowercased mod id as the final, deterministic tie-breaker.

    Previously elements 2/5 and 3/6 were the same numeric value, so the
    in-category elements could never break a tie and a "category" setting
    reordered mods across category buckets instead of inside one.

    Raises:
        KeyError: if loadFirst/loadLast is not a key of LOAD_CATEGORIES.
    """
    is_patch = 1 if mod.category == "patch" else 0
    pre = PREORDER.get(mod.id, 10000)

    first = LOAD_CATEGORIES[mod.loadFirst]
    last = LOAD_CATEGORIES[mod.loadLast]
    on = LOAD_CATEGORIES["on"]
    in_category = LOAD_CATEGORIES["category"]

    return (
        is_patch,
        pre,
        0 if first == on else 1,           # global loadFirst ("on" first)
        1 if last == on else 0,            # global loadLast ("on" last)
        CATEGORY_ORDER.get(mod.category, CATEGORY_ORDER["undefined"]),
        0 if first == in_category else 1,  # in-category loadFirst
        1 if last == in_category else 0,   # in-category loadLast
        mod.id.lower(),
    )
|
||||
|
||||
|
||||
def _topological_sort(mods: List[ModInfo]) -> Tuple[List[str], List[List[str]]]:
    """Depth-first topological sort over `requirements` and `loadAfter`.

    Mods are visited in the order given; each mod's dependencies
    (requirements first, then loadAfter) are emitted before the mod itself.
    Dependencies that are not in ``mods`` are ignored.

    Returns:
        (order, cycles): ``order`` lists each mod id once, dependencies
        first. ``cycles`` holds one entry per back-edge found: the DFS path
        from the traversal root down to the repeated mod id, inclusive.
    """
    lookup = {m.id: m for m in mods}
    in_progress = set()   # ids currently on the DFS stack
    finished = set()      # ids already emitted into `order`
    order: List[str] = []
    cycles: List[List[str]] = []

    def walk(node, trail: List[str]) -> None:
        node_id = node.id
        if node_id in in_progress:
            # Back-edge: record the path that led here and stop descending.
            cycles.append(trail + [node_id])
            return
        if node_id in finished:
            return

        in_progress.add(node_id)
        next_trail = trail + [node_id]
        for dep_id in [*node.requirements, *node.loadAfter]:
            dep = lookup.get(dep_id)
            if dep:
                walk(dep, next_trail)
        in_progress.discard(node_id)
        finished.add(node_id)
        order.append(node_id)

    for root in mods:
        walk(root, [])
    return order, cycles
|
||||
|
||||
|
||||
def sort_mods(
    mods: List[ModInfo],
    rules: Optional[Dict[str, SortingRule]] = None,
) -> Dict[str, object]:
    """
    Top-level entry: sort ``mods`` and build the server .ini output blocks.

    ``mods`` is modified in place: rules are merged into each mod and the
    list itself is re-sorted.

    Returns a dict with:
      - "Mods": mod ids in load order;
      - "WorkshopItems": distinct workshop ids, in order of first appearance
        along the load order;
      - "Map": distinct map folder names, dependencies first;
      - "warnings": {"cycles", "missing_requirements", "incompatible_enabled"}.
    """
    rules = rules or {}
    _apply_overrides(mods, rules)

    # Initial deterministic sort (preorder, loadFirst, category, loadLast, alpha)
    mods.sort(key=_initial_sort_key)

    order, cycles = _topological_sort(mods)

    lookup = {m.id: m for m in mods}
    present = set(lookup)

    missing: Dict[str, List[str]] = {}
    incompat: Dict[str, List[str]] = {}
    for mod in mods:
        absent = [dep for dep in mod.requirements if dep not in present]
        if absent:
            missing[mod.id] = absent
        clashes = [other for other in mod.incompatibleMods if other in present]
        if clashes:
            incompat[mod.id] = clashes

    # Output blocks for the server's .ini file.
    # dict.fromkeys de-duplicates while keeping first-seen order.
    workshop_items: List[str] = list(
        dict.fromkeys(
            lookup[mod_id].workshop_id
            for mod_id in order
            if lookup[mod_id].workshop_id
        )
    )

    # MAP_LINE convention: dependencies first (leftmost), dependents next.
    # Vanilla Muldraugh, KY is ALWAYS appended at the very end by
    # adapters.build_response. `order` is already topo-sorted by mod-level
    # deps (require= / loadAfter= / loadBefore=), so dependencies appear
    # before their dependents — walk it forward.
    map_folders: List[str] = list(
        dict.fromkeys(
            folder for mod_id in order for folder in lookup[mod_id].maps
        )
    )

    warnings = {
        "cycles": cycles,
        "missing_requirements": missing,
        "incompatible_enabled": incompat,
    }
    return {
        "Mods": order,  # already mod IDs in load order
        "WorkshopItems": workshop_items,
        "Map": map_folders,
        "warnings": warnings,
    }
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# CLI
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main():
    """CLI entry point: load mods from a directory, sort them, print the result.

    Exits with status 2 when no mods are found under the given root. With
    ``--json`` the whole result dict is dumped as JSON; otherwise the
    ``WorkshopItems=`` / ``Mods=`` / ``Map=`` ini lines are printed, followed
    by a ``# Warnings`` section listing every non-empty warning category.
    """
    parser = argparse.ArgumentParser(
        description="Sort PZ mods by load order. Reads DepotDownloader output layout.",
    )
    parser.add_argument(
        "mods_root",
        help="Path containing <workshop_id>/mods/<mod_id>/mod.info trees",
    )
    parser.add_argument("--rules", help="Optional sorting_rules.txt path")
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output JSON instead of ini blocks",
    )
    opts = parser.parse_args()

    mods = load_mods_from_dir(Path(opts.mods_root).resolve())
    if not mods:
        print("ERROR: no mods found", file=sys.stderr)
        sys.exit(2)

    # Rules are optional; an empty mapping means "no overrides".
    rules: Dict[str, SortingRule] = (
        parse_sorting_rules(Path(opts.rules).read_text(encoding="utf-8"))
        if opts.rules
        else {}
    )

    result = sort_mods(mods, rules)

    if opts.json:
        print(json.dumps(result, indent=2))
        return

    print("WorkshopItems=" + ";".join(result["WorkshopItems"]))
    print("Mods=" + ";".join(result["Mods"]))
    if result["Map"]:
        print("Map=" + ";".join(result["Map"]))

    warnings = result["warnings"]
    reported = [
        (label, warnings[label])
        for label in ("cycles", "missing_requirements", "incompatible_enabled")
        if warnings[label]
    ]
    if reported:
        print("\n# Warnings")
        for label, payload in reported:
            print(f"# {label}:", payload)
|
||||
|
||||
|
||||
# Run the sorter CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
586
worker/worker.py
Normal file
586
worker/worker.py
Normal file
@@ -0,0 +1,586 @@
|
||||
"""
|
||||
worker.py - pzsort cache filler
|
||||
|
||||
Single-shot CLI that takes Steam Workshop IDs on argv, refreshes metadata
|
||||
from Steam's anonymous API, and only runs DepotDownloader for cache misses
|
||||
(where workshop_meta.time_updated has changed since last parse).
|
||||
|
||||
Usage:
|
||||
python3 worker.py <workshop_id> [<workshop_id> ...]
|
||||
python3 worker.py --force <workshop_id> ... # ignore cache, re-download
|
||||
|
||||
Env (or .env file):
|
||||
DATABASE_URL postgresql://pzsort:<pw>@127.0.0.1:5439/pzsort
|
||||
DD_PATH path to DepotDownloader executable
|
||||
PZ_APP_ID 108600 (default)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import asyncpg
|
||||
import httpx
|
||||
|
||||
# Reuse the parser from the sorter
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from mlos_sort import parse_mod_info, ModInfo # noqa: E402
|
||||
|
||||
# Steam app id passed to DepotDownloader's -app flag and compared against each
# item's consumer_app_id; overridable through the PZ_APP_ID env var.
PZ_APP_ID = int(os.environ.get("PZ_APP_ID", "108600"))
# Default DepotDownloader executable path (env DD_PATH); used as the --dd-path default.
DEFAULT_DD_PATH = os.environ.get("DD_PATH", "./DepotDownloader")
# Endpoint that fetch_workshop_details POSTs to.
STEAM_API = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Steam API
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def fetch_workshop_details(workshop_ids: List[str]) -> Dict[str, dict]:
    """Look up workshop item details via the legacy GetPublishedFileDetails API.

    The request is an anonymous form POST (no API key). An empty id list
    short-circuits to ``{}`` without any network traffic.

    Returns:
        Mapping of ``publishedfileid`` -> the detail dict Steam returned for it.

    Raises:
        httpx.HTTPStatusError: if Steam answers with a non-2xx status.
    """
    if not workshop_ids:
        return {}

    form: Dict[str, str] = {"itemcount": str(len(workshop_ids))}
    form.update(
        {f"publishedfileids[{idx}]": wsid for idx, wsid in enumerate(workshop_ids)}
    )

    with httpx.Client(timeout=30.0) as session:
        resp = session.post(STEAM_API, data=form)
        resp.raise_for_status()
        payload = resp.json()

    entries = payload.get("response", {}).get("publishedfiledetails", [])
    return {entry["publishedfileid"]: entry for entry in entries}
|
||||
|
||||
|
||||
def flatten_tags(detail: dict) -> List[str]:
    """Return the non-empty ``tag`` values from ``detail["tags"]``, in order.

    Entries without a ``tag`` key, or with a falsy tag, are skipped. A detail
    dict with no ``tags`` key yields an empty list.
    """
    names: List[str] = []
    for entry in detail.get("tags", []):
        label = entry.get("tag")
        if label:
            names.append(label)
    return names
|
||||
|
||||
|
||||
# Public Steam Workshop page URL. The anonymous GetPublishedFileDetails API
# does NOT return `children` for individual mods (only collections), so to
# learn a mod's "Required Items" we have to scrape the public HTML page.
_WORKSHOP_PAGE_URL = "https://steamcommunity.com/sharedfiles/filedetails/?id={wsid}"
# Captures (group 1) the markup inside the <div id="RequiredItems"> element, up to
# the first "</div></div>" pair; DOTALL lets the lazy match span multiple lines.
_RE_REQUIRED_BLOCK = re.compile(
    r'<div[^>]*id="RequiredItems"[^>]*>(.*?)</div>\s*</div>',
    re.DOTALL,
)
# Captures the numeric workshop id from each filedetails link inside that block.
_RE_REQUIRED_LINK = re.compile(r'filedetails/\?id=(\d+)')
|
||||
|
||||
# ── rate-limit safety for Steam HTML scraping ─────────────────────────────
|
||||
# Steam aggressively 429s anonymous /sharedfiles/filedetails/ HTML requests;
|
||||
# during a 2026-05-03 backfill at ~1 RPS our IP was blocked for hours and a
|
||||
# subsequent single curl probe still got 429. Two file-locked, multi-process
|
||||
# safeguards now sit in front of every scrape:
|
||||
#
|
||||
# 1. THROTTLE FILE — records the timestamp of the last attempted scrape.
|
||||
# Every worker waits via flock until at least
|
||||
# `_MIN_SCRAPE_INTERVAL_S` seconds have elapsed since the last one.
|
||||
# Serializes 4 concurrent drain processes so they can't burst.
|
||||
#
|
||||
# 2. COOLDOWN FILE — when we observe a hard 429 (after retries), we write
|
||||
# `now() + _COOLDOWN_S` here. While active, every fetch returns None
|
||||
# instantly without touching Steam, preserving cached values until the
|
||||
# IP block ages out.
|
||||
#
|
||||
# Defaults: 6s spacing → ≤10 RPM steady-state, 1h cooldown after a 429
|
||||
# storm. Overridable via SORTOF_STEAM_MIN_INTERVAL / SORTOF_STEAM_COOLDOWN.
|
||||
import fcntl as _fcntl
|
||||
|
||||
# File whose content is the epoch timestamp of the last scrape; _throttle_scrape
# flock()s it to serialize callers.
_THROTTLE_FILE = "/tmp/sortof_steam_throttle"
# File whose content is the epoch time until which scraping is suspended.
_COOLDOWN_FILE = "/tmp/sortof_steam_cooldown"
# Minimum number of seconds between two scrapes (env SORTOF_STEAM_MIN_INTERVAL).
_MIN_SCRAPE_INTERVAL_S = float(os.environ.get("SORTOF_STEAM_MIN_INTERVAL", "6"))
# Length in seconds of the cooldown armed after a final 429 (env SORTOF_STEAM_COOLDOWN).
_COOLDOWN_S = float(os.environ.get("SORTOF_STEAM_COOLDOWN", "3600"))
|
||||
|
||||
|
||||
def _read_cooldown_until() -> float:
    """Return the epoch time stored in the cooldown file, or 0.0.

    0.0 is returned when the file cannot be read, is empty, or does not
    contain a parseable float.
    """
    try:
        with open(_COOLDOWN_FILE, "r") as handle:
            text = handle.read().strip()
        if not text:
            return float(0)
        return float(text)
    except (OSError, ValueError):
        return 0.0
|
||||
|
||||
|
||||
def _write_cooldown_until(epoch_s: float) -> None:
    """Store `epoch_s` in the cooldown file, overwriting any previous value.

    Best-effort: an OSError while writing is silently ignored.
    """
    stamp = str(epoch_s)
    try:
        Path(_COOLDOWN_FILE).write_text(stamp)
    except OSError:
        return
|
||||
|
||||
|
||||
def _throttle_scrape() -> None:
    """Block until at least `_MIN_SCRAPE_INTERVAL_S` has elapsed since the
    last scrape by ANY drain process (multi-process safe via flock).

    The throttle file stores the epoch timestamp of the most recent scrape.
    The caller takes an exclusive flock on it, sleeps out whatever remains of
    the interval while still holding the lock, stamps the current time and
    only then releases the lock, so concurrent callers are spaced one
    interval apart.
    """
    import time as _t

    Path(_THROTTLE_FILE).touch(exist_ok=True)
    with open(_THROTTLE_FILE, "r+") as f:
        _fcntl.flock(f.fileno(), _fcntl.LOCK_EX)
        try:
            f.seek(0)
            raw = f.read().strip()
            try:
                last = float(raw) if raw else 0.0
            except ValueError:
                # A corrupt stamp must not wedge every future scrape; treat it
                # as "never scraped" and overwrite it below.
                last = 0.0
            wait = _MIN_SCRAPE_INTERVAL_S - (_t.time() - last)
            # A stamp in the future (clock step, garbage value) would otherwise
            # make us sleep arbitrarily long while holding the lock.
            wait = min(wait, _MIN_SCRAPE_INTERVAL_S)
            if wait > 0:
                _t.sleep(wait)
            f.seek(0)
            f.truncate()
            f.write(str(_t.time()))
            # Push the stamp to the file BEFORE unlocking; otherwise it sits in
            # the write buffer until close() and the next lock holder can read
            # a stale (empty) value.
            f.flush()
        finally:
            _fcntl.flock(f.fileno(), _fcntl.LOCK_UN)
|
||||
|
||||
|
||||
def fetch_required_wsids(
    workshop_id: str,
    timeout: int = 15,
    max_attempts: int = 4,
    backoff_429: float = 30.0,
) -> Optional[List[str]]:
    """Scrape the public Workshop page of `workshop_id` for its Required Items.

    A 429 response is retried up to `max_attempts` times, sleeping
    `backoff_429 * attempt` seconds in between; a 429 on the final attempt
    arms the shared cooldown file. Any other HTTP error aborts immediately.

    Returns
        None — fetch/parse error, persistent 429, or active cooldown.
               Caller MUST NOT overwrite the existing cached value.
        []   — page loaded successfully but has no required items section.
        list — required item wsids in declaration order, deduped, with
               `workshop_id` itself excluded.
    """
    import time as _time

    resume_at = _read_cooldown_until()
    if resume_at and _time.time() < resume_at:
        # Steam recently 429'd us — do not touch it at all.
        return None

    _throttle_scrape()
    target = _WORKSHOP_PAGE_URL.format(wsid=workshop_id)
    page: Optional[str] = None

    for attempt in range(1, max_attempts + 1):
        try:
            with httpx.Client(timeout=timeout, follow_redirects=True) as session:
                reply = session.get(target)
            if reply.status_code != 429:
                reply.raise_for_status()
                page = reply.text
                break
            if attempt < max_attempts:
                _time.sleep(backoff_429 * attempt)
                continue
            # Out of retries: arm the global cooldown so every worker
            # (including this one, next call) skips Steam entirely.
            _write_cooldown_until(_time.time() + _COOLDOWN_S)
            print(f" ! required_wsids 429 (gave up) for {workshop_id}; "
                  f"cooldown {int(_COOLDOWN_S)}s armed", file=sys.stderr)
            return None
        except (httpx.HTTPError, httpx.TimeoutException) as e:
            print(f" ! required_wsids fetch failed for {workshop_id}: {e}",
                  file=sys.stderr)
            return None

    if page is None:
        return None

    block = _RE_REQUIRED_BLOCK.search(page)
    if not block:
        return []

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    ordered = dict.fromkeys(
        wsid
        for wsid in _RE_REQUIRED_LINK.findall(block.group(1))
        if wsid != workshop_id
    )
    return list(ordered)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# DepotDownloader
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_depot_downloader(
    workshop_id: str,
    output_dir: Path,
    dd_path: Path,
    filelist_regex: str = r"regex:.*\.info$",
    timeout: int = 300,
    max_attempts: int = 3,
    backoff_s: float = 2.0,
) -> bool:
    """Download one workshop item with DepotDownloader, restricted by a filelist.

    `output_dir` is created if needed and a `_filelist.txt` containing
    `filelist_regex` is written into it; with the default regex only `.info`
    files are requested. DepotDownloader is then invoked with
    `-app PZ_APP_ID -pubfile <workshop_id> -filelist ... -dir <output_dir>`.

    A non-zero exit code or a timeout is retried, up to `max_attempts`
    invocations in total with `backoff_s` seconds of sleep between them.
    The caller may still retry at a higher level; retrying here avoids a full
    re-claim cycle for the common transient failure.

    Returns:
        True as soon as one invocation exits with code 0, False once all
        attempts are exhausted.
    """
    import time as _time

    output_dir.mkdir(parents=True, exist_ok=True)
    listing = output_dir / "_filelist.txt"
    listing.write_text(filelist_regex + "\n", encoding="utf-8")

    argv = [
        str(dd_path),
        "-app", str(PZ_APP_ID),
        "-pubfile", workshop_id,
        "-filelist", str(listing),
        "-dir", str(output_dir),
    ]

    failure = ""
    for attempt in range(1, max_attempts + 1):
        try:
            completed = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=timeout,
                check=False,
            )
        except subprocess.TimeoutExpired:
            completed = None

        if completed is None:
            failure = "timeout"
            print(f" ! DepotDownloader timeout for {workshop_id} (attempt {attempt}/{max_attempts})",
                  file=sys.stderr)
        elif completed.returncode == 0:
            if attempt > 1:
                print(f" ✓ DepotDownloader recovered for {workshop_id} on attempt {attempt}",
                      file=sys.stderr)
            return True
        else:
            failure = f"rc={completed.returncode}"
            print(f" ! DepotDownloader rc={completed.returncode} for {workshop_id} "
                  f"(attempt {attempt}/{max_attempts})", file=sys.stderr)
            # Show the tail of stderr, falling back to stdout when stderr is empty.
            tail_src = completed.stderr if completed.stderr else completed.stdout
            print(tail_src[-500:], file=sys.stderr)

        if attempt < max_attempts:
            _time.sleep(backoff_s)

    print(f" !! DepotDownloader gave up on {workshop_id} after {max_attempts} attempts (last: {failure})",
          file=sys.stderr)
    return False
|
||||
|
||||
|
||||
def discover_mod_infos(output_dir: Path) -> List[Path]:
    """Return every mod.info path under `output_dir`, sorted.

    Two layouts are searched:
      B41: mods/<mod_id>/mod.info
      B42: mods/<mod_id>/<gameVersion>/mod.info   e.g. mods/Foo/42/mod.info
    A single mod can ship both, so the same mod id may appear more than once;
    callers that upsert in the returned order keep whichever variant sorts
    last.
    """
    root_level = output_dir.glob("mods/*/mod.info")
    versioned = output_dir.glob("mods/*/*/mod.info")
    return sorted([*root_level, *versioned])
|
||||
|
||||
|
||||
def discover_map_folders(mip_parent: Path) -> List[str]:
    """List the map folder names shipped by the mod whose mod.info is in `mip_parent`.

    Three layouts coexist:
      B41:       mods/<modId>/mod.info
                 mods/<modId>/media/maps/<x>/map.info
      B42:       mods/<modId>/<branch>/mod.info   (branch is e.g. '42', '42.13')
                 mods/<modId>/<branch>/media/maps/<x>/map.info
      B42 split: mod.info under '42/' but map data under a sibling 'common/'
                 branch, which is why the search starts from the mod-id root
                 and covers every branch below it.

    Returns:
        Folder names (the directory holding each map.info), ordered by the
        sorted map.info paths and deduplicated on first occurrence.
    """
    # If our parent directory is "mods", mip_parent already is the mod-id root;
    # otherwise mip_parent is a branch directory one level below it.
    if mip_parent.parent.name == "mods":
        root = mip_parent
    else:
        root = mip_parent.parent

    hits = sorted(
        [*root.glob("media/maps/*/map.info"), *root.glob("*/media/maps/*/map.info")]
    )
    return list(dict.fromkeys(hit.parent.name for hit in hits))
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# DB upserts
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Insert-or-update one workshop_meta row keyed by workshop_id. Takes 12
# positional parameters in the column order listed; last_checked_at is always
# set to now(), on insert and on update.
UPSERT_WORKSHOP_META = """
INSERT INTO workshop_meta (
workshop_id, title, description, tags, creator_steamid,
time_created, time_updated, file_size, preview_url,
consumer_app_id, visibility, banned, last_checked_at
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12, now())
ON CONFLICT (workshop_id) DO UPDATE SET
title = EXCLUDED.title,
description = EXCLUDED.description,
tags = EXCLUDED.tags,
creator_steamid = EXCLUDED.creator_steamid,
time_created = EXCLUDED.time_created,
time_updated = EXCLUDED.time_updated,
file_size = EXCLUDED.file_size,
preview_url = EXCLUDED.preview_url,
consumer_app_id = EXCLUDED.consumer_app_id,
visibility = EXCLUDED.visibility,
banned = EXCLUDED.banned,
last_checked_at = now();
"""

# Parameters: $1 = workshop_id about to claim the mod, $2 = mod_id.
# Re-recording an existing (mod_id, evicting, evicted) triple only bumps recorded_at.
EVICT_AND_RECORD_CONFLICT = """
-- Per the cache invariant: a mod_id is owned by exactly one wsid at a time.
-- When we're about to UPSERT (wsid, mod_id), evict any (other_wsid, mod_id)
-- claims so the new pull becomes canonical, and record the eviction in
-- mod_id_conflicts so /api/sort can warn users who paste the displaced wsid.
WITH evicted AS (
DELETE FROM mod_parsed
WHERE mod_id = $2 AND workshop_id <> $1
RETURNING workshop_id
)
INSERT INTO mod_id_conflicts (mod_id, evicting_wsid, evicted_wsid)
SELECT $2, $1, workshop_id FROM evicted
ON CONFLICT (mod_id, evicting_wsid, evicted_wsid)
DO UPDATE SET recorded_at = now();
"""

# Insert-or-update one mod_parsed row keyed by (workshop_id, mod_id). Takes 16
# positional parameters in the column order listed; parsed_at is always set to now().
UPSERT_MOD_PARSED = """
INSERT INTO mod_parsed (
workshop_id, mod_id, name, category,
requirements, load_after, load_before, incompatible_mods,
load_first, load_last, tags, maps,
raw_mod_info, version_min, is_addon,
parsed_at_time_updated, parsed_at
) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16, now())
ON CONFLICT (workshop_id, mod_id) DO UPDATE SET
name = EXCLUDED.name,
category = EXCLUDED.category,
requirements = EXCLUDED.requirements,
load_after = EXCLUDED.load_after,
load_before = EXCLUDED.load_before,
incompatible_mods = EXCLUDED.incompatible_mods,
load_first = EXCLUDED.load_first,
load_last = EXCLUDED.load_last,
tags = EXCLUDED.tags,
maps = EXCLUDED.maps,
raw_mod_info = EXCLUDED.raw_mod_info,
version_min = EXCLUDED.version_min,
is_addon = EXCLUDED.is_addon,
parsed_at_time_updated = EXCLUDED.parsed_at_time_updated,
parsed_at = now();
"""
|
||||
|
||||
# Description-text heuristic for "this mod is an optional add-on to the
|
||||
# primary mod published by the same wsid". Matches:
|
||||
# "Optional add-on: removes ..." (TMMumble)
|
||||
# "optional addon ..."
|
||||
# "Optional add on ..."
|
||||
# Strict "optional + add-on" keyword pair to avoid false positives on
|
||||
# generic "addon" naming. Author-driven signal — set via the description=
|
||||
# field of mod.info.
|
||||
# Matches a `description=` line that contains "Optional add-on", "Optional addon"
# or "Optional add on" (case-insensitive) anywhere after the equals sign.
_RE_OPTIONAL_ADDON = re.compile(
    r"description\s*=\s*[^\r\n]*\bOptional\s+Add[- ]?on\b",
    flags=re.IGNORECASE,
)


def detect_is_addon(raw: str) -> bool:
    """Tell whether raw mod.info text describes itself as an optional add-on.

    `raw` may be None or empty, in which case the answer is False.
    """
    text = raw if raw else ""
    return _RE_OPTIONAL_ADDON.search(text) is not None
|
||||
|
||||
# Delete every mod_parsed row of workshop $1 whose mod_id is not in the
# text[] array $2 (the mod ids that should be kept).
DELETE_STALE_MOD_PARSED = """
DELETE FROM mod_parsed
WHERE workshop_id = $1 AND mod_id <> ALL($2::text[]);
"""

# Select the mod_ids of workshop $1 that were parsed at time_updated $2;
# process_one treats any returned row as a cache hit.
CHECK_PARSED_FRESH = """
SELECT mod_id FROM mod_parsed
WHERE workshop_id = $1 AND parsed_at_time_updated = $2;
"""
|
||||
|
||||
|
||||
def extract_version_min(raw: str) -> Optional[str]:
    """Return the value of the first ``versionMin`` line in raw mod.info text.

    The key is matched case-insensitively at the start of the stripped line.
    Only the first matching line is considered; if it has no ``=`` or an empty
    value, None is returned. None is also returned when no line matches.
    """
    first_hit = next(
        (
            candidate
            for candidate in raw.splitlines()
            if candidate.strip().lower().startswith("versionmin")
        ),
        None,
    )
    if first_hit is None:
        return None
    value = first_hit.partition("=")[2].strip()
    return value if value else None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Main flow
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def process_one(
    conn: asyncpg.Connection,
    workshop_id: str,
    detail: dict,
    dd_path: Path,
    force: bool,
) -> str:
    """Refresh the cached rows for one workshop item and report the outcome.

    Returns 'hit' | 'refreshed' | 'banned' | 'missing' | 'no_mod_info' | 'failed'.

    'no_mod_info' = DepotDownloader succeeded but the workshop item contained
    no parseable mod.info file (typical for collections, art-only items, and
    other non-mod uploads that share the PZ consumer_app_id). Distinct from
    'failed' (DD itself errored), so the API can surface "this isn't a mod"
    differently from "we couldn't fetch this."

    Args:
        conn: open asyncpg connection used for every statement.
        workshop_id: Steam Workshop id being processed.
        detail: the GetPublishedFileDetails entry for `workshop_id`.
        dd_path: path to the DepotDownloader executable.
        force: when True, skip the freshness check and always re-download.
    """
    # Pre-flight: bad results
    if detail.get("result") != 1:
        return "missing"
    if detail.get("banned"):
        return "banned"
    if detail.get("consumer_app_id") != PZ_APP_ID:
        return "failed"  # wrong app

    time_updated = int(detail.get("time_updated", 0))

    # Always refresh meta (cheap)
    await conn.execute(
        UPSERT_WORKSHOP_META,
        workshop_id,
        detail.get("title", ""),
        detail.get("description", "") or "",
        flatten_tags(detail),
        str(detail.get("creator", "")) or None,
        int(detail.get("time_created", 0)) or None,
        time_updated,
        int(detail.get("file_size", 0)) or None,
        detail.get("preview_url"),
        detail.get("consumer_app_id"),
        detail.get("visibility"),
        bool(detail.get("banned", False)),
    )

    # Cache check: any row parsed at this exact time_updated means we are current.
    if not force:
        rows = await conn.fetch(CHECK_PARSED_FRESH, workshop_id, time_updated)
        if rows:
            return "hit"

    # Cache miss → download + parse
    with tempfile.TemporaryDirectory(prefix=f"pzsort_{workshop_id}_") as tmpdir:
        tmp = Path(tmpdir)
        # run_depot_downloader blocks on subprocess.run (and sleeps between
        # retries); run it in a worker thread so the event loop stays
        # responsive, same as the Workshop page scrape below.
        ok = await asyncio.to_thread(run_depot_downloader, workshop_id, tmp, dd_path)
        if not ok:
            return "failed"

        mod_info_paths = discover_mod_infos(tmp)
        if not mod_info_paths:
            print(f" ! no mod.info found in {workshop_id}", file=sys.stderr)
            return "no_mod_info"

        seen_mod_ids: List[str] = []
        for mip in mod_info_paths:
            raw = mip.read_text(encoding="utf-8", errors="replace")
            mod = parse_mod_info(raw, workshop_id=workshop_id)
            if mod is None:
                continue
            maps = discover_map_folders(mip.parent)
            # Evict any other wsid's claim on this mod_id before we install
            # ours. Cache invariant: at most one wsid per mod_id, with the
            # most-recent pull winning.
            await conn.execute(EVICT_AND_RECORD_CONFLICT, workshop_id, mod.id)
            await conn.execute(
                UPSERT_MOD_PARSED,
                workshop_id,
                mod.id,
                mod.name,
                mod.category,
                mod.requirements,
                mod.loadAfter,
                mod.loadBefore,
                mod.incompatibleMods,
                mod.loadFirst,
                mod.loadLast,
                mod.tags,
                maps,
                raw,
                extract_version_min(raw),
                detect_is_addon(raw),
                time_updated,
            )
            seen_mod_ids.append(mod.id)

        if not seen_mod_ids:
            # mod.info files exist but none of them parsed: nothing was stored,
            # so this is the documented 'no_mod_info' outcome, not a refresh.
            print(f" ! no parseable mod.info in {workshop_id}", file=sys.stderr)
            return "no_mod_info"

        # Drop rows for mods that no longer exist in this workshop item
        await conn.execute(DELETE_STALE_MOD_PARSED, workshop_id, seen_mod_ids)

    # Scrape the public Workshop page for the "Required Items" section so the
    # API can auto-resolve missing-dep warnings against this mod's declared
    # Steam-side dependencies. Best-effort: None on fetch error → leave the
    # existing cached value; [] or list → overwrite.
    required = await asyncio.to_thread(fetch_required_wsids, workshop_id)
    if required is not None:
        await conn.execute(
            """
            UPDATE workshop_meta
            SET required_wsids = $1, required_scraped_at = now()
            WHERE workshop_id = $2
            """,
            required, workshop_id,
        )

    return "refreshed"
|
||||
|
||||
|
||||
async def main_async(workshop_ids: List[str], dd_path: Path, force: bool, dsn: str) -> int:
    """Process every requested workshop id over one DB connection.

    Fetches Steam metadata for all ids up front, runs `process_one` for each
    id Steam returned, prints one status line per id plus a final summary.

    Returns:
        Process exit code: 0 when no item ended as 'failed', otherwise 1.
    """
    print(f"[steam] fetching metadata for {len(workshop_ids)} item(s)")
    details = fetch_workshop_details(workshop_ids)
    missing_from_steam = [w for w in workshop_ids if w not in details]
    if missing_from_steam:
        print(f"[steam] no detail returned for: {missing_from_steam}", file=sys.stderr)

    # One counter per status process_one can return. 'no_mod_info' must be
    # present, otherwise the first non-mod item aborts the run with KeyError.
    summary: Dict[str, int] = {
        "hit": 0,
        "refreshed": 0,
        "banned": 0,
        "missing": 0,
        "no_mod_info": 0,
        "failed": 0,
    }

    conn = await asyncpg.connect(dsn=dsn)
    try:
        for wid in workshop_ids:
            detail = details.get(wid)
            if detail is None:
                summary["missing"] += 1
                print(f" - {wid} -> missing (no Steam response)")
                continue
            status = await process_one(conn, wid, detail, dd_path, force)
            # .get() keeps the tally working even if a new status is added
            # to process_one without updating the dict above.
            summary[status] = summary.get(status, 0) + 1
            print(f" - {wid} -> {status}")
    finally:
        await conn.close()

    print(f"[done] {summary}")
    return 0 if summary["failed"] == 0 else 1
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse argv, validate inputs, run `main_async`.

    Exits with status 2 when no DSN is available or the DepotDownloader
    path is not a file; otherwise exits with the code `main_async` returns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("workshop_ids", nargs="+")
    parser.add_argument(
        "--force",
        action="store_true",
        help="ignore cache, always re-download",
    )
    parser.add_argument("--dd-path", default=DEFAULT_DD_PATH)
    parser.add_argument("--dsn", default=os.environ.get("DATABASE_URL"))
    opts = parser.parse_args()

    if not opts.dsn:
        print("ERROR: --dsn or DATABASE_URL required", file=sys.stderr)
        sys.exit(2)

    downloader = Path(opts.dd_path)
    if not downloader.is_file():
        print(f"ERROR: DepotDownloader not found at {downloader}", file=sys.stderr)
        sys.exit(2)

    sys.exit(
        asyncio.run(main_async(opts.workshop_ids, downloader, opts.force, opts.dsn))
    )
|
||||
|
||||
|
||||
# Run the worker CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user