Add full sortof codebase: API, drain workers, frontend, schema, specs

This commit is contained in:
2026-05-04 03:27:54 +00:00
parent acda2c90f8
commit 55d3794bfb
43 changed files with 13375 additions and 53 deletions

127
init/01_schema.sql Normal file
View File

@@ -0,0 +1,127 @@
-- pzsort schema
-- Run as: psql -U pzsort -d pzsort -f init/01_schema.sql
-- pgcrypto supplies gen_random_uuid(), which the UUID primary keys in this
-- schema use as their default.
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- -----------------------------------------------------------------------------
-- workshop_meta
--   One row per Steam Workshop item, keyed by its Steam publishedfileid
--   (kept as TEXT to avoid bigint surprises). Rows are cheap and refreshed
--   often via Steam ISteamRemoteStorage/GetPublishedFileDetails.
--   time_updated is the cache-invalidation key for mod_parsed.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS workshop_meta (
    workshop_id      TEXT,
    title            TEXT,
    description      TEXT,
    tags             TEXT[]      NOT NULL DEFAULT '{}',
    creator_steamid  TEXT,
    time_created     BIGINT,                             -- unix ts from Steam
    time_updated     BIGINT      NOT NULL,               -- unix ts; cache invalidation key
    file_size        BIGINT,
    preview_url      TEXT,
    consumer_app_id  INTEGER,                            -- 108600 for PZ
    visibility       INTEGER,                            -- 0=public, 1=friends, 2=private
    banned           BOOLEAN     NOT NULL DEFAULT FALSE,
    last_checked_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    created_at       TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (workshop_id)
);

-- Supports lookups and ordering by when a row was last checked.
CREATE INDEX IF NOT EXISTS workshop_meta_last_checked_idx
    ON workshop_meta (last_checked_at);
-- -----------------------------------------------------------------------------
-- mod_parsed
--   Parsed mod.info data. Expensive to produce (requires a DepotDownloader
--   fetch), so it is only refreshed when workshop_meta.time_updated differs
--   from parsed_at_time_updated. A single workshop item can yield N rows
--   (multi-mod packages), hence the composite primary key.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS mod_parsed (
    workshop_id             TEXT        NOT NULL,
    mod_id                  TEXT        NOT NULL,                     -- mod.info `id=`
    name                    TEXT        NOT NULL DEFAULT '',
    category                TEXT        NOT NULL DEFAULT 'undefined',
    requirements            TEXT[]      NOT NULL DEFAULT '{}',
    load_after              TEXT[]      NOT NULL DEFAULT '{}',
    load_before             TEXT[]      NOT NULL DEFAULT '{}',
    incompatible_mods       TEXT[]      NOT NULL DEFAULT '{}',
    load_first              TEXT        NOT NULL DEFAULT 'off',
    load_last               TEXT        NOT NULL DEFAULT 'off',
    tags                    TEXT[]      NOT NULL DEFAULT '{}',
    maps                    TEXT[]      NOT NULL DEFAULT '{}',        -- map folder names
    raw_mod_info            TEXT,                                     -- original file for debugging
    version_min             TEXT,                                     -- e.g. 41.55
    parsed_at_time_updated  BIGINT      NOT NULL,                     -- snapshot of workshop_meta.time_updated at parse
    parsed_at               TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (workshop_id, mod_id),
    -- Deleting a workshop_meta row removes its parsed mods with it.
    FOREIGN KEY (workshop_id)
        REFERENCES workshop_meta (workshop_id)
        ON DELETE CASCADE
);

-- The primary key leads with workshop_id; this index covers lookups by
-- mod_id alone.
CREATE INDEX IF NOT EXISTS mod_parsed_mod_id_idx
    ON mod_parsed (mod_id);
-- -----------------------------------------------------------------------------
-- download_jobs
--   Work queue for the DepotDownloader worker. One job per workshop_id.
--   Worker dequeues (status='queued') ORDER BY priority DESC, created_at ASC.
--
--   NOTE(review): no UNIQUE constraint on workshop_id enforces "one job per
--   workshop_id" here; presumably the enqueueing code deduplicates — confirm.
--   NOTE(review): status values are documented below but not constrained by
--   a CHECK; confirm the writer code before adding one.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS download_jobs (
    id            UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    workshop_id   TEXT        NOT NULL,
    status        TEXT        NOT NULL DEFAULT 'queued',  -- queued|downloading|parsing|done|failed
    priority      INTEGER     NOT NULL DEFAULT 0,         -- higher first
    attempts      INTEGER     NOT NULL DEFAULT 0,
    error         TEXT,
    requested_by  TEXT,                                   -- IP hash or user token; for rate limiting
    created_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    completed_at  TIMESTAMPTZ
);

-- Dequeue index. The partial predicate already pins status to 'queued', so
-- status is left out of the key: every entry would carry the same value, and
-- (priority DESC, created_at ASC) alone matches the worker's ORDER BY.
-- Databases that already have an index with this name keep their existing
-- definition, because IF NOT EXISTS makes this statement a no-op there.
CREATE INDEX IF NOT EXISTS download_jobs_dequeue_idx
    ON download_jobs (priority DESC, created_at ASC)
    WHERE status = 'queued';

-- Lookup of jobs by workshop item.
CREATE INDEX IF NOT EXISTS download_jobs_workshop_idx
    ON download_jobs (workshop_id);
-- Row-level trigger function: stamps updated_at with now() on the row being
-- written and lets the write proceed. Attach it with a BEFORE UPDATE trigger
-- on any table that has an updated_at column.
CREATE OR REPLACE FUNCTION touch_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $touch$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$touch$;
-- Drop and recreate so the file can be re-run against an existing database.
DROP TRIGGER IF EXISTS download_jobs_touch ON download_jobs;

CREATE TRIGGER download_jobs_touch
    BEFORE UPDATE
    ON download_jobs
    FOR EACH ROW
    EXECUTE FUNCTION touch_updated_at();
-- -----------------------------------------------------------------------------
-- collections
--   Cached results of ISteamRemoteStorage/GetCollectionDetails. A collection
--   expands to N child workshop_ids; that mapping is what gets cached here.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS collections (
    collection_id       TEXT,
    title               TEXT,
    child_workshop_ids  TEXT[]      NOT NULL DEFAULT '{}',
    last_fetched_at     TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (collection_id)
);
-- -----------------------------------------------------------------------------
-- sort_requests
--   Optional log of submitted sort jobs, kept for debugging and abuse triage.
--   Sorting works without it. Keep the TTL short via a cron.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS sort_requests (
    id                   UUID        DEFAULT gen_random_uuid(),
    input_workshop_ids   TEXT[]      NOT NULL,
    input_collection_id  TEXT,
    cache_hits           INTEGER     NOT NULL DEFAULT 0,
    cache_misses         INTEGER     NOT NULL DEFAULT 0,
    requested_by         TEXT,
    created_at           TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id)
);

-- Index on the creation timestamp (the column an age-based cleanup filters on).
CREATE INDEX IF NOT EXISTS sort_requests_created_idx
    ON sort_requests (created_at);

29
init/02_sort_jobs.sql Normal file
View File

@@ -0,0 +1,29 @@
-- Async sort jobs: lifecycle + result for collection expansion + cold drains.
-- Created 2026-05-01 (Spec B+F).
--
-- Prerequisites from 01_schema.sql: the touch_updated_at() function and the
-- pgcrypto extension. Docker initdb runs files alphabetically, so 01_ runs
-- before this file; when applying it one-shot with psql to a live database,
-- both prerequisites are already in place.
CREATE TABLE IF NOT EXISTS sort_jobs (
    job_id            UUID        DEFAULT gen_random_uuid(),
    phase             TEXT        NOT NULL CHECK (phase IN ('expanding','queued','draining','done','failed')),
    phase_started_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    created_at        TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT now(),
    input_raw         TEXT        NOT NULL,
    collection_ids    TEXT[]      NOT NULL DEFAULT '{}',
    wsids             TEXT[],
    rules_raw         TEXT,
    result_json       JSONB,
    failure_reason    TEXT,
    PRIMARY KEY (job_id)
);

CREATE INDEX IF NOT EXISTS sort_jobs_phase_idx
    ON sort_jobs (phase);

CREATE INDEX IF NOT EXISTS sort_jobs_updated_idx
    ON sort_jobs (updated_at);
-- Keep sort_jobs.updated_at fresh on every UPDATE. Dropped and recreated so
-- the file can be re-run against an existing database.
DROP TRIGGER IF EXISTS sort_jobs_touch ON sort_jobs;

CREATE TRIGGER sort_jobs_touch
    BEFORE UPDATE
    ON sort_jobs
    FOR EACH ROW
    EXECUTE FUNCTION touch_updated_at();

View File

@@ -0,0 +1,13 @@
-- Log of every mod_id eviction the worker performs, i.e. one wsid claiming a
-- mod_id that another wsid previously held. /api/sort reads it to warn users
-- whose input contains several wsids declaring the same mod_id (PZ silently
-- loads only one; the others' folders end up dead weight on the server).
CREATE TABLE IF NOT EXISTS mod_id_conflicts (
    mod_id         TEXT        NOT NULL,
    evicting_wsid  TEXT        NOT NULL,
    evicted_wsid   TEXT        NOT NULL,
    recorded_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (mod_id, evicting_wsid, evicted_wsid)
);

-- The primary key leads with mod_id; these cover lookups from either wsid.
CREATE INDEX IF NOT EXISTS mod_id_conflicts_evicted_idx
    ON mod_id_conflicts (evicted_wsid);

CREATE INDEX IF NOT EXISTS mod_id_conflicts_evicting_idx
    ON mod_id_conflicts (evicting_wsid);

View File

@@ -0,0 +1,12 @@
-- required_wsids holds the wsids listed in the "Required Items" section of a
-- mod's Steam Workshop page. They are scraped from the public HTML because
-- Steam's anonymous GetPublishedFileDetails endpoint does not include
-- children for individual mods.
--
-- What it is used for:
--   1. Auto-resolving missing-dep warnings: when a cached mod_id Y is missing
--      dep X, Y's source wsid's required_wsids are inspected and any uncached
--      wsids are auto-queued — the next sort resolves X.
--   2. Surfacing "↗ add <wsid>" actions for unresolved deps so the user can
--      pull them with one click.
ALTER TABLE workshop_meta
    ADD COLUMN IF NOT EXISTS required_wsids TEXT[] NOT NULL DEFAULT '{}';

View File

@@ -0,0 +1,7 @@
-- Timestamp of the last successful scrape of a wsid's "Required Items"
-- section. required_wsids alone is ambiguous: "scraped successfully, zero
-- required items" and "never scraped" both appear as the empty {} schema
-- default. Backfill jobs and the missing-dep auto-resolver use this column to
-- skip pages already known to be empty. NULL means never scraped.
ALTER TABLE workshop_meta
    ADD COLUMN IF NOT EXISTS required_scraped_at TIMESTAMPTZ;

View File

@@ -0,0 +1,5 @@
-- The user's chosen pz_build, captured when the job is created. The
-- polling-path result regen (`_build_result_for_job`) needs it to emit
-- build-mismatch warnings against that build, matching what the sync path
-- emits.
ALTER TABLE sort_jobs
    ADD COLUMN IF NOT EXISTS pz_build TEXT;

View File

@@ -0,0 +1,7 @@
-- Flags a mod_id as an "optional add-on" inside a multi-mod wsid. The signal
-- is `Optional add-on` (or close variants) at the head of the mod.info
-- description. Spec A's branch picker handles addon mods additively
-- (default-off, tickable to load alongside the primary) rather than as a
-- mutually-exclusive flavor variant.
ALTER TABLE mod_parsed
    ADD COLUMN IF NOT EXISTS is_addon BOOLEAN NOT NULL DEFAULT FALSE;

View File

@@ -0,0 +1,20 @@
-- Community-reported broken mods, one row per (workshop_id, version).
-- Re-submitting an existing pair upserts: updated_at is refreshed while the
-- accumulated votes are preserved. The list view sorts by updated_at DESC,
-- so a re-report bubbles the entry back to the top with its previous up/down
-- counts intact.
CREATE TABLE IF NOT EXISTS broken_mod_reports (
    id           BIGSERIAL,
    workshop_id  TEXT        NOT NULL,
    mod_name     TEXT,
    version      TEXT        NOT NULL,
    upvotes      INTEGER     NOT NULL DEFAULT 0,
    downvotes    INTEGER     NOT NULL DEFAULT 0,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (id),
    UNIQUE (workshop_id, version)
);

-- Backs the list view's ORDER BY updated_at DESC.
CREATE INDEX IF NOT EXISTS broken_mod_reports_updated_idx
    ON broken_mod_reports (updated_at DESC);

-- NOTE(review): the index behind UNIQUE (workshop_id, version) already leads
-- with workshop_id, so this one may be redundant — confirm with EXPLAIN
-- before dropping it.
CREATE INDEX IF NOT EXISTS broken_mod_reports_wsid_idx
    ON broken_mod_reports (workshop_id);