Add full sortof codebase: API, drain workers, frontend, schema, specs
This commit is contained in:
75
api/parse.py
Normal file
75
api/parse.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Parse a raw textarea blob into a deduped, ordered list of workshop IDs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
|
||||
def parse_workshop_input(text: str) -> List[str]:
    """Extract workshop IDs from a raw text blob.

    Leading ``WorkshopItems=``, ``Mods=`` or ``Map=`` keys (matched
    case-insensitively at the start of any line) are stripped, then every
    standalone run of 7-12 digits is collected.

    Args:
        text: Raw input such as pasted textarea contents. Empty or ``None``
            input yields an empty list.

    Returns:
        The IDs as strings, deduplicated, in first-seen order.
    """
    # Same guard as parse_with_collections: nothing to parse, and re.sub
    # would raise TypeError on None.
    if not text:
        return []

    cleaned = re.sub(
        r"^\s*(WorkshopItems|Mods|Map)\s*=\s*",
        "",
        text,
        flags=re.MULTILINE | re.IGNORECASE,
    )
    # \b on both sides rejects digit runs shorter than 7 or longer than 12
    # instead of slicing a valid-looking ID out of them.
    ids = re.findall(r"\b\d{7,12}\b", cleaned)

    # dicts preserve insertion order, so this dedupes while keeping each ID
    # at the position of its first occurrence.
    return list(dict.fromkeys(ids))
|
||||
|
||||
|
||||
# Steam Workshop URL form:
#   https://steamcommunity.com/{sharedfiles,workshop}/filedetails/?id=NNNNNNN
# Group 1 captures the 7-12 digit ID. The trailing (?!\d) makes an ID longer
# than 12 digits fail to match, rather than silently capturing its first 12
# digits and leaving the remainder behind when the URL is substituted out.
_STEAM_URL_RE = re.compile(
    r"https?://steamcommunity\.com/(?:sharedfiles|workshop)/filedetails/\?id=(\d{7,12})(?!\d)",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def parse_with_collections(text: str) -> tuple[List[str], List[str]]:
    """Separate plain workshop IDs from IDs that arrived as Steam URLs.

    An ID captured from a Steam Workshop URL is returned as a *candidate*
    collection ID; every other standalone 7-12 digit number in the blob is
    returned as a mod wsid. Steam's syntax cannot tell a collection ID from
    a mod ID, so the candidates still have to be confirmed through
    GetCollectionDetails, and the caller moves any that turn out not to be
    collections back into the wsid list.

    Returns (wsids, collection_ids). Both lists are deduplicated and keep
    first-seen order; an ID seen in a URL is never repeated in wsids.
    """
    if not text:
        return ([], [])

    # URL-form IDs are collected first so they can be excluded from the
    # bare-number pass below. dict.fromkeys dedupes in first-seen order.
    collection_ids: List[str] = list(
        dict.fromkeys(match.group(1) for match in _STEAM_URL_RE.finditer(text))
    )
    from_urls = set(collection_ids)

    # Drop the URLs, then the config-style keys, so only bare numbers remain.
    remainder = _STEAM_URL_RE.sub("", text)
    remainder = re.sub(
        r"^\s*(WorkshopItems|Mods|Map)\s*=\s*",
        "",
        remainder,
        flags=re.MULTILINE | re.IGNORECASE,
    )

    wsids: List[str] = [
        wsid
        for wsid in dict.fromkeys(re.findall(r"\b\d{7,12}\b", remainder))
        if wsid not in from_urls
    ]
    return (wsids, collection_ids)
|
||||
Reference in New Issue
Block a user