76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
"""Parse a raw textarea blob into a deduped, ordered list of workshop IDs."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import List
|
|
|
|
|
|
def parse_workshop_input(text: str) -> List[str]:
    """Extract workshop IDs from a raw text blob.

    A leading ``WorkshopItems=``, ``Mods=`` or ``Map=`` key at the start of
    any line (case-insensitive, surrounding whitespace allowed) is stripped
    first. Every standalone run of 7 to 12 digits in what remains is then
    taken as an ID.

    Args:
        text: Raw user input. Empty or ``None`` input yields an empty list,
            matching the guard in ``parse_with_collections``.

    Returns:
        The IDs as strings, deduplicated, in first-seen order.
    """
    if not text:
        return []

    cleaned = re.sub(
        r"^\s*(WorkshopItems|Mods|Map)\s*=\s*",
        "",
        text,
        flags=re.MULTILINE | re.IGNORECASE,
    )
    # \b on both sides rejects digit runs embedded in longer words/numbers
    # (e.g. a 13-digit number is skipped entirely rather than truncated).
    matches = re.findall(r"\b\d{7,12}\b", cleaned)
    # dict preserves insertion order, so this dedupes while keeping the
    # first occurrence of each ID in place.
    return list(dict.fromkeys(matches))
|
|
|
|
|
|
# Steam Workshop URL form: https://steamcommunity.com/{sharedfiles,workshop}/filedetails/?id=NNNNNNN
# The (?!\d) lookahead keeps an over-long digit run from being truncated to
# its first 12 digits; such a URL is simply not matched, mirroring the \b
# boundaries used for bare IDs.
_STEAM_URL_RE = re.compile(
    r"https?://steamcommunity\.com/(?:sharedfiles|workshop)/filedetails/\?id=(\d{7,12})(?!\d)",
    re.IGNORECASE,
)


def parse_with_collections(text: str) -> tuple[List[str], List[str]]:
    """Split an input blob into bare wsids and candidate collection IDs.

    A "candidate collection" is any 7-12-digit ID that appears inside a
    Steam Workshop URL. Bare numeric IDs in the same blob are treated as
    mod wsids. Steam doesn't syntactically distinguish collection IDs from
    mod IDs; the candidate list is sent to GetCollectionDetails to confirm.
    If a candidate isn't actually a collection, the caller falls back to
    treating it as a wsid.

    Args:
        text: Raw user input. Empty or ``None`` input yields two empty lists.

    Returns:
        ``(wsids, collection_ids)``, each deduplicated and in first-seen
        order. An ID that appears both in a URL and bare is reported only
        in ``collection_ids``.
    """
    if not text:
        return ([], [])

    # 1. Collect URL-form IDs first so they are not double-counted as bare.
    #    findall returns the single capture group; dict.fromkeys dedupes
    #    while preserving first-seen order.
    url_ids: List[str] = list(dict.fromkeys(_STEAM_URL_RE.findall(text)))
    url_id_set = set(url_ids)

    # 2. Remove the URLs so their digits are not seen by the bare-ID scan.
    text_minus_urls = _STEAM_URL_RE.sub("", text)

    # 3. Bare wsids: same key-stripping and digit regex as
    #    parse_workshop_input.
    cleaned = re.sub(
        r"^\s*(WorkshopItems|Mods|Map)\s*=\s*",
        "",
        text_minus_urls,
        flags=re.MULTILINE | re.IGNORECASE,
    )
    bare_unique: List[str] = [
        wsid
        for wsid in dict.fromkeys(re.findall(r"\b\d{7,12}\b", cleaned))
        if wsid not in url_id_set
    ]

    return (bare_unique, url_ids)
|