"""Parse a raw textarea blob into a deduped, ordered list of workshop IDs.""" from __future__ import annotations import re from typing import List def parse_workshop_input(text: str) -> List[str]: cleaned = re.sub( r"^\s*(WorkshopItems|Mods|Map)\s*=\s*", "", text, flags=re.MULTILINE | re.IGNORECASE, ) ids = re.findall(r"\b\d{7,12}\b", cleaned) seen: set[str] = set() out: List[str] = [] for i in ids: if i not in seen: seen.add(i) out.append(i) return out # Steam Workshop URL form: https://steamcommunity.com/{sharedfiles,workshop}/filedetails/?id=NNNNNNN _STEAM_URL_RE = re.compile( r"https?://steamcommunity\.com/(?:sharedfiles|workshop)/filedetails/\?id=(\d{7,12})", re.IGNORECASE, ) def parse_with_collections(text: str) -> tuple[List[str], List[str]]: """Split an input blob into bare wsids and candidate collection IDs. A "candidate collection" is any 7-12-digit ID that appears inside a Steam Workshop URL. Bare numeric IDs in the same blob are treated as mod wsids (current behavior). Steam doesn't syntactically distinguish collection IDs from mod IDs; the candidate list is sent to GetCollectionDetails to confirm. If a candidate isn't actually a collection, the caller falls it back to wsids. Returns (wsids, collection_ids), each deduped and in first-seen order. """ if not text: return ([], []) # 1. Find URL-form IDs FIRST (so they don't get double-counted as bare). url_ids: List[str] = [] seen_url: set[str] = set() for m in _STEAM_URL_RE.finditer(text): i = m.group(1) if i not in seen_url: seen_url.add(i) url_ids.append(i) # 2. Strip the URLs out before extracting bare numbers. text_minus_urls = _STEAM_URL_RE.sub("", text) # 3. Bare wsids: same regex as parse_workshop_input. cleaned = re.sub( r"^\s*(WorkshopItems|Mods|Map)\s*=\s*", "", text_minus_urls, flags=re.MULTILINE | re.IGNORECASE, ) bare_ids = re.findall(r"\b\d{7,12}\b", cleaned) seen_bare: set[str] = set() bare_unique: List[str] = [] for i in bare_ids: if i not in seen_bare and i not in seen_url: seen_bare.add(i) bare_unique.append(i) return (bare_unique, url_ids)