diff --git a/scripts/scraper/targets/__init__.py b/scripts/scraper/targets/__init__.py new file mode 100644 index 00000000..740966f4 --- /dev/null +++ b/scripts/scraper/targets/__init__.py @@ -0,0 +1,48 @@ +"""Target scraper plugin discovery module. + +Auto-detects *_targets_scraper.py files and exposes their scrapers. +""" +from __future__ import annotations + +import importlib +import pkgutil +from pathlib import Path + + +class BaseTargetScraper: + """Base class for target scrapers.""" + + def __init__(self, url: str = ""): + self.url = url + + def fetch_targets(self) -> dict: + """Fetch targets and their core lists. Returns dict matching target YAML format.""" + raise NotImplementedError + + def write_output(self, data: dict, output_path: str) -> None: + """Write target data to YAML file.""" + try: + import yaml + except ImportError: + raise ImportError("PyYAML required: pip install pyyaml") + with open(output_path, "w") as f: + yaml.dump(data, f, default_flow_style=False, sort_keys=False) + + +_scrapers: dict[str, type] = {} + + +def discover_target_scrapers() -> dict[str, type]: + """Auto-discover all *_targets_scraper.py modules.""" + if _scrapers: + return _scrapers + package_dir = Path(__file__).parent + for finder, name, ispkg in pkgutil.iter_modules([str(package_dir)]): + if not name.endswith("_targets_scraper"): + continue + module = importlib.import_module(f".{name}", package=__package__) + platform_name = getattr(module, "PLATFORM_NAME", None) + scraper_class = getattr(module, "Scraper", None) + if platform_name and scraper_class: + _scrapers[platform_name] = scraper_class + return _scrapers diff --git a/scripts/scraper/targets/retroarch_targets_scraper.py b/scripts/scraper/targets/retroarch_targets_scraper.py new file mode 100644 index 00000000..5ebec574 --- /dev/null +++ b/scripts/scraper/targets/retroarch_targets_scraper.py @@ -0,0 +1,138 @@ +"""Scraper for RetroArch buildbot nightly targets. + +Source: https://buildbot.libretro.com/nightly/ +Fetches directory listings per target to determine available cores. +""" +from __future__ import annotations + +import argparse +import re +import sys +import urllib.error +import urllib.request +from datetime import datetime, timezone + +import yaml + +from . import BaseTargetScraper + +PLATFORM_NAME = "retroarch" + +BUILDBOT_URL = "https://buildbot.libretro.com/nightly/" + +# (path, target_name, architecture) +TARGETS: list[tuple[str, str, str]] = [ + ("linux/x86_64", "linux-x86_64", "x86_64"), + ("linux/armhf", "linux-armhf", "armhf"), + ("linux/armv7-neon-hf", "linux-armv7-neon-hf", "armv7"), + ("windows/x86_64", "windows-x86_64", "x86_64"), + ("windows/x86", "windows-x86", "x86"), + ("android/armeabi-v7a", "android-armeabi-v7a", "armv7"), + ("android/arm64-v8a", "android-arm64-v8a", "aarch64"), + ("apple/osx/x86_64", "osx-x86_64", "x86_64"), + ("apple/osx/arm64", "osx-arm64", "aarch64"), + ("apple/ios-arm64", "ios-arm64", "aarch64"), + ("apple/tvos-arm64", "tvos-arm64", "aarch64"), + ("nintendo/switch/libnx", "switch-libnx", "aarch64"), + ("nintendo/3ds", "3ds", "armv6"), + ("nintendo/ngc", "ngc", "ppc"), + ("nintendo/wii", "wii", "ppc"), + ("nintendo/wiiu", "wiiu", "ppc"), + ("playstation/ps2", "ps2", "mips"), + ("playstation/psp", "psp", "mips"), + ("playstation/vita", "vita", "armv7"), +] + +_CORE_RE = re.compile( + r'href="([^"]+_libretro(?:\.so|\.dll|\.dylib)(?:\.zip)?)"', + re.IGNORECASE, +) + + +def _strip_core_suffix(filename: str) -> str: + """Strip _libretro.so/.dll/.dylib(.zip)? suffix to get core name.""" + name = re.sub(r'\.zip$', '', filename, flags=re.IGNORECASE) + name = re.sub(r'_libretro(?:\.so|\.dll|\.dylib)$', '', name, flags=re.IGNORECASE) + return name + + +class Scraper(BaseTargetScraper): + """Fetches core lists per target from RetroArch buildbot nightly.""" + + def __init__(self, url: str = BUILDBOT_URL): + super().__init__(url=url) + + def _fetch_url(self, url: str) -> str | None: + """Fetch URL, return text or None on failure.""" + try: + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-scraper/1.0"} + ) + with urllib.request.urlopen(req, timeout=30) as resp: + return resp.read().decode("utf-8") + except urllib.error.URLError as e: + print(f" skip {url}: {e}", file=sys.stderr) + return None + + def _fetch_cores_for_target(self, path: str) -> list[str]: + """Fetch core list from buildbot directory listing.""" + url = f"{self.url}{path}/latest/" + html = self._fetch_url(url) + if html is None: + return [] + cores = [] + seen: set[str] = set() + for match in _CORE_RE.finditer(html): + filename = match.group(1).split("/")[-1] + core = _strip_core_suffix(filename) + if core and core not in seen: + seen.add(core) + cores.append(core) + return sorted(cores) + + def fetch_targets(self) -> dict: + """Fetch all targets and their core lists.""" + targets: dict[str, dict] = {} + for path, target_name, arch in TARGETS: + print(f" fetching {target_name}...", file=sys.stderr) + cores = self._fetch_cores_for_target(path) + if not cores: + print(f" warning: no cores found for {target_name}", file=sys.stderr) + targets[target_name] = { + "architecture": arch, + "cores": cores, + } + return { + "platform": "retroarch", + "source": self.url, + "scraped_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "targets": targets, + } + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Scrape RetroArch buildbot nightly targets" + ) + parser.add_argument("--dry-run", action="store_true", help="Show target summary") + parser.add_argument("--output", "-o", help="Output YAML file") + args = parser.parse_args() + + scraper = Scraper() + data = scraper.fetch_targets() + + if args.dry_run: + for name, info in data["targets"].items(): + print(f" {name} ({info['architecture']}): {len(info['cores'])} cores") + return + + if args.output: + scraper.write_output(data, args.output) + print(f"Written to {args.output}") + return + + print(yaml.dump(data, default_flow_style=False, sort_keys=False)) + + +if __name__ == "__main__": + main()