mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
feat: add target scraper infra and retroarch buildbot scraper
This commit is contained in:
48
scripts/scraper/targets/__init__.py
Normal file
48
scripts/scraper/targets/__init__.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""Target scraper plugin discovery module.
|
||||
|
||||
Auto-detects *_targets_scraper.py files and exposes their scrapers.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class BaseTargetScraper:
|
||||
"""Base class for target scrapers."""
|
||||
|
||||
def __init__(self, url: str = ""):
|
||||
self.url = url
|
||||
|
||||
def fetch_targets(self) -> dict:
|
||||
"""Fetch targets and their core lists. Returns dict matching target YAML format."""
|
||||
raise NotImplementedError
|
||||
|
||||
def write_output(self, data: dict, output_path: str) -> None:
|
||||
"""Write target data to YAML file."""
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
raise ImportError("PyYAML required: pip install pyyaml")
|
||||
with open(output_path, "w") as f:
|
||||
yaml.dump(data, f, default_flow_style=False, sort_keys=False)
|
||||
|
||||
|
||||
_scrapers: dict[str, type] = {}
|
||||
|
||||
|
||||
def discover_target_scrapers() -> dict[str, type]:
|
||||
"""Auto-discover all *_targets_scraper.py modules."""
|
||||
if _scrapers:
|
||||
return _scrapers
|
||||
package_dir = Path(__file__).parent
|
||||
for finder, name, ispkg in pkgutil.iter_modules([str(package_dir)]):
|
||||
if not name.endswith("_targets_scraper"):
|
||||
continue
|
||||
module = importlib.import_module(f".{name}", package=__package__)
|
||||
platform_name = getattr(module, "PLATFORM_NAME", None)
|
||||
scraper_class = getattr(module, "Scraper", None)
|
||||
if platform_name and scraper_class:
|
||||
_scrapers[platform_name] = scraper_class
|
||||
return _scrapers
|
||||
138
scripts/scraper/targets/retroarch_targets_scraper.py
Normal file
138
scripts/scraper/targets/retroarch_targets_scraper.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""Scraper for RetroArch buildbot nightly targets.
|
||||
|
||||
Source: https://buildbot.libretro.com/nightly/
|
||||
Fetches directory listings per target to determine available cores.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import yaml
|
||||
|
||||
from . import BaseTargetScraper
|
||||
|
||||
PLATFORM_NAME = "retroarch"
|
||||
|
||||
BUILDBOT_URL = "https://buildbot.libretro.com/nightly/"
|
||||
|
||||
# (path, target_name, architecture)
|
||||
TARGETS: list[tuple[str, str, str]] = [
|
||||
("linux/x86_64", "linux-x86_64", "x86_64"),
|
||||
("linux/armhf", "linux-armhf", "armhf"),
|
||||
("linux/armv7-neon-hf", "linux-armv7-neon-hf", "armv7"),
|
||||
("windows/x86_64", "windows-x86_64", "x86_64"),
|
||||
("windows/x86", "windows-x86", "x86"),
|
||||
("android/armeabi-v7a", "android-armeabi-v7a", "armv7"),
|
||||
("android/arm64-v8a", "android-arm64-v8a", "aarch64"),
|
||||
("apple/osx/x86_64", "osx-x86_64", "x86_64"),
|
||||
("apple/osx/arm64", "osx-arm64", "aarch64"),
|
||||
("apple/ios-arm64", "ios-arm64", "aarch64"),
|
||||
("apple/tvos-arm64", "tvos-arm64", "aarch64"),
|
||||
("nintendo/switch/libnx", "switch-libnx", "aarch64"),
|
||||
("nintendo/3ds", "3ds", "armv6"),
|
||||
("nintendo/ngc", "ngc", "ppc"),
|
||||
("nintendo/wii", "wii", "ppc"),
|
||||
("nintendo/wiiu", "wiiu", "ppc"),
|
||||
("playstation/ps2", "ps2", "mips"),
|
||||
("playstation/psp", "psp", "mips"),
|
||||
("playstation/vita", "vita", "armv7"),
|
||||
]
|
||||
|
||||
_CORE_RE = re.compile(
|
||||
r'href="([^"]+_libretro(?:\.so|\.dll|\.dylib)(?:\.zip)?)"',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _strip_core_suffix(filename: str) -> str:
|
||||
"""Strip _libretro.so/.dll/.dylib(.zip)? suffix to get core name."""
|
||||
name = re.sub(r'\.zip$', '', filename, flags=re.IGNORECASE)
|
||||
name = re.sub(r'_libretro(?:\.so|\.dll|\.dylib)$', '', name, flags=re.IGNORECASE)
|
||||
return name
|
||||
|
||||
|
||||
class Scraper(BaseTargetScraper):
|
||||
"""Fetches core lists per target from RetroArch buildbot nightly."""
|
||||
|
||||
def __init__(self, url: str = BUILDBOT_URL):
|
||||
super().__init__(url=url)
|
||||
|
||||
def _fetch_url(self, url: str) -> str | None:
|
||||
"""Fetch URL, return text or None on failure."""
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
return resp.read().decode("utf-8")
|
||||
except urllib.error.URLError as e:
|
||||
print(f" skip {url}: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
def _fetch_cores_for_target(self, path: str) -> list[str]:
|
||||
"""Fetch core list from buildbot directory listing."""
|
||||
url = f"{self.url}{path}/latest/"
|
||||
html = self._fetch_url(url)
|
||||
if html is None:
|
||||
return []
|
||||
cores = []
|
||||
seen: set[str] = set()
|
||||
for match in _CORE_RE.finditer(html):
|
||||
filename = match.group(1).split("/")[-1]
|
||||
core = _strip_core_suffix(filename)
|
||||
if core and core not in seen:
|
||||
seen.add(core)
|
||||
cores.append(core)
|
||||
return sorted(cores)
|
||||
|
||||
def fetch_targets(self) -> dict:
|
||||
"""Fetch all targets and their core lists."""
|
||||
targets: dict[str, dict] = {}
|
||||
for path, target_name, arch in TARGETS:
|
||||
print(f" fetching {target_name}...", file=sys.stderr)
|
||||
cores = self._fetch_cores_for_target(path)
|
||||
if not cores:
|
||||
print(f" warning: no cores found for {target_name}", file=sys.stderr)
|
||||
targets[target_name] = {
|
||||
"architecture": arch,
|
||||
"cores": cores,
|
||||
}
|
||||
return {
|
||||
"platform": "retroarch",
|
||||
"source": self.url,
|
||||
"scraped_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"targets": targets,
|
||||
}
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Scrape RetroArch buildbot nightly targets"
|
||||
)
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show target summary")
|
||||
parser.add_argument("--output", "-o", help="Output YAML file")
|
||||
args = parser.parse_args()
|
||||
|
||||
scraper = Scraper()
|
||||
data = scraper.fetch_targets()
|
||||
|
||||
if args.dry_run:
|
||||
for name, info in data["targets"].items():
|
||||
print(f" {name} ({info['architecture']}): {len(info['cores'])} cores")
|
||||
return
|
||||
|
||||
if args.output:
|
||||
scraper.write_output(data, args.output)
|
||||
print(f"Written to {args.output}")
|
||||
return
|
||||
|
||||
print(yaml.dump(data, default_flow_style=False, sort_keys=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user