mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-20 15:52:35 -05:00
feat: add emudeck and retropie target scrapers
This commit is contained in:
@@ -0,0 +1,137 @@
|
|||||||
"""Scraper for EmuDeck emulator targets.

Sources:
    SteamOS: dragoonDorise/EmuDeck — checkBIOS.sh, install scripts
    Windows: EmuDeck/emudeck-we — checkBIOS.ps1
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from . import BaseTargetScraper
|
||||||
|
|
||||||
|
# Identifier of the platform this scraper produces targets for.
PLATFORM_NAME = "emudeck"

# Raw-file URL of the SteamOS BIOS-check script (dragoonDorise/EmuDeck, main).
STEAMOS_CHECKBIOS_URL = (
    "https://raw.githubusercontent.com/dragoonDorise/EmuDeck/"
    "main/functions/checkBIOS.sh"
)

# Raw-file URL of the Windows BIOS-check script (EmuDeck/emudeck-we, main).
WINDOWS_CHECKBIOS_URL = (
    "https://raw.githubusercontent.com/EmuDeck/emudeck-we/"
    "main/functions/checkBIOS.ps1"
)
|
||||||
|
|
||||||
|
# Patterns for emulator name extraction from shell install/check functions
|
||||||
|
_SH_EMULATOR_RE = re.compile(
|
||||||
|
r'(?:function\s+|^)(?:check|install|setup)([A-Za-z0-9_]+)\s*\(',
|
||||||
|
re.MULTILINE,
|
||||||
|
)
|
||||||
|
_PS1_EMULATOR_RE = re.compile(
|
||||||
|
r'(?:function\s+|^)(?:Check|Install|Setup)([A-Za-z0-9_]+)\s*\{',
|
||||||
|
re.MULTILINE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch(url: str) -> str | None:
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
return resp.read().decode("utf-8")
|
||||||
|
except urllib.error.URLError as e:
|
||||||
|
print(f" skip {url}: {e}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_sh_emulators(text: str) -> list[str]:
    """Extract emulator names from checkBIOS.sh function declarations.

    Args:
        text: Contents of the shell script to scan.

    Returns:
        The lower-cased group-1 captures of ``_SH_EMULATOR_RE``, de-duplicated
        and sorted alphabetically. Empty when nothing matches.
    """
    # The result is sorted, so first-seen order never mattered; a set gives
    # the same de-duplication without the parallel list.
    return sorted({m.group(1).lower() for m in _SH_EMULATOR_RE.finditer(text)})
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_ps1_emulators(text: str) -> list[str]:
    """Extract emulator names from checkBIOS.ps1 function declarations.

    Args:
        text: Contents of the PowerShell script to scan.

    Returns:
        The lower-cased group-1 captures of ``_PS1_EMULATOR_RE``,
        de-duplicated and sorted alphabetically. Empty when nothing matches.
    """
    # The result is sorted, so first-seen order never mattered; a set gives
    # the same de-duplication without the parallel list.
    return sorted({m.group(1).lower() for m in _PS1_EMULATOR_RE.finditer(text)})
|
||||||
|
|
||||||
|
|
||||||
|
class Scraper(BaseTargetScraper):
    """Fetches emulator lists for EmuDeck SteamOS and Windows targets."""

    def __init__(self, url: str = "https://github.com/dragoonDorise/EmuDeck"):
        """Create the scraper.

        Args:
            url: Value reported as the ``source`` of the scraped data; it is
                forwarded to ``BaseTargetScraper.__init__``.
        """
        super().__init__(url=url)

    def fetch_targets(self) -> dict:
        """Download both checkBIOS scripts and build the target mapping.

        A script that cannot be fetched contributes an empty ``cores`` list
        rather than raising.

        Returns:
            A dict with ``platform``, ``source``, ``scraped_at`` (UTC,
            ``YYYY-MM-DDTHH:MM:SSZ``) and ``targets``, where ``targets`` maps
            ``steamos`` and ``windows`` to their architecture and emulator
            names.
        """
        print(" fetching SteamOS checkBIOS.sh...", file=sys.stderr)
        sh_text = _fetch(STEAMOS_CHECKBIOS_URL)
        steamos_cores = _extract_sh_emulators(sh_text) if sh_text else []

        print(" fetching Windows checkBIOS.ps1...", file=sys.stderr)
        ps1_text = _fetch(WINDOWS_CHECKBIOS_URL)
        windows_cores = _extract_ps1_emulators(ps1_text) if ps1_text else []

        targets: dict[str, dict] = {
            "steamos": {
                "architecture": "x86_64",
                "cores": steamos_cores,
            },
            "windows": {
                "architecture": "x86_64",
                "cores": windows_cores,
            },
        }

        return {
            # Reuse the module constant instead of repeating the literal.
            "platform": PLATFORM_NAME,
            # self.url is presumably set by BaseTargetScraper.__init__ —
            # the base class is not visible here; confirm.
            "source": self.url,
            "scraped_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "targets": targets,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: scrape EmuDeck targets and report them.

    Exactly one of three outputs is produced after scraping:

    * ``--dry-run``: one summary line per target on stdout;
    * ``--output FILE``: the data is handed to ``Scraper.write_output``;
    * otherwise: the data is dumped to stdout as YAML.

    ``--dry-run`` takes precedence over ``--output`` when both are given.
    """
    parser = argparse.ArgumentParser(
        description="Scrape EmuDeck emulator targets"
    )
    parser.add_argument("--dry-run", action="store_true", help="Show target summary")
    parser.add_argument("--output", "-o", help="Output YAML file")
    args = parser.parse_args()

    scraper = Scraper()
    data = scraper.fetch_targets()

    if args.dry_run:
        for name, info in data["targets"].items():
            print(f" {name} ({info['architecture']}): {len(info['cores'])} emulators")
    elif args.output:
        scraper.write_output(data, args.output)
        print(f"Written to {args.output}")
    else:
        print(yaml.dump(data, default_flow_style=False, sort_keys=False))
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point. The module uses a relative import
# (``from . import BaseTargetScraper``), so it has to be launched as part of
# its package (``python -m <package>.<module>``) rather than by file path.
if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
"""Scraper for RetroPie package availability per platform.

Source: https://retropie.org.uk/stats/pkgflags/
Parses the HTML table of packages × platforms.
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from . import BaseTargetScraper
|
||||||
|
|
||||||
|
PLATFORM_NAME = "retropie"
|
||||||
|
|
||||||
|
SOURCE_URL = "https://retropie.org.uk/stats/pkgflags/"
|
||||||
|
|
||||||
|
# Maps table column header to (target_name, architecture)
|
||||||
|
_COLUMN_MAP: dict[str, tuple[str, str]] = {
|
||||||
|
"rpi1": ("rpi1", "armv6"),
|
||||||
|
"rpi2": ("rpi2", "armv7"),
|
||||||
|
"rpi3": ("rpi3", "armv7"),
|
||||||
|
"rpi4": ("rpi4", "aarch64"),
|
||||||
|
"rpi5": ("rpi5", "aarch64"),
|
||||||
|
"x86": ("x86", "x86"),
|
||||||
|
"x86_64": ("x86_64", "x86_64"),
|
||||||
|
}
|
||||||
|
|
||||||
|
_TH_RE = re.compile(r'<th[^>]*>(.*?)</th>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_TR_RE = re.compile(r'<tr[^>]*>(.*?)</tr>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_TD_RE = re.compile(r'<td[^>]*>(.*?)</td>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_TAG_RE = re.compile(r'<[^>]+>')
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_tags(text: str) -> str:
    """Return the plain text of an HTML fragment.

    Tags are removed, character references such as ``&amp;`` or ``&nbsp;``
    are decoded, and surrounding whitespace is trimmed. Decoding matters for
    callers that test a cell for emptiness: a cell holding only ``&nbsp;``
    becomes ``""``.

    Args:
        text: HTML fragment, e.g. the inner HTML of a table cell.

    Returns:
        The fragment's text content.
    """
    # Imported locally so the module-level name ``html`` stays unbound;
    # _parse_table uses ``html`` as a parameter name.
    import html

    # Unescape after tag removal so an escaped "&lt;b&gt;" survives as text.
    return html.unescape(_TAG_RE.sub("", text)).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch(url: str) -> str | None:
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
return resp.read().decode("utf-8")
|
||||||
|
except urllib.error.URLError as e:
|
||||||
|
print(f" skip {url}: {e}", file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_table(html: str) -> dict[str, list[str]]:
    """Parse the pkgflags HTML table into {target: [packages]}.

    Header cells are matched (lower-cased) against ``_COLUMN_MAP`` to learn
    which column belongs to which target. Each body row then contributes its
    first cell, lower-cased, as the package name to every target whose cell
    in that row is considered "available".

    Args:
        html: Full HTML of the pkgflags page.

    Returns:
        Mapping of target name to package names in row order. Empty when no
        header row is found or no header matches ``_COLUMN_MAP``.
    """
    flags = re.IGNORECASE | re.DOTALL

    # Prefer an explicit <thead>; otherwise treat the first row as the header.
    header_match = re.search(
        r'<thead[^>]*>(.*?)</thead>', html, flags
    ) or re.search(r'<tr[^>]*>(.*?)</tr>', html, flags)
    if not header_match:
        return {}

    headers = [_strip_tags(h).lower() for h in _TH_RE.findall(header_match.group(1))]

    # Column index -> target name, for the columns we recognise.
    col_targets: dict[int, str] = {
        idx: _COLUMN_MAP[label][0]
        for idx, label in enumerate(headers)
        if label in _COLUMN_MAP
    }
    if not col_targets:
        return {}

    result: dict[str, list[str]] = {name: [] for name in col_targets.values()}

    # Without a <tbody> scan the whole page; rows made only of <th> cells
    # (such as the header row) yield no <td> cells and are skipped below.
    tbody_match = re.search(r'<tbody[^>]*>(.*?)</tbody>', html, flags)
    body_html = tbody_match.group(1) if tbody_match else html

    # Cell texts meaning "not available"; an empty cell is handled separately.
    # NOTE(review): the real page's cell vocabulary is not visible here —
    # confirm these markers against it.
    unavailable = {"-", "0", "n", "no", "false"}

    for tr_match in _TR_RE.finditer(body_html):
        cells = [_strip_tags(td) for td in _TD_RE.findall(tr_match.group(1))]
        if not cells:
            continue
        # First cell is the package name; _strip_tags already trimmed it.
        package = cells[0].lower()
        if not package:
            continue
        for col_idx, target_name in col_targets.items():
            if col_idx >= len(cells):
                continue  # short row: no cell for this column
            cell_val = cells[col_idx].lower()
            if cell_val and cell_val not in unavailable:
                result[target_name].append(package)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
class Scraper(BaseTargetScraper):
    """Fetches RetroPie package availability per platform from pkgflags page."""

    def __init__(self, url: str = SOURCE_URL):
        """Create the scraper.

        Args:
            url: Page to scrape; forwarded to ``BaseTargetScraper.__init__``.
        """
        super().__init__(url=url)

    def fetch_targets(self) -> dict:
        """Download the pkgflags page and build the target mapping.

        Every target listed in ``_COLUMN_MAP`` appears in the result; targets
        with no data (page unreachable, column missing) get an empty
        ``cores`` list.

        Returns:
            A dict with ``platform``, ``source``, ``scraped_at`` (UTC,
            ``YYYY-MM-DDTHH:MM:SSZ``) and ``targets``, where ``targets`` maps
            each target name to its architecture and sorted package names.
        """
        print(" fetching RetroPie pkgflags...", file=sys.stderr)
        # self.url is presumably set by BaseTargetScraper.__init__ — the base
        # class is not visible here; confirm.
        html = _fetch(self.url)
        packages_per_target: dict[str, list[str]] = _parse_table(html) if html else {}

        # Only the (target, architecture) pairs are needed, not the headers.
        targets: dict[str, dict] = {
            target_name: {
                "architecture": arch,
                "cores": sorted(packages_per_target.get(target_name, [])),
            }
            for target_name, arch in _COLUMN_MAP.values()
        }

        return {
            # Reuse the module constant instead of repeating the literal.
            "platform": PLATFORM_NAME,
            "source": self.url,
            "scraped_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "targets": targets,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: scrape RetroPie targets and report them.

    Exactly one of three outputs is produced after scraping:

    * ``--dry-run``: one summary line per target on stdout;
    * ``--output FILE``: the data is handed to ``Scraper.write_output``;
    * otherwise: the data is dumped to stdout as YAML.

    ``--dry-run`` takes precedence over ``--output`` when both are given.
    """
    parser = argparse.ArgumentParser(
        description="Scrape RetroPie package targets"
    )
    parser.add_argument("--dry-run", action="store_true", help="Show target summary")
    parser.add_argument("--output", "-o", help="Output YAML file")
    args = parser.parse_args()

    scraper = Scraper()
    data = scraper.fetch_targets()

    if args.dry_run:
        for name, info in data["targets"].items():
            print(f" {name} ({info['architecture']}): {len(info['cores'])} packages")
    elif args.output:
        scraper.write_output(data, args.output)
        print(f"Written to {args.output}")
    else:
        print(yaml.dump(data, default_flow_style=False, sort_keys=False))
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point. The module uses a relative import
# (``from . import BaseTargetScraper``), so it has to be launched as part of
# its package (``python -m <package>.<module>``) rather than by file path.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user