mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
feat: add emudeck platform support, 126/164 verified
This commit is contained in:
461
scripts/scraper/emudeck_scraper.py
Normal file
461
scripts/scraper/emudeck_scraper.py
Normal file
@@ -0,0 +1,461 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Scraper for EmuDeck BIOS requirements.
|
||||
|
||||
Sources:
|
||||
1. checkBIOS.sh - MD5 hash whitelists per system
|
||||
https://raw.githubusercontent.com/dragoonDorise/EmuDeck/main/functions/checkBIOS.sh
|
||||
2. CSV cheat sheets - BIOS filenames per manufacturer
|
||||
https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables/{name}-cheat-sheet.csv
|
||||
Hash: MD5 primary
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
try:
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
except ImportError:
|
||||
from base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
|
||||
# Identifier of the platform this scraper targets.
PLATFORM_NAME = "emudeck"

# Both upstream sources are served as raw files from GitHub.
_RAW_GITHUB = "https://raw.githubusercontent.com"

# Shell script holding the MD5 whitelists.
CHECKBIOS_URL = f"{_RAW_GITHUB}/dragoonDorise/EmuDeck/main/functions/checkBIOS.sh"

# Directory that contains the "<manufacturer>-cheat-sheet.csv" files.
CSV_BASE_URL = f"{_RAW_GITHUB}/EmuDeck/emudeck.github.io/main/docs/tables"

# Manufacturers with a cheat sheet; list order is preserved in CSV_SHEETS.
_CHEAT_SHEET_VENDORS = (
    "sony",
    "sega",
    "nintendo",
    "snk",
    "panasonic",
    "nec",
    "microsoft",
    "coleco",
    "atari",
    "bandai",
    "mattel",
)
CSV_SHEETS = [f"{vendor}-cheat-sheet.csv" for vendor in _CHEAT_SHEET_VENDORS]

# Name of a global bash array in checkBIOS.sh -> system slug its hashes belong to.
HASH_ARRAY_MAP = {
    "PSBios": "sony-playstation",
    "PS2Bios": "sony-playstation-2",
    "CDBios": "sega-mega-cd",
    "SaturnBios": "sega-saturn",
}

# Name of a bash check function that declares `local hashes=(...)` -> system slug.
FUNCTION_HASH_MAP = {
    "checkDreamcastBios": "sega-dreamcast",
    "checkDSBios": "nintendo-ds",
}
|
||||
|
||||
# Canonical system slug paired with every lower-case folder name that maps to it.
# Kept grouped by slug so aliases of one system sit next to each other.
_SLUG_ALIASES = (
    ("sony-playstation", ("psx",)),
    ("sony-playstation-2", ("ps2",)),
    ("sony-playstation-3", ("ps3",)),
    ("sony-psp", ("psp",)),
    ("sony-psvita", ("psvita",)),
    ("sega-mega-cd", ("segacd", "megacd")),
    ("sega-saturn", ("saturn",)),
    ("sega-dreamcast", ("dreamcast",)),
    ("sega-32x", ("sega32x",)),
    ("sega-master-system", ("mastersystem",)),
    ("sega-mega-drive", ("genesis", "megadrive")),
    ("sega-game-gear", ("gamegear",)),
    ("sega-dreamcast-arcade", ("naomi", "naomi2", "atomiswave")),
    ("nintendo-ds", ("nds",)),
    ("nintendo-3ds", ("3ds", "n3ds")),
    ("nintendo-64", ("n64",)),
    ("nintendo-64dd", ("n64dd",)),
    ("nintendo-gamecube", ("gc", "gamecube")),
    ("nintendo-wii", ("wii",)),
    ("nintendo-wii-u", ("wiiu",)),
    ("nintendo-switch", ("switch",)),
    ("nintendo-nes", ("nes", "famicom")),
    ("nintendo-snes", ("snes",)),
    ("nintendo-gb", ("gb",)),
    ("nintendo-gba", ("gba",)),
    ("nintendo-gbc", ("gbc",)),
    ("nintendo-virtual-boy", ("virtualboy",)),
    ("snk-neogeo", ("fbneo",)),
    ("snk-neogeo-cd", ("neogeocd", "neogeocdjp")),
    ("snk-neogeo-pocket", ("ngp",)),
    ("snk-neogeo-pocket-color", ("ngpc",)),
    ("panasonic-3do", ("3do",)),
    ("nec-pc-engine", ("pcengine", "pcenginecd")),
    ("nec-pc-fx", ("pcfx",)),
    ("nec-pc-88", ("pc88",)),
    ("nec-pc-98", ("pc98",)),
    ("coleco-colecovision", ("colecovision",)),
)

# Folder name -> system slug lookup, flattened from the alias table above.
SYSTEM_SLUG_MAP = {
    alias: slug
    for slug, aliases in _SLUG_ALIASES
    for alias in aliases
}
|
||||
|
||||
def _bios_file(name, destination="", region=""):
    """Build one KNOWN_BIOS_FILES entry.

    The destination falls back to the file name itself, and the "region" key
    is only present when a region is given.
    """
    entry = {"name": name, "destination": destination or name}
    if region:
        entry["region"] = region
    return entry


# Hand-maintained BIOS file names per system slug.
KNOWN_BIOS_FILES = {
    "sony-playstation": [
        _bios_file("scph5500.bin", region="JP"),
        _bios_file("scph5501.bin", region="US"),
        _bios_file("scph5502.bin", region="EU"),
    ],
    "sony-playstation-2": [
        _bios_file("SCPH-70004_BIOS_V12_EUR_200.BIN"),
        _bios_file("SCPH-70004_BIOS_V12_EUR_200.EROM"),
        _bios_file("SCPH-70004_BIOS_V12_EUR_200.ROM1"),
        _bios_file("SCPH-70004_BIOS_V12_EUR_200.ROM2"),
    ],
    "sega-mega-cd": [
        _bios_file("bios_CD_E.bin", region="EU"),
        _bios_file("bios_CD_U.bin", region="US"),
        _bios_file("bios_CD_J.bin", region="JP"),
    ],
    "sega-saturn": [
        _bios_file("sega_101.bin", region="JP"),
        _bios_file("mpr-17933.bin", region="US/EU"),
        _bios_file("saturn_bios.bin"),
    ],
    "sega-dreamcast": [
        _bios_file("dc_boot.bin", destination="dc/dc_boot.bin"),
        _bios_file("dc_flash.bin", destination="dc/dc_flash.bin"),
    ],
    "nintendo-ds": [
        _bios_file("bios7.bin"),
        _bios_file("bios9.bin"),
        _bios_file("firmware.bin"),
    ],
    "snk-neogeo": [
        _bios_file("neogeo.zip"),
        _bios_file("neocdz.zip"),
    ],
    "panasonic-3do": [
        _bios_file("panafz1.bin"),
    ],
    "nintendo-nes": [
        _bios_file("disksys.rom"),
    ],
    "sega-dreamcast-arcade": [
        _bios_file("naomi.zip", destination="dc/naomi.zip"),
    ],
}
|
||||
|
||||
# One or more hex tokens separated by whitespace. Whitespace between tokens is
# mandatory, so a run of hex digits can be split into tokens in exactly one
# way. The earlier form `(?:[0-9a-fA-F]+\s*)+` let the engine try every
# partition of a long hex run and backtrack exponentially whenever the run was
# followed by something other than `)`.
_HEX_TOKENS = r'[0-9a-fA-F]+(?:\s+[0-9a-fA-F]+)*'

# `name=( <hex> <hex> ... )`, optionally prefixed by `local`.
# group(1) = array name, group(2) = whitespace-separated hex tokens.
_RE_ARRAY = re.compile(
    r'(?:local\s+)?(\w+)=\(\s*(' + _HEX_TOKENS + r')\s*\)',
    re.MULTILINE,
)

# `function check<Something>Bios()`; group(1) = function name.
_RE_FUNC = re.compile(
    r'function\s+(check\w+Bios)\s*\(\)',
    re.MULTILINE,
)

# `local hashes=( <hex> ... )`; group(1) = whitespace-separated hex tokens.
_RE_LOCAL_HASHES = re.compile(
    r'local\s+hashes=\(\s*(' + _HEX_TOKENS + r')\s*\)',
    re.MULTILINE,
)
|
||||
|
||||
|
||||
def _fetch_url(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request; sent with a fixed ``User-Agent`` header and
            a 30 second timeout.

    Returns:
        The response body as text.

    Raises:
        ConnectionError: If the request fails or the transfer breaks while
            the body is being read. The original exception is chained.
        UnicodeDecodeError: If the body is not valid UTF-8 (not wrapped).
    """
    import http.client

    req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return resp.read().decode("utf-8")
    # URLError is an OSError subclass, but a timeout or a dropped connection
    # during resp.read() surfaces as a plain OSError / HTTPException. Catch
    # those too so callers only ever have to handle ConnectionError.
    except (OSError, http.client.HTTPException) as e:
        raise ConnectionError(f"Failed to fetch {url}: {e}") from e
|
||||
|
||||
|
||||
class Scraper(BaseScraper):
    """Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets.

    Requirements are assembled from three inputs: the hand-maintained
    ``KNOWN_BIOS_FILES`` table, MD5 whitelists parsed out of the bash script,
    and file names mentioned in the BIOS column of the cheat sheet CSVs.
    """

    # File names ending in a recognised BIOS extension. The trailing ``\b``
    # keeps a shorter alternative from winning inside a longer extension:
    # without it ``x.ROM1`` was captured as ``x.ROM`` and ``x.binary`` as
    # ``x.bin``.
    _RE_BIOS_FILENAME = re.compile(
        r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)\b'
    )

    # Start of any ``function <name>`` declaration at the beginning of a line;
    # used to find where one bash function ends and the next begins.
    _RE_FUNC_DECL = re.compile(r'^[ \t]*function\s+\w+', re.MULTILINE)

    def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL):
        """Store the source locations and set up empty download caches.

        Args:
            checkbios_url: Location of the checkBIOS.sh script.
            csv_base_url: Directory URL the cheat sheet names are appended to.
        """
        # NOTE(review): BaseScraper.__init__ is not invoked here - confirm the
        # base class needs no setup of its own.
        self.checkbios_url = checkbios_url
        self.csv_base_url = csv_base_url
        self._raw_script: str | None = None
        self._csv_cache: dict[str, str] = {}

    def _fetch_script(self) -> str:
        """Return the checkBIOS.sh text, downloading it on first use only."""
        if self._raw_script is None:
            self._raw_script = _fetch_url(self.checkbios_url)
        return self._raw_script

    def _fetch_csv(self, sheet: str) -> str:
        """Return the text of one cheat sheet, or ``""`` if it cannot be fetched.

        The result is cached per sheet name, including the empty result of a
        failed download, so each sheet is requested at most once.
        """
        if sheet not in self._csv_cache:
            try:
                data = _fetch_url(f"{self.csv_base_url}/{sheet}")
            except ConnectionError:
                # Best effort: a missing sheet must not abort the whole scrape.
                data = ""
            self._csv_cache[sheet] = data
        return self._csv_cache[sheet]

    def _parse_hash_arrays(self, script: str) -> dict[str, list[str]]:
        """Extract named MD5 hash arrays from bash script.

        Only arrays whose name appears in ``HASH_ARRAY_MAP`` are kept. If an
        array name occurs more than once, the last occurrence wins.

        Returns:
            Mapping of system slug to the list of hash strings.
        """
        result: dict[str, list[str]] = {}
        for match in _RE_ARRAY.finditer(script):
            system = HASH_ARRAY_MAP.get(match.group(1))
            if system is not None:
                result[system] = match.group(2).split()
        return result

    def _parse_function_hashes(self, script: str) -> dict[str, list[str]]:
        """Extract local hash arrays from named check functions.

        For each function listed in ``FUNCTION_HASH_MAP`` the first
        ``local hashes=(...)`` found between its declaration and the next
        ``function`` declaration is used.

        Returns:
            Mapping of system slug to the list of hash strings.
        """
        result: dict[str, list[str]] = {}
        for func_match in _RE_FUNC.finditer(script):
            system = FUNCTION_HASH_MAP.get(func_match.group(1))
            if system is None:
                continue
            # Stop at the next function declaration so a function that has no
            # `local hashes` cannot pick up the array of a later function.
            next_decl = self._RE_FUNC_DECL.search(script, func_match.end())
            body_end = next_decl.start() if next_decl else len(script)
            local_match = _RE_LOCAL_HASHES.search(script, func_match.start(), body_end)
            if local_match:
                result[system] = local_match.group(1).split()
        return result

    @staticmethod
    def _clean_markdown(text: str) -> str:
        """Strip markdown/HTML artifacts from CSV fields."""
        text = re.sub(r'\*\*', '', text)  # bold
        text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text)  # mkdocs material icons
        text = re.sub(r':material-[^:]+:', '', text)
        text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)  # [text](url) -> text
        text = re.sub(r'<br\s*/?>', ' ', text)  # <br/>
        text = re.sub(r'<[^>]+>', '', text)  # remaining HTML
        return text.strip()

    def _column_containing(self, row: dict, needle: str) -> str:
        """Return the cleaned value of the first column whose header contains *needle*.

        Header matching is case-insensitive. Returns ``""`` when no header
        matches or the cell is empty.
        """
        for key, value in row.items():
            # DictReader stores surplus cells under a None key; skip those.
            if key and needle in key.lower():
                return self._clean_markdown(value or "")
        return ""

    def _parse_csv_bios(self, csv_text: str) -> list[dict]:
        """Parse BIOS file info from a cheat sheet CSV.

        Rows whose BIOS column is empty or reads "not required" are skipped.

        Returns:
            One dict per remaining row with the keys ``system`` (slug),
            ``system_name`` and ``bios_raw``.
        """
        entries: list[dict] = []
        if not csv_text.strip():
            return entries

        for row in csv.DictReader(io.StringIO(csv_text)):
            bios_col = self._column_containing(row, "bios")
            if not bios_col or bios_col.lower() == "not required":
                continue

            folder_col = self._column_containing(row, "folder")
            system_col = self._column_containing(row, "system")

            # The folder cell may list several names; the first known one wins.
            tokens = (
                tok.strip().strip('`').lower()
                for tok in re.split(r'[`\s]+', folder_col)
            )
            slug = next(
                (SYSTEM_SLUG_MAP[tok] for tok in tokens if tok in SYSTEM_SLUG_MAP),
                None,
            )
            if not slug:
                # Unknown folder: fall back to a sanitised version of the cell.
                clean = re.sub(r'[^a-z0-9\-]', '', folder_col.strip().strip('`').lower())
                slug = clean or "unknown"

            entries.append({
                "system": slug,
                "system_name": system_col,
                "bios_raw": bios_col,
            })
        return entries

    def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]:
        """Extract individual BIOS filenames from a CSV BIOS field.

        Returns:
            One dict per file found: ``destination`` is the path as written in
            the field, ``name`` is its last path component.
        """
        text = re.sub(r'<br\s*/?>', ' ', bios_raw).replace('`', '')
        return [
            {"name": path.rsplit("/", 1)[-1], "destination": path}
            for path in self._RE_BIOS_FILENAME.findall(text)
        ]

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Build the full list of BIOS requirements.

        Per system in ``KNOWN_BIOS_FILES`` the known file names come first,
        followed by one hash-only entry per whitelisted MD5. Systems that only
        appear in the cheat sheets are appended afterwards.

        Raises:
            ValueError: If the downloaded script fails ``validate_format``.
            ConnectionError: If the script cannot be downloaded.
        """
        script = self._fetch_script()
        if not self.validate_format(script):
            raise ValueError("checkBIOS.sh format validation failed")

        # Function-local arrays take precedence over global ones on a clash.
        all_hashes: dict[str, list[str]] = {
            **self._parse_hash_arrays(script),
            **self._parse_function_hashes(script),
        }

        requirements: list[BiosRequirement] = []
        seen: set[tuple[str, str]] = set()

        for system, file_list in KNOWN_BIOS_FILES.items():
            for f in file_list:
                key = (system, f["name"])
                if key in seen:
                    continue
                seen.add(key)
                requirements.append(BiosRequirement(
                    name=f["name"],
                    system=system,
                    destination=f.get("destination", f["name"]),
                    required=True,
                ))

            # dict.fromkeys drops repeated hashes while keeping script order,
            # matching the de-duplication applied to file names.
            for md5 in dict.fromkeys(all_hashes.get(system, [])):
                requirements.append(BiosRequirement(
                    name=f"{system}:{md5}",
                    system=system,
                    md5=md5,
                    destination="",
                    required=True,
                ))

        for sheet in CSV_SHEETS:
            for entry in self._parse_csv_bios(self._fetch_csv(sheet)):
                system = entry["system"]
                # Curated systems are authoritative; CSV rows never add to them.
                if system in KNOWN_BIOS_FILES:
                    continue
                for f in self._extract_filenames_from_bios_field(entry["bios_raw"]):
                    key = (system, f["name"])
                    if key in seen:
                        continue
                    seen.add(key)
                    requirements.append(BiosRequirement(
                        name=f["name"],
                        system=system,
                        destination=f.get("destination", f["name"]),
                        required=True,
                    ))

        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Return True if *raw_data* looks like the expected checkBIOS.sh.

        Requires the ``PSBios`` array assignment, one of the PS1/PS2 check
        function names, and at least one 32-digit hex string.
        """
        has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data
        has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data
        # Accept either case, as the hash-array parsers do.
        has_md5 = re.search(r'[0-9a-fA-F]{32}', raw_data) is not None
        return has_ps and has_func and has_md5

    def generate_platform_yaml(self) -> dict:
        """Return the platform description as a plain dict.

        Requirements are grouped per system under ``systems[<slug>]["files"]``.
        ``version`` is the value reported by ``fetch_github_latest_version``
        or ``""`` when that lookup fails or returns nothing.
        """
        systems: dict[str, dict] = {}
        for req in self.fetch_requirements():
            entry: dict = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.md5:
                entry["md5"] = req.md5
            systems.setdefault(req.system, {"files": []})["files"].append(entry)

        version = ""
        try:
            version = fetch_github_latest_version("dragoonDorise/EmuDeck") or ""
        except (ConnectionError, ValueError, OSError):
            # The version is informational only; leave it empty on failure.
            pass

        return {
            "platform": "EmuDeck",
            "version": version,
            "homepage": "https://www.emudeck.com",
            # Report the URL that was actually scraped, which may differ from
            # the default when the caller supplied its own.
            "source": self.checkbios_url,
            "base_destination": "bios",
            "hash_type": "md5",
            "verification_mode": "md5",
            "systems": systems,
        }
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Flags:
        --dry-run      Print every entry grouped by system, then a total.
        --json         Print the platform description as JSON.
        --output/-o    Write the platform description as YAML (needs PyYAML).

    With no flag only a one-line summary is printed. Exits with status 1 if
    scraping fails or PyYAML is missing when ``--output`` is used.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Scrape EmuDeck BIOS requirements")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--json", action="store_true")
    parser.add_argument("--output", "-o")
    args = parser.parse_args()

    scraper = Scraper()

    try:
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Grouped once up front; both the dry-run listing and the summary use it.
    by_system: dict[str, list[BiosRequirement]] = {}
    for req in reqs:
        by_system.setdefault(req.system, []).append(req)

    if args.dry_run:
        for system, files in sorted(by_system.items()):
            print(f"\n{system} ({len(files)} files):")
            for f in files:
                # Only a real (truncated) hash gets an ellipsis.
                hash_info = f"{f.md5[:12]}..." if f.md5 else "no-hash"
                print(f" {f.name} ({hash_info})")

        print(f"\nTotal: {len(reqs)} BIOS entries across {len(by_system)} systems")
        return

    if args.json:
        config = scraper.generate_platform_yaml()
        print(json.dumps(config, indent=2))
        return

    if args.output:
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required", file=sys.stderr)
            sys.exit(1)

        config = scraper.generate_platform_yaml()
        # allow_unicode=True emits raw non-ASCII characters, so the file must
        # not depend on the locale's default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        print(f"Written to {args.output}")
    else:
        print(f"Scraped {len(reqs)} BIOS entries across {len(by_system)} systems")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user