mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
Shared CLI boilerplate for all scrapers: argparse, dry-run, json, yaml output. 4 scrapers (libretro, batocera, retrobat, emudeck) reduced from ~58 lines main() each to 3 lines calling scraper_cli(). ~220 lines of duplicated boilerplate eliminated. recalbox + coreinfo keep custom main() (extra flags: --full, --compare-db).
412 lines
14 KiB
Python
412 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""Scraper for EmuDeck BIOS requirements.
|
|
|
|
Sources:
|
|
1. checkBIOS.sh - MD5 hash whitelists per system
|
|
https://raw.githubusercontent.com/dragoonDorise/EmuDeck/main/functions/checkBIOS.sh
|
|
2. CSV cheat sheets - BIOS filenames per manufacturer
|
|
https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables/{name}-cheat-sheet.csv
|
|
Hash: MD5 primary
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import io
|
|
import re
|
|
import sys
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
try:
|
|
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
|
except ImportError:
|
|
from base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
|
|
|
# Short identifier for this platform.
PLATFORM_NAME = "emudeck"

# Raw URL of the EmuDeck bash script that embeds the MD5 whitelists.
CHECKBIOS_URL = (
    "https://raw.githubusercontent.com"
    "/dragoonDorise/EmuDeck/main"
    "/functions/checkBIOS.sh"
)

# Base URL of the cheat-sheet CSV directory in the emudeck.github.io repository.
CSV_BASE_URL = (
    "https://raw.githubusercontent.com"
    "/EmuDeck/emudeck.github.io/main"
    "/docs/tables"
)

# One cheat sheet per manufacturer, named "<manufacturer>-cheat-sheet.csv".
# The order below is the order in which the sheets are processed.
CSV_SHEETS = [
    f"{maker}-cheat-sheet.csv"
    for maker in (
        "sony",
        "sega",
        "nintendo",
        "snk",
        "panasonic",
        "nec",
        "microsoft",
        "coleco",
        "atari",
        "bandai",
        "mattel",
    )
]
|
|
|
|
# Bash array name in checkBIOS.sh -> system slug the array's hashes belong to.
HASH_ARRAY_MAP = {
    "PSBios": "sony-playstation",
    "PS2Bios": "sony-playstation-2",
    "CDBios": "sega-mega-cd",
    "SaturnBios": "sega-saturn",
}

# Bash check function whose `local hashes=(...)` array is harvested
# -> system slug those hashes belong to.
FUNCTION_HASH_MAP = {
    "checkDreamcastBios": "sega-dreamcast",
    "checkDSBios": "nintendo-ds",
}

# Folder token found in a cheat-sheet row -> system slug.
SYSTEM_SLUG_MAP = {
    # Sony
    "psx": "sony-playstation",
    "ps2": "sony-playstation-2",
    "ps3": "sony-playstation-3",
    "psp": "sony-psp",
    "psvita": "sony-psvita",
    # Sega
    "segacd": "sega-mega-cd",
    "megacd": "sega-mega-cd",
    "saturn": "sega-saturn",
    "dreamcast": "sega-dreamcast",
    "sega32x": "sega-32x",
    "mastersystem": "sega-master-system",
    "genesis": "sega-mega-drive",
    "megadrive": "sega-mega-drive",
    "gamegear": "sega-game-gear",
    "naomi": "sega-dreamcast-arcade",
    "naomi2": "sega-dreamcast-arcade",
    "atomiswave": "sega-dreamcast-arcade",
    # Nintendo
    "nds": "nintendo-ds",
    "3ds": "nintendo-3ds",
    "n3ds": "nintendo-3ds",
    "n64": "nintendo-64",
    "n64dd": "nintendo-64dd",
    "gc": "nintendo-gamecube",
    "gamecube": "nintendo-gamecube",
    "wii": "nintendo-wii",
    "wiiu": "nintendo-wii-u",
    "switch": "nintendo-switch",
    "nes": "nintendo-nes",
    "famicom": "nintendo-nes",
    "snes": "nintendo-snes",
    "gb": "nintendo-gb",
    "gba": "nintendo-gba",
    "gbc": "nintendo-gbc",
    "virtualboy": "nintendo-virtual-boy",
    # SNK
    "fbneo": "snk-neogeo",
    "neogeocd": "snk-neogeo-cd",
    "neogeocdjp": "snk-neogeo-cd",
    "ngp": "snk-neogeo-pocket",
    "ngpc": "snk-neogeo-pocket-color",
    # Panasonic
    "3do": "panasonic-3do",
    # NEC
    "pcengine": "nec-pc-engine",
    "pcenginecd": "nec-pc-engine",
    "pcfx": "nec-pc-fx",
    "pc88": "nec-pc-88",
    "pc98": "nec-pc-98",
    # Coleco
    "colecovision": "coleco-colecovision",
}
|
|
|
|
# Hand-maintained BIOS file list per system slug. Each entry carries the file
# "name", its "destination" path, and optionally a "region" tag.
# Systems listed here are emitted from this table; cheat-sheet rows that
# resolve to the same slug are ignored by fetch_requirements().
KNOWN_BIOS_FILES = {
    "sony-playstation": [
        {"name": "scph5500.bin", "destination": "scph5500.bin", "region": "JP"},
        {"name": "scph5501.bin", "destination": "scph5501.bin", "region": "US"},
        {"name": "scph5502.bin", "destination": "scph5502.bin", "region": "EU"},
    ],
    "sony-playstation-2": [
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.BIN",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.EROM",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
        },
    ],
    "sega-mega-cd": [
        {"name": "bios_CD_E.bin", "destination": "bios_CD_E.bin", "region": "EU"},
        {"name": "bios_CD_U.bin", "destination": "bios_CD_U.bin", "region": "US"},
        {"name": "bios_CD_J.bin", "destination": "bios_CD_J.bin", "region": "JP"},
    ],
    "sega-saturn": [
        {"name": "sega_101.bin", "destination": "sega_101.bin", "region": "JP"},
        {"name": "mpr-17933.bin", "destination": "mpr-17933.bin", "region": "US/EU"},
        {"name": "saturn_bios.bin", "destination": "saturn_bios.bin"},
    ],
    "sega-dreamcast": [
        {"name": "dc_boot.bin", "destination": "dc/dc_boot.bin"},
        {"name": "dc_flash.bin", "destination": "dc/dc_flash.bin"},
    ],
    "nintendo-ds": [
        {"name": "bios7.bin", "destination": "bios7.bin"},
        {"name": "bios9.bin", "destination": "bios9.bin"},
        {"name": "firmware.bin", "destination": "firmware.bin"},
    ],
    "snk-neogeo": [
        {"name": "neogeo.zip", "destination": "neogeo.zip"},
        {"name": "neocdz.zip", "destination": "neocdz.zip"},
    ],
    "panasonic-3do": [
        {"name": "panafz1.bin", "destination": "panafz1.bin"},
    ],
    "nintendo-nes": [
        {"name": "disksys.rom", "destination": "disksys.rom"},
    ],
    "sega-dreamcast-arcade": [
        {"name": "naomi.zip", "destination": "dc/naomi.zip"},
    ],
}
|
|
|
|
_RE_ARRAY = re.compile(
|
|
r'(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
|
|
re.MULTILINE,
|
|
)
|
|
|
|
_RE_FUNC = re.compile(
|
|
r'function\s+(check\w+Bios)\s*\(\)',
|
|
re.MULTILINE,
|
|
)
|
|
|
|
_RE_LOCAL_HASHES = re.compile(
|
|
r'local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
|
|
re.MULTILINE,
|
|
)
|
|
|
|
|
|
def _fetch_url(url: str) -> str:
|
|
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
return resp.read().decode("utf-8")
|
|
except urllib.error.URLError as e:
|
|
raise ConnectionError(f"Failed to fetch {url}: {e}") from e
|
|
|
|
|
|
class Scraper(BaseScraper):
|
|
"""Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets."""
|
|
|
|
def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL):
    """Store the source locations and start with empty download caches.

    Args:
        checkbios_url: URL of the checkBIOS.sh script to scrape.
        csv_base_url: URL prefix under which the cheat-sheet CSVs live.
    """
    # Per-instance caches: sheet file name -> CSV text, and the script text
    # (None until the script has been downloaded once).
    self._csv_cache: dict[str, str] = {}
    self._raw_script: str | None = None

    self.csv_base_url = csv_base_url
    self.checkbios_url = checkbios_url
|
|
|
|
def _fetch_script(self) -> str:
    """Return the checkBIOS.sh text, downloading it only on first use."""
    cached = self._raw_script
    if cached is None:
        # First call: download and remember the script for later calls.
        cached = _fetch_url(self.checkbios_url)
        self._raw_script = cached
    return cached
|
|
|
|
def _fetch_csv(self, sheet: str) -> str:
    """Return the text of one cheat-sheet CSV, cached per sheet name.

    Fetching is best-effort: when the download fails, a warning is written
    to stderr and an empty string is returned (and cached), so one
    unavailable sheet does not abort the whole scrape.

    Args:
        sheet: File name of the sheet, appended to ``self.csv_base_url``.
    """
    if sheet in self._csv_cache:
        return self._csv_cache[sheet]

    # Tolerate a base URL configured with a trailing slash.
    url = f"{self.csv_base_url.rstrip('/')}/{sheet}"
    try:
        data = _fetch_url(url)
    except ConnectionError as exc:
        # Keep going, but make the gap visible instead of silently
        # producing an incomplete result.
        print(f"warning: skipping cheat sheet {sheet}: {exc}", file=sys.stderr)
        data = ""
    self._csv_cache[sheet] = data
    return data
|
|
|
|
def _parse_hash_arrays(self, script: str) -> dict[str, list[str]]:
    """Collect the named hash arrays of the bash script, keyed by system slug.

    Only arrays whose name appears in ``HASH_ARRAY_MAP`` are kept; when the
    same array is assigned more than once, the last assignment wins.
    """
    found: dict[str, list[str]] = {}
    for assignment in _RE_ARRAY.finditer(script):
        array_name, digest_run = assignment.group(1), assignment.group(2)
        if array_name not in HASH_ARRAY_MAP:
            continue
        # str.split() with no argument already drops all surrounding whitespace.
        found[HASH_ARRAY_MAP[array_name]] = digest_run.split()
    return found
|
|
|
|
def _parse_function_hashes(self, script: str) -> dict[str, list[str]]:
    """Extract the ``local hashes=(...)`` array of each mapped check function.

    Only functions named in ``FUNCTION_HASH_MAP`` are considered. The search
    for the array is limited to the text between the function's header and
    the next ``function name()`` header (or the end of the script), so a
    function without its own array never picks up the array of a later one.

    Returns:
        Mapping of system slug to the list of hash strings found.
    """
    # Any `function name()` header marks where the previous function ends.
    any_function_header = re.compile(r'\bfunction\s+\w+\s*\(\)')

    result: dict[str, list[str]] = {}
    for func_match in _RE_FUNC.finditer(script):
        system = FUNCTION_HASH_MAP.get(func_match.group(1))
        if system is None:
            continue

        body_start = func_match.end()
        next_header = any_function_header.search(script, body_start)
        body_end = next_header.start() if next_header else len(script)

        local_match = _RE_LOCAL_HASHES.search(script, body_start, body_end)
        if local_match:
            result[system] = local_match.group(1).split()
    return result
|
|
|
|
@staticmethod
|
|
def _clean_markdown(text: str) -> str:
|
|
"""Strip markdown/HTML artifacts from CSV fields."""
|
|
text = re.sub(r'\*\*', '', text) # bold
|
|
text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text) # mkdocs material icons
|
|
text = re.sub(r':material-[^:]+:', '', text)
|
|
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # [text](url) -> text
|
|
text = re.sub(r'<br\s*/?>', ' ', text) # <br/>
|
|
text = re.sub(r'<[^>]+>', '', text) # remaining HTML
|
|
return text.strip()
|
|
|
|
def _parse_csv_bios(self, csv_text: str) -> list[dict]:
    """Turn a cheat-sheet CSV into one entry per row that lists BIOS files.

    Columns are located by header substring ("bios", "folder", "system",
    case-insensitive, first match wins). Rows whose BIOS cell is empty or
    reads "not required" are skipped.

    Returns:
        A list of dicts with the keys "system" (slug), "system_name" and
        "bios_raw" (the cleaned BIOS cell).
    """
    if not csv_text.strip():
        return []

    def cell(row: dict, needle: str) -> str:
        # Cleaned value of the first column whose header contains `needle`;
        # "" when no header matches. Falsy headers (e.g. None) are skipped.
        for header in row:
            if header and needle in header.lower():
                return self._clean_markdown(row[header] or "")
        return ""

    parsed = []
    for row in csv.DictReader(io.StringIO(csv_text)):
        bios_cell = cell(row, "bios")
        if not bios_cell or bios_cell.lower() == "not required":
            continue

        folder_cell = cell(row, "folder")
        system_cell = cell(row, "system")

        # The first folder token that is a known alias decides the slug.
        slug = None
        for token in re.split(r'[`\s]+', folder_cell):
            token = token.strip().strip('`').lower()
            if token and token in SYSTEM_SLUG_MAP:
                slug = SYSTEM_SLUG_MAP[token]
                break

        if not slug:
            # No alias matched: fall back to the sanitised folder text.
            sanitised = re.sub(r'[^a-z0-9\-]', '', folder_cell.strip().strip('`').lower())
            slug = sanitised if sanitised else "unknown"

        parsed.append({
            "system": slug,
            "system_name": system_cell,
            "bios_raw": bios_cell,
        })
    return parsed
|
|
|
|
def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]:
|
|
"""Extract individual BIOS filenames from a CSV BIOS field."""
|
|
results = []
|
|
bios_raw = re.sub(r'<br\s*/?>', ' ', bios_raw)
|
|
bios_raw = bios_raw.replace('`', '')
|
|
patterns = re.findall(
|
|
r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)',
|
|
bios_raw,
|
|
)
|
|
for p in patterns:
|
|
name = p.split("/")[-1] if "/" in p else p
|
|
results.append({"name": name, "destination": p})
|
|
return results
|
|
|
|
def fetch_requirements(self) -> list[BiosRequirement]:
    """Build the full requirement list from checkBIOS.sh and the cheat sheets.

    For every system in ``KNOWN_BIOS_FILES`` the curated files are emitted,
    followed by one hash-only requirement per whitelisted MD5 (named
    ``"<system>:<md5>"`` with an empty destination). Cheat-sheet files are
    then added for systems that have no curated list.

    Raises:
        ValueError: if the downloaded script fails ``validate_format``.
    """
    script = self._fetch_script()
    if not self.validate_format(script):
        raise ValueError("checkBIOS.sh format validation failed")

    # Function-local whitelists override top-level arrays for the same system.
    hashes_by_system: dict[str, list[str]] = {
        **self._parse_hash_arrays(script),
        **self._parse_function_hashes(script),
    }

    requirements: list[BiosRequirement] = []
    emitted: set[tuple[str, str]] = set()  # (system, file name) pairs already handled

    # Pass 1: curated file lists plus their MD5 whitelists.
    for system, known_files in KNOWN_BIOS_FILES.items():
        for known in known_files:
            pair = (system, known["name"])
            if pair in emitted:
                continue
            emitted.add(pair)
            requirements.append(BiosRequirement(
                name=known["name"],
                system=system,
                destination=known.get("destination", known["name"]),
                required=True,
            ))

        for md5 in hashes_by_system.get(system, []):
            requirements.append(BiosRequirement(
                name=f"{system}:{md5}",
                system=system,
                md5=md5,
                destination="",
                required=True,
            ))

    # Pass 2: cheat-sheet rows, only for systems without a curated list.
    for sheet in CSV_SHEETS:
        for entry in self._parse_csv_bios(self._fetch_csv(sheet)):
            system = entry["system"]
            if system in KNOWN_BIOS_FILES:
                # Nothing from such a row is ever emitted, so it can be
                # skipped before its file names are extracted.
                continue
            for found in self._extract_filenames_from_bios_field(entry["bios_raw"]):
                pair = (system, found["name"])
                if pair in emitted:
                    continue
                emitted.add(pair)
                requirements.append(BiosRequirement(
                    name=found["name"],
                    system=system,
                    destination=found.get("destination", found["name"]),
                    required=True,
                ))

    return requirements
|
|
|
|
def validate_format(self, raw_data: str) -> bool:
    """Sanity-check that *raw_data* looks like the expected checkBIOS.sh.

    All three markers must be present: a ``PSBios`` array assignment, one of
    the PlayStation check function names, and at least one run of 32 hex
    digits (an MD5 digest).
    """
    has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data
    has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data
    # Accept both letter cases, matching the hash-array patterns used by the
    # parsers in this module; a lowercase-only probe would reject a script
    # that those parsers can read.
    has_md5 = re.search(r'[0-9a-fA-F]{32}', raw_data) is not None
    return has_ps and has_func and has_md5
|
|
|
|
def generate_platform_yaml(self) -> dict:
    """Assemble the platform description dict for EmuDeck.

    Requirements are grouped by system under ``systems[<slug>]["files"]``.
    The "version" field is looked up best-effort and left empty when the
    lookup fails or returns nothing.
    """
    systems: dict[str, dict] = {}
    for req in self.fetch_requirements():
        file_entry: dict = {
            "name": req.name,
            "destination": req.destination,
            "required": req.required,
        }
        if req.md5:
            # Only requirements that carry a hash get an "md5" key.
            file_entry["md5"] = req.md5
        systems.setdefault(req.system, {"files": []})["files"].append(file_entry)

    try:
        latest = fetch_github_latest_version("dragoonDorise/EmuDeck")
    except (ConnectionError, ValueError, OSError):
        # Version lookup is optional; fall through to the empty default.
        latest = None
    version = latest if latest else ""

    # NOTE(review): "source" reports the module default CHECKBIOS_URL, not
    # self.checkbios_url — confirm this is intended for custom URLs.
    return {
        "platform": "EmuDeck",
        "version": version,
        "homepage": "https://www.emudeck.com",
        "source": CHECKBIOS_URL,
        "base_destination": "bios",
        "hash_type": "md5",
        "verification_mode": "md5",
        "systems": systems,
    }
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: hand this scraper to the shared CLI driver."""
    try:
        from scripts.scraper.base_scraper import scraper_cli
    except ImportError:
        # Executed as a plain script, so the `scripts` package is not
        # importable. Fall back to the sibling module, mirroring the import
        # fallback at the top of this file.
        from base_scraper import scraper_cli
    scraper_cli(Scraper, "Scrape emudeck BIOS requirements")


if __name__ == "__main__":
    main()
|