#!/usr/bin/env python3
"""Scraper for EmuDeck BIOS requirements.
Sources:
1. checkBIOS.sh - MD5 hash whitelists per system
https://raw.githubusercontent.com/dragoonDorise/EmuDeck/main/functions/checkBIOS.sh
2. CSV cheat sheets - BIOS filenames per manufacturer
https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables/{name}-cheat-sheet.csv
Hash: MD5 primary
"""
from __future__ import annotations
import csv
import io
import re
import sys
import urllib.request
import urllib.error
try:
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
except ImportError:
from base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
# Platform identifier used by this scraper.
PLATFORM_NAME = "emudeck"

# Raw URL of the EmuDeck bash script that carries the MD5 whitelists.
CHECKBIOS_URL = (
    "https://raw.githubusercontent.com/dragoonDorise/EmuDeck/"
    "main/functions/checkBIOS.sh"
)

# Directory URL under which the per-manufacturer cheat-sheet CSVs live.
CSV_BASE_URL = (
    "https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/"
    "main/docs/tables"
)

# Cheat-sheet file names ("<manufacturer>-cheat-sheet.csv"), in fetch order.
CSV_SHEETS = [
    f"{manufacturer}-cheat-sheet.csv"
    for manufacturer in (
        "sony",
        "sega",
        "nintendo",
        "snk",
        "panasonic",
        "nec",
        "microsoft",
        "coleco",
        "atari",
        "bandai",
        "mattel",
    )
]
# Bash array name in checkBIOS.sh -> system slug the array's MD5 list belongs to.
HASH_ARRAY_MAP = {
    "PSBios": "sony-playstation",
    "PS2Bios": "sony-playstation-2",
    "CDBios": "sega-mega-cd",
    "SaturnBios": "sega-saturn",
}

# Bash function name in checkBIOS.sh -> system slug for the function's
# ``local hashes=(...)`` array.
FUNCTION_HASH_MAP = {
    "checkDreamcastBios": "sega-dreamcast",
    "checkDSBios": "nintendo-ds",
}
SYSTEM_SLUG_MAP = {
"psx": "sony-playstation",
"ps2": "sony-playstation-2",
"ps3": "sony-playstation-3",
"psp": "sony-psp",
"psvita": "sony-psvita",
"segacd": "sega-mega-cd",
"megacd": "sega-mega-cd",
"saturn": "sega-saturn",
"dreamcast": "sega-dreamcast",
"sega32x": "sega-32x",
"mastersystem": "sega-master-system",
"genesis": "sega-mega-drive",
"megadrive": "sega-mega-drive",
"gamegear": "sega-game-gear",
"naomi": "sega-dreamcast-arcade",
"naomi2": "sega-dreamcast-arcade",
"atomiswave": "sega-dreamcast-arcade",
"nds": "nintendo-ds",
"3ds": "nintendo-3ds",
"n3ds": "nintendo-3ds",
"n64": "nintendo-64",
"n64dd": "nintendo-64dd",
"gc": "nintendo-gamecube",
"gamecube": "nintendo-gamecube",
"wii": "nintendo-wii",
"wiiu": "nintendo-wii-u",
"switch": "nintendo-switch",
"nes": "nintendo-nes",
"famicom": "nintendo-nes",
"snes": "nintendo-snes",
"gb": "nintendo-gb",
"gba": "nintendo-gba",
"gbc": "nintendo-gbc",
"virtualboy": "nintendo-virtual-boy",
"fbneo": "snk-neogeo",
"neogeocd": "snk-neogeo-cd",
"neogeocdjp": "snk-neogeo-cd",
"ngp": "snk-neogeo-pocket",
"ngpc": "snk-neogeo-pocket-color",
"3do": "panasonic-3do",
"pcengine": "nec-pc-engine",
"pcenginecd": "nec-pc-engine",
"pcfx": "nec-pc-fx",
"pc88": "nec-pc-88",
"pc98": "nec-pc-98",
"colecovision": "coleco-colecovision",
}
# Hand-maintained BIOS file list per system slug. Each entry has a "name" and a
# "destination" path; "region" is present only on some entries.
KNOWN_BIOS_FILES = {
    "sony-playstation": [
        {"name": "scph5500.bin", "destination": "scph5500.bin", "region": "JP"},
        {"name": "scph5501.bin", "destination": "scph5501.bin", "region": "US"},
        {"name": "scph5502.bin", "destination": "scph5502.bin", "region": "EU"},
    ],
    "sony-playstation-2": [
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.BIN",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.EROM",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
        },
        {
            "name": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
            "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
        },
    ],
    "sega-mega-cd": [
        {"name": "bios_CD_E.bin", "destination": "bios_CD_E.bin", "region": "EU"},
        {"name": "bios_CD_U.bin", "destination": "bios_CD_U.bin", "region": "US"},
        {"name": "bios_CD_J.bin", "destination": "bios_CD_J.bin", "region": "JP"},
    ],
    "sega-saturn": [
        {"name": "sega_101.bin", "destination": "sega_101.bin", "region": "JP"},
        {"name": "mpr-17933.bin", "destination": "mpr-17933.bin", "region": "US/EU"},
        {"name": "saturn_bios.bin", "destination": "saturn_bios.bin"},
    ],
    "sega-dreamcast": [
        {"name": "dc_boot.bin", "destination": "dc/dc_boot.bin"},
        {"name": "dc_flash.bin", "destination": "dc/dc_flash.bin"},
    ],
    "nintendo-ds": [
        {"name": "bios7.bin", "destination": "bios7.bin"},
        {"name": "bios9.bin", "destination": "bios9.bin"},
        {"name": "firmware.bin", "destination": "firmware.bin"},
    ],
    "snk-neogeo": [
        {"name": "neogeo.zip", "destination": "neogeo.zip"},
        {"name": "neocdz.zip", "destination": "neocdz.zip"},
    ],
    "panasonic-3do": [
        {"name": "panafz1.bin", "destination": "panafz1.bin"},
    ],
    "nintendo-nes": [
        {"name": "disksys.rom", "destination": "disksys.rom"},
    ],
    "sega-dreamcast-arcade": [
        {"name": "naomi.zip", "destination": "dc/naomi.zip"},
    ],
}
# Bash array assignment, optionally prefixed with "local":
#   NAME=(hex hex ...)
# group(1) is the array name, group(2) the whitespace-separated hex tokens.
_RE_ARRAY = re.compile(r'(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)', re.MULTILINE)

# Bash function header written with the "function" keyword and a name of the
# form check<Something>Bios (case-sensitive); group(1) is the function name.
_RE_FUNC = re.compile(r'function\s+(check\w+Bios)\s*\(\)', re.MULTILINE)

# "local hashes=(hex hex ...)" inside a function body; group(1) is the hex list.
_RE_LOCAL_HASHES = re.compile(r'local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)', re.MULTILINE)
def _fetch_url(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    The request is sent with a fixed ``User-Agent`` header and a 30 second
    timeout.

    Raises:
        ConnectionError: if opening the URL or reading the body fails with
            any ``OSError``. The original exception is chained as the cause.
    """
    req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return resp.read().decode("utf-8")
    except OSError as e:
        # URLError (and HTTPError) derive from OSError, so they are still
        # covered. Catching the base class also wraps timeouts and resets
        # raised while *reading* the body, which urllib does not convert to
        # URLError; callers only handle ConnectionError.
        raise ConnectionError(f"Failed to fetch {url}: {e}") from e
class Scraper(BaseScraper):
    """Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets.

    MD5 whitelists are parsed out of the bash script; file names come from
    ``KNOWN_BIOS_FILES`` and, for systems not listed there, from the BIOS
    column of the cheat-sheet CSVs. Downloaded text is cached on the instance.
    """

    def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL):
        """Create a scraper.

        Args:
            checkbios_url: URL of the checkBIOS.sh script (also passed to the
                base class as ``url``).
            csv_base_url: URL prefix under which the ``CSV_SHEETS`` files live.
        """
        super().__init__(url=checkbios_url)
        self.checkbios_url = checkbios_url
        self.csv_base_url = csv_base_url
        self._raw_script: str | None = None
        self._csv_cache: dict[str, str] = {}

    def _fetch_script(self) -> str:
        """Return the checkBIOS.sh text, downloading it on first use.

        Raises:
            ConnectionError: propagated from ``_fetch_url`` when the download fails.
        """
        if self._raw_script is None:
            self._raw_script = _fetch_url(self.checkbios_url)
        return self._raw_script

    def _fetch_csv(self, sheet: str) -> str:
        """Return the text of cheat sheet *sheet*, or ``""`` if it cannot be fetched.

        The result (including the empty fallback) is cached per sheet name, so
        a failing sheet is not retried on the same instance.
        """
        if sheet in self._csv_cache:
            return self._csv_cache[sheet]
        url = f"{self.csv_base_url}/{sheet}"
        try:
            data = _fetch_url(url)
        except ConnectionError as exc:
            # Best effort: a missing sheet must not abort the scrape, but make
            # the gap visible instead of silently returning fewer entries.
            print(f"warning: {exc}; treating {sheet} as empty", file=sys.stderr)
            data = ""
        self._csv_cache[sheet] = data
        return data

    def _parse_hash_arrays(self, script: str) -> dict[str, list[str]]:
        """Extract named MD5 hash arrays from bash script.

        Only arrays whose name is a key of ``HASH_ARRAY_MAP`` are kept; the
        result is keyed by the mapped system slug. If the same array name is
        assigned more than once, the last assignment wins.
        """
        result: dict[str, list[str]] = {}
        for match in _RE_ARRAY.finditer(script):
            system = HASH_ARRAY_MAP.get(match.group(1))
            if system is not None:
                # str.split() already drops surrounding/empty whitespace runs.
                result[system] = match.group(2).split()
        return result

    def _parse_function_hashes(self, script: str) -> dict[str, list[str]]:
        """Extract local hash arrays from named check functions.

        For every function listed in ``FUNCTION_HASH_MAP`` the text from its
        header up to the next header matched by ``_RE_FUNC`` (or end of script)
        is searched for the first ``local hashes=(...)`` array.
        """
        result: dict[str, list[str]] = {}
        for func_match in _RE_FUNC.finditer(script):
            system = FUNCTION_HASH_MAP.get(func_match.group(1))
            if system is None:
                continue
            next_func = _RE_FUNC.search(script, func_match.end())
            func_end = next_func.start() if next_func else len(script)
            local_match = _RE_LOCAL_HASHES.search(script[func_match.start():func_end])
            if local_match:
                result[system] = local_match.group(1).split()
        return result

    @staticmethod
    def _clean_markdown(text: str) -> str:
        """Strip markdown/HTML artifacts from CSV fields."""
        text = re.sub(r'\*\*', '', text)  # bold
        text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text)  # mkdocs material icons
        text = re.sub(r':material-[^:]+:', '', text)
        text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)  # [text](url) -> text
        # NOTE(review): this pattern arrived as a string literal broken across
        # two lines (a syntax error); reconstructed as an HTML line-break
        # matcher, replaced by a space so adjacent names stay separated.
        text = re.sub(r'<br\s*/?>', ' ', text)  # <br>, <br/>, <br />
        text = re.sub(r'<[^>]+>', '', text)  # remaining HTML
        return text.strip()

    def _column_value(self, row: dict, needle: str) -> str:
        """Return the cleaned value of the first column whose header contains *needle*.

        Header matching is case-insensitive. Returns ``""`` when no header
        matches or the cell is empty/missing.
        """
        for key, value in row.items():
            # DictReader may produce a None key for surplus cells; skip it.
            if key and needle in key.lower():
                return self._clean_markdown(value or "")
        return ""

    def _parse_csv_bios(self, csv_text: str) -> list[dict]:
        """Parse BIOS file info from a cheat sheet CSV.

        Returns one dict per row that has a non-empty BIOS cell other than
        "not required", with keys ``system`` (slug), ``system_name`` and
        ``bios_raw``. The slug is looked up from the folder column through
        ``SYSTEM_SLUG_MAP``; otherwise it is the folder text reduced to
        ``[a-z0-9-]``, or ``"unknown"`` if nothing remains.
        """
        entries: list[dict] = []
        if not csv_text.strip():
            return entries
        for row in csv.DictReader(io.StringIO(csv_text)):
            bios_col = self._column_value(row, "bios")
            if not bios_col or bios_col.lower() == "not required":
                continue
            folder_col = self._column_value(row, "folder")
            system_col = self._column_value(row, "system")

            slug = None
            # The folder cell may list several backtick-quoted folder names;
            # the first one known to SYSTEM_SLUG_MAP decides the system.
            for token in re.split(r'[`\s]+', folder_col.lower()):
                if token in SYSTEM_SLUG_MAP:
                    slug = SYSTEM_SLUG_MAP[token]
                    break
            if not slug:
                clean = re.sub(r'[^a-z0-9\-]', '', folder_col.strip().strip('`').lower())
                slug = clean if clean else "unknown"

            entries.append({
                "system": slug,
                "system_name": system_col,
                "bios_raw": bios_col,
            })
        return entries

    def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]:
        """Extract individual BIOS filenames from a CSV BIOS field.

        Returns a list of ``{"name", "destination"}`` dicts in order of
        appearance, where ``destination`` is the matched path and ``name`` its
        last ``/``-separated component.
        """
        # NOTE(review): same broken literal as in _clean_markdown, reconstructed
        # as an HTML line-break matcher.
        bios_raw = re.sub(r'<br\s*/?>', ' ', bios_raw)
        bios_raw = bios_raw.replace('`', '')
        # The trailing (?!\w) forces the extension to end the token. Without it
        # "x.ROM1" was cut to "x.ROM" (the "ROM" alternative matched first) and
        # "foo.binary" yielded a bogus "foo.bin".
        paths = re.findall(
            r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)(?!\w)',
            bios_raw,
        )
        return [{"name": path.rsplit("/", 1)[-1], "destination": path} for path in paths]

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Build the full requirement list from the script and the cheat sheets.

        For each system in ``KNOWN_BIOS_FILES`` the known files are emitted,
        followed by one hash-only requirement per whitelisted MD5 (named
        ``"<system>:<md5>"`` with an empty destination). Cheat-sheet rows then
        contribute files for systems that are *not* in ``KNOWN_BIOS_FILES``.
        A ``(system, name)`` pair is emitted at most once.

        Raises:
            ValueError: if the downloaded script fails ``validate_format``.
            ConnectionError: if the script cannot be downloaded.
        """
        script = self._fetch_script()
        if not self.validate_format(script):
            raise ValueError("checkBIOS.sh format validation failed")

        # Function-local arrays take precedence over top-level arrays on clash.
        all_hashes: dict[str, list[str]] = {
            **self._parse_hash_arrays(script),
            **self._parse_function_hashes(script),
        }

        requirements: list[BiosRequirement] = []
        seen: set[tuple[str, str]] = set()

        for system, file_list in KNOWN_BIOS_FILES.items():
            for f in file_list:
                key = (system, f["name"])
                if key in seen:
                    continue
                seen.add(key)
                requirements.append(BiosRequirement(
                    name=f["name"],
                    system=system,
                    destination=f.get("destination", f["name"]),
                    required=True,
                ))
            for md5 in all_hashes.get(system, []):
                requirements.append(BiosRequirement(
                    name=f"{system}:{md5}",
                    system=system,
                    md5=md5,
                    destination="",
                    required=True,
                ))

        for sheet in CSV_SHEETS:
            for entry in self._parse_csv_bios(self._fetch_csv(sheet)):
                system = entry["system"]
                # Curated systems are authoritative; skip before doing any
                # filename extraction for them.
                if system in KNOWN_BIOS_FILES:
                    continue
                for f in self._extract_filenames_from_bios_field(entry["bios_raw"]):
                    key = (system, f["name"])
                    if key in seen:
                        continue
                    seen.add(key)
                    requirements.append(BiosRequirement(
                        name=f["name"],
                        system=system,
                        destination=f.get("destination", f["name"]),
                        required=True,
                    ))
        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Return True if *raw_data* looks like the expected checkBIOS.sh.

        Requires a ``PSBios`` assignment, a ``checkPS1BIOS`` or ``checkPS2BIOS``
        reference, and at least one run of 32 hex digits.
        """
        has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data
        has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data
        # Accept upper-case hex too, matching what the hash-array regexes parse.
        has_md5 = re.search(r'[0-9a-fA-F]{32}', raw_data) is not None
        return has_ps and has_func and has_md5

    def generate_platform_yaml(self) -> dict:
        """Return the platform description dict built from ``fetch_requirements``.

        Files are grouped per system under ``systems[<slug>]["files"]``; an
        ``md5`` key is added only for requirements that carry a hash. The
        ``version`` field is filled from the latest GitHub version when that
        lookup succeeds and is ``""`` otherwise.
        """
        systems: dict[str, dict] = {}
        for req in self.fetch_requirements():
            entry: dict = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.md5:
                entry["md5"] = req.md5
            systems.setdefault(req.system, {"files": []})["files"].append(entry)

        version = ""
        try:
            version = fetch_github_latest_version("dragoonDorise/EmuDeck") or ""
        except (ConnectionError, ValueError, OSError):
            # Version is informational only; keep the empty default.
            pass

        return {
            "platform": "EmuDeck",
            "version": version,
            "homepage": "https://www.emudeck.com",
            # Report the URL actually scraped, not the module default.
            "source": self.checkbios_url,
            "base_destination": "bios",
            "hash_type": "md5",
            "verification_mode": "md5",
            "systems": systems,
        }
def main() -> None:
    """Command-line entry point: hand this module's ``Scraper`` to ``scraper_cli``."""
    # Imported here rather than at module level, as in the original layout.
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape emudeck BIOS requirements")


if __name__ == "__main__":
    main()