mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
324 lines
12 KiB
Python
324 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""Scraper for libretro System.dat (RetroArch, Lakka).
|
|
|
|
Source: https://github.com/libretro/libretro-database/blob/master/dat/System.dat
|
|
Format: clrmamepro DAT
|
|
Hash: SHA1 primary
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
import urllib.request
|
|
import urllib.error
|
|
|
|
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
|
from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
|
|
|
|
# Platform identifier under which this scraper is registered.
PLATFORM_NAME = "libretro"

# Raw URL of the upstream clrmamepro-format DAT that lists BIOS/firmware
# files per system (SHA1 is the primary hash in this file).
SOURCE_URL = (
    "https://raw.githubusercontent.com/libretro/libretro-database/"
    "master/dat/System.dat"
)
|
|
|
|
# Maps system names exactly as they appear in System.dat to this project's
# canonical system slugs. Entries missing from this map fall back to a
# kebab-cased form of the DAT name (see Scraper.fetch_requirements).
# NOTE(review): keys must match the upstream DAT byte-for-byte — e.g.
# "Phillips - Videopac+" is misspelled upstream; do not "fix" the key.
SYSTEM_SLUG_MAP = {
    "3DO Company, The - 3DO": "3do",
    "Amstrad - CPC": "amstrad-cpc",
    "Arcade": "arcade",
    "Atari - 400-800": "atari-400-800",
    "Atari - 5200": "atari-5200",
    "Atari - 7800": "atari-7800",
    "Atari - Lynx": "atari-lynx",
    "Atari - ST": "atari-st",
    "Coleco - ColecoVision": "coleco-colecovision",
    "Commodore - Amiga": "commodore-amiga",
    "Commodore - C128": "commodore-c128",
    "Dinothawr": "dinothawr",
    "DOS": "dos",
    "EPOCH/YENO Super Cassette Vision": "epoch-scv",
    "Elektronika - BK-0010/BK-0011(M)": "elektronika-bk",
    "Enterprise - 64/128": "enterprise-64-128",
    "Fairchild Channel F": "fairchild-channel-f",
    "Id Software - Doom": "doom",
    "J2ME": "j2me",
    "MacII": "apple-macintosh-ii",
    "Magnavox - Odyssey2": "magnavox-odyssey2",
    "Mattel - Intellivision": "mattel-intellivision",
    "Microsoft - MSX": "microsoft-msx",
    "NEC - PC Engine - TurboGrafx 16 - SuperGrafx": "nec-pc-engine",
    "NEC - PC-98": "nec-pc-98",
    "NEC - PC-FX": "nec-pc-fx",
    "Nintendo - Famicom Disk System": "nintendo-fds",
    "Nintendo - Game Boy Advance": "nintendo-gba",
    "Nintendo - GameCube": "nintendo-gamecube",
    "Nintendo - Gameboy": "nintendo-gb",
    "Nintendo - Gameboy Color": "nintendo-gbc",
    "Nintendo - Nintendo 64DD": "nintendo-64dd",
    "Nintendo - Nintendo DS": "nintendo-ds",
    "Nintendo - Nintendo Entertainment System": "nintendo-nes",
    "Nintendo - Pokemon Mini": "nintendo-pokemon-mini",
    "Nintendo - Satellaview": "nintendo-satellaview",
    "Nintendo - SuFami Turbo": "nintendo-sufami-turbo",
    "Nintendo - Super Game Boy": "nintendo-sgb",
    "Nintendo - Super Nintendo Entertainment System": "nintendo-snes",
    "Phillips - Videopac+": "philips-videopac",
    "SNK - NeoGeo CD": "snk-neogeo-cd",
    "ScummVM": "scummvm",
    "Sega - Dreamcast": "sega-dreamcast",
    "Sega - Dreamcast-based Arcade": "sega-dreamcast-arcade",
    "Sega - Game Gear": "sega-game-gear",
    "Sega - Master System - Mark III": "sega-master-system",
    "Sega - Mega CD - Sega CD": "sega-mega-cd",
    "Sega - Mega Drive - Genesis": "sega-mega-drive",
    "Sega - Saturn": "sega-saturn",
    "Sharp - X1": "sharp-x1",
    "Sharp - X68000": "sharp-x68000",
    "Sinclair - ZX Spectrum": "sinclair-zx-spectrum",
    "Sony - PlayStation": "sony-playstation",
    "Sony - PlayStation 2": "sony-playstation-2",
    "Sony - PlayStation Portable": "sony-psp",
    "Texas Instruments TI-83": "ti-83",
    "Videoton - TV Computer": "videoton-tvc",
    "Wolfenstein 3D": "wolfenstein-3d",
}
|
|
|
|
|
|
class Scraper(BaseScraper):
    """Scraper for libretro System.dat.

    Downloads the clrmamepro-format DAT that lists BIOS/firmware files for
    RetroArch/Lakka systems and converts it into BiosRequirement records
    and a platform YAML config dict.
    """

    def __init__(self, url: str = SOURCE_URL):
        # Source URL is overridable for tests/mirrors.
        self.url = url
        # Raw DAT text, cached after the first successful fetch.
        self._raw_data: str | None = None

    def _fetch_raw(self) -> str:
        """Fetch raw DAT content from source URL (cached after first call).

        Raises:
            ConnectionError: if the HTTP request fails.
        """
        if self._raw_data is not None:
            return self._raw_data

        try:
            req = urllib.request.Request(
                self.url, headers={"User-Agent": "retrobios-scraper/1.0"}
            )
            with urllib.request.urlopen(req, timeout=30) as resp:
                self._raw_data = resp.read().decode("utf-8")
            return self._raw_data
        except urllib.error.URLError as e:
            raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e

    def fetch_requirements(self) -> list[BiosRequirement]:
        """Parse System.dat and return BIOS requirements.

        Raises:
            ValueError: if the DAT fails format validation.
            ConnectionError: if the download fails.
        """
        raw = self._fetch_raw()

        if not self.validate_format(raw):
            raise ValueError("System.dat format validation failed")

        requirements = []
        for rom in parse_dat(raw):
            # Prefer the curated slug; otherwise fall back to a kebab-cased
            # form of the DAT's system name.
            system_slug = SYSTEM_SLUG_MAP.get(
                rom.system, rom.system.lower().replace(" ", "-")
            )

            # DAT names may include subdirectories; keep the full path as the
            # install destination and use the basename as the display name.
            destination = rom.name
            name = rom.name.split("/")[-1] if "/" in rom.name else rom.name

            requirements.append(BiosRequirement(
                name=name,
                system=system_slug,
                sha1=rom.sha1 or None,
                md5=rom.md5 or None,
                crc32=rom.crc32 or None,
                size=rom.size or None,
                destination=destination,
                required=True,
            ))

        return requirements

    def validate_format(self, raw_data: str) -> bool:
        """Validate System.dat format (delegates to the shared DAT checker)."""
        return validate_dat_format(raw_data)

    def fetch_metadata(self) -> dict:
        """Fetch version info from System.dat header and GitHub API.

        Returns a dict with keys ``dat_version``, ``retroarch_version`` and
        ``db_version``.
        """
        raw = self._fetch_raw()
        meta = parse_dat_metadata(raw)

        retroarch_version = fetch_github_latest_version("libretro/RetroArch")
        db_version = fetch_github_latest_version("libretro/libretro-database")

        return {
            "dat_version": meta.version,
            "retroarch_version": retroarch_version,
            "db_version": db_version,
        }

    def _fetch_core_metadata(self) -> dict[str, dict]:
        """Fetch per-core metadata from libretro-core-info .info files.

        Returns a mapping of system slug -> {core, manufacturer,
        display_name, docs}. Best-effort: network or parse failures yield a
        partial (possibly empty) result rather than raising.
        """
        import json

        # Imported at call time (not module level) to avoid an import cycle
        # with the sibling scraper module.
        from .coreinfo_scraper import CORE_SYSTEM_MAP

        metadata: dict[str, dict] = {}
        try:
            # Git trees API with recursive=1 lists every file in the repo.
            url = "https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
            req = urllib.request.Request(url, headers={
                "User-Agent": "retrobios-scraper/1.0",
                "Accept": "application/vnd.github.v3+json",
            })
            with urllib.request.urlopen(req, timeout=30) as resp:
                tree = json.loads(resp.read())

            info_files = [
                item["path"] for item in tree.get("tree", [])
                if item["path"].endswith("_libretro.info")
            ]

            for filename in info_files:
                core_name = filename.replace("_libretro.info", "")
                try:
                    # BUGFIX: interpolate the actual file path into the raw
                    # URL; previously the path segment was missing, so every
                    # per-core fetch hit a nonexistent URL.
                    info_url = (
                        "https://raw.githubusercontent.com/libretro/"
                        f"libretro-core-info/master/{filename}"
                    )
                    req = urllib.request.Request(
                        info_url, headers={"User-Agent": "retrobios-scraper/1.0"}
                    )
                    with urllib.request.urlopen(req, timeout=10) as resp:
                        content = resp.read().decode("utf-8")

                    # .info files are flat `key = "value"` lines.
                    info = {}
                    for line in content.split("\n"):
                        line = line.strip()
                        if " = " in line:
                            key, _, value = line.partition(" = ")
                            info[key.strip()] = value.strip().strip('"')

                    # Only cores that declare firmware are relevant here.
                    fw_count = int(info.get("firmware_count", "0"))
                    if fw_count == 0:
                        continue

                    system_name = info.get("systemname", "")
                    manufacturer = info.get("manufacturer", "")
                    display_name = info.get("display_name", "")

                    # Map core to our system slug; skip unmapped cores.
                    system_slug = CORE_SYSTEM_MAP.get(core_name)
                    if not system_slug:
                        continue

                    # First core seen for a system wins.
                    if system_slug not in metadata:
                        metadata[system_slug] = {
                            "core": core_name,
                            "manufacturer": manufacturer,
                            "display_name": display_name or system_name,
                            "docs": f"https://docs.libretro.com/library/{core_name}/",
                        }
                except (urllib.error.URLError, urllib.error.HTTPError):
                    # Skip individual unreachable/broken .info files.
                    continue
        except (ConnectionError, ValueError, OSError):
            # Core metadata is optional enrichment; degrade gracefully.
            pass

        return metadata

    def generate_platform_yaml(self) -> dict:
        """Generate a platform YAML config dict, merging System.dat with core-info metadata."""
        requirements = self.fetch_requirements()
        metadata = self.fetch_metadata()
        core_meta = self._fetch_core_metadata()

        systems = {}
        for req in requirements:
            if req.system not in systems:
                system_entry = {"files": []}
                # Enrich the system entry with core-info data when available.
                if req.system in core_meta:
                    cm = core_meta[req.system]
                    if cm.get("core"):
                        system_entry["core"] = cm["core"]
                    if cm.get("manufacturer"):
                        system_entry["manufacturer"] = cm["manufacturer"]
                    if cm.get("docs"):
                        system_entry["docs"] = cm["docs"]
                systems[req.system] = system_entry

            # Emit only the hash/size fields that are actually present.
            entry = {
                "name": req.name,
                "destination": req.destination,
                "required": req.required,
            }
            if req.sha1:
                entry["sha1"] = req.sha1
            if req.md5:
                entry["md5"] = req.md5
            if req.crc32:
                entry["crc32"] = req.crc32
            if req.size:
                entry["size"] = req.size

            systems[req.system]["files"].append(entry)

        return {
            "platform": "RetroArch",
            "version": metadata["retroarch_version"] or "",
            "dat_version": metadata["dat_version"] or "",
            "homepage": "https://www.retroarch.com",
            "source": "https://github.com/libretro/libretro-database/blob/master/dat/System.dat",
            "base_destination": "system",
            "hash_type": "sha1",
            "verification_mode": "existence",
            "systems": systems,
        }
|
|
|
|
|
|
def main():
    """CLI entry point for testing.

    Flags:
        --dry-run: print a per-system summary without writing anything.
        --json: print the generated platform config as JSON to stdout.
        --output/-o FILE: write the platform config as YAML (needs PyYAML).
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Scrape libretro System.dat")
    parser.add_argument("--dry-run", action="store_true", help="Just show what would be scraped")
    parser.add_argument("--output", "-o", help="Output YAML file")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    scraper = Scraper()

    try:
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.dry_run:
        by_system = {}
        for req in reqs:
            by_system.setdefault(req.system, []).append(req)

        for system, files in sorted(by_system.items()):
            print(f"\n{system} ({len(files)} files):")
            for f in files:
                # Short hash prefix for quick visual identification.
                hash_info = f.sha1[:12] if f.sha1 else f.md5[:12] if f.md5 else "no-hash"
                print(f"  {f.name} ({f.size or '?'} bytes, {hash_info}...)")

        print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
        return

    if args.json:
        config = scraper.generate_platform_yaml()
        print(json.dumps(config, indent=2))
        return

    if args.output:
        # PyYAML is only needed for file output, so import lazily.
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
            sys.exit(1)

        config = scraper.generate_platform_yaml()
        with open(args.output, "w") as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        print(f"Written to {args.output}")
    else:
        # BUGFIX: reuse the requirements already fetched above instead of
        # calling fetch_requirements() a second time (which re-parses the
        # whole DAT for an identical result).
        by_system = {}
        for req in reqs:
            by_system.setdefault(req.system, []).append(req)
        print(f"Scraped {len(reqs)} BIOS files across {len(by_system)} systems")
|
|
|
|
|
|
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|