mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
Run ruff check --fix: remove unused imports (F401), fix f-strings without placeholders (F541), remove unused variables (F841), fix duplicate dict key (F601). Run isort --profile black: normalize import ordering across all files. Run ruff format: apply consistent formatting (black-compatible) to all 58 Python files. 3 intentional E402 remain (imports after require_yaml() must execute after yaml is available).
361 lines
11 KiB
Python
361 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""Scraper for libretro-core-info firmware declarations.
|
|
|
|
Source: https://github.com/libretro/libretro-core-info
|
|
Format: .info files with firmware0_path, firmware0_desc, firmware0_opt patterns
|
|
Hash: From notes field (MD5) or cross-referenced with System.dat
|
|
|
|
Complements libretro_scraper (System.dat) with:
|
|
- Exact firmware paths per core
|
|
- Required vs optional status
|
|
- Firmware for cores not covered by System.dat
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
import urllib.error
|
|
import urllib.request
|
|
|
|
try:
|
|
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
|
except ImportError:
|
|
# Allow running directly: python scripts/scraper/coreinfo_scraper.py
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
from scraper.base_scraper import (
|
|
BaseScraper,
|
|
BiosRequirement,
|
|
fetch_github_latest_version,
|
|
)
|
|
|
|
PLATFORM_NAME = "libretro_coreinfo"
|
|
|
|
GITHUB_API = "https://api.github.com/repos/libretro/libretro-core-info"
|
|
RAW_BASE = "https://raw.githubusercontent.com/libretro/libretro-core-info/master"
|
|
|
|
CORE_SYSTEM_MAP = {
|
|
"pcsx_rearmed": "sony-playstation",
|
|
"mednafen_psx": "sony-playstation",
|
|
"mednafen_psx_hw": "sony-playstation",
|
|
"swanstation": "sony-playstation",
|
|
"duckstation": "sony-playstation",
|
|
"pcsx1": "sony-playstation",
|
|
"lrps2": "sony-playstation-2",
|
|
"play": "sony-playstation-2",
|
|
"ppsspp": "sony-psp",
|
|
"fbneo": "arcade",
|
|
"mame": "arcade",
|
|
"mame2003": "arcade",
|
|
"mame2003_plus": "arcade",
|
|
"dolphin": "nintendo-gamecube",
|
|
"melonds": "nintendo-ds",
|
|
"melonds_ds": "nintendo-ds",
|
|
"desmume": "nintendo-ds",
|
|
"mgba": "nintendo-gba",
|
|
"vba_next": "nintendo-gba",
|
|
"gpsp": "nintendo-gba",
|
|
"gambatte": "nintendo-gb",
|
|
"sameboy": "nintendo-gb",
|
|
"gearboy": "nintendo-gb",
|
|
"bsnes": "nintendo-snes",
|
|
"snes9x": "nintendo-snes",
|
|
"higan_sfc": "nintendo-snes",
|
|
"mesen-s": "nintendo-snes",
|
|
"nestopia": "nintendo-nes",
|
|
"fceumm": "nintendo-nes",
|
|
"mesen": "nintendo-nes",
|
|
"mupen64plus_next": "nintendo-64",
|
|
"parallel_n64": "nintendo-64",
|
|
"flycast": "sega-dreamcast",
|
|
"reicast": "sega-dreamcast",
|
|
"kronos": "sega-saturn",
|
|
"mednafen_saturn": "sega-saturn",
|
|
"yabause": "sega-saturn",
|
|
"genesis_plus_gx": "sega-mega-drive",
|
|
"picodrive": "sega-mega-drive",
|
|
"mednafen_pce": "nec-pc-engine",
|
|
"mednafen_pce_fast": "nec-pc-engine",
|
|
"mednafen_pcfx": "nec-pc-fx",
|
|
"mednafen_ngp": "snk-neogeo-pocket",
|
|
"mednafen_lynx": "atari-lynx",
|
|
"handy": "atari-lynx",
|
|
"hatari": "atari-st",
|
|
"puae": "commodore-amiga",
|
|
"fuse": "sinclair-zx-spectrum",
|
|
"dosbox_pure": "dos",
|
|
"dosbox_svn": "dos",
|
|
"scummvm": "scummvm",
|
|
"opera": "3do",
|
|
"4do": "3do",
|
|
"ep128emu": "enterprise-64-128",
|
|
"freej2me": "j2me",
|
|
"squirreljme": "j2me",
|
|
"numero": "ti-83",
|
|
"neocd": "snk-neogeo-cd",
|
|
"vice_x64": "commodore-c64",
|
|
"vice_x128": "commodore-c128",
|
|
"cap32": "amstrad-cpc",
|
|
"o2em": "magnavox-odyssey2",
|
|
"vecx": "vectrex",
|
|
"virtualjaguar": "atari-jaguar",
|
|
"prosystem": "atari-7800",
|
|
"stella": "atari-2600",
|
|
"a5200": "atari-5200",
|
|
"bluemsx": "microsoft-msx",
|
|
"fmsx": "microsoft-msx",
|
|
"px68k": "sharp-x68000",
|
|
"x1": "sharp-x1",
|
|
"quasi88": "nec-pc-88",
|
|
"np2kai": "nec-pc-98",
|
|
"theodore": "thomson",
|
|
"81": "sinclair-zx81",
|
|
"crocods": "amstrad-cpc",
|
|
"dinothawr": "dinothawr",
|
|
}
|
|
|
|
|
|
def _parse_info_file(content: str) -> dict:
|
|
"""Parse a .info file into a dictionary."""
|
|
result = {}
|
|
for line in content.split("\n"):
|
|
line = line.strip()
|
|
if not line or line.startswith("#"):
|
|
continue
|
|
match = re.match(r'^(\w+)\s*=\s*"?(.*?)"?\s*$', line)
|
|
if match:
|
|
key, value = match.group(1), match.group(2)
|
|
result[key] = value
|
|
return result
|
|
|
|
|
|
_SKIP_EXTENSIONS = {".dll", ".so", ".dylib", ".exe", ".bat", ".sh"}
|
|
_DIRECTORY_MARKERS = {"folder", "directory", "dir"}
|
|
|
|
|
|
def _is_directory_ref(path: str, desc: str) -> bool:
|
|
"""Check if a firmware entry is a directory reference rather than a file."""
|
|
if "." not in path.split("/")[-1]:
|
|
return True
|
|
desc_lower = desc.lower()
|
|
return any(marker in desc_lower for marker in _DIRECTORY_MARKERS)
|
|
|
|
|
|
def _is_native_lib(path: str) -> bool:
|
|
"""Check if path is a native library (.dll, .so, .dylib) - not a BIOS."""
|
|
ext = "." + path.rsplit(".", 1)[-1] if "." in path else ""
|
|
return ext.lower() in _SKIP_EXTENSIONS
|
|
|
|
|
|
def _extract_firmware(info: dict) -> list[dict]:
|
|
"""Extract firmware entries, filtering out directories and native libraries."""
|
|
count_str = info.get("firmware_count", "0")
|
|
try:
|
|
count = int(count_str)
|
|
except ValueError:
|
|
return []
|
|
|
|
firmware = []
|
|
for i in range(count):
|
|
path = info.get(f"firmware{i}_path", "")
|
|
desc = info.get(f"firmware{i}_desc", "")
|
|
opt = info.get(f"firmware{i}_opt", "false")
|
|
|
|
if not path:
|
|
continue
|
|
|
|
if _is_directory_ref(path, desc):
|
|
continue
|
|
|
|
if _is_native_lib(path):
|
|
continue
|
|
|
|
firmware.append(
|
|
{
|
|
"path": path,
|
|
"desc": desc,
|
|
"optional": opt.lower() == "true",
|
|
}
|
|
)
|
|
|
|
return firmware
|
|
|
|
|
|
def _extract_md5_from_notes(info: dict) -> dict[str, str]:
|
|
"""Extract MD5 hashes from the notes field."""
|
|
notes = info.get("notes", "")
|
|
md5_map = {}
|
|
|
|
for match in re.finditer(r"\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})", notes):
|
|
filename = match.group(1).strip()
|
|
md5 = match.group(2)
|
|
md5_map[filename] = md5
|
|
|
|
return md5_map
|
|
|
|
|
|
class Scraper(BaseScraper):
|
|
"""Scraper for libretro-core-info firmware declarations."""
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
self._info_files: dict[str, dict] | None = None
|
|
|
|
def _fetch_info_list(self) -> list[str]:
|
|
"""Fetch list of all .info files from GitHub API."""
|
|
# Use the tree API to get all files at once
|
|
url = f"{GITHUB_API}/git/trees/master?recursive=1"
|
|
try:
|
|
req = urllib.request.Request(
|
|
url,
|
|
headers={
|
|
"User-Agent": "retrobios-scraper/1.0",
|
|
"Accept": "application/vnd.github.v3+json",
|
|
},
|
|
)
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
data = json.loads(resp.read())
|
|
|
|
return [
|
|
item["path"]
|
|
for item in data.get("tree", [])
|
|
if item["path"].endswith("_libretro.info")
|
|
]
|
|
except (urllib.error.URLError, json.JSONDecodeError) as e:
|
|
raise ConnectionError(f"Failed to list core-info files: {e}") from e
|
|
|
|
def _fetch_info_file(self, filename: str) -> dict:
|
|
"""Fetch and parse a single .info file."""
|
|
url = f"{RAW_BASE}/{filename}"
|
|
try:
|
|
req = urllib.request.Request(
|
|
url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
|
)
|
|
with urllib.request.urlopen(req, timeout=15) as resp:
|
|
content = resp.read().decode("utf-8")
|
|
return _parse_info_file(content)
|
|
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
return {}
|
|
|
|
def fetch_requirements(self) -> list[BiosRequirement]:
|
|
"""Fetch firmware requirements from all core .info files."""
|
|
info_files = self._fetch_info_list()
|
|
requirements = []
|
|
seen = set()
|
|
|
|
for filename in info_files:
|
|
info = self._fetch_info_file(filename)
|
|
firmware_list = _extract_firmware(info)
|
|
|
|
if not firmware_list:
|
|
continue
|
|
|
|
core_name = filename.replace("_libretro.info", "")
|
|
system = CORE_SYSTEM_MAP.get(core_name, core_name)
|
|
|
|
md5_map = _extract_md5_from_notes(info)
|
|
|
|
for fw in firmware_list:
|
|
path = fw["path"]
|
|
if path in seen:
|
|
continue
|
|
seen.add(path)
|
|
|
|
basename = path.split("/")[-1] if "/" in path else path
|
|
# Full path when basename is generic to avoid SGB1.sfc/program.rom vs SGB2.sfc/program.rom collisions
|
|
GENERIC_NAMES = {
|
|
"program.rom",
|
|
"data.rom",
|
|
"boot.rom",
|
|
"bios.bin",
|
|
"firmware.bin",
|
|
}
|
|
name = path if basename.lower() in GENERIC_NAMES else basename
|
|
md5 = md5_map.get(basename)
|
|
|
|
requirements.append(
|
|
BiosRequirement(
|
|
name=name,
|
|
system=system,
|
|
md5=md5,
|
|
destination=path,
|
|
required=not fw["optional"],
|
|
)
|
|
)
|
|
|
|
return requirements
|
|
|
|
def validate_format(self, raw_data: str) -> bool:
|
|
"""Validate .info file format."""
|
|
return "firmware_count" in raw_data or "display_name" in raw_data
|
|
|
|
def fetch_metadata(self) -> dict:
|
|
"""Fetch version info from GitHub."""
|
|
version = fetch_github_latest_version("libretro/libretro-core-info")
|
|
return {"version": version or ""}
|
|
|
|
|
|
def main():
|
|
"""CLI entry point."""
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="Scrape libretro-core-info firmware requirements"
|
|
)
|
|
parser.add_argument("--dry-run", action="store_true")
|
|
parser.add_argument("--compare-db", help="Compare against database.json")
|
|
args = parser.parse_args()
|
|
|
|
scraper = Scraper()
|
|
|
|
try:
|
|
reqs = scraper.fetch_requirements()
|
|
except ConnectionError as e:
|
|
print(f"Error: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
if args.compare_db:
|
|
import json as _json
|
|
|
|
with open(args.compare_db) as f:
|
|
db = _json.load(f)
|
|
|
|
found = 0
|
|
missing = []
|
|
for r in reqs:
|
|
if r.name in db["indexes"]["by_name"]:
|
|
found += 1
|
|
elif r.md5 and r.md5 in db["indexes"]["by_md5"]:
|
|
found += 1
|
|
else:
|
|
missing.append(r)
|
|
|
|
print(f"Core-info: {len(reqs)} unique firmware paths")
|
|
print(f"Found in DB: {found}")
|
|
print(f"Missing: {len(missing)}")
|
|
if missing:
|
|
print("\nMissing files:")
|
|
for r in sorted(missing, key=lambda x: x.system):
|
|
opt = "(optional)" if not r.required else "(REQUIRED)"
|
|
print(f" {r.system}: {r.destination} {opt}")
|
|
return
|
|
|
|
from collections import defaultdict
|
|
|
|
by_system = defaultdict(list)
|
|
for r in reqs:
|
|
by_system[r.system].append(r)
|
|
|
|
print(f"Total: {len(reqs)} unique firmware paths across {len(by_system)} systems")
|
|
for sys_name, files in sorted(by_system.items()):
|
|
req_count = sum(1 for f in files if f.required)
|
|
opt_count = sum(1 for f in files if not f.required)
|
|
print(f" {sys_name}: {req_count} required, {opt_count} optional")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|