diff --git a/scripts/auto_fetch.py b/scripts/auto_fetch.py index 1c6ee552..772667ad 100644 --- a/scripts/auto_fetch.py +++ b/scripts/auto_fetch.py @@ -21,12 +21,17 @@ import json import os import subprocess import sys -import urllib.request import urllib.error +import urllib.request from pathlib import Path sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from common import list_registered_platforms, load_database, load_platform_config, require_yaml +from common import ( + list_registered_platforms, + load_database, + load_platform_config, + require_yaml, +) yaml = require_yaml() @@ -83,14 +88,16 @@ def find_missing(config: dict, db: dict) -> list[dict]: found = any(m in by_md5 for m in md5_list) if not found: - missing.append({ - "name": name, - "system": sys_id, - "sha1": sha1, - "md5": md5, - "size": file_entry.get("size"), - "destination": file_entry.get("destination", name), - }) + missing.append( + { + "name": name, + "system": sys_id, + "sha1": sha1, + "md5": md5, + "size": file_entry.get("size"), + "destination": file_entry.get("destination", name), + } + ) return missing @@ -139,14 +146,16 @@ def step2_scan_branches(entry: dict) -> bytes | None: try: subprocess.run( ["git", "rev-parse", "--verify", ref], - capture_output=True, check=True, + capture_output=True, + check=True, ) except subprocess.CalledProcessError: continue result = subprocess.run( ["git", "ls-tree", "-r", "--name-only", ref], - capture_output=True, text=True, + capture_output=True, + text=True, ) for filepath in result.stdout.strip().split("\n"): @@ -154,7 +163,8 @@ def step2_scan_branches(entry: dict) -> bytes | None: try: blob = subprocess.run( ["git", "show", f"{ref}:{filepath}"], - capture_output=True, check=True, + capture_output=True, + check=True, ) if verify_content(blob.stdout, entry): return blob.stdout @@ -172,7 +182,9 @@ def step3_search_public_repos(entry: dict) -> bytes | None: for url_template in PUBLIC_REPOS: url = url_template.format(name=name) try: - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-fetch/1.0"} + ) with urllib.request.urlopen(req, timeout=30) as resp: data = _read_limited(resp) if data is None: @@ -185,7 +197,9 @@ def step3_search_public_repos(entry: dict) -> bytes | None: if "/" in destination: url = url_template.format(name=destination) try: - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-fetch/1.0"} + ) with urllib.request.urlopen(req, timeout=30) as resp: data = _read_limited(resp) if data is None: @@ -206,7 +220,9 @@ def step4_search_archive_org(entry: dict) -> bytes | None: for path in [name, f"system/{name}", f"bios/{name}"]: url = f"https://archive.org/download/{collection_id}/{path}" try: - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-fetch/1.0"} + ) with urllib.request.urlopen(req, timeout=30) as resp: data = _read_limited(resp) if data is None: @@ -221,12 +237,13 @@ def step4_search_archive_org(entry: dict) -> bytes | None: return None search_url = ( - f"https://archive.org/advancedsearch.php?" - f"q=sha1:{sha1}&output=json&rows=1" + f"https://archive.org/advancedsearch.php?q=sha1:{sha1}&output=json&rows=1" ) try: - req = urllib.request.Request(search_url, headers={"User-Agent": "retrobios-fetch/1.0"}) + req = urllib.request.Request( + search_url, headers={"User-Agent": "retrobios-fetch/1.0"} + ) with urllib.request.urlopen(req, timeout=30) as resp: result = json.loads(resp.read()) docs = result.get("response", {}).get("docs", []) @@ -235,7 +252,9 @@ def step4_search_archive_org(entry: dict) -> bytes | None: if identifier: dl_url = f"https://archive.org/download/{identifier}/{name}" try: - req2 = urllib.request.Request(dl_url, headers={"User-Agent": "retrobios-fetch/1.0"}) + req2 = urllib.request.Request( + dl_url, headers={"User-Agent": "retrobios-fetch/1.0"} + ) with urllib.request.urlopen(req2, timeout=30) as resp2: data = _read_limited(resp2) if data is not None and verify_content(data, entry): @@ -297,7 +316,7 @@ def fetch_missing( continue if dry_run: - print(f" [DRY RUN] Would search branches, repos, archive.org") + print(" [DRY RUN] Would search branches, repos, archive.org") still_missing.append(entry) stats["not_found"] += 1 continue @@ -323,7 +342,7 @@ def fetch_missing( stats["found"] += 1 continue - print(f" [5] Not found - needs community contribution") + print(" [5] Not found - needs community contribution") still_missing.append(entry) stats["not_found"] += 1 @@ -345,16 +364,20 @@ def generate_issue_body(missing: list[dict], platform: str) -> str: for entry in missing: sha1 = entry.get("sha1") or "N/A" md5 = entry.get("md5") or "N/A" - lines.append(f"| `{entry['name']}` | {entry['system']} | `{sha1[:12]}...` | `{md5[:12]}...` |") + lines.append( + f"| `{entry['name']}` | {entry['system']} | `{sha1[:12]}...` | `{md5[:12]}...` |" + ) - lines.extend([ - "", - "### How to Contribute", - "", - "1. Fork this repository", - "2. Add the BIOS file to `bios/Manufacturer/Console/`", - "3. Create a Pull Request - checksums are verified automatically", - ]) + lines.extend( + [ + "", + "### How to Contribute", + "", + "1. Fork this repository", + "2. Add the BIOS file to `bios/Manufacturer/Console/`", + "3. Create a Pull Request - checksums are verified automatically", + ] + ) return "\n".join(lines) @@ -363,11 +386,15 @@ def main(): parser = argparse.ArgumentParser(description="Auto-fetch missing BIOS files") parser.add_argument("--platform", "-p", help="Platform to check") parser.add_argument("--all", action="store_true", help="Check all platforms") - parser.add_argument("--dry-run", action="store_true", help="Don't download, just report") + parser.add_argument( + "--dry-run", action="store_true", help="Don't download, just report" + ) parser.add_argument("--db", default=DEFAULT_DB) parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR) parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR) - parser.add_argument("--create-issues", action="store_true", help="Output GitHub Issue bodies") + parser.add_argument( + "--create-issues", action="store_true", help="Output GitHub Issue bodies" + ) args = parser.parse_args() if not os.path.exists(args.db): @@ -378,7 +405,8 @@ def main(): if args.all: platforms = list_registered_platforms( - args.platforms_dir, include_archived=True, + args.platforms_dir, + include_archived=True, ) elif args.platform: platforms = [args.platform] @@ -389,19 +417,19 @@ def main(): all_still_missing = {} for platform in sorted(platforms): - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"Platform: {platform}") - print(f"{'='*60}") + print(f"{'=' * 60}") try: config = load_platform_config(platform, args.platforms_dir) except FileNotFoundError: - print(f" Config not found, skipping") + print(" Config not found, skipping") continue missing = find_missing(config, db) if not missing: - print(f" All BIOS files present!") + print(" All BIOS files present!") continue print(f" {len(missing)} missing files") @@ -414,9 +442,9 @@ def main(): print(f"\n Results: {stats['found']} found, {stats['not_found']} not found") if args.create_issues and all_still_missing: - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print("GitHub Issue Bodies") - print(f"{'='*60}") + print(f"{'=' * 60}") for platform, missing in all_still_missing.items(): print(f"\n--- Issue for {platform} ---\n") print(generate_issue_body(missing, platform)) diff --git a/scripts/check_buildbot_system.py b/scripts/check_buildbot_system.py index f57ed418..0a80dee1 100644 --- a/scripts/check_buildbot_system.py +++ b/scripts/check_buildbot_system.py @@ -9,6 +9,7 @@ Usage: python scripts/check_buildbot_system.py --update python scripts/check_buildbot_system.py --json """ + from __future__ import annotations import argparse @@ -36,10 +37,14 @@ def fetch_index() -> set[str]: """Fetch .index from buildbot, return set of ZIP filenames.""" req = urllib.request.Request(INDEX_URL, headers={"User-Agent": USER_AGENT}) with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp: - return {line.strip() for line in resp.read().decode().splitlines() if line.strip()} + return { + line.strip() for line in resp.read().decode().splitlines() if line.strip() + } -def load_tracked_entries(registry_path: str = DEFAULT_REGISTRY) -> dict[str, tuple[str, str]]: +def load_tracked_entries( + registry_path: str = DEFAULT_REGISTRY, +) -> dict[str, tuple[str, str]]: """Load buildbot entries from _data_dirs.yml. Returns {decoded_zip_name: (key, source_url)}. @@ -64,8 +69,9 @@ def load_tracked_entries(registry_path: str = DEFAULT_REGISTRY) -> dict[str, tup def get_remote_etag(url: str) -> str | None: """HEAD request to get ETag.""" try: - req = urllib.request.Request(url, method="HEAD", - headers={"User-Agent": USER_AGENT}) + req = urllib.request.Request( + url, method="HEAD", headers={"User-Agent": USER_AGENT} + ) with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp: return resp.headers.get("ETag") or resp.headers.get("Last-Modified") or "" except (urllib.error.URLError, OSError): @@ -114,8 +120,15 @@ def check(registry_path: str = DEFAULT_REGISTRY) -> dict: status = "OK" else: status = "UPDATED" - results.append({"zip": z, "status": status, "key": key, - "stored_etag": stored, "remote_etag": remote or ""}) + results.append( + { + "zip": z, + "status": status, + "key": key, + "stored_etag": stored, + "remote_etag": remote or "", + } + ) return {"entries": results} @@ -144,8 +157,13 @@ def update_changed(report: dict) -> None: if e["status"] == "UPDATED" and e.get("key"): log.info("refreshing %s ...", e["key"]) subprocess.run( - [sys.executable, "scripts/refresh_data_dirs.py", - "--force", "--key", e["key"]], + [ + sys.executable, + "scripts/refresh_data_dirs.py", + "--force", + "--key", + e["key"], + ], check=False, ) @@ -155,10 +173,15 @@ def main() -> None: parser = argparse.ArgumentParser( description="Check buildbot system directory for changes", ) - parser.add_argument("--update", action="store_true", - help="Auto-refresh changed entries") - parser.add_argument("--json", action="store_true", dest="json_output", - help="Machine-readable JSON output") + parser.add_argument( + "--update", action="store_true", help="Auto-refresh changed entries" + ) + parser.add_argument( + "--json", + action="store_true", + dest="json_output", + help="Machine-readable JSON output", + ) parser.add_argument("--registry", default=DEFAULT_REGISTRY) args = parser.parse_args() diff --git a/scripts/common.py b/scripts/common.py index 84eea6d9..93421616 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -26,9 +26,11 @@ def require_yaml(): """Import and return yaml, exiting if PyYAML is not installed.""" try: import yaml as _yaml + return _yaml except ImportError: import sys + print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr) sys.exit(1) @@ -154,12 +156,17 @@ def load_platform_config(platform_name: str, platforms_dir: str = "platforms") - if "inherits" in config: parent = load_platform_config(config["inherits"], platforms_dir) merged = {**parent} - merged.update({k: v for k, v in config.items() if k not in ("inherits", "overrides")}) + merged.update( + {k: v for k, v in config.items() if k not in ("inherits", "overrides")} + ) if "overrides" in config and "systems" in config["overrides"]: merged.setdefault("systems", {}) for sys_id, override in config["overrides"]["systems"].items(): if sys_id in merged["systems"]: - merged["systems"][sys_id] = {**merged["systems"][sys_id], **override} + merged["systems"][sys_id] = { + **merged["systems"][sys_id], + **override, + } else: merged["systems"][sys_id] = override config = merged @@ -346,12 +353,14 @@ def list_available_targets( result = [] for tname, tdata in sorted(data.get("targets", {}).items()): aliases = overrides.get(tname, {}).get("aliases", []) - result.append({ - "name": tname, - "architecture": tdata.get("architecture", ""), - "core_count": len(tdata.get("cores", [])), - "aliases": aliases, - }) + result.append( + { + "name": tname, + "architecture": tdata.get("architecture", ""), + "core_count": len(tdata.get("cores", [])), + "aliases": aliases, + } + ) return result @@ -398,7 +407,9 @@ def resolve_local_file( if hint_base and hint_base not in names_to_try: names_to_try.append(hint_base) - md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + md5_list = ( + [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + ) files_db = db.get("files", {}) by_md5 = db.get("indexes", {}).get("by_md5", {}) by_name = db.get("indexes", {}).get("by_name", {}) @@ -480,7 +491,9 @@ def resolve_local_file( if candidates: if zipped_file: - candidates = [(p, m) for p, m in candidates if ".zip" in os.path.basename(p)] + candidates = [ + (p, m) for p, m in candidates if ".zip" in os.path.basename(p) + ] if md5_set: for path, db_md5 in candidates: if ".zip" in os.path.basename(path): @@ -530,7 +543,11 @@ def resolve_local_file( if canonical and canonical != name: canonical_entry = {"name": canonical} result = resolve_local_file( - canonical_entry, db, zip_contents, dest_hint, _depth=_depth + 1, + canonical_entry, + db, + zip_contents, + dest_hint, + _depth=_depth + 1, data_dir_registry=data_dir_registry, ) if result[0]: @@ -643,9 +660,7 @@ def build_zip_contents_index(db: dict, max_entry_size: int = 512 * 1024 * 1024) if path.endswith(".zip") and os.path.exists(path): zip_entries.append((path, sha1)) - fingerprint = frozenset( - (path, os.path.getmtime(path)) for path, _ in zip_entries - ) + fingerprint = frozenset((path, os.path.getmtime(path)) for path, _ in zip_entries) if _zip_contents_cache is not None and _zip_contents_cache[0] == fingerprint: return _zip_contents_cache[1] @@ -672,7 +687,8 @@ _emulator_profiles_cache: dict[tuple[str, bool], dict[str, dict]] = {} def load_emulator_profiles( - emulators_dir: str, skip_aliases: bool = True, + emulators_dir: str, + skip_aliases: bool = True, ) -> dict[str, dict]: """Load all emulator YAML profiles from a directory (cached).""" cache_key = (os.path.realpath(emulators_dir), skip_aliases) @@ -701,7 +717,8 @@ def load_emulator_profiles( def group_identical_platforms( - platforms: list[str], platforms_dir: str, + platforms: list[str], + platforms_dir: str, target_cores_cache: dict[str, set[str] | None] | None = None, ) -> list[tuple[list[str], str]]: """Group platforms that produce identical packs (same files + base_destination). @@ -744,7 +761,9 @@ def group_identical_platforms( fp = hashlib.sha1(f"{fp}|{tc_str}".encode()).hexdigest() fingerprints.setdefault(fp, []).append(platform) # Prefer the root platform (no inherits) as representative - if fp not in representatives or (not inherits[platform] and inherits.get(representatives[fp], False)): + if fp not in representatives or ( + not inherits[platform] and inherits.get(representatives[fp], False) + ): representatives[fp] = platform result = [] @@ -756,7 +775,8 @@ def group_identical_platforms( def resolve_platform_cores( - config: dict, profiles: dict[str, dict], + config: dict, + profiles: dict[str, dict], target_cores: set[str] | None = None, ) -> set[str]: """Resolve which emulator profiles are relevant for a platform. @@ -773,9 +793,9 @@ def resolve_platform_cores( if cores_config == "all_libretro": result = { - name for name, p in profiles.items() - if "libretro" in p.get("type", "") - and p.get("type") != "alias" + name + for name, p in profiles.items() + if "libretro" in p.get("type", "") and p.get("type") != "alias" } elif isinstance(cores_config, list): core_set = {str(c) for c in cores_config} @@ -786,25 +806,22 @@ def resolve_platform_cores( core_to_profile[name] = name for core_name in p.get("cores", []): core_to_profile[str(core_name)] = name - result = { - core_to_profile[c] - for c in core_set - if c in core_to_profile - } + result = {core_to_profile[c] for c in core_set if c in core_to_profile} # Support "all_libretro" as a list element: combines all libretro # profiles with explicitly listed standalone cores (e.g. RetroDECK # ships RetroArch + standalone emulators) if "all_libretro" in core_set or "retroarch" in core_set: result |= { - name for name, p in profiles.items() - if "libretro" in p.get("type", "") - and p.get("type") != "alias" + name + for name, p in profiles.items() + if "libretro" in p.get("type", "") and p.get("type") != "alias" } else: # Fallback: system ID intersection with normalization norm_plat_systems = {_norm_system_id(s) for s in config.get("systems", {})} result = { - name for name, p in profiles.items() + name + for name, p in profiles.items() if {_norm_system_id(s) for s in p.get("systems", [])} & norm_plat_systems and p.get("type") != "alias" } @@ -826,11 +843,34 @@ def resolve_platform_cores( MANUFACTURER_PREFIXES = ( - "acorn-", "apple-", "microsoft-", "nintendo-", "sony-", "sega-", - "snk-", "panasonic-", "nec-", "epoch-", "mattel-", "fairchild-", - "hartung-", "tiger-", "magnavox-", "philips-", "bandai-", "casio-", - "coleco-", "commodore-", "sharp-", "sinclair-", "atari-", "sammy-", - "gce-", "interton-", "texas-instruments-", "videoton-", + "acorn-", + "apple-", + "microsoft-", + "nintendo-", + "sony-", + "sega-", + "snk-", + "panasonic-", + "nec-", + "epoch-", + "mattel-", + "fairchild-", + "hartung-", + "tiger-", + "magnavox-", + "philips-", + "bandai-", + "casio-", + "coleco-", + "commodore-", + "sharp-", + "sinclair-", + "atari-", + "sammy-", + "gce-", + "interton-", + "texas-instruments-", + "videoton-", ) @@ -877,7 +917,7 @@ def _norm_system_id(sid: str) -> str: s = SYSTEM_ALIASES.get(s, s) for prefix in MANUFACTURER_PREFIXES: if s.startswith(prefix): - s = s[len(prefix):] + s = s[len(prefix) :] break return s.replace("-", "") @@ -984,9 +1024,9 @@ def expand_platform_declared_names(config: dict, db: dict) -> set[str]: import re _TIMESTAMP_PATTERNS = [ - re.compile(r'"generated_at":\s*"[^"]*"'), # database.json - re.compile(r'\*Auto-generated on [^*]*\*'), # README.md - re.compile(r'\*Generated on [^*]*\*'), # docs site pages + re.compile(r'"generated_at":\s*"[^"]*"'), # database.json + re.compile(r"\*Auto-generated on [^*]*\*"), # README.md + re.compile(r"\*Generated on [^*]*\*"), # docs site pages ] @@ -1023,8 +1063,12 @@ LARGE_FILES_REPO = "Abdess/retrobios" LARGE_FILES_CACHE = ".cache/large" -def fetch_large_file(name: str, dest_dir: str = LARGE_FILES_CACHE, - expected_sha1: str = "", expected_md5: str = "") -> str | None: +def fetch_large_file( + name: str, + dest_dir: str = LARGE_FILES_CACHE, + expected_sha1: str = "", + expected_md5: str = "", +) -> str | None: """Download a large file from the 'large-files' GitHub release if not cached.""" cached = os.path.join(dest_dir, name) if os.path.exists(cached): @@ -1033,7 +1077,9 @@ def fetch_large_file(name: str, dest_dir: str = LARGE_FILES_CACHE, if expected_sha1 and hashes["sha1"].lower() != expected_sha1.lower(): os.unlink(cached) elif expected_md5: - md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()] + md5_list = [ + m.strip().lower() for m in expected_md5.split(",") if m.strip() + ] if hashes["md5"].lower() not in md5_list: os.unlink(cached) else: @@ -1122,8 +1168,9 @@ def list_platform_system_ids(platform_name: str, platforms_dir: str) -> None: file_count = len(systems[sys_id].get("files", [])) mfr = systems[sys_id].get("manufacturer", "") mfr_display = f" [{mfr.split('|')[0]}]" if mfr else "" - print(f" {sys_id:35s} ({file_count} file{'s' if file_count != 1 else ''}){mfr_display}") - + print( + f" {sys_id:35s} ({file_count} file{'s' if file_count != 1 else ''}){mfr_display}" + ) def build_target_cores_cache( diff --git a/scripts/cross_reference.py b/scripts/cross_reference.py index 092ec2c4..57a69aa5 100644 --- a/scripts/cross_reference.py +++ b/scripts/cross_reference.py @@ -19,7 +19,13 @@ import sys from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) -from common import list_registered_platforms, load_database, load_emulator_profiles, load_platform_config, require_yaml +from common import ( + list_registered_platforms, + load_database, + load_emulator_profiles, + load_platform_config, + require_yaml, +) yaml = require_yaml() @@ -28,11 +34,15 @@ DEFAULT_PLATFORMS_DIR = "platforms" DEFAULT_DB = "database.json" -def load_platform_files(platforms_dir: str) -> tuple[dict[str, set[str]], dict[str, set[str]]]: +def load_platform_files( + platforms_dir: str, +) -> tuple[dict[str, set[str]], dict[str, set[str]]]: """Load all platform configs and collect declared filenames + data_directories per system.""" declared = {} platform_data_dirs = {} - for platform_name in list_registered_platforms(platforms_dir, include_archived=True): + for platform_name in list_registered_platforms( + platforms_dir, include_archived=True + ): config = load_platform_config(platform_name, platforms_dir) for sys_id, system in config.get("systems", {}).items(): for fe in system.get("files", []): @@ -46,8 +56,9 @@ def load_platform_files(platforms_dir: str) -> tuple[dict[str, set[str]], dict[s return declared, platform_data_dirs -def _build_supplemental_index(data_root: str = "data", - bios_root: str = "bios") -> set[str]: +def _build_supplemental_index( + data_root: str = "data", bios_root: str = "bios" +) -> set[str]: """Build a set of filenames and directory names in data/ and inside bios/ ZIPs.""" names: set[str] = set() root_path = Path(data_root) @@ -76,12 +87,15 @@ def _build_supplemental_index(data_root: str = "data", names.add(dpath.name + "/") names.add(dpath.name.lower() + "/") import zipfile + for zpath in bios_path.rglob("*.zip"): try: with zipfile.ZipFile(zpath) as zf: for member in zf.namelist(): if not member.endswith("/"): - basename = member.rsplit("/", 1)[-1] if "/" in member else member + basename = ( + member.rsplit("/", 1)[-1] if "/" in member else member + ) names.add(basename) names.add(basename.lower()) except (zipfile.BadZipFile, OSError): @@ -89,8 +103,12 @@ def _build_supplemental_index(data_root: str = "data", return names -def _find_in_repo(fname: str, by_name: dict[str, list], by_name_lower: dict[str, str], - data_names: set[str] | None = None) -> bool: +def _find_in_repo( + fname: str, + by_name: dict[str, list], + by_name_lower: dict[str, str], + data_names: set[str] | None = None, +) -> bool: if fname in by_name: return True # For directory entries or paths, extract the meaningful basename @@ -170,7 +188,9 @@ def cross_reference( if not in_repo: path_field = f.get("path", "") if path_field and path_field != fname: - in_repo = _find_in_repo(path_field, by_name, by_name_lower, data_names) + in_repo = _find_in_repo( + path_field, by_name, by_name_lower, data_names + ) # Try MD5 hash match (handles files that exist under different names) if not in_repo: md5_raw = f.get("md5", "") @@ -231,9 +251,11 @@ def print_report(report: dict) -> None: status = f"{data['gap_in_repo']} in repo, {data['gap_missing']} missing" print(f"\n{data['emulator']} ({', '.join(data['systems'])})") - print(f" {data['total_files']} files in profile, " - f"{data['platform_covered']} declared by platforms, " - f"{gaps} undeclared") + print( + f" {data['total_files']} files in profile, " + f"{data['platform_covered']} declared by platforms, " + f"{gaps} undeclared" + ) if gaps > 0: print(f" Gaps: {status}") @@ -259,7 +281,9 @@ def main(): parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR) parser.add_argument("--db", default=DEFAULT_DB) parser.add_argument("--emulator", "-e", help="Analyze single emulator") - parser.add_argument("--platform", "-p", help="Platform name (required for --target)") + parser.add_argument( + "--platform", "-p", help="Platform name (required for --target)" + ) parser.add_argument("--target", "-t", help="Hardware target (e.g., switch, rpi4)") parser.add_argument("--json", action="store_true", help="JSON output") args = parser.parse_args() @@ -272,7 +296,10 @@ def main(): if not args.platform: parser.error("--target requires --platform") from common import load_target_config, resolve_platform_cores - target_cores = load_target_config(args.platform, args.target, args.platforms_dir) + + target_cores = load_target_config( + args.platform, args.target, args.platforms_dir + ) config = load_platform_config(args.platform, args.platforms_dir) relevant = resolve_platform_cores(config, profiles, target_cores=target_cores) profiles = {k: v for k, v in profiles.items() if k in relevant} diff --git a/scripts/crypto_verify.py b/scripts/crypto_verify.py index c9e7b5ad..998b79fb 100644 --- a/scripts/crypto_verify.py +++ b/scripts/crypto_verify.py @@ -14,6 +14,7 @@ Source refs: Azahar src/core/hw/rsa/rsa.cpp Azahar src/core/file_sys/otp.cpp """ + from __future__ import annotations import hashlib @@ -22,9 +23,9 @@ import subprocess from collections.abc import Callable from pathlib import Path - # Key file parsing (keys.txt / aes_keys.txt format) + def parse_keys_file(path: str | Path) -> dict[str, dict[str, bytes]]: """Parse a 3DS keys file with :AES, :RSA, :ECC sections. @@ -67,6 +68,7 @@ def find_keys_file(bios_dir: str | Path) -> Path | None: # Pure Python RSA-2048 PKCS1v15 SHA256 verification (zero dependencies) + def _rsa_verify_pkcs1v15_sha256( message: bytes, signature: bytes, @@ -98,14 +100,29 @@ def _rsa_verify_pkcs1v15_sha256( # PKCS#1 v1.5 signature encoding: 0x00 0x01 [0xFF padding] 0x00 [DigestInfo] # DigestInfo for SHA-256: # SEQUENCE { SEQUENCE { OID sha256, NULL }, OCTET STRING hash } - digest_info_prefix = bytes([ - 0x30, 0x31, # SEQUENCE (49 bytes) - 0x30, 0x0D, # SEQUENCE (13 bytes) - 0x06, 0x09, # OID (9 bytes) - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, # sha256 - 0x05, 0x00, # NULL - 0x04, 0x20, # OCTET STRING (32 bytes) - ]) + digest_info_prefix = bytes( + [ + 0x30, + 0x31, # SEQUENCE (49 bytes) + 0x30, + 0x0D, # SEQUENCE (13 bytes) + 0x06, + 0x09, # OID (9 bytes) + 0x60, + 0x86, + 0x48, + 0x01, + 0x65, + 0x03, + 0x04, + 0x02, + 0x01, # sha256 + 0x05, + 0x00, # NULL + 0x04, + 0x20, # OCTET STRING (32 bytes) + ] + ) sha256_hash = hashlib.sha256(message).digest() expected_digest_info = digest_info_prefix + sha256_hash @@ -122,11 +139,13 @@ def _rsa_verify_pkcs1v15_sha256( # AES-128-CBC decryption (with fallback) + def _aes_128_cbc_decrypt(data: bytes, key: bytes, iv: bytes) -> bytes: """Decrypt AES-128-CBC without padding.""" # Try cryptography library first try: from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + cipher = Cipher(algorithms.AES(key), modes.CBC(iv)) decryptor = cipher.decryptor() return decryptor.update(data) + decryptor.finalize() @@ -136,6 +155,7 @@ def _aes_128_cbc_decrypt(data: bytes, key: bytes, iv: bytes) -> bytes: # Try pycryptodome try: from Crypto.Cipher import AES # type: ignore[import-untyped] + cipher = AES.new(key, AES.MODE_CBC, iv) return cipher.decrypt(data) except ImportError: @@ -145,8 +165,15 @@ def _aes_128_cbc_decrypt(data: bytes, key: bytes, iv: bytes) -> bytes: try: result = subprocess.run( [ - "openssl", "enc", "-aes-128-cbc", "-d", - "-K", key.hex(), "-iv", iv.hex(), "-nopad", + "openssl", + "enc", + "-aes-128-cbc", + "-d", + "-K", + key.hex(), + "-iv", + iv.hex(), + "-nopad", ], input=data, capture_output=True, @@ -162,6 +189,7 @@ def _aes_128_cbc_decrypt(data: bytes, key: bytes, iv: bytes) -> bytes: # File verification functions + def verify_secure_info_a( filepath: str | Path, keys: dict[str, dict[str, bytes]], @@ -204,7 +232,10 @@ def verify_secure_info_a( continue modified_body = bytes([test_region]) + body[1:] if _rsa_verify_pkcs1v15_sha256(modified_body, signature, modulus, exponent): - return False, f"signature invalid (region changed from {test_region} to {region_byte})" + return ( + False, + f"signature invalid (region changed from {test_region} to {region_byte})", + ) return False, "signature invalid" @@ -307,7 +338,7 @@ def verify_otp( Returns (valid, reason_string). """ - from sect233r1 import ecdsa_verify_sha256, _ec_mul, _Gx, _Gy, _N + from sect233r1 import _N, _ec_mul, _Gx, _Gy, ecdsa_verify_sha256 data = bytearray(Path(filepath).read_bytes()) @@ -322,7 +353,10 @@ def verify_otp( magic = struct.unpack_from("I", 2) diff --git a/scripts/dedup.py b/scripts/dedup.py index 726ae110..e4582691 100644 --- a/scripts/dedup.py +++ b/scripts/dedup.py @@ -17,6 +17,7 @@ Two types of deduplication: After dedup, run generate_db.py --force to rebuild database indexes. """ + from __future__ import annotations import argparse @@ -110,13 +111,10 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict: unique_names = sorted(by_name.keys()) if len(unique_names) > 1: # Check if these are all in MAME/Arcade dirs AND all ZIPs - all_mame_zip = ( - all( - any(_is_mame_dir(p) for p in name_paths) - for name_paths in by_name.values() - ) - and all(n.endswith(".zip") for n in unique_names) - ) + all_mame_zip = all( + any(_is_mame_dir(p) for p in name_paths) + for name_paths in by_name.values() + ) and all(n.endswith(".zip") for n in unique_names) if all_mame_zip: # MAME device clones: different ZIP names, same ROM content # Keep one canonical, remove clones, record in clone map @@ -202,7 +200,9 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict: prefix = "Would remove" if dry_run else "Removed" print(f"\n{prefix}: {total_removed} files") - print(f"Space {'to save' if dry_run else 'saved'}: {total_saved / 1024 / 1024:.1f} MB") + print( + f"Space {'to save' if dry_run else 'saved'}: {total_saved / 1024 / 1024:.1f} MB" + ) if not dry_run and empty_cleaned: print(f"Cleaned {empty_cleaned} empty directories") @@ -211,21 +211,27 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict: clone_path = "_mame_clones.json" if dry_run: print(f"\nWould write MAME clone map: {clone_path}") - print(f" {len(mame_clones)} canonical ZIPs with " - f"{sum(len(v['clones']) for v in mame_clones.values())} clones") + print( + f" {len(mame_clones)} canonical ZIPs with " + f"{sum(len(v['clones']) for v in mame_clones.values())} clones" + ) else: with open(clone_path, "w") as f: json.dump(mame_clones, f, indent=2, sort_keys=True) print(f"\nWrote MAME clone map: {clone_path}") - print(f" {len(mame_clones)} canonical ZIPs with " - f"{sum(len(v['clones']) for v in mame_clones.values())} clones") + print( + f" {len(mame_clones)} canonical ZIPs with " + f"{sum(len(v['clones']) for v in mame_clones.values())} clones" + ) return results def main() -> None: parser = argparse.ArgumentParser(description="Deduplicate bios/ directory") - parser.add_argument("--dry-run", action="store_true", help="Preview without deleting") + parser.add_argument( + "--dry-run", action="store_true", help="Preview without deleting" + ) parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR) args = parser.parse_args() diff --git a/scripts/deterministic_zip.py b/scripts/deterministic_zip.py index fbe027d3..aa1d30eb 100644 --- a/scripts/deterministic_zip.py +++ b/scripts/deterministic_zip.py @@ -22,10 +22,10 @@ Usage: ] build_deterministic_zip("neogeo.zip", recipe, atom_store) """ + from __future__ import annotations import hashlib -import struct import zipfile import zlib from io import BytesIO @@ -63,7 +63,9 @@ def build_deterministic_zip( # Sort by filename for deterministic order sorted_recipe = sorted(recipe, key=lambda r: r["name"]) - with zipfile.ZipFile(str(output_path), "w", compression, compresslevel=_COMPRESS_LEVEL) as zf: + with zipfile.ZipFile( + str(output_path), "w", compression, compresslevel=_COMPRESS_LEVEL + ) as zf: for entry in sorted_recipe: name = entry["name"] expected_crc = entry.get("crc32", "").lower() @@ -127,12 +129,14 @@ def extract_atoms_with_names(zip_path: str | Path) -> list[dict]: continue data = zf.read(info.filename) crc = format(zlib.crc32(data) & 0xFFFFFFFF, "08x") - result.append({ - "name": info.filename, - "crc32": crc, - "size": len(data), - "data": data, - }) + result.append( + { + "name": info.filename, + "crc32": crc, + "size": len(data), + "data": data, + } + ) return result @@ -154,7 +158,9 @@ def verify_zip_determinism(zip_path: str | Path) -> tuple[bool, str, str]: # Rebuild to memory buf = BytesIO() sorted_recipe = sorted(recipe, key=lambda r: r["name"]) - with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED, compresslevel=_COMPRESS_LEVEL) as zf: + with zipfile.ZipFile( + buf, "w", zipfile.ZIP_DEFLATED, compresslevel=_COMPRESS_LEVEL + ) as zf: for entry in sorted_recipe: info = zipfile.ZipInfo(filename=entry["name"], date_time=_FIXED_DATE_TIME) info.compress_type = zipfile.ZIP_DEFLATED diff --git a/scripts/diff_truth.py b/scripts/diff_truth.py index 482069ed..74fe3d0f 100644 --- a/scripts/diff_truth.py +++ b/scripts/diff_truth.py @@ -78,13 +78,17 @@ def _format_terminal(report: dict) -> str: lines.append(f" + {m['name']} [{cores}]") for h in div.get("hash_mismatch", []): ht = h["hash_type"] - lines.append(f" ~ {h['name']} {ht}: {h[f'truth_{ht}']} != {h[f'scraped_{ht}']}") + lines.append( + f" ~ {h['name']} {ht}: {h[f'truth_{ht}']} != {h[f'scraped_{ht}']}" + ) for p in div.get("extra_phantom", []): lines.append(f" - {p['name']} (phantom)") for u in div.get("extra_unprofiled", []): lines.append(f" ? {u['name']} (unprofiled)") for r in div.get("required_mismatch", []): - lines.append(f" ! {r['name']} required: {r['truth_required']} != {r['scraped_required']}") + lines.append( + f" ! {r['name']} required: {r['truth_required']} != {r['scraped_required']}" + ) uncovered = report.get("uncovered_systems", []) if uncovered: @@ -125,13 +129,17 @@ def _format_markdown(report: dict) -> str: lines.append(f"- **Add** `{m['name']}`{refs}") for h in div.get("hash_mismatch", []): ht = h["hash_type"] - lines.append(f"- **Fix hash** `{h['name']}` {ht}: `{h[f'truth_{ht}']}` != `{h[f'scraped_{ht}']}`") + lines.append( + f"- **Fix hash** `{h['name']}` {ht}: `{h[f'truth_{ht}']}` != `{h[f'scraped_{ht}']}`" + ) for p in div.get("extra_phantom", []): lines.append(f"- **Remove** `{p['name']}` (phantom)") for u in div.get("extra_unprofiled", []): lines.append(f"- **Check** `{u['name']}` (unprofiled cores)") for r in div.get("required_mismatch", []): - lines.append(f"- **Fix required** `{r['name']}`: truth={r['truth_required']}, scraped={r['scraped_required']}") + lines.append( + f"- **Fix required** `{r['name']}`: truth={r['truth_required']}, scraped={r['scraped_required']}" + ) lines.append("") uncovered = report.get("uncovered_systems", []) @@ -148,17 +156,25 @@ def _format_markdown(report: dict) -> str: def main() -> None: parser = argparse.ArgumentParser(description="Compare scraped vs truth YAMLs") group = parser.add_mutually_exclusive_group(required=True) - group.add_argument("--all", action="store_true", help="diff all registered platforms") + group.add_argument( + "--all", action="store_true", help="diff all registered platforms" + ) group.add_argument("--platform", help="diff a single platform") - parser.add_argument("--json", action="store_true", dest="json_output", help="JSON output") - parser.add_argument("--format", choices=["terminal", "markdown"], default="terminal") + parser.add_argument( + "--json", action="store_true", dest="json_output", help="JSON output" + ) + parser.add_argument( + "--format", choices=["terminal", "markdown"], default="terminal" + ) parser.add_argument("--truth-dir", default="dist/truth") parser.add_argument("--platforms-dir", default="platforms") parser.add_argument("--include-archived", action="store_true") args = parser.parse_args() if args.all: - platforms = list_registered_platforms(args.platforms_dir, include_archived=args.include_archived) + platforms = list_registered_platforms( + args.platforms_dir, include_archived=args.include_archived + ) else: platforms = [args.platform] @@ -169,7 +185,10 @@ def main() -> None: truth = _load_truth(args.truth_dir, platform) if truth is None: if not args.json_output: - print(f"skip {platform}: no truth YAML in {args.truth_dir}/", file=sys.stderr) + print( + f"skip {platform}: no truth YAML in {args.truth_dir}/", + file=sys.stderr, + ) continue try: diff --git a/scripts/download.py b/scripts/download.py index fe0c975b..cfaf3692 100644 --- a/scripts/download.py +++ b/scripts/download.py @@ -16,8 +16,8 @@ import argparse import json import os import sys -import urllib.request import urllib.error +import urllib.request import zipfile from pathlib import Path @@ -31,10 +31,13 @@ REPO = "Abdess/retrobios" def get_latest_release() -> dict: """Fetch latest release info from GitHub API.""" url = f"{GITHUB_API}/repos/{REPO}/releases/latest" - req = urllib.request.Request(url, headers={ - "User-Agent": "retrobios-downloader/1.0", - "Accept": "application/vnd.github.v3+json", - }) + req = urllib.request.Request( + url, + headers={ + "User-Agent": "retrobios-downloader/1.0", + "Accept": "application/vnd.github.v3+json", + }, + ) try: with urllib.request.urlopen(req, timeout=30) as resp: @@ -71,7 +74,9 @@ def find_asset(release: dict, platform: str) -> dict | None: def download_file(url: str, dest: str, expected_size: int = 0): """Download a file with progress indication.""" - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-downloader/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-downloader/1.0"} + ) with urllib.request.urlopen(req, timeout=300) as resp: total = int(resp.headers.get("Content-Length", expected_size)) @@ -88,7 +93,11 @@ def download_file(url: str, dest: str, expected_size: int = 0): if total > 0: pct = downloaded * 100 // total bar = "=" * (pct // 2) + " " * (50 - pct // 2) - print(f"\r [{bar}] {pct}% ({downloaded:,}/{total:,})", end="", flush=True) + print( + f"\r [{bar}] {pct}% ({downloaded:,}/{total:,})", + end="", + flush=True, + ) print() @@ -114,11 +123,14 @@ def verify_files(platform: str, dest_dir: str, release: dict): return import tempfile + tmp = tempfile.NamedTemporaryFile(suffix=".json", delete=False) tmp.close() try: - download_file(db_asset["browser_download_url"], tmp.name, db_asset.get("size", 0)) + download_file( + db_asset["browser_download_url"], tmp.name, db_asset.get("size", 0) + ) with open(tmp.name) as f: db = json.load(f) finally: @@ -142,7 +154,9 @@ def verify_files(platform: str, dest_dir: str, release: dict): break else: mismatched += 1 - print(f" MISMATCH: {name} (expected {sha1[:12]}..., got {local_sha1[:12]}...)") + print( + f" MISMATCH: {name} (expected {sha1[:12]}..., got {local_sha1[:12]}...)" + ) found = True break @@ -166,7 +180,7 @@ def show_info(platform: str, release: dict): print(f" Platform: {platform}") print(f" File: {asset['name']}") - print(f" Size: {asset['size']:,} bytes ({asset['size'] / (1024*1024):.1f} MB)") + print(f" Size: {asset['size']:,} bytes ({asset['size'] / (1024 * 1024):.1f} MB)") print(f" Downloads: {asset.get('download_count', 'N/A')}") print(f" Updated: {asset.get('updated_at', 'N/A')}") @@ -200,7 +214,12 @@ Examples: print(f" - {p}") else: print("No platform packs found in latest release") - except (urllib.error.URLError, urllib.error.HTTPError, OSError, json.JSONDecodeError) as e: + except ( + urllib.error.URLError, + urllib.error.HTTPError, + OSError, + json.JSONDecodeError, + ) as e: print(f"Error: {e}") return @@ -233,6 +252,7 @@ Examples: sys.exit(1) import tempfile + fd, zip_path = tempfile.mkstemp(suffix=".zip") os.close(fd) diff --git a/scripts/export_native.py b/scripts/export_native.py index b14affd9..8df4b368 100644 --- a/scripts/export_native.py +++ b/scripts/export_native.py @@ -9,11 +9,9 @@ from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent)) import yaml - from common import list_registered_platforms, load_platform_config from exporter import discover_exporters - OUTPUT_FILENAMES: dict[str, str] = { "retroarch": "System.dat", "lakka": "System.dat", @@ -94,23 +92,31 @@ def main() -> None: group.add_argument("--all", action="store_true", help="export all platforms") group.add_argument("--platform", help="export a single platform") parser.add_argument( - "--output-dir", default="dist/upstream", help="output directory", + "--output-dir", + default="dist/upstream", + help="output directory", ) parser.add_argument( - "--truth-dir", default="dist/truth", help="truth YAML directory", + "--truth-dir", + default="dist/truth", + help="truth YAML directory", ) parser.add_argument( - "--platforms-dir", default="platforms", help="platform configs directory", + "--platforms-dir", + default="platforms", + help="platform configs directory", ) parser.add_argument( - "--include-archived", action="store_true", + "--include-archived", + action="store_true", help="include archived platforms", ) args = parser.parse_args() if args.all: platforms = list_registered_platforms( - args.platforms_dir, include_archived=args.include_archived, + args.platforms_dir, + include_archived=args.include_archived, ) else: platforms = [args.platform] diff --git a/scripts/exporter/base_exporter.py b/scripts/exporter/base_exporter.py index 9517f37a..e359f160 100644 --- a/scripts/exporter/base_exporter.py +++ b/scripts/exporter/base_exporter.py @@ -38,7 +38,8 @@ class BaseExporter(ABC): @staticmethod def _display_name( - sys_id: str, scraped_sys: dict | None = None, + sys_id: str, + scraped_sys: dict | None = None, ) -> str: """Get display name for a system from scraped data or slug.""" if scraped_sys: @@ -47,9 +48,28 @@ class BaseExporter(ABC): return name # Fallback: convert slug to display name with acronym handling _UPPER = { - "3do", "cdi", "cpc", "cps1", "cps2", "cps3", "dos", "gba", - "gbc", "hle", "msx", "nes", "nds", "ngp", "psp", "psx", - "sms", "snes", "stv", "tvc", "vb", "zx", + "3do", + "cdi", + "cpc", + "cps1", + "cps2", + "cps3", + "dos", + "gba", + "gbc", + "hle", + "msx", + "nes", + "nds", + "ngp", + "psp", + "psx", + "sms", + "snes", + "stv", + "tvc", + "vb", + "zx", } parts = sys_id.replace("-", " ").split() result = [] diff --git a/scripts/exporter/batocera_exporter.py b/scripts/exporter/batocera_exporter.py index 90325cf8..a69d3543 100644 --- a/scripts/exporter/batocera_exporter.py +++ b/scripts/exporter/batocera_exporter.py @@ -11,8 +11,6 @@ from pathlib import Path from .base_exporter import BaseExporter - - class Exporter(BaseExporter): """Export truth data to Batocera batocera-systems format.""" @@ -44,7 +42,9 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - scraped_sys = scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + scraped_sys = ( + scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + ) display_name = self._display_name(sys_id, scraped_sys) # Build md5 lookup from scraped data for this system @@ -74,9 +74,7 @@ class Exporter(BaseExporter): # Original format requires md5 for every entry — skip without if not md5: continue - bios_parts.append( - f'{{ "md5": "{md5}", "file": "bios/{dest}" }}' - ) + bios_parts.append(f'{{ "md5": "{md5}", "file": "bios/{dest}" }}') bios_str = ", ".join(bios_parts) line = ( diff --git a/scripts/exporter/emudeck_exporter.py b/scripts/exporter/emudeck_exporter.py index fea1a790..a15b8c6b 100644 --- a/scripts/exporter/emudeck_exporter.py +++ b/scripts/exporter/emudeck_exporter.py @@ -156,7 +156,9 @@ class Exporter(BaseExporter): continue md5 = fe.get("md5", "") if isinstance(md5, list): - md5s.extend(m for m in md5 if m and re.fullmatch(r"[a-f0-9]{32}", m)) + md5s.extend( + m for m in md5 if m and re.fullmatch(r"[a-f0-9]{32}", m) + ) elif md5 and re.fullmatch(r"[a-f0-9]{32}", md5): md5s.append(md5) if md5s: @@ -195,7 +197,8 @@ class Exporter(BaseExporter): # Only flag if the system has usable data for the function type if cfg["pattern"] == "md5": has_md5 = any( - fe.get("md5") and isinstance(fe.get("md5"), str) + fe.get("md5") + and isinstance(fe.get("md5"), str) and re.fullmatch(r"[a-f0-9]{32}", fe["md5"]) for fe in sys_data["files"] ) diff --git a/scripts/exporter/recalbox_exporter.py b/scripts/exporter/recalbox_exporter.py index 843471a3..b13349fe 100644 --- a/scripts/exporter/recalbox_exporter.py +++ b/scripts/exporter/recalbox_exporter.py @@ -15,8 +15,6 @@ from pathlib import Path from .base_exporter import BaseExporter - - class Exporter(BaseExporter): """Export truth data to Recalbox es_bios.xml format.""" @@ -51,7 +49,9 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - scraped_sys = scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + scraped_sys = ( + scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + ) display_name = self._display_name(sys_id, scraped_sys) lines.append(f' ') @@ -85,7 +85,9 @@ class Exporter(BaseExporter): # Build cores string from _cores cores_list = fe.get("_cores", []) - core_str = ",".join(f"libretro/{c}" for c in cores_list) if cores_list else "" + core_str = ( + ",".join(f"libretro/{c}" for c in cores_list) if cores_list else "" + ) attrs = [f'path="{path}"'] if md5: @@ -97,7 +99,7 @@ class Exporter(BaseExporter): if core_str: attrs.append(f'core="{core_str}"') - lines.append(f' ') + lines.append(f" ") lines.append(" ") @@ -125,6 +127,9 @@ class Exporter(BaseExporter): if name.startswith("_") or self._is_pattern(name): continue dest = self._dest(fe) - if name.lower() not in exported_paths and dest.lower() not in exported_paths: + if ( + name.lower() not in exported_paths + and dest.lower() not in exported_paths + ): issues.append(f"missing: {name}") return issues diff --git a/scripts/exporter/retrobat_exporter.py b/scripts/exporter/retrobat_exporter.py index 118b1b49..6bfa28d7 100644 --- a/scripts/exporter/retrobat_exporter.py +++ b/scripts/exporter/retrobat_exporter.py @@ -15,8 +15,6 @@ from pathlib import Path from .base_exporter import BaseExporter - - class Exporter(BaseExporter): """Export truth data to RetroBat batocera-systems.json format.""" @@ -47,7 +45,9 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - scraped_sys = scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + scraped_sys = ( + scraped_data.get("systems", {}).get(sys_id) if scraped_data else None + ) display_name = self._display_name(sys_id, scraped_sys) bios_files: list[OrderedDict] = [] @@ -70,7 +70,9 @@ class Exporter(BaseExporter): if bios_files: if native_id in output: - existing_files = {e.get("file") for e in output[native_id]["biosFiles"]} + existing_files = { + e.get("file") for e in output[native_id]["biosFiles"] + } for entry in bios_files: if entry.get("file") not in existing_files: output[native_id]["biosFiles"].append(entry) diff --git a/scripts/exporter/retrodeck_exporter.py b/scripts/exporter/retrodeck_exporter.py index 269867ad..8317fce8 100644 --- a/scripts/exporter/retrodeck_exporter.py +++ b/scripts/exporter/retrodeck_exporter.py @@ -170,7 +170,9 @@ class Exporter(BaseExporter): if native_id in manifest: # Merge into existing component (multiple truth systems # may map to the same native ID) - existing_names = {e["filename"] for e in manifest[native_id]["bios"]} + existing_names = { + e["filename"] for e in manifest[native_id]["bios"] + } for entry in bios_entries: if entry["filename"] not in existing_names: manifest[native_id]["bios"].append(entry) diff --git a/scripts/exporter/systemdat_exporter.py b/scripts/exporter/systemdat_exporter.py index 73b03305..63c94228 100644 --- a/scripts/exporter/systemdat_exporter.py +++ b/scripts/exporter/systemdat_exporter.py @@ -58,16 +58,18 @@ class Exporter(BaseExporter): ] if version: lines.append(f"\tversion {version}") - lines.extend([ - '\tauthor "libretro"', - '\thomepage "https://github.com/libretro/libretro-database/blob/master/dat/System.dat"', - '\turl "https://raw.githubusercontent.com/libretro/libretro-database/master/dat/System.dat"', - ")", - "", - "game (", - '\tname "System"', - '\tcomment "System"', - ]) + lines.extend( + [ + '\tauthor "libretro"', + '\thomepage "https://github.com/libretro/libretro-database/blob/master/dat/System.dat"', + '\turl "https://raw.githubusercontent.com/libretro/libretro-database/master/dat/System.dat"', + ")", + "", + "game (", + '\tname "System"', + '\tcomment "System"', + ] + ) systems = truth_data.get("systems", {}) for sys_id in sorted(systems): diff --git a/scripts/generate_db.py b/scripts/generate_db.py index b8e16379..2ec01f01 100644 --- a/scripts/generate_db.py +++ b/scripts/generate_db.py @@ -44,7 +44,11 @@ def _canonical_name(filepath: Path) -> str: if "/.variants/" in str(filepath) or "\\.variants\\" in str(filepath): # naomi2.zip.da79eca4 -> naomi2.zip parts = name.rsplit(".", 1) - if len(parts) == 2 and len(parts[1]) == 8 and all(c in "0123456789abcdef" for c in parts[1]): + if ( + len(parts) == 2 + and len(parts[1]) == 8 + and all(c in "0123456789abcdef" for c in parts[1]) + ): return parts[0] return name @@ -83,7 +87,9 @@ def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> tuple[dict, dict, if existing_is_variant and not is_variant: if sha1 not in aliases: aliases[sha1] = [] - aliases[sha1].append({"name": files[sha1]["name"], "path": files[sha1]["path"]}) + aliases[sha1].append( + {"name": files[sha1]["name"], "path": files[sha1]["path"]} + ) files[sha1] = { "path": rel_path, "name": _canonical_name(filepath), @@ -93,7 +99,9 @@ def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> tuple[dict, dict, else: if sha1 not in aliases: aliases[sha1] = [] - aliases[sha1].append({"name": _canonical_name(filepath), "path": rel_path}) + aliases[sha1].append( + {"name": _canonical_name(filepath), "path": rel_path} + ) else: entry = { "path": rel_path, @@ -114,7 +122,9 @@ def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> tuple[dict, dict, # Non-variant file should be primary over .variants/ file if sha1 not in aliases: aliases[sha1] = [] - aliases[sha1].append({"name": files[sha1]["name"], "path": files[sha1]["path"]}) + aliases[sha1].append( + {"name": files[sha1]["name"], "path": files[sha1]["path"]} + ) files[sha1] = { "path": rel_path, "name": _canonical_name(filepath), @@ -124,7 +134,9 @@ def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> tuple[dict, dict, else: if sha1 not in aliases: aliases[sha1] = [] - aliases[sha1].append({"name": _canonical_name(filepath), "path": rel_path}) + aliases[sha1].append( + {"name": _canonical_name(filepath), "path": rel_path} + ) else: entry = { "path": rel_path, @@ -275,8 +287,12 @@ def _preserve_large_file_entries(files: dict, db_path: str) -> int: def main(): parser = argparse.ArgumentParser(description="Generate multi-indexed BIOS database") parser.add_argument("--force", action="store_true", help="Force rehash all files") - parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR, help="BIOS directory path") - parser.add_argument("--output", "-o", default=DEFAULT_OUTPUT, help="Output JSON file") + parser.add_argument( + "--bios-dir", default=DEFAULT_BIOS_DIR, help="BIOS directory path" + ) + parser.add_argument( + "--output", "-o", default=DEFAULT_OUTPUT, help="Output JSON file" + ) args = parser.parse_args() bios_dir = Path(args.bios_dir) @@ -354,7 +370,10 @@ def _collect_all_aliases(files: dict) -> dict: if platforms_dir.is_dir(): try: import yaml - for platform_name in list_registered_platforms(str(platforms_dir), include_archived=True): + + for platform_name in list_registered_platforms( + str(platforms_dir), include_archived=True + ): config_file = platforms_dir / f"{platform_name}.yml" try: with open(config_file) as f: @@ -383,6 +402,7 @@ def _collect_all_aliases(files: dict) -> dict: try: sys.path.insert(0, "scripts") from scraper.coreinfo_scraper import Scraper as CoreInfoScraper + ci_reqs = CoreInfoScraper().fetch_requirements() for r in ci_reqs: basename = r.name @@ -400,6 +420,7 @@ def _collect_all_aliases(files: dict) -> dict: if emulators_dir.is_dir(): try: import yaml + for emu_file in emulators_dir.glob("*.yml"): if emu_file.name.endswith(".old.yml"): continue @@ -454,10 +475,17 @@ def _collect_all_aliases(files: dict) -> dict: # ZX Spectrum ["48.rom", "zx48.rom"], # SquirrelJME - all JARs are the same - ["squirreljme.sqc", "squirreljme.jar", "squirreljme-fast.jar", - "squirreljme-slow.jar", "squirreljme-slow-test.jar", - "squirreljme-0.3.0.jar", "squirreljme-0.3.0-fast.jar", - "squirreljme-0.3.0-slow.jar", "squirreljme-0.3.0-slow-test.jar"], + [ + "squirreljme.sqc", + "squirreljme.jar", + "squirreljme-fast.jar", + "squirreljme-slow.jar", + "squirreljme-slow-test.jar", + "squirreljme-0.3.0.jar", + "squirreljme-0.3.0-fast.jar", + "squirreljme-0.3.0-slow.jar", + "squirreljme-0.3.0-slow-test.jar", + ], # Arcade - FBNeo spectrum ["spectrum.zip", "fbneo/spectrum.zip", "spec48k.zip"], ] diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index a045473e..2142024f 100644 --- a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -19,25 +19,39 @@ import os import re import sys import tempfile -import urllib.request import urllib.error +import urllib.request import zipfile from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) from common import ( MANUFACTURER_PREFIXES, - build_target_cores_cache, build_zip_contents_index, check_inside_zip, - compute_hashes, expand_platform_declared_names, fetch_large_file, group_identical_platforms, - list_emulator_profiles, list_platform_system_ids, list_registered_platforms, - filter_systems_by_target, list_system_ids, load_database, - load_data_dir_registry, load_emulator_profiles, load_platform_config, - md5_composite, require_yaml, resolve_local_file, -) -from validation import ( - _build_validation_index, check_file_validation, filter_files_by_mode, + build_target_cores_cache, + build_zip_contents_index, + check_inside_zip, + compute_hashes, + expand_platform_declared_names, + fetch_large_file, + filter_systems_by_target, + group_identical_platforms, + list_emulator_profiles, + list_platform_system_ids, + list_registered_platforms, + list_system_ids, + load_data_dir_registry, + load_database, + load_emulator_profiles, + load_platform_config, + require_yaml, + resolve_local_file, ) from deterministic_zip import rebuild_zip_deterministic +from validation import ( + _build_validation_index, + check_file_validation, + filter_files_by_mode, +) yaml = require_yaml() @@ -169,7 +183,11 @@ def _find_candidate_satisfying_both( return None md5_expected = file_entry.get("md5", "") - md5_set = {m.strip().lower() for m in md5_expected.split(",") if m.strip()} if md5_expected else set() + md5_set = ( + {m.strip().lower() for m in md5_expected.split(",") if m.strip()} + if md5_expected + else set() + ) by_name = db.get("indexes", {}).get("by_name", {}) files_db = db.get("files", {}) @@ -177,7 +195,11 @@ def _find_candidate_satisfying_both( for sha1 in by_name.get(fname, []): candidate = files_db.get(sha1, {}) path = candidate.get("path", "") - if not path or not os.path.exists(path) or os.path.realpath(path) == os.path.realpath(local_path): + if ( + not path + or not os.path.exists(path) + or os.path.realpath(path) == os.path.realpath(local_path) + ): continue # Must still satisfy platform MD5 if md5_set and candidate.get("md5", "").lower() not in md5_set: @@ -202,8 +224,7 @@ def _path_parents(dest: str) -> list[str]: return ["/".join(parts[:i]) for i in range(1, len(parts))] -def _has_path_conflict(dest: str, seen_files: set[str], - seen_parents: set[str]) -> bool: +def _has_path_conflict(dest: str, seen_files: set[str], seen_parents: set[str]) -> bool: """Check if dest conflicts with existing paths (file vs directory). Returns True if adding dest would create an impossible extraction: @@ -218,18 +239,21 @@ def _has_path_conflict(dest: str, seen_files: set[str], return False -def _register_path(dest: str, seen_files: set[str], - seen_parents: set[str]) -> None: +def _register_path(dest: str, seen_files: set[str], seen_parents: set[str]) -> None: """Track a file path and its parent directories.""" seen_files.add(dest) for parent in _path_parents(dest): seen_parents.add(parent) -def resolve_file(file_entry: dict, db: dict, bios_dir: str, - zip_contents: dict | None = None, - dest_hint: str = "", - data_dir_registry: dict | None = None) -> tuple[str | None, str]: +def resolve_file( + file_entry: dict, + db: dict, + bios_dir: str, + zip_contents: dict | None = None, + dest_hint: str = "", + data_dir_registry: dict | None = None, +) -> tuple[str | None, str]: """Resolve a BIOS file with storage tiers and release asset fallback. Wraps common.resolve_local_file() with pack-specific logic for @@ -242,9 +266,13 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, if storage == "external": return None, "external" - path, status = resolve_local_file(file_entry, db, zip_contents, - dest_hint=dest_hint, - data_dir_registry=data_dir_registry) + path, status = resolve_local_file( + file_entry, + db, + zip_contents, + dest_hint=dest_hint, + data_dir_registry=data_dir_registry, + ) if path and status != "hash_mismatch": return path, status @@ -253,7 +281,9 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, name = file_entry.get("name", "") sha1 = file_entry.get("sha1") md5_raw = file_entry.get("md5", "") - md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + md5_list = ( + [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + ) first_md5 = md5_list[0] if md5_list else "" cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5) if cached: @@ -266,7 +296,6 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, return None, "not_found" - def download_external(file_entry: dict, dest_path: str) -> bool: """Download an external BIOS file, verify hash, save to dest_path.""" url = file_entry.get("source_url") @@ -278,11 +307,15 @@ def download_external(file_entry: dict, dest_path: str) -> bool: md5 = file_entry.get("md5") if not (sha256 or sha1 or md5): - print(f" WARNING: no hash for {file_entry['name']}, skipping unverifiable download") + print( + f" WARNING: no hash for {file_entry['name']}, skipping unverifiable download" + ) return False try: - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack-gen/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-pack-gen/1.0"} + ) with urllib.request.urlopen(req, timeout=120) as resp: data = resp.read() except urllib.error.URLError as e: @@ -329,6 +362,7 @@ def _detect_extras_prefix(config: dict, base_dest: str) -> str: if not dests: return "" from collections import Counter + roots = Counter(d.split("/", 1)[0] for d in dests) most_common, count = roots.most_common(1)[0] if count / len(dests) > 0.9: @@ -388,17 +422,18 @@ def _detect_slug_structure(config: dict) -> tuple[bool, dict[str, str]]: if d and d.count("/") > 1: deep_files += 1 shallow = deep_files / total_files < 0.05 if total_files else True - return (all_have_slash and varying_slugs and high_coverage - and shallow), sys_to_slug + return (all_have_slash and varying_slugs and high_coverage and shallow), sys_to_slug def _map_emulator_to_slug( profile: dict, - platform_systems: set[str], norm_map: dict[str, str], + platform_systems: set[str], + norm_map: dict[str, str], sys_to_slug: dict[str, str], ) -> str: """Map an emulator to a destination slug for slug-based platforms.""" from common import _norm_system_id + emu_systems = set(profile.get("systems", [])) # Direct match direct = emu_systems & platform_systems @@ -437,10 +472,14 @@ def _collect_emulator_extras( Works for ANY platform (RetroArch, Batocera, Recalbox, etc.) """ - from common import resolve_platform_cores, _norm_system_id + from common import _norm_system_id, resolve_platform_cores from verify import find_undeclared_files - profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir) + profiles = ( + emu_profiles + if emu_profiles is not None + else load_emulator_profiles(emulators_dir) + ) # Detect destination conventions for core extras extras_prefix = _detect_extras_prefix(config, base_dest) @@ -451,7 +490,9 @@ def _collect_emulator_extras( for sid in platform_systems: norm_map[_norm_system_id(sid)] = sid - undeclared = find_undeclared_files(config, emulators_dir, db, emu_profiles, target_cores=target_cores) + undeclared = find_undeclared_files( + config, emulators_dir, db, emu_profiles, target_cores=target_cores + ) extras = [] seen_dests: set[str] = set(seen) for u in undeclared: @@ -475,7 +516,10 @@ def _collect_emulator_extras( profile = pp break slug = _map_emulator_to_slug( - profile, platform_systems, norm_map, sys_to_slug, + profile, + platform_systems, + norm_map, + sys_to_slug, ) if not slug: continue # can't place without slug @@ -485,13 +529,15 @@ def _collect_emulator_extras( if full_dest in seen_dests: continue seen_dests.add(full_dest) - extras.append({ - "name": name, - "destination": dest, - "required": u.get("required", False), - "hle_fallback": u.get("hle_fallback", False), - "source_emulator": u.get("emulator", ""), - }) + extras.append( + { + "name": name, + "destination": dest, + "required": u.get("required", False), + "hle_fallback": u.get("hle_fallback", False), + "source_emulator": u.get("emulator", ""), + } + ) # Second pass: find alternative destinations for files already in the pack. # A file declared by the platform or emitted above may also be needed at a @@ -550,17 +596,22 @@ def _collect_emulator_extras( if full_dest in seen_dests: continue # Check file exists in repo or data dirs - if not (by_name.get(fname) or by_name.get(dest.rsplit("/", 1)[-1]) - or by_path_suffix.get(dest)): + if not ( + by_name.get(fname) + or by_name.get(dest.rsplit("/", 1)[-1]) + or by_path_suffix.get(dest) + ): continue seen_dests.add(full_dest) - extras.append({ - "name": fname, - "destination": dest, - "required": f.get("required", False), - "hle_fallback": f.get("hle_fallback", False), - "source_emulator": profile.get("emulator", emu_name), - }) + extras.append( + { + "name": fname, + "destination": dest, + "required": f.get("required", False), + "hle_fallback": f.get("hle_fallback", False), + "source_emulator": profile.get("emulator", emu_name), + } + ) # Archive prefix pass: cores that store BIOS archives in a subdirectory # (e.g. system/fbneo/neogeo.zip). When the archive is already covered at @@ -589,13 +640,15 @@ def _collect_emulator_extras( if not by_name.get(archive_name): continue seen_dests.add(full_dest) - extras.append({ - "name": archive_name, - "destination": dest, - "required": True, - "hle_fallback": False, - "source_emulator": profile.get("emulator", emu_name), - }) + extras.append( + { + "name": archive_name, + "destination": dest, + "required": True, + "hle_fallback": False, + "source_emulator": profile.get("emulator", emu_name), + } + ) # Third pass: agnostic scan — for filename-agnostic cores, include all # DB files matching the system path prefix and size criteria. @@ -673,20 +726,27 @@ def _collect_emulator_extras( if full_dest in seen_dests: continue seen_dests.add(full_dest) - extras.append({ - "name": scan_name, - "destination": dest, - "required": False, - "hle_fallback": False, - "source_emulator": profile.get("emulator", emu_name), - "agnostic_scan": True, - }) + extras.append( + { + "name": scan_name, + "destination": dest, + "required": False, + "hle_fallback": False, + "source_emulator": profile.get("emulator", emu_name), + "agnostic_scan": True, + } + ) return extras -def _build_readme(platform_name: str, platform_display: str, - base_dest: str, total_files: int, num_systems: int) -> str: +def _build_readme( + platform_name: str, + platform_display: str, + base_dest: str, + total_files: int, + num_systems: int, +) -> str: """Build a personalized step-by-step README for each platform pack.""" sep = "=" * 50 header = ( @@ -710,7 +770,7 @@ def _build_readme(platform_name: str, platform_display: str, " 1. Find your RetroArch system directory:\n" " - RetroArch > Settings > Directory > System/BIOS\n" " - Default: retroarch/system/\n" - " 2. Open the \"system\" folder from this archive\n" + ' 2. Open the "system" folder from this archive\n' " 3. Copy ALL contents into your system directory\n" " 4. Overwrite if asked\n\n" " Option C: Manual (handheld / SD card)\n" @@ -718,7 +778,7 @@ def _build_readme(platform_name: str, platform_display: str, " Anbernic, Retroid, Miyoo, Trimui, etc.:\n" " 1. Connect your SD card to your PC\n" " 2. Find the BIOS folder (usually BIOS/ or system/)\n" - " 3. Copy ALL contents of \"system\" from this archive\n" + ' 3. Copy ALL contents of "system" from this archive\n' " 4. Eject SD card and reboot your device\n\n" " Common paths by device:\n" " Anbernic (ArkOS/JELOS): BIOS/\n" @@ -737,14 +797,14 @@ def _build_readme(platform_name: str, platform_display: str, " 1. On your PC, open the Batocera network share:\n" " - Windows: \\\\BATOCERA\\share\\bios\\\n" " - Mac/Linux: smb://batocera/share/bios/\n" - " 2. Open the \"bios\" folder from this archive\n" + ' 2. Open the "bios" folder from this archive\n' " 3. Copy ALL contents into the share\n" " 4. Overwrite if asked\n\n" " Option C: Manual (SD card)\n" " --------------------------\n" " 1. Put the SD card in your PC\n" " 2. Navigate to /userdata/bios/ on the SHARE partition\n" - " 3. Copy ALL contents of \"bios\" from this archive\n\n" + ' 3. Copy ALL contents of "bios" from this archive\n\n' " NOTE: Dreamcast flash memory is named dc_nvmem.bin\n" " (if your setup asks for dc_flash.bin, same file).\n\n" ), @@ -758,13 +818,13 @@ def _build_readme(platform_name: str, platform_display: str, " 1. On your PC, open the Recalbox network share:\n" " - Windows: \\\\RECALBOX\\share\\bios\\\n" " - Mac/Linux: smb://recalbox/share/bios/\n" - " 2. Open the \"bios\" folder from this archive\n" + ' 2. Open the "bios" folder from this archive\n' " 3. Copy ALL contents into the share\n\n" " Option C: Manual (SD card)\n" " --------------------------\n" " 1. Put the SD card in your PC\n" " 2. Navigate to /recalbox/share/bios/\n" - " 3. Copy ALL contents of \"bios\" from this archive\n\n" + ' 3. Copy ALL contents of "bios" from this archive\n\n' ), "emudeck": ( "INSTALLATION GUIDE (Steam Deck / Linux)\n\n" @@ -778,7 +838,7 @@ def _build_readme(platform_name: str, platform_display: str, " ----------------\n" " 1. Open Dolphin file manager\n" " 2. Navigate to ~/Emulation/bios/\n" - " 3. Open the \"bios\" folder from this archive\n" + ' 3. Open the "bios" folder from this archive\n' " 4. Copy ALL contents into ~/Emulation/bios/\n\n" " STANDALONE EMULATORS (extra step)\n" " Switch and 3DS emulators need keys in specific folders:\n" @@ -799,9 +859,9 @@ def _build_readme(platform_name: str, platform_display: str, " 1. Open Dolphin file manager\n" " 2. Show hidden files (Ctrl+H)\n" " 3. Navigate to ~/retrodeck/\n" - " 4. Open the \"bios\" folder from this archive\n" + ' 4. Open the "bios" folder from this archive\n' " 5. Copy ALL contents into ~/retrodeck/bios/\n" - " 6. If the archive contains a \"roms\" folder, copy\n" + ' 6. If the archive contains a "roms" folder, copy\n' " its contents into ~/retrodeck/roms/\n\n" " NOTE: RetroDECK uses its own BIOS checker. After\n" " copying, open RetroDECK > Tools > BIOS Checker to\n" @@ -818,7 +878,7 @@ def _build_readme(platform_name: str, platform_display: str, " 1. Open your RetroBat installation folder\n" " 2. Navigate to the bios\\ subfolder\n" " (default: C:\\RetroBat\\bios\\)\n" - " 3. Open the \"bios\" folder from this archive\n" + ' 3. Open the "bios" folder from this archive\n' " 4. Copy ALL contents into your bios\\ folder\n" " 5. Overwrite if asked\n\n" ), @@ -828,7 +888,7 @@ def _build_readme(platform_name: str, platform_display: str, " 2. Navigate to the Firmware subfolder:\n" " - Windows: BizHawk\\Firmware\\\n" " - Linux: ~/.config/BizHawk/Firmware/\n" - " 3. Open the \"Firmware\" folder from this archive\n" + ' 3. Open the "Firmware" folder from this archive\n' " 4. Copy ALL contents into your Firmware folder\n" " 5. In BizHawk: Config > Paths > Firmware should\n" " point to this folder\n\n" @@ -837,7 +897,7 @@ def _build_readme(platform_name: str, platform_display: str, "INSTALLATION GUIDE (RomM server)\n\n" " 1. Locate your RomM library folder\n" " 2. Navigate to the bios/ subdirectory\n" - " 3. Copy ALL contents of \"bios\" from this archive\n" + ' 3. Copy ALL contents of "bios" from this archive\n' " 4. Restart the RomM service to detect new files\n\n" ), "retropie": ( @@ -845,7 +905,7 @@ def _build_readme(platform_name: str, platform_display: str, " Option A: Via network share\n" " --------------------------\n" " 1. On your PC, open: \\\\RETROPIE\\bios\\\n" - " 2. Copy ALL contents of \"BIOS\" from this archive\n\n" + ' 2. Copy ALL contents of "BIOS" from this archive\n\n' " Option B: Via SSH\n" " -----------------\n" " 1. SSH into your Pi: ssh pi@retropie\n" @@ -854,19 +914,22 @@ def _build_readme(platform_name: str, platform_display: str, " ---------------------\n" " 1. Put the SD card in your PC\n" " 2. Navigate to /home/pi/RetroPie/BIOS/\n" - " 3. Copy ALL contents of \"BIOS\" from this archive\n\n" + ' 3. Copy ALL contents of "BIOS" from this archive\n\n' ), } # Lakka uses same guide as RetroArch guides["lakka"] = guides["retroarch"] - guide = guides.get(platform_name, ( - f"INSTALLATION\n\n" - f" 1. Open the \"{base_dest or 'files'}\" folder in this archive\n" - f" 2. Copy ALL contents to your BIOS directory\n" - f" 3. Overwrite if asked\n\n" - )) + guide = guides.get( + platform_name, + ( + f"INSTALLATION\n\n" + f' 1. Open the "{base_dest or "files"}" folder in this archive\n' + f" 2. Copy ALL contents to your BIOS directory\n" + f" 3. Overwrite if asked\n\n" + ), + ) footer = ( "TROUBLESHOOTING\n\n" @@ -884,7 +947,9 @@ def _build_readme(platform_name: str, platform_display: str, def _build_agnostic_rename_readme( - destination: str, original: str, alternatives: list[str], + destination: str, + original: str, + alternatives: list[str], ) -> str: """Build a README explaining an agnostic file rename.""" lines = [ @@ -940,7 +1005,7 @@ def generate_pack( s = sid.lower().replace("_", "-") for prefix in MANUFACTURER_PREFIXES: if s.startswith(prefix): - s = s[len(prefix):] + s = s[len(prefix) :] break parts = s.split("-") display_parts.append("_".join(p.title() for p in parts if p)) @@ -960,7 +1025,9 @@ def generate_pack( user_provided = [] seen_destinations: set[str] = set() seen_lower: set[str] = set() # only used when case_insensitive=True - seen_parents: set[str] = set() # parent dirs of added files (path conflict detection) + seen_parents: set[str] = ( + set() + ) # parent dirs of added files (path conflict detection) # Per-file status: worst status wins (missing > untested > ok) file_status: dict[str, str] = {} file_reasons: dict[str, str] = {} @@ -972,7 +1039,10 @@ def generate_pack( # Filter systems by target if specified from common import resolve_platform_cores - plat_cores = resolve_platform_cores(config, emu_profiles or {}) if target_cores else None + + plat_cores = ( + resolve_platform_cores(config, emu_profiles or {}) if target_cores else None + ) pack_systems = filter_systems_by_target( config.get("systems", {}), emu_profiles or {}, @@ -982,13 +1052,19 @@ def generate_pack( if system_filter: from common import _norm_system_id + norm_filter = {_norm_system_id(s) for s in system_filter} - filtered = {sid: sys_data for sid, sys_data in pack_systems.items() - if sid in system_filter or _norm_system_id(sid) in norm_filter} + filtered = { + sid: sys_data + for sid, sys_data in pack_systems.items() + if sid in system_filter or _norm_system_id(sid) in norm_filter + } if not filtered: available = sorted(pack_systems.keys())[:10] - print(f" WARNING: no systems matched filter {system_filter} " - f"(available: {', '.join(available)})") + print( + f" WARNING: no systems matched filter {system_filter} " + f"(available: {', '.join(available)})" + ) return None pack_systems = filtered @@ -1013,7 +1089,9 @@ def generate_pack( full_dest = dest dedup_key = full_dest - already_packed = dedup_key in seen_destinations or (case_insensitive and dedup_key.lower() in seen_lower) + already_packed = dedup_key in seen_destinations or ( + case_insensitive and dedup_key.lower() in seen_lower + ) if _has_path_conflict(full_dest, seen_destinations, seen_parents): continue @@ -1028,22 +1106,34 @@ def generate_pack( if case_insensitive: seen_lower.add(dedup_key.lower()) file_status.setdefault(dedup_key, "ok") - instructions = file_entry.get("instructions", "Please provide this file manually.") + instructions = file_entry.get( + "instructions", "Please provide this file manually." + ) instr_name = f"INSTRUCTIONS_{file_entry['name']}.txt" - instr_path = f"{base_dest}/{instr_name}" if base_dest else instr_name - zf.writestr(instr_path, f"File needed: {file_entry['name']}\n\n{instructions}\n") + instr_path = ( + f"{base_dest}/{instr_name}" if base_dest else instr_name + ) + zf.writestr( + instr_path, + f"File needed: {file_entry['name']}\n\n{instructions}\n", + ) user_provided.append(file_entry["name"]) total_files += 1 continue local_path, status = resolve_file( - file_entry, db, bios_dir, zip_contents, + file_entry, + db, + bios_dir, + zip_contents, data_dir_registry=data_registry, ) if status == "external": file_ext = os.path.splitext(file_entry["name"])[1] or "" - with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp: + with tempfile.NamedTemporaryFile( + delete=False, suffix=file_ext + ) as tmp: tmp_path = tmp.name try: @@ -1119,10 +1209,16 @@ def generate_pack( if _n and _n != original_name: alt_names.append(_n) readme_text = _build_agnostic_rename_readme( - dest_name, original_name, alt_names, + dest_name, + original_name, + alt_names, ) readme_name = f"RENAMED_{dest_name}.txt" - readme_full = f"{base_dest}/{readme_name}" if base_dest else readme_name + readme_full = ( + f"{base_dest}/{readme_name}" + if base_dest + else readme_name + ) if readme_full not in seen_destinations: zf.writestr(readme_full, readme_text) seen_destinations.add(readme_full) @@ -1140,12 +1236,15 @@ def generate_pack( inner_md5_raw = file_entry.get("md5", "") inner_md5_list = ( [m.strip() for m in inner_md5_raw.split(",") if m.strip()] - if inner_md5_raw else [""] + if inner_md5_raw + else [""] ) zip_ok = False last_result = "not_in_zip" for md5_candidate in inner_md5_list: - last_result = check_inside_zip(local_path, zf_name, md5_candidate) + last_result = check_inside_zip( + local_path, zf_name, md5_candidate + ) if last_result == "ok": zip_ok = True break @@ -1159,7 +1258,9 @@ def generate_pack( file_reasons[dedup_key] = "cannot read ZIP" else: file_status[dedup_key] = "untested" - file_reasons[dedup_key] = f"{zf_name} MD5 mismatch inside ZIP" + file_reasons[dedup_key] = ( + f"{zf_name} MD5 mismatch inside ZIP" + ) else: file_status[dedup_key] = "untested" file_reasons[dedup_key] = "hash mismatch" @@ -1170,14 +1271,22 @@ def generate_pack( # Platform verification (existence/md5) is the authority for pack status. # Emulator checks are supplementary -logged but don't downgrade. # When a discrepancy is found, try to find a file satisfying both. - if (file_status.get(dedup_key) == "ok" - and local_path and validation_index): + if ( + file_status.get(dedup_key) == "ok" + and local_path + and validation_index + ): fname = file_entry.get("name", "") - reason = check_file_validation(local_path, fname, validation_index, - bios_dir) + reason = check_file_validation( + local_path, fname, validation_index, bios_dir + ) if reason: better = _find_candidate_satisfying_both( - file_entry, db, local_path, validation_index, bios_dir, + file_entry, + db, + local_path, + validation_index, + bios_dir, ) if better: local_path = better @@ -1214,8 +1323,13 @@ def generate_pack( core_files = [] else: core_files = _collect_emulator_extras( - config, emulators_dir, db, - seen_destinations, base_dest, emu_profiles, target_cores=target_cores, + config, + emulators_dir, + db, + seen_destinations, + base_dest, + emu_profiles, + target_cores=target_cores, ) core_count = 0 for fe in core_files: @@ -1247,8 +1361,12 @@ def generate_pack( dest_hint = fe.get("destination", "") local_path, status = resolve_file( - fe, db, bios_dir, zip_contents, - dest_hint=dest_hint, data_dir_registry=data_registry, + fe, + db, + bios_dir, + zip_contents, + dest_hint=dest_hint, + data_dir_registry=data_registry, ) if status in ("not_found", "external", "user_provided"): continue @@ -1276,7 +1394,9 @@ def generate_pack( continue local_path = entry.get("local_cache", "") if not local_path or not os.path.isdir(local_path): - print(f" WARNING: data directory '{ref_key}' not cached at {local_path} -run refresh_data_dirs.py") + print( + f" WARNING: data directory '{ref_key}' not cached at {local_path} -run refresh_data_dirs.py" + ) continue dd_dest = dd.get("destination", "") if base_dest and dd_dest: @@ -1290,7 +1410,9 @@ def generate_pack( src = os.path.join(root, fname) rel = os.path.relpath(src, local_path) full = f"{dd_prefix}/{rel}" - if full in seen_destinations or (full.lower() in seen_lower and case_insensitive): + if full in seen_destinations or ( + full.lower() in seen_lower and case_insensitive + ): continue if _has_path_conflict(full, seen_destinations, seen_parents): continue @@ -1303,8 +1425,9 @@ def generate_pack( # README.txt for users -personalized step-by-step per platform num_systems = len(pack_systems) - readme_text = _build_readme(platform_name, platform_display, - base_dest, total_files, num_systems) + readme_text = _build_readme( + platform_name, platform_display, base_dest, total_files, num_systems + ) zf.writestr("README.txt", readme_text) files_ok = sum(1 for s in file_status.values() if s == "ok") @@ -1318,7 +1441,9 @@ def generate_pack( if files_miss: parts.append(f"{files_miss} missing") baseline = total_files - core_count - print(f" {zip_path}: {total_files} files packed ({baseline} baseline + {core_count} from cores), {', '.join(parts)} [{verification_mode}]") + print( + f" {zip_path}: {total_files} files packed ({baseline} baseline + {core_count} from cores), {', '.join(parts)} [{verification_mode}]" + ) for key, reason in sorted(file_reasons.items()): status = file_status.get(key, "") @@ -1329,7 +1454,9 @@ def generate_pack( return zip_path -def _extract_zip_to_archive(source_zip: str, dest_prefix: str, target_zf: zipfile.ZipFile): +def _extract_zip_to_archive( + source_zip: str, dest_prefix: str, target_zf: zipfile.ZipFile +): """Extract contents of a source ZIP into target ZIP under dest_prefix.""" with zipfile.ZipFile(source_zip, "r") as src: for info in src.infolist(): @@ -1343,7 +1470,9 @@ def _extract_zip_to_archive(source_zip: str, dest_prefix: str, target_zf: zipfil target_zf.writestr(target_path, data) -def _normalize_zip_for_pack(source_zip: str, dest_path: str, target_zf: zipfile.ZipFile): +def _normalize_zip_for_pack( + source_zip: str, dest_path: str, target_zf: zipfile.ZipFile +): """Add a MAME BIOS ZIP to the pack as a deterministic rebuild. Instead of copying the original ZIP (with non-deterministic metadata), @@ -1356,6 +1485,7 @@ def _normalize_zip_for_pack(source_zip: str, dest_path: str, target_zf: zipfile. - Bit-identical ZIPs across platforms and build times """ import tempfile as _tmp + tmp_fd, tmp_path = _tmp.mkstemp(suffix=".zip", dir="tmp") os.close(tmp_fd) try: @@ -1370,8 +1500,10 @@ def _normalize_zip_for_pack(source_zip: str, dest_path: str, target_zf: zipfile. # Emulator/system mode pack generation -def _resolve_destination(file_entry: dict, pack_structure: dict | None, - standalone: bool) -> str: + +def _resolve_destination( + file_entry: dict, pack_structure: dict | None, standalone: bool +) -> str: """Resolve the ZIP destination path for a file entry.""" # 1. standalone_path override if standalone and file_entry.get("standalone_path"): @@ -1414,25 +1546,34 @@ def generate_emulator_pack( selected: list[tuple[str, dict]] = [] for name in profile_names: if name not in all_profiles: - available = sorted(k for k, v in all_profiles.items() - if v.get("type") not in ("alias", "test")) + available = sorted( + k + for k, v in all_profiles.items() + if v.get("type") not in ("alias", "test") + ) print(f"Error: emulator '{name}' not found", file=sys.stderr) print(f"Available: {', '.join(available[:10])}...", file=sys.stderr) return None p = all_profiles[name] if p.get("type") == "alias": alias_of = p.get("alias_of", "?") - print(f"Error: {name} is an alias of {alias_of} -use --emulator {alias_of}", - file=sys.stderr) + print( + f"Error: {name} is an alias of {alias_of} -use --emulator {alias_of}", + file=sys.stderr, + ) return None if p.get("type") == "launcher": - print(f"Error: {name} is a launcher -use the emulator it launches", - file=sys.stderr) + print( + f"Error: {name} is a launcher -use the emulator it launches", + file=sys.stderr, + ) return None ptype = p.get("type", "libretro") if standalone and "standalone" not in ptype: - print(f"Error: {name} ({ptype}) does not support --standalone", - file=sys.stderr) + print( + f"Error: {name} ({ptype}) does not support --standalone", + file=sys.stderr, + ) return None selected.append((name, p)) @@ -1446,7 +1587,9 @@ def generate_emulator_pack( missing_files = [] seen_destinations: set[str] = set() seen_lower: set[str] = set() - seen_parents: set[str] = set() # parent dirs of added files (path conflict detection) + seen_parents: set[str] = ( + set() + ) # parent dirs of added files (path conflict detection) seen_hashes: set[str] = set() # SHA1 dedup for same file, different path data_dir_notices: list[str] = [] data_registry = load_data_dir_registry( @@ -1519,7 +1662,10 @@ def generate_emulator_pack( archive_entry = {"name": archive_name} local_path, status = resolve_file( - archive_entry, db, bios_dir, zip_contents, + archive_entry, + db, + bios_dir, + zip_contents, data_dir_registry=data_registry, ) if local_path and status not in ("not_found",): @@ -1563,13 +1709,19 @@ def generate_emulator_pack( dest_hint = fe.get("path", "") local_path, status = resolve_file( - fe, db, bios_dir, zip_contents, - dest_hint=dest_hint, data_dir_registry=data_registry, + fe, + db, + bios_dir, + zip_contents, + dest_hint=dest_hint, + data_dir_registry=data_registry, ) if status == "external": file_ext = os.path.splitext(fe["name"])[1] or "" - with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp: + with tempfile.NamedTemporaryFile( + delete=False, suffix=file_ext + ) as tmp: tmp_path = tmp.name try: if download_external(fe, tmp_path): @@ -1613,7 +1765,7 @@ def generate_emulator_pack( os.unlink(zip_path) # Report - label = " + ".join(p.get("emulator", n) for n, p in selected) + " + ".join(p.get("emulator", n) for n, p in selected) missing_count = len(missing_files) ok_count = total_files parts = [f"{ok_count} files packed"] @@ -1623,7 +1775,9 @@ def generate_emulator_pack( for name in missing_files: print(f" MISSING: {name}") for ref in sorted(set(data_dir_notices)): - print(f" Note: data directory '{ref}' required but not included (use refresh_data_dirs.py)") + print( + f" Note: data directory '{ref}' required but not included (use refresh_data_dirs.py)" + ) return zip_path if total_files > 0 or missing_files else None @@ -1656,13 +1810,19 @@ def generate_system_pack( for p in profiles.values(): all_systems.update(p.get("systems", [])) if standalone: - print(f"No standalone emulators found for system(s): {', '.join(system_ids)}", - file=sys.stderr) + print( + f"No standalone emulators found for system(s): {', '.join(system_ids)}", + file=sys.stderr, + ) else: - print(f"No emulators found for system(s): {', '.join(system_ids)}", - file=sys.stderr) - print(f"Available systems: {', '.join(sorted(all_systems)[:20])}...", - file=sys.stderr) + print( + f"No emulators found for system(s): {', '.join(system_ids)}", + file=sys.stderr, + ) + print( + f"Available systems: {', '.join(sorted(all_systems)[:20])}...", + file=sys.stderr, + ) return None # Use system-based ZIP name @@ -1670,8 +1830,14 @@ def generate_system_pack( "_".join(w.title() for w in sid.split("-")) for sid in system_ids ) result = generate_emulator_pack( - matching, emulators_dir, db, bios_dir, output_dir, - standalone, zip_contents, required_only=required_only, + matching, + emulators_dir, + db, + bios_dir, + output_dir, + standalone, + zip_contents, + required_only=required_only, ) if result: # Rename to system-based name @@ -1693,7 +1859,7 @@ def _system_display_name(system_id: str) -> str: s = system_id.lower().replace("_", "-") for prefix in MANUFACTURER_PREFIXES: if s.startswith(prefix): - s = s[len(prefix):] + s = s[len(prefix) :] break parts = s.split("-") return "_".join(p.title() for p in parts if p) @@ -1706,6 +1872,7 @@ def _group_systems_by_manufacturer( ) -> dict[str, list[str]]: """Group system IDs by manufacturer for --split --group-by manufacturer.""" from common import derive_manufacturer + groups: dict[str, list[str]] = {} for sid, sys_data in systems.items(): mfr = derive_manufacturer(sid, sys_data) @@ -1747,7 +1914,12 @@ def generate_split_packs( base_dest = config.get("base_destination", "") if emu_profiles: all_extras = _collect_emulator_extras( - config, emulators_dir, db, set(), base_dest, emu_profiles, + config, + emulators_dir, + db, + set(), + base_dest, + emu_profiles, target_cores=target_cores, ) else: @@ -1755,6 +1927,7 @@ def generate_split_packs( # Map each extra to matching systems via source_emulator. # Index by both profile key AND display name (source_emulator uses display). from common import _norm_system_id + emu_system_map: dict[str, set[str]] = {} for name, p in emu_profiles.items(): raw = set(p.get("systems", [])) @@ -1765,7 +1938,7 @@ def generate_split_packs( if display and display != name: emu_system_map[display] = combined - plat_norm = {_norm_system_id(s): s for s in systems} + {_norm_system_id(s): s for s in systems} results = [] for group_name, group_system_ids in sorted(groups.items()): @@ -1773,15 +1946,24 @@ def generate_split_packs( group_norm = {_norm_system_id(s) for s in group_system_ids} group_match = group_sys_set | group_norm group_extras = [ - fe for fe in all_extras + fe + for fe in all_extras if emu_system_map.get(fe.get("source_emulator", ""), set()) & group_match ] zip_path = generate_pack( - platform_name, platforms_dir, db, bios_dir, split_dir, - emulators_dir=emulators_dir, zip_contents=zip_contents, - data_registry=data_registry, emu_profiles=emu_profiles, - target_cores=target_cores, required_only=required_only, - system_filter=group_system_ids, precomputed_extras=group_extras, + platform_name, + platforms_dir, + db, + bios_dir, + split_dir, + emulators_dir=emulators_dir, + zip_contents=zip_contents, + data_registry=data_registry, + emu_profiles=emu_profiles, + target_cores=target_cores, + required_only=required_only, + system_filter=group_system_ids, + precomputed_extras=group_extras, ) if zip_path: version = config.get("version", config.get("dat_version", "")) @@ -1801,14 +1983,18 @@ def generate_split_packs( group_norm = {_norm_system_id(s) for s in group_system_ids} all_groups_match |= set(group_system_ids) | group_norm undistributed = [ - fe for fe in all_extras - if not emu_system_map.get(fe.get("source_emulator", ""), set()) & all_groups_match + fe + for fe in all_extras + if not emu_system_map.get(fe.get("source_emulator", ""), set()) + & all_groups_match ] if undistributed: emus = sorted({fe.get("source_emulator", "?") for fe in undistributed}) - print(f" NOTE: {len(undistributed)} core extras from {len(emus)} emulators " - f"not in split packs (missing systems: field in profiles: " - f"{', '.join(emus[:5])}{'...' if len(emus) > 5 else ''})") + print( + f" NOTE: {len(undistributed)} core extras from {len(emus)} emulators " + f"not in split packs (missing systems: field in profiles: " + f"{', '.join(emus[:5])}{'...' if len(emus) > 5 else ''})" + ) return results @@ -1894,7 +2080,9 @@ def generate_md5_pack( if matched_fe: if emulator_name and emu_pack_structure is not None: - dest = _resolve_destination(matched_fe, emu_pack_structure, standalone) + dest = _resolve_destination( + matched_fe, emu_pack_structure, standalone + ) else: dest = matched_fe.get("destination", matched_fe.get("name", name)) elif paths: @@ -1910,7 +2098,9 @@ def generate_md5_pack( seen.add(full_dest) fe_for_resolve = {"name": name, "sha1": sha1, "md5": entry.get("md5", "")} - local_path, status = resolve_file(fe_for_resolve, db, bios_dir, zip_contents) + local_path, status = resolve_file( + fe_for_resolve, db, bios_dir, zip_contents + ) if status == "not_found" or not local_path: not_in_repo.append((name, hash_val)) @@ -1968,9 +2158,9 @@ def _validate_args(args, parser): has_all = args.all has_emulator = bool(args.emulator) has_system = bool(args.system) - has_from_md5 = bool(args.from_md5 or getattr(args, 'from_md5_file', None)) + has_from_md5 = bool(args.from_md5 or getattr(args, "from_md5_file", None)) - if args.from_md5 and getattr(args, 'from_md5_file', None): + if args.from_md5 and getattr(args, "from_md5_file", None): parser.error("--from-md5 and --from-md5-file are mutually exclusive") if has_from_md5 and has_all: parser.error("--from-md5 requires --platform or --emulator, not --all") @@ -1982,13 +2172,19 @@ def _validate_args(args, parser): # --platform/--all and --system can combine (system filters within platform) # --emulator is exclusive with everything else if has_emulator and (has_platform or has_all or has_system): - parser.error("--emulator is mutually exclusive with --platform, --all, and --system") + parser.error( + "--emulator is mutually exclusive with --platform, --all, and --system" + ) if has_platform and has_all: parser.error("--platform and --all are mutually exclusive") if not (has_platform or has_all or has_emulator or has_system or has_from_md5): parser.error("Specify --platform, --all, --emulator, --system, or --from-md5") - if args.standalone and not (has_emulator or (has_system and not has_platform and not has_all)): - parser.error("--standalone requires --emulator or --system (without --platform)") + if args.standalone and not ( + has_emulator or (has_system and not has_platform and not has_all) + ): + parser.error( + "--standalone requires --emulator or --system (without --platform)" + ) if args.split and not (has_platform or has_all): parser.error("--split requires --platform or --all") if args.split and has_emulator: @@ -2026,7 +2222,9 @@ def _write_manifest_if_changed(path: str, manifest: dict) -> None: f.write(new_json) -def _run_manifest_mode(args, groups, db, zip_contents, emu_profiles, target_cores_cache): +def _run_manifest_mode( + args, groups, db, zip_contents, emu_profiles, target_cores_cache +): """Generate JSON manifests instead of ZIP packs.""" registry_path = os.path.join(args.platforms_dir, "_registry.yml") os.makedirs(args.output_dir, exist_ok=True) @@ -2039,15 +2237,22 @@ def _run_manifest_mode(args, groups, db, zip_contents, emu_profiles, target_core try: tc = target_cores_cache.get(representative) if args.target else None manifest = generate_manifest( - representative, args.platforms_dir, db, args.bios_dir, - registry_path, emulators_dir=args.emulators_dir, - zip_contents=zip_contents, emu_profiles=emu_profiles, + representative, + args.platforms_dir, + db, + args.bios_dir, + registry_path, + emulators_dir=args.emulators_dir, + zip_contents=zip_contents, + emu_profiles=emu_profiles, target_cores=tc, ) out_path = os.path.join(args.output_dir, f"{representative}.json") _write_manifest_if_changed(out_path, manifest) - print(f" {out_path}: {manifest['total_files']} files, " - f"{manifest['total_size']} bytes") + print( + f" {out_path}: {manifest['total_files']} files, " + f"{manifest['total_size']} bytes" + ) # Create aliases for grouped platforms (e.g., lakka -> retroarch) for alias_plat in group_platforms: if alias_plat != representative: @@ -2055,11 +2260,15 @@ def _run_manifest_mode(args, groups, db, zip_contents, emu_profiles, target_core alias_manifest = dict(manifest) alias_manifest["platform"] = alias_plat alias_cfg = load_platform_config(alias_plat, args.platforms_dir) - alias_manifest["display_name"] = alias_cfg.get("platform", alias_plat) + alias_manifest["display_name"] = alias_cfg.get( + "platform", alias_plat + ) alias_registry = registry.get("platforms", {}).get(alias_plat, {}) alias_install = alias_registry.get("install", {}) alias_manifest["detect"] = alias_install.get("detect", []) - alias_manifest["standalone_copies"] = alias_install.get("standalone_copies", []) + alias_manifest["standalone_copies"] = alias_install.get( + "standalone_copies", [] + ) _write_manifest_if_changed(alias_path, alias_manifest) print(f" {alias_path}: alias of {representative}") except (FileNotFoundError, OSError, yaml.YAMLError) as e: @@ -2111,7 +2320,11 @@ def _run_verify_packs(args): dest = fe.get("destination", fe.get("name", "")) if not dest: continue - fp = os.path.join(extract_dir, base_dest, dest) if base_dest else os.path.join(extract_dir, dest) + fp = ( + os.path.join(extract_dir, base_dest, dest) + if base_dest + else os.path.join(extract_dir, dest) + ) # Case-insensitive fallback if not os.path.exists(fp): parent = os.path.dirname(fp) @@ -2143,9 +2356,13 @@ def _run_verify_packs(args): if not expected_md5: ok += 1 continue - md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()] + md5_list = [ + m.strip().lower() for m in expected_md5.split(",") if m.strip() + ] actual_md5 = hashlib.md5(open(fp, "rb").read()).hexdigest() - if actual_md5 in md5_list or any(actual_md5.startswith(m) for m in md5_list if len(m) < 32): + if actual_md5 in md5_list or any( + actual_md5.startswith(m) for m in md5_list if len(m) < 32 + ): ok += 1 continue # ZIP inner content @@ -2154,16 +2371,37 @@ def _run_verify_packs(args): continue # Path collision bn = os.path.basename(dest) - collision = sum(1 for sd in systems.values() for ff in sd.get("files", []) - if os.path.basename(ff.get("destination", ff.get("name", "")) or "") == bn) > 1 + collision = ( + sum( + 1 + for sd in systems.values() + for ff in sd.get("files", []) + if os.path.basename( + ff.get("destination", ff.get("name", "")) or "" + ) + == bn + ) + > 1 + ) if collision: ok += 1 else: hash_fail.append(f"{sys_id}: {dest}") - total = sum(len([f for f in s.get("files", []) if f.get("destination", f.get("name", ""))]) for s in systems.values()) + total = sum( + len( + [ + f + for f in s.get("files", []) + if f.get("destination", f.get("name", "")) + ] + ) + for s in systems.values() + ) if missing or hash_fail: - print(f" {platform_name}: FAIL ({len(missing)} missing, {len(hash_fail)} hash errors / {total})") + print( + f" {platform_name}: FAIL ({len(missing)} missing, {len(hash_fail)} hash errors / {total})" + ) for m in missing[:5]: print(f" MISSING: {m}") for h in hash_fail[:5]: @@ -2178,13 +2416,24 @@ def _run_verify_packs(args): sys.exit(1) -def _run_platform_packs(args, groups, db, zip_contents, data_registry, - emu_profiles, target_cores_cache, system_filter): +def _run_platform_packs( + args, + groups, + db, + zip_contents, + data_registry, + emu_profiles, + target_cores_cache, + system_filter, +): """Generate ZIP packs for platform groups and verify.""" for group_platforms, representative in groups: variants = [p for p in group_platforms if p != representative] if variants: - all_names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms] + all_names = [ + load_platform_config(p, args.platforms_dir).get("platform", p) + for p in group_platforms + ] label = " / ".join(all_names) print(f"\nGenerating pack for {label}...") else: @@ -2194,19 +2443,33 @@ def _run_platform_packs(args, groups, db, zip_contents, data_registry, tc = target_cores_cache.get(representative) if args.target else None if args.split: zip_paths = generate_split_packs( - representative, args.platforms_dir, db, args.bios_dir, - args.output_dir, group_by=args.group_by, - emulators_dir=args.emulators_dir, zip_contents=zip_contents, - data_registry=data_registry, emu_profiles=emu_profiles, - target_cores=tc, required_only=args.required_only, + representative, + args.platforms_dir, + db, + args.bios_dir, + args.output_dir, + group_by=args.group_by, + emulators_dir=args.emulators_dir, + zip_contents=zip_contents, + data_registry=data_registry, + emu_profiles=emu_profiles, + target_cores=tc, + required_only=args.required_only, ) print(f" Split into {len(zip_paths)} packs") else: zip_path = generate_pack( - representative, args.platforms_dir, db, args.bios_dir, args.output_dir, - include_extras=args.include_extras, emulators_dir=args.emulators_dir, - zip_contents=zip_contents, data_registry=data_registry, - emu_profiles=emu_profiles, target_cores=tc, + representative, + args.platforms_dir, + db, + args.bios_dir, + args.output_dir, + include_extras=args.include_extras, + emulators_dir=args.emulators_dir, + zip_contents=zip_contents, + data_registry=data_registry, + emu_profiles=emu_profiles, + target_cores=tc, required_only=args.required_only, system_filter=system_filter, ) @@ -2214,8 +2477,14 @@ def _run_platform_packs(args, groups, db, zip_contents, data_registry, rep_cfg = load_platform_config(representative, args.platforms_dir) ver = rep_cfg.get("version", rep_cfg.get("dat_version", "")) ver_tag = f"_{ver.replace(' ', '')}" if ver else "" - all_names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms] - combined = "_".join(n.replace(" ", "") for n in all_names) + f"{ver_tag}_BIOS_Pack.zip" + all_names = [ + load_platform_config(p, args.platforms_dir).get("platform", p) + for p in group_platforms + ] + combined = ( + "_".join(n.replace(" ", "") for n in all_names) + + f"{ver_tag}_BIOS_Pack.zip" + ) new_path = os.path.join(os.path.dirname(zip_path), combined) if new_path != zip_path: os.rename(zip_path, new_path) @@ -2225,15 +2494,16 @@ def _run_platform_packs(args, groups, db, zip_contents, data_registry, print("\nVerifying packs and generating manifests...") skip_conf = bool(system_filter or args.split) - all_ok = verify_and_finalize_packs(args.output_dir, db, - skip_conformance=skip_conf, - data_registry=data_registry) + all_ok = verify_and_finalize_packs( + args.output_dir, db, skip_conformance=skip_conf, data_registry=data_registry + ) if args.split: for entry in os.listdir(args.output_dir): sub = os.path.join(args.output_dir, entry) if os.path.isdir(sub) and entry.endswith("_Split"): - ok = verify_and_finalize_packs(sub, db, skip_conformance=True, - data_registry=data_registry) + ok = verify_and_finalize_packs( + sub, db, skip_conformance=True, data_registry=data_registry + ) all_ok = all_ok and ok if not all_ok: print("WARNING: some packs have verification errors") @@ -2243,44 +2513,83 @@ def _run_platform_packs(args, groups, db, zip_contents, data_registry, def main(): parser = argparse.ArgumentParser(description="Generate platform BIOS ZIP packs") parser.add_argument("--platform", "-p", help="Platform name (e.g., retroarch)") - parser.add_argument("--all", action="store_true", help="Generate packs for all active platforms") - parser.add_argument("--emulator", "-e", help="Emulator profile name(s), comma-separated") + parser.add_argument( + "--all", action="store_true", help="Generate packs for all active platforms" + ) + parser.add_argument( + "--emulator", "-e", help="Emulator profile name(s), comma-separated" + ) parser.add_argument("--system", "-s", help="System ID(s), comma-separated") parser.add_argument("--standalone", action="store_true", help="Use standalone mode") - parser.add_argument("--list-emulators", action="store_true", help="List available emulators") - parser.add_argument("--list-systems", action="store_true", help="List available systems") - parser.add_argument("--include-archived", action="store_true", help="Include archived platforms") + parser.add_argument( + "--list-emulators", action="store_true", help="List available emulators" + ) + parser.add_argument( + "--list-systems", action="store_true", help="List available systems" + ) + parser.add_argument( + "--include-archived", action="store_true", help="Include archived platforms" + ) parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR) parser.add_argument("--db", default=DEFAULT_DB_FILE, help="Path to database.json") parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR) parser.add_argument("--output-dir", "-o", default=DEFAULT_OUTPUT_DIR) - parser.add_argument("--include-extras", action="store_true", - help="(no-op) Core requirements are always included") + parser.add_argument( + "--include-extras", + action="store_true", + help="(no-op) Core requirements are always included", + ) parser.add_argument("--emulators-dir", default="emulators") - parser.add_argument("--offline", action="store_true", - help="Skip data directory freshness check, use cache only") - parser.add_argument("--refresh-data", action="store_true", - help="Force re-download all data directories") + parser.add_argument( + "--offline", + action="store_true", + help="Skip data directory freshness check, use cache only", + ) + parser.add_argument( + "--refresh-data", + action="store_true", + help="Force re-download all data directories", + ) parser.add_argument("--list", action="store_true", help="List available platforms") - parser.add_argument("--required-only", action="store_true", - help="Only include required files, skip optional") - parser.add_argument("--split", action="store_true", - help="Generate one ZIP per system/manufacturer") - parser.add_argument("--group-by", choices=["system", "manufacturer"], - default="system", - help="Grouping for --split (default: system)") + parser.add_argument( + "--required-only", + action="store_true", + help="Only include required files, skip optional", + ) + parser.add_argument( + "--split", action="store_true", help="Generate one ZIP per system/manufacturer" + ) + parser.add_argument( + "--group-by", + choices=["system", "manufacturer"], + default="system", + help="Grouping for --split (default: system)", + ) parser.add_argument("--target", "-t", help="Hardware target (e.g., switch, rpi4)") - parser.add_argument("--list-targets", action="store_true", help="List available targets for the platform") - parser.add_argument("--from-md5", - help="Hash(es) to look up or pack (comma-separated)") - parser.add_argument("--from-md5-file", - help="File with hashes (one per line)") - parser.add_argument("--manifest", action="store_true", - help="Output JSON manifests instead of ZIP packs") - parser.add_argument("--manifest-targets", action="store_true", - help="Convert target YAMLs to installer JSON") - parser.add_argument("--verify-packs", action="store_true", - help="Extract and verify pack integrity (path + hash)") + parser.add_argument( + "--list-targets", + action="store_true", + help="List available targets for the platform", + ) + parser.add_argument( + "--from-md5", help="Hash(es) to look up or pack (comma-separated)" + ) + parser.add_argument("--from-md5-file", help="File with hashes (one per line)") + parser.add_argument( + "--manifest", + action="store_true", + help="Output JSON manifests instead of ZIP packs", + ) + parser.add_argument( + "--manifest-targets", + action="store_true", + help="Convert target YAMLs to installer JSON", + ) + parser.add_argument( + "--verify-packs", + action="store_true", + help="Extract and verify pack integrity (path + hash)", + ) args = parser.parse_args() # Quick-exit modes @@ -2289,7 +2598,8 @@ def main(): return if args.manifest_targets: generate_target_manifests( - os.path.join(args.platforms_dir, "targets"), args.output_dir) + os.path.join(args.platforms_dir, "targets"), args.output_dir + ) return if args.list: for p in list_platforms(args.platforms_dir): @@ -2308,35 +2618,48 @@ def main(): if not args.platform: parser.error("--list-targets requires --platform") from common import list_available_targets + targets = list_available_targets(args.platform, args.platforms_dir) if not targets: print(f"No targets configured for platform '{args.platform}'") return for t in targets: - aliases = f" (aliases: {', '.join(t['aliases'])})" if t['aliases'] else "" - print(f" {t['name']:30s} {t['architecture']:10s} {t['core_count']:>4d} cores{aliases}") + aliases = f" (aliases: {', '.join(t['aliases'])})" if t["aliases"] else "" + print( + f" {t['name']:30s} {t['architecture']:10s} {t['core_count']:>4d} cores{aliases}" + ) return _validate_args(args, parser) # Hash lookup / pack mode - has_from_md5 = bool(args.from_md5 or getattr(args, 'from_md5_file', None)) + has_from_md5 = bool(args.from_md5 or getattr(args, "from_md5_file", None)) if has_from_md5: - hashes = parse_hash_input(args.from_md5) if args.from_md5 else parse_hash_file(args.from_md5_file) + hashes = ( + parse_hash_input(args.from_md5) + if args.from_md5 + else parse_hash_file(args.from_md5_file) + ) if not hashes: print("No valid hashes found in input", file=sys.stderr) sys.exit(1) db = load_database(args.db) if not args.platform and not args.emulator: - lookup_hashes(hashes, db, args.bios_dir, args.emulators_dir, - args.platforms_dir) + lookup_hashes( + hashes, db, args.bios_dir, args.emulators_dir, args.platforms_dir + ) return zip_contents = build_zip_contents_index(db) result = generate_md5_pack( - hashes=hashes, db=db, bios_dir=args.bios_dir, - output_dir=args.output_dir, zip_contents=zip_contents, - platform_name=args.platform, platforms_dir=args.platforms_dir, - emulator_name=args.emulator, emulators_dir=args.emulators_dir, + hashes=hashes, + db=db, + bios_dir=args.bios_dir, + output_dir=args.output_dir, + zip_contents=zip_contents, + platform_name=args.platform, + platforms_dir=args.platforms_dir, + emulator_name=args.emulator, + emulators_dir=args.emulators_dir, standalone=getattr(args, "standalone", False), ) if not result: @@ -2350,8 +2673,14 @@ def main(): if args.emulator: names = [n.strip() for n in args.emulator.split(",") if n.strip()] if not generate_emulator_pack( - names, args.emulators_dir, db, args.bios_dir, args.output_dir, - args.standalone, zip_contents, required_only=args.required_only, + names, + args.emulators_dir, + db, + args.bios_dir, + args.output_dir, + args.standalone, + zip_contents, + required_only=args.required_only, ): sys.exit(1) return @@ -2360,18 +2689,29 @@ def main(): if args.system and not args.platform and not args.all: system_ids = [s.strip() for s in args.system.split(",") if s.strip()] if not generate_system_pack( - system_ids, args.emulators_dir, db, args.bios_dir, args.output_dir, - args.standalone, zip_contents, required_only=args.required_only, + system_ids, + args.emulators_dir, + db, + args.bios_dir, + args.output_dir, + args.standalone, + zip_contents, + required_only=args.required_only, ): sys.exit(1) return - system_filter = [s.strip() for s in args.system.split(",") if s.strip()] if args.system else None + system_filter = ( + [s.strip() for s in args.system.split(",") if s.strip()] + if args.system + else None + ) # Platform mode if args.all: platforms = list_registered_platforms( - args.platforms_dir, include_archived=args.include_archived) + args.platforms_dir, include_archived=args.include_archived + ) elif args.platform: platforms = [args.platform] else: @@ -2380,7 +2720,8 @@ def main(): data_registry = load_data_dir_registry(args.platforms_dir) if data_registry and not args.offline: - from refresh_data_dirs import refresh_all, load_registry + from refresh_data_dirs import load_registry, refresh_all + registry = load_registry(os.path.join(args.platforms_dir, "_data_dirs.yml")) results = refresh_all(registry, force=args.refresh_data) updated = sum(1 for v in results.values() if v) @@ -2393,20 +2734,34 @@ def main(): if args.target: try: target_cores_cache, platforms = build_target_cores_cache( - platforms, args.target, args.platforms_dir, is_all=args.all, + platforms, + args.target, + args.platforms_dir, + is_all=args.all, ) except (FileNotFoundError, ValueError) as e: print(f"ERROR: {e}", file=sys.stderr) sys.exit(1) - groups = group_identical_platforms(platforms, args.platforms_dir, - target_cores_cache if args.target else None) + groups = group_identical_platforms( + platforms, args.platforms_dir, target_cores_cache if args.target else None + ) if args.manifest: - _run_manifest_mode(args, groups, db, zip_contents, emu_profiles, target_cores_cache) + _run_manifest_mode( + args, groups, db, zip_contents, emu_profiles, target_cores_cache + ) else: - _run_platform_packs(args, groups, db, zip_contents, data_registry, - emu_profiles, target_cores_cache, system_filter) + _run_platform_packs( + args, + groups, + db, + zip_contents, + data_registry, + emu_profiles, + target_cores_cache, + system_filter, + ) # Manifest generation (JSON inventory for install.py) @@ -2491,6 +2846,7 @@ def generate_manifest( # Filter systems by target from common import resolve_platform_cores + plat_cores = resolve_platform_cores(config, emu_profiles) if target_cores else None pack_systems = filter_systems_by_target( config.get("systems", {}), @@ -2550,7 +2906,9 @@ def generate_manifest( if _is_large_file(local_path or "", repo_root): entry["storage"] = "release" - entry["release_asset"] = os.path.basename(local_path) if local_path else file_entry["name"] + entry["release_asset"] = ( + os.path.basename(local_path) if local_path else file_entry["name"] + ) manifest_files.append(entry) total_size += file_size @@ -2561,8 +2919,13 @@ def generate_manifest( # Phase 2: core complement (emulator extras) core_files = _collect_emulator_extras( - config, emulators_dir, db, - seen_destinations, base_dest, emu_profiles, target_cores=target_cores, + config, + emulators_dir, + db, + seen_destinations, + base_dest, + emu_profiles, + target_cores=target_cores, ) extras_pfx = _detect_extras_prefix(config, base_dest) for fe in core_files: @@ -2585,8 +2948,9 @@ def generate_manifest( continue dest_hint = fe.get("destination", "") - local_path, status = resolve_file(fe, db, bios_dir, zip_contents, - dest_hint=dest_hint) + local_path, status = resolve_file( + fe, db, bios_dir, zip_contents, dest_hint=dest_hint + ) if status in ("not_found", "external", "user_provided"): continue @@ -2610,7 +2974,9 @@ def generate_manifest( if _is_large_file(local_path or "", repo_root): entry["storage"] = "release" - entry["release_asset"] = os.path.basename(local_path) if local_path else fe["name"] + entry["release_asset"] = ( + os.path.basename(local_path) if local_path else fe["name"] + ) manifest_files.append(entry) total_size += file_size @@ -2621,9 +2987,11 @@ def generate_manifest( # No phase 3 (data directories) -skipped for manifest - now = __import__("datetime").datetime.now( - __import__("datetime").timezone.utc - ).strftime("%Y-%m-%dT%H:%M:%SZ") + now = ( + __import__("datetime") + .datetime.now(__import__("datetime").timezone.utc) + .strftime("%Y-%m-%dT%H:%M:%SZ") + ) result: dict = { "manifest_version": 1, @@ -2643,8 +3011,10 @@ def generate_manifest( # Post-generation pack verification + manifest + SHA256SUMS -def verify_pack(zip_path: str, db: dict, - data_registry: dict | None = None) -> tuple[bool, dict]: + +def verify_pack( + zip_path: str, db: dict, data_registry: dict | None = None +) -> tuple[bool, dict]: """Verify a generated pack ZIP by re-hashing every file inside. Checks against database.json, data directory caches, and verifies @@ -2673,9 +3043,9 @@ def verify_pack(zip_path: str, db: dict, manifest = { "version": 1, "generator": "retrobios generate_pack.py", - "generated": __import__("datetime").datetime.now( - __import__("datetime").timezone.utc - ).strftime("%Y-%m-%dT%H:%M:%SZ"), + "generated": __import__("datetime") + .datetime.now(__import__("datetime").timezone.utc) + .strftime("%Y-%m-%dT%H:%M:%SZ"), "files": [], } errors = [] @@ -2685,7 +3055,10 @@ def verify_pack(zip_path: str, db: dict, if info.is_dir(): continue name = info.filename - if name.startswith("INSTRUCTIONS_") or name in ("manifest.json", "README.txt"): + if name.startswith("INSTRUCTIONS_") or name in ( + "manifest.json", + "README.txt", + ): continue with zf.open(info) as f: sha1_h = hashlib.sha1() @@ -2728,10 +3101,19 @@ def verify_pack(zip_path: str, db: dict, continue try: import io as _io + with zipfile.ZipFile(_src_path) as _sz: - _sc = {i.filename: i.CRC for i in _sz.infolist() if not i.is_dir()} + _sc = { + i.filename: i.CRC + for i in _sz.infolist() + if not i.is_dir() + } with zipfile.ZipFile(_io.BytesIO(zf.read(name))) as _pz: - _pc = {i.filename: i.CRC for i in _pz.infolist() if not i.is_dir()} + _pc = { + i.filename: i.CRC + for i in _pz.infolist() + if not i.is_dir() + } if _sc == _pc: status = "verified_rebuild" file_name = _bn @@ -2742,7 +3124,7 @@ def verify_pack(zip_path: str, db: dict, # Data directory: check against cached files if status == "untracked" and _data_index: _bn = os.path.basename(name) - _pr = name[len("system/"):] if name.startswith("system/") else name + _pr = name[len("system/") :] if name.startswith("system/") else name _cands = [] if _pr in _data_path_index: _cands.append(_data_path_index[_pr]) @@ -2759,10 +3141,19 @@ def verify_pack(zip_path: str, db: dict, if name.endswith(".zip") and _dp.endswith(".zip"): try: import io as _io2 + with zipfile.ZipFile(_io2.BytesIO(zf.read(name))) as _pz2: - _pc2 = {i.filename: i.CRC for i in _pz2.infolist() if not i.is_dir()} + _pc2 = { + i.filename: i.CRC + for i in _pz2.infolist() + if not i.is_dir() + } with zipfile.ZipFile(_dp) as _dz: - _dc = {i.filename: i.CRC for i in _dz.infolist() if not i.is_dir()} + _dc = { + i.filename: i.CRC + for i in _dz.infolist() + if not i.is_dir() + } if _pc2 == _dc: status = "verified_data" file_name = _bn @@ -2770,14 +3161,16 @@ def verify_pack(zip_path: str, db: dict, except (zipfile.BadZipFile, OSError): continue - manifest["files"].append({ - "path": name, - "sha1": sha1, - "md5": md5, - "size": size, - "status": status, - "name": file_name, - }) + manifest["files"].append( + { + "path": name, + "sha1": sha1, + "md5": md5, + "size": size, + "status": status, + "name": file_name, + } + ) # Corruption check: SHA1 in DB but doesn't match what we computed # This should never happen (we looked up by SHA1), but catches @@ -2785,7 +3178,9 @@ def verify_pack(zip_path: str, db: dict, if db_entry and status == "verified_md5": expected_sha1 = db_entry.get("sha1", "") if expected_sha1 and expected_sha1.lower() != sha1.lower(): - errors.append(f"{name}: SHA1 mismatch (expected {expected_sha1}, got {sha1})") + errors.append( + f"{name}: SHA1 mismatch (expected {expected_sha1}, got {sha1})" + ) verified = sum(1 for f in manifest["files"] if f["status"].startswith("verified")) untracked = sum(1 for f in manifest["files"] if f["status"] == "untracked") @@ -2817,11 +3212,16 @@ def inject_manifest(zip_path: str, manifest: dict) -> None: else: # Rebuild to replace existing manifest import tempfile as _tempfile - tmp_fd, tmp_path = _tempfile.mkstemp(suffix=".zip", dir=os.path.dirname(zip_path)) + + tmp_fd, tmp_path = _tempfile.mkstemp( + suffix=".zip", dir=os.path.dirname(zip_path) + ) os.close(tmp_fd) try: - with zipfile.ZipFile(zip_path, "r") as src, \ - zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as dst: + with ( + zipfile.ZipFile(zip_path, "r") as src, + zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as dst, + ): for item in src.infolist(): if item.filename == "manifest.json": continue @@ -2855,8 +3255,11 @@ def generate_sha256sums(output_dir: str) -> str | None: def verify_pack_against_platform( - zip_path: str, platform_name: str, platforms_dir: str, - db: dict | None = None, emulators_dir: str = "emulators", + zip_path: str, + platform_name: str, + platforms_dir: str, + db: dict | None = None, + emulators_dir: str = "emulators", emu_profiles: dict | None = None, ) -> tuple[bool, int, int, list[str]]: """Verify a pack ZIP against its platform config and core requirements. @@ -2872,6 +3275,7 @@ def verify_pack_against_platform( Returns (all_ok, checked, present, errors). """ from collections import Counter + from verify import find_undeclared_files config = load_platform_config(platform_name, platforms_dir) @@ -2902,7 +3306,10 @@ def verify_pack_against_platform( # Zero-byte check (exclude Dolphin GraphicMods markers) for info in zf.infolist(): if info.file_size == 0 and not info.is_dir(): - if "GraphicMods" not in info.filename and info.filename not in ("manifest.json", "README.txt"): + if "GraphicMods" not in info.filename and info.filename not in ( + "manifest.json", + "README.txt", + ): errors.append(f"zero-byte: {info.filename}") # 1. Baseline file presence @@ -2952,8 +3359,9 @@ def verify_pack_against_platform( continue # Skip unresolvable files (game_data dirs, etc.) fe = {"name": u["name"], "destination": dest} - local_path, status = resolve_file(fe, db, "bios", {}, - dest_hint=raw_dest) + local_path, status = resolve_file( + fe, db, "bios", {}, dest_hint=raw_dest + ) if status in ("not_found", "external", "user_provided"): continue core_checked += 1 @@ -2965,14 +3373,25 @@ def verify_pack_against_platform( checked = baseline_checked + core_checked present = baseline_present + core_present - return (len(errors) == 0, checked, present, errors, - baseline_checked, baseline_present, core_checked, core_present) + return ( + len(errors) == 0, + checked, + present, + errors, + baseline_checked, + baseline_present, + core_checked, + core_present, + ) -def verify_and_finalize_packs(output_dir: str, db: dict, - platforms_dir: str = "platforms", - skip_conformance: bool = False, - data_registry: dict | None = None) -> bool: +def verify_and_finalize_packs( + output_dir: str, + db: dict, + platforms_dir: str = "platforms", + skip_conformance: bool = False, + data_registry: dict | None = None, +) -> bool: """Verify all packs, inject manifests, generate SHA256SUMS. Two-stage verification: @@ -3003,8 +3422,10 @@ def verify_and_finalize_packs(output_dir: str, db: dict, ok, manifest = verify_pack(zip_path, db, data_registry=data_registry) summary = manifest["summary"] status = "OK" if ok else "ERRORS" - print(f" verify {name}: {summary['verified']}/{summary['total_files']} verified, " - f"{summary['untracked']} untracked, {summary['errors']} errors [{status}]") + print( + f" verify {name}: {summary['verified']}/{summary['total_files']} verified, " + f"{summary['untracked']} untracked, {summary['errors']} errors [{status}]" + ) if not ok: for err in manifest["errors"]: print(f" ERROR: {err}") @@ -3017,14 +3438,26 @@ def verify_and_finalize_packs(output_dir: str, db: dict, continue platforms = pack_to_platform.get(name, []) for pname in platforms: - (p_ok, total, matched, p_errors, - bl_checked, bl_present, core_checked, core_present) = \ - verify_pack_against_platform( - zip_path, pname, platforms_dir, db=db, - ) + ( + p_ok, + total, + matched, + p_errors, + bl_checked, + bl_present, + core_checked, + core_present, + ) = verify_pack_against_platform( + zip_path, + pname, + platforms_dir, + db=db, + ) status = "OK" if p_ok else "FAILED" - print(f" platform {pname}: {bl_present}/{bl_checked} baseline, " - f"{core_present}/{core_checked} cores, {status}") + print( + f" platform {pname}: {bl_present}/{bl_checked} baseline, " + f"{core_present}/{core_checked} cores, {status}" + ) if not p_ok: for err in p_errors: print(f" {err}") diff --git a/scripts/generate_readme.py b/scripts/generate_readme.py index 490a327d..cdd5fb99 100644 --- a/scripts/generate_readme.py +++ b/scripts/generate_readme.py @@ -18,15 +18,29 @@ from datetime import datetime, timezone from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) -from common import list_registered_platforms, load_database, load_platform_config, write_if_changed +from common import ( + list_registered_platforms, + load_database, + load_platform_config, + write_if_changed, +) from verify import verify_platform -def compute_coverage(platform_name: str, platforms_dir: str, db: dict, - data_registry: dict | None = None, - supplemental_names: set[str] | None = None) -> dict: + +def compute_coverage( + platform_name: str, + platforms_dir: str, + db: dict, + data_registry: dict | None = None, + supplemental_names: set[str] | None = None, +) -> dict: config = load_platform_config(platform_name, platforms_dir) - result = verify_platform(config, db, data_dir_registry=data_registry, - supplemental_names=supplemental_names) + result = verify_platform( + config, + db, + data_dir_registry=data_registry, + supplemental_names=supplemental_names, + ) sc = result.get("status_counts", {}) ok = sc.get("ok", 0) untested = sc.get("untested", 0) @@ -55,8 +69,9 @@ REPO = "Abdess/retrobios" def fetch_contributors() -> list[dict]: """Fetch contributors from GitHub API, exclude bots.""" - import urllib.request import urllib.error + import urllib.request + url = f"https://api.github.com/repos/{REPO}/contributors" headers = {"User-Agent": "retrobios-readme/1.0"} token = os.environ.get("GITHUB_TOKEN", "") @@ -68,7 +83,8 @@ def fetch_contributors() -> list[dict]: data = json.loads(resp.read().decode()) owner = REPO.split("/")[0] return [ - c for c in data + c + for c in data if not c.get("login", "").endswith("[bot]") and c.get("type") == "User" and c.get("login") != owner @@ -87,21 +103,28 @@ def generate_readme(db: dict, platforms_dir: str) -> str: from common import load_data_dir_registry from cross_reference import _build_supplemental_index + data_registry = load_data_dir_registry(platforms_dir) suppl_names = _build_supplemental_index() coverages = {} for name in platform_names: try: - coverages[name] = compute_coverage(name, platforms_dir, db, - data_registry, suppl_names) + coverages[name] = compute_coverage( + name, platforms_dir, db, data_registry, suppl_names + ) except FileNotFoundError: pass - emulator_count = sum( - 1 for f in Path("emulators").glob("*.yml") - if not f.name.endswith(".old.yml") - ) if Path("emulators").exists() else 0 + emulator_count = ( + sum( + 1 + for f in Path("emulators").glob("*.yml") + if not f.name.endswith(".old.yml") + ) + if Path("emulators").exists() + else 0 + ) # Count systems from emulator profiles system_ids: set[str] = set() @@ -109,6 +132,7 @@ def generate_readme(db: dict, platforms_dir: str) -> str: if emu_dir.exists(): try: import yaml + for f in emu_dir.glob("*.yml"): if f.name.endswith(".old.yml"): continue @@ -122,8 +146,12 @@ def generate_readme(db: dict, platforms_dir: str) -> str: "# RetroBIOS", "", f"Complete BIOS and firmware packs for " - f"{', '.join(c['platform'] for c in sorted(coverages.values(), key=lambda x: x['platform'])[:-1])}" - f", and {sorted(coverages.values(), key=lambda x: x['platform'])[-1]['platform']}.", + f"{', '.join(c['platform'] for c in sorted(coverages.values(), key=lambda x: x[ + 'platform' + ])[:-1])}" + f", and {sorted(coverages.values(), key=lambda x: x[ + 'platform' + ])[-1]['platform']}.", "", f"**{total_files:,}** verified files across **{len(system_ids)}** systems," f" ready to extract into your emulator's BIOS directory.", @@ -170,48 +198,78 @@ def generate_readme(db: dict, platforms_dir: str) -> str: display = cov["platform"] path = extract_paths.get(display, "") lines.append( - f"| {display} | {cov['total']} | {path} | " - f"[Download]({RELEASE_URL}) |" + f"| {display} | {cov['total']} | {path} | [Download]({RELEASE_URL}) |" ) - lines.extend([ - "", - "## What's included", - "", - "BIOS, firmware, and system files for consoles from Atari to PlayStation 3.", - f"Each file is checked against the emulator's source code to match what the" - f" code actually loads at runtime.", - "", - f"- **{len(coverages)} platforms** supported with platform-specific verification", - f"- **{emulator_count} emulators** profiled from source (RetroArch cores + standalone)", - f"- **{len(system_ids)} systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...)", - f"- **{total_files:,} files** verified with MD5, SHA1, CRC32 checksums", - f"- **{size_mb:.0f} MB** total collection size", - "", - "## Supported systems", - "", - ]) + lines.extend( + [ + "", + "## What's included", + "", + "BIOS, firmware, and system files for consoles from Atari to PlayStation 3.", + "Each file is checked against the emulator's source code to match what the" + " code actually loads at runtime.", + "", + f"- **{len(coverages)} platforms** supported with platform-specific verification", + f"- **{emulator_count} emulators** profiled from source (RetroArch cores + standalone)", + f"- **{len(system_ids)} systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...)", + f"- **{total_files:,} files** verified with MD5, SHA1, CRC32 checksums", + f"- **{size_mb:.0f} MB** total collection size", + "", + "## Supported systems", + "", + ] + ) # Show well-known systems for SEO, link to full list well_known = [ - "NES", "SNES", "Nintendo 64", "GameCube", "Wii", "Game Boy", "Game Boy Advance", - "Nintendo DS", "Nintendo 3DS", "Switch", - "PlayStation", "PlayStation 2", "PlayStation 3", "PSP", "PS Vita", - "Mega Drive", "Saturn", "Dreamcast", "Game Gear", "Master System", - "Neo Geo", "Atari 2600", "Atari 7800", "Atari Lynx", "Atari ST", - "MSX", "PC Engine", "TurboGrafx-16", "ColecoVision", "Intellivision", - "Commodore 64", "Amiga", "ZX Spectrum", "Arcade (MAME)", + "NES", + "SNES", + "Nintendo 64", + "GameCube", + "Wii", + "Game Boy", + "Game Boy Advance", + "Nintendo DS", + "Nintendo 3DS", + "Switch", + "PlayStation", + "PlayStation 2", + "PlayStation 3", + "PSP", + "PS Vita", + "Mega Drive", + "Saturn", + "Dreamcast", + "Game Gear", + "Master System", + "Neo Geo", + "Atari 2600", + "Atari 7800", + "Atari Lynx", + "Atari ST", + "MSX", + "PC Engine", + "TurboGrafx-16", + "ColecoVision", + "Intellivision", + "Commodore 64", + "Amiga", + "ZX Spectrum", + "Arcade (MAME)", ] - lines.extend([ - ", ".join(well_known) + f", and {len(system_ids) - len(well_known)}+ more.", - "", - f"Full list with per-file details: **[{SITE_URL}]({SITE_URL})**", - "", - "## Coverage", - "", - "| Platform | Coverage | Verified | Untested | Missing |", - "|----------|----------|----------|----------|---------|", - ]) + lines.extend( + [ + ", ".join(well_known) + f", and {len(system_ids) - len(well_known)}+ more.", + "", + f"Full list with per-file details: **[{SITE_URL}]({SITE_URL})**", + "", + "## Coverage", + "", + "| Platform | Coverage | Verified | Untested | Missing |", + "|----------|----------|----------|----------|---------|", + ] + ) for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): pct = f"{cov['percentage']:.1f}%" @@ -220,62 +278,66 @@ def generate_readme(db: dict, platforms_dir: str) -> str: f"{cov['verified']} | {cov['untested']} | {cov['missing']} |" ) - lines.extend([ - "", - "## Build your own pack", - "", - "Clone the repo and generate packs for any platform, emulator, or system:", - "", - "```bash", - "# Full platform pack", - "python scripts/generate_pack.py --platform retroarch --output-dir dist/", - "python scripts/generate_pack.py --platform batocera --output-dir dist/", - "", - "# Single emulator or system", - "python scripts/generate_pack.py --emulator dolphin", - "python scripts/generate_pack.py --system sony-playstation-2", - "", - "# List available emulators and systems", - "python scripts/generate_pack.py --list-emulators", - "python scripts/generate_pack.py --list-systems", - "", - "# Verify your BIOS collection", - "python scripts/verify.py --all", - "python scripts/verify.py --platform batocera", - "python scripts/verify.py --emulator flycast", - "python scripts/verify.py --platform retroarch --verbose # emulator ground truth", - "```", - "", - f"Only dependency: Python 3 + `pyyaml`.", - "", - "## Documentation site", - "", - f"The [documentation site]({SITE_URL}) provides:", - "", - f"- **Per-platform pages** with file-by-file verification status and hashes", - f"- **Per-emulator profiles** with source code references for every file", - f"- **Per-system pages** showing which emulators and platforms cover each console", - f"- **Gap analysis** identifying missing files and undeclared core requirements", - f"- **Cross-reference** mapping files across {len(coverages)} platforms and {emulator_count} emulators", - "", - "## How it works", - "", - "Documentation and metadata can drift from what emulators actually load.", - "To keep packs accurate, each file is checked against the emulator's source code.", - "", - "1. **Read emulator source code** - trace every file the code loads, its expected hash and size", - "2. **Cross-reference with platforms** - match against what each platform declares", - "3. **Build packs** - include baseline files plus what each platform's cores need", - "4. **Verify** - run platform-native checks and emulator-level validation", - "", - ]) + lines.extend( + [ + "", + "## Build your own pack", + "", + "Clone the repo and generate packs for any platform, emulator, or system:", + "", + "```bash", + "# Full platform pack", + "python scripts/generate_pack.py --platform retroarch --output-dir dist/", + "python scripts/generate_pack.py --platform batocera --output-dir dist/", + "", + "# Single emulator or system", + "python scripts/generate_pack.py --emulator dolphin", + "python scripts/generate_pack.py --system sony-playstation-2", + "", + "# List available emulators and systems", + "python scripts/generate_pack.py --list-emulators", + "python scripts/generate_pack.py --list-systems", + "", + "# Verify your BIOS collection", + "python scripts/verify.py --all", + "python scripts/verify.py --platform batocera", + "python scripts/verify.py --emulator flycast", + "python scripts/verify.py --platform retroarch --verbose # emulator ground truth", + "```", + "", + "Only dependency: Python 3 + `pyyaml`.", + "", + "## Documentation site", + "", + f"The [documentation site]({SITE_URL}) provides:", + "", + "- **Per-platform pages** with file-by-file verification status and hashes", + "- **Per-emulator profiles** with source code references for every file", + "- **Per-system pages** showing which emulators and platforms cover each console", + "- **Gap analysis** identifying missing files and undeclared core requirements", + f"- **Cross-reference** mapping files across {len(coverages)} platforms and {emulator_count} emulators", + "", + "## How it works", + "", + "Documentation and metadata can drift from what emulators actually load.", + "To keep packs accurate, each file is checked against the emulator's source code.", + "", + "1. **Read emulator source code** - trace every file the code loads, its expected hash and size", + "2. **Cross-reference with platforms** - match against what each platform declares", + "3. **Build packs** - include baseline files plus what each platform's cores need", + "4. **Verify** - run platform-native checks and emulator-level validation", + "", + ] + ) contributors = fetch_contributors() if contributors: - lines.extend([ - "## Contributors", - "", - ]) + lines.extend( + [ + "## Contributors", + "", + ] + ) for c in contributors: login = c["login"] avatar = c.get("avatar_url", "") @@ -285,18 +347,20 @@ def generate_readme(db: dict, platforms_dir: str) -> str: ) lines.append("") - lines.extend([ - "", - "## Contributing", - "", - "See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.", - "", - "## License", - "", - "This repository provides BIOS files for personal backup and archival purposes.", - "", - f"*Auto-generated on {ts}*", - ]) + lines.extend( + [ + "", + "## Contributing", + "", + "See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.", + "", + "## License", + "", + "This repository provides BIOS files for personal backup and archival purposes.", + "", + f"*Auto-generated on {ts}*", + ] + ) return "\n".join(lines) + "\n" @@ -332,7 +396,11 @@ def main(): print(f"{status} ./README.md") contributing = generate_contributing() - status = "Generated" if write_if_changed("CONTRIBUTING.md", contributing) else "Unchanged" + status = ( + "Generated" + if write_if_changed("CONTRIBUTING.md", contributing) + else "Unchanged" + ) print(f"{status} ./CONTRIBUTING.md") diff --git a/scripts/generate_site.py b/scripts/generate_site.py index 39dba7c0..e2252f95 100644 --- a/scripts/generate_site.py +++ b/scripts/generate_site.py @@ -19,7 +19,13 @@ from datetime import datetime, timezone from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) -from common import list_registered_platforms, load_database, load_emulator_profiles, require_yaml, write_if_changed +from common import ( + list_registered_platforms, + load_database, + load_emulator_profiles, + require_yaml, + write_if_changed, +) yaml = require_yaml() from generate_readme import compute_coverage @@ -37,14 +43,16 @@ _system_page_map: dict[str, tuple[str, str]] = {} def _build_system_page_map_from_data( - manufacturers: dict, coverages: dict, db: dict, + manufacturers: dict, + coverages: dict, + db: dict, ) -> None: """Build system_id -> (manufacturer_slug, console_name) mapping. Uses platform file paths to trace system_id -> bios directory -> manufacturer page. """ - files_db = db.get("files", {}) - by_name = db.get("indexes", {}).get("by_name", {}) + db.get("files", {}) + db.get("indexes", {}).get("by_name", {}) # Build reverse index: filename -> (manufacturer, console) from bios/ structure file_to_console: dict[str, tuple[str, str]] = {} @@ -64,7 +72,14 @@ def _build_system_page_map_from_data( console_norm[norm] = entry console_norm[f"{mfr_norm}-{norm}"] = entry # Short aliases: strip common manufacturer prefix words - for prefix in (f"{mfr_norm}-", "nintendo-", "sega-", "sony-", "snk-", "nec-"): + for prefix in ( + f"{mfr_norm}-", + "nintendo-", + "sega-", + "sony-", + "snk-", + "nec-", + ): if norm.startswith(prefix.replace(f"{mfr_norm}-", "")): pass # already covered by norm key = f"{prefix}{norm}" @@ -122,8 +137,11 @@ def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None: lines.append("") for item in v: if isinstance(item, dict): - parts = [f"{ik}: {iv}" for ik, iv in item.items() - if not isinstance(iv, (dict, list))] + parts = [ + f"{ik}: {iv}" + for ik, iv in item.items() + if not isinstance(iv, (dict, list)) + ] lines.append(f"{pad}- {', '.join(parts)}") else: lines.append(f"{pad}- {item}") @@ -137,8 +155,11 @@ def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None: elif isinstance(val, list): for item in val: if isinstance(item, dict): - parts = [f"{ik}: {iv}" for ik, iv in item.items() - if not isinstance(iv, (dict, list))] + parts = [ + f"{ik}: {iv}" + for ik, iv in item.items() + if not isinstance(iv, (dict, list)) + ] lines.append(f"{pad}- {', '.join(parts)}") else: lines.append(f"{pad}- {item}") @@ -189,14 +210,17 @@ def _status_icon(pct: float) -> str: # Home page -def generate_home(db: dict, coverages: dict, profiles: dict, - registry: dict | None = None) -> str: + +def generate_home( + db: dict, coverages: dict, profiles: dict, registry: dict | None = None +) -> str: total_files = db.get("total_files", 0) total_size = db.get("total_size", 0) ts = _timestamp() - unique = {k: v for k, v in profiles.items() - if v.get("type") not in ("alias", "test")} + unique = { + k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") + } emulator_count = len(unique) # Classification stats @@ -251,18 +275,22 @@ def generate_home(db: dict, coverages: dict, profiles: dict, ] # Platform table - lines.extend([ - "## Platforms", - "", - "| | Platform | Coverage | Verified | Download |", - "|---|----------|----------|----------|----------|", - ]) + lines.extend( + [ + "## Platforms", + "", + "| | Platform | Coverage | Verified | Download |", + "|---|----------|----------|----------|----------|", + ] + ) for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): display = cov["platform"] pct = _pct(cov["present"], cov["total"]) logo_url = (registry or {}).get(name, {}).get("logo", "") - logo_md = f"![{display}]({logo_url}){{ width=20 loading=lazy }}" if logo_url else "" + logo_md = ( + f"![{display}]({logo_url}){{ width=20 loading=lazy }}" if logo_url else "" + ) lines.append( f"| {logo_md} | [{display}](platforms/{name}.md) | " @@ -272,35 +300,40 @@ def generate_home(db: dict, coverages: dict, profiles: dict, ) # Emulator classification breakdown - lines.extend([ - "", - "## Emulator profiles", - "", - "| Classification | Count |", - "|---------------|-------|", - ]) + lines.extend( + [ + "", + "## Emulator profiles", + "", + "| Classification | Count |", + "|---------------|-------|", + ] + ) for cls, count in sorted(classifications.items(), key=lambda x: -x[1]): lines.append(f"| {cls} | {count} |") # Quick links - lines.extend([ - "", - "---", - "", - f"[Systems](systems/){{ .md-button }} " - f"[Emulators](emulators/){{ .md-button }} " - f"[Cross-reference](cross-reference.md){{ .md-button }} " - f"[Gap Analysis](gaps.md){{ .md-button }} " - f"[Contributing](contributing.md){{ .md-button .md-button--primary }}", - "", - f"*Generated on {ts}.*", - ]) + lines.extend( + [ + "", + "---", + "", + "[Systems](systems/){ .md-button } " + "[Emulators](emulators/){ .md-button } " + "[Cross-reference](cross-reference.md){ .md-button } " + "[Gap Analysis](gaps.md){ .md-button } " + "[Contributing](contributing.md){ .md-button .md-button--primary }", + "", + f"*Generated on {ts}.*", + ] + ) return "\n".join(lines) + "\n" # Platform pages + def generate_platform_index(coverages: dict) -> str: lines = [ f"# Platforms - {SITE_NAME}", @@ -313,7 +346,9 @@ def generate_platform_index(coverages: dict) -> str: display = cov["platform"] pct = _pct(cov["present"], cov["total"]) plat_status = cov["config"].get("status", "active") - status = "archived" if plat_status == "archived" else _status_icon(cov["percentage"]) + status = ( + "archived" if plat_status == "archived" else _status_icon(cov["percentage"]) + ) lines.append( f"| [{display}]({name}.md) | " f"{cov['present']}/{cov['total']} ({pct}) | " @@ -323,15 +358,21 @@ def generate_platform_index(coverages: dict) -> str: return "\n".join(lines) + "\n" -def generate_platform_page(name: str, cov: dict, registry: dict | None = None, - emulator_files: dict | None = None) -> str: +def generate_platform_page( + name: str, + cov: dict, + registry: dict | None = None, + emulator_files: dict | None = None, +) -> str: config = cov["config"] display = cov["platform"] mode = cov["mode"] pct = _pct(cov["present"], cov["total"]) logo_url = (registry or {}).get(name, {}).get("logo", "") - logo_md = f"![{display}]({logo_url}){{ width=48 align=right }}\n\n" if logo_url else "" + logo_md = ( + f"![{display}]({logo_url}){{ width=48 align=right }}\n\n" if logo_url else "" + ) homepage = config.get("homepage", "") version = config.get("version", "") @@ -341,7 +382,7 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, lines = [ f"# {display} - {SITE_NAME}", "", - logo_md + f"| | |", + logo_md + "| | |", "|---|---|", f"| Verification | {mode} |", f"| Hash type | {hash_type} |", @@ -352,14 +393,16 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, lines.append(f"| BIOS path | `{base_dest}/` |") if homepage: lines.append(f"| Homepage | [{homepage}]({homepage}) |") - lines.extend([ - "", - f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | " - f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}", - "", - f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}", - "", - ]) + lines.extend( + [ + "", + f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | " + f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}", + "", + f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}", + "", + ] + ) # Build lookup from config file entries (has hashes/sizes) config_files: dict[str, dict] = {} @@ -376,12 +419,14 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, by_system.setdefault(sys_id, []).append(d) # System summary table (quick navigation) - lines.extend([ - "## Systems overview", - "", - "| System | Files | Status | Emulators |", - "|--------|-------|--------|-----------|", - ]) + lines.extend( + [ + "## Systems overview", + "", + "| System | Files | Status | Emulators |", + "|--------|-------|--------|-----------|", + ] + ) for sys_id, files in sorted(by_system.items()): ok_count = sum(1 for f in files if f["status"] == "ok") total = len(files) @@ -396,7 +441,9 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, if len(sys_emus) > 3: emu_str += f" +{len(sys_emus) - 3}" anchor = sys_id.replace(" ", "-") - lines.append(f"| [{sys_id}](#{anchor}) | {ok_count}/{total} | {status} | {emu_str} |") + lines.append( + f"| [{sys_id}](#{anchor}) | {ok_count}/{total} | {status} | {emu_str} |" + ) lines.append("") # Per-system detail sections (collapsible for large platforms) @@ -417,8 +464,8 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, anchor = sys_id.replace(" ", "-") if use_collapsible: status_tag = "OK" if ok_count == total else f"{total - ok_count} issues" - lines.append(f"") - lines.append(f"??? note \"{sys_id} ({ok_count}/{total} - {status_tag})\"") + lines.append(f'') + lines.append(f'??? note "{sys_id} ({ok_count}/{total} - {status_tag})"') lines.append("") pad = " " else: @@ -474,6 +521,7 @@ def generate_platform_page(name: str, cov: dict, registry: dict | None = None, # System pages + def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]: """Group files by manufacturer -> console -> files.""" manufacturers: dict[str, dict[str, list]] = {} @@ -511,7 +559,7 @@ def generate_system_page( platform_files: dict[str, set], emulator_files: dict[str, dict], ) -> str: - slug = manufacturer.lower().replace(" ", "-") + manufacturer.lower().replace(" ", "-") lines = [ f"# {manufacturer} - {SITE_NAME}", "", @@ -536,7 +584,11 @@ def generate_system_page( # Cross-reference: which platforms declare this file plats = sorted(p for p, names in platform_files.items() if name in names) # Cross-reference: which emulators load this file - emus = sorted(e for e, data in emulator_files.items() if name in data.get("files", set())) + emus = sorted( + e + for e, data in emulator_files.items() + if name in data.get("files", set()) + ) lines.append(f"**`{name}`** ({size})") lines.append("") @@ -566,8 +618,11 @@ def generate_system_page( # Emulator pages + def generate_emulators_index(profiles: dict) -> str: - unique = {k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")} + unique = { + k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") + } aliases = {k: v for k, v in profiles.items() if v.get("type") == "alias"} # Group by classification @@ -600,9 +655,17 @@ def generate_emulators_index(profiles: dict) -> str: "other": "Unclassified", } - cls_order = ["official_port", "community_fork", "pure_libretro", - "game_engine", "enhanced_fork", "frozen_snapshot", - "embedded_hle", "launcher", "other"] + cls_order = [ + "official_port", + "community_fork", + "pure_libretro", + "game_engine", + "enhanced_fork", + "frozen_snapshot", + "embedded_hle", + "launcher", + "other", + ] for cls in cls_order: entries = by_class.get(cls, []) @@ -617,12 +680,14 @@ def generate_emulators_index(profiles: dict) -> str: entries = by_class.get(cls, []) if not entries: continue - lines.extend([ - f"## {cls}", - "", - "| Engine | Systems | Files |", - "|--------|---------|-------|", - ]) + lines.extend( + [ + f"## {cls}", + "", + "| Engine | Systems | Files |", + "|--------|---------|-------|", + ] + ) for name, p in entries: emu_name = p.get("emulator", name) @@ -631,9 +696,7 @@ def generate_emulators_index(profiles: dict) -> str: sys_str = ", ".join(systems[:3]) if len(systems) > 3: sys_str += f" +{len(systems) - 3}" - lines.append( - f"| [{emu_name}]({name}.md) | {sys_str} | {len(files)} |" - ) + lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {len(files)} |") lines.append("") if aliases: @@ -641,15 +704,18 @@ def generate_emulators_index(profiles: dict) -> str: lines.append("| Core | Points to |") lines.append("|------|-----------|") for name in sorted(aliases.keys()): - parent = aliases[name].get("alias_of", aliases[name].get("bios_identical_to", "unknown")) + parent = aliases[name].get( + "alias_of", aliases[name].get("bios_identical_to", "unknown") + ) lines.append(f"| {name} | [{parent}]({parent}.md) |") lines.append("") return "\n".join(lines) + "\n" -def generate_emulator_page(name: str, profile: dict, db: dict, - platform_files: dict | None = None) -> str: +def generate_emulator_page( + name: str, profile: dict, db: dict, platform_files: dict | None = None +) -> str: if profile.get("type") == "alias": parent = profile.get("alias_of", profile.get("bios_identical_to", "unknown")) return ( @@ -664,21 +730,23 @@ def generate_emulator_page(name: str, profile: dict, db: dict, source = profile.get("source", "") upstream = profile.get("upstream", "") version = profile.get("core_version", "unknown") - display = profile.get("display_name", emu_name) + profile.get("display_name", emu_name) profiled = profile.get("profiled_date", "unknown") systems = profile.get("systems", []) cores = profile.get("cores", [name]) files = profile.get("files", []) notes_raw = profile.get("notes", profile.get("note", "")) - notes = str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else "" + notes = ( + str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else "" + ) exclusion = profile.get("exclusion_note", "") data_dirs = profile.get("data_directories", []) lines = [ f"# {emu_name} - {SITE_NAME}", "", - f"| | |", - f"|---|---|", + "| | |", + "|---|---|", f"| Type | {emu_type} |", ] if classification: @@ -705,14 +773,22 @@ def generate_emulator_page(name: str, profile: dict, db: dict, lines.append(f"| Based on | {based_on} |") # Additional metadata fields (scalar values only -complex ones go to collapsible sections) for field, label in [ - ("core", "Core ID"), ("core_name", "Core name"), - ("bios_size", "BIOS size"), ("bios_directory", "BIOS directory"), - ("bios_detection", "BIOS detection"), ("bios_selection", "BIOS selection"), - ("firmware_file", "Firmware file"), ("firmware_source", "Firmware source"), - ("firmware_install", "Firmware install"), ("firmware_detection", "Firmware detection"), - ("resources_directory", "Resources directory"), ("rom_path", "ROM path"), - ("game_count", "Game count"), ("verification", "Verification mode"), - ("source_ref", "Source ref"), ("analysis_date", "Analysis date"), + ("core", "Core ID"), + ("core_name", "Core name"), + ("bios_size", "BIOS size"), + ("bios_directory", "BIOS directory"), + ("bios_detection", "BIOS detection"), + ("bios_selection", "BIOS selection"), + ("firmware_file", "Firmware file"), + ("firmware_source", "Firmware source"), + ("firmware_install", "Firmware install"), + ("firmware_detection", "Firmware detection"), + ("resources_directory", "Resources directory"), + ("rom_path", "ROM path"), + ("game_count", "Game count"), + ("verification", "Verification mode"), + ("source_ref", "Source ref"), + ("analysis_date", "Analysis date"), ("analysis_commit", "Analysis commit"), ]: val = profile.get(field) @@ -727,7 +803,7 @@ def generate_emulator_page(name: str, profile: dict, db: dict, # Platform-specific details (rich structured data) platform_details = profile.get("platform_details") if platform_details and isinstance(platform_details, dict): - lines.extend(["???+ info \"Platform details\"", ""]) + lines.extend(['???+ info "Platform details"', ""]) for pk, pv in platform_details.items(): if isinstance(pv, dict): lines.append(f" **{pk}:**") @@ -769,7 +845,7 @@ def generate_emulator_page(name: str, profile: dict, db: dict, val = profile.get(field) if val is None: continue - lines.append(f"???+ abstract \"{label}\"") + lines.append(f'???+ abstract "{label}"') lines.append("") _render_yaml_value(lines, val, indent=4) lines.append("") @@ -777,21 +853,21 @@ def generate_emulator_page(name: str, profile: dict, db: dict, # Notes if notes: indented = notes.replace("\n", "\n ") - lines.extend(["???+ note \"Technical notes\"", - f" {indented}", - ""]) + lines.extend(['???+ note "Technical notes"', f" {indented}", ""]) if not files: lines.append("No BIOS or firmware files required.") if exclusion: - lines.extend([ - "", - f"!!! info \"Why no files\"", - f" {exclusion}", - ]) + lines.extend( + [ + "", + '!!! info "Why no files"', + f" {exclusion}", + ] + ) else: by_name = db.get("indexes", {}).get("by_name", {}) - files_db = db.get("files", {}) + db.get("files", {}) # Stats by category bios_files = [f for f in files if f.get("category", "bios") == "bios"] @@ -876,7 +952,9 @@ def generate_emulator_page(name: str, profile: dict, db: dict, if category and category != "bios": badges.append(category) if region: - badges.append(", ".join(region) if isinstance(region, list) else str(region)) + badges.append( + ", ".join(region) if isinstance(region, list) else str(region) + ) if storage and storage != "embedded": badges.append(storage) if bundled: @@ -944,7 +1022,9 @@ def generate_emulator_page(name: str, profile: dict, db: dict, if source_ref: details.append(f"Source: `{source_ref}`") if platform_files: - plats = sorted(p for p, names in platform_files.items() if fname in names) + plats = sorted( + p for p, names in platform_files.items() if fname in names + ) if plats: plat_links = [_platform_link(p, p, "../") for p in plats] details.append(f"Platforms: {', '.join(plat_links)}") @@ -960,7 +1040,9 @@ def generate_emulator_page(name: str, profile: dict, db: dict, if size_note: details.append(f"Size note: {size_note}") if size_options: - details.append(f"Size options: {', '.join(_fmt_size(s) for s in size_options)}") + details.append( + f"Size options: {', '.join(_fmt_size(s) for s in size_options)}" + ) if size_range: details.append(f"Size range: {size_range}") @@ -1003,6 +1085,7 @@ def generate_emulator_page(name: str, profile: dict, db: dict, # Contributing page + def generate_gap_analysis( profiles: dict, coverages: dict, @@ -1044,12 +1127,14 @@ def generate_gap_analysis( continue if fname not in all_platform_names: in_repo = fname in by_name - undeclared.append({ - "name": fname, - "required": f.get("required", False), - "in_repo": in_repo, - "source_ref": f.get("source_ref", ""), - }) + undeclared.append( + { + "name": fname, + "required": f.get("required", False), + "in_repo": in_repo, + "source_ref": f.get("source_ref", ""), + } + ) total_undeclared += 1 if in_repo: total_in_repo += 1 @@ -1057,32 +1142,40 @@ def generate_gap_analysis( total_missing += 1 if undeclared: - emulator_gaps.append((emu_name, profile.get("emulator", emu_name), undeclared)) + emulator_gaps.append( + (emu_name, profile.get("emulator", emu_name), undeclared) + ) - lines.extend([ - "## Summary", - "", - f"| Metric | Count |", - f"|--------|-------|", - f"| Total undeclared files | {total_undeclared} |", - f"| Already in repo | {total_in_repo} |", - f"| Missing from repo | {total_missing} |", - f"| Emulators with gaps | {len(emulator_gaps)} |", - "", - ]) + lines.extend( + [ + "## Summary", + "", + "| Metric | Count |", + "|--------|-------|", + f"| Total undeclared files | {total_undeclared} |", + f"| Already in repo | {total_in_repo} |", + f"| Missing from repo | {total_missing} |", + f"| Emulators with gaps | {len(emulator_gaps)} |", + "", + ] + ) # Per-emulator breakdown - lines.extend([ - "## Per Emulator", - "", - "| Emulator | Undeclared | In Repo | Missing |", - "|----------|-----------|---------|---------|", - ]) + lines.extend( + [ + "## Per Emulator", + "", + "| Emulator | Undeclared | In Repo | Missing |", + "|----------|-----------|---------|---------|", + ] + ) for emu_name, display, gaps in sorted(emulator_gaps, key=lambda x: -len(x[2])): in_repo = sum(1 for g in gaps if g["in_repo"]) missing = len(gaps) - in_repo - lines.append(f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | {in_repo} | {missing} |") + lines.append( + f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | {in_repo} | {missing} |" + ) # Missing files detail (not in repo) all_missing = set() @@ -1091,18 +1184,22 @@ def generate_gap_analysis( for g in gaps: if not g["in_repo"] and g["name"] not in all_missing: all_missing.add(g["name"]) - missing_details.append({ - "name": g["name"], - "emulator": display, - "required": g["required"], - "source_ref": g["source_ref"], - }) + missing_details.append( + { + "name": g["name"], + "emulator": display, + "required": g["required"], + "source_ref": g["source_ref"], + } + ) # Build reverse map: emulator -> platforms that use it (via cores: field) from common import resolve_platform_cores + emu_to_platforms: dict[str, set[str]] = {} - unique_profiles = {k: v for k, v in profiles.items() - if v.get("type") not in ("alias", "test")} + unique_profiles = { + k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") + } for pname in coverages: config = coverages[pname]["config"] matched = resolve_platform_cores(config, unique_profiles) @@ -1113,39 +1210,53 @@ def generate_gap_analysis( req_missing = [m for m in missing_details if m["required"]] opt_missing = [m for m in missing_details if not m["required"]] - lines.extend([ - "", - f"## Missing Files ({len(missing_details)} unique, {len(req_missing)} required)", - "", - "Files loaded by emulators but not available in the repository.", - "Adding these files would improve pack completeness.", - "", - ]) + lines.extend( + [ + "", + f"## Missing Files ({len(missing_details)} unique, {len(req_missing)} required)", + "", + "Files loaded by emulators but not available in the repository.", + "Adding these files would improve pack completeness.", + "", + ] + ) if req_missing: - lines.extend([ - "### Required (highest priority)", - "", - "These files are needed for the emulator to function.", - "", - "| File | Emulator | Affects platforms | Source |", - "|------|----------|------------------|--------|", - ]) + lines.extend( + [ + "### Required (highest priority)", + "", + "These files are needed for the emulator to function.", + "", + "| File | Emulator | Affects platforms | Source |", + "|------|----------|------------------|--------|", + ] + ) for m in sorted(req_missing, key=lambda x: x["name"]): - emu_key = next((k for k, v in profiles.items() - if v.get("emulator") == m["emulator"]), "") + emu_key = next( + ( + k + for k, v in profiles.items() + if v.get("emulator") == m["emulator"] + ), + "", + ) plats = sorted(emu_to_platforms.get(emu_key, set())) plat_str = ", ".join(plats) if plats else "-" - lines.append(f"| `{m['name']}` | {m['emulator']} | {plat_str} | {m['source_ref']} |") + lines.append( + f"| `{m['name']}` | {m['emulator']} | {plat_str} | {m['source_ref']} |" + ) lines.append("") if opt_missing: - lines.extend([ - "### Optional", - "", - "| File | Emulator | Source |", - "|------|----------|--------|", - ]) + lines.extend( + [ + "### Optional", + "", + "| File | Emulator | Source |", + "|------|----------|--------|", + ] + ) for m in sorted(opt_missing, key=lambda x: x["name"]): lines.append(f"| `{m['name']}` | {m['emulator']} | {m['source_ref']} |") lines.append("") @@ -1159,8 +1270,9 @@ def generate_cross_reference( profiles: dict, ) -> str: """Generate cross-reference: Platform -> Core -> Systems -> Upstream.""" - unique = {k: v for k, v in profiles.items() - if v.get("type") not in ("alias", "test")} + unique = { + k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") + } # Build core -> profile lookup by core name core_to_profile: dict[str, str] = {} @@ -1185,13 +1297,14 @@ def generate_cross_reference( config = cov["config"] platform_cores = config.get("cores", []) - lines.append(f"??? abstract \"[{display}](platforms/{pname}.md)\"") + lines.append(f'??? abstract "[{display}](platforms/{pname}.md)"') lines.append("") # Resolve which profiles this platform uses if platform_cores == "all_libretro": - matched = {k: v for k, v in unique.items() - if "libretro" in v.get("type", "")} + matched = { + k: v for k, v in unique.items() if "libretro" in v.get("type", "") + } elif isinstance(platform_cores, list): matched = {} for cname in platform_cores: @@ -1204,8 +1317,9 @@ def generate_cross_reference( else: # Fallback: system intersection psystems = set(config.get("systems", {}).keys()) - matched = {k: v for k, v in unique.items() - if set(v.get("systems", [])) & psystems} + matched = { + k: v for k, v in unique.items() if set(v.get("systems", [])) & psystems + } if platform_cores == "all_libretro": lines.append(f" **{len(matched)} cores** (all libretro)") @@ -1220,7 +1334,7 @@ def generate_cross_reference( p = matched[emu_name] emu_display = p.get("emulator", emu_name) cls = p.get("core_classification", "-") - emu_type = p.get("type", "") + p.get("type", "") upstream = p.get("upstream", "") source = p.get("source", "") systems = p.get("systems", []) @@ -1259,12 +1373,14 @@ def generate_cross_reference( lines.append("") # Reverse view: by upstream emulator - lines.extend([ - "## By upstream emulator", - "", - "| Upstream | Cores | Classification | Platforms |", - "|----------|-------|---------------|-----------|", - ]) + lines.extend( + [ + "## By upstream emulator", + "", + "| Upstream | Cores | Classification | Platforms |", + "|----------|-------|---------------|-----------|", + ] + ) # Group profiles by upstream by_upstream: dict[str, list[str]] = {} @@ -1359,6 +1475,7 @@ The CI automatically: # index, architecture, tools, profiling are maintained as wiki/ sources # and copied verbatim by main(). Only data-model is generated dynamically. + def generate_wiki_data_model(db: dict, profiles: dict) -> str: """Generate data model documentation from actual database structure.""" files_count = len(db.get("files", {})) @@ -1377,7 +1494,7 @@ def generate_wiki_data_model(db: dict, profiles: dict) -> str: "Each entry:", "", "```json", - '{', + "{", ' "path": "bios/Nintendo/GameCube/GC/USA/IPL.bin",', ' "name": "IPL.bin",', ' "size": 2097152,', @@ -1386,13 +1503,13 @@ def generate_wiki_data_model(db: dict, profiles: dict) -> str: ' "sha256": "...",', ' "crc32": "...",', ' "adler32": "..."', - '}', + "}", "```", "", "### Indexes", "", - f"| Index | Entries | Purpose |", - f"|-------|---------|---------|", + "| Index | Entries | Purpose |", + "|-------|---------|---------|", f"| `by_md5` | {by_md5} | MD5 to SHA1 lookup (Batocera, Recalbox verification) |", f"| `by_name` | {by_name} | filename to SHA1 list (name-based resolution) |", f"| `by_crc32` | {by_crc32} | CRC32 to SHA1 lookup |", @@ -1444,6 +1561,7 @@ def generate_wiki_data_model(db: dict, profiles: dict) -> str: # Build cross-reference indexes + def _build_platform_file_index(coverages: dict) -> dict[str, set]: """Map platform_name -> set of declared file names.""" index = {} @@ -1472,6 +1590,7 @@ def _build_emulator_file_index(profiles: dict) -> dict[str, dict]: # mkdocs.yml nav generator + def generate_mkdocs_nav( coverages: dict, manufacturers: dict, @@ -1488,7 +1607,9 @@ def generate_mkdocs_nav( slug = mfr.lower().replace(" ", "-") system_nav.append({mfr: f"systems/{slug}.md"}) - unique_profiles = {k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")} + unique_profiles = { + k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") + } # Group emulators by classification for nav by_class: dict[str, list[tuple[str, str]]] = {} @@ -1512,9 +1633,17 @@ def generate_mkdocs_nav( } emu_nav: list = [{"Overview": "emulators/index.md"}] - for cls in ["official_port", "community_fork", "pure_libretro", - "game_engine", "enhanced_fork", "frozen_snapshot", - "embedded_hle", "launcher", "other"]: + for cls in [ + "official_port", + "community_fork", + "pure_libretro", + "game_engine", + "enhanced_fork", + "frozen_snapshot", + "embedded_hle", + "launcher", + "other", + ]: entries = by_class.get(cls, []) if not entries: continue @@ -1553,8 +1682,11 @@ def generate_mkdocs_nav( # Main + def main(): - parser = argparse.ArgumentParser(description="Generate MkDocs site from project data") + parser = argparse.ArgumentParser( + description="Generate MkDocs site from project data" + ) parser.add_argument("--db", default="database.json") parser.add_argument("--platforms-dir", default="platforms") parser.add_argument("--emulators-dir", default="emulators") @@ -1580,10 +1712,13 @@ def main(): with open(registry_path) as f: registry = (yaml.safe_load(f) or {}).get("platforms", {}) - platform_names = list_registered_platforms(args.platforms_dir, include_archived=True) + platform_names = list_registered_platforms( + args.platforms_dir, include_archived=True + ) from common import load_data_dir_registry from cross_reference import _build_supplemental_index + data_registry = load_data_dir_registry(args.platforms_dir) suppl_names = _build_supplemental_index() @@ -1591,17 +1726,22 @@ def main(): coverages = {} for name in sorted(platform_names): try: - cov = compute_coverage(name, args.platforms_dir, db, - data_registry, suppl_names) + cov = compute_coverage( + name, args.platforms_dir, db, data_registry, suppl_names + ) coverages[name] = cov - print(f" {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})") + print( + f" {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})" + ) except FileNotFoundError as e: print(f" {name}: skipped ({e})", file=sys.stderr) print("Loading emulator profiles...") profiles = load_emulator_profiles(args.emulators_dir, skip_aliases=False) unique_count = sum(1 for p in profiles.values() if p.get("type") != "alias") - print(f" {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)") + print( + f" {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)" + ) # Build cross-reference indexes platform_files = _build_platform_file_index(coverages) @@ -1609,7 +1749,9 @@ def main(): # Generate home print("Generating home page...") - write_if_changed(str(docs / "index.md"), generate_home(db, coverages, profiles, registry)) + write_if_changed( + str(docs / "index.md"), generate_home(db, coverages, profiles, registry) + ) # Build system_id -> manufacturer page map (needed by all generators) print("Building system cross-reference map...") @@ -1619,14 +1761,21 @@ def main(): # Generate platform pages print("Generating platform pages...") - write_if_changed(str(docs / "platforms" / "index.md"), generate_platform_index(coverages)) + write_if_changed( + str(docs / "platforms" / "index.md"), generate_platform_index(coverages) + ) for name, cov in coverages.items(): - write_if_changed(str(docs / "platforms" / f"{name}.md"), generate_platform_page(name, cov, registry, emulator_files)) + write_if_changed( + str(docs / "platforms" / f"{name}.md"), + generate_platform_page(name, cov, registry, emulator_files), + ) # Generate system pages print("Generating system pages...") - write_if_changed(str(docs / "systems" / "index.md"), generate_systems_index(manufacturers)) + write_if_changed( + str(docs / "systems" / "index.md"), generate_systems_index(manufacturers) + ) for mfr, consoles in manufacturers.items(): slug = mfr.lower().replace(" ", "-") page = generate_system_page(mfr, consoles, platform_files, emulator_files) @@ -1634,20 +1783,24 @@ def main(): # Generate emulator pages print("Generating emulator pages...") - write_if_changed(str(docs / "emulators" / "index.md"), generate_emulators_index(profiles)) + write_if_changed( + str(docs / "emulators" / "index.md"), generate_emulators_index(profiles) + ) for name, profile in profiles.items(): page = generate_emulator_page(name, profile, db, platform_files) write_if_changed(str(docs / "emulators" / f"{name}.md"), page) # Generate cross-reference page print("Generating cross-reference page...") - write_if_changed(str(docs / "cross-reference.md"), - generate_cross_reference(coverages, profiles)) + write_if_changed( + str(docs / "cross-reference.md"), generate_cross_reference(coverages, profiles) + ) # Generate gap analysis page print("Generating gap analysis page...") - write_if_changed(str(docs / "gaps.md"), - generate_gap_analysis(profiles, coverages, db)) + write_if_changed( + str(docs / "gaps.md"), generate_gap_analysis(profiles, coverages, db) + ) # Wiki pages: copy manually maintained sources + generate dynamic ones print("Generating wiki pages...") @@ -1658,7 +1811,9 @@ def main(): for src_file in wiki_src.glob("*.md"): shutil.copy2(src_file, wiki_dest / src_file.name) # data-model.md is generated (contains live DB stats) - write_if_changed(str(wiki_dest / "data-model.md"), generate_wiki_data_model(db, profiles)) + write_if_changed( + str(wiki_dest / "data-model.md"), generate_wiki_data_model(db, profiles) + ) # Generate contributing print("Generating contributing page...") @@ -1667,7 +1822,9 @@ def main(): # Update mkdocs.yml nav section only (avoid yaml.dump round-trip mangling quotes) print("Updating mkdocs.yml nav...") nav = generate_mkdocs_nav(coverages, manufacturers, profiles) - nav_yaml = yaml.dump({"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True) + nav_yaml = yaml.dump( + {"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True + ) # Rewrite mkdocs.yml entirely (static config + generated nav) mkdocs_static = """\ @@ -1724,10 +1881,13 @@ plugins: total_pages = ( 1 # home - + 1 + len(coverages) # platform index + detail - + 1 + len(manufacturers) # system index + detail + + 1 + + len(coverages) # platform index + detail + + 1 + + len(manufacturers) # system index + detail + 1 # cross-reference - + 1 + len(profiles) # emulator index + detail + + 1 + + len(profiles) # emulator index + detail + 1 # gap analysis + 14 # wiki pages (copied from wiki/ + generated data-model) + 1 # contributing diff --git a/scripts/generate_truth.py b/scripts/generate_truth.py index 3d788b06..9bbc84b1 100644 --- a/scripts/generate_truth.py +++ b/scripts/generate_truth.py @@ -39,20 +39,28 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: group.add_argument("--all", action="store_true", help="all registered platforms") group.add_argument("--platform", help="single platform name") parser.add_argument( - "--output-dir", default=DEFAULT_OUTPUT_DIR, help="output directory", + "--output-dir", + default=DEFAULT_OUTPUT_DIR, + help="output directory", ) parser.add_argument( - "--target", "-t", default=None, help="hardware target filter", + "--target", + "-t", + default=None, + help="hardware target filter", ) parser.add_argument( - "--include-archived", action="store_true", + "--include-archived", + action="store_true", help="include archived platforms with --all", ) parser.add_argument( - "--platforms-dir", default=DEFAULT_PLATFORMS_DIR, + "--platforms-dir", + default=DEFAULT_PLATFORMS_DIR, ) parser.add_argument( - "--emulators-dir", default=DEFAULT_EMULATORS_DIR, + "--emulators-dir", + default=DEFAULT_EMULATORS_DIR, ) parser.add_argument("--db", default=DEFAULT_DB_FILE, help="database.json path") return parser.parse_args(argv) @@ -77,7 +85,8 @@ def main(argv: list[str] | None = None) -> None: # Determine platforms if args.all: platforms = list_registered_platforms( - args.platforms_dir, include_archived=args.include_archived, + args.platforms_dir, + include_archived=args.include_archived, ) else: platforms = [args.platform] @@ -90,7 +99,9 @@ def main(argv: list[str] | None = None) -> None: if args.target: try: target_cores = load_target_config( - name, args.target, args.platforms_dir, + name, + args.target, + args.platforms_dir, ) except FileNotFoundError: print(f" {name}: no target config, skipped") @@ -105,15 +116,22 @@ def main(argv: list[str] | None = None) -> None: registry_entry = registry.get(name, {}) result = generate_platform_truth( - name, config, registry_entry, profiles, - db=db, target_cores=target_cores, + name, + config, + registry_entry, + profiles, + db=db, + target_cores=target_cores, ) out_path = os.path.join(args.output_dir, f"{name}.yml") with open(out_path, "w") as f: yaml.dump( - result, f, - default_flow_style=False, sort_keys=False, allow_unicode=True, + result, + f, + default_flow_style=False, + sort_keys=False, + allow_unicode=True, ) n_systems = len(result.get("systems", {})) diff --git a/scripts/migrate.py b/scripts/migrate.py index 3c815635..38d84d9c 100644 --- a/scripts/migrate.py +++ b/scripts/migrate.py @@ -78,11 +78,9 @@ BIOS_FILE_MAP = { "sanyotry.bin": ("3DO Company", "3DO"), "3do_arcade_saot.bin": ("3DO Company", "3DO"), "3dobios.zip": ("3DO Company", "3DO"), - "cpc464.rom": ("Amstrad", "CPC"), "cpc664.rom": ("Amstrad", "CPC"), "cpc6128.rom": ("Amstrad", "CPC"), - "neogeo.zip": ("SNK", "Neo Geo"), "pgm.zip": ("Arcade", "Arcade"), "skns.zip": ("Arcade", "Arcade"), @@ -94,7 +92,6 @@ BIOS_FILE_MAP = { "nmk004.zip": ("Arcade", "Arcade"), "ym2608.zip": ("Arcade", "Arcade"), "qsound.zip": ("Arcade", "Arcade"), - "ATARIBAS.ROM": ("Atari", "400-800"), "ATARIOSA.ROM": ("Atari", "400-800"), "ATARIOSB.ROM": ("Atari", "400-800"), @@ -106,10 +103,8 @@ BIOS_FILE_MAP = { "7800 BIOS (E).rom": ("Atari", "7800"), "lynxboot.img": ("Atari", "Lynx"), "tos.img": ("Atari", "ST"), - "colecovision.rom": ("Coleco", "ColecoVision"), "coleco.rom": ("Coleco", "ColecoVision"), - "kick33180.A500": ("Commodore", "Amiga"), "kick34005.A500": ("Commodore", "Amiga"), "kick34005.CDTV": ("Commodore", "Amiga"), @@ -122,33 +117,26 @@ BIOS_FILE_MAP = { "kick40063.A600": ("Commodore", "Amiga"), "kick40068.A1200": ("Commodore", "Amiga"), "kick40068.A4000": ("Commodore", "Amiga"), - "sl31253.bin": ("Fairchild", "Channel F"), "sl31254.bin": ("Fairchild", "Channel F"), "sl90025.bin": ("Fairchild", "Channel F"), - "prboom.wad": ("Id Software", "Doom"), "ecwolf.pk3": ("Id Software", "Wolfenstein 3D"), - "MacII.ROM": ("Apple", "Macintosh II"), "MacIIx.ROM": ("Apple", "Macintosh II"), "vMac.ROM": ("Apple", "Macintosh II"), - "o2rom.bin": ("Magnavox", "Odyssey2"), "g7400.bin": ("Philips", "Videopac+"), "jopac.bin": ("Philips", "Videopac+"), - "exec.bin": ("Mattel", "Intellivision"), "grom.bin": ("Mattel", "Intellivision"), "ECS.bin": ("Mattel", "Intellivision"), "IVOICE.BIN": ("Mattel", "Intellivision"), - "MSX.ROM": ("Microsoft", "MSX"), "MSX2.ROM": ("Microsoft", "MSX"), "MSX2EXT.ROM": ("Microsoft", "MSX"), "MSX2P.ROM": ("Microsoft", "MSX"), "MSX2PEXT.ROM": ("Microsoft", "MSX"), - "syscard1.pce": ("NEC", "PC Engine"), "syscard2.pce": ("NEC", "PC Engine"), "syscard2u.pce": ("NEC", "PC Engine"), @@ -156,7 +144,6 @@ BIOS_FILE_MAP = { "syscard3u.pce": ("NEC", "PC Engine"), "gexpress.pce": ("NEC", "PC Engine"), "pcfx.rom": ("NEC", "PC-FX"), - "disksys.rom": ("Nintendo", "Famicom Disk System"), "gba_bios.bin": ("Nintendo", "Game Boy Advance"), "gb_bios.bin": ("Nintendo", "Game Boy"), @@ -179,7 +166,6 @@ BIOS_FILE_MAP = { "dsifirmware.bin": ("Nintendo", "Nintendo DS"), "bios.min": ("Nintendo", "Pokemon Mini"), "64DD_IPL.bin": ("Nintendo", "Nintendo 64DD"), - "dc_boot.bin": ("Sega", "Dreamcast"), "dc_flash.bin": ("Sega", "Dreamcast"), "bios.gg": ("Sega", "Game Gear"), @@ -196,7 +182,6 @@ BIOS_FILE_MAP = { "saturn_bios.bin": ("Sega", "Saturn"), "sega_101.bin": ("Sega", "Saturn"), "stvbios.zip": ("Sega", "Saturn"), - "scph1001.bin": ("Sony", "PlayStation"), "SCPH1001.BIN": ("Sony", "PlayStation"), "scph5500.bin": ("Sony", "PlayStation"), @@ -207,7 +192,6 @@ BIOS_FILE_MAP = { "ps1_rom.bin": ("Sony", "PlayStation"), "psxonpsp660.bin": ("Sony", "PlayStation"), "PSXONPSP660.BIN": ("Sony", "PlayStation Portable"), - "scummvm.zip": ("ScummVM", "ScummVM"), "MT32_CONTROL.ROM": ("ScummVM", "ScummVM"), "MT32_PCM.ROM": ("ScummVM", "ScummVM"), @@ -254,8 +238,11 @@ SKIP_LARGE_ROM_DIRS = {"roms/"} BRANCHES = ["RetroArch", "RetroPie", "Recalbox", "batocera", "Other"] SKIP_FILES = { - "README.md", ".gitignore", "desktop.ini", - "telemetry_id", "citra_log.txt", + "README.md", + ".gitignore", + "desktop.ini", + "telemetry_id", + "citra_log.txt", } SKIP_EXTENSIONS = {".txt", ".log", ".pem", ".nvm", ".ctg", ".exe", ".bat", ".sh"} @@ -279,17 +266,33 @@ def classify_file(filepath: str) -> tuple: return None clean = filepath - for prefix in ("bios/", "BIOS/", "roms/fba/", "roms/fbneo/", "roms/mame/", - "roms/mame-libretro/", "roms/neogeo/", "roms/naomi/", - "roms/atomiswave/", "roms/macintosh/"): + for prefix in ( + "bios/", + "BIOS/", + "roms/fba/", + "roms/fbneo/", + "roms/mame/", + "roms/mame-libretro/", + "roms/neogeo/", + "roms/naomi/", + "roms/atomiswave/", + "roms/macintosh/", + ): if clean.startswith(prefix): - clean = clean[len(prefix):] + clean = clean[len(prefix) :] break if filepath.startswith("roms/") and not any( - filepath.startswith(p) for p in ( - "roms/fba/", "roms/fbneo/", "roms/mame/", "roms/mame-libretro/", - "roms/neogeo/", "roms/naomi/", "roms/atomiswave/", "roms/macintosh/" + filepath.startswith(p) + for p in ( + "roms/fba/", + "roms/fbneo/", + "roms/mame/", + "roms/mame-libretro/", + "roms/neogeo/", + "roms/naomi/", + "roms/atomiswave/", + "roms/macintosh/", ) ): return None @@ -341,12 +344,12 @@ def get_subpath(filepath: str, manufacturer: str, console: str) -> str: clean = filepath for prefix in ("bios/", "BIOS/"): if clean.startswith(prefix): - clean = clean[len(prefix):] + clean = clean[len(prefix) :] break for prefix in PATH_PREFIX_MAP: if clean.startswith(prefix): - remaining = clean[len(prefix):] + remaining = clean[len(prefix) :] if "/" in remaining: return remaining return remaining @@ -363,16 +366,14 @@ def extract_from_branches(target: Path, dry_run: bool, existing_hashes: set) -> try: subprocess.run( - ["git", "rev-parse", "--verify", ref], - capture_output=True, check=True + ["git", "rev-parse", "--verify", ref], capture_output=True, check=True ) except subprocess.CalledProcessError: print(f" Branch {branch} not found, skipping") continue result = subprocess.run( - ["git", "ls-tree", "-r", "--name-only", ref], - capture_output=True, text=True + ["git", "ls-tree", "-r", "--name-only", ref], capture_output=True, text=True ) files = result.stdout.strip().split("\n") print(f"\n Branch '{branch}': {len(files)} files") @@ -391,7 +392,8 @@ def extract_from_branches(target: Path, dry_run: bool, existing_hashes: set) -> try: blob = subprocess.run( ["git", "show", f"{ref}:{filepath}"], - capture_output=True, check=True + capture_output=True, + check=True, ) content = blob.stdout except subprocess.CalledProcessError: @@ -493,14 +495,20 @@ def main(): parser = argparse.ArgumentParser( description="Migrate BIOS files to Manufacturer/Console structure" ) - parser.add_argument("--dry-run", action="store_true", - help="Show what would be done without moving files") - parser.add_argument("--source", default=".", - help="Source directory (repo root)") - parser.add_argument("--target", default="bios", - help="Target directory for organized BIOS files") - parser.add_argument("--include-branches", action="store_true", - help="Also extract BIOS files from all remote branches") + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be done without moving files", + ) + parser.add_argument("--source", default=".", help="Source directory (repo root)") + parser.add_argument( + "--target", default="bios", help="Target directory for organized BIOS files" + ) + parser.add_argument( + "--include-branches", + action="store_true", + help="Also extract BIOS files from all remote branches", + ) args = parser.parse_args() source = Path(args.source) @@ -517,7 +525,9 @@ def main(): print() print("=== Phase 1: Local files (libretro branch) ===") - moved, skipped, errors, existing_hashes = migrate_local(source, target, args.dry_run) + moved, skipped, errors, existing_hashes = migrate_local( + source, target, args.dry_run + ) action = "Would copy" if args.dry_run else "Copied" print(f"\n{action} {moved} files, skipped {skipped}") @@ -529,8 +539,15 @@ def main(): if source.is_dir(): known = set(SYSTEM_MAP.keys()) | { - "bios", "scripts", "platforms", "schemas", ".github", ".cache", - ".git", "README.md", ".gitignore", + "bios", + "scripts", + "platforms", + "schemas", + ".github", + ".cache", + ".git", + "README.md", + ".gitignore", } for d in sorted(source.iterdir()): if d.name not in known and not d.name.startswith("."): diff --git a/scripts/pipeline.py b/scripts/pipeline.py index c05c840d..814bc25d 100644 --- a/scripts/pipeline.py +++ b/scripts/pipeline.py @@ -19,10 +19,10 @@ Usage: python scripts/pipeline.py --skip-docs # skip steps 8-9 python scripts/pipeline.py --offline # skip step 2 """ + from __future__ import annotations import argparse -import json import subprocess import sys import time @@ -54,6 +54,7 @@ def parse_verify_counts(output: str) -> dict[str, tuple[int, int]]: Returns {group_label: (ok, total)}. """ import re + counts = {} for line in output.splitlines(): m = re.match(r"^(.+?):\s+(\d+)/(\d+)\s+(OK|present)", line) @@ -71,6 +72,7 @@ def parse_pack_counts(output: str) -> dict[str, tuple[int, int]]: Returns {pack_label: (ok, total)}. """ import re + counts = {} current_label = "" for line in output.splitlines(): @@ -84,7 +86,7 @@ def parse_pack_counts(output: str) -> dict[str, tuple[int, int]]: base_m = re.search(r"\((\d+) baseline", line) ok_m = re.search(r"(\d+)/(\d+) files OK", line) if base_m and ok_m: - baseline = int(base_m.group(1)) + int(base_m.group(1)) ok, total = int(ok_m.group(1)), int(ok_m.group(2)) counts[current_label] = (ok, total) elif ok_m: @@ -118,12 +120,18 @@ def check_consistency(verify_output: str, pack_output: str) -> bool: print(f" {v_label}: MISMATCH total verify {v_total} != pack {p_total}") all_ok = False elif p_ok < v_ok: - print(f" {v_label}: MISMATCH pack {p_ok} OK < verify {v_ok} OK (/{v_total})") + print( + f" {v_label}: MISMATCH pack {p_ok} OK < verify {v_ok} OK (/{v_total})" + ) all_ok = False elif p_ok == v_ok: - print(f" {v_label}: verify {v_ok}/{v_total} == pack {p_ok}/{p_total} OK") + print( + f" {v_label}: verify {v_ok}/{v_total} == pack {p_ok}/{p_total} OK" + ) else: - print(f" {v_label}: verify {v_ok}/{v_total}, pack {p_ok}/{p_total} OK (pack resolves more)") + print( + f" {v_label}: verify {v_ok}/{v_total}, pack {p_ok}/{p_total} OK (pack resolves more)" + ) else: print(f" {v_label}: {v_ok}/{v_total} (no separate pack)") @@ -134,26 +142,45 @@ def check_consistency(verify_output: str, pack_output: str) -> bool: def main(): parser = argparse.ArgumentParser(description="Run the full retrobios pipeline") - parser.add_argument("--include-archived", action="store_true", - help="Include archived platforms") - parser.add_argument("--skip-packs", action="store_true", - help="Only regenerate DB and verify, skip pack generation") - parser.add_argument("--skip-docs", action="store_true", - help="Skip README and site generation") - parser.add_argument("--offline", action="store_true", - help="Skip data directory refresh") - parser.add_argument("--output-dir", default="dist", - help="Pack output directory (default: dist/)") + parser.add_argument( + "--include-archived", action="store_true", help="Include archived platforms" + ) + parser.add_argument( + "--skip-packs", + action="store_true", + help="Only regenerate DB and verify, skip pack generation", + ) + parser.add_argument( + "--skip-docs", action="store_true", help="Skip README and site generation" + ) + parser.add_argument( + "--offline", action="store_true", help="Skip data directory refresh" + ) + parser.add_argument( + "--output-dir", default="dist", help="Pack output directory (default: dist/)" + ) # --include-extras is now a no-op: core requirements are always included - parser.add_argument("--include-extras", action="store_true", - help="(no-op) Core requirements are always included") + parser.add_argument( + "--include-extras", + action="store_true", + help="(no-op) Core requirements are always included", + ) parser.add_argument("--target", "-t", help="Hardware target (e.g., switch, rpi4)") - parser.add_argument("--check-buildbot", action="store_true", - help="Check buildbot system directory for changes") - parser.add_argument("--with-truth", action="store_true", - help="Generate truth YAMLs and diff against scraped") - parser.add_argument("--with-export", action="store_true", - help="Export native formats (implies --with-truth)") + parser.add_argument( + "--check-buildbot", + action="store_true", + help="Check buildbot system directory for changes", + ) + parser.add_argument( + "--with-truth", + action="store_true", + help="Generate truth YAMLs and diff against scraped", + ) + parser.add_argument( + "--with-export", + action="store_true", + help="Export native formats (implies --with-truth)", + ) args = parser.parse_args() results = {} @@ -162,8 +189,15 @@ def main(): # Step 1: Generate database ok, out = run( - [sys.executable, "scripts/generate_db.py", "--force", - "--bios-dir", "bios", "--output", "database.json"], + [ + sys.executable, + "scripts/generate_db.py", + "--force", + "--bios-dir", + "bios", + "--output", + "database.json", + ], "1/8 generate database", ) results["generate_db"] = ok @@ -216,8 +250,13 @@ def main(): # Step 2c: Generate truth YAMLs if args.with_truth or args.with_export: - truth_cmd = [sys.executable, "scripts/generate_truth.py", "--all", - "--output-dir", str(Path(args.output_dir) / "truth")] + truth_cmd = [ + sys.executable, + "scripts/generate_truth.py", + "--all", + "--output-dir", + str(Path(args.output_dir) / "truth"), + ] if args.include_archived: truth_cmd.append("--include-archived") if args.target: @@ -242,9 +281,15 @@ def main(): # Step 2e: Export native formats if args.with_export: - export_cmd = [sys.executable, "scripts/export_native.py", "--all", - "--output-dir", str(Path(args.output_dir) / "upstream"), - "--truth-dir", str(Path(args.output_dir) / "truth")] + export_cmd = [ + sys.executable, + "scripts/export_native.py", + "--all", + "--output-dir", + str(Path(args.output_dir) / "upstream"), + "--truth-dir", + str(Path(args.output_dir) / "truth"), + ] if args.include_archived: export_cmd.append("--include-archived") ok, _ = run(export_cmd, "2e export native") @@ -267,8 +312,11 @@ def main(): pack_output = "" if not args.skip_packs: pack_cmd = [ - sys.executable, "scripts/generate_pack.py", "--all", - "--output-dir", args.output_dir, + sys.executable, + "scripts/generate_pack.py", + "--all", + "--output-dir", + args.output_dir, ] if args.include_archived: pack_cmd.append("--include-archived") @@ -288,8 +336,12 @@ def main(): # Step 4b: Generate install manifests if not args.skip_packs: manifest_cmd = [ - sys.executable, "scripts/generate_pack.py", "--all", - "--manifest", "--output-dir", "install", + sys.executable, + "scripts/generate_pack.py", + "--all", + "--manifest", + "--output-dir", + "install", ] if args.include_archived: manifest_cmd.append("--include-archived") @@ -307,8 +359,11 @@ def main(): # Step 4c: Generate target manifests if not args.skip_packs: target_cmd = [ - sys.executable, "scripts/generate_pack.py", - "--manifest-targets", "--output-dir", "install/targets", + sys.executable, + "scripts/generate_pack.py", + "--manifest-targets", + "--output-dir", + "install/targets", ] ok, _ = run(target_cmd, "4c/8 generate target manifests") results["generate_target_manifests"] = ok @@ -329,8 +384,12 @@ def main(): # Step 6: Pack integrity (extract + hash verification) if not args.skip_packs: integrity_cmd = [ - sys.executable, "scripts/generate_pack.py", "--all", - "--verify-packs", "--output-dir", args.output_dir, + sys.executable, + "scripts/generate_pack.py", + "--all", + "--verify-packs", + "--output-dir", + args.output_dir, ] if args.include_archived: integrity_cmd.append("--include-archived") @@ -344,8 +403,14 @@ def main(): # Step 7: Generate README if not args.skip_docs: ok, _ = run( - [sys.executable, "scripts/generate_readme.py", - "--db", "database.json", "--platforms-dir", "platforms"], + [ + sys.executable, + "scripts/generate_readme.py", + "--db", + "database.json", + "--platforms-dir", + "platforms", + ], "7/8 generate readme", ) results["generate_readme"] = ok diff --git a/scripts/refresh_data_dirs.py b/scripts/refresh_data_dirs.py index 5893a864..d6ca7095 100644 --- a/scripts/refresh_data_dirs.py +++ b/scripts/refresh_data_dirs.py @@ -57,7 +57,9 @@ def _load_versions(versions_path: str = VERSIONS_FILE) -> dict[str, dict]: return json.load(f) -def _save_versions(versions: dict[str, dict], versions_path: str = VERSIONS_FILE) -> None: +def _save_versions( + versions: dict[str, dict], versions_path: str = VERSIONS_FILE +) -> None: path = Path(versions_path) path.parent.mkdir(parents=True, exist_ok=True) with open(path, "w") as f: @@ -66,10 +68,13 @@ def _save_versions(versions: dict[str, dict], versions_path: str = VERSIONS_FILE def _api_request(url: str) -> dict: - req = urllib.request.Request(url, headers={ - "User-Agent": USER_AGENT, - "Accept": "application/json", - }) + req = urllib.request.Request( + url, + headers={ + "User-Agent": USER_AGENT, + "Accept": "application/json", + }, + ) token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") if token and "github" in url: req.add_header("Authorization", f"token {token}") @@ -111,7 +116,9 @@ def get_remote_sha(source_url: str, version: str) -> str | None: data = _api_request(url) return data["commit"]["id"] except (urllib.error.URLError, KeyError, OSError) as exc: - log.warning("failed to fetch remote SHA for %s/%s@%s: %s", owner, repo, version, exc) + log.warning( + "failed to fetch remote SHA for %s/%s@%s: %s", owner, repo, version, exc + ) return None @@ -167,7 +174,7 @@ def _download_and_extract( if not member.name.startswith(prefix) and member.name != source_path: continue - rel = member.name[len(prefix):] + rel = member.name[len(prefix) :] if not rel: continue @@ -285,8 +292,9 @@ def _download_and_extract_zip( def _get_remote_etag(source_url: str) -> str | None: """HEAD request to get ETag or Last-Modified for freshness check.""" try: - req = urllib.request.Request(source_url, method="HEAD", - headers={"User-Agent": USER_AGENT}) + req = urllib.request.Request( + source_url, method="HEAD", headers={"User-Agent": USER_AGENT} + ) with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp: return resp.headers.get("ETag") or resp.headers.get("Last-Modified") or "" except (urllib.error.URLError, OSError): @@ -333,17 +341,31 @@ def refresh_entry( return False if dry_run: - log.info("[%s] would refresh (type: %s, cached: %s)", key, source_type, cached_tag or "none") + log.info( + "[%s] would refresh (type: %s, cached: %s)", + key, + source_type, + cached_tag or "none", + ) return True try: if source_type == "zip": strip = entry.get("strip_components", 0) - file_count = _download_and_extract_zip(source_url, local_cache, exclude, strip) + file_count = _download_and_extract_zip( + source_url, local_cache, exclude, strip + ) else: source_path = entry["source_path"].format(version=version) - file_count = _download_and_extract(source_url, source_path, local_cache, exclude) - except (urllib.error.URLError, OSError, tarfile.TarError, zipfile.BadZipFile) as exc: + file_count = _download_and_extract( + source_url, source_path, local_cache, exclude + ) + except ( + urllib.error.URLError, + OSError, + tarfile.TarError, + zipfile.BadZipFile, + ) as exc: log.warning("[%s] download failed: %s", key, exc) return False @@ -380,18 +402,30 @@ def refresh_all( if platform and allowed and platform not in allowed: continue results[key] = refresh_entry( - key, entry, force=force, dry_run=dry_run, versions_path=versions_path, + key, + entry, + force=force, + dry_run=dry_run, + versions_path=versions_path, ) return results def main() -> None: - parser = argparse.ArgumentParser(description="Refresh cached data directories from upstream") + parser = argparse.ArgumentParser( + description="Refresh cached data directories from upstream" + ) parser.add_argument("--key", help="Refresh only this entry") - parser.add_argument("--force", action="store_true", help="Re-download even if up to date") - parser.add_argument("--dry-run", action="store_true", help="Preview without downloading") + parser.add_argument( + "--force", action="store_true", help="Re-download even if up to date" + ) + parser.add_argument( + "--dry-run", action="store_true", help="Preview without downloading" + ) parser.add_argument("--platform", help="Only refresh entries for this platform") - parser.add_argument("--registry", default=DEFAULT_REGISTRY, help="Path to _data_dirs.yml") + parser.add_argument( + "--registry", default=DEFAULT_REGISTRY, help="Path to _data_dirs.yml" + ) args = parser.parse_args() logging.basicConfig( @@ -405,9 +439,13 @@ def main() -> None: if args.key not in registry: log.error("unknown key: %s (available: %s)", args.key, ", ".join(registry)) raise SystemExit(1) - refresh_entry(args.key, registry[args.key], force=args.force, dry_run=args.dry_run) + refresh_entry( + args.key, registry[args.key], force=args.force, dry_run=args.dry_run + ) else: - refresh_all(registry, force=args.force, dry_run=args.dry_run, platform=args.platform) + refresh_all( + registry, force=args.force, dry_run=args.dry_run, platform=args.platform + ) if __name__ == "__main__": diff --git a/scripts/scraper/_hash_merge.py b/scripts/scraper/_hash_merge.py index c1faade3..b47ff5e0 100644 --- a/scripts/scraper/_hash_merge.py +++ b/scripts/scraper/_hash_merge.py @@ -34,40 +34,40 @@ def merge_mame_profile( profile = _load_yaml(profile_path) hashes = _load_json(hashes_path) - profile['core_version'] = hashes.get('version', profile.get('core_version')) + profile["core_version"] = hashes.get("version", profile.get("core_version")) - files = profile.get('files', []) - bios_zip, non_bios = _split_files(files, lambda f: f.get('category') == 'bios_zip') + files = profile.get("files", []) + bios_zip, non_bios = _split_files(files, lambda f: f.get("category") == "bios_zip") existing_by_name: dict[str, dict] = {} for entry in bios_zip: - key = _zip_name_to_set(entry['name']) + key = _zip_name_to_set(entry["name"]) existing_by_name[key] = entry updated_bios: list[dict] = [] matched_names: set[str] = set() - for set_name, set_data in hashes.get('bios_sets', {}).items(): - contents = _build_contents(set_data.get('roms', [])) + for set_name, set_data in hashes.get("bios_sets", {}).items(): + contents = _build_contents(set_data.get("roms", [])) source_ref = _build_source_ref(set_data) if set_name in existing_by_name: # Update existing entry: preserve manual fields, update contents entry = existing_by_name[set_name].copy() - entry['contents'] = contents + entry["contents"] = contents if source_ref: - entry['source_ref'] = source_ref + entry["source_ref"] = source_ref updated_bios.append(entry) matched_names.add(set_name) elif add_new: # New BIOS set — only added to the main profile entry = { - 'name': f'{set_name}.zip', - 'required': True, - 'category': 'bios_zip', - 'system': None, - 'source_ref': source_ref, - 'contents': contents, + "name": f"{set_name}.zip", + "required": True, + "category": "bios_zip", + "system": None, + "source_ref": source_ref, + "contents": contents, } updated_bios.append(entry) @@ -77,7 +77,7 @@ def merge_mame_profile( if set_name not in matched_names: updated_bios.append(entry) - profile['files'] = non_bios + updated_bios + profile["files"] = non_bios + updated_bios if write: _backup_and_write(profile_path, profile) @@ -102,49 +102,49 @@ def merge_fbneo_profile( profile = _load_yaml(profile_path) hashes = _load_json(hashes_path) - profile['core_version'] = hashes.get('version', profile.get('core_version')) + profile["core_version"] = hashes.get("version", profile.get("core_version")) - files = profile.get('files', []) - archive_files, non_archive = _split_files(files, lambda f: 'archive' in f) + files = profile.get("files", []) + archive_files, non_archive = _split_files(files, lambda f: "archive" in f) existing_by_key: dict[tuple[str, str], dict] = {} for entry in archive_files: - key = (entry['archive'], entry['name']) + key = (entry["archive"], entry["name"]) existing_by_key[key] = entry merged: list[dict] = [] matched_keys: set[tuple[str, str]] = set() - for set_name, set_data in hashes.get('bios_sets', {}).items(): - archive_name = f'{set_name}.zip' + for set_name, set_data in hashes.get("bios_sets", {}).items(): + archive_name = f"{set_name}.zip" source_ref = _build_source_ref(set_data) - for rom in set_data.get('roms', []): - rom_name = rom['name'] + for rom in set_data.get("roms", []): + rom_name = rom["name"] key = (archive_name, rom_name) if key in existing_by_key: entry = existing_by_key[key].copy() - entry['size'] = rom['size'] - entry['crc32'] = rom['crc32'] - if rom.get('sha1'): - entry['sha1'] = rom['sha1'] + entry["size"] = rom["size"] + entry["crc32"] = rom["crc32"] + if rom.get("sha1"): + entry["sha1"] = rom["sha1"] if source_ref: - entry['source_ref'] = source_ref + entry["source_ref"] = source_ref merged.append(entry) matched_keys.add(key) elif add_new: entry = { - 'name': rom_name, - 'archive': archive_name, - 'required': True, - 'size': rom['size'], - 'crc32': rom['crc32'], + "name": rom_name, + "archive": archive_name, + "required": True, + "size": rom["size"], + "crc32": rom["crc32"], } - if rom.get('sha1'): - entry['sha1'] = rom['sha1'] + if rom.get("sha1"): + entry["sha1"] = rom["sha1"] if source_ref: - entry['source_ref'] = source_ref + entry["source_ref"] = source_ref merged.append(entry) # Entries not matched stay untouched @@ -152,7 +152,7 @@ def merge_fbneo_profile( if key not in matched_keys: merged.append(entry) - profile['files'] = non_archive + merged + profile["files"] = non_archive + merged if write: _backup_and_write_fbneo(profile_path, profile, hashes) @@ -163,7 +163,7 @@ def merge_fbneo_profile( def compute_diff( profile_path: str, hashes_path: str, - mode: str = 'mame', + mode: str = "mame", ) -> dict[str, Any]: """Compute diff between profile and hashes without writing. @@ -172,7 +172,7 @@ def compute_diff( profile = _load_yaml(profile_path) hashes = _load_json(hashes_path) - if mode == 'mame': + if mode == "mame": return _diff_mame(profile, hashes) return _diff_fbneo(profile, hashes) @@ -181,26 +181,26 @@ def _diff_mame( profile: dict[str, Any], hashes: dict[str, Any], ) -> dict[str, Any]: - files = profile.get('files', []) - bios_zip, _ = _split_files(files, lambda f: f.get('category') == 'bios_zip') + files = profile.get("files", []) + bios_zip, _ = _split_files(files, lambda f: f.get("category") == "bios_zip") existing_by_name: dict[str, dict] = {} for entry in bios_zip: - existing_by_name[_zip_name_to_set(entry['name'])] = entry + existing_by_name[_zip_name_to_set(entry["name"])] = entry added: list[str] = [] updated: list[str] = [] unchanged = 0 - bios_sets = hashes.get('bios_sets', {}) + bios_sets = hashes.get("bios_sets", {}) for set_name, set_data in bios_sets.items(): if set_name not in existing_by_name: added.append(set_name) continue old_entry = existing_by_name[set_name] - new_contents = _build_contents(set_data.get('roms', [])) - old_contents = old_entry.get('contents', []) + new_contents = _build_contents(set_data.get("roms", [])) + old_contents = old_entry.get("contents", []) if _contents_differ(old_contents, new_contents): updated.append(set_name) @@ -213,11 +213,11 @@ def _diff_mame( ) return { - 'added': added, - 'updated': updated, - 'removed': [], - 'unchanged': unchanged, - 'out_of_scope': out_of_scope, + "added": added, + "updated": updated, + "removed": [], + "unchanged": unchanged, + "out_of_scope": out_of_scope, } @@ -225,24 +225,24 @@ def _diff_fbneo( profile: dict[str, Any], hashes: dict[str, Any], ) -> dict[str, Any]: - files = profile.get('files', []) - archive_files, _ = _split_files(files, lambda f: 'archive' in f) + files = profile.get("files", []) + archive_files, _ = _split_files(files, lambda f: "archive" in f) existing_by_key: dict[tuple[str, str], dict] = {} for entry in archive_files: - existing_by_key[(entry['archive'], entry['name'])] = entry + existing_by_key[(entry["archive"], entry["name"])] = entry added: list[str] = [] updated: list[str] = [] unchanged = 0 seen_keys: set[tuple[str, str]] = set() - bios_sets = hashes.get('bios_sets', {}) + bios_sets = hashes.get("bios_sets", {}) for set_name, set_data in bios_sets.items(): - archive_name = f'{set_name}.zip' - for rom in set_data.get('roms', []): - key = (archive_name, rom['name']) + archive_name = f"{set_name}.zip" + for rom in set_data.get("roms", []): + key = (archive_name, rom["name"]) seen_keys.add(key) label = f"{archive_name}:{rom['name']}" @@ -251,7 +251,9 @@ def _diff_fbneo( continue old = existing_by_key[key] - if old.get('crc32') != rom.get('crc32') or old.get('size') != rom.get('size'): + if old.get("crc32") != rom.get("crc32") or old.get("size") != rom.get( + "size" + ): updated.append(label) else: unchanged += 1 @@ -259,11 +261,11 @@ def _diff_fbneo( out_of_scope = sum(1 for k in existing_by_key if k not in seen_keys) return { - 'added': added, - 'updated': updated, - 'removed': [], - 'unchanged': unchanged, - 'out_of_scope': out_of_scope, + "added": added, + "updated": updated, + "removed": [], + "unchanged": unchanged, + "out_of_scope": out_of_scope, } @@ -271,12 +273,12 @@ def _diff_fbneo( def _load_yaml(path: str) -> dict[str, Any]: - with open(path, encoding='utf-8') as f: + with open(path, encoding="utf-8") as f: return yaml.safe_load(f) or {} def _load_json(path: str) -> dict[str, Any]: - with open(path, encoding='utf-8') as f: + with open(path, encoding="utf-8") as f: return json.load(f) @@ -295,7 +297,7 @@ def _split_files( def _zip_name_to_set(name: str) -> str: - if name.endswith('.zip'): + if name.endswith(".zip"): return name[:-4] return name @@ -304,42 +306,42 @@ def _build_contents(roms: list[dict]) -> list[dict]: contents: list[dict] = [] for rom in roms: entry: dict[str, Any] = { - 'name': rom['name'], - 'size': rom['size'], - 'crc32': rom['crc32'], + "name": rom["name"], + "size": rom["size"], + "crc32": rom["crc32"], } - if rom.get('sha1'): - entry['sha1'] = rom['sha1'] - desc = rom.get('bios_description') or rom.get('bios_label') or '' + if rom.get("sha1"): + entry["sha1"] = rom["sha1"] + desc = rom.get("bios_description") or rom.get("bios_label") or "" if desc: - entry['description'] = desc - if rom.get('bad_dump'): - entry['bad_dump'] = True + entry["description"] = desc + if rom.get("bad_dump"): + entry["bad_dump"] = True contents.append(entry) return contents def _build_source_ref(set_data: dict) -> str: - source_file = set_data.get('source_file', '') - source_line = set_data.get('source_line') + source_file = set_data.get("source_file", "") + source_line = set_data.get("source_line") if source_file and source_line is not None: - return f'{source_file}:{source_line}' + return f"{source_file}:{source_line}" return source_file def _contents_differ(old: list[dict], new: list[dict]) -> bool: if len(old) != len(new): return True - old_by_name = {c['name']: c for c in old} + old_by_name = {c["name"]: c for c in old} for entry in new: - prev = old_by_name.get(entry['name']) + prev = old_by_name.get(entry["name"]) if prev is None: return True - if prev.get('crc32') != entry.get('crc32'): + if prev.get("crc32") != entry.get("crc32"): return True - if prev.get('size') != entry.get('size'): + if prev.get("size") != entry.get("size"): return True - if prev.get('sha1') != entry.get('sha1'): + if prev.get("sha1") != entry.get("sha1"): return True return False @@ -352,15 +354,15 @@ def _backup_and_write(path: str, data: dict) -> None: (core_version, contents, source_ref), and appends new entries. """ p = Path(path) - backup = p.with_suffix('.old.yml') + backup = p.with_suffix(".old.yml") shutil.copy2(p, backup) - original = p.read_text(encoding='utf-8') - patched = _patch_core_version(original, data.get('core_version', '')) - patched = _patch_bios_entries(patched, data.get('files', [])) - patched = _append_new_entries(patched, data.get('files', []), original) + original = p.read_text(encoding="utf-8") + patched = _patch_core_version(original, data.get("core_version", "")) + patched = _patch_bios_entries(patched, data.get("files", [])) + patched = _append_new_entries(patched, data.get("files", []), original) - p.write_text(patched, encoding='utf-8') + p.write_text(patched, encoding="utf-8") def _patch_core_version(text: str, version: str) -> str: @@ -368,8 +370,9 @@ def _patch_core_version(text: str, version: str) -> str: if not version: return text import re + return re.sub( - r'^(core_version:\s*).*$', + r"^(core_version:\s*).*$", rf'\g<1>"{version}"', text, count=1, @@ -390,18 +393,18 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: # Build a lookup of what to patch patches: dict[str, dict] = {} for fe in files: - if fe.get('category') != 'bios_zip': + if fe.get("category") != "bios_zip": continue - patches[fe['name']] = fe + patches[fe["name"]] = fe if not patches: return text - lines = text.split('\n') + lines = text.split("\n") # Find all entry start positions (line indices) entry_starts: list[tuple[int, str]] = [] for i, line in enumerate(lines): - m = re.match(r'^ - name:\s*(.+?)\s*$', line) + m = re.match(r"^ - name:\s*(.+?)\s*$", line) if m: entry_starts.append((i, m.group(1).strip('"').strip("'"))) @@ -412,8 +415,8 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: continue fe = patches[entry_name] - contents = fe.get('contents', []) - source_ref = fe.get('source_ref', '') + contents = fe.get("contents", []) + source_ref = fe.get("source_ref", "") # Find the last "owned" line of this entry # Owned = indented with 4+ spaces (field lines of this entry) @@ -422,11 +425,11 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: stripped = lines[j].strip() if not stripped: break # blank line = end of entry - if stripped.startswith('#'): + if stripped.startswith("#"): break # comment = belongs to next entry - if re.match(r'^ - ', lines[j]): + if re.match(r"^ - ", lines[j]): break # next list item - if re.match(r'^ ', lines[j]) or re.match(r'^ \w', lines[j]): + if re.match(r"^ ", lines[j]) or re.match(r"^ \w", lines[j]): last_owned = j else: break @@ -435,7 +438,7 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: if source_ref: found_sr = False for j in range(start_line + 1, last_owned + 1): - if re.match(r'^ source_ref:', lines[j]): + if re.match(r"^ source_ref:", lines[j]): lines[j] = f' source_ref: "{source_ref}"' found_sr = True break @@ -447,10 +450,10 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: contents_start = None contents_end = None for j in range(start_line + 1, last_owned + 1): - if re.match(r'^ contents:', lines[j]): + if re.match(r"^ contents:", lines[j]): contents_start = j elif contents_start is not None: - if re.match(r'^ ', lines[j]): + if re.match(r"^ ", lines[j]): contents_end = j else: break @@ -458,29 +461,29 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str: contents_end = contents_start if contents_start is not None: - del lines[contents_start:contents_end + 1] - last_owned -= (contents_end - contents_start + 1) + del lines[contents_start : contents_end + 1] + last_owned -= contents_end - contents_start + 1 # Insert new contents after last owned line if contents: - new_lines = _format_contents(contents).split('\n') + new_lines = _format_contents(contents).split("\n") for k, cl in enumerate(new_lines): lines.insert(last_owned + 1 + k, cl) - return '\n'.join(lines) + return "\n".join(lines) def _append_new_entries(text: str, files: list[dict], original: str) -> str: """Append new bios_zip entries (system=None) that aren't in the original.""" # Parse original to get existing entry names (more reliable than text search) existing_data = yaml.safe_load(original) or {} - existing_names = {f['name'] for f in existing_data.get('files', [])} + existing_names = {f["name"] for f in existing_data.get("files", [])} new_entries = [] for fe in files: - if fe.get('category') != 'bios_zip' or fe.get('system') is not None: + if fe.get("category") != "bios_zip" or fe.get("system") is not None: continue - if fe['name'] in existing_names: + if fe["name"] in existing_names: continue new_entries.append(fe) @@ -489,36 +492,36 @@ def _append_new_entries(text: str, files: list[dict], original: str) -> str: lines = [] for fe in new_entries: - lines.append(f'\n - name: {fe["name"]}') - lines.append(f' required: {str(fe["required"]).lower()}') - lines.append(f' category: bios_zip') - if fe.get('source_ref'): + lines.append(f"\n - name: {fe['name']}") + lines.append(f" required: {str(fe['required']).lower()}") + lines.append(" category: bios_zip") + if fe.get("source_ref"): lines.append(f' source_ref: "{fe["source_ref"]}"') - if fe.get('contents'): - lines.append(_format_contents(fe['contents'])) + if fe.get("contents"): + lines.append(_format_contents(fe["contents"])) if lines: - text = text.rstrip('\n') + '\n' + '\n'.join(lines) + '\n' + text = text.rstrip("\n") + "\n" + "\n".join(lines) + "\n" return text def _format_contents(contents: list[dict]) -> str: """Format a contents list as YAML text.""" - lines = [' contents:'] + lines = [" contents:"] for rom in contents: - lines.append(f' - name: {rom["name"]}') - if rom.get('description'): - lines.append(f' description: {rom["description"]}') - if rom.get('size'): - lines.append(f' size: {rom["size"]}') - if rom.get('crc32'): + lines.append(f" - name: {rom['name']}") + if rom.get("description"): + lines.append(f" description: {rom['description']}") + if rom.get("size"): + lines.append(f" size: {rom['size']}") + if rom.get("crc32"): lines.append(f' crc32: "{rom["crc32"]}"') - if rom.get('sha1'): + if rom.get("sha1"): lines.append(f' sha1: "{rom["sha1"]}"') - if rom.get('bad_dump'): - lines.append(f' bad_dump: true') - return '\n'.join(lines) + if rom.get("bad_dump"): + lines.append(" bad_dump: true") + return "\n".join(lines) def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None: @@ -529,37 +532,38 @@ def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None: Existing entries are left untouched (CRC32 changes are rare). """ p = Path(path) - backup = p.with_suffix('.old.yml') + backup = p.with_suffix(".old.yml") shutil.copy2(p, backup) - original = p.read_text(encoding='utf-8') - patched = _patch_core_version(original, data.get('core_version', '')) + original = p.read_text(encoding="utf-8") + patched = _patch_core_version(original, data.get("core_version", "")) # Identify new ROM entries by comparing parsed data keys, not text search existing_data = yaml.safe_load(original) or {} existing_keys = { - (f['archive'], f['name']) - for f in existing_data.get('files', []) - if f.get('archive') + (f["archive"], f["name"]) + for f in existing_data.get("files", []) + if f.get("archive") } new_roms = [ - f for f in data.get('files', []) - if f.get('archive') and (f['archive'], f['name']) not in existing_keys + f + for f in data.get("files", []) + if f.get("archive") and (f["archive"], f["name"]) not in existing_keys ] if new_roms: lines = [] for fe in new_roms: lines.append(f' - name: "{fe["name"]}"') - lines.append(f' archive: {fe["archive"]}') - lines.append(f' required: {str(fe.get("required", True)).lower()}') - if fe.get('size'): - lines.append(f' size: {fe["size"]}') - if fe.get('crc32'): + lines.append(f" archive: {fe['archive']}") + lines.append(f" required: {str(fe.get('required', True)).lower()}") + if fe.get("size"): + lines.append(f" size: {fe['size']}") + if fe.get("crc32"): lines.append(f' crc32: "{fe["crc32"]}"') - if fe.get('source_ref'): + if fe.get("source_ref"): lines.append(f' source_ref: "{fe["source_ref"]}"') - lines.append('') - patched = patched.rstrip('\n') + '\n\n' + '\n'.join(lines) + lines.append("") + patched = patched.rstrip("\n") + "\n\n" + "\n".join(lines) - p.write_text(patched, encoding='utf-8') + p.write_text(patched, encoding="utf-8") diff --git a/scripts/scraper/base_scraper.py b/scripts/scraper/base_scraper.py index 00e70a6b..484fb3ec 100644 --- a/scripts/scraper/base_scraper.py +++ b/scripts/scraper/base_scraper.py @@ -4,8 +4,8 @@ from __future__ import annotations import json import sys -import urllib.request import urllib.error +import urllib.request from abc import ABC, abstractmethod from dataclasses import dataclass, field from pathlib import Path @@ -14,6 +14,7 @@ from pathlib import Path @dataclass class BiosRequirement: """A single BIOS file requirement from a platform source.""" + name: str system: str sha1: str | None = None @@ -29,9 +30,12 @@ class BiosRequirement: @dataclass class ChangeSet: """Differences between scraped requirements and current config.""" + added: list[BiosRequirement] = field(default_factory=list) removed: list[BiosRequirement] = field(default_factory=list) - modified: list[tuple[BiosRequirement, BiosRequirement]] = field(default_factory=list) + modified: list[tuple[BiosRequirement, BiosRequirement]] = field( + default_factory=list + ) @property def has_changes(self) -> bool: @@ -80,7 +84,9 @@ class BaseScraper(ABC): if not self.url: raise ValueError("No source URL configured") try: - req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"}) + req = urllib.request.Request( + self.url, headers={"User-Agent": "retrobios-scraper/1.0"} + ) with urllib.request.urlopen(req, timeout=30) as resp: self._raw_data = _read_limited(resp).decode("utf-8") return self._raw_data @@ -113,35 +119,49 @@ class BaseScraper(ABC): changes.added.append(req) else: existing_file = existing[key] - if req.sha1 and existing_file.get("sha1") and req.sha1 != existing_file["sha1"]: - changes.modified.append(( - BiosRequirement( - name=existing_file["name"], - system=key[0], - sha1=existing_file.get("sha1"), - md5=existing_file.get("md5"), - ), - req, - )) - elif req.md5 and existing_file.get("md5") and req.md5 != existing_file["md5"]: - changes.modified.append(( - BiosRequirement( - name=existing_file["name"], - system=key[0], - md5=existing_file.get("md5"), - ), - req, - )) + if ( + req.sha1 + and existing_file.get("sha1") + and req.sha1 != existing_file["sha1"] + ): + changes.modified.append( + ( + BiosRequirement( + name=existing_file["name"], + system=key[0], + sha1=existing_file.get("sha1"), + md5=existing_file.get("md5"), + ), + req, + ) + ) + elif ( + req.md5 + and existing_file.get("md5") + and req.md5 != existing_file["md5"] + ): + changes.modified.append( + ( + BiosRequirement( + name=existing_file["name"], + system=key[0], + md5=existing_file.get("md5"), + ), + req, + ) + ) for key in existing: if key not in scraped_map: f = existing[key] - changes.removed.append(BiosRequirement( - name=f["name"], - system=key[0], - sha1=f.get("sha1"), - md5=f.get("md5"), - )) + changes.removed.append( + BiosRequirement( + name=f["name"], + system=key[0], + sha1=f.get("sha1"), + md5=f.get("md5"), + ) + ) return changes @@ -163,10 +183,13 @@ def fetch_github_latest_version(repo: str) -> str | None: """Fetch the latest release version tag from a GitHub repo.""" url = f"https://api.github.com/repos/{repo}/releases/latest" try: - req = urllib.request.Request(url, headers={ - "User-Agent": "retrobios-scraper/1.0", - "Accept": "application/vnd.github.v3+json", - }) + req = urllib.request.Request( + url, + headers={ + "User-Agent": "retrobios-scraper/1.0", + "Accept": "application/vnd.github.v3+json", + }, + ) with urllib.request.urlopen(req, timeout=15) as resp: data = json.loads(resp.read()) return data.get("tag_name", "") @@ -174,7 +197,9 @@ def fetch_github_latest_version(repo: str) -> str | None: return None -def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirements") -> None: +def scraper_cli( + scraper_class: type, description: str = "Scrape BIOS requirements" +) -> None: """Shared CLI entry point for all scrapers. Eliminates main() boilerplate.""" import argparse @@ -203,13 +228,23 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement return if args.json: - data = [{"name": r.name, "system": r.system, "sha1": r.sha1, "md5": r.md5, - "size": r.size, "required": r.required} for r in reqs] + data = [ + { + "name": r.name, + "system": r.system, + "sha1": r.sha1, + "md5": r.md5, + "size": r.size, + "required": r.required, + } + for r in reqs + ] print(json.dumps(data, indent=2)) return if args.output: import yaml + # Use scraper's generate_platform_yaml() if available (includes # platform metadata, cores list, standalone_cores, etc.) if hasattr(scraper, "generate_platform_yaml"): @@ -224,7 +259,11 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement if req.native_id: sys_entry["native_id"] = req.native_id config["systems"][sys_id] = sys_entry - entry = {"name": req.name, "destination": req.destination or req.name, "required": req.required} + entry = { + "name": req.name, + "destination": req.destination or req.name, + "required": req.required, + } if req.sha1: entry["sha1"] = req.sha1 if req.md5: @@ -265,10 +304,13 @@ def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None: """Fetch the most recent matching tag from a GitHub repo.""" url = f"https://api.github.com/repos/{repo}/tags?per_page=50" try: - req = urllib.request.Request(url, headers={ - "User-Agent": "retrobios-scraper/1.0", - "Accept": "application/vnd.github.v3+json", - }) + req = urllib.request.Request( + url, + headers={ + "User-Agent": "retrobios-scraper/1.0", + "Accept": "application/vnd.github.v3+json", + }, + ) with urllib.request.urlopen(req, timeout=15) as resp: tags = json.loads(resp.read()) for tag in tags: diff --git a/scripts/scraper/batocera_scraper.py b/scripts/scraper/batocera_scraper.py index 9bdf76a9..cc9cc184 100644 --- a/scripts/scraper/batocera_scraper.py +++ b/scripts/scraper/batocera_scraper.py @@ -12,8 +12,8 @@ import ast import json import re import sys -import urllib.request import urllib.error +import urllib.request from pathlib import Path import yaml @@ -102,7 +102,6 @@ SYSTEM_SLUG_MAP = { "dragon64": "dragon64", "mc10": "mc10", "msx2+": "microsoft-msx", - "msxturbor": "microsoft-msx", "spectravideo": "spectravideo", "tvc": "videoton-tvc", "enterprise": "enterprise-64-128", @@ -116,7 +115,7 @@ SYSTEM_SLUG_MAP = { } -_MD5_RE = re.compile(r'^[a-fA-F0-9]+$') +_MD5_RE = re.compile(r"^[a-fA-F0-9]+$") def _load_md5_index() -> dict[str, str]: @@ -183,11 +182,11 @@ class Scraper(BaseScraper): def _extract_systems_dict(self, raw: str) -> dict: """Extract and parse the 'systems' dict from the Python source via ast.literal_eval.""" - match = re.search(r'^systems\s*=\s*\{', raw, re.MULTILINE) + match = re.search(r"^systems\s*=\s*\{", raw, re.MULTILINE) if not match: raise ValueError("Could not find 'systems = {' in batocera-systems") - start = match.start() + raw[match.start():].index("{") + start = match.start() + raw[match.start() :].index("{") depth = 0 i = start in_str = False @@ -195,7 +194,7 @@ class Scraper(BaseScraper): while i < len(raw): ch = raw[i] if in_str: - if ch == '\\': + if ch == "\\": i += 2 continue if ch == str_ch: @@ -214,7 +213,7 @@ class Scraper(BaseScraper): i += 1 i += 1 - dict_str = raw[start:i + 1] + dict_str = raw[start : i + 1] lines = [] for line in dict_str.split("\n"): @@ -224,7 +223,7 @@ class Scraper(BaseScraper): j = 0 while j < len(line): ch = line[j] - if ch == '\\' and j + 1 < len(line): + if ch == "\\" and j + 1 < len(line): clean.append(ch) clean.append(line[j + 1]) j += 2 @@ -246,8 +245,8 @@ class Scraper(BaseScraper): clean_dict_str = "\n".join(lines) # OrderedDict({...}) -> just the inner dict literal - clean_dict_str = re.sub(r'OrderedDict\(\s*\{', '{', clean_dict_str) - clean_dict_str = re.sub(r'\}\s*\)', '}', clean_dict_str) + clean_dict_str = re.sub(r"OrderedDict\(\s*\{", "{", clean_dict_str) + clean_dict_str = re.sub(r"\}\s*\)", "}", clean_dict_str) try: return ast.literal_eval(clean_dict_str) @@ -279,22 +278,24 @@ class Scraper(BaseScraper): name = file_path.split("/")[-1] if "/" in file_path else file_path - requirements.append(BiosRequirement( - name=name, - system=system_slug, - md5=md5 or None, - destination=file_path, - required=True, - zipped_file=zipped_file or None, - native_id=sys_key, - )) + requirements.append( + BiosRequirement( + name=name, + system=system_slug, + md5=md5 or None, + destination=file_path, + required=True, + zipped_file=zipped_file or None, + native_id=sys_key, + ) + ) return requirements def validate_format(self, raw_data: str) -> bool: """Validate batocera-systems format.""" has_systems = "systems" in raw_data and "biosFiles" in raw_data - has_dict = re.search(r'^systems\s*=\s*\{', raw_data, re.MULTILINE) is not None + has_dict = re.search(r"^systems\s*=\s*\{", raw_data, re.MULTILINE) is not None has_md5 = '"md5"' in raw_data has_file = '"file"' in raw_data return has_systems and has_dict and has_md5 and has_file @@ -336,7 +337,9 @@ class Scraper(BaseScraper): systems[req.system]["files"].append(entry) - tag = fetch_github_latest_tag("batocera-linux/batocera.linux", prefix="batocera-") + tag = fetch_github_latest_tag( + "batocera-linux/batocera.linux", prefix="batocera-" + ) batocera_version = "" if tag: num = tag.removeprefix("batocera-") @@ -344,7 +347,9 @@ class Scraper(BaseScraper): batocera_version = num if not batocera_version: # Preserve existing version when fetch fails (offline mode) - existing = Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml" + existing = ( + Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml" + ) if existing.exists(): with open(existing) as f: old = yaml.safe_load(f) or {} @@ -369,6 +374,7 @@ class Scraper(BaseScraper): def main(): from scripts.scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape batocera BIOS requirements") diff --git a/scripts/scraper/bizhawk_scraper.py b/scripts/scraper/bizhawk_scraper.py index e81221af..406400a2 100644 --- a/scripts/scraper/bizhawk_scraper.py +++ b/scripts/scraper/bizhawk_scraper.py @@ -19,7 +19,6 @@ the Ideal non-bad option is selected as canonical. from __future__ import annotations import re -import sys try: from .base_scraper import ( @@ -108,12 +107,33 @@ SYSTEM_ID_MAP: dict[str, str] = { # Cores that overlap with BizHawk's system coverage BIZHAWK_CORES = [ - "gambatte", "mgba", "sameboy", "melonds", "snes9x", "bsnes", - "beetle_psx", "beetle_saturn", "beetle_pce", "beetle_pcfx", - "beetle_wswan", "beetle_vb", "beetle_ngp", "opera", "stella", - "picodrive", "ppsspp", "handy", "quicknes", "genesis_plus_gx", - "ares", "mupen64plus_next", "puae", "prboom", "virtualjaguar", - "vice_x64", "mame", + "gambatte", + "mgba", + "sameboy", + "melonds", + "snes9x", + "bsnes", + "beetle_psx", + "beetle_saturn", + "beetle_pce", + "beetle_pcfx", + "beetle_wswan", + "beetle_vb", + "beetle_ngp", + "opera", + "stella", + "picodrive", + "ppsspp", + "handy", + "quicknes", + "genesis_plus_gx", + "ares", + "mupen64plus_next", + "puae", + "prboom", + "virtualjaguar", + "vice_x64", + "mame", ] @@ -137,9 +157,7 @@ def _safe_arithmetic(expr: str) -> int: def _strip_comments(source: str) -> str: """Remove block comments and #if false blocks.""" source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL) - source = re.sub( - r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL - ) + source = re.sub(r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL) return source @@ -158,14 +176,14 @@ def parse_firmware_database( var_to_hash: dict[str, str] = {} file_re = re.compile( - r'(?:var\s+(\w+)\s*=\s*)?' - r'File\(\s*' + r"(?:var\s+(\w+)\s*=\s*)?" + r"File\(\s*" r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*' - r'([^,]+?)\s*,\s*' + r"([^,]+?)\s*,\s*" r'"([^"]+)"\s*,\s*' r'"([^"]*)"' - r'(?:\s*,\s*isBad:\s*(true|false))?' - r'\s*\)' + r"(?:\s*,\s*isBad:\s*(true|false))?" + r"\s*\)" ) for m in file_re.finditer(source): @@ -194,15 +212,15 @@ def parse_firmware_database( # FirmwareAndOption one-liner fao_re = re.compile( - r'FirmwareAndOption\(\s*' + r"FirmwareAndOption\(\s*" r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*' - r'([^,]+?)\s*,\s*' + r"([^,]+?)\s*,\s*" r'"([^"]+)"\s*,\s*' r'"([^"]+)"\s*,\s*' r'"([^"]+)"\s*,\s*' r'"([^"]*)"' - r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?' - r'\s*\)' + r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?" + r"\s*\)" ) # Firmware(system, id, desc) @@ -213,10 +231,10 @@ def parse_firmware_database( # Option(system, id, in varref|File(...), status?) option_re = re.compile( r'Option\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*' - r'(?:in\s+(\w+)' + r"(?:in\s+(\w+)" r'|File\(\s*"([A-Fa-f0-9]+)"\s*,\s*([^,]+?)\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\))' - r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?' - r'\s*\)' + r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?" + r"\s*\)" ) # Collect firmware slots @@ -269,15 +287,17 @@ def parse_firmware_database( desc = m.group(6) status = m.group(7) or "Acceptable" - records.append({ - "system": system, - "firmware_id": fw_id, - "sha1": sha1, - "name": name, - "size": _safe_arithmetic(size_expr), - "description": desc, - "status": status, - }) + records.append( + { + "system": system, + "firmware_id": fw_id, + "sha1": sha1, + "name": name, + "size": _safe_arithmetic(size_expr), + "description": desc, + "status": status, + } + ) # Build records from Firmware+Option pairs, picking best option for (system, fw_id), options in slot_options.items(): @@ -291,15 +311,17 @@ def parse_firmware_database( viable.sort(key=lambda x: STATUS_RANK.get(x[1], 2), reverse=True) best_file, best_status = viable[0] - records.append({ - "system": system, - "firmware_id": fw_id, - "sha1": best_file["sha1"], - "name": best_file["name"], - "size": best_file["size"], - "description": best_file.get("description", desc), - "status": best_status, - }) + records.append( + { + "system": system, + "firmware_id": fw_id, + "sha1": best_file["sha1"], + "name": best_file["name"], + "size": best_file["size"], + "description": best_file.get("description", desc), + "status": best_status, + } + ) return records, files_by_hash diff --git a/scripts/scraper/coreinfo_scraper.py b/scripts/scraper/coreinfo_scraper.py index 15891e5b..3d471b0a 100644 --- a/scripts/scraper/coreinfo_scraper.py +++ b/scripts/scraper/coreinfo_scraper.py @@ -13,19 +13,24 @@ Complements libretro_scraper (System.dat) with: from __future__ import annotations +import json import re import sys -import urllib.request import urllib.error -import json +import urllib.request try: from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version except ImportError: # Allow running directly: python scripts/scraper/coreinfo_scraper.py import os + sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - from scraper.base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version + from scraper.base_scraper import ( + BaseScraper, + BiosRequirement, + fetch_github_latest_version, + ) PLATFORM_NAME = "libretro_coreinfo" @@ -168,11 +173,13 @@ def _extract_firmware(info: dict) -> list[dict]: if _is_native_lib(path): continue - firmware.append({ - "path": path, - "desc": desc, - "optional": opt.lower() == "true", - }) + firmware.append( + { + "path": path, + "desc": desc, + "optional": opt.lower() == "true", + } + ) return firmware @@ -182,7 +189,7 @@ def _extract_md5_from_notes(info: dict) -> dict[str, str]: notes = info.get("notes", "") md5_map = {} - for match in re.finditer(r'\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})', notes): + for match in re.finditer(r"\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})", notes): filename = match.group(1).strip() md5 = match.group(2) md5_map[filename] = md5 @@ -202,15 +209,19 @@ class Scraper(BaseScraper): # Use the tree API to get all files at once url = f"{GITHUB_API}/git/trees/master?recursive=1" try: - req = urllib.request.Request(url, headers={ - "User-Agent": "retrobios-scraper/1.0", - "Accept": "application/vnd.github.v3+json", - }) + req = urllib.request.Request( + url, + headers={ + "User-Agent": "retrobios-scraper/1.0", + "Accept": "application/vnd.github.v3+json", + }, + ) with urllib.request.urlopen(req, timeout=30) as resp: data = json.loads(resp.read()) return [ - item["path"] for item in data.get("tree", []) + item["path"] + for item in data.get("tree", []) if item["path"].endswith("_libretro.info") ] except (urllib.error.URLError, json.JSONDecodeError) as e: @@ -220,7 +231,9 @@ class Scraper(BaseScraper): """Fetch and parse a single .info file.""" url = f"{RAW_BASE}/{filename}" try: - req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"}) + req = urllib.request.Request( + url, headers={"User-Agent": "retrobios-scraper/1.0"} + ) with urllib.request.urlopen(req, timeout=15) as resp: content = resp.read().decode("utf-8") return _parse_info_file(content) @@ -253,17 +266,25 @@ class Scraper(BaseScraper): basename = path.split("/")[-1] if "/" in path else path # Full path when basename is generic to avoid SGB1.sfc/program.rom vs SGB2.sfc/program.rom collisions - GENERIC_NAMES = {"program.rom", "data.rom", "boot.rom", "bios.bin", "firmware.bin"} + GENERIC_NAMES = { + "program.rom", + "data.rom", + "boot.rom", + "bios.bin", + "firmware.bin", + } name = path if basename.lower() in GENERIC_NAMES else basename md5 = md5_map.get(basename) - requirements.append(BiosRequirement( - name=name, - system=system, - md5=md5, - destination=path, - required=not fw["optional"], - )) + requirements.append( + BiosRequirement( + name=name, + system=system, + md5=md5, + destination=path, + required=not fw["optional"], + ) + ) return requirements @@ -281,7 +302,9 @@ def main(): """CLI entry point.""" import argparse - parser = argparse.ArgumentParser(description="Scrape libretro-core-info firmware requirements") + parser = argparse.ArgumentParser( + description="Scrape libretro-core-info firmware requirements" + ) parser.add_argument("--dry-run", action="store_true") parser.add_argument("--compare-db", help="Compare against database.json") args = parser.parse_args() @@ -296,6 +319,7 @@ def main(): if args.compare_db: import json as _json + with open(args.compare_db) as f: db = _json.load(f) @@ -320,6 +344,7 @@ def main(): return from collections import defaultdict + by_system = defaultdict(list) for r in reqs: by_system[r.system].append(r) diff --git a/scripts/scraper/dat_parser.py b/scripts/scraper/dat_parser.py index e7b7fc2b..07160f9e 100644 --- a/scripts/scraper/dat_parser.py +++ b/scripts/scraper/dat_parser.py @@ -10,13 +10,13 @@ Parses files like libretro's System.dat which uses the format: from __future__ import annotations -import re from dataclasses import dataclass @dataclass class DatRom: """A ROM entry from a DAT file.""" + name: str size: int crc32: str @@ -28,6 +28,7 @@ class DatRom: @dataclass class DatMetadata: """Metadata from a DAT file header.""" + name: str = "" version: str = "" description: str = "" @@ -53,7 +54,10 @@ def parse_dat(content: str) -> list[DatRom]: if stripped.startswith("comment "): value = stripped[8:].strip().strip('"') - if value in ("System", "System, firmware, and BIOS files used by libretro cores."): + if value in ( + "System", + "System, firmware, and BIOS files used by libretro cores.", + ): continue current_system = value @@ -78,9 +82,16 @@ def parse_dat_metadata(content: str) -> DatMetadata: if in_header and stripped == ")": break if in_header: - for field in ("name", "version", "description", "author", "homepage", "url"): + for field in ( + "name", + "version", + "description", + "author", + "homepage", + "url", + ): if stripped.startswith(f"{field} "): - value = stripped[len(field) + 1:].strip().strip('"') + value = stripped[len(field) + 1 :].strip().strip('"') setattr(meta, field, value) return meta @@ -94,7 +105,7 @@ def _parse_rom_line(line: str, system: str) -> DatRom | None: if start == -1 or end == -1 or end <= start: return None - content = line[start + 1:end].strip() + content = line[start + 1 : end].strip() fields = {} i = 0 diff --git a/scripts/scraper/emudeck_scraper.py b/scripts/scraper/emudeck_scraper.py index 032a3664..81c77536 100644 --- a/scripts/scraper/emudeck_scraper.py +++ b/scripts/scraper/emudeck_scraper.py @@ -14,9 +14,8 @@ from __future__ import annotations import csv import io import re -import sys -import urllib.request import urllib.error +import urllib.request try: from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version @@ -31,8 +30,7 @@ CHECKBIOS_URL = ( ) CSV_BASE_URL = ( - "https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/" - "main/docs/tables" + "https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables" ) CSV_SHEETS = [ @@ -117,10 +115,22 @@ KNOWN_BIOS_FILES = { {"name": "scph5502.bin", "destination": "scph5502.bin", "region": "EU"}, ], "sony-playstation-2": [ - {"name": "SCPH-70004_BIOS_V12_EUR_200.BIN", "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN"}, - {"name": "SCPH-70004_BIOS_V12_EUR_200.EROM", "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM"}, - {"name": "SCPH-70004_BIOS_V12_EUR_200.ROM1", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1"}, - {"name": "SCPH-70004_BIOS_V12_EUR_200.ROM2", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2"}, + { + "name": "SCPH-70004_BIOS_V12_EUR_200.BIN", + "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN", + }, + { + "name": "SCPH-70004_BIOS_V12_EUR_200.EROM", + "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM", + }, + { + "name": "SCPH-70004_BIOS_V12_EUR_200.ROM1", + "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1", + }, + { + "name": "SCPH-70004_BIOS_V12_EUR_200.ROM2", + "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2", + }, ], "sega-mega-cd": [ {"name": "bios_CD_E.bin", "destination": "bios_CD_E.bin", "region": "EU"}, @@ -157,17 +167,17 @@ KNOWN_BIOS_FILES = { } _RE_ARRAY = re.compile( - r'(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)', + r"(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)", re.MULTILINE, ) _RE_FUNC = re.compile( - r'function\s+(check\w+Bios)\s*\(\)', + r"function\s+(check\w+Bios)\s*\(\)", re.MULTILINE, ) _RE_LOCAL_HASHES = re.compile( - r'local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)', + r"local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)", re.MULTILINE, ) @@ -184,7 +194,9 @@ def _fetch_url(url: str) -> str: class Scraper(BaseScraper): """Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets.""" - def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL): + def __init__( + self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL + ): super().__init__(url=checkbios_url) self.checkbios_url = checkbios_url self.csv_base_url = csv_base_url @@ -241,12 +253,12 @@ class Scraper(BaseScraper): @staticmethod def _clean_markdown(text: str) -> str: """Strip markdown/HTML artifacts from CSV fields.""" - text = re.sub(r'\*\*', '', text) # bold - text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text) # mkdocs material icons - text = re.sub(r':material-[^:]+:', '', text) - text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # [text](url) -> text - text = re.sub(r'', ' ', text) #
- text = re.sub(r'<[^>]+>', '', text) # remaining HTML + text = re.sub(r"\*\*", "", text) # bold + text = re.sub(r":material-[^:]+:\{[^}]*\}", "", text) # mkdocs material icons + text = re.sub(r":material-[^:]+:", "", text) + text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text) # [text](url) -> text + text = re.sub(r"", " ", text) #
+ text = re.sub(r"<[^>]+>", "", text) # remaining HTML return text.strip() def _parse_csv_bios(self, csv_text: str) -> list[dict]: @@ -274,28 +286,32 @@ class Scraper(BaseScraper): system_col = self._clean_markdown((row[key] or "")) break slug = None - for part in re.split(r'[`\s/]+', folder_col): - part = part.strip().strip('`').lower() + for part in re.split(r"[`\s/]+", folder_col): + part = part.strip().strip("`").lower() if part and part in SYSTEM_SLUG_MAP: slug = SYSTEM_SLUG_MAP[part] break if not slug: - clean = re.sub(r'[^a-z0-9\-]', '', folder_col.strip().strip('`').lower()) + clean = re.sub( + r"[^a-z0-9\-]", "", folder_col.strip().strip("`").lower() + ) slug = clean if clean else "unknown" - entries.append({ - "system": slug, - "system_name": system_col, - "bios_raw": bios_col, - }) + entries.append( + { + "system": slug, + "system_name": system_col, + "bios_raw": bios_col, + } + ) return entries def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]: """Extract individual BIOS filenames from a CSV BIOS field.""" results = [] - bios_raw = re.sub(r'', ' ', bios_raw) - bios_raw = bios_raw.replace('`', '') + bios_raw = re.sub(r"", " ", bios_raw) + bios_raw = bios_raw.replace("`", "") patterns = re.findall( - r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)', + r"[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)", bios_raw, ) for p in patterns: @@ -324,21 +340,25 @@ class Scraper(BaseScraper): if key in seen: continue seen.add(key) - requirements.append(BiosRequirement( - name=f["name"], - system=system, - destination=f.get("destination", f["name"]), - required=True, - )) + requirements.append( + BiosRequirement( + name=f["name"], + system=system, + destination=f.get("destination", f["name"]), + required=True, + ) + ) for md5 in system_hashes: - requirements.append(BiosRequirement( - name=f"{system}:{md5}", - system=system, - md5=md5, - destination="", - required=True, - )) + requirements.append( + BiosRequirement( + name=f"{system}:{md5}", + system=system, + md5=md5, + destination="", + required=True, + ) + ) for sheet in CSV_SHEETS: csv_text = self._fetch_csv(sheet) @@ -353,19 +373,21 @@ class Scraper(BaseScraper): seen.add(key) if system in KNOWN_BIOS_FILES: continue - requirements.append(BiosRequirement( - name=f["name"], - system=system, - destination=f.get("destination", f["name"]), - required=True, - )) + requirements.append( + BiosRequirement( + name=f["name"], + system=system, + destination=f.get("destination", f["name"]), + required=True, + ) + ) return requirements def validate_format(self, raw_data: str) -> bool: has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data - has_md5 = re.search(r'[0-9a-f]{32}', raw_data) is not None + has_md5 = re.search(r"[0-9a-f]{32}", raw_data) is not None return has_ps and has_func and has_md5 def generate_platform_yaml(self) -> dict: @@ -419,14 +441,17 @@ class Scraper(BaseScraper): "contents/functions/EmuScripts" ) name_overrides = { - "pcsx2qt": "pcsx2", "rpcs3legacy": "rpcs3", - "cemuproton": "cemu", "rmg": "mupen64plus_next", + "pcsx2qt": "pcsx2", + "rpcs3legacy": "rpcs3", + "cemuproton": "cemu", + "rmg": "mupen64plus_next", } skip = {"retroarch_maincfg", "retroarch"} try: req = urllib.request.Request( - api_url, headers={"User-Agent": "retrobios-scraper/1.0"}, + api_url, + headers={"User-Agent": "retrobios-scraper/1.0"}, ) data = json.loads(urllib.request.urlopen(req, timeout=30).read()) except (urllib.error.URLError, OSError): @@ -454,6 +479,7 @@ class Scraper(BaseScraper): def main(): from scripts.scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape emudeck BIOS requirements") diff --git a/scripts/scraper/fbneo_hash_scraper.py b/scripts/scraper/fbneo_hash_scraper.py index bd2dd859..5ecc8def 100644 --- a/scripts/scraper/fbneo_hash_scraper.py +++ b/scripts/scraper/fbneo_hash_scraper.py @@ -13,22 +13,22 @@ import logging import shutil import subprocess import sys -from datetime import datetime, timezone, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any import yaml -from scripts.scraper.fbneo_parser import parse_fbneo_source_tree from scripts.scraper._hash_merge import compute_diff, merge_fbneo_profile +from scripts.scraper.fbneo_parser import parse_fbneo_source_tree log = logging.getLogger(__name__) -REPO_URL = 'https://github.com/finalburnneo/FBNeo.git' +REPO_URL = "https://github.com/finalburnneo/FBNeo.git" REPO_ROOT = Path(__file__).resolve().parent.parent.parent -CLONE_DIR = REPO_ROOT / 'tmp' / 'fbneo' -CACHE_PATH = REPO_ROOT / 'data' / 'fbneo-hashes.json' -EMULATORS_DIR = REPO_ROOT / 'emulators' +CLONE_DIR = REPO_ROOT / "tmp" / "fbneo" +CACHE_PATH = REPO_ROOT / "data" / "fbneo-hashes.json" +EMULATORS_DIR = REPO_ROOT / "emulators" STALE_HOURS = 24 @@ -37,8 +37,8 @@ def _is_cache_fresh() -> bool: if not CACHE_PATH.exists(): return False try: - data = json.loads(CACHE_PATH.read_text(encoding='utf-8')) - fetched_at = datetime.fromisoformat(data['fetched_at']) + data = json.loads(CACHE_PATH.read_text(encoding="utf-8")) + fetched_at = datetime.fromisoformat(data["fetched_at"]) return datetime.now(timezone.utc) - fetched_at < timedelta(hours=STALE_HOURS) except (json.JSONDecodeError, KeyError, ValueError): return False @@ -53,8 +53,14 @@ def _sparse_clone() -> None: subprocess.run( [ - 'git', 'clone', '--depth', '1', '--filter=blob:none', - '--sparse', REPO_URL, str(CLONE_DIR), + "git", + "clone", + "--depth", + "1", + "--filter=blob:none", + "--sparse", + REPO_URL, + str(CLONE_DIR), ], check=True, capture_output=True, @@ -62,7 +68,7 @@ def _sparse_clone() -> None: ) subprocess.run( - ['git', 'sparse-checkout', 'set', 'src/burn/drv', 'src/burner/resource.h'], + ["git", "sparse-checkout", "set", "src/burn/drv", "src/burner/resource.h"], cwd=CLONE_DIR, check=True, capture_output=True, @@ -76,42 +82,44 @@ def _extract_version() -> tuple[str, str]: Returns (version, commit_sha). Falls back to resource.h if no tag. """ result = subprocess.run( - ['git', 'describe', '--tags', '--abbrev=0'], + ["git", "describe", "--tags", "--abbrev=0"], cwd=CLONE_DIR, capture_output=True, text=True, ) # Prefer real version tags over pseudo-tags like "latest" - version = 'unknown' + version = "unknown" if result.returncode == 0: tag = result.stdout.strip() - if tag and tag != 'latest': + if tag and tag != "latest": version = tag # Fallback: resource.h - if version == 'unknown': + if version == "unknown": version = _version_from_resource_h() # Last resort: use GitHub API for latest real release tag - if version == 'unknown': + if version == "unknown": try: - import urllib.request import urllib.error + import urllib.request + req = urllib.request.Request( - 'https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10', - headers={'User-Agent': 'retrobios-scraper/1.0'}, + "https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10", + headers={"User-Agent": "retrobios-scraper/1.0"}, ) with urllib.request.urlopen(req, timeout=10) as resp: import json as json_mod + tags = json_mod.loads(resp.read()) for t in tags: - if t['name'] != 'latest' and t['name'].startswith('v'): - version = t['name'] + if t["name"] != "latest" and t["name"].startswith("v"): + version = t["name"] break except (urllib.error.URLError, OSError): pass sha_result = subprocess.run( - ['git', 'rev-parse', 'HEAD'], + ["git", "rev-parse", "HEAD"], cwd=CLONE_DIR, capture_output=True, text=True, @@ -124,17 +132,17 @@ def _extract_version() -> tuple[str, str]: def _version_from_resource_h() -> str: """Fallback: parse VER_FULL_VERSION_STR from resource.h.""" - resource_h = CLONE_DIR / 'src' / 'burner' / 'resource.h' + resource_h = CLONE_DIR / "src" / "burner" / "resource.h" if not resource_h.exists(): - return 'unknown' + return "unknown" - text = resource_h.read_text(encoding='utf-8', errors='replace') + text = resource_h.read_text(encoding="utf-8", errors="replace") for line in text.splitlines(): - if 'VER_FULL_VERSION_STR' in line: + if "VER_FULL_VERSION_STR" in line: parts = line.split('"') if len(parts) >= 2: return parts[1] - return 'unknown' + return "unknown" def _cleanup() -> None: @@ -146,33 +154,33 @@ def _cleanup() -> None: def fetch_and_cache(force: bool = False) -> dict[str, Any]: """Clone, parse, and write JSON cache. Returns the cache dict.""" if not force and _is_cache_fresh(): - log.info('cache fresh, skipping clone (use --force to override)') - return json.loads(CACHE_PATH.read_text(encoding='utf-8')) + log.info("cache fresh, skipping clone (use --force to override)") + return json.loads(CACHE_PATH.read_text(encoding="utf-8")) try: - log.info('sparse cloning %s', REPO_URL) + log.info("sparse cloning %s", REPO_URL) _sparse_clone() - log.info('extracting version') + log.info("extracting version") version, commit = _extract_version() - log.info('parsing source tree') + log.info("parsing source tree") bios_sets = parse_fbneo_source_tree(str(CLONE_DIR)) cache: dict[str, Any] = { - 'source': 'finalburnneo/FBNeo', - 'version': version, - 'commit': commit, - 'fetched_at': datetime.now(timezone.utc).isoformat(), - 'bios_sets': bios_sets, + "source": "finalburnneo/FBNeo", + "version": version, + "commit": commit, + "fetched_at": datetime.now(timezone.utc).isoformat(), + "bios_sets": bios_sets, } CACHE_PATH.parent.mkdir(parents=True, exist_ok=True) CACHE_PATH.write_text( - json.dumps(cache, indent=2, ensure_ascii=False) + '\n', - encoding='utf-8', + json.dumps(cache, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", ) - log.info('wrote %d BIOS sets to %s', len(bios_sets), CACHE_PATH) + log.info("wrote %d BIOS sets to %s", len(bios_sets), CACHE_PATH) return cache finally: @@ -182,48 +190,50 @@ def fetch_and_cache(force: bool = False) -> dict[str, Any]: def _find_fbneo_profiles() -> list[Path]: """Find emulator profiles whose upstream references finalburnneo/FBNeo.""" profiles: list[Path] = [] - for path in sorted(EMULATORS_DIR.glob('*.yml')): - if path.name.endswith('.old.yml'): + for path in sorted(EMULATORS_DIR.glob("*.yml")): + if path.name.endswith(".old.yml"): continue try: - data = yaml.safe_load(path.read_text(encoding='utf-8')) + data = yaml.safe_load(path.read_text(encoding="utf-8")) except (yaml.YAMLError, OSError): continue if not data or not isinstance(data, dict): continue - upstream = data.get('upstream', '') - if isinstance(upstream, str) and 'finalburnneo/fbneo' in upstream.lower(): + upstream = data.get("upstream", "") + if isinstance(upstream, str) and "finalburnneo/fbneo" in upstream.lower(): profiles.append(path) return profiles -def _format_diff(profile_name: str, diff: dict[str, Any], show_added: bool = True) -> str: +def _format_diff( + profile_name: str, diff: dict[str, Any], show_added: bool = True +) -> str: """Format diff for a single profile.""" lines: list[str] = [] - lines.append(f' {profile_name}:') + lines.append(f" {profile_name}:") - added = diff.get('added', []) - updated = diff.get('updated', []) - oos = diff.get('out_of_scope', 0) + added = diff.get("added", []) + updated = diff.get("updated", []) + oos = diff.get("out_of_scope", 0) if not added and not updated: - lines.append(' no changes') + lines.append(" no changes") if oos: - lines.append(f' . {oos} out of scope') - return '\n'.join(lines) + lines.append(f" . {oos} out of scope") + return "\n".join(lines) if show_added: for label in added: - lines.append(f' + {label}') + lines.append(f" + {label}") elif added: - lines.append(f' + {len(added)} new ROMs available (main profile only)') + lines.append(f" + {len(added)} new ROMs available (main profile only)") for label in updated: - lines.append(f' ~ {label}') - lines.append(f' = {diff["unchanged"]} unchanged') + lines.append(f" ~ {label}") + lines.append(f" = {diff['unchanged']} unchanged") if oos: - lines.append(f' . {oos} out of scope') + lines.append(f" . {oos} out of scope") - return '\n'.join(lines) + return "\n".join(lines) def run( @@ -234,82 +244,84 @@ def run( """Main entry point for the scraper.""" cache = fetch_and_cache(force=force) - version = cache.get('version', 'unknown') - commit = cache.get('commit', '?')[:12] - bios_sets = cache.get('bios_sets', {}) + version = cache.get("version", "unknown") + commit = cache.get("commit", "?")[:12] + bios_sets = cache.get("bios_sets", {}) profiles = _find_fbneo_profiles() if json_output: result: dict[str, Any] = { - 'source': cache.get('source'), - 'version': version, - 'commit': cache.get('commit'), - 'bios_set_count': len(bios_sets), - 'profiles': {}, + "source": cache.get("source"), + "version": version, + "commit": cache.get("commit"), + "bios_set_count": len(bios_sets), + "profiles": {}, } for path in profiles: - diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo') - result['profiles'][path.stem] = diff + diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo") + result["profiles"][path.stem] = diff print(json.dumps(result, indent=2)) return 0 header = ( - f'fbneo-hashes: {len(bios_sets)} BIOS sets ' - f'from finalburnneo/FBNeo @ {version} ({commit})' + f"fbneo-hashes: {len(bios_sets)} BIOS sets " + f"from finalburnneo/FBNeo @ {version} ({commit})" ) print(header) print() if not profiles: - print(' no matching emulator profiles found') + print(" no matching emulator profiles found") return 0 for path in profiles: - is_main = path.name == 'fbneo.yml' - diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo') + is_main = path.name == "fbneo.yml" + diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo") print(_format_diff(path.stem, diff, show_added=is_main)) - effective_added = diff['added'] if is_main else [] - if not dry_run and (effective_added or diff['updated']): + effective_added = diff["added"] if is_main else [] + if not dry_run and (effective_added or diff["updated"]): merge_fbneo_profile(str(path), str(CACHE_PATH), write=True, add_new=is_main) - log.info('merged changes into %s', path.name) + log.info("merged changes into %s", path.name) return 0 def main() -> None: parser = argparse.ArgumentParser( - description='Scrape FBNeo BIOS set hashes from upstream source', + description="Scrape FBNeo BIOS set hashes from upstream source", ) parser.add_argument( - '--dry-run', - action='store_true', - help='show diff without writing changes', + "--dry-run", + action="store_true", + help="show diff without writing changes", ) parser.add_argument( - '--force', - action='store_true', - help='force re-clone even if cache is fresh', + "--force", + action="store_true", + help="force re-clone even if cache is fresh", ) parser.add_argument( - '--json', - action='store_true', - dest='json_output', - help='output diff as JSON', + "--json", + action="store_true", + dest="json_output", + help="output diff as JSON", ) args = parser.parse_args() logging.basicConfig( level=logging.INFO, - format='%(name)s: %(message)s', + format="%(name)s: %(message)s", ) - sys.exit(run( - dry_run=args.dry_run, - force=args.force, - json_output=args.json_output, - )) + sys.exit( + run( + dry_run=args.dry_run, + force=args.force, + json_output=args.json_output, + ) + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/scripts/scraper/fbneo_parser.py b/scripts/scraper/fbneo_parser.py index 03e82443..695db82c 100644 --- a/scripts/scraper/fbneo_parser.py +++ b/scripts/scraper/fbneo_parser.py @@ -11,18 +11,17 @@ import os import re from pathlib import Path - _ROM_ENTRY_RE = re.compile( r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}', ) _BURN_DRIVER_RE = re.compile( - r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};', + r"struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};", re.DOTALL, ) _ROM_DESC_RE = re.compile( - r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};', + r"static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};", re.DOTALL, ) @@ -37,7 +36,7 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]: for match in _BURN_DRIVER_RE.finditer(source): body = match.group(2) - if 'BDF_BOARDROM' not in body: + if "BDF_BOARDROM" not in body: continue # Set name is the first quoted string in the struct body @@ -46,11 +45,11 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]: continue set_name = name_match.group(1) - line_num = source[:match.start()].count('\n') + 1 + line_num = source[: match.start()].count("\n") + 1 results[set_name] = { - 'source_file': filename, - 'source_line': line_num, + "source_file": filename, + "source_line": line_num, } return results @@ -63,9 +62,9 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]: Sentinel entries (empty name) are skipped. """ pattern = re.compile( - r'static\s+struct\s+BurnRomInfo\s+' + r"static\s+struct\s+BurnRomInfo\s+" + re.escape(set_name) - + r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};', + + r"RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};", re.DOTALL, ) match = pattern.search(source) @@ -80,13 +79,15 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]: if not name: continue size = int(entry.group(2), 16) - crc32 = format(int(entry.group(3), 16), '08x') + crc32 = format(int(entry.group(3), 16), "08x") - roms.append({ - 'name': name, - 'size': size, - 'crc32': crc32, - }) + roms.append( + { + "name": name, + "size": size, + "crc32": crc32, + } + ) return roms @@ -100,7 +101,7 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]: Returns a dict mapping set name to: {source_file, source_line, roms: [{name, size, crc32}, ...]} """ - drv_path = Path(base_path) / 'src' / 'burn' / 'drv' + drv_path = Path(base_path) / "src" / "burn" / "drv" if not drv_path.is_dir(): return {} @@ -108,20 +109,20 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]: for root, _dirs, files in os.walk(drv_path): for fname in files: - if not fname.endswith('.cpp'): + if not fname.endswith(".cpp"): continue filepath = Path(root) / fname - source = filepath.read_text(encoding='utf-8', errors='replace') + source = filepath.read_text(encoding="utf-8", errors="replace") rel_path = str(filepath.relative_to(base_path)) bios_sets = find_bios_sets(source, rel_path) for set_name, meta in bios_sets.items(): roms = parse_rom_info(source, set_name) results[set_name] = { - 'source_file': meta['source_file'], - 'source_line': meta['source_line'], - 'roms': roms, + "source_file": meta["source_file"], + "source_line": meta["source_line"], + "roms": roms, } return results diff --git a/scripts/scraper/libretro_scraper.py b/scripts/scraper/libretro_scraper.py index 581a15ec..bdc1b379 100644 --- a/scripts/scraper/libretro_scraper.py +++ b/scripts/scraper/libretro_scraper.py @@ -8,9 +8,8 @@ Hash: SHA1 primary from __future__ import annotations -import sys -import urllib.request import urllib.error +import urllib.request from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format @@ -18,18 +17,17 @@ from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format PLATFORM_NAME = "libretro" SOURCE_URL = ( - "https://raw.githubusercontent.com/libretro/libretro-database/" - "master/dat/System.dat" + "https://raw.githubusercontent.com/libretro/libretro-database/master/dat/System.dat" ) # Libretro cores that expect BIOS files in a subdirectory of system/. # System.dat lists filenames flat; the scraper prepends the prefix. # ref: each core's libretro.c or equivalent -see platforms/README.md CORE_SUBDIR_MAP = { - "nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c - "sharp-x68000": "keropi", # px68k/libretro/libretro.c - "sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp - "sega-dreamcast-arcade": "dc", # flycast -same subfolder + "nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c + "sharp-x68000": "keropi", # px68k/libretro/libretro.c + "sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp + "sega-dreamcast-arcade": "dc", # flycast -same subfolder } SYSTEM_SLUG_MAP = { @@ -100,7 +98,6 @@ class Scraper(BaseScraper): def __init__(self, url: str = SOURCE_URL): super().__init__(url=url) - def fetch_requirements(self) -> list[BiosRequirement]: """Parse System.dat and return BIOS requirements.""" raw = self._fetch_raw() @@ -113,7 +110,9 @@ class Scraper(BaseScraper): for rom in roms: native_system = rom.system - system_slug = SYSTEM_SLUG_MAP.get(native_system, native_system.lower().replace(" ", "-")) + system_slug = SYSTEM_SLUG_MAP.get( + native_system, native_system.lower().replace(" ", "-") + ) destination = rom.name name = rom.name.split("/")[-1] if "/" in rom.name else rom.name @@ -122,17 +121,19 @@ class Scraper(BaseScraper): if subdir and not destination.startswith(subdir + "/"): destination = f"{subdir}/{destination}" - requirements.append(BiosRequirement( - name=name, - system=system_slug, - sha1=rom.sha1 or None, - md5=rom.md5 or None, - crc32=rom.crc32 or None, - size=rom.size or None, - destination=destination, - required=True, - native_id=native_system, - )) + requirements.append( + BiosRequirement( + name=name, + system=system_slug, + sha1=rom.sha1 or None, + md5=rom.md5 or None, + crc32=rom.crc32 or None, + size=rom.size or None, + destination=destination, + required=True, + native_id=native_system, + ) + ) return requirements @@ -158,17 +159,22 @@ class Scraper(BaseScraper): """Fetch per-core metadata from libretro-core-info .info files.""" metadata = {} try: - url = f"https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1" - req = urllib.request.Request(url, headers={ - "User-Agent": "retrobios-scraper/1.0", - "Accept": "application/vnd.github.v3+json", - }) + url = "https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1" + req = urllib.request.Request( + url, + headers={ + "User-Agent": "retrobios-scraper/1.0", + "Accept": "application/vnd.github.v3+json", + }, + ) with urllib.request.urlopen(req, timeout=30) as resp: import json + tree = json.loads(resp.read()) info_files = [ - item["path"] for item in tree.get("tree", []) + item["path"] + for item in tree.get("tree", []) if item["path"].endswith("_libretro.info") ] @@ -176,7 +182,9 @@ class Scraper(BaseScraper): core_name = filename.replace("_libretro.info", "") try: info_url = f"https://raw.githubusercontent.com/libretro/libretro-core-info/master/{filename}" - req = urllib.request.Request(info_url, headers={"User-Agent": "retrobios-scraper/1.0"}) + req = urllib.request.Request( + info_url, headers={"User-Agent": "retrobios-scraper/1.0"} + ) with urllib.request.urlopen(req, timeout=10) as resp: content = resp.read().decode("utf-8") @@ -194,10 +202,11 @@ class Scraper(BaseScraper): system_name = info.get("systemname", "") manufacturer = info.get("manufacturer", "") display_name = info.get("display_name", "") - categories = info.get("categories", "") + info.get("categories", "") # Map core to our system slug via firmware paths from .coreinfo_scraper import CORE_SYSTEM_MAP + system_slug = CORE_SYSTEM_MAP.get(core_name) if not system_slug: continue @@ -267,7 +276,11 @@ class Scraper(BaseScraper): # ref: Vircon32/libretro.c -virtual console, single BIOS "vircon32": { "files": [ - {"name": "Vircon32Bios.v32", "destination": "Vircon32Bios.v32", "required": True}, + { + "name": "Vircon32Bios.v32", + "destination": "Vircon32Bios.v32", + "required": True, + }, ], "core": "vircon32", "manufacturer": "Vircon", @@ -276,7 +289,11 @@ class Scraper(BaseScraper): # ref: xrick/src/sysvid.c, xrick/src/data.c -game data archive "xrick": { "files": [ - {"name": "data.zip", "destination": "xrick/data.zip", "required": True}, + { + "name": "data.zip", + "destination": "xrick/data.zip", + "required": True, + }, ], "core": "xrick", "manufacturer": "Other", @@ -318,27 +335,51 @@ class Scraper(BaseScraper): # segasp.zip for Sega System SP (Flycast) if "sega-dreamcast-arcade" in systems: - existing = {f["name"] for f in systems["sega-dreamcast-arcade"].get("files", [])} + existing = { + f["name"] for f in systems["sega-dreamcast-arcade"].get("files", []) + } if "segasp.zip" not in existing: - systems["sega-dreamcast-arcade"]["files"].append({ - "name": "segasp.zip", - "destination": "dc/segasp.zip", - "required": True, - }) + systems["sega-dreamcast-arcade"]["files"].append( + { + "name": "segasp.zip", + "destination": "dc/segasp.zip", + "required": True, + } + ) # Extra files missing from System.dat for specific systems. # Each traced to the core's source code. EXTRA_SYSTEM_FILES = { # melonDS DS DSi mode -ref: JesseTG/melonds-ds/src/libretro.cpp "nintendo-ds": [ - {"name": "dsi_bios7.bin", "destination": "dsi_bios7.bin", "required": True}, - {"name": "dsi_bios9.bin", "destination": "dsi_bios9.bin", "required": True}, - {"name": "dsi_firmware.bin", "destination": "dsi_firmware.bin", "required": True}, - {"name": "dsi_nand.bin", "destination": "dsi_nand.bin", "required": True}, + { + "name": "dsi_bios7.bin", + "destination": "dsi_bios7.bin", + "required": True, + }, + { + "name": "dsi_bios9.bin", + "destination": "dsi_bios9.bin", + "required": True, + }, + { + "name": "dsi_firmware.bin", + "destination": "dsi_firmware.bin", + "required": True, + }, + { + "name": "dsi_nand.bin", + "destination": "dsi_nand.bin", + "required": True, + }, ], # bsnes SGB naming -ref: bsnes/target-libretro/libretro.cpp "nintendo-sgb": [ - {"name": "sgb.boot.rom", "destination": "sgb.boot.rom", "required": False}, + { + "name": "sgb.boot.rom", + "destination": "sgb.boot.rom", + "required": False, + }, ], # JollyCV -ref: jollycv/libretro.c "coleco-colecovision": [ @@ -348,12 +389,20 @@ class Scraper(BaseScraper): ], # Kronos ST-V -ref: libretro-kronos/libretro/libretro.c "sega-saturn": [ - {"name": "stvbios.zip", "destination": "kronos/stvbios.zip", "required": True}, + { + "name": "stvbios.zip", + "destination": "kronos/stvbios.zip", + "required": True, + }, ], # PCSX ReARMed / Beetle PSX alt BIOS -ref: pcsx_rearmed/libpcsxcore/misc.c # docs say PSXONPSP660.bin (uppercase) but core accepts any case "sony-playstation": [ - {"name": "psxonpsp660.bin", "destination": "psxonpsp660.bin", "required": False}, + { + "name": "psxonpsp660.bin", + "destination": "psxonpsp660.bin", + "required": False, + }, ], # Dolphin GC -ref: DolphinLibretro/Boot.cpp:72-73, # BootManager.cpp:200-217, CommonPaths.h:139 GC_IPL="IPL.bin" @@ -361,15 +410,43 @@ class Scraper(BaseScraper): # System.dat gc-ntsc-*.bin names are NOT what Dolphin loads. # We add the correct Dolphin paths for BIOS + essential firmware. "nintendo-gamecube": [ - {"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/USA/IPL.bin", "required": False}, - {"name": "gc-pal-12.bin", "destination": "dolphin-emu/Sys/GC/EUR/IPL.bin", "required": False}, - {"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/JAP/IPL.bin", "required": False}, + { + "name": "gc-ntsc-12.bin", + "destination": "dolphin-emu/Sys/GC/USA/IPL.bin", + "required": False, + }, + { + "name": "gc-pal-12.bin", + "destination": "dolphin-emu/Sys/GC/EUR/IPL.bin", + "required": False, + }, + { + "name": "gc-ntsc-12.bin", + "destination": "dolphin-emu/Sys/GC/JAP/IPL.bin", + "required": False, + }, # DSP firmware -ref: Source/Core/Core/HW/DSPLLE/DSPHost.cpp - {"name": "dsp_coef.bin", "destination": "dolphin-emu/Sys/GC/dsp_coef.bin", "required": True}, - {"name": "dsp_rom.bin", "destination": "dolphin-emu/Sys/GC/dsp_rom.bin", "required": True}, + { + "name": "dsp_coef.bin", + "destination": "dolphin-emu/Sys/GC/dsp_coef.bin", + "required": True, + }, + { + "name": "dsp_rom.bin", + "destination": "dolphin-emu/Sys/GC/dsp_rom.bin", + "required": True, + }, # Fonts -ref: Source/Core/Core/HW/EXI/EXI_DeviceIPL.cpp - {"name": "font_western.bin", "destination": "dolphin-emu/Sys/GC/font_western.bin", "required": False}, - {"name": "font_japanese.bin", "destination": "dolphin-emu/Sys/GC/font_japanese.bin", "required": False}, + { + "name": "font_western.bin", + "destination": "dolphin-emu/Sys/GC/font_western.bin", + "required": False, + }, + { + "name": "font_japanese.bin", + "destination": "dolphin-emu/Sys/GC/font_japanese.bin", + "required": False, + }, ], # minivmac casing -ref: minivmac/src/MYOSGLUE.c # doc says MacII.rom, repo has MacII.ROM -both work on case-insensitive FS @@ -455,6 +532,7 @@ class Scraper(BaseScraper): def main(): from scripts.scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape libretro BIOS requirements") diff --git a/scripts/scraper/mame_hash_scraper.py b/scripts/scraper/mame_hash_scraper.py index a32360c3..d3ed46a4 100644 --- a/scripts/scraper/mame_hash_scraper.py +++ b/scripts/scraper/mame_hash_scraper.py @@ -21,16 +21,16 @@ from typing import Any import yaml -from .mame_parser import parse_mame_source_tree from ._hash_merge import compute_diff, merge_mame_profile +from .mame_parser import parse_mame_source_tree log = logging.getLogger(__name__) _ROOT = Path(__file__).resolve().parents[2] -_CACHE_PATH = _ROOT / 'data' / 'mame-hashes.json' -_CLONE_DIR = _ROOT / 'tmp' / 'mame' -_EMULATORS_DIR = _ROOT / 'emulators' -_REPO_URL = 'https://github.com/mamedev/mame.git' +_CACHE_PATH = _ROOT / "data" / "mame-hashes.json" +_CLONE_DIR = _ROOT / "tmp" / "mame" +_EMULATORS_DIR = _ROOT / "emulators" +_REPO_URL = "https://github.com/mamedev/mame.git" _STALE_HOURS = 24 @@ -41,7 +41,7 @@ def _load_cache() -> dict[str, Any] | None: if not _CACHE_PATH.exists(): return None try: - with open(_CACHE_PATH, encoding='utf-8') as f: + with open(_CACHE_PATH, encoding="utf-8") as f: return json.load(f) except (json.JSONDecodeError, OSError): return None @@ -50,7 +50,7 @@ def _load_cache() -> dict[str, Any] | None: def _is_stale(cache: dict[str, Any] | None) -> bool: if cache is None: return True - fetched_at = cache.get('fetched_at') + fetched_at = cache.get("fetched_at") if not fetched_at: return True try: @@ -63,17 +63,19 @@ def _is_stale(cache: dict[str, Any] | None) -> bool: def _write_cache(data: dict[str, Any]) -> None: _CACHE_PATH.parent.mkdir(parents=True, exist_ok=True) - with open(_CACHE_PATH, 'w', encoding='utf-8') as f: + with open(_CACHE_PATH, "w", encoding="utf-8") as f: json.dump(data, f, indent=2, ensure_ascii=False) - log.info('cache written to %s', _CACHE_PATH) + log.info("cache written to %s", _CACHE_PATH) # ── Git operations ─────────────────────────────────────────────────── -def _run_git(args: list[str], cwd: Path | None = None) -> subprocess.CompletedProcess[str]: +def _run_git( + args: list[str], cwd: Path | None = None +) -> subprocess.CompletedProcess[str]: return subprocess.run( - ['git', *args], + ["git", *args], cwd=cwd, check=True, capture_output=True, @@ -86,17 +88,20 @@ def _sparse_clone() -> None: shutil.rmtree(_CLONE_DIR) _CLONE_DIR.parent.mkdir(parents=True, exist_ok=True) - log.info('sparse cloning mamedev/mame into %s', _CLONE_DIR) - _run_git([ - 'clone', - '--depth', '1', - '--filter=blob:none', - '--sparse', - _REPO_URL, - str(_CLONE_DIR), - ]) + log.info("sparse cloning mamedev/mame into %s", _CLONE_DIR) _run_git( - ['sparse-checkout', 'set', 'src/mame', 'src/devices'], + [ + "clone", + "--depth", + "1", + "--filter=blob:none", + "--sparse", + _REPO_URL, + str(_CLONE_DIR), + ] + ) + _run_git( + ["sparse-checkout", "set", "src/mame", "src/devices"], cwd=_CLONE_DIR, ) @@ -106,41 +111,41 @@ def _get_version() -> str: # Use GitHub API to get the latest release tag. try: req = urllib.request.Request( - 'https://api.github.com/repos/mamedev/mame/releases/latest', - headers={'User-Agent': 'retrobios-scraper/1.0', - 'Accept': 'application/vnd.github.v3+json'}, + "https://api.github.com/repos/mamedev/mame/releases/latest", + headers={ + "User-Agent": "retrobios-scraper/1.0", + "Accept": "application/vnd.github.v3+json", + }, ) with urllib.request.urlopen(req, timeout=10) as resp: data = json.loads(resp.read()) - tag = data.get('tag_name', '') + tag = data.get("tag_name", "") if tag: return _parse_version_tag(tag) except (urllib.error.URLError, json.JSONDecodeError, OSError): pass - return 'unknown' + return "unknown" def _parse_version_tag(tag: str) -> str: - prefix = 'mame' + prefix = "mame" raw = tag.removeprefix(prefix) if tag.startswith(prefix) else tag if raw.isdigit() and len(raw) >= 4: - return f'{raw[0]}.{raw[1:]}' + return f"{raw[0]}.{raw[1:]}" return raw - - def _get_commit() -> str: try: - result = _run_git(['rev-parse', 'HEAD'], cwd=_CLONE_DIR) + result = _run_git(["rev-parse", "HEAD"], cwd=_CLONE_DIR) return result.stdout.strip() except subprocess.CalledProcessError: - return '' + return "" def _cleanup() -> None: if _CLONE_DIR.exists(): - log.info('cleaning up %s', _CLONE_DIR) + log.info("cleaning up %s", _CLONE_DIR) shutil.rmtree(_CLONE_DIR) @@ -149,18 +154,21 @@ def _cleanup() -> None: def _find_mame_profiles() -> list[Path]: profiles: list[Path] = [] - for path in sorted(_EMULATORS_DIR.glob('*.yml')): - if path.name.endswith('.old.yml'): + for path in sorted(_EMULATORS_DIR.glob("*.yml")): + if path.name.endswith(".old.yml"): continue try: - with open(path, encoding='utf-8') as f: + with open(path, encoding="utf-8") as f: data = yaml.safe_load(f) if not isinstance(data, dict): continue - upstream = data.get('upstream', '') + upstream = data.get("upstream", "") # Only match profiles tracking current MAME (not frozen snapshots # which have upstream like "mamedev/mame/tree/mame0139") - if isinstance(upstream, str) and upstream.rstrip('/') == 'https://github.com/mamedev/mame': + if ( + isinstance(upstream, str) + and upstream.rstrip("/") == "https://github.com/mamedev/mame" + ): profiles.append(path) except (yaml.YAMLError, OSError): continue @@ -179,36 +187,36 @@ def _format_diff( lines: list[str] = [] name = profile_path.stem - added = diff.get('added', []) - updated = diff.get('updated', []) - removed = diff.get('removed', []) - unchanged = diff.get('unchanged', 0) + added = diff.get("added", []) + updated = diff.get("updated", []) + removed = diff.get("removed", []) + unchanged = diff.get("unchanged", 0) if not added and not updated and not removed: - lines.append(f' {name}:') - lines.append(' no changes') + lines.append(f" {name}:") + lines.append(" no changes") return lines - lines.append(f' {name}:') + lines.append(f" {name}:") if show_added: - bios_sets = hashes.get('bios_sets', {}) + bios_sets = hashes.get("bios_sets", {}) for set_name in added: - rom_count = len(bios_sets.get(set_name, {}).get('roms', [])) - source_file = bios_sets.get(set_name, {}).get('source_file', '') - source_line = bios_sets.get(set_name, {}).get('source_line', '') - ref = f'{source_file}:{source_line}' if source_file else '' - lines.append(f' + {set_name}.zip ({ref}, {rom_count} ROMs)') + rom_count = len(bios_sets.get(set_name, {}).get("roms", [])) + source_file = bios_sets.get(set_name, {}).get("source_file", "") + source_line = bios_sets.get(set_name, {}).get("source_line", "") + ref = f"{source_file}:{source_line}" if source_file else "" + lines.append(f" + {set_name}.zip ({ref}, {rom_count} ROMs)") elif added: - lines.append(f' + {len(added)} new sets available (main profile only)') + lines.append(f" + {len(added)} new sets available (main profile only)") for set_name in updated: - lines.append(f' ~ {set_name}.zip (contents changed)') + lines.append(f" ~ {set_name}.zip (contents changed)") - oos = diff.get('out_of_scope', 0) - lines.append(f' = {unchanged} unchanged') + oos = diff.get("out_of_scope", 0) + lines.append(f" = {unchanged} unchanged") if oos: - lines.append(f' . {oos} out of scope (not BIOS root sets)') + lines.append(f" . {oos} out of scope (not BIOS root sets)") return lines @@ -218,7 +226,7 @@ def _format_diff( def _fetch_hashes(force: bool) -> dict[str, Any]: cache = _load_cache() if not force and not _is_stale(cache): - log.info('using cached data from %s', cache.get('fetched_at', '')) + log.info("using cached data from %s", cache.get("fetched_at", "")) return cache # type: ignore[return-value] try: @@ -228,11 +236,11 @@ def _fetch_hashes(force: bool) -> dict[str, Any]: commit = _get_commit() data: dict[str, Any] = { - 'source': 'mamedev/mame', - 'version': version, - 'commit': commit, - 'fetched_at': datetime.now(timezone.utc).isoformat(), - 'bios_sets': bios_sets, + "source": "mamedev/mame", + "version": version, + "commit": commit, + "fetched_at": datetime.now(timezone.utc).isoformat(), + "bios_sets": bios_sets, } _write_cache(data) return data @@ -243,34 +251,36 @@ def _fetch_hashes(force: bool) -> dict[str, Any]: def _run(args: argparse.Namespace) -> None: hashes = _fetch_hashes(args.force) - total_sets = len(hashes.get('bios_sets', {})) - version = hashes.get('version', 'unknown') - commit = hashes.get('commit', '')[:12] + total_sets = len(hashes.get("bios_sets", {})) + version = hashes.get("version", "unknown") + commit = hashes.get("commit", "")[:12] if args.json: json.dump(hashes, sys.stdout, indent=2, ensure_ascii=False) - sys.stdout.write('\n') + sys.stdout.write("\n") return - print(f'mame-hashes: {total_sets} BIOS root sets from mamedev/mame' - f' @ {version} ({commit})') + print( + f"mame-hashes: {total_sets} BIOS root sets from mamedev/mame" + f" @ {version} ({commit})" + ) print() profiles = _find_mame_profiles() if not profiles: - print(' no profiles with mamedev/mame upstream found') + print(" no profiles with mamedev/mame upstream found") return for profile_path in profiles: - is_main = profile_path.name == 'mame.yml' - diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode='mame') + is_main = profile_path.name == "mame.yml" + diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode="mame") lines = _format_diff(profile_path, diff, hashes, show_added=is_main) for line in lines: print(line) if not args.dry_run: - updated = diff.get('updated', []) - added = diff.get('added', []) if is_main else [] + updated = diff.get("updated", []) + added = diff.get("added", []) if is_main else [] if added or updated: merge_mame_profile( str(profile_path), @@ -278,32 +288,32 @@ def _run(args: argparse.Namespace) -> None: write=True, add_new=is_main, ) - log.info('merged into %s', profile_path.name) + log.info("merged into %s", profile_path.name) print() if args.dry_run: - print('(dry run, no files modified)') + print("(dry run, no files modified)") def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( - prog='mame_hash_scraper', - description='Fetch MAME BIOS hashes from source and merge into profiles.', + prog="mame_hash_scraper", + description="Fetch MAME BIOS hashes from source and merge into profiles.", ) parser.add_argument( - '--dry-run', - action='store_true', - help='show diff only, do not modify profiles', + "--dry-run", + action="store_true", + help="show diff only, do not modify profiles", ) parser.add_argument( - '--json', - action='store_true', - help='output raw JSON to stdout', + "--json", + action="store_true", + help="output raw JSON to stdout", ) parser.add_argument( - '--force', - action='store_true', - help='re-fetch even if cache is fresh', + "--force", + action="store_true", + help="re-fetch even if cache is fresh", ) return parser @@ -311,12 +321,12 @@ def build_parser() -> argparse.ArgumentParser: def main() -> None: logging.basicConfig( level=logging.INFO, - format='%(levelname)s: %(message)s', + format="%(levelname)s: %(message)s", ) parser = build_parser() args = parser.parse_args() _run(args) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/scripts/scraper/mame_parser.py b/scripts/scraper/mame_parser.py index da79ef8d..757581cd 100644 --- a/scripts/scraper/mame_parser.py +++ b/scripts/scraper/mame_parser.py @@ -14,27 +14,27 @@ from pathlib import Path # Macros that declare a machine entry _MACHINE_MACROS = re.compile( - r'\b(GAME|SYST|COMP|CONS)\s*\(', + r"\b(GAME|SYST|COMP|CONS)\s*\(", re.MULTILINE, ) # ROM block boundaries -_ROM_START = re.compile(r'ROM_START\s*\(\s*(\w+)\s*\)') -_ROM_END = re.compile(r'ROM_END') +_ROM_START = re.compile(r"ROM_START\s*\(\s*(\w+)\s*\)") +_ROM_END = re.compile(r"ROM_END") # ROM_REGION variants: ROM_REGION, ROM_REGION16_BE, ROM_REGION16_LE, ROM_REGION32_LE, etc. _ROM_REGION = re.compile( - r'ROM_REGION\w*\s*\(' - r'\s*(0x[\da-fA-F]+|\d+)\s*,' # size - r'\s*"([^"]+)"\s*,', # tag + r"ROM_REGION\w*\s*\(" + r"\s*(0x[\da-fA-F]+|\d+)\s*," # size + r'\s*"([^"]+)"\s*,', # tag ) # ROM_SYSTEM_BIOS( index, label, description ) _ROM_SYSTEM_BIOS = re.compile( - r'ROM_SYSTEM_BIOS\s*\(' - r'\s*(\d+)\s*,' # index - r'\s*"([^"]+)"\s*,' # label - r'\s*"([^"]+)"\s*\)', # description + r"ROM_SYSTEM_BIOS\s*\(" + r"\s*(\d+)\s*," # index + r'\s*"([^"]+)"\s*,' # label + r'\s*"([^"]+)"\s*\)', # description ) # All ROM_LOAD variants including custom BIOS macros. @@ -44,23 +44,23 @@ _ROM_SYSTEM_BIOS = re.compile( # The key pattern: any macro containing "ROM_LOAD" or "ROMX_LOAD" in its name, # with the first quoted string being the ROM filename. _ROM_LOAD = re.compile( - r'\b\w*ROMX?_LOAD\w*\s*\(' - r'[^"]*' # skip any args before the filename (e.g., bios index) - r'"([^"]+)"\s*,' # name (first quoted string) - r'\s*(0x[\da-fA-F]+|\d+)\s*,' # offset - r'\s*(0x[\da-fA-F]+|\d+)\s*,', # size + r"\b\w*ROMX?_LOAD\w*\s*\(" + r'[^"]*' # skip any args before the filename (e.g., bios index) + r'"([^"]+)"\s*,' # name (first quoted string) + r"\s*(0x[\da-fA-F]+|\d+)\s*," # offset + r"\s*(0x[\da-fA-F]+|\d+)\s*,", # size ) # CRC32 and SHA1 within a ROM_LOAD line _CRC_SHA = re.compile( - r'CRC\s*\(\s*([0-9a-fA-F]+)\s*\)' - r'\s+' - r'SHA1\s*\(\s*([0-9a-fA-F]+)\s*\)', + r"CRC\s*\(\s*([0-9a-fA-F]+)\s*\)" + r"\s+" + r"SHA1\s*\(\s*([0-9a-fA-F]+)\s*\)", ) -_NO_DUMP = re.compile(r'\bNO_DUMP\b') -_BAD_DUMP = re.compile(r'\bBAD_DUMP\b') -_ROM_BIOS = re.compile(r'ROM_BIOS\s*\(\s*(\d+)\s*\)') +_NO_DUMP = re.compile(r"\bNO_DUMP\b") +_BAD_DUMP = re.compile(r"\bBAD_DUMP\b") +_ROM_BIOS = re.compile(r"ROM_BIOS\s*\(\s*(\d+)\s*\)") def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]: @@ -77,8 +77,8 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]: if block_end == -1: continue - block = source[start:block_end + 1] - if 'MACHINE_IS_BIOS_ROOT' not in block: + block = source[start : block_end + 1] + if "MACHINE_IS_BIOS_ROOT" not in block: continue # Extract set name: first arg after the opening paren @@ -97,11 +97,11 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]: continue set_name = args[1].strip() - line_no = source[:match.start()].count('\n') + 1 + line_no = source[: match.start()].count("\n") + 1 results[set_name] = { - 'source_file': filename, - 'source_line': line_no, + "source_file": filename, + "source_line": line_no, } return results @@ -115,7 +115,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]: extracts all ROM entries. Skips NO_DUMP, flags BAD_DUMP. """ pattern = re.compile( - r'ROM_START\s*\(\s*' + re.escape(set_name) + r'\s*\)', + r"ROM_START\s*\(\s*" + re.escape(set_name) + r"\s*\)", ) start_match = pattern.search(source) if not start_match: @@ -125,7 +125,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]: if not end_match: return [] - block = source[start_match.end():end_match.start()] + block = source[start_match.end() : end_match.start()] # Pre-expand macros: find #define macros in the file that contain # ROM_LOAD/ROM_REGION/ROM_SYSTEM_BIOS calls, then expand their @@ -144,26 +144,26 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]: results: dict[str, dict] = {} root = Path(base_path) - search_dirs = [root / 'src' / 'mame', root / 'src' / 'devices'] + search_dirs = [root / "src" / "mame", root / "src" / "devices"] for search_dir in search_dirs: if not search_dir.is_dir(): continue for dirpath, _dirnames, filenames in os.walk(search_dir): for fname in filenames: - if not fname.endswith(('.cpp', '.c', '.h', '.hxx')): + if not fname.endswith((".cpp", ".c", ".h", ".hxx")): continue filepath = Path(dirpath) / fname rel_path = str(filepath.relative_to(root)) - content = filepath.read_text(encoding='utf-8', errors='replace') + content = filepath.read_text(encoding="utf-8", errors="replace") bios_sets = find_bios_root_sets(content, rel_path) for set_name, info in bios_sets.items(): roms = parse_rom_block(content, set_name) results[set_name] = { - 'source_file': info['source_file'], - 'source_line': info['source_line'], - 'roms': roms, + "source_file": info["source_file"], + "source_line": info["source_line"], + "roms": roms, } return results @@ -171,13 +171,20 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]: # Regex for #define macros that span multiple lines (backslash continuation) _DEFINE_RE = re.compile( - r'^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s*((?:.*\\\n)*.*)', + r"^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s*((?:.*\\\n)*.*)", re.MULTILINE, ) # ROM-related tokens that indicate a macro is relevant for expansion -_ROM_TOKENS = {'ROM_LOAD', 'ROMX_LOAD', 'ROM_REGION', 'ROM_SYSTEM_BIOS', - 'ROM_FILL', 'ROM_COPY', 'ROM_RELOAD'} +_ROM_TOKENS = { + "ROM_LOAD", + "ROMX_LOAD", + "ROM_REGION", + "ROM_SYSTEM_BIOS", + "ROM_FILL", + "ROM_COPY", + "ROM_RELOAD", +} def _collect_rom_macros(source: str) -> dict[str, str]: @@ -193,14 +200,14 @@ def _collect_rom_macros(source: str) -> dict[str, str]: name = m.group(1) body = m.group(2) # Join backslash-continued lines - body = body.replace('\\\n', ' ') + body = body.replace("\\\n", " ") # Only keep macros that contain ROM-related tokens if not any(tok in body for tok in _ROM_TOKENS): continue # Skip wrapper macros: if the body contains ROMX_LOAD/ROM_LOAD # with unquoted args (formal parameters), it's a wrapper. # These are already recognized by the _ROM_LOAD regex directly. - if re.search(r'ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,', body): + if re.search(r"ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,", body): continue macros[name] = body return macros @@ -223,7 +230,7 @@ def _expand_macros(block: str, macros: dict[str, str], depth: int = 5) -> str: iterations += 1 for name, body in macros.items(): # Match macro invocation: NAME or NAME(args) - pattern = re.compile(r'\b' + re.escape(name) + r'(?:\s*\([^)]*\))?') + pattern = re.compile(r"\b" + re.escape(name) + r"(?:\s*\([^)]*\))?") if pattern.search(block): block = pattern.sub(body, block) changed = True @@ -237,9 +244,9 @@ def _find_closing_paren(source: str, start: int) -> int: i = start while i < len(source): ch = source[i] - if ch == '(': + if ch == "(": depth += 1 - elif ch == ')': + elif ch == ")": depth -= 1 if depth == 0: return i @@ -268,24 +275,24 @@ def _split_macro_args(inner: str) -> list[str]: i += 1 if i < len(inner): current.append(inner[i]) - elif ch == '(': + elif ch == "(": depth += 1 current.append(ch) - elif ch == ')': + elif ch == ")": if depth == 0: - args.append(''.join(current)) + args.append("".join(current)) break depth -= 1 current.append(ch) - elif ch == ',' and depth == 0: - args.append(''.join(current)) + elif ch == "," and depth == 0: + args.append("".join(current)) current = [] else: current.append(ch) i += 1 if current: - remaining = ''.join(current).strip() + remaining = "".join(current).strip() if remaining: args.append(remaining) @@ -300,15 +307,15 @@ def _parse_rom_entries(block: str) -> list[dict]: Processes matches in order of appearance to track region and BIOS context. """ roms: list[dict] = [] - current_region = '' + current_region = "" bios_labels: dict[int, tuple[str, str]] = {} # Build a combined pattern that matches all interesting tokens # and process them in order of occurrence token_patterns = [ - ('region', _ROM_REGION), - ('bios_label', _ROM_SYSTEM_BIOS), - ('rom_load', _ROM_LOAD), + ("region", _ROM_REGION), + ("bios_label", _ROM_SYSTEM_BIOS), + ("rom_load", _ROM_LOAD), ] # Collect all matches with their positions @@ -321,22 +328,22 @@ def _parse_rom_entries(block: str) -> list[dict]: events.sort(key=lambda e: e[0]) for _pos, tag, m in events: - if tag == 'region': + if tag == "region": current_region = m.group(2) - elif tag == 'bios_label': + elif tag == "bios_label": idx = int(m.group(1)) bios_labels[idx] = (m.group(2), m.group(3)) - elif tag == 'rom_load': + elif tag == "rom_load": # Get the full macro call as context (find closing paren) context_start = m.start() # Find the opening paren of the ROM_LOAD macro - paren_pos = block.find('(', context_start) + paren_pos = block.find("(", context_start) if paren_pos != -1: close_pos = _find_closing_paren(block, paren_pos) context_end = close_pos + 1 if close_pos != -1 else m.end() + 200 else: context_end = m.end() + 200 - context = block[context_start:min(context_end, len(block))] + context = block[context_start : min(context_end, len(block))] if _NO_DUMP.search(context): continue @@ -345,8 +352,8 @@ def _parse_rom_entries(block: str) -> list[dict]: rom_size = _parse_int(m.group(3)) crc_sha_match = _CRC_SHA.search(context) - crc32 = '' - sha1 = '' + crc32 = "" + sha1 = "" if crc_sha_match: crc32 = crc_sha_match.group(1).lower() sha1 = crc_sha_match.group(2).lower() @@ -354,8 +361,8 @@ def _parse_rom_entries(block: str) -> list[dict]: bad_dump = bool(_BAD_DUMP.search(context)) bios_index = None - bios_label = '' - bios_description = '' + bios_label = "" + bios_description = "" bios_ref = _ROM_BIOS.search(context) if bios_ref: bios_index = int(bios_ref.group(1)) @@ -363,18 +370,18 @@ def _parse_rom_entries(block: str) -> list[dict]: bios_label, bios_description = bios_labels[bios_index] entry: dict = { - 'name': rom_name, - 'size': rom_size, - 'crc32': crc32, - 'sha1': sha1, - 'region': current_region, - 'bad_dump': bad_dump, + "name": rom_name, + "size": rom_size, + "crc32": crc32, + "sha1": sha1, + "region": current_region, + "bad_dump": bad_dump, } if bios_index is not None: - entry['bios_index'] = bios_index - entry['bios_label'] = bios_label - entry['bios_description'] = bios_description + entry["bios_index"] = bios_index + entry["bios_label"] = bios_label + entry["bios_description"] = bios_description roms.append(entry) @@ -384,6 +391,6 @@ def _parse_rom_entries(block: str) -> list[dict]: def _parse_int(value: str) -> int: """Parse an integer that may be hex (0x...) or decimal.""" value = value.strip() - if value.startswith('0x') or value.startswith('0X'): + if value.startswith("0x") or value.startswith("0X"): return int(value, 16) return int(value) diff --git a/scripts/scraper/recalbox_scraper.py b/scripts/scraper/recalbox_scraper.py index 49267e89..f3ff0540 100644 --- a/scripts/scraper/recalbox_scraper.py +++ b/scripts/scraper/recalbox_scraper.py @@ -16,8 +16,6 @@ Recalbox verification logic: from __future__ import annotations import sys -import urllib.request -import urllib.error import xml.etree.ElementTree as ET from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag @@ -121,17 +119,19 @@ class Scraper(BaseScraper): for bios_elem in system_elem.findall("bios"): paths_str = bios_elem.get("path", "") md5_str = bios_elem.get("md5", "") - core = bios_elem.get("core", "") + bios_elem.get("core", "") mandatory = bios_elem.get("mandatory", "true") != "false" - hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false" - note = bios_elem.get("note", "") + bios_elem.get("hashMatchMandatory", "true") != "false" + bios_elem.get("note", "") paths = [p.strip() for p in paths_str.split("|") if p.strip()] if not paths: continue primary_path = paths[0] - name = primary_path.split("/")[-1] if "/" in primary_path else primary_path + name = ( + primary_path.split("/")[-1] if "/" in primary_path else primary_path + ) md5_list = [m.strip() for m in md5_str.split(",") if m.strip()] all_md5 = ",".join(md5_list) if md5_list else None @@ -141,14 +141,16 @@ class Scraper(BaseScraper): continue seen.add(dedup_key) - requirements.append(BiosRequirement( - name=name, - system=system_slug, - md5=all_md5, - destination=primary_path, - required=mandatory, - native_id=platform, - )) + requirements.append( + BiosRequirement( + name=name, + system=system_slug, + md5=all_md5, + destination=primary_path, + required=mandatory, + native_id=platform, + ) + ) return requirements @@ -168,7 +170,9 @@ class Scraper(BaseScraper): md5_str = bios_elem.get("md5", "") core = bios_elem.get("core", "") mandatory = bios_elem.get("mandatory", "true") != "false" - hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false" + hash_match_mandatory = ( + bios_elem.get("hashMatchMandatory", "true") != "false" + ) note = bios_elem.get("note", "") paths = [p.strip() for p in paths_str.split("|") if p.strip()] @@ -179,17 +183,19 @@ class Scraper(BaseScraper): name = paths[0].split("/")[-1] if "/" in paths[0] else paths[0] - requirements.append({ - "name": name, - "system": system_slug, - "system_name": system_name, - "paths": paths, - "md5_list": md5_list, - "core": core, - "mandatory": mandatory, - "hash_match_mandatory": hash_match_mandatory, - "note": note, - }) + requirements.append( + { + "name": name, + "system": system_slug, + "system_name": system_name, + "paths": paths, + "md5_list": md5_list, + "core": core, + "mandatory": mandatory, + "hash_match_mandatory": hash_match_mandatory, + "note": note, + } + ) return requirements @@ -245,7 +251,9 @@ def main(): parser = argparse.ArgumentParser(description="Scrape Recalbox es_bios.xml") parser.add_argument("--dry-run", action="store_true") parser.add_argument("--json", action="store_true") - parser.add_argument("--full", action="store_true", help="Show full Recalbox-specific fields") + parser.add_argument( + "--full", action="store_true", help="Show full Recalbox-specific fields" + ) parser.add_argument("--output", "-o") args = parser.parse_args() @@ -264,6 +272,7 @@ def main(): if args.dry_run: from collections import defaultdict + by_system = defaultdict(list) for r in reqs: by_system[r.system].append(r) @@ -272,7 +281,7 @@ def main(): for f in files[:5]: print(f" {f.name} (md5={f.md5[:12] if f.md5 else 'N/A'}...)") if len(files) > 5: - print(f" ... +{len(files)-5} more") + print(f" ... +{len(files) - 5} more") print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems") return diff --git a/scripts/scraper/retrobat_scraper.py b/scripts/scraper/retrobat_scraper.py index 70a80e33..b43552b3 100644 --- a/scripts/scraper/retrobat_scraper.py +++ b/scripts/scraper/retrobat_scraper.py @@ -9,9 +9,6 @@ Hash: MD5 primary from __future__ import annotations import json -import sys -import urllib.request -import urllib.error try: from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version @@ -43,7 +40,6 @@ class Scraper(BaseScraper): super().__init__(url=url) self._parsed: dict | None = None - def _parse_json(self) -> dict: if self._parsed is not None: return self._parsed @@ -89,13 +85,15 @@ class Scraper(BaseScraper): name = file_path.split("/")[-1] if "/" in file_path else file_path - requirements.append(BiosRequirement( - name=name, - system=SYSTEM_SLUG_MAP.get(sys_key, sys_key), - md5=md5 or None, - destination=file_path, - required=True, - )) + requirements.append( + BiosRequirement( + name=name, + system=SYSTEM_SLUG_MAP.get(sys_key, sys_key), + md5=md5 or None, + destination=file_path, + required=True, + ) + ) return requirements @@ -170,6 +168,7 @@ class Scraper(BaseScraper): def main(): from scripts.scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape retrobat BIOS requirements") diff --git a/scripts/scraper/retrodeck_scraper.py b/scripts/scraper/retrodeck_scraper.py index 9be05b22..9082020d 100644 --- a/scripts/scraper/retrodeck_scraper.py +++ b/scripts/scraper/retrodeck_scraper.py @@ -29,8 +29,8 @@ import json import os import re import sys -import urllib.request import urllib.error +import urllib.request from pathlib import Path try: @@ -43,16 +43,16 @@ PLATFORM_NAME = "retrodeck" COMPONENTS_REPO = "RetroDECK/components" COMPONENTS_BRANCH = "main" COMPONENTS_API_URL = ( - f"https://api.github.com/repos/{COMPONENTS_REPO}" - f"/git/trees/{COMPONENTS_BRANCH}" -) -RAW_BASE = ( - f"https://raw.githubusercontent.com/{COMPONENTS_REPO}" - f"/{COMPONENTS_BRANCH}" + f"https://api.github.com/repos/{COMPONENTS_REPO}/git/trees/{COMPONENTS_BRANCH}" ) +RAW_BASE = f"https://raw.githubusercontent.com/{COMPONENTS_REPO}/{COMPONENTS_BRANCH}" SKIP_DIRS = {"archive_later", "archive_old", "automation-tools", ".github"} NON_EMULATOR_COMPONENTS = { - "framework", "es-de", "steam-rom-manager", "flips", "portmaster", + "framework", + "es-de", + "steam-rom-manager", + "flips", + "portmaster", } # RetroDECK system ID -> retrobios slug. @@ -358,13 +358,20 @@ class Scraper(BaseScraper): required_raw = entry.get("required", "") required = bool(required_raw) and str(required_raw).lower() not in ( - "false", "no", "optional", "", + "false", + "no", + "optional", + "", ) key = (system, filename.lower()) if key in seen: existing = next( - (r for r in requirements if (r.system, r.name.lower()) == key), + ( + r + for r in requirements + if (r.system, r.name.lower()) == key + ), None, ) if existing and md5 and existing.md5 and md5 != existing.md5: @@ -376,13 +383,15 @@ class Scraper(BaseScraper): continue seen.add(key) - requirements.append(BiosRequirement( - name=filename, - system=system, - destination=destination, - md5=md5, - required=required, - )) + requirements.append( + BiosRequirement( + name=filename, + system=system, + destination=destination, + md5=md5, + required=required, + ) + ) return requirements @@ -390,11 +399,14 @@ class Scraper(BaseScraper): reqs = self.fetch_requirements() manifests = self._get_manifests() - cores = sorted({ - comp_name for comp_name, _ in manifests - if comp_name not in SKIP_DIRS - and comp_name not in NON_EMULATOR_COMPONENTS - }) + cores = sorted( + { + comp_name + for comp_name, _ in manifests + if comp_name not in SKIP_DIRS + and comp_name not in NON_EMULATOR_COMPONENTS + } + ) systems: dict[str, dict] = {} for req in reqs: @@ -423,6 +435,7 @@ class Scraper(BaseScraper): def main() -> None: from scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape RetroDECK BIOS requirements") diff --git a/scripts/scraper/romm_scraper.py b/scripts/scraper/romm_scraper.py index 154e6a95..83cd7cff 100644 --- a/scripts/scraper/romm_scraper.py +++ b/scripts/scraper/romm_scraper.py @@ -138,16 +138,18 @@ class Scraper(BaseScraper): crc32 = (entry.get("crc") or "").strip() or None size = int(entry["size"]) if entry.get("size") else None - requirements.append(BiosRequirement( - name=filename, - system=system, - sha1=sha1, - md5=md5, - crc32=crc32, - size=size, - destination=f"{igdb_slug}/{filename}", - required=True, - )) + requirements.append( + BiosRequirement( + name=filename, + system=system, + sha1=sha1, + md5=md5, + crc32=crc32, + size=size, + destination=f"{igdb_slug}/{filename}", + required=True, + ) + ) return requirements @@ -164,7 +166,7 @@ class Scraper(BaseScraper): for key in list(data.keys())[:5]: if ":" not in key: return False - _, entry = key.split(":", 1), data[key] + _, _entry = key.split(":", 1), data[key] if not isinstance(data[key], dict): return False if "md5" not in data[key] and "sha1" not in data[key]: @@ -217,6 +219,7 @@ class Scraper(BaseScraper): def main(): from scripts.scraper.base_scraper import scraper_cli + scraper_cli(Scraper, "Scrape RomM BIOS requirements") diff --git a/scripts/scraper/targets/__init__.py b/scripts/scraper/targets/__init__.py index e88a829c..3049ca0b 100644 --- a/scripts/scraper/targets/__init__.py +++ b/scripts/scraper/targets/__init__.py @@ -2,6 +2,7 @@ Auto-detects *_targets_scraper.py files and exposes their scrapers. """ + from __future__ import annotations import importlib diff --git a/scripts/scraper/targets/batocera_targets_scraper.py b/scripts/scraper/targets/batocera_targets_scraper.py index a76ad1a8..d30d2f67 100644 --- a/scripts/scraper/targets/batocera_targets_scraper.py +++ b/scripts/scraper/targets/batocera_targets_scraper.py @@ -6,6 +6,7 @@ Sources (batocera-linux/batocera.linux): - package/batocera/emulationstation/batocera-es-system/es_systems.yml -- emulator requireAnyOf flag mapping """ + from __future__ import annotations import argparse @@ -35,23 +36,23 @@ _HEADERS = { "Accept": "application/vnd.github.v3+json", } -_TARGET_FLAG_RE = re.compile(r'^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y', re.MULTILINE) +_TARGET_FLAG_RE = re.compile(r"^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y", re.MULTILINE) # Matches: select BR2_PACKAGE_FOO (optional: if CONDITION) # Condition may span multiple lines (backslash continuation) _SELECT_RE = re.compile( - r'^\s+select\s+(BR2_PACKAGE_\w+)' # package being selected - r'(?:\s+if\s+((?:[^\n]|\\\n)+?))?' # optional "if CONDITION" (may continue with \) - r'(?:\s*#[^\n]*)?$', # optional trailing comment + r"^\s+select\s+(BR2_PACKAGE_\w+)" # package being selected + r"(?:\s+if\s+((?:[^\n]|\\\n)+?))?" # optional "if CONDITION" (may continue with \) + r"(?:\s*#[^\n]*)?$", # optional trailing comment re.MULTILINE, ) # Meta-flag definition: "if COND\n\tconfig DERIVED_FLAG\n\t...\nendif" _META_BLOCK_RE = re.compile( - r'^if\s+((?:[^\n]|\\\n)+?)\n' # condition (may span lines via \) - r'(?:.*?\n)*?' # optional lines before the config - r'\s+config\s+(BR2_PACKAGE_\w+)' # derived flag name - r'.*?^endif', # end of block + r"^if\s+((?:[^\n]|\\\n)+?)\n" # condition (may span lines via \) + r"(?:.*?\n)*?" # optional lines before the config + r"\s+config\s+(BR2_PACKAGE_\w+)" # derived flag name + r".*?^endif", # end of block re.MULTILINE | re.DOTALL, ) @@ -80,7 +81,7 @@ def _fetch_json(url: str) -> list | dict | None: def _normalise_condition(raw: str) -> str: """Strip backslash-continuations and collapse whitespace.""" - return re.sub(r'\\\n\s*', ' ', raw).strip() + return re.sub(r"\\\n\s*", " ", raw).strip() def _tokenise(condition: str) -> list[str]: @@ -89,14 +90,16 @@ def _tokenise(condition: str) -> list[str]: return token_re.findall(condition) -def _check_condition(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]: +def _check_condition( + tokens: list[str], pos: int, active: frozenset[str] +) -> tuple[bool, int]: """Recursive descent check of a Kconfig boolean expression.""" return _check_or(tokens, pos, active) def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]: left, pos = _check_and(tokens, pos, active) - while pos < len(tokens) and tokens[pos] == '||': + while pos < len(tokens) and tokens[pos] == "||": pos += 1 right, pos = _check_and(tokens, pos, active) left = left or right @@ -105,7 +108,7 @@ def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]: left, pos = _check_not(tokens, pos, active) - while pos < len(tokens) and tokens[pos] == '&&': + while pos < len(tokens) and tokens[pos] == "&&": pos += 1 right, pos = _check_not(tokens, pos, active) left = left and right @@ -113,24 +116,26 @@ def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[boo def _check_not(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]: - if pos < len(tokens) and tokens[pos] == '!': + if pos < len(tokens) and tokens[pos] == "!": pos += 1 val, pos = _check_atom(tokens, pos, active) return not val, pos return _check_atom(tokens, pos, active) -def _check_atom(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]: +def _check_atom( + tokens: list[str], pos: int, active: frozenset[str] +) -> tuple[bool, int]: if pos >= len(tokens): return True, pos tok = tokens[pos] - if tok == '(': + if tok == "(": pos += 1 val, pos = _check_or(tokens, pos, active) - if pos < len(tokens) and tokens[pos] == ')': + if pos < len(tokens) and tokens[pos] == ")": pos += 1 return val, pos - if tok.startswith('BR2_'): + if tok.startswith("BR2_"): pos += 1 return tok in active, pos if tok.startswith('"'): @@ -170,7 +175,9 @@ def _parse_meta_flags(text: str) -> list[tuple[str, str]]: return results -def _expand_flags(primary_flag: str, meta_rules: list[tuple[str, str]]) -> frozenset[str]: +def _expand_flags( + primary_flag: str, meta_rules: list[tuple[str, str]] +) -> frozenset[str]: """Given a board's primary flag, expand to all active derived flags. Iterates until stable (handles chained derivations like X86_64_ANY -> X86_ANY). @@ -194,7 +201,7 @@ def _parse_selects(text: str) -> list[tuple[str, str]]: results: list[tuple[str, str]] = [] for m in _SELECT_RE.finditer(text): pkg = m.group(1) - cond = _normalise_condition(m.group(2) or '') + cond = _normalise_condition(m.group(2) or "") results.append((pkg, cond)) return results @@ -261,7 +268,8 @@ class Scraper(BaseTargetScraper): if not data or not isinstance(data, list): return [] return [ - item["name"] for item in data + item["name"] + for item in data if isinstance(item, dict) and item.get("name", "").startswith("batocera-") and item.get("name", "").endswith(".board") diff --git a/scripts/scraper/targets/emudeck_targets_scraper.py b/scripts/scraper/targets/emudeck_targets_scraper.py index 44b4e02e..ec3bdd0a 100644 --- a/scripts/scraper/targets/emudeck_targets_scraper.py +++ b/scripts/scraper/targets/emudeck_targets_scraper.py @@ -4,6 +4,7 @@ Sources: SteamOS: dragoonDorise/EmuDeck -functions/EmuScripts/*.sh Windows: EmuDeck/emudeck-we -functions/EmuScripts/*.ps1 """ + from __future__ import annotations import argparse @@ -20,8 +21,12 @@ from . import BaseTargetScraper PLATFORM_NAME = "emudeck" -STEAMOS_API = "https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts" -WINDOWS_API = "https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts" +STEAMOS_API = ( + "https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts" +) +WINDOWS_API = ( + "https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts" +) # Map EmuDeck script names to emulator profile keys # Script naming: emuDeckDolphin.sh -> dolphin @@ -70,8 +75,8 @@ def _list_emuscripts(api_url: str) -> list[str]: def _script_to_core(filename: str) -> str | None: """Convert EmuScripts filename to core profile key.""" # Strip extension and emuDeck prefix - name = re.sub(r'\.(sh|ps1)$', '', filename, flags=re.IGNORECASE) - name = re.sub(r'^emuDeck', '', name, flags=re.IGNORECASE) + name = re.sub(r"\.(sh|ps1)$", "", filename, flags=re.IGNORECASE) + name = re.sub(r"^emuDeck", "", name, flags=re.IGNORECASE) if not name: return None key = name.lower() @@ -86,8 +91,9 @@ class Scraper(BaseTargetScraper): def __init__(self, url: str = "https://github.com/dragoonDorise/EmuDeck"): super().__init__(url=url) - def _fetch_cores_for_target(self, api_url: str, label: str, - arch: str = "x86_64") -> list[str]: + def _fetch_cores_for_target( + self, api_url: str, label: str, arch: str = "x86_64" + ) -> list[str]: print(f" fetching {label} EmuScripts...", file=sys.stderr) scripts = _list_emuscripts(api_url) cores: list[str] = [] @@ -99,7 +105,7 @@ class Scraper(BaseTargetScraper): seen.add(core) cores.append(core) # Detect RetroArch presence (provides all libretro cores) - name = re.sub(r'\.(sh|ps1)$', '', script, flags=re.IGNORECASE) + name = re.sub(r"\.(sh|ps1)$", "", script, flags=re.IGNORECASE) if name.lower() in ("emudeckretroarch", "retroarch_maincfg"): has_retroarch = True @@ -112,15 +118,18 @@ class Scraper(BaseTargetScraper): seen.add(c) cores.append(c) - print(f" {label}: {standalone_count} standalone + " - f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total", - file=sys.stderr) + print( + f" {label}: {standalone_count} standalone + " + f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total", + file=sys.stderr, + ) return sorted(cores) @staticmethod def _load_retroarch_cores(arch: str) -> list[str]: """Load RetroArch target cores for given architecture.""" import os + target_path = os.path.join("platforms", "targets", "retroarch.yml") if not os.path.exists(target_path): return [] @@ -157,9 +166,7 @@ class Scraper(BaseTargetScraper): def main() -> None: - parser = argparse.ArgumentParser( - description="Scrape EmuDeck emulator targets" - ) + parser = argparse.ArgumentParser(description="Scrape EmuDeck emulator targets") parser.add_argument("--dry-run", action="store_true", help="Show target summary") parser.add_argument("--output", "-o", help="Output YAML file") args = parser.parse_args() diff --git a/scripts/scraper/targets/retroarch_targets_scraper.py b/scripts/scraper/targets/retroarch_targets_scraper.py index 98353f56..033f5cc9 100644 --- a/scripts/scraper/targets/retroarch_targets_scraper.py +++ b/scripts/scraper/targets/retroarch_targets_scraper.py @@ -16,6 +16,7 @@ Buildbot structure varies by platform: - ps2: playstation/ps2/latest/ -> *_libretro_ps2.elf.zip - vita: bundles only (VPK) - no individual cores """ + from __future__ import annotations import argparse @@ -64,7 +65,9 @@ RECIPE_TARGETS: list[tuple[str, str, str]] = [ ("playstation/vita", "playstation-vita", "armv7"), ] -RECIPE_BASE_URL = "https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/" +RECIPE_BASE_URL = ( + "https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/" +) # Match any href containing _libretro followed by a platform-specific extension # Covers: .so.zip, .dll.zip, .dylib.zip, .nro.zip, .dol.zip, .rpx.zip, @@ -75,7 +78,7 @@ _HREF_RE = re.compile( ) # Extract core name: everything before _libretro -_CORE_NAME_RE = re.compile(r'^(.+?)_libretro') +_CORE_NAME_RE = re.compile(r"^(.+?)_libretro") class Scraper(BaseTargetScraper): @@ -180,12 +183,16 @@ def main() -> None: data = scraper.fetch_targets() total_cores = sum(len(t["cores"]) for t in data["targets"].values()) - print(f"\n{len(data['targets'])} targets, {total_cores} total core entries", - file=sys.stderr) + print( + f"\n{len(data['targets'])} targets, {total_cores} total core entries", + file=sys.stderr, + ) if args.dry_run: for name, info in sorted(data["targets"].items()): - print(f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores") + print( + f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores" + ) return if args.output: diff --git a/scripts/scraper/targets/retropie_targets_scraper.py b/scripts/scraper/targets/retropie_targets_scraper.py index c9972f92..5b965bea 100644 --- a/scripts/scraper/targets/retropie_targets_scraper.py +++ b/scripts/scraper/targets/retropie_targets_scraper.py @@ -4,6 +4,7 @@ Source: https://github.com/RetroPie/RetroPie-Setup/tree/master/scriptmodules/lib Parses rp_module_id and rp_module_flags from each scriptmodule to determine which platforms each core supports. """ + from __future__ import annotations import argparse diff --git a/scripts/sect233r1.py b/scripts/sect233r1.py index 9ade6c42..4185547e 100644 --- a/scripts/sect233r1.py +++ b/scripts/sect233r1.py @@ -9,6 +9,7 @@ Curve: sect233r1 (NIST B-233, SEC 2 v2) Field: GF(2^233) with irreducible polynomial t^233 + t^74 + 1 Equation: y^2 + xy = x^3 + x^2 + b """ + from __future__ import annotations import hashlib @@ -34,6 +35,7 @@ _H = 2 # GF(2^233) field arithmetic + def _gf_reduce(a: int) -> int: """Reduce polynomial a modulo t^233 + t^74 + 1.""" while a.bit_length() > _M: @@ -171,6 +173,7 @@ def _ec_mul(k: int, p: tuple[int, int] | None) -> tuple[int, int] | None: # ECDSA-SHA256 verification + def _modinv(a: int, m: int) -> int: """Modular inverse of a modulo m (integers, not GF(2^m)).""" if a < 0: diff --git a/scripts/truth.py b/scripts/truth.py index ac9dc9b4..b66b406f 100644 --- a/scripts/truth.py +++ b/scripts/truth.py @@ -13,7 +13,8 @@ from validation import filter_files_by_mode def _determine_core_mode( - emu_name: str, profile: dict, + emu_name: str, + profile: dict, cores_config: str | list | None, standalone_set: set[str] | None, ) -> str: @@ -62,7 +63,10 @@ def _enrich_hashes(entry: dict, db: dict) -> None: def _merge_file_into_system( - system: dict, file_entry: dict, emu_name: str, db: dict | None, + system: dict, + file_entry: dict, + emu_name: str, + db: dict | None, ) -> None: """Merge a file entry into a system's file list, deduplicating by name.""" files = system.setdefault("files", []) @@ -100,9 +104,22 @@ def _merge_file_into_system( entry: dict = {"name": file_entry["name"]} if file_entry.get("required") is not None: entry["required"] = file_entry["required"] - for field in ("sha1", "md5", "sha256", "crc32", "size", "path", - "description", "hle_fallback", "category", "note", - "validation", "min_size", "max_size", "aliases"): + for field in ( + "sha1", + "md5", + "sha256", + "crc32", + "size", + "path", + "description", + "hle_fallback", + "category", + "note", + "validation", + "min_size", + "max_size", + "aliases", + ): val = file_entry.get(field) if val is not None: entry[field] = val @@ -206,7 +223,9 @@ def generate_platform_truth( if mode == "both": filtered = raw_files else: - filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone")) + filtered = filter_files_by_mode( + raw_files, standalone=(mode == "standalone") + ) for fe in filtered: profile_sid = fe.get("system", "") @@ -217,9 +236,13 @@ def generate_platform_truth( system = systems.setdefault(sys_id, {}) _merge_file_into_system(system, fe, emu_name, db) # Track core contribution per system - sys_cov = system_cores.setdefault(sys_id, { - "profiled": set(), "unprofiled": set(), - }) + sys_cov = system_cores.setdefault( + sys_id, + { + "profiled": set(), + "unprofiled": set(), + }, + ) sys_cov["profiled"].add(emu_name) # Ensure all systems of resolved cores have entries (even with 0 files). @@ -230,17 +253,25 @@ def generate_platform_truth( for prof_sid in profile.get("systems", []): sys_id = _map_sys_id(prof_sid) systems.setdefault(sys_id, {}) - sys_cov = system_cores.setdefault(sys_id, { - "profiled": set(), "unprofiled": set(), - }) + sys_cov = system_cores.setdefault( + sys_id, + { + "profiled": set(), + "unprofiled": set(), + }, + ) sys_cov["profiled"].add(emu_name) # Track unprofiled cores per system based on profile system lists for emu_name in cores_unprofiled: for sys_id in systems: - sys_cov = system_cores.setdefault(sys_id, { - "profiled": set(), "unprofiled": set(), - }) + sys_cov = system_cores.setdefault( + sys_id, + { + "profiled": set(), + "unprofiled": set(), + }, + ) sys_cov["unprofiled"].add(emu_name) # Convert sets to sorted lists for serialization @@ -269,6 +300,7 @@ def generate_platform_truth( # Platform truth diffing + def _diff_system(truth_sys: dict, scraped_sys: dict) -> dict: """Compare files between truth and scraped for a single system.""" # Build truth index: name.lower() -> entry, alias.lower() -> entry @@ -310,32 +342,38 @@ def _diff_system(truth_sys: dict, scraped_sys: dict) -> dict: t_set = {v.lower() for v in t_list} s_set = {v.lower() for v in s_list} if not t_set & s_set: - hash_mismatch.append({ - "name": s_entry["name"], - "hash_type": h, - f"truth_{h}": t_hash, - f"scraped_{h}": s_hash, - "truth_cores": list(t_entry.get("_cores", [])), - }) + hash_mismatch.append( + { + "name": s_entry["name"], + "hash_type": h, + f"truth_{h}": t_hash, + f"scraped_{h}": s_hash, + "truth_cores": list(t_entry.get("_cores", [])), + } + ) break # Required mismatch t_req = t_entry.get("required") s_req = s_entry.get("required") if t_req is not None and s_req is not None and t_req != s_req: - required_mismatch.append({ - "name": s_entry["name"], - "truth_required": t_req, - "scraped_required": s_req, - }) + required_mismatch.append( + { + "name": s_entry["name"], + "truth_required": t_req, + "scraped_required": s_req, + } + ) # Collect unmatched files from both sides unmatched_truth = [ - fe for fe in truth_sys.get("files", []) + fe + for fe in truth_sys.get("files", []) if fe["name"].lower() not in matched_truth_names ] unmatched_scraped = { - s_key: s_entry for s_key, s_entry in scraped_index.items() + s_key: s_entry + for s_key, s_entry in scraped_index.items() if s_key not in truth_index } @@ -369,11 +407,13 @@ def _diff_system(truth_sys: dict, scraped_sys: dict) -> dict: # Truth files not matched (by name, alias, or hash) -> missing for fe in unmatched_truth: if fe["name"].lower() not in rename_matched_truth: - missing.append({ - "name": fe["name"], - "cores": list(fe.get("_cores", [])), - "source_refs": list(fe.get("_source_refs", [])), - }) + missing.append( + { + "name": fe["name"], + "cores": list(fe.get("_cores", [])), + "source_refs": list(fe.get("_source_refs", [])), + } + ) # Scraped files not in truth -> extra coverage = truth_sys.get("_coverage", {}) diff --git a/scripts/validate_pr.py b/scripts/validate_pr.py index 62a448fe..2c3f99d5 100644 --- a/scripts/validate_pr.py +++ b/scripts/validate_pr.py @@ -36,8 +36,20 @@ DEFAULT_DB = "database.json" DEFAULT_PLATFORMS_DIR = "platforms" BLOCKED_EXTENSIONS = { - ".exe", ".bat", ".cmd", ".sh", ".ps1", ".vbs", ".js", - ".msi", ".dll", ".so", ".dylib", ".py", ".rb", ".pl", + ".exe", + ".bat", + ".cmd", + ".sh", + ".ps1", + ".vbs", + ".js", + ".msi", + ".dll", + ".so", + ".dylib", + ".py", + ".rb", + ".pl", } MAX_FILE_SIZE = 100 * 1024 * 1024 @@ -140,7 +152,10 @@ def validate_file( result.add_check(False, f"Blocked file extension: {ext}") if result.size > MAX_FILE_SIZE: - result.add_check(False, f"File too large for embedded storage ({result.size:,} > {MAX_FILE_SIZE:,} bytes). Use storage: external in platform config.") + result.add_check( + False, + f"File too large for embedded storage ({result.size:,} > {MAX_FILE_SIZE:,} bytes). Use storage: external in platform config.", + ) elif result.size == 0: result.add_check(False, "File is empty (0 bytes)") else: @@ -149,7 +164,9 @@ def validate_file( if db: if result.sha1 in db.get("files", {}): existing = db["files"][result.sha1] - result.add_warning(f"Duplicate: identical file already exists at `{existing['path']}`") + result.add_warning( + f"Duplicate: identical file already exists at `{existing['path']}`" + ) else: result.add_check(True, "Not a duplicate in database") @@ -162,9 +179,13 @@ def validate_file( elif md5_known: result.add_check(True, "MD5 matches known platform requirement") elif name_known: - result.add_warning("Filename matches a known requirement but hash differs - may be a variant") + result.add_warning( + "Filename matches a known requirement but hash differs - may be a variant" + ) else: - result.add_warning("File not referenced in any platform config - needs manual review") + result.add_warning( + "File not referenced in any platform config - needs manual review" + ) normalized = os.path.normpath(filepath) if os.path.islink(filepath): @@ -194,9 +215,15 @@ def get_changed_files() -> list[str]: try: result = subprocess.run( ["git", "diff", "--name-only", f"origin/{base}...HEAD"], - capture_output=True, text=True, check=True, + capture_output=True, + text=True, + check=True, ) - files = [f for f in result.stdout.strip().split("\n") if f.startswith("bios/")] + files = [ + f + for f in result.stdout.strip().split("\n") + if f.startswith("bios/") + ] if files: return files except subprocess.CalledProcessError: @@ -206,7 +233,8 @@ def get_changed_files() -> list[str]: result = subprocess.run( ["git", "diff", "--cached", "--name-only"], - capture_output=True, text=True, + capture_output=True, + text=True, ) return [f for f in result.stdout.strip().split("\n") if f.startswith("bios/") and f] @@ -214,10 +242,14 @@ def get_changed_files() -> list[str]: def main(): parser = argparse.ArgumentParser(description="Validate BIOS file contributions") parser.add_argument("files", nargs="*", help="Files to validate") - parser.add_argument("--changed", action="store_true", help="Auto-detect changed BIOS files") + parser.add_argument( + "--changed", action="store_true", help="Auto-detect changed BIOS files" + ) parser.add_argument("--db", default=DEFAULT_DB, help="Path to database.json") parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR) - parser.add_argument("--markdown", action="store_true", help="Output as markdown (for PR comments)") + parser.add_argument( + "--markdown", action="store_true", help="Output as markdown (for PR comments)" + ) parser.add_argument("--json", action="store_true", help="Output as JSON") args = parser.parse_args() @@ -250,14 +282,16 @@ def main(): if args.json: output = [] for r in results: - output.append({ - "file": r.filepath, - "passed": r.passed, - "sha1": r.sha1, - "md5": r.md5, - "size": r.size, - "checks": [{"status": s, "message": m} for s, m in r.checks], - }) + output.append( + { + "file": r.filepath, + "passed": r.passed, + "sha1": r.sha1, + "md5": r.md5, + "size": r.size, + "checks": [{"status": s, "message": m} for s, m in r.checks], + } + ) print(json.dumps(output, indent=2)) elif args.markdown: lines = ["## BIOS Validation Report", ""] @@ -278,7 +312,15 @@ def main(): print(f" MD5: {r.md5}") print(f" Size: {r.size:,}") for s, m in r.checks: - marker = "✓" if s == "PASS" else "✗" if s == "FAIL" else "!" if s == "WARN" else "i" + marker = ( + "✓" + if s == "PASS" + else "✗" + if s == "FAIL" + else "!" + if s == "WARN" + else "i" + ) print(f" [{marker}] {m}") if not all_passed: diff --git a/scripts/validation.py b/scripts/validation.py index 111675fc..b26e86ef 100644 --- a/scripts/validation.py +++ b/scripts/validation.py @@ -63,28 +63,37 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]: continue if fname not in index: index[fname] = { - "checks": set(), "sizes": set(), - "min_size": None, "max_size": None, - "crc32": set(), "md5": set(), "sha1": set(), "sha256": set(), - "adler32": set(), "crypto_only": set(), - "emulators": set(), "per_emulator": {}, + "checks": set(), + "sizes": set(), + "min_size": None, + "max_size": None, + "crc32": set(), + "md5": set(), + "sha1": set(), + "sha256": set(), + "adler32": set(), + "crypto_only": set(), + "emulators": set(), + "per_emulator": {}, } index[fname]["emulators"].add(emu_name) index[fname]["checks"].update(checks) # Track non-reproducible crypto checks - index[fname]["crypto_only"].update( - c for c in checks if c in _CRYPTO_CHECKS - ) + index[fname]["crypto_only"].update(c for c in checks if c in _CRYPTO_CHECKS) # Size checks if "size" in checks: if f.get("size") is not None: index[fname]["sizes"].add(f["size"]) if f.get("min_size") is not None: cur = index[fname]["min_size"] - index[fname]["min_size"] = min(cur, f["min_size"]) if cur is not None else f["min_size"] + index[fname]["min_size"] = ( + min(cur, f["min_size"]) if cur is not None else f["min_size"] + ) if f.get("max_size") is not None: cur = index[fname]["max_size"] - index[fname]["max_size"] = max(cur, f["max_size"]) if cur is not None else f["max_size"] + index[fname]["max_size"] = ( + max(cur, f["max_size"]) if cur is not None else f["max_size"] + ) # Hash checks -collect all accepted hashes as sets (multiple valid # versions of the same file, e.g. MT-32 ROM versions) if "crc32" in checks and f.get("crc32"): @@ -132,7 +141,9 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]: if emu_name in pe: # Merge checks from multiple file entries for same emulator existing = pe[emu_name] - merged_checks = sorted(set(existing["checks"]) | set(pe_entry["checks"])) + merged_checks = sorted( + set(existing["checks"]) | set(pe_entry["checks"]) + ) existing["checks"] = merged_checks existing["expected"].update(pe_entry["expected"]) if pe_entry["source_ref"] and not existing["source_ref"]: @@ -160,17 +171,21 @@ def build_ground_truth(filename: str, validation_index: dict[str, dict]) -> list result = [] for emu_name in sorted(entry["per_emulator"]): detail = entry["per_emulator"][emu_name] - result.append({ - "emulator": emu_name, - "checks": detail["checks"], - "source_ref": detail.get("source_ref"), - "expected": detail.get("expected", {}), - }) + result.append( + { + "emulator": emu_name, + "checks": detail["checks"], + "source_ref": detail.get("source_ref"), + "expected": detail.get("expected", {}), + } + ) return result def check_file_validation( - local_path: str, filename: str, validation_index: dict[str, dict], + local_path: str, + filename: str, + validation_index: dict[str, dict], bios_dir: str = "bios", ) -> str | None: """Check emulator-level validation on a resolved file. @@ -199,10 +214,9 @@ def check_file_validation( # Hash checks -compute once, reuse for all hash types. # Each hash field is a set of accepted values (multiple valid ROM versions). - need_hashes = ( - any(h in checks and entry.get(h) for h in ("crc32", "md5", "sha1", "sha256")) - or entry.get("adler32") - ) + need_hashes = any( + h in checks and entry.get(h) for h in ("crc32", "md5", "sha1", "sha256") + ) or entry.get("adler32") if need_hashes: hashes = compute_hashes(local_path) for hash_type in ("crc32", "md5", "sha1", "sha256"): @@ -218,6 +232,7 @@ def check_file_validation( # Signature/crypto checks (3DS RSA, AES) if entry["crypto_only"]: from crypto_verify import check_crypto_validation + crypto_reason = check_crypto_validation(local_path, filename, bios_dir) if crypto_reason: return crypto_reason diff --git a/scripts/verify.py b/scripts/verify.py index 38666ba5..eb3481a1 100644 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -21,28 +21,41 @@ Usage: from __future__ import annotations import argparse -import hashlib import json import os import sys import zipfile -from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) from common import ( - build_target_cores_cache, build_zip_contents_index, check_inside_zip, - compute_hashes, expand_platform_declared_names, filter_systems_by_target, - group_identical_platforms, list_emulator_profiles, list_system_ids, - load_data_dir_registry, load_emulator_profiles, load_platform_config, - md5sum, md5_composite, require_yaml, resolve_local_file, + build_target_cores_cache, + build_zip_contents_index, + check_inside_zip, + compute_hashes, + expand_platform_declared_names, + filter_systems_by_target, + group_identical_platforms, + list_emulator_profiles, + list_system_ids, + load_data_dir_registry, + load_emulator_profiles, + load_platform_config, + md5_composite, + md5sum, + require_yaml, + resolve_local_file, resolve_platform_cores, ) yaml = require_yaml() from validation import ( - _build_validation_index, _parse_validation, build_ground_truth, - check_file_validation, filter_files_by_mode, + _build_validation_index, + _parse_validation, + build_ground_truth, + check_file_validation, + filter_files_by_mode, ) + DEFAULT_DB = "database.json" DEFAULT_PLATFORMS_DIR = "platforms" DEFAULT_EMULATORS_DIR = "emulators" @@ -50,27 +63,36 @@ DEFAULT_EMULATORS_DIR = "emulators" # Status model -aligned with Batocera BiosStatus (batocera-systems:967-969) + class Status: OK = "ok" - UNTESTED = "untested" # file present, hash not confirmed + UNTESTED = "untested" # file present, hash not confirmed MISSING = "missing" # Severity for per-file required/optional distinction class Severity: - CRITICAL = "critical" # required file missing or bad hash (Recalbox RED) - WARNING = "warning" # optional missing or hash mismatch (Recalbox YELLOW) - INFO = "info" # optional missing on existence-only platform - OK = "ok" # file verified + CRITICAL = "critical" # required file missing or bad hash (Recalbox RED) + WARNING = "warning" # optional missing or hash mismatch (Recalbox YELLOW) + INFO = "info" # optional missing on existence-only platform + OK = "ok" # file verified + _STATUS_ORDER = {Status.OK: 0, Status.UNTESTED: 1, Status.MISSING: 2} -_SEVERITY_ORDER = {Severity.OK: 0, Severity.INFO: 1, Severity.WARNING: 2, Severity.CRITICAL: 3} +_SEVERITY_ORDER = { + Severity.OK: 0, + Severity.INFO: 1, + Severity.WARNING: 2, + Severity.CRITICAL: 3, +} # Verification functions + def verify_entry_existence( - file_entry: dict, local_path: str | None, + file_entry: dict, + local_path: str | None, validation_index: dict[str, dict] | None = None, ) -> dict: """RetroArch verification: path_is_valid() -file exists = OK.""" @@ -120,13 +142,25 @@ def verify_entry_md5( elif result != "not_in_zip": found_in_zip = True if had_error and not found_in_zip: - return {**base, "status": Status.UNTESTED, "path": local_path, - "reason": f"{local_path} read error"} + return { + **base, + "status": Status.UNTESTED, + "path": local_path, + "reason": f"{local_path} read error", + } if not found_in_zip: - return {**base, "status": Status.UNTESTED, "path": local_path, - "reason": f"{zipped_file} not found inside ZIP"} - return {**base, "status": Status.UNTESTED, "path": local_path, - "reason": f"{zipped_file} MD5 mismatch inside ZIP"} + return { + **base, + "status": Status.UNTESTED, + "path": local_path, + "reason": f"{zipped_file} not found inside ZIP", + } + return { + **base, + "status": Status.UNTESTED, + "path": local_path, + "reason": f"{zipped_file} MD5 mismatch inside ZIP", + } if not md5_list: return {**base, "status": Status.OK, "path": local_path} @@ -151,8 +185,12 @@ def verify_entry_md5( except (zipfile.BadZipFile, OSError): pass - return {**base, "status": Status.UNTESTED, "path": local_path, - "reason": f"expected {md5_list[0][:12]}… got {actual_md5[:12]}…"} + return { + **base, + "status": Status.UNTESTED, + "path": local_path, + "reason": f"expected {md5_list[0][:12]}… got {actual_md5[:12]}…", + } def verify_entry_sha1( @@ -176,14 +214,22 @@ def verify_entry_sha1( if actual_sha1 == expected_sha1.lower(): return {**base, "status": Status.OK, "path": local_path} - return {**base, "status": Status.UNTESTED, "path": local_path, - "reason": f"expected {expected_sha1[:12]}… got {actual_sha1[:12]}…"} + return { + **base, + "status": Status.UNTESTED, + "path": local_path, + "reason": f"expected {expected_sha1[:12]}… got {actual_sha1[:12]}…", + } # Severity mapping per platform + def compute_severity( - status: str, required: bool, mode: str, hle_fallback: bool = False, + status: str, + required: bool, + mode: str, + hle_fallback: bool = False, ) -> str: """Map (status, required, verification_mode, hle_fallback) -> severity. @@ -235,8 +281,13 @@ def _build_expected(file_entry: dict, checks: list[str]) -> dict: expected["adler32"] = adler_val return expected -def _name_in_index(name: str, by_name: dict, by_path_suffix: dict | None = None, - data_names: set[str] | None = None) -> bool: + +def _name_in_index( + name: str, + by_name: dict, + by_path_suffix: dict | None = None, + data_names: set[str] | None = None, +) -> bool: """Check if a name is resolvable in the database indexes or data directories.""" if name in by_name: return True @@ -248,7 +299,9 @@ def _name_in_index(name: str, by_name: dict, by_path_suffix: dict | None = None, if data_names: if name in data_names or name.lower() in data_names: return True - if basename != name and (basename in data_names or basename.lower() in data_names): + if basename != name and ( + basename in data_names or basename.lower() in data_names + ): return True return False @@ -276,7 +329,11 @@ def find_undeclared_files( by_name = db.get("indexes", {}).get("by_name", {}) by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {}) - profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir) + profiles = ( + emu_profiles + if emu_profiles is not None + else load_emulator_profiles(emulators_dir) + ) relevant = resolve_platform_cores(config, profiles, target_cores=target_cores) standalone_set = set(str(c) for c in config.get("standalone_cores", [])) @@ -340,7 +397,9 @@ def find_undeclared_files( # Archived files are grouped by archive if archive: if archive not in archive_entries: - in_repo = _name_in_index(archive, by_name, by_path_suffix, data_names) + in_repo = _name_in_index( + archive, by_name, by_path_suffix, data_names + ) archive_entries[archive] = { "emulator": profile.get("emulator", emu_name), "name": archive, @@ -377,19 +436,21 @@ def find_undeclared_files( in_repo = _name_in_index(path_base, by_name, by_path_suffix, data_names) checks = _parse_validation(f.get("validation")) - undeclared.append({ - "emulator": profile.get("emulator", emu_name), - "name": fname, - "path": dest, - "required": f.get("required", False), - "hle_fallback": f.get("hle_fallback", False), - "category": f.get("category", "bios"), - "in_repo": in_repo, - "note": f.get("note", ""), - "checks": sorted(checks) if checks else [], - "source_ref": f.get("source_ref"), - "expected": _build_expected(f, checks), - }) + undeclared.append( + { + "emulator": profile.get("emulator", emu_name), + "name": fname, + "path": dest, + "required": f.get("required", False), + "hle_fallback": f.get("hle_fallback", False), + "category": f.get("category", "bios"), + "in_repo": in_repo, + "note": f.get("note", ""), + "checks": sorted(checks) if checks else [], + "source_ref": f.get("source_ref"), + "expected": _build_expected(f, checks), + } + ) # Append grouped archive entries for entry in sorted(archive_entries.values(), key=lambda e: e["name"]): @@ -399,7 +460,9 @@ def find_undeclared_files( def find_exclusion_notes( - config: dict, emulators_dir: str, emu_profiles: dict | None = None, + config: dict, + emulators_dir: str, + emu_profiles: dict | None = None, target_cores: set[str] | None = None, ) -> list[dict]: """Document why certain emulator files are intentionally excluded. @@ -410,7 +473,11 @@ def find_exclusion_notes( - Frozen snapshots with files: [] (code doesn't load .info firmware) - Files covered by data_directories """ - profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir) + profiles = ( + emu_profiles + if emu_profiles is not None + else load_emulator_profiles(emulators_dir) + ) platform_systems = set() for sys_id in config.get("systems", {}): platform_systems.add(sys_id) @@ -427,19 +494,27 @@ def find_exclusion_notes( # Launcher excluded entirely if profile.get("type") == "launcher": - notes.append({ - "emulator": emu_display, "reason": "launcher", - "detail": profile.get("exclusion_note", "BIOS managed by standalone emulator"), - }) + notes.append( + { + "emulator": emu_display, + "reason": "launcher", + "detail": profile.get( + "exclusion_note", "BIOS managed by standalone emulator" + ), + } + ) continue # Profile-level exclusion note (frozen snapshots, etc.) exclusion_note = profile.get("exclusion_note") if exclusion_note: - notes.append({ - "emulator": emu_display, "reason": "exclusion_note", - "detail": exclusion_note, - }) + notes.append( + { + "emulator": emu_display, + "reason": "exclusion_note", + "detail": exclusion_note, + } + ) continue # Count standalone-only files -but only report as excluded if the @@ -449,22 +524,34 @@ def find_exclusion_notes( standalone_set & {str(c) for c in profile.get("cores", [])} ) if not is_standalone: - standalone_files = [f for f in profile.get("files", []) if f.get("mode") == "standalone"] + standalone_files = [ + f for f in profile.get("files", []) if f.get("mode") == "standalone" + ] if standalone_files: names = [f["name"] for f in standalone_files[:3]] - more = f" +{len(standalone_files)-3}" if len(standalone_files) > 3 else "" - notes.append({ - "emulator": emu_display, "reason": "standalone_only", - "detail": f"{len(standalone_files)} files for standalone mode only ({', '.join(names)}{more})", - }) + more = ( + f" +{len(standalone_files) - 3}" + if len(standalone_files) > 3 + else "" + ) + notes.append( + { + "emulator": emu_display, + "reason": "standalone_only", + "detail": f"{len(standalone_files)} files for standalone mode only ({', '.join(names)}{more})", + } + ) return notes # Platform verification + def _find_best_variant( - file_entry: dict, db: dict, current_path: str, + file_entry: dict, + db: dict, + current_path: str, validation_index: dict, ) -> str | None: """Search for a repo file that passes both platform MD5 and emulator validation.""" @@ -473,7 +560,11 @@ def _find_best_variant( return None md5_expected = file_entry.get("md5", "") - md5_set = {m.strip().lower() for m in md5_expected.split(",") if m.strip()} if md5_expected else set() + md5_set = ( + {m.strip().lower() for m in md5_expected.split(",") if m.strip()} + if md5_expected + else set() + ) by_name = db.get("indexes", {}).get("by_name", {}) files_db = db.get("files", {}) @@ -481,7 +572,11 @@ def _find_best_variant( for sha1 in by_name.get(fname, []): candidate = files_db.get(sha1, {}) path = candidate.get("path", "") - if not path or not os.path.exists(path) or os.path.realpath(path) == os.path.realpath(current_path): + if ( + not path + or not os.path.exists(path) + or os.path.realpath(path) == os.path.realpath(current_path) + ): continue if md5_set and candidate.get("md5", "").lower() not in md5_set: continue @@ -492,7 +587,8 @@ def _find_best_variant( def verify_platform( - config: dict, db: dict, + config: dict, + db: dict, emulators_dir: str = DEFAULT_EMULATORS_DIR, emu_profiles: dict | None = None, target_cores: set[str] | None = None, @@ -511,7 +607,11 @@ def verify_platform( zip_contents = build_zip_contents_index(db) if has_zipped else {} # Build HLE + validation indexes from emulator profiles - profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir) + profiles = ( + emu_profiles + if emu_profiles is not None + else load_emulator_profiles(emulators_dir) + ) hle_index: dict[str, bool] = {} for profile in profiles.values(): for f in profile.get("files", []): @@ -522,7 +622,9 @@ def verify_platform( # Filter systems by target plat_cores = resolve_platform_cores(config, profiles) if target_cores else None verify_systems = filter_systems_by_target( - config.get("systems", {}), profiles, target_cores, + config.get("systems", {}), + profiles, + target_cores, platform_cores=plat_cores, ) @@ -536,12 +638,16 @@ def verify_platform( for sys_id, system in verify_systems.items(): for file_entry in system.get("files", []): local_path, resolve_status = resolve_local_file( - file_entry, db, zip_contents, + file_entry, + db, + zip_contents, data_dir_registry=data_dir_registry, ) if mode == "existence": result = verify_entry_existence( - file_entry, local_path, validation_index, + file_entry, + local_path, + validation_index, ) elif mode == "sha1": result = verify_entry_sha1(file_entry, local_path) @@ -555,16 +661,22 @@ def verify_platform( reason = check_file_validation(local_path, fname, validation_index) if reason: better = _find_best_variant( - file_entry, db, local_path, validation_index, + file_entry, + db, + local_path, + validation_index, ) if not better: ventry = validation_index.get(fname, {}) emus = ", ".join(ventry.get("emulators", [])) - result["discrepancy"] = f"{platform} says OK but {emus} says {reason}" + result["discrepancy"] = ( + f"{platform} says OK but {emus} says {reason}" + ) result["system"] = sys_id result["hle_fallback"] = hle_index.get(file_entry.get("name", ""), False) result["ground_truth"] = build_ground_truth( - file_entry.get("name", ""), validation_index, + file_entry.get("name", ""), + validation_index, ) details.append(result) @@ -581,11 +693,18 @@ def verify_platform( hle = hle_index.get(file_entry.get("name", ""), False) sev = compute_severity(cur, required, mode, hle) prev_sev = file_severity.get(dest) - if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get(prev_sev, 0): + if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get( + prev_sev, 0 + ): file_severity[dest] = sev # Count by severity - counts = {Severity.OK: 0, Severity.INFO: 0, Severity.WARNING: 0, Severity.CRITICAL: 0} + counts = { + Severity.OK: 0, + Severity.INFO: 0, + Severity.WARNING: 0, + Severity.CRITICAL: 0, + } for s in file_severity.values(): counts[s] = counts.get(s, 0) + 1 @@ -597,10 +716,19 @@ def verify_platform( # Cross-reference undeclared files if supplemental_names is None: from cross_reference import _build_supplemental_index + supplemental_names = _build_supplemental_index() - undeclared = find_undeclared_files(config, emulators_dir, db, emu_profiles, - target_cores=target_cores, data_names=supplemental_names) - exclusions = find_exclusion_notes(config, emulators_dir, emu_profiles, target_cores=target_cores) + undeclared = find_undeclared_files( + config, + emulators_dir, + db, + emu_profiles, + target_cores=target_cores, + data_names=supplemental_names, + ) + exclusions = find_exclusion_notes( + config, emulators_dir, emu_profiles, target_cores=target_cores + ) # Ground truth coverage gt_filenames = set(validation_index) @@ -635,6 +763,7 @@ def verify_platform( # Output + def _format_ground_truth_aggregate(ground_truth: list[dict]) -> str: """Format ground truth as a single aggregated line. @@ -759,8 +888,16 @@ def _print_undeclared_section(result: dict, verbose: bool) -> None: bios_files = [u for u in undeclared if u.get("category", "bios") == "bios"] game_data = [u for u in undeclared if u.get("category", "bios") == "game_data"] - req_not_in_repo = [u for u in bios_files if u["required"] and not u["in_repo"] and not u.get("hle_fallback")] - req_hle_not_in_repo = [u for u in bios_files if u["required"] and not u["in_repo"] and u.get("hle_fallback")] + req_not_in_repo = [ + u + for u in bios_files + if u["required"] and not u["in_repo"] and not u.get("hle_fallback") + ] + req_hle_not_in_repo = [ + u + for u in bios_files + if u["required"] and not u["in_repo"] and u.get("hle_fallback") + ] req_in_repo = [u for u in bios_files if u["required"] and u["in_repo"]] opt_in_repo = [u for u in bios_files if not u["required"] and u["in_repo"]] opt_not_in_repo = [u for u in bios_files if not u["required"] and not u["in_repo"]] @@ -769,7 +906,9 @@ def _print_undeclared_section(result: dict, verbose: bool) -> None: core_missing_req = len(req_not_in_repo) + len(req_hle_not_in_repo) core_missing_opt = len(opt_not_in_repo) - print(f" Core files: {core_in_pack} in pack, {core_missing_req} required missing, {core_missing_opt} optional missing") + print( + f" Core files: {core_in_pack} in pack, {core_missing_req} required missing, {core_missing_opt} optional missing" + ) for u in req_not_in_repo: _print_undeclared_entry(u, "MISSING (required)", verbose) @@ -783,7 +922,9 @@ def _print_undeclared_section(result: dict, verbose: bool) -> None: print(f" Game data: {len(gd_present)} in pack, {len(gd_missing)} missing") -def print_platform_result(result: dict, group: list[str], verbose: bool = False) -> None: +def print_platform_result( + result: dict, group: list[str], verbose: bool = False +) -> None: mode = result["verification_mode"] total = result["total_files"] c = result["severity_counts"] @@ -827,13 +968,16 @@ def print_platform_result(result: dict, group: list[str], verbose: bool = False) gt_cov = result.get("ground_truth_coverage") if gt_cov and gt_cov["total"] > 0: pct = gt_cov["with_validation"] * 100 // gt_cov["total"] - print(f" Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)") + print( + f" Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)" + ) if gt_cov["platform_only"]: print(f" {gt_cov['platform_only']} platform-only (no emulator profile)") # Emulator/system mode verification + def _effective_validation_label(details: list[dict], validation_index: dict) -> str: """Determine the bracket label for the report. @@ -863,7 +1007,7 @@ def verify_emulator( standalone: bool = False, ) -> dict: """Verify files for specific emulator profiles.""" - profiles = load_emulator_profiles(emulators_dir) + load_emulator_profiles(emulators_dir) zip_contents = build_zip_contents_index(db) # Also load aliases for redirect messages @@ -873,26 +1017,35 @@ def verify_emulator( selected: list[tuple[str, dict]] = [] for name in profile_names: if name not in all_profiles: - available = sorted(k for k, v in all_profiles.items() - if v.get("type") not in ("alias", "test")) + available = sorted( + k + for k, v in all_profiles.items() + if v.get("type") not in ("alias", "test") + ) print(f"Error: emulator '{name}' not found", file=sys.stderr) print(f"Available: {', '.join(available[:10])}...", file=sys.stderr) sys.exit(1) p = all_profiles[name] if p.get("type") == "alias": alias_of = p.get("alias_of", "?") - print(f"Error: {name} is an alias of {alias_of} -use --emulator {alias_of}", - file=sys.stderr) + print( + f"Error: {name} is an alias of {alias_of} -use --emulator {alias_of}", + file=sys.stderr, + ) sys.exit(1) if p.get("type") == "launcher": - print(f"Error: {name} is a launcher -use the emulator it launches", - file=sys.stderr) + print( + f"Error: {name} is a launcher -use the emulator it launches", + file=sys.stderr, + ) sys.exit(1) # Check standalone capability ptype = p.get("type", "libretro") if standalone and "standalone" not in ptype: - print(f"Error: {name} ({ptype}) does not support --standalone", - file=sys.stderr) + print( + f"Error: {name} ({ptype}) does not support --standalone", + file=sys.stderr, + ) sys.exit(1) selected.append((name, p)) @@ -924,12 +1077,16 @@ def verify_emulator( data_dir_notices.append(ref) if not files: - details.append({ - "name": f"({emu_name})", "status": Status.OK, - "required": False, "system": "", - "note": f"No files needed for {profile.get('emulator', emu_name)}", - "ground_truth": [], - }) + details.append( + { + "name": f"({emu_name})", + "status": Status.OK, + "required": False, + "system": "", + "note": f"No files needed for {profile.get('emulator', emu_name)}", + "ground_truth": [], + } + ) continue # Verify archives as units (e.g., neogeo.zip, aes.zip) @@ -940,7 +1097,9 @@ def verify_emulator( seen_archives.add(archive) archive_entry = {"name": archive} local_path, _ = resolve_local_file( - archive_entry, db, zip_contents, + archive_entry, + db, + zip_contents, data_dir_registry=data_registry, ) required = any( @@ -948,11 +1107,18 @@ def verify_emulator( for f in files ) if local_path: - result = {"name": archive, "status": Status.OK, - "required": required, "path": local_path} + result = { + "name": archive, + "status": Status.OK, + "required": required, + "path": local_path, + } else: - result = {"name": archive, "status": Status.MISSING, - "required": required} + result = { + "name": archive, + "status": Status.MISSING, + "required": required, + } result["system"] = file_entry.get("system", "") result["hle_fallback"] = False result["ground_truth"] = build_ground_truth(archive, validation_index) @@ -961,11 +1127,15 @@ def verify_emulator( dest_to_name[dest] = archive cur = result["status"] prev = file_status.get(dest) - if prev is None or _STATUS_ORDER.get(cur, 0) > _STATUS_ORDER.get(prev, 0): + if prev is None or _STATUS_ORDER.get(cur, 0) > _STATUS_ORDER.get( + prev, 0 + ): file_status[dest] = cur sev = compute_severity(cur, required, "existence", False) prev_sev = file_severity.get(dest) - if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get(prev_sev, 0): + if prev_sev is None or _SEVERITY_ORDER.get( + sev, 0 + ) > _SEVERITY_ORDER.get(prev_sev, 0): file_severity[dest] = sev for file_entry in files: @@ -975,7 +1145,10 @@ def verify_emulator( dest_hint = file_entry.get("path", "") local_path, resolve_status = resolve_local_file( - file_entry, db, zip_contents, dest_hint=dest_hint, + file_entry, + db, + zip_contents, + dest_hint=dest_hint, data_dir_registry=data_registry, ) name = file_entry.get("name", "") @@ -988,12 +1161,20 @@ def verify_emulator( # Apply emulator validation reason = check_file_validation(local_path, name, validation_index) if reason: - result = {"name": name, "status": Status.UNTESTED, - "required": required, "path": local_path, - "reason": reason} + result = { + "name": name, + "status": Status.UNTESTED, + "required": required, + "path": local_path, + "reason": reason, + } else: - result = {"name": name, "status": Status.OK, - "required": required, "path": local_path} + result = { + "name": name, + "status": Status.OK, + "required": required, + "path": local_path, + } result["system"] = file_entry.get("system", "") result["hle_fallback"] = hle @@ -1009,10 +1190,17 @@ def verify_emulator( file_status[dest] = cur sev = compute_severity(cur, required, "existence", hle) prev_sev = file_severity.get(dest) - if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get(prev_sev, 0): + if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get( + prev_sev, 0 + ): file_severity[dest] = sev - counts = {Severity.OK: 0, Severity.INFO: 0, Severity.WARNING: 0, Severity.CRITICAL: 0} + counts = { + Severity.OK: 0, + Severity.INFO: 0, + Severity.WARNING: 0, + Severity.CRITICAL: 0, + } for s in file_severity.values(): counts[s] = counts.get(s, 0) + 1 status_counts: dict[str, int] = {} @@ -1067,13 +1255,19 @@ def verify_system( for p in profiles.values(): all_systems.update(p.get("systems", [])) if standalone: - print(f"No standalone emulators found for system(s): {', '.join(system_ids)}", - file=sys.stderr) + print( + f"No standalone emulators found for system(s): {', '.join(system_ids)}", + file=sys.stderr, + ) else: - print(f"No emulators found for system(s): {', '.join(system_ids)}", - file=sys.stderr) - print(f"Available systems: {', '.join(sorted(all_systems)[:20])}...", - file=sys.stderr) + print( + f"No emulators found for system(s): {', '.join(system_ids)}", + file=sys.stderr, + ) + print( + f"Available systems: {', '.join(sorted(all_systems)[:20])}...", + file=sys.stderr, + ) sys.exit(1) return verify_emulator(matching, emulators_dir, db, standalone) @@ -1147,13 +1341,17 @@ def print_emulator_result(result: dict, verbose: bool = False) -> None: print(f" {line}") for ref in result.get("data_dir_notices", []): - print(f" Note: data directory '{ref}' required but not included (use refresh_data_dirs.py)") + print( + f" Note: data directory '{ref}' required but not included (use refresh_data_dirs.py)" + ) # Ground truth coverage footer gt_cov = result.get("ground_truth_coverage") if gt_cov and gt_cov["total"] > 0: pct = gt_cov["with_validation"] * 100 // gt_cov["total"] - print(f" Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)") + print( + f" Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)" + ) if gt_cov["platform_only"]: print(f" {gt_cov['platform_only']} platform-only (no emulator profile)") @@ -1161,19 +1359,36 @@ def print_emulator_result(result: dict, verbose: bool = False) -> None: def main(): parser = argparse.ArgumentParser(description="Platform-native BIOS verification") parser.add_argument("--platform", "-p", help="Platform name") - parser.add_argument("--all", action="store_true", help="Verify all active platforms") - parser.add_argument("--emulator", "-e", help="Emulator profile name(s), comma-separated") + parser.add_argument( + "--all", action="store_true", help="Verify all active platforms" + ) + parser.add_argument( + "--emulator", "-e", help="Emulator profile name(s), comma-separated" + ) parser.add_argument("--system", "-s", help="System ID(s), comma-separated") parser.add_argument("--standalone", action="store_true", help="Use standalone mode") - parser.add_argument("--list-emulators", action="store_true", help="List available emulators") - parser.add_argument("--list-systems", action="store_true", help="List available systems") + parser.add_argument( + "--list-emulators", action="store_true", help="List available emulators" + ) + parser.add_argument( + "--list-systems", action="store_true", help="List available systems" + ) parser.add_argument("--include-archived", action="store_true") parser.add_argument("--target", "-t", help="Hardware target (e.g., switch, rpi4)") - parser.add_argument("--list-targets", action="store_true", help="List available targets for the platform") + parser.add_argument( + "--list-targets", + action="store_true", + help="List available targets for the platform", + ) parser.add_argument("--db", default=DEFAULT_DB) parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR) parser.add_argument("--emulators-dir", default=DEFAULT_EMULATORS_DIR) - parser.add_argument("--verbose", "-v", action="store_true", help="Show emulator ground truth details") + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="Show emulator ground truth details", + ) parser.add_argument("--json", action="store_true", help="JSON output") args = parser.parse_args() @@ -1188,13 +1403,16 @@ def main(): if not args.platform: parser.error("--list-targets requires --platform") from common import list_available_targets + targets = list_available_targets(args.platform, args.platforms_dir) if not targets: print(f"No targets configured for platform '{args.platform}'") return for t in targets: - aliases = f" (aliases: {', '.join(t['aliases'])})" if t['aliases'] else "" - print(f" {t['name']:30s} {t['architecture']:10s} {t['core_count']:>4d} cores{aliases}") + aliases = f" (aliases: {', '.join(t['aliases'])})" if t["aliases"] else "" + print( + f" {t['name']:30s} {t['architecture']:10s} {t['core_count']:>4d} cores{aliases}" + ) return # Mutual exclusion @@ -1202,7 +1420,9 @@ def main(): if modes == 0: parser.error("Specify --platform, --all, --emulator, or --system") if modes > 1: - parser.error("--platform, --all, --emulator, and --system are mutually exclusive") + parser.error( + "--platform, --all, --emulator, and --system are mutually exclusive" + ) if args.standalone and not (args.emulator or args.system): parser.error("--standalone requires --emulator or --system") if args.target and not (args.platform or args.all): @@ -1218,7 +1438,9 @@ def main(): names = [n.strip() for n in args.emulator.split(",") if n.strip()] result = verify_emulator(names, args.emulators_dir, db, args.standalone) if args.json: - result["details"] = [d for d in result["details"] if d["status"] != Status.OK] + result["details"] = [ + d for d in result["details"] if d["status"] != Status.OK + ] print(json.dumps(result, indent=2)) else: print_emulator_result(result, verbose=args.verbose) @@ -1229,7 +1451,9 @@ def main(): system_ids = [s.strip() for s in args.system.split(",") if s.strip()] result = verify_system(system_ids, args.emulators_dir, db, args.standalone) if args.json: - result["details"] = [d for d in result["details"] if d["status"] != Status.OK] + result["details"] = [ + d for d in result["details"] if d["status"] != Status.OK + ] print(json.dumps(result, indent=2)) else: print_emulator_result(result, verbose=args.verbose) @@ -1238,6 +1462,7 @@ def main(): # Platform mode (existing) if args.all: from list_platforms import list_platforms as _list_platforms + platforms = _list_platforms(include_archived=args.include_archived) elif args.platform: platforms = [args.platform] @@ -1253,16 +1478,21 @@ def main(): if args.target: try: target_cores_cache, platforms = build_target_cores_cache( - platforms, args.target, args.platforms_dir, is_all=args.all, + platforms, + args.target, + args.platforms_dir, + is_all=args.all, ) except (FileNotFoundError, ValueError) as e: print(f"ERROR: {e}", file=sys.stderr) sys.exit(1) # Group identical platforms (same function as generate_pack) - groups = group_identical_platforms(platforms, args.platforms_dir, - target_cores_cache if args.target else None) + groups = group_identical_platforms( + platforms, args.platforms_dir, target_cores_cache if args.target else None + ) from cross_reference import _build_supplemental_index + suppl_names = _build_supplemental_index() all_results = {} @@ -1271,11 +1501,18 @@ def main(): config = load_platform_config(representative, args.platforms_dir) tc = target_cores_cache.get(representative) if args.target else None result = verify_platform( - config, db, args.emulators_dir, emu_profiles, - target_cores=tc, data_dir_registry=data_registry, + config, + db, + args.emulators_dir, + emu_profiles, + target_cores=tc, + data_dir_registry=data_registry, supplemental_names=suppl_names, ) - names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms] + names = [ + load_platform_config(p, args.platforms_dir).get("platform", p) + for p in group_platforms + ] group_results.append((result, names)) for p in group_platforms: all_results[p] = result diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 6b497aca..52f135b4 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -14,10 +14,10 @@ Covers: Cross-reference: undeclared files, standalone skipped, alias profiles skipped, data_dir suppresses gaps """ + from __future__ import annotations import hashlib -import json import os import shutil import sys @@ -30,19 +30,32 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts")) import yaml from common import ( - build_zip_contents_index, check_inside_zip, compute_hashes, + build_zip_contents_index, + check_inside_zip, + compute_hashes, expand_platform_declared_names, - group_identical_platforms, load_emulator_profiles, load_platform_config, - md5_composite, md5sum, parse_md5_list, resolve_local_file, - resolve_platform_cores, safe_extract_zip, -) -from validation import ( - _build_validation_index, check_file_validation, filter_files_by_mode, + group_identical_platforms, + load_emulator_profiles, + load_platform_config, + md5_composite, + parse_md5_list, + resolve_local_file, + resolve_platform_cores, + safe_extract_zip, ) from truth import diff_platform_truth, generate_platform_truth +from validation import ( + _build_validation_index, + check_file_validation, + filter_files_by_mode, +) from verify import ( - Severity, Status, verify_platform, find_undeclared_files, find_exclusion_notes, - verify_emulator, _effective_validation_label, + Severity, + Status, + find_exclusion_notes, + find_undeclared_files, + verify_emulator, + verify_platform, ) @@ -52,7 +65,8 @@ def _h(data: bytes) -> dict: "sha1": hashlib.sha1(data).hexdigest(), "md5": hashlib.md5(data).hexdigest(), "crc32": format(hashlib.new("crc32", data).digest()[0], "08x") - if False else "", # not needed for tests + if False + else "", # not needed for tests } @@ -66,6 +80,7 @@ class TestE2E(unittest.TestCase): def setUp(self): # Clear emulator profile cache to avoid stale data between tests from common import _emulator_profiles_cache + _emulator_profiles_cache.clear() self.root = tempfile.mkdtemp() @@ -85,7 +100,9 @@ class TestE2E(unittest.TestCase): self._make_file("no_md5.bin", b"NO_MD5_CHECK") self._make_file("truncated.bin", b"BATOCERA_TRUNCATED") self._make_file("alias_target.bin", b"ALIAS_FILE_DATA") - self._make_file("leading_zero_crc.bin", b"LEADING_ZERO_CRC_12") # crc32=0179e92e + self._make_file( + "leading_zero_crc.bin", b"LEADING_ZERO_CRC_12" + ) # crc32=0179e92e # Regional variant files (same name, different content, in subdirs) os.makedirs(os.path.join(self.bios_dir, "TestConsole", "USA"), exist_ok=True) @@ -109,10 +126,13 @@ class TestE2E(unittest.TestCase): # ZIP for multi-hash self._make_zip("multi.zip", {"rom.bin": b"MULTI_HASH_DATA"}) # Archive BIOS ZIP (like neogeo.zip) containing multiple ROMs - self._make_zip("test_archive.zip", { - "rom_a.bin": b"ARCHIVE_ROM_A", - "rom_b.bin": b"ARCHIVE_ROM_B", - }) + self._make_zip( + "test_archive.zip", + { + "rom_a.bin": b"ARCHIVE_ROM_A", + "rom_b.bin": b"ARCHIVE_ROM_B", + }, + ) # -- Build synthetic database -- self.db = self._build_db() @@ -142,7 +162,9 @@ class TestE2E(unittest.TestCase): f.write(data) h = _h(data) self.files[f"{subdir}/{name}" if subdir else name] = { - "path": path, "data": data, **h, + "path": path, + "data": data, + **h, } return path @@ -185,8 +207,12 @@ class TestE2E(unittest.TestCase): by_path_suffix.setdefault(key, []).append(info["sha1"]) return { "files": files_db, - "indexes": {"by_md5": by_md5, "by_name": by_name, "by_crc32": {}, - "by_path_suffix": by_path_suffix}, + "indexes": { + "by_md5": by_md5, + "by_name": by_name, + "by_crc32": {}, + "by_path_suffix": by_path_suffix, + }, } # --------------------------------------------------------------- @@ -194,7 +220,6 @@ class TestE2E(unittest.TestCase): # --------------------------------------------------------------- def _create_existence_platform(self): - f = self.files config = { "platform": "TestExistence", "verification_mode": "existence", @@ -202,10 +227,26 @@ class TestE2E(unittest.TestCase): "systems": { "console-a": { "files": [ - {"name": "present_req.bin", "destination": "present_req.bin", "required": True}, - {"name": "missing_req.bin", "destination": "missing_req.bin", "required": True}, - {"name": "present_opt.bin", "destination": "present_opt.bin", "required": False}, - {"name": "missing_opt.bin", "destination": "missing_opt.bin", "required": False}, + { + "name": "present_req.bin", + "destination": "present_req.bin", + "required": True, + }, + { + "name": "missing_req.bin", + "destination": "missing_req.bin", + "required": True, + }, + { + "name": "present_opt.bin", + "destination": "present_opt.bin", + "required": False, + }, + { + "name": "missing_opt.bin", + "destination": "missing_opt.bin", + "required": False, + }, ], }, }, @@ -217,7 +258,9 @@ class TestE2E(unittest.TestCase): f = self.files good_inner_md5 = f["good.zip"]["inner_md5s"]["inner.rom"] bad_inner_md5 = "deadbeefdeadbeefdeadbeefdeadbeef" - composite_md5 = hashlib.md5(b"AAAA" + b"BBBB").hexdigest() # sorted: a.rom, b.rom + composite_md5 = hashlib.md5( + b"AAAA" + b"BBBB" + ).hexdigest() # sorted: a.rom, b.rom multi_wrong = "0000000000000000000000000000000" multi_right = f["multi.zip"]["inner_md5s"]["rom.bin"] truncated_md5 = f["truncated.bin"]["md5"][:29] # Batocera 29-char @@ -230,42 +273,98 @@ class TestE2E(unittest.TestCase): "includes": ["test_shared"], "files": [ # Correct hash - {"name": "correct_hash.bin", "destination": "correct_hash.bin", - "md5": f["correct_hash.bin"]["md5"], "required": True}, + { + "name": "correct_hash.bin", + "destination": "correct_hash.bin", + "md5": f["correct_hash.bin"]["md5"], + "required": True, + }, # Wrong hash on disk ->untested - {"name": "wrong_hash.bin", "destination": "wrong_hash.bin", - "md5": "ffffffffffffffffffffffffffffffff", "required": True}, + { + "name": "wrong_hash.bin", + "destination": "wrong_hash.bin", + "md5": "ffffffffffffffffffffffffffffffff", + "required": True, + }, # No MD5 ->OK (existence within md5 platform) - {"name": "no_md5.bin", "destination": "no_md5.bin", "required": False}, + { + "name": "no_md5.bin", + "destination": "no_md5.bin", + "required": False, + }, # Missing required - {"name": "gone_req.bin", "destination": "gone_req.bin", - "md5": "abcd", "required": True}, + { + "name": "gone_req.bin", + "destination": "gone_req.bin", + "md5": "abcd", + "required": True, + }, # Missing optional - {"name": "gone_opt.bin", "destination": "gone_opt.bin", - "md5": "abcd", "required": False}, + { + "name": "gone_opt.bin", + "destination": "gone_opt.bin", + "md5": "abcd", + "required": False, + }, # zipped_file correct - {"name": "good.zip", "destination": "good.zip", - "md5": good_inner_md5, "zipped_file": "inner.rom", "required": True}, + { + "name": "good.zip", + "destination": "good.zip", + "md5": good_inner_md5, + "zipped_file": "inner.rom", + "required": True, + }, # zipped_file wrong inner - {"name": "bad_inner.zip", "destination": "bad_inner.zip", - "md5": bad_inner_md5, "zipped_file": "inner.rom", "required": False}, + { + "name": "bad_inner.zip", + "destination": "bad_inner.zip", + "md5": bad_inner_md5, + "zipped_file": "inner.rom", + "required": False, + }, # zipped_file inner not found - {"name": "missing_inner.zip", "destination": "missing_inner.zip", - "md5": "abc", "zipped_file": "nope.rom", "required": False}, + { + "name": "missing_inner.zip", + "destination": "missing_inner.zip", + "md5": "abc", + "zipped_file": "nope.rom", + "required": False, + }, # md5_composite (Recalbox) - {"name": "composite.zip", "destination": "composite.zip", - "md5": composite_md5, "required": True}, + { + "name": "composite.zip", + "destination": "composite.zip", + "md5": composite_md5, + "required": True, + }, # Multi-hash comma-separated (Recalbox) - {"name": "multi.zip", "destination": "multi.zip", - "md5": f"{multi_wrong},{multi_right}", "zipped_file": "rom.bin", "required": True}, + { + "name": "multi.zip", + "destination": "multi.zip", + "md5": f"{multi_wrong},{multi_right}", + "zipped_file": "rom.bin", + "required": True, + }, # Truncated MD5 (Batocera 29 chars) - {"name": "truncated.bin", "destination": "truncated.bin", - "md5": truncated_md5, "required": True}, + { + "name": "truncated.bin", + "destination": "truncated.bin", + "md5": truncated_md5, + "required": True, + }, # Same destination from different entry ->worst status wins - {"name": "correct_hash.bin", "destination": "dedup_target.bin", - "md5": f["correct_hash.bin"]["md5"], "required": True}, - {"name": "correct_hash.bin", "destination": "dedup_target.bin", - "md5": "wrong_for_dedup_test", "required": True}, + { + "name": "correct_hash.bin", + "destination": "dedup_target.bin", + "md5": f["correct_hash.bin"]["md5"], + "required": True, + }, + { + "name": "correct_hash.bin", + "destination": "dedup_target.bin", + "md5": "wrong_for_dedup_test", + "required": True, + }, ], "data_directories": [ {"ref": "test-data-dir", "destination": "TestData"}, @@ -273,8 +372,12 @@ class TestE2E(unittest.TestCase): }, "sys-renamed": { "files": [ - {"name": "renamed_file.bin", "destination": "renamed_file.bin", - "md5": f["correct_hash.bin"]["md5"], "required": True}, + { + "name": "renamed_file.bin", + "destination": "renamed_file.bin", + "md5": f["correct_hash.bin"]["md5"], + "required": True, + }, ], }, }, @@ -286,7 +389,11 @@ class TestE2E(unittest.TestCase): shared = { "shared_groups": { "test_shared": [ - {"name": "shared_file.rom", "destination": "shared_file.rom", "required": False}, + { + "name": "shared_file.rom", + "destination": "shared_file.rom", + "required": False, + }, ], }, } @@ -311,15 +418,35 @@ class TestE2E(unittest.TestCase): "systems": { "sys-sha1": { "files": [ - {"name": "correct_hash.bin", "destination": "correct_hash.bin", - "sha1": f["correct_hash.bin"]["sha1"], "required": True}, - {"name": "wrong_hash.bin", "destination": "wrong_hash.bin", - "sha1": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "required": True}, - {"name": "missing_sha1.bin", "destination": "missing_sha1.bin", - "sha1": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "required": True}, - {"name": "optional_missing_sha1.bin", "destination": "optional_missing_sha1.bin", - "sha1": "cccccccccccccccccccccccccccccccccccccccc", "required": False}, - {"name": "no_md5.bin", "destination": "no_md5.bin", "required": True}, + { + "name": "correct_hash.bin", + "destination": "correct_hash.bin", + "sha1": f["correct_hash.bin"]["sha1"], + "required": True, + }, + { + "name": "wrong_hash.bin", + "destination": "wrong_hash.bin", + "sha1": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "required": True, + }, + { + "name": "missing_sha1.bin", + "destination": "missing_sha1.bin", + "sha1": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "required": True, + }, + { + "name": "optional_missing_sha1.bin", + "destination": "optional_missing_sha1.bin", + "sha1": "cccccccccccccccccccccccccccccccccccccccc", + "required": False, + }, + { + "name": "no_md5.bin", + "destination": "no_md5.bin", + "required": True, + }, ], }, }, @@ -336,9 +463,16 @@ class TestE2E(unittest.TestCase): "data_directories": [{"ref": "test-data-dir"}], "files": [ {"name": "present_req.bin", "required": True}, - {"name": "alias_target.bin", "required": False, - "aliases": ["alias_alt.bin"]}, - {"name": "standalone_only.bin", "required": False, "mode": "standalone"}, + { + "name": "alias_target.bin", + "required": False, + "aliases": ["alias_alt.bin"], + }, + { + "name": "standalone_only.bin", + "required": False, + "mode": "standalone", + }, {"name": "undeclared_req.bin", "required": True}, {"name": "undeclared_opt.bin", "required": False}, ], @@ -371,7 +505,12 @@ class TestE2E(unittest.TestCase): yaml.dump(launcher, fh) # Alias profile (should be skipped) - alias = {"emulator": "TestAlias", "type": "alias", "alias_of": "test_emu", "files": []} + alias = { + "emulator": "TestAlias", + "type": "alias", + "alias_of": "test_emu", + "files": [], + } with open(os.path.join(self.emulators_dir, "test_alias.yml"), "w") as fh: yaml.dump(alias, fh) @@ -396,7 +535,11 @@ class TestE2E(unittest.TestCase): "files": [ {"name": "rom_a.bin", "required": True, "archive": "test_archive.zip"}, {"name": "rom_b.bin", "required": False, "archive": "test_archive.zip"}, - {"name": "missing_rom.bin", "required": True, "archive": "missing_archive.zip"}, + { + "name": "missing_rom.bin", + "required": True, + "archive": "missing_archive.zip", + }, ], } with open(os.path.join(self.emulators_dir, "test_archive_emu.yml"), "w") as fh: @@ -408,10 +551,16 @@ class TestE2E(unittest.TestCase): "type": "libretro", "systems": ["console-a"], "files": [ - {"name": "Descriptive BIOS Name", "required": True, - "path": "present_req.bin"}, - {"name": "Missing Descriptive", "required": True, - "path": "nonexistent_path.bin"}, + { + "name": "Descriptive BIOS Name", + "required": True, + "path": "present_req.bin", + }, + { + "name": "Missing Descriptive", + "required": True, + "path": "nonexistent_path.bin", + }, ], } with open(os.path.join(self.emulators_dir, "test_descriptive.yml"), "w") as fh: @@ -424,50 +573,98 @@ class TestE2E(unittest.TestCase): "systems": ["console-a", "sys-md5"], "files": [ # Size validation -correct size (16 bytes = len(b"PRESENT_REQUIRED")) - {"name": "present_req.bin", "required": True, - "validation": ["size"], "size": 16, - "source_ref": "test.c:10-20"}, + { + "name": "present_req.bin", + "required": True, + "validation": ["size"], + "size": 16, + "source_ref": "test.c:10-20", + }, # Size validation -wrong expected size - {"name": "present_opt.bin", "required": False, - "validation": ["size"], "size": 9999}, + { + "name": "present_opt.bin", + "required": False, + "validation": ["size"], + "size": 9999, + }, # CRC32 validation -correct crc32 - {"name": "correct_hash.bin", "required": True, - "validation": ["crc32"], "crc32": "91d0b1d3", - "source_ref": "hash.c:42"}, + { + "name": "correct_hash.bin", + "required": True, + "validation": ["crc32"], + "crc32": "91d0b1d3", + "source_ref": "hash.c:42", + }, # CRC32 validation -wrong crc32 - {"name": "no_md5.bin", "required": False, - "validation": ["crc32"], "crc32": "deadbeef"}, + { + "name": "no_md5.bin", + "required": False, + "validation": ["crc32"], + "crc32": "deadbeef", + }, # CRC32 starting with '0' (regression: lstrip("0x") bug) - {"name": "leading_zero_crc.bin", "required": True, - "validation": ["crc32"], "crc32": "0179e92e"}, + { + "name": "leading_zero_crc.bin", + "required": True, + "validation": ["crc32"], + "crc32": "0179e92e", + }, # MD5 validation -correct md5 - {"name": "correct_hash.bin", "required": True, - "validation": ["md5"], "md5": "4a8db431e3b1a1acacec60e3424c4ce8"}, + { + "name": "correct_hash.bin", + "required": True, + "validation": ["md5"], + "md5": "4a8db431e3b1a1acacec60e3424c4ce8", + }, # SHA1 validation -correct sha1 - {"name": "correct_hash.bin", "required": True, - "validation": ["sha1"], "sha1": "a2ab6c95c5bbd191b9e87e8f4e85205a47be5764"}, + { + "name": "correct_hash.bin", + "required": True, + "validation": ["sha1"], + "sha1": "a2ab6c95c5bbd191b9e87e8f4e85205a47be5764", + }, # MD5 validation -wrong md5 - {"name": "alias_target.bin", "required": False, - "validation": ["md5"], "md5": "0000000000000000000000000000dead"}, + { + "name": "alias_target.bin", + "required": False, + "validation": ["md5"], + "md5": "0000000000000000000000000000dead", + }, # Adler32 -known_hash_adler32 field - {"name": "present_req.bin", "required": True, - "known_hash_adler32": None}, # placeholder, set below + { + "name": "present_req.bin", + "required": True, + "known_hash_adler32": None, + }, # placeholder, set below # Min/max size range validation - {"name": "present_req.bin", "required": True, - "validation": ["size"], "min_size": 10, "max_size": 100}, + { + "name": "present_req.bin", + "required": True, + "validation": ["size"], + "min_size": 10, + "max_size": 100, + }, # Signature -crypto check we can't reproduce, but size applies - {"name": "correct_hash.bin", "required": True, - "validation": ["size", "signature"], "size": 17}, + { + "name": "correct_hash.bin", + "required": True, + "validation": ["size", "signature"], + "size": 17, + }, ], } # Compute the actual adler32 of present_req.bin for the test fixture import zlib as _zlib + with open(self.files["present_req.bin"]["path"], "rb") as _f: _data = _f.read() _adler = format(_zlib.adler32(_data) & 0xFFFFFFFF, "08x") # Set the adler32 entry (the one with known_hash_adler32=None) for entry in emu_val["files"]: - if entry.get("known_hash_adler32") is None and "known_hash_adler32" in entry: + if ( + entry.get("known_hash_adler32") is None + and "known_hash_adler32" in entry + ): entry["known_hash_adler32"] = f"0x{_adler}" break with open(os.path.join(self.emulators_dir, "test_validation.yml"), "w") as fh: @@ -491,8 +688,11 @@ class TestE2E(unittest.TestCase): "type": "libretro", "systems": ["console-a"], "files": [ - {"name": "present_req.bin", "required": True, - "path": "subcore/bios/present_req.bin"}, + { + "name": "present_req.bin", + "required": True, + "path": "subcore/bios/present_req.bin", + }, ], } with open(os.path.join(self.emulators_dir, "test_subdir_core.yml"), "w") as fh: @@ -518,8 +718,12 @@ class TestE2E(unittest.TestCase): "bios_mode": "agnostic", "systems": ["console-a"], "files": [ - {"name": "correct_hash.bin", "required": True, - "min_size": 1, "max_size": 999999}, + { + "name": "correct_hash.bin", + "required": True, + "min_size": 1, + "max_size": 999999, + }, ], } with open(os.path.join(self.emulators_dir, "test_agnostic.yml"), "w") as fh: @@ -535,7 +739,9 @@ class TestE2E(unittest.TestCase): {"name": "agnostic_file.bin", "required": True, "agnostic": True}, ], } - with open(os.path.join(self.emulators_dir, "test_mixed_agnostic.yml"), "w") as fh: + with open( + os.path.join(self.emulators_dir, "test_mixed_agnostic.yml"), "w" + ) as fh: yaml.dump(emu_mixed_agnostic, fh) # --------------------------------------------------------------- @@ -543,13 +749,19 @@ class TestE2E(unittest.TestCase): # --------------------------------------------------------------- def test_01_resolve_sha1(self): - entry = {"name": "present_req.bin", "sha1": self.files["present_req.bin"]["sha1"]} + entry = { + "name": "present_req.bin", + "sha1": self.files["present_req.bin"]["sha1"], + } path, status = resolve_local_file(entry, self.db) self.assertEqual(status, "exact") self.assertIn("present_req.bin", path) def test_02_resolve_md5(self): - entry = {"name": "correct_hash.bin", "md5": self.files["correct_hash.bin"]["md5"]} + entry = { + "name": "correct_hash.bin", + "md5": self.files["correct_hash.bin"]["md5"], + } path, status = resolve_local_file(entry, self.db) self.assertEqual(status, "md5_exact") @@ -626,7 +838,7 @@ class TestE2E(unittest.TestCase): # 2 present (1 req + 1 opt), 2 missing (1 req WARNING + 1 opt INFO) self.assertEqual(c[Severity.OK], 2) self.assertEqual(c[Severity.WARNING], 1) # required missing - self.assertEqual(c[Severity.INFO], 1) # optional missing + self.assertEqual(c[Severity.INFO], 1) # optional missing self.assertEqual(sum(c.values()), total) def test_21_verify_md5_platform(self): @@ -703,7 +915,9 @@ class TestE2E(unittest.TestCase): def test_40_cross_ref_finds_undeclared(self): config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} self.assertIn("undeclared_req.bin", names) self.assertIn("undeclared_opt.bin", names) @@ -711,7 +925,9 @@ class TestE2E(unittest.TestCase): def test_41_cross_ref_skips_standalone(self): config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} self.assertNotIn("standalone_only.bin", names) @@ -722,7 +938,9 @@ class TestE2E(unittest.TestCase): def test_43_cross_ref_data_dir_does_not_suppress_files(self): config = load_platform_config("test_md5", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} # dd_covered.bin is a file entry, not data_dir content -still undeclared self.assertIn("dd_covered.bin", names) @@ -730,14 +948,17 @@ class TestE2E(unittest.TestCase): def test_44_cross_ref_skips_launchers(self): config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} # launcher_bios.bin from TestLauncher should NOT appear self.assertNotIn("launcher_bios.bin", names) def test_45_hle_fallback_downgrades_severity(self): """Missing file with hle_fallback=true ->INFO severity, not CRITICAL.""" - from verify import compute_severity, Severity + from verify import Severity, compute_severity + # required + missing + NO HLE = CRITICAL sev = compute_severity("missing", True, "md5", hle_fallback=False) self.assertEqual(sev, Severity.CRITICAL) @@ -763,7 +984,9 @@ class TestE2E(unittest.TestCase): """Undeclared files include hle_fallback from emulator profile.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) hle_files = {u["name"] for u in undeclared if u.get("hle_fallback")} self.assertIn("hle_missing.bin", hle_files) @@ -780,7 +1003,9 @@ class TestE2E(unittest.TestCase): config = { "platform": name, "verification_mode": "existence", - "systems": {"s": {"files": [{"name": "x.bin", "destination": "x.bin"}]}}, + "systems": { + "s": {"files": [{"name": "x.bin", "destination": "x.bin"}]} + }, } with open(os.path.join(self.platforms_dir, f"{name}.yml"), "w") as fh: yaml.dump(config, fh) @@ -790,6 +1015,7 @@ class TestE2E(unittest.TestCase): def test_60_storage_external(self): from generate_pack import resolve_file + entry = {"name": "large.pup", "storage": "external"} path, status = resolve_file(entry, self.db, self.bios_dir) self.assertIsNone(path) @@ -797,18 +1023,22 @@ class TestE2E(unittest.TestCase): def test_61_storage_user_provided(self): from generate_pack import resolve_file + entry = {"name": "user.bin", "storage": "user_provided"} path, status = resolve_file(entry, self.db, self.bios_dir) self.assertIsNone(path) self.assertEqual(status, "user_provided") - def test_resolve_cores_all_libretro(self): """all_libretro resolves to all libretro-type profiles, excludes alias/standalone.""" config = {"cores": "all_libretro", "systems": {"nes": {"files": []}}} profiles = { "fceumm": {"type": "libretro", "systems": ["nes"], "files": []}, - "dolphin_standalone": {"type": "standalone", "systems": ["gc"], "files": []}, + "dolphin_standalone": { + "type": "standalone", + "systems": ["gc"], + "files": [], + }, "gambatte": {"type": "pure_libretro", "systems": ["gb"], "files": []}, "mednafen_psx_hw": {"type": "alias", "alias_of": "beetle_psx", "files": []}, } @@ -845,18 +1075,16 @@ class TestE2E(unittest.TestCase): result = resolve_platform_cores(config, profiles) self.assertEqual(result, set()) - def test_cross_reference_uses_core_resolution(self): """Cross-reference matches by cores: field, not system intersection.""" config = { "cores": ["fbneo"], - "systems": { - "arcade": {"files": [{"name": "neogeo.zip", "md5": "abc"}]} - } + "systems": {"arcade": {"files": [{"name": "neogeo.zip", "md5": "abc"}]}}, } profiles = { "fbneo": { - "emulator": "FBNeo", "systems": ["snk-neogeo-mvs"], + "emulator": "FBNeo", + "systems": ["snk-neogeo-mvs"], "type": "pure_libretro", "files": [ {"name": "neogeo.zip", "required": True}, @@ -872,13 +1100,11 @@ class TestE2E(unittest.TestCase): def test_exclusion_notes_uses_core_resolution(self): """Exclusion notes match by cores: field, not system intersection.""" - config = { - "cores": ["desmume2015"], - "systems": {"nds": {"files": []}} - } + config = {"cores": ["desmume2015"], "systems": {"nds": {"files": []}}} profiles = { "desmume2015": { - "emulator": "DeSmuME 2015", "type": "frozen_snapshot", + "emulator": "DeSmuME 2015", + "type": "frozen_snapshot", "systems": ["nintendo-ds"], "files": [], "exclusion_note": "Frozen snapshot, code never loads BIOS", @@ -888,7 +1114,6 @@ class TestE2E(unittest.TestCase): emu_names = [n["emulator"] for n in notes] self.assertIn("DeSmuME 2015", emu_names) - def test_70_validation_index_built(self): """Validation index extracts checks from emulator profiles.""" profiles = load_emulator_profiles(self.emulators_dir) @@ -959,12 +1184,14 @@ class TestE2E(unittest.TestCase): """Multiple valid sizes from different profiles are collected as a set.""" profiles = { "emu_a": { - "type": "libretro", "files": [ + "type": "libretro", + "files": [ {"name": "shared.bin", "validation": ["size"], "size": 512}, ], }, "emu_b": { - "type": "libretro", "files": [ + "type": "libretro", + "files": [ {"name": "shared.bin", "validation": ["size"], "size": 1024}, ], }, @@ -1037,7 +1264,6 @@ class TestE2E(unittest.TestCase): reason = check_file_validation(path, "wrong_hash.bin", index) self.assertIsNone(reason) - # --------------------------------------------------------------- # Emulator/system mode verification # --------------------------------------------------------------- @@ -1052,8 +1278,12 @@ class TestE2E(unittest.TestCase): def test_91_verify_emulator_standalone_filters(self): """Standalone mode includes mode:standalone files, excludes mode:libretro.""" - result_lr = verify_emulator(["test_emu"], self.emulators_dir, self.db, standalone=False) - result_sa = verify_emulator(["test_emu"], self.emulators_dir, self.db, standalone=True) + result_lr = verify_emulator( + ["test_emu"], self.emulators_dir, self.db, standalone=False + ) + result_sa = verify_emulator( + ["test_emu"], self.emulators_dir, self.db, standalone=True + ) lr_names = {d["name"] for d in result_lr["details"]} sa_names = {d["name"] for d in result_sa["details"]} # standalone_only.bin should be in standalone, not libretro @@ -1063,10 +1293,14 @@ class TestE2E(unittest.TestCase): def test_102_resolve_dest_hint_disambiguates(self): """dest_hint resolves regional variants with same name to distinct files.""" usa_path, usa_status = resolve_local_file( - {"name": "BIOS.bin"}, self.db, dest_hint="TestConsole/USA/BIOS.bin", + {"name": "BIOS.bin"}, + self.db, + dest_hint="TestConsole/USA/BIOS.bin", ) eur_path, eur_status = resolve_local_file( - {"name": "BIOS.bin"}, self.db, dest_hint="TestConsole/EUR/BIOS.bin", + {"name": "BIOS.bin"}, + self.db, + dest_hint="TestConsole/EUR/BIOS.bin", ) self.assertIsNotNone(usa_path) self.assertIsNotNone(eur_path) @@ -1093,7 +1327,12 @@ class TestE2E(unittest.TestCase): def test_92b_verify_emulator_game_type_rejects_standalone(self): """Game-type profile rejects --standalone.""" - game = {"emulator": "TestGame", "type": "game", "systems": ["console-a"], "files": []} + game = { + "emulator": "TestGame", + "type": "game", + "systems": ["console-a"], + "files": [], + } with open(os.path.join(self.emulators_dir, "test_game.yml"), "w") as fh: yaml.dump(game, fh) with self.assertRaises(SystemExit): @@ -1124,7 +1363,9 @@ class TestE2E(unittest.TestCase): def test_96_verify_emulator_multi(self): """Multi-emulator verify aggregates files.""" result = verify_emulator( - ["test_emu", "test_hle"], self.emulators_dir, self.db, + ["test_emu", "test_hle"], + self.emulators_dir, + self.db, ) self.assertEqual(len(result["emulators"]), 2) all_names = {d["name"] for d in result["details"]} @@ -1146,10 +1387,10 @@ class TestE2E(unittest.TestCase): def test_99filter_files_by_mode(self): """filter_files_by_mode correctly filters standalone/libretro.""" files = [ - {"name": "a.bin"}, # no mode ->both - {"name": "b.bin", "mode": "libretro"}, # libretro only - {"name": "c.bin", "mode": "standalone"}, # standalone only - {"name": "d.bin", "mode": "both"}, # explicit both + {"name": "a.bin"}, # no mode ->both + {"name": "b.bin", "mode": "libretro"}, # libretro only + {"name": "c.bin", "mode": "standalone"}, # standalone only + {"name": "d.bin", "mode": "both"}, # explicit both ] lr = filter_files_by_mode(files, standalone=False) sa = filter_files_by_mode(files, standalone=True) @@ -1161,8 +1402,10 @@ class TestE2E(unittest.TestCase): def test_100_verify_emulator_empty_profile(self): """Profile with files:[] produces note, not error.""" empty = { - "emulator": "TestEmpty", "type": "libretro", - "systems": ["console-a"], "files": [], + "emulator": "TestEmpty", + "type": "libretro", + "systems": ["console-a"], + "files": [], "exclusion_note": "Code never loads BIOS", } with open(os.path.join(self.emulators_dir, "test_empty.yml"), "w") as fh: @@ -1187,7 +1430,6 @@ class TestE2E(unittest.TestCase): # Severity should be WARNING (existence mode base) self.assertGreater(result["severity_counts"][Severity.WARNING], 0) - def test_102_safe_extract_zip_blocks_traversal(self): """safe_extract_zip must reject zip-slip path traversal.""" malicious_zip = os.path.join(self.root, "evil.zip") @@ -1218,6 +1460,7 @@ class TestE2E(unittest.TestCase): with open(test_file, "wb") as f: f.write(data) import zlib + expected_sha1 = hashlib.sha1(data).hexdigest() expected_md5 = hashlib.md5(data).hexdigest() expected_sha256 = hashlib.sha256(data).hexdigest() @@ -1231,7 +1474,10 @@ class TestE2E(unittest.TestCase): def test_105_resolve_with_empty_database(self): """resolve_local_file handles empty database gracefully.""" - empty_db = {"files": {}, "indexes": {"by_md5": {}, "by_name": {}, "by_path_suffix": {}}} + empty_db = { + "files": {}, + "indexes": {"by_md5": {}, "by_name": {}, "by_path_suffix": {}}, + } entry = {"name": "nonexistent.bin", "sha1": "abc123"} path, status = resolve_local_file(entry, empty_db) self.assertIsNone(path) @@ -1268,7 +1514,6 @@ class TestE2E(unittest.TestCase): self.assertIn("c.bin", names) self.assertIn("d.bin", names) - def test_108_standalone_path_in_undeclared(self): """Undeclared files use standalone_path when core is in standalone_cores.""" # Create a platform with standalone_cores @@ -1280,8 +1525,11 @@ class TestE2E(unittest.TestCase): "systems": { "console-a": { "files": [ - {"name": "present_req.bin", "destination": "present_req.bin", - "required": True}, + { + "name": "present_req.bin", + "destination": "present_req.bin", + "required": True, + }, ], }, }, @@ -1296,18 +1544,30 @@ class TestE2E(unittest.TestCase): "cores": ["test_emu"], "systems": ["console-a"], "files": [ - {"name": "libretro_file.bin", "path": "subdir/libretro_file.bin", - "standalone_path": "flat_file.bin", "required": True}, - {"name": "standalone_only.bin", "mode": "standalone", "required": False}, + { + "name": "libretro_file.bin", + "path": "subdir/libretro_file.bin", + "standalone_path": "flat_file.bin", + "required": True, + }, + { + "name": "standalone_only.bin", + "mode": "standalone", + "required": False, + }, {"name": "libretro_only.bin", "mode": "libretro", "required": False}, ], } - with open(os.path.join(self.emulators_dir, "test_standalone_emu.yml"), "w") as fh: + with open( + os.path.join(self.emulators_dir, "test_standalone_emu.yml"), "w" + ) as fh: yaml.dump(emu, fh) config = load_platform_config("test_standalone", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) by_name = {u["name"]: u for u in undeclared} # standalone_path used for undeclared file (core is standalone) @@ -1324,7 +1584,9 @@ class TestE2E(unittest.TestCase): """Without standalone_cores, undeclared files use path: (libretro).""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) # standalone_only.bin should be excluded (platform has no standalone_cores) names = {u["name"] for u in undeclared} self.assertNotIn("standalone_only.bin", names) @@ -1404,18 +1666,21 @@ class TestE2E(unittest.TestCase): def test_load_target_config(self): self._write_target_fixtures() from common import load_target_config + cores = load_target_config("testplatform", "target-minimal", self.platforms_dir) self.assertEqual(cores, {"core_a"}) def test_target_alias_resolution(self): self._write_target_fixtures() from common import load_target_config + cores = load_target_config("testplatform", "full", self.platforms_dir) self.assertEqual(cores, {"core_a", "core_b", "core_d"}) def test_target_unknown_error(self): self._write_target_fixtures() from common import load_target_config + with self.assertRaises(ValueError) as ctx: load_target_config("testplatform", "nonexistent", self.platforms_dir) self.assertIn("target-full", str(ctx.exception)) @@ -1424,6 +1689,7 @@ class TestE2E(unittest.TestCase): def test_target_override_add_remove(self): self._write_target_fixtures() from common import load_target_config + cores = load_target_config("testplatform", "full", self.platforms_dir) self.assertIn("core_d", cores) self.assertNotIn("core_c", cores) @@ -1433,6 +1699,7 @@ class TestE2E(unittest.TestCase): def test_target_single_target_noop(self): self._write_target_fixtures() from common import load_target_config + cores = load_target_config("singleplatform", "only-target", self.platforms_dir) self.assertEqual(cores, {"core_a", "core_b"}) @@ -1453,7 +1720,10 @@ class TestE2E(unittest.TestCase): with open(os.path.join(targets_dir, "childplatform.yml"), "w") as f: yaml.dump(child_config, f) from common import load_target_config - parent = load_target_config("testplatform", "target-minimal", self.platforms_dir) + + parent = load_target_config( + "testplatform", "target-minimal", self.platforms_dir + ) child = load_target_config("childplatform", "target-full", self.platforms_dir) self.assertEqual(parent, {"core_a"}) self.assertEqual(child, {"core_a"}) @@ -1477,7 +1747,9 @@ class TestE2E(unittest.TestCase): config = {"cores": "all_libretro"} result = resolve_platform_cores(config, profiles) self.assertEqual(result, {"core_a", "core_b", "core_c", "core_d"}) - result = resolve_platform_cores(config, profiles, target_cores={"core_a", "core_b"}) + result = resolve_platform_cores( + config, profiles, target_cores={"core_a", "core_b"} + ) self.assertEqual(result, {"core_a", "core_b"}) def test_target_none_no_filter(self): @@ -1495,35 +1767,49 @@ class TestE2E(unittest.TestCase): core_a_path = os.path.join(self.emulators_dir, "core_a.yml") core_b_path = os.path.join(self.emulators_dir, "core_b.yml") with open(core_a_path, "w") as f: - yaml.dump({ - "emulator": "CoreA", "type": "libretro", "systems": ["sys1"], - "files": [{"name": "bios_a.bin", "required": True}], - }, f) + yaml.dump( + { + "emulator": "CoreA", + "type": "libretro", + "systems": ["sys1"], + "files": [{"name": "bios_a.bin", "required": True}], + }, + f, + ) with open(core_b_path, "w") as f: - yaml.dump({ - "emulator": "CoreB", "type": "libretro", "systems": ["sys1"], - "files": [{"name": "bios_b.bin", "required": True}], - }, f) + yaml.dump( + { + "emulator": "CoreB", + "type": "libretro", + "systems": ["sys1"], + "files": [{"name": "bios_b.bin", "required": True}], + }, + f, + ) config = {"cores": "all_libretro", "systems": {"sys1": {"files": []}}} profiles = load_emulator_profiles(self.emulators_dir) # Without target: both cores' files are undeclared - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} self.assertIn("bios_a.bin", names) self.assertIn("bios_b.bin", names) # With target filtering to core_a only undeclared = find_undeclared_files( - config, self.emulators_dir, self.db, profiles, + config, + self.emulators_dir, + self.db, + profiles, target_cores={"core_a"}, ) names = {u["name"] for u in undeclared} self.assertIn("bios_a.bin", names) self.assertNotIn("bios_b.bin", names) - # --------------------------------------------------------------- # Validation index per-emulator ground truth (Task: ground truth) # --------------------------------------------------------------- @@ -1543,6 +1829,7 @@ class TestE2E(unittest.TestCase): def test_112_build_ground_truth(self): """build_ground_truth returns per-emulator detail for a filename.""" from validation import build_ground_truth + profiles = load_emulator_profiles(self.emulators_dir) index = _build_validation_index(profiles) gt = build_ground_truth("present_req.bin", index) @@ -1559,6 +1846,7 @@ class TestE2E(unittest.TestCase): def test_113_build_ground_truth_empty(self): """build_ground_truth returns [] for unknown filename.""" from validation import build_ground_truth + profiles = load_emulator_profiles(self.emulators_dir) index = _build_validation_index(profiles) gt = build_ground_truth("nonexistent.bin", index) @@ -1585,7 +1873,9 @@ class TestE2E(unittest.TestCase): """find_undeclared_files attaches ground truth fields.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) for u in undeclared: self.assertIn("checks", u) self.assertIn("source_ref", u) @@ -1631,13 +1921,23 @@ class TestE2E(unittest.TestCase): self.assertEqual(d["ground_truth"], []) break - def test_120_format_ground_truth_aggregate(self): """Aggregate format: one line with all cores.""" from verify import _format_ground_truth_aggregate + gt = [ - {"emulator": "beetle_psx", "checks": ["md5"], "source_ref": "libretro.cpp:252", "expected": {"md5": "abc"}}, - {"emulator": "pcsx_rearmed", "checks": ["existence"], "source_ref": None, "expected": {}}, + { + "emulator": "beetle_psx", + "checks": ["md5"], + "source_ref": "libretro.cpp:252", + "expected": {"md5": "abc"}, + }, + { + "emulator": "pcsx_rearmed", + "checks": ["existence"], + "source_ref": None, + "expected": {}, + }, ] line = _format_ground_truth_aggregate(gt) self.assertIn("beetle_psx", line) @@ -1648,9 +1948,14 @@ class TestE2E(unittest.TestCase): def test_121_format_ground_truth_verbose(self): """Verbose format: one line per core with expected values and source ref.""" from verify import _format_ground_truth_verbose + gt = [ - {"emulator": "handy", "checks": ["size", "crc32"], - "source_ref": "rom.h:48-49", "expected": {"size": 512, "crc32": "0d973c9d"}}, + { + "emulator": "handy", + "checks": ["size", "crc32"], + "source_ref": "rom.h:48-49", + "expected": {"size": 512, "crc32": "0d973c9d"}, + }, ] lines = _format_ground_truth_verbose(gt) self.assertEqual(len(lines), 1) @@ -1662,8 +1967,14 @@ class TestE2E(unittest.TestCase): def test_122_format_ground_truth_verbose_no_source_ref(self): """Verbose format omits bracket when source_ref is None.""" from verify import _format_ground_truth_verbose + gt = [ - {"emulator": "core_a", "checks": ["existence"], "source_ref": None, "expected": {}}, + { + "emulator": "core_a", + "checks": ["existence"], + "source_ref": None, + "expected": {}, + }, ] lines = _format_ground_truth_verbose(gt) self.assertEqual(len(lines), 1) @@ -1711,6 +2022,7 @@ class TestE2E(unittest.TestCase): def test_130_required_only_excludes_optional(self): """--required-only excludes files with required: false from pack.""" from generate_pack import generate_pack + output_dir = os.path.join(self.root, "pack_reqonly") os.makedirs(output_dir, exist_ok=True) # Create a platform with one required and one optional file @@ -1721,8 +2033,16 @@ class TestE2E(unittest.TestCase): "systems": { "test-sys": { "files": [ - {"name": "present_req.bin", "destination": "present_req.bin", "required": True}, - {"name": "present_opt.bin", "destination": "present_opt.bin", "required": False}, + { + "name": "present_req.bin", + "destination": "present_req.bin", + "required": True, + }, + { + "name": "present_opt.bin", + "destination": "present_opt.bin", + "required": False, + }, ], }, }, @@ -1730,7 +2050,11 @@ class TestE2E(unittest.TestCase): with open(os.path.join(self.platforms_dir, "test_reqonly.yml"), "w") as fh: yaml.dump(config, fh) zip_path = generate_pack( - "test_reqonly", self.platforms_dir, self.db, self.bios_dir, output_dir, + "test_reqonly", + self.platforms_dir, + self.db, + self.bios_dir, + output_dir, required_only=True, ) self.assertIsNotNone(zip_path) @@ -1744,6 +2068,7 @@ class TestE2E(unittest.TestCase): def test_131_required_only_keeps_default_required(self): """--required-only keeps files with no required field (default = required).""" from generate_pack import generate_pack + output_dir = os.path.join(self.root, "pack_reqdef") os.makedirs(output_dir, exist_ok=True) # File with no required field @@ -1762,7 +2087,11 @@ class TestE2E(unittest.TestCase): with open(os.path.join(self.platforms_dir, "test_reqdef.yml"), "w") as fh: yaml.dump(config, fh) zip_path = generate_pack( - "test_reqdef", self.platforms_dir, self.db, self.bios_dir, output_dir, + "test_reqdef", + self.platforms_dir, + self.db, + self.bios_dir, + output_dir, required_only=True, ) self.assertIsNotNone(zip_path) @@ -1770,10 +2099,10 @@ class TestE2E(unittest.TestCase): names = zf.namelist() self.assertTrue(any("present_req.bin" in n for n in names)) - def test_132_platform_system_filter(self): """--platform + --system filters systems within a platform pack.""" from generate_pack import generate_pack + output_dir = os.path.join(self.root, "pack_sysfilter") os.makedirs(output_dir, exist_ok=True) config = { @@ -1796,7 +2125,11 @@ class TestE2E(unittest.TestCase): with open(os.path.join(self.platforms_dir, "test_sysfilter.yml"), "w") as fh: yaml.dump(config, fh) zip_path = generate_pack( - "test_sysfilter", self.platforms_dir, self.db, self.bios_dir, output_dir, + "test_sysfilter", + self.platforms_dir, + self.db, + self.bios_dir, + output_dir, system_filter=["system-a"], ) self.assertIsNotNone(zip_path) @@ -1808,6 +2141,7 @@ class TestE2E(unittest.TestCase): def test_133_platform_system_filter_normalized(self): """_norm_system_id normalization matches with manufacturer prefix.""" from common import _norm_system_id + self.assertEqual( _norm_system_id("sony-playstation"), _norm_system_id("playstation"), @@ -1815,8 +2149,10 @@ class TestE2E(unittest.TestCase): def test_134_list_systems_platform_context(self): """list_platform_system_ids lists systems from a platform YAML.""" - from common import list_platform_system_ids import io + + from common import list_platform_system_ids + config = { "platform": "ListSysTest", "verification_mode": "existence", @@ -1849,10 +2185,10 @@ class TestE2E(unittest.TestCase): self.assertIn("1 file", output) self.assertIn("2 files", output) - def test_135_split_by_system(self): """--split generates one ZIP per system in a subdirectory.""" import tempfile + with tempfile.TemporaryDirectory() as tmpdir: plat_dir = os.path.join(tmpdir, "platforms") os.makedirs(plat_dir) @@ -1871,24 +2207,34 @@ class TestE2E(unittest.TestCase): f.write(b"system_b") from common import compute_hashes + ha = compute_hashes(file_a) hb = compute_hashes(file_b) db = { "files": { - ha["sha1"]: {"name": "bios_a.bin", "md5": ha["md5"], - "sha1": ha["sha1"], "sha256": ha["sha256"], - "path": file_a, - "paths": [file_a]}, - hb["sha1"]: {"name": "bios_b.bin", "md5": hb["md5"], - "sha1": hb["sha1"], "sha256": hb["sha256"], - "path": file_b, - "paths": [file_b]}, + ha["sha1"]: { + "name": "bios_a.bin", + "md5": ha["md5"], + "sha1": ha["sha1"], + "sha256": ha["sha256"], + "path": file_a, + "paths": [file_a], + }, + hb["sha1"]: { + "name": "bios_b.bin", + "md5": hb["md5"], + "sha1": hb["sha1"], + "sha256": hb["sha256"], + "path": file_b, + "paths": [file_b], + }, }, "indexes": { "by_md5": {ha["md5"]: ha["sha1"], hb["md5"]: hb["sha1"]}, "by_name": {"bios_a.bin": [ha["sha1"]], "bios_b.bin": [hb["sha1"]]}, - "by_crc32": {}, "by_path_suffix": {}, + "by_crc32": {}, + "by_path_suffix": {}, }, } @@ -1899,22 +2245,33 @@ class TestE2E(unittest.TestCase): "platform": "SplitTest", "verification_mode": "existence", "systems": { - "test-system-a": {"files": [{"name": "bios_a.bin", "sha1": ha["sha1"]}]}, - "test-system-b": {"files": [{"name": "bios_b.bin", "sha1": hb["sha1"]}]}, + "test-system-a": { + "files": [{"name": "bios_a.bin", "sha1": ha["sha1"]}] + }, + "test-system-b": { + "files": [{"name": "bios_b.bin", "sha1": hb["sha1"]}] + }, }, } with open(os.path.join(plat_dir, "splitplat.yml"), "w") as f: yaml.dump(plat_cfg, f) - from generate_pack import generate_split_packs from common import build_zip_contents_index, load_emulator_profiles + from generate_pack import generate_split_packs + zip_contents = build_zip_contents_index(db) emu_profiles = load_emulator_profiles(emu_dir) zip_paths = generate_split_packs( - "splitplat", plat_dir, db, os.path.join(tmpdir, "bios"), out_dir, - emulators_dir=emu_dir, zip_contents=zip_contents, - emu_profiles=emu_profiles, group_by="system", + "splitplat", + plat_dir, + db, + os.path.join(tmpdir, "bios"), + out_dir, + emulators_dir=emu_dir, + zip_contents=zip_contents, + emu_profiles=emu_profiles, + group_by="system", ) self.assertEqual(len(zip_paths), 2) @@ -1934,10 +2291,10 @@ class TestE2E(unittest.TestCase): self.assertIn("bios_b.bin", names) self.assertNotIn("bios_a.bin", names) - def test_136_derive_manufacturer(self): """derive_manufacturer extracts manufacturer correctly.""" from common import derive_manufacturer + # From system ID prefix self.assertEqual(derive_manufacturer("sony-playstation", {}), "Sony") self.assertEqual(derive_manufacturer("nintendo-snes", {}), "Nintendo") @@ -1949,13 +2306,16 @@ class TestE2E(unittest.TestCase): "Panasonic", ) # Various = skip to prefix check, then Other - self.assertEqual(derive_manufacturer("arcade", {"manufacturer": "Various"}), "Other") + self.assertEqual( + derive_manufacturer("arcade", {"manufacturer": "Various"}), "Other" + ) # Fallback self.assertEqual(derive_manufacturer("dos", {}), "Other") def test_137_group_systems_by_manufacturer(self): """_group_systems_by_manufacturer groups correctly.""" from generate_pack import _group_systems_by_manufacturer + systems = { "sony-playstation": {"files": [{"name": "a.bin"}]}, "sony-psp": {"files": [{"name": "b.bin"}]}, @@ -1970,15 +2330,17 @@ class TestE2E(unittest.TestCase): self.assertIn("Other", groups) self.assertEqual(groups["Other"], ["arcade"]) - def test_138_parse_hash_input(self): """parse_hash_input handles various formats.""" from generate_pack import parse_hash_input + # Plain MD5 result = parse_hash_input("d8f1206299c48946e6ec5ef96d014eaa") self.assertEqual(result, [("md5", "d8f1206299c48946e6ec5ef96d014eaa")]) # Comma-separated - result = parse_hash_input("d8f1206299c48946e6ec5ef96d014eaa,d8f1206299c48946e6ec5ef96d014eab") + result = parse_hash_input( + "d8f1206299c48946e6ec5ef96d014eaa,d8f1206299c48946e6ec5ef96d014eab" + ) self.assertEqual(len(result), 2) # SHA1 sha1 = "a" * 40 @@ -1991,6 +2353,7 @@ class TestE2E(unittest.TestCase): def test_139_parse_hash_file(self): """parse_hash_file handles comments, empty lines, various formats.""" from generate_pack import parse_hash_file + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: f.write("# PS1 BIOS files\n") f.write("\n") @@ -2007,14 +2370,18 @@ class TestE2E(unittest.TestCase): def test_140_lookup_hashes_found(self): """lookup_hashes returns file info for known hashes.""" - import io import contextlib + import io + from generate_pack import lookup_hashes + db = { "files": { "sha1abc": { - "name": "test.bin", "md5": "md5abc", - "sha1": "sha1abc", "sha256": "sha256abc", + "name": "test.bin", + "md5": "md5abc", + "sha1": "sha1abc", + "sha256": "sha256abc", "paths": ["Mfr/Console/test.bin"], "aliases": ["alt.bin"], }, @@ -2034,23 +2401,27 @@ class TestE2E(unittest.TestCase): def test_141_lookup_hashes_not_found(self): """lookup_hashes reports unknown hashes.""" - import io import contextlib + import io + from generate_pack import lookup_hashes + db = {"files": {}, "indexes": {"by_md5": {}, "by_crc32": {}}} buf = io.StringIO() with contextlib.redirect_stdout(buf): - lookup_hashes([("md5", "unknown123" + "0" * 22)], db, "bios", "emulators", "platforms") + lookup_hashes( + [("md5", "unknown123" + "0" * 22)], db, "bios", "emulators", "platforms" + ) output = buf.getvalue() self.assertIn("NOT FOUND", output) - def test_142_from_md5_platform_pack(self): """--from-md5 with --platform generates correctly laid out ZIP.""" import tempfile - import json import zipfile + import yaml + with tempfile.TemporaryDirectory() as tmpdir: plat_dir = os.path.join(tmpdir, "platforms") os.makedirs(plat_dir) @@ -2064,13 +2435,16 @@ class TestE2E(unittest.TestCase): with open(bios_file, "wb") as f: f.write(b"ps1_bios_content") from common import compute_hashes + h = compute_hashes(bios_file) db = { "files": { h["sha1"]: { - "name": "scph5501.bin", "md5": h["md5"], - "sha1": h["sha1"], "sha256": h["sha256"], + "name": "scph5501.bin", + "md5": h["md5"], + "sha1": h["sha1"], + "sha256": h["sha256"], "path": bios_file, "paths": ["Sony/PS1/scph5501.bin"], }, @@ -2078,7 +2452,8 @@ class TestE2E(unittest.TestCase): "indexes": { "by_md5": {h["md5"]: h["sha1"]}, "by_name": {"scph5501.bin": [h["sha1"]]}, - "by_crc32": {}, "by_path_suffix": {}, + "by_crc32": {}, + "by_path_suffix": {}, }, } @@ -2092,8 +2467,11 @@ class TestE2E(unittest.TestCase): "systems": { "sony-playstation": { "files": [ - {"name": "scph5501.bin", "md5": h["md5"], - "destination": "scph5501.bin"}, + { + "name": "scph5501.bin", + "md5": h["md5"], + "destination": "scph5501.bin", + }, ] } }, @@ -2101,15 +2479,19 @@ class TestE2E(unittest.TestCase): with open(os.path.join(plat_dir, "testplat.yml"), "w") as f: yaml.dump(plat_cfg, f) - from generate_pack import generate_md5_pack from common import build_zip_contents_index + from generate_pack import generate_md5_pack + zip_contents = build_zip_contents_index(db) zip_path = generate_md5_pack( hashes=[("md5", h["md5"])], - db=db, bios_dir=bios_dir, output_dir=out_dir, + db=db, + bios_dir=bios_dir, + output_dir=out_dir, zip_contents=zip_contents, - platform_name="testplat", platforms_dir=plat_dir, + platform_name="testplat", + platforms_dir=plat_dir, ) self.assertIsNotNone(zip_path) with zipfile.ZipFile(zip_path) as zf: @@ -2119,16 +2501,19 @@ class TestE2E(unittest.TestCase): def test_143_from_md5_not_in_repo(self): """--from-md5 reports files in DB but missing from repo.""" - import tempfile - import io import contextlib + import io + import tempfile + from generate_pack import generate_md5_pack db = { "files": { "sha1known": { - "name": "missing.bin", "md5": "md5known" + "0" * 25, - "sha1": "sha1known", "sha256": "sha256known", + "name": "missing.bin", + "md5": "md5known" + "0" * 25, + "sha1": "sha1known", + "sha256": "sha256known", "path": "/nonexistent/missing.bin", "paths": ["Test/missing.bin"], }, @@ -2146,20 +2531,23 @@ class TestE2E(unittest.TestCase): with contextlib.redirect_stdout(buf): result = generate_md5_pack( hashes=[("md5", "md5known" + "0" * 25)], - db=db, bios_dir=bios_dir, output_dir=out_dir, + db=db, + bios_dir=bios_dir, + output_dir=out_dir, zip_contents={}, ) output = buf.getvalue() self.assertIn("NOT IN REPO", output) self.assertIsNone(result) - def test_144_invalid_split_emulator(self): """--split + --emulator is rejected.""" import subprocess + result = subprocess.run( ["python", "scripts/generate_pack.py", "--emulator", "test", "--split"], - capture_output=True, text=True, + capture_output=True, + text=True, ) self.assertNotEqual(result.returncode, 0) self.assertIn("error", result.stderr.lower()) @@ -2167,33 +2555,60 @@ class TestE2E(unittest.TestCase): def test_145_invalid_from_md5_all(self): """--from-md5 + --all is rejected.""" import subprocess + result = subprocess.run( - ["python", "scripts/generate_pack.py", "--all", "--from-md5", "abc123" + "0" * 26], - capture_output=True, text=True, + [ + "python", + "scripts/generate_pack.py", + "--all", + "--from-md5", + "abc123" + "0" * 26, + ], + capture_output=True, + text=True, ) self.assertNotEqual(result.returncode, 0) def test_146_invalid_from_md5_system(self): """--from-md5 + --system is rejected.""" import subprocess + result = subprocess.run( - ["python", "scripts/generate_pack.py", "--system", "psx", "--from-md5", "abc123" + "0" * 26], - capture_output=True, text=True, + [ + "python", + "scripts/generate_pack.py", + "--system", + "psx", + "--from-md5", + "abc123" + "0" * 26, + ], + capture_output=True, + text=True, ) self.assertNotEqual(result.returncode, 0) def test_147_invalid_group_by_without_split(self): """--group-by without --split is rejected.""" import subprocess + result = subprocess.run( - ["python", "scripts/generate_pack.py", "--platform", "retroarch", "--group-by", "manufacturer"], - capture_output=True, text=True, + [ + "python", + "scripts/generate_pack.py", + "--platform", + "retroarch", + "--group-by", + "manufacturer", + ], + capture_output=True, + text=True, ) self.assertNotEqual(result.returncode, 0) def test_148_valid_platform_system(self): """--platform + --system is accepted (not rejected at validation stage).""" import argparse + sys.path.insert(0, "scripts") # Build the same parser as generate_pack.main() parser = argparse.ArgumentParser() @@ -2203,7 +2618,9 @@ class TestE2E(unittest.TestCase): parser.add_argument("--system", "-s") parser.add_argument("--standalone", action="store_true") parser.add_argument("--split", action="store_true") - parser.add_argument("--group-by", choices=["system", "manufacturer"], default="system") + parser.add_argument( + "--group-by", choices=["system", "manufacturer"], default="system" + ) parser.add_argument("--target", "-t") parser.add_argument("--from-md5") parser.add_argument("--from-md5-file") @@ -2220,7 +2637,9 @@ class TestE2E(unittest.TestCase): # These should NOT raise self.assertFalse(has_emulator and (has_platform or has_all or has_system)) self.assertFalse(has_platform and has_all) - self.assertTrue(has_platform or has_all or has_emulator or has_system or has_from_md5) + self.assertTrue( + has_platform or has_all or has_emulator or has_system or has_from_md5 + ) # --platform + --system is a valid combination self.assertTrue(has_platform and has_system) @@ -2229,10 +2648,11 @@ class TestE2E(unittest.TestCase): def test_150_bizhawk_scraper_parse_firmware_and_option(self): """Parse FirmwareAndOption() one-liner pattern.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ FirmwareAndOption("DBEBD76A448447CB6E524AC3CB0FD19FC065D944", 256, "32X", "G", "32X_G_BIOS.BIN", "32x 68k BIOS"); FirmwareAndOption("1E5B0B2441A4979B6966D942B20CC76C413B8C5E", 2048, "32X", "M", "32X_M_BIOS.BIN", "32x SH2 MASTER BIOS"); - ''' + """ records, files = parse_firmware_database(fragment) self.assertEqual(len(records), 2) self.assertEqual(records[0]["system"], "32X") @@ -2244,11 +2664,12 @@ class TestE2E(unittest.TestCase): def test_151_bizhawk_scraper_parse_variable_refs(self): """Parse var = File() + Firmware() + Option() pattern.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ var gbaNormal = File("300C20DF6731A33952DED8C436F7F186D25D3492", 16384, "GBA_bios.rom", "Bios (World)"); Firmware("GBA", "Bios", "Bios"); Option("GBA", "Bios", in gbaNormal, FirmwareOptionStatus.Ideal); - ''' + """ records, files = parse_firmware_database(fragment) self.assertEqual(len(records), 1) self.assertEqual(records[0]["system"], "GBA") @@ -2259,13 +2680,14 @@ class TestE2E(unittest.TestCase): def test_152_bizhawk_scraper_skips_comments(self): """Commented-out blocks (PS2) are skipped.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ FirmwareAndOption("DBEBD76A448447CB6E524AC3CB0FD19FC065D944", 256, "32X", "G", "32X_G_BIOS.BIN", "32x 68k BIOS"); /* Firmware("PS2", "BIOS", "PS2 Bios"); Option("PS2", "BIOS", File("FBD54BFC020AF34008B317DCB80B812DD29B3759", 4194304, "ps2.bin", "PS2 Bios")); */ - ''' + """ records, files = parse_firmware_database(fragment) systems = {r["system"] for r in records} self.assertNotIn("PS2", systems) @@ -2274,18 +2696,20 @@ class TestE2E(unittest.TestCase): def test_153_bizhawk_scraper_arithmetic_size(self): """Size expressions like 4 * 1024 * 1024 are evaluated.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ FirmwareAndOption("BF861922DCB78C316360E3E742F4F70FF63C9BC3", 4 * 1024 * 1024, "N64DD", "IPL_JPN", "64DD_IPL.bin", "N64DD JPN IPL"); - ''' + """ records, _ = parse_firmware_database(fragment) self.assertEqual(records[0]["size"], 4194304) def test_154_bizhawk_scraper_dummy_hash(self): """SHA1Checksum.Dummy entries get no sha1 field.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ FirmwareAndOption(SHA1Checksum.Dummy, 0, "3DS", "aes_keys", "aes_keys.txt", "AES Keys"); - ''' + """ records, _ = parse_firmware_database(fragment) self.assertEqual(len(records), 1) self.assertIsNone(records[0]["sha1"]) @@ -2293,13 +2717,14 @@ class TestE2E(unittest.TestCase): def test_155_bizhawk_scraper_multi_option_picks_ideal(self): """When multiple options exist, Ideal is selected as canonical.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ var ss_100_j = File("2B8CB4F87580683EB4D760E4ED210813D667F0A2", 524288, "SAT_1.00-(J).bin", "Bios v1.00 (J)"); var ss_101_j = File("DF94C5B4D47EB3CC404D88B33A8FDA237EAF4720", 524288, "SAT_1.01-(J).bin", "Bios v1.01 (J)"); Firmware("SAT", "J", "Bios (J)"); Option("SAT", "J", in ss_100_j); Option("SAT", "J", in ss_101_j, FirmwareOptionStatus.Ideal); - ''' + """ records, _ = parse_firmware_database(fragment) self.assertEqual(len(records), 1) self.assertEqual(records[0]["sha1"], "DF94C5B4D47EB3CC404D88B33A8FDA237EAF4720") @@ -2308,13 +2733,14 @@ class TestE2E(unittest.TestCase): def test_156_bizhawk_scraper_is_bad_excluded(self): """Files with isBad: true are not selected as canonical.""" from scraper.bizhawk_scraper import parse_firmware_database - fragment = ''' + + fragment = """ var good = File("AAAA", 100, "good.bin", "Good"); var bad = File("BBBB", 100, "bad.bin", "Bad", isBad: true); Firmware("TEST", "X", "Test"); Option("TEST", "X", in bad); Option("TEST", "X", in good, FirmwareOptionStatus.Ideal); - ''' + """ records, _ = parse_firmware_database(fragment) self.assertEqual(records[0]["name"], "good.bin") @@ -2329,12 +2755,14 @@ class TestE2E(unittest.TestCase): _register_path("system/SGB1.sfc", seen_files, seen_parents) # Adding system/SGB1.sfc/program.rom should conflict (parent is a file) - self.assertTrue(_has_path_conflict("system/SGB1.sfc/program.rom", - seen_files, seen_parents)) + self.assertTrue( + _has_path_conflict("system/SGB1.sfc/program.rom", seen_files, seen_parents) + ) # Adding system/other.bin should not conflict - self.assertFalse(_has_path_conflict("system/other.bin", - seen_files, seen_parents)) + self.assertFalse( + _has_path_conflict("system/other.bin", seen_files, seen_parents) + ) # Reverse: register a nested path first, then check flat seen_files2: set[str] = set() @@ -2342,12 +2770,14 @@ class TestE2E(unittest.TestCase): _register_path("system/SGB2.sfc/program.rom", seen_files2, seen_parents2) # Adding system/SGB2.sfc as a file should conflict (it's a directory) - self.assertTrue(_has_path_conflict("system/SGB2.sfc", - seen_files2, seen_parents2)) + self.assertTrue( + _has_path_conflict("system/SGB2.sfc", seen_files2, seen_parents2) + ) # Adding system/SGB2.sfc/boot.rom should not conflict (sibling in same dir) - self.assertFalse(_has_path_conflict("system/SGB2.sfc/boot.rom", - seen_files2, seen_parents2)) + self.assertFalse( + _has_path_conflict("system/SGB2.sfc/boot.rom", seen_files2, seen_parents2) + ) def test_158_pack_skips_file_directory_conflict(self): """Pack generation skips entries that conflict with existing paths.""" @@ -2364,8 +2794,11 @@ class TestE2E(unittest.TestCase): "systems": { "test-sys": { "files": [ - {"name": "present_req.bin", "destination": "present_req.bin", - "required": True}, + { + "name": "present_req.bin", + "destination": "present_req.bin", + "required": True, + }, ], }, }, @@ -2386,19 +2819,23 @@ class TestE2E(unittest.TestCase): yaml.dump(emu, fh) zip_path = generate_pack( - "test_conflict", self.platforms_dir, self.db, self.bios_dir, - output_dir, emulators_dir=self.emulators_dir, + "test_conflict", + self.platforms_dir, + self.db, + self.bios_dir, + output_dir, + emulators_dir=self.emulators_dir, ) self.assertIsNotNone(zip_path) with zipfile.ZipFile(zip_path) as zf: names = zf.namelist() # Flat file should be present - self.assertTrue(any("present_req.bin" in n and "/" + "nested" not in n - for n in names)) + self.assertTrue( + any("present_req.bin" in n and "/" + "nested" not in n for n in names) + ) # Nested conflict should NOT be present self.assertFalse(any("nested.rom" in n for n in names)) - # --------------------------------------------------------------- # Archive cross-reference and descriptive name tests # --------------------------------------------------------------- @@ -2407,9 +2844,13 @@ class TestE2E(unittest.TestCase): """Archived files group by archive; in_repo=True when archive exists.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) # test_archive.zip should appear as a single grouped entry - archive_entries = [u for u in undeclared if u.get("archive") == "test_archive.zip"] + archive_entries = [ + u for u in undeclared if u.get("archive") == "test_archive.zip" + ] self.assertEqual(len(archive_entries), 1) entry = archive_entries[0] self.assertTrue(entry["in_repo"]) @@ -2421,8 +2862,12 @@ class TestE2E(unittest.TestCase): """Missing archive reported as single entry with in_repo=False.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) - missing_entries = [u for u in undeclared if u.get("archive") == "missing_archive.zip"] + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) + missing_entries = [ + u for u in undeclared if u.get("archive") == "missing_archive.zip" + ] self.assertEqual(len(missing_entries), 1) entry = missing_entries[0] self.assertFalse(entry["in_repo"]) @@ -2433,7 +2878,9 @@ class TestE2E(unittest.TestCase): """Individual ROM names from archived files should NOT appear as separate entries.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} # Individual ROMs should NOT be in the undeclared list self.assertNotIn("rom_a.bin", names) @@ -2444,9 +2891,12 @@ class TestE2E(unittest.TestCase): """Descriptive name with path: fallback resolves via path basename.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) - desc_entries = {u["name"]: u for u in undeclared - if u["emulator"] == "TestDescriptive"} + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) + desc_entries = { + u["name"]: u for u in undeclared if u["emulator"] == "TestDescriptive" + } # "Descriptive BIOS Name" has path: "present_req.bin" which IS in by_name self.assertIn("Descriptive BIOS Name", desc_entries) self.assertTrue(desc_entries["Descriptive BIOS Name"]["in_repo"]) @@ -2463,29 +2913,43 @@ class TestE2E(unittest.TestCase): "systems": { "console-a": { "files": [ - {"name": "test_archive.zip", "destination": "test_archive.zip", - "required": True}, + { + "name": "test_archive.zip", + "destination": "test_archive.zip", + "required": True, + }, ], }, }, } - with open(os.path.join(self.platforms_dir, "test_archive_platform.yml"), "w") as fh: + with open( + os.path.join(self.platforms_dir, "test_archive_platform.yml"), "w" + ) as fh: yaml.dump(config, fh) config = load_platform_config("test_archive_platform", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) # test_archive.zip is declared ->its archived ROMs should be skipped - archive_entries = [u for u in undeclared if u.get("archive") == "test_archive.zip"] + archive_entries = [ + u for u in undeclared if u.get("archive") == "test_archive.zip" + ] self.assertEqual(len(archive_entries), 0) def test_164_pack_extras_use_archive_name(self): """Pack extras for archived files use archive name, not individual ROM.""" from generate_pack import _collect_emulator_extras + config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) extras = _collect_emulator_extras( - config, self.emulators_dir, self.db, - set(), "", profiles, + config, + self.emulators_dir, + self.db, + set(), + "", + profiles, ) extra_names = {e["name"] for e in extras} # Archive name should be present, not individual ROMs @@ -2495,15 +2959,19 @@ class TestE2E(unittest.TestCase): # Missing archive should NOT be in extras (in_repo=False) self.assertNotIn("missing_archive.zip", extra_names) - def test_165_pack_extras_multi_dest_cross_ref(self): """Same file at different paths from two profiles produces both destinations.""" from generate_pack import _collect_emulator_extras + config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) extras = _collect_emulator_extras( - config, self.emulators_dir, self.db, - set(), "", profiles, + config, + self.emulators_dir, + self.db, + set(), + "", + profiles, ) extra_dests = {e["destination"] for e in extras} # Root destination (from test_emu or test_root_core, no path) @@ -2514,13 +2982,18 @@ class TestE2E(unittest.TestCase): def test_166_pack_extras_multi_dest_platform_declared(self): """Profile with path different from platform destination adds alternative.""" from generate_pack import _collect_emulator_extras + config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) # Simulate platform already having present_req.bin at root seen = {"present_req.bin"} extras = _collect_emulator_extras( - config, self.emulators_dir, self.db, - seen, "", profiles, + config, + self.emulators_dir, + self.db, + seen, + "", + profiles, ) extra_dests = {e["destination"] for e in extras} # Root is already in pack (in seen), should NOT be duplicated @@ -2572,13 +3045,18 @@ class TestE2E(unittest.TestCase): # Clear profile cache so fresh load picks up our file from common import _emulator_profiles_cache + _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) config = {"cores": ["testcore"]} result = generate_platform_truth( - "testplat", config, {}, profiles, db=None, + "testplat", + config, + {}, + profiles, + db=None, ) self.assertEqual(result["platform"], "testplat") @@ -2604,12 +3082,24 @@ class TestE2E(unittest.TestCase): "systems": ["test-system"], "cores": ["dualmode"], "files": [ - {"name": "both.bin", "system": "test-system", "required": True, - "mode": "both"}, - {"name": "lr_only.bin", "system": "test-system", "required": True, - "mode": "libretro"}, - {"name": "sa_only.bin", "system": "test-system", "required": True, - "mode": "standalone"}, + { + "name": "both.bin", + "system": "test-system", + "required": True, + "mode": "both", + }, + { + "name": "lr_only.bin", + "system": "test-system", + "required": True, + "mode": "libretro", + }, + { + "name": "sa_only.bin", + "system": "test-system", + "required": True, + "mode": "standalone", + }, {"name": "nomode.bin", "system": "test-system", "required": True}, ], } @@ -2639,10 +3129,18 @@ class TestE2E(unittest.TestCase): "systems": ["test-system"], "cores": ["dualcore"], "files": [ - {"name": "lr_file.bin", "system": "test-system", "required": True, - "mode": "libretro"}, - {"name": "sa_file.bin", "system": "test-system", "required": True, - "mode": "standalone"}, + { + "name": "lr_file.bin", + "system": "test-system", + "required": True, + "mode": "libretro", + }, + { + "name": "sa_file.bin", + "system": "test-system", + "required": True, + "mode": "standalone", + }, ], } with open(os.path.join(self.emulators_dir, "dualcore.yml"), "w") as f: @@ -2672,8 +3170,12 @@ class TestE2E(unittest.TestCase): "systems": ["test-system"], "cores": ["core_a"], "files": [ - {"name": "shared.bin", "system": "test-system", - "required": False, "source_ref": "a.cpp:10"}, + { + "name": "shared.bin", + "system": "test-system", + "required": False, + "source_ref": "a.cpp:10", + }, ], } core_b = { @@ -2682,8 +3184,12 @@ class TestE2E(unittest.TestCase): "systems": ["test-system"], "cores": ["core_b"], "files": [ - {"name": "shared.bin", "system": "test-system", - "required": True, "source_ref": "b.cpp:20"}, + { + "name": "shared.bin", + "system": "test-system", + "required": True, + "source_ref": "b.cpp:20", + }, ], } for name, data in [("core_a", core_a), ("core_b", core_b)]: @@ -2731,9 +3237,9 @@ class TestE2E(unittest.TestCase): cov = result["_coverage"] self.assertEqual(cov["cores_profiled"], 1) - self.assertNotIn("unprofiled_core", [ - name for name in profiles if name == "unprofiled_core" - ]) + self.assertNotIn( + "unprofiled_core", [name for name in profiles if name == "unprofiled_core"] + ) # unprofiled_core has no profile YAML so resolve_platform_cores # won't include it; cores_resolved reflects only matched profiles self.assertEqual(cov["cores_resolved"], 1) @@ -2742,10 +3248,20 @@ class TestE2E(unittest.TestCase): def test_90_registry_install_metadata(self): """Registry install section is accessible.""" import yaml + with open("platforms/_registry.yml") as f: registry = yaml.safe_load(f) - for name in ("retroarch", "batocera", "emudeck", "recalbox", - "retrobat", "retrodeck", "lakka", "romm", "bizhawk"): + for name in ( + "retroarch", + "batocera", + "emudeck", + "recalbox", + "retrobat", + "retrodeck", + "lakka", + "romm", + "bizhawk", + ): plat = registry["platforms"][name] self.assertIn("install", plat, f"{name} missing install section") self.assertIn("detect", plat["install"]) @@ -2754,7 +3270,8 @@ class TestE2E(unittest.TestCase): self.assertIn("os", hint) # EmuDeck has standalone_copies self.assertIn( - "standalone_copies", registry["platforms"]["emudeck"]["install"], + "standalone_copies", + registry["platforms"]["emudeck"]["install"], ) def test_91_generate_manifest(self): @@ -2767,8 +3284,13 @@ class TestE2E(unittest.TestCase): "platforms": { "test_existence": { "install": { - "detect": [{"os": "linux", "method": "path_exists", - "path": "/test/bios"}], + "detect": [ + { + "os": "linux", + "method": "path_exists", + "path": "/test/bios", + } + ], }, }, }, @@ -2777,8 +3299,12 @@ class TestE2E(unittest.TestCase): yaml.dump(registry_data, fh) manifest = generate_manifest( - "test_existence", self.platforms_dir, self.db, self.bios_dir, - registry_path, emulators_dir=self.emulators_dir, + "test_existence", + self.platforms_dir, + self.db, + self.bios_dir, + registry_path, + emulators_dir=self.emulators_dir, ) self.assertEqual(manifest["manifest_version"], 1) @@ -2790,7 +3316,10 @@ class TestE2E(unittest.TestCase): self.assertEqual(manifest["total_files"], len(manifest["files"])) self.assertGreater(len(manifest["files"]), 0) self.assertEqual(manifest["base_destination"], "system") - self.assertEqual(manifest["detect"], registry_data["platforms"]["test_existence"]["install"]["detect"]) + self.assertEqual( + manifest["detect"], + registry_data["platforms"]["test_existence"]["install"]["detect"], + ) for f in manifest["files"]: self.assertIn("dest", f) @@ -2820,15 +3349,20 @@ class TestE2E(unittest.TestCase): output_dir = os.path.join(self.root, "pack_manifest_cmp") os.makedirs(output_dir, exist_ok=True) zip_path = generate_pack( - "test_existence", self.platforms_dir, self.db, self.bios_dir, - output_dir, emulators_dir=self.emulators_dir, + "test_existence", + self.platforms_dir, + self.db, + self.bios_dir, + output_dir, + emulators_dir=self.emulators_dir, ) self.assertIsNotNone(zip_path) # Get ZIP file destinations (exclude metadata) with zipfile.ZipFile(zip_path) as zf: zip_names = { - n for n in zf.namelist() + n + for n in zf.namelist() if not n.startswith("INSTRUCTIONS_") and n != "manifest.json" and n != "README.txt" @@ -2836,8 +3370,12 @@ class TestE2E(unittest.TestCase): # Generate manifest manifest = generate_manifest( - "test_existence", self.platforms_dir, self.db, self.bios_dir, - registry_path, emulators_dir=self.emulators_dir, + "test_existence", + self.platforms_dir, + self.db, + self.bios_dir, + registry_path, + emulators_dir=self.emulators_dir, ) base = manifest.get("base_destination", "") manifest_dests = set() @@ -2847,7 +3385,6 @@ class TestE2E(unittest.TestCase): self.assertEqual(manifest_dests, zip_names) - # --------------------------------------------------------------- # install.py tests # --------------------------------------------------------------- @@ -2856,6 +3393,7 @@ class TestE2E(unittest.TestCase): """Parse system_directory from retroarch.cfg.""" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from install import _parse_retroarch_system_dir + cfg = os.path.join(self.root, "retroarch.cfg") # Quoted absolute path with open(cfg, "w") as f: @@ -2869,7 +3407,7 @@ class TestE2E(unittest.TestCase): self.assertEqual(result, Path(self.root) / "system") # Unquoted with open(cfg, "w") as f: - f.write('system_directory = /tmp/ra_system\n') + f.write("system_directory = /tmp/ra_system\n") result = _parse_retroarch_system_dir(Path(cfg)) self.assertEqual(result, Path("/tmp/ra_system")) @@ -2877,9 +3415,12 @@ class TestE2E(unittest.TestCase): """Parse emulationPath from EmuDeck settings.sh.""" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from install import _parse_bash_var + settings = os.path.join(self.root, "settings.sh") with open(settings, "w") as f: - f.write('emulationPath="/home/deck/Emulation"\nromsPath="/home/deck/Emulation/roms"\n') + f.write( + 'emulationPath="/home/deck/Emulation"\nromsPath="/home/deck/Emulation/roms"\n' + ) result = _parse_bash_var(Path(settings), "emulationPath") self.assertEqual(result, "/home/deck/Emulation") @@ -2887,6 +3428,7 @@ class TestE2E(unittest.TestCase): """Parse $emulationPath from EmuDeck settings.ps1.""" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from install import _parse_ps1_var + settings = os.path.join(self.root, "settings.ps1") with open(settings, "w") as f: f.write('$emulationPath="C:\\Emulation"\n') @@ -2897,6 +3439,7 @@ class TestE2E(unittest.TestCase): """--target filters files by cores field.""" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from install import _filter_by_target + files = [ {"dest": "a.bin", "cores": None}, {"dest": "b.bin", "cores": ["flycast", "redream"]}, @@ -2912,6 +3455,7 @@ class TestE2E(unittest.TestCase): """Standalone keys copied to existing emulator dirs.""" sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from install import do_standalone_copies + bios_dir = Path(self.root) / "bios" bios_dir.mkdir(exist_ok=True) (bios_dir / "prod.keys").write_bytes(b"KEYS") @@ -2920,7 +3464,12 @@ class TestE2E(unittest.TestCase): missing_dir = Path(self.root) / "nonexistent" manifest = { "base_destination": "bios", - "standalone_copies": [{"file": "prod.keys", "targets": {"linux": [str(yuzu_dir), str(missing_dir)]}}] + "standalone_copies": [ + { + "file": "prod.keys", + "targets": {"linux": [str(yuzu_dir), str(missing_dir)]}, + } + ], } copied, skipped = do_standalone_copies(manifest, bios_dir, "linux") self.assertEqual(copied, 1) @@ -2928,25 +3477,42 @@ class TestE2E(unittest.TestCase): self.assertTrue((yuzu_dir / "prod.keys").exists()) self.assertFalse((missing_dir / "prod.keys").exists()) - # --------------------------------------------------------------- # diff_platform_truth tests # --------------------------------------------------------------- def test_98_diff_truth_missing(self): """Truth has 2 files, scraped has 1 -> 1 missing with cores/source_refs.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, - "files": [ - {"name": "bios_a.bin", "required": True, "md5": "aaa", - "_cores": ["core_a"], "_source_refs": ["src/a.c:10"]}, - {"name": "bios_b.bin", "required": False, "md5": "bbb", - "_cores": ["core_a"], "_source_refs": ["src/b.c:20"]}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [{"name": "bios_a.bin", "md5": "aaa"}], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + { + "name": "bios_a.bin", + "required": True, + "md5": "aaa", + "_cores": ["core_a"], + "_source_refs": ["src/a.c:10"], + }, + { + "name": "bios_b.bin", + "required": False, + "md5": "bbb", + "_cores": ["core_a"], + "_source_refs": ["src/b.c:20"], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [{"name": "bios_a.bin", "md5": "aaa"}], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_missing"], 1) div = result["divergences"]["test-sys"] @@ -2958,19 +3524,31 @@ class TestE2E(unittest.TestCase): def test_99_diff_truth_extra_phantom(self): """All cores profiled, scraped has extra file -> extra_phantom.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, - "files": [ - {"name": "bios.bin", "md5": "aaa", - "_cores": ["core_a"], "_source_refs": []}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [ - {"name": "bios.bin", "md5": "aaa"}, - {"name": "phantom.bin", "md5": "zzz"}, - ], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + { + "name": "bios.bin", + "md5": "aaa", + "_cores": ["core_a"], + "_source_refs": [], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [ + {"name": "bios.bin", "md5": "aaa"}, + {"name": "phantom.bin", "md5": "zzz"}, + ], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_extra_phantom"], 1) div = result["divergences"]["test-sys"] @@ -2979,20 +3557,34 @@ class TestE2E(unittest.TestCase): def test_100_diff_truth_extra_unprofiled(self): """Some cores unprofiled, scraped has extra -> extra_unprofiled.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], - "cores_unprofiled": ["core_b"]}, - "files": [ - {"name": "bios.bin", "md5": "aaa", - "_cores": ["core_a"], "_source_refs": []}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [ - {"name": "bios.bin", "md5": "aaa"}, - {"name": "extra.bin", "md5": "yyy"}, - ], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": { + "cores_profiled": ["core_a"], + "cores_unprofiled": ["core_b"], + }, + "files": [ + { + "name": "bios.bin", + "md5": "aaa", + "_cores": ["core_a"], + "_source_refs": [], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [ + {"name": "bios.bin", "md5": "aaa"}, + {"name": "extra.bin", "md5": "yyy"}, + ], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_extra_unprofiled"], 1) div = result["divergences"]["test-sys"] @@ -3001,16 +3593,29 @@ class TestE2E(unittest.TestCase): def test_101_diff_truth_alias_matching(self): """Truth file with aliases, scraped uses alias -> not extra or missing.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, - "files": [ - {"name": "bios.bin", "md5": "aaa", "aliases": ["alt.bin"], - "_cores": ["core_a"], "_source_refs": []}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [{"name": "alt.bin", "md5": "aaa"}], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + { + "name": "bios.bin", + "md5": "aaa", + "aliases": ["alt.bin"], + "_cores": ["core_a"], + "_source_refs": [], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [{"name": "alt.bin", "md5": "aaa"}], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_missing"], 0) self.assertEqual(result["summary"]["total_extra_phantom"], 0) @@ -3018,33 +3623,56 @@ class TestE2E(unittest.TestCase): def test_102_diff_truth_case_insensitive(self): """Truth 'BIOS.ROM', scraped 'bios.rom' -> match, no missing.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, - "files": [ - {"name": "BIOS.ROM", "md5": "aaa", - "_cores": ["core_a"], "_source_refs": []}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [{"name": "bios.rom", "md5": "aaa"}], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + { + "name": "BIOS.ROM", + "md5": "aaa", + "_cores": ["core_a"], + "_source_refs": [], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [{"name": "bios.rom", "md5": "aaa"}], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_missing"], 0) self.assertNotIn("test-sys", result.get("divergences", {})) def test_103_diff_truth_hash_mismatch(self): """Same file, different md5 -> hash_mismatch with truth_cores.""" - truth = {"systems": {"test-sys": { - "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, - "files": [ - {"name": "bios.bin", "md5": "truth_hash", - "_cores": ["core_a", "core_b"], - "_source_refs": ["src/x.c:5"]}, - ], - }}} - scraped = {"systems": {"test-sys": { - "files": [{"name": "bios.bin", "md5": "scraped_hash"}], - }}} + truth = { + "systems": { + "test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + { + "name": "bios.bin", + "md5": "truth_hash", + "_cores": ["core_a", "core_b"], + "_source_refs": ["src/x.c:5"], + }, + ], + } + } + } + scraped = { + "systems": { + "test-sys": { + "files": [{"name": "bios.bin", "md5": "scraped_hash"}], + } + } + } result = diff_platform_truth(truth, scraped) self.assertEqual(result["summary"]["total_hash_mismatch"], 1) div = result["divergences"]["test-sys"] @@ -3055,7 +3683,6 @@ class TestE2E(unittest.TestCase): self.assertEqual(hm["truth_md5"], "truth_hash") self.assertEqual(hm["scraped_md5"], "scraped_hash") - def test_104_diff_truth_normalized_system_ids(self): """Diff matches systems with different ID formats via normalization.""" from truth import diff_platform_truth @@ -3065,8 +3692,13 @@ class TestE2E(unittest.TestCase): "sega-gamegear": { "_coverage": {"cores_profiled": ["c"], "cores_unprofiled": []}, "files": [ - {"name": "bios.gg", "required": True, "md5": "a" * 32, - "_cores": ["c"], "_source_refs": []}, + { + "name": "bios.gg", + "required": True, + "md5": "a" * 32, + "_cores": ["c"], + "_source_refs": [], + }, ], }, } @@ -3100,13 +3732,21 @@ class TestE2E(unittest.TestCase): "sony-playstation": { "native_id": "Sony - PlayStation", "files": [ - {"name": "scph5501.bin", "destination": "scph5501.bin", "required": True}, + { + "name": "scph5501.bin", + "destination": "scph5501.bin", + "required": True, + }, ], }, "nintendo-snes": { "native_id": "snes", "files": [ - {"name": "bs-x.bin", "destination": "bs-x.bin", "required": False}, + { + "name": "bs-x.bin", + "destination": "bs-x.bin", + "required": False, + }, ], }, }, @@ -3120,7 +3760,6 @@ class TestE2E(unittest.TestCase): snes = loaded["systems"]["nintendo-snes"] self.assertEqual(snes["native_id"], "snes") - # --------------------------------------------------------------- # Exporter: System.dat round-trip # --------------------------------------------------------------- @@ -3227,13 +3866,16 @@ class TestE2E(unittest.TestCase): # No profile for core_b (unprofiled) # Clear cache so the new profile is picked up from common import _emulator_profiles_cache + _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) self.assertIn("core_a", profiles) self.assertNotIn("core_b", profiles) # Generate truth - truth = generate_platform_truth("testplat", config, registry_entry, profiles, db=None) + truth = generate_platform_truth( + "testplat", config, registry_entry, profiles, db=None + ) # Verify truth structure self.assertIn("test-system", truth["systems"]) @@ -3304,7 +3946,9 @@ class TestE2E(unittest.TestCase): """Platform file under different name matched by MD5 is not undeclared.""" config = load_platform_config("test_md5", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} # correct_hash.bin is declared by platform as renamed_file.bin with same MD5 # hash-based matching should suppress it from undeclared @@ -3319,7 +3963,6 @@ class TestE2E(unittest.TestCase): # correct_hash.bin is the DB canonical name for the same MD5 self.assertIn("correct_hash.bin", result) - # --------------------------------------------------------------- # Registry merge + all_libretro expansion + diff hash fallback # --------------------------------------------------------------- @@ -3358,7 +4001,7 @@ class TestE2E(unittest.TestCase): def test_176_all_libretro_in_list(self): """resolve_platform_cores expands all_libretro/retroarch in a list.""" - from common import resolve_platform_cores, load_emulator_profiles + from common import load_emulator_profiles, resolve_platform_cores # Create a libretro profile and a standalone profile for name, ptype in [("lr_core", "libretro"), ("sa_core", "standalone")]: @@ -3389,8 +4032,13 @@ class TestE2E(unittest.TestCase): "test-system": { "_coverage": {"cores_profiled": ["c"], "cores_unprofiled": []}, "files": [ - {"name": "ROM", "required": True, "md5": "abcd1234" * 4, - "_cores": ["c"], "_source_refs": []}, + { + "name": "ROM", + "required": True, + "md5": "abcd1234" * 4, + "_cores": ["c"], + "_source_refs": [], + }, ], } } @@ -3419,8 +4067,13 @@ class TestE2E(unittest.TestCase): "sega-gamegear": { "_coverage": {"cores_profiled": ["c"], "cores_unprofiled": []}, "files": [ - {"name": "bios.gg", "required": True, "md5": "a" * 32, - "_cores": ["c"], "_source_refs": []}, + { + "name": "bios.gg", + "required": True, + "md5": "a" * 32, + "_cores": ["c"], + "_source_refs": [], + }, ], }, } @@ -3444,7 +4097,9 @@ class TestE2E(unittest.TestCase): """bios_mode: agnostic profiles are skipped entirely by find_undeclared_files.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) emulators = {u["emulator"] for u in undeclared} # TestAgnostic should NOT appear in undeclared (bios_mode: agnostic) self.assertNotIn("TestAgnostic", emulators) @@ -3453,7 +4108,9 @@ class TestE2E(unittest.TestCase): """Files with agnostic: true are skipped, others in same profile are not.""" config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) - undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + undeclared = find_undeclared_files( + config, self.emulators_dir, self.db, profiles + ) names = {u["name"] for u in undeclared} # agnostic_file.bin should NOT be in undeclared (agnostic: true) self.assertNotIn("agnostic_file.bin", names) @@ -3463,12 +4120,20 @@ class TestE2E(unittest.TestCase): def test_181_agnostic_extras_scan(self): """Agnostic profiles add all matching DB files as extras.""" from generate_pack import _collect_emulator_extras + config = load_platform_config("test_existence", self.platforms_dir) profiles = load_emulator_profiles(self.emulators_dir) extras = _collect_emulator_extras( - config, self.emulators_dir, self.db, set(), "system", profiles, + config, + self.emulators_dir, + self.db, + set(), + "system", + profiles, ) - agnostic_extras = [e for e in extras if e.get("source_emulator") == "TestAgnostic"] + agnostic_extras = [ + e for e in extras if e.get("source_emulator") == "TestAgnostic" + ] # Agnostic scan should find files in the same directory as correct_hash.bin self.assertTrue(len(agnostic_extras) > 0, "Agnostic scan should produce extras") # All agnostic extras should have agnostic_scan flag @@ -3478,8 +4143,10 @@ class TestE2E(unittest.TestCase): def test_182_agnostic_rename_readme(self): """_build_agnostic_rename_readme generates correct text.""" from generate_pack import _build_agnostic_rename_readme + result = _build_agnostic_rename_readme( - "dsi_nand.bin", "DSi_Nand_AUS.bin", + "dsi_nand.bin", + "DSi_Nand_AUS.bin", ["DSi_Nand_EUR.bin", "DSi_Nand_USA.bin"], ) self.assertIn("dsi_nand.bin <- DSi_Nand_AUS.bin", result) @@ -3500,7 +4167,6 @@ class TestE2E(unittest.TestCase): self.assertIsNotNone(path) self.assertEqual(status, "agnostic_fallback") - def test_179_batocera_exporter_round_trip(self): """Batocera exporter produces valid Python dict format.""" from exporter.batocera_exporter import Exporter @@ -3510,9 +4176,14 @@ class TestE2E(unittest.TestCase): "sony-playstation": { "_coverage": {"cores_profiled": ["c"]}, "files": [ - {"name": "scph5501.bin", "destination": "scph5501.bin", - "required": True, "md5": "b" * 32, - "_cores": ["c"], "_source_refs": []}, + { + "name": "scph5501.bin", + "destination": "scph5501.bin", + "required": True, + "md5": "b" * 32, + "_cores": ["c"], + "_source_refs": [], + }, ], } } @@ -3541,9 +4212,14 @@ class TestE2E(unittest.TestCase): "sony-playstation": { "_coverage": {"cores_profiled": ["c"]}, "files": [ - {"name": "scph5501.bin", "destination": "scph5501.bin", - "required": True, "md5": "b" * 32, - "_cores": ["c"], "_source_refs": []}, + { + "name": "scph5501.bin", + "destination": "scph5501.bin", + "required": True, + "md5": "b" * 32, + "_cores": ["c"], + "_source_refs": [], + }, ], } } @@ -3560,7 +4236,7 @@ class TestE2E(unittest.TestCase): content = open(out).read() self.assertIn(" None: - result = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp') - self.assertIn('neogeo', result) - self.assertEqual(result['neogeo']['source_file'], 'd_neogeo.cpp') + result = find_bios_sets(NEOGEO_FIXTURE, "d_neogeo.cpp") + self.assertIn("neogeo", result) + self.assertEqual(result["neogeo"]["source_file"], "d_neogeo.cpp") def test_detects_pgm(self) -> None: - result = find_bios_sets(PGM_FIXTURE, 'd_pgm.cpp') - self.assertIn('pgm', result) - self.assertEqual(result['pgm']['source_file'], 'd_pgm.cpp') + result = find_bios_sets(PGM_FIXTURE, "d_pgm.cpp") + self.assertIn("pgm", result) + self.assertEqual(result["pgm"]["source_file"], "d_pgm.cpp") def test_ignores_non_bios(self) -> None: - result = find_bios_sets(NON_BIOS_FIXTURE, 'd_neogeo.cpp') + result = find_bios_sets(NON_BIOS_FIXTURE, "d_neogeo.cpp") self.assertEqual(result, {}) def test_source_line_positive(self) -> None: - result = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp') - self.assertGreater(result['neogeo']['source_line'], 0) + result = find_bios_sets(NEOGEO_FIXTURE, "d_neogeo.cpp") + self.assertGreater(result["neogeo"]["source_line"], 0) class TestParseRomInfo(unittest.TestCase): - def test_neogeo_rom_count(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo') + roms = parse_rom_info(NEOGEO_FIXTURE, "neogeo") self.assertEqual(len(roms), 5) def test_sentinel_skipped(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo') - names = [r['name'] for r in roms] - self.assertNotIn('', names) + roms = parse_rom_info(NEOGEO_FIXTURE, "neogeo") + names = [r["name"] for r in roms] + self.assertNotIn("", names) def test_crc32_lowercase_hex(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo') + roms = parse_rom_info(NEOGEO_FIXTURE, "neogeo") first = roms[0] - self.assertEqual(first['crc32'], '9036d879') - self.assertRegex(first['crc32'], r'^[0-9a-f]{8}$') + self.assertEqual(first["crc32"], "9036d879") + self.assertRegex(first["crc32"], r"^[0-9a-f]{8}$") def test_no_sha1(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo') + roms = parse_rom_info(NEOGEO_FIXTURE, "neogeo") for rom in roms: - self.assertNotIn('sha1', rom) + self.assertNotIn("sha1", rom) def test_neogeo_first_rom(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo') + roms = parse_rom_info(NEOGEO_FIXTURE, "neogeo") first = roms[0] - self.assertEqual(first['name'], 'sp-s2.sp1') - self.assertEqual(first['size'], 0x020000) - self.assertEqual(first['crc32'], '9036d879') + self.assertEqual(first["name"], "sp-s2.sp1") + self.assertEqual(first["size"], 0x020000) + self.assertEqual(first["crc32"], "9036d879") def test_pgm_rom_count(self) -> None: - roms = parse_rom_info(PGM_FIXTURE, 'pgm') + roms = parse_rom_info(PGM_FIXTURE, "pgm") self.assertEqual(len(roms), 3) def test_pgm_bios_entry(self) -> None: - roms = parse_rom_info(PGM_FIXTURE, 'pgm') + roms = parse_rom_info(PGM_FIXTURE, "pgm") bios = roms[2] - self.assertEqual(bios['name'], 'pgm_p01s.rom') - self.assertEqual(bios['crc32'], 'e42b166e') + self.assertEqual(bios["name"], "pgm_p01s.rom") + self.assertEqual(bios["crc32"], "e42b166e") def test_unknown_set_returns_empty(self) -> None: - roms = parse_rom_info(NEOGEO_FIXTURE, 'nonexistent') + roms = parse_rom_info(NEOGEO_FIXTURE, "nonexistent") self.assertEqual(roms, []) class TestParseSourceTree(unittest.TestCase): - def test_walks_drv_directory(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: - drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' / 'neogeo' + drv_dir = Path(tmpdir) / "src" / "burn" / "drv" / "neogeo" drv_dir.mkdir(parents=True) - (drv_dir / 'd_neogeo.cpp').write_text(NEOGEO_FIXTURE) + (drv_dir / "d_neogeo.cpp").write_text(NEOGEO_FIXTURE) result = parse_fbneo_source_tree(tmpdir) - self.assertIn('neogeo', result) - self.assertEqual(len(result['neogeo']['roms']), 5) + self.assertIn("neogeo", result) + self.assertEqual(len(result["neogeo"]["roms"]), 5) def test_skips_non_cpp(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: - drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' + drv_dir = Path(tmpdir) / "src" / "burn" / "drv" drv_dir.mkdir(parents=True) - (drv_dir / 'd_neogeo.h').write_text(NEOGEO_FIXTURE) + (drv_dir / "d_neogeo.h").write_text(NEOGEO_FIXTURE) result = parse_fbneo_source_tree(tmpdir) self.assertEqual(result, {}) @@ -175,16 +171,16 @@ class TestParseSourceTree(unittest.TestCase): self.assertEqual(result, {}) def test_multiple_sets(self) -> None: - combined = NEOGEO_FIXTURE + '\n' + PGM_FIXTURE + combined = NEOGEO_FIXTURE + "\n" + PGM_FIXTURE with tempfile.TemporaryDirectory() as tmpdir: - drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' + drv_dir = Path(tmpdir) / "src" / "burn" / "drv" drv_dir.mkdir(parents=True) - (drv_dir / 'd_combined.cpp').write_text(combined) + (drv_dir / "d_combined.cpp").write_text(combined) result = parse_fbneo_source_tree(tmpdir) - self.assertIn('neogeo', result) - self.assertIn('pgm', result) + self.assertIn("neogeo", result) + self.assertIn("pgm", result) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_hash_merge.py b/tests/test_hash_merge.py index fbd1c434..9d708634 100644 --- a/tests/test_hash_merge.py +++ b/tests/test_hash_merge.py @@ -18,35 +18,35 @@ from scripts.scraper._hash_merge import ( def _write_yaml(path: Path, data: dict) -> str: p = str(path) - with open(p, 'w', encoding='utf-8') as f: + with open(p, "w", encoding="utf-8") as f: yaml.dump(data, f, default_flow_style=False, sort_keys=False) return p def _write_json(path: Path, data: dict) -> str: p = str(path) - with open(p, 'w', encoding='utf-8') as f: + with open(p, "w", encoding="utf-8") as f: json.dump(data, f) return p def _make_mame_profile(**overrides: object) -> dict: base = { - 'emulator': 'MAME', - 'core_version': '0.285', - 'files': [ + "emulator": "MAME", + "core_version": "0.285", + "files": [ { - 'name': 'neogeo.zip', - 'required': True, - 'category': 'bios_zip', - 'system': 'snk-neogeo-mvs', - 'source_ref': 'src/mame/neogeo/neogeo.cpp:2400', - 'contents': [ + "name": "neogeo.zip", + "required": True, + "category": "bios_zip", + "system": "snk-neogeo-mvs", + "source_ref": "src/mame/neogeo/neogeo.cpp:2400", + "contents": [ { - 'name': 'sp-s2.sp1', - 'size': 131072, - 'crc32': 'oldcrc32', - 'description': 'Europe MVS (Ver. 2)', + "name": "sp-s2.sp1", + "size": 131072, + "crc32": "oldcrc32", + "description": "Europe MVS (Ver. 2)", }, ], }, @@ -58,23 +58,23 @@ def _make_mame_profile(**overrides: object) -> dict: def _make_mame_hashes(**overrides: object) -> dict: base = { - 'source': 'mamedev/mame', - 'version': '0.286', - 'commit': 'abc123', - 'fetched_at': '2026-03-30T12:00:00Z', - 'bios_sets': { - 'neogeo': { - 'source_file': 'src/mame/neogeo/neogeo.cpp', - 'source_line': 2432, - 'roms': [ + "source": "mamedev/mame", + "version": "0.286", + "commit": "abc123", + "fetched_at": "2026-03-30T12:00:00Z", + "bios_sets": { + "neogeo": { + "source_file": "src/mame/neogeo/neogeo.cpp", + "source_line": 2432, + "roms": [ { - 'name': 'sp-s2.sp1', - 'size': 131072, - 'crc32': '9036d879', - 'sha1': '4f834c55', - 'region': 'mainbios', - 'bios_label': 'euro', - 'bios_description': 'Europe MVS (Ver. 2)', + "name": "sp-s2.sp1", + "size": 131072, + "crc32": "9036d879", + "sha1": "4f834c55", + "region": "mainbios", + "bios_label": "euro", + "bios_description": "Europe MVS (Ver. 2)", }, ], }, @@ -86,21 +86,21 @@ def _make_mame_hashes(**overrides: object) -> dict: def _make_fbneo_profile(**overrides: object) -> dict: base = { - 'emulator': 'FinalBurn Neo', - 'core_version': 'v1.0.0.02', - 'files': [ + "emulator": "FinalBurn Neo", + "core_version": "v1.0.0.02", + "files": [ { - 'name': 'sp-s2.sp1', - 'archive': 'neogeo.zip', - 'system': 'snk-neogeo-mvs', - 'required': True, - 'size': 131072, - 'crc32': 'oldcrc32', - 'source_ref': 'src/burn/drv/neogeo/d_neogeo.cpp:1605', + "name": "sp-s2.sp1", + "archive": "neogeo.zip", + "system": "snk-neogeo-mvs", + "required": True, + "size": 131072, + "crc32": "oldcrc32", + "source_ref": "src/burn/drv/neogeo/d_neogeo.cpp:1605", }, { - 'name': 'hiscore.dat', - 'required': False, + "name": "hiscore.dat", + "required": False, }, ], } @@ -110,20 +110,20 @@ def _make_fbneo_profile(**overrides: object) -> dict: def _make_fbneo_hashes(**overrides: object) -> dict: base = { - 'source': 'finalburnneo/FBNeo', - 'version': 'v1.0.0.03', - 'commit': 'def456', - 'fetched_at': '2026-03-30T12:00:00Z', - 'bios_sets': { - 'neogeo': { - 'source_file': 'src/burn/drv/neogeo/d_neogeo.cpp', - 'source_line': 1604, - 'roms': [ + "source": "finalburnneo/FBNeo", + "version": "v1.0.0.03", + "commit": "def456", + "fetched_at": "2026-03-30T12:00:00Z", + "bios_sets": { + "neogeo": { + "source_file": "src/burn/drv/neogeo/d_neogeo.cpp", + "source_line": 1604, + "roms": [ { - 'name': 'sp-s2.sp1', - 'size': 131072, - 'crc32': '9036d879', - 'sha1': 'aabbccdd', + "name": "sp-s2.sp1", + "size": 131072, + "crc32": "9036d879", + "sha1": "aabbccdd", }, ], }, @@ -139,129 +139,129 @@ class TestMameMerge(unittest.TestCase): def test_merge_updates_contents(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) result = merge_mame_profile(profile_path, hashes_path) - bios_files = [f for f in result['files'] if f.get('category') == 'bios_zip'] + bios_files = [f for f in result["files"] if f.get("category") == "bios_zip"] self.assertEqual(len(bios_files), 1) - contents = bios_files[0]['contents'] - self.assertEqual(contents[0]['crc32'], '9036d879') - self.assertEqual(contents[0]['sha1'], '4f834c55') - self.assertEqual(contents[0]['description'], 'Europe MVS (Ver. 2)') + contents = bios_files[0]["contents"] + self.assertEqual(contents[0]["crc32"], "9036d879") + self.assertEqual(contents[0]["sha1"], "4f834c55") + self.assertEqual(contents[0]["description"], "Europe MVS (Ver. 2)") def test_merge_preserves_manual_fields(self) -> None: profile = _make_mame_profile() - profile['files'][0]['note'] = 'manually curated note' - profile['files'][0]['system'] = 'snk-neogeo-mvs' - profile['files'][0]['required'] = False + profile["files"][0]["note"] = "manually curated note" + profile["files"][0]["system"] = "snk-neogeo-mvs" + profile["files"][0]["required"] = False with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', profile) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", profile) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) result = merge_mame_profile(profile_path, hashes_path) - entry = [f for f in result['files'] if f.get('category') == 'bios_zip'][0] - self.assertEqual(entry['note'], 'manually curated note') - self.assertEqual(entry['system'], 'snk-neogeo-mvs') - self.assertFalse(entry['required']) + entry = [f for f in result["files"] if f.get("category") == "bios_zip"][0] + self.assertEqual(entry["note"], "manually curated note") + self.assertEqual(entry["system"], "snk-neogeo-mvs") + self.assertFalse(entry["required"]) def test_merge_adds_new_bios_set(self) -> None: hashes = _make_mame_hashes() - hashes['bios_sets']['pgm'] = { - 'source_file': 'src/mame/igs/pgm.cpp', - 'source_line': 5515, - 'roms': [ - {'name': 'pgm_t01s.rom', 'size': 2097152, 'crc32': '1a7123a0'}, + hashes["bios_sets"]["pgm"] = { + "source_file": "src/mame/igs/pgm.cpp", + "source_line": 5515, + "roms": [ + {"name": "pgm_t01s.rom", "size": 2097152, "crc32": "1a7123a0"}, ], } with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) result = merge_mame_profile(profile_path, hashes_path) - bios_files = [f for f in result['files'] if f.get('category') == 'bios_zip'] - names = {f['name'] for f in bios_files} - self.assertIn('pgm.zip', names) + bios_files = [f for f in result["files"] if f.get("category") == "bios_zip"] + names = {f["name"] for f in bios_files} + self.assertIn("pgm.zip", names) - pgm = next(f for f in bios_files if f['name'] == 'pgm.zip') - self.assertIsNone(pgm['system']) - self.assertTrue(pgm['required']) - self.assertEqual(pgm['category'], 'bios_zip') + pgm = next(f for f in bios_files if f["name"] == "pgm.zip") + self.assertIsNone(pgm["system"]) + self.assertTrue(pgm["required"]) + self.assertEqual(pgm["category"], "bios_zip") def test_merge_preserves_non_bios_files(self) -> None: profile = _make_mame_profile() - profile['files'].append({'name': 'hiscore.dat', 'required': False}) + profile["files"].append({"name": "hiscore.dat", "required": False}) with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', profile) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", profile) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) result = merge_mame_profile(profile_path, hashes_path) - non_bios = [f for f in result['files'] if f.get('category') != 'bios_zip'] + non_bios = [f for f in result["files"] if f.get("category") != "bios_zip"] self.assertEqual(len(non_bios), 1) - self.assertEqual(non_bios[0]['name'], 'hiscore.dat') + self.assertEqual(non_bios[0]["name"], "hiscore.dat") def test_merge_keeps_unmatched_bios_set(self) -> None: """Entries not in scraper scope stay untouched (no _upstream_removed).""" hashes = _make_mame_hashes() - hashes['bios_sets'] = {} # nothing from scraper + hashes["bios_sets"] = {} # nothing from scraper with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) result = merge_mame_profile(profile_path, hashes_path) - bios_files = [f for f in result['files'] if f.get('category') == 'bios_zip'] + bios_files = [f for f in result["files"] if f.get("category") == "bios_zip"] self.assertEqual(len(bios_files), 1) - self.assertNotIn('_upstream_removed', bios_files[0]) - self.assertEqual(bios_files[0]['name'], 'neogeo.zip') + self.assertNotIn("_upstream_removed", bios_files[0]) + self.assertEqual(bios_files[0]["name"], "neogeo.zip") def test_merge_updates_core_version(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) result = merge_mame_profile(profile_path, hashes_path) - self.assertEqual(result['core_version'], '0.286') + self.assertEqual(result["core_version"], "0.286") def test_merge_backup_created(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) merge_mame_profile(profile_path, hashes_path, write=True) - backup = p / 'mame.old.yml' + backup = p / "mame.old.yml" self.assertTrue(backup.exists()) - with open(backup, encoding='utf-8') as f: + with open(backup, encoding="utf-8") as f: old = yaml.safe_load(f) - self.assertEqual(old['core_version'], '0.285') + self.assertEqual(old["core_version"], "0.285") def test_merge_updates_source_ref(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_mame_hashes()) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", _make_mame_hashes()) result = merge_mame_profile(profile_path, hashes_path) - entry = [f for f in result['files'] if f.get('category') == 'bios_zip'][0] - self.assertEqual(entry['source_ref'], 'src/mame/neogeo/neogeo.cpp:2432') + entry = [f for f in result["files"] if f.get("category") == "bios_zip"][0] + self.assertEqual(entry["source_ref"], "src/mame/neogeo/neogeo.cpp:2432") class TestFbneoMerge(unittest.TestCase): @@ -270,74 +270,76 @@ class TestFbneoMerge(unittest.TestCase): def test_merge_updates_rom_entries(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_fbneo_hashes()) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", _make_fbneo_hashes()) result = merge_fbneo_profile(profile_path, hashes_path) - archive_files = [f for f in result['files'] if 'archive' in f] + archive_files = [f for f in result["files"] if "archive" in f] self.assertEqual(len(archive_files), 1) - self.assertEqual(archive_files[0]['crc32'], '9036d879') - self.assertEqual(archive_files[0]['system'], 'snk-neogeo-mvs') + self.assertEqual(archive_files[0]["crc32"], "9036d879") + self.assertEqual(archive_files[0]["system"], "snk-neogeo-mvs") def test_merge_adds_new_roms(self) -> None: hashes = _make_fbneo_hashes() - hashes['bios_sets']['neogeo']['roms'].append({ - 'name': 'sp-s3.sp1', - 'size': 131072, - 'crc32': '91b64be3', - }) + hashes["bios_sets"]["neogeo"]["roms"].append( + { + "name": "sp-s3.sp1", + "size": 131072, + "crc32": "91b64be3", + } + ) with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) result = merge_fbneo_profile(profile_path, hashes_path) - archive_files = [f for f in result['files'] if 'archive' in f] + archive_files = [f for f in result["files"] if "archive" in f] self.assertEqual(len(archive_files), 2) - new_rom = next(f for f in archive_files if f['name'] == 'sp-s3.sp1') - self.assertEqual(new_rom['archive'], 'neogeo.zip') - self.assertTrue(new_rom['required']) + new_rom = next(f for f in archive_files if f["name"] == "sp-s3.sp1") + self.assertEqual(new_rom["archive"], "neogeo.zip") + self.assertTrue(new_rom["required"]) def test_merge_preserves_non_archive_files(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_fbneo_hashes()) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", _make_fbneo_hashes()) result = merge_fbneo_profile(profile_path, hashes_path) - non_archive = [f for f in result['files'] if 'archive' not in f] + non_archive = [f for f in result["files"] if "archive" not in f] self.assertEqual(len(non_archive), 1) - self.assertEqual(non_archive[0]['name'], 'hiscore.dat') + self.assertEqual(non_archive[0]["name"], "hiscore.dat") def test_merge_keeps_unmatched_roms(self) -> None: """Entries not in scraper scope stay untouched (no _upstream_removed).""" hashes = _make_fbneo_hashes() - hashes['bios_sets'] = {} + hashes["bios_sets"] = {} with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) result = merge_fbneo_profile(profile_path, hashes_path) - archive_files = [f for f in result['files'] if 'archive' in f] + archive_files = [f for f in result["files"] if "archive" in f] self.assertEqual(len(archive_files), 1) - self.assertNotIn('_upstream_removed', archive_files[0]) + self.assertNotIn("_upstream_removed", archive_files[0]) def test_merge_updates_core_version(self) -> None: with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', _make_fbneo_hashes()) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", _make_fbneo_hashes()) result = merge_fbneo_profile(profile_path, hashes_path) - self.assertEqual(result['core_version'], 'v1.0.0.03') + self.assertEqual(result["core_version"], "v1.0.0.03") class TestDiff(unittest.TestCase): @@ -345,79 +347,81 @@ class TestDiff(unittest.TestCase): def test_diff_mame_detects_changes(self) -> None: hashes = _make_mame_hashes() - hashes['bios_sets']['pgm'] = { - 'source_file': 'src/mame/igs/pgm.cpp', - 'source_line': 5515, - 'roms': [ - {'name': 'pgm_t01s.rom', 'size': 2097152, 'crc32': '1a7123a0'}, + hashes["bios_sets"]["pgm"] = { + "source_file": "src/mame/igs/pgm.cpp", + "source_line": 5515, + "roms": [ + {"name": "pgm_t01s.rom", "size": 2097152, "crc32": "1a7123a0"}, ], } with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) - diff = compute_diff(profile_path, hashes_path, mode='mame') + diff = compute_diff(profile_path, hashes_path, mode="mame") - self.assertIn('pgm', diff['added']) - self.assertIn('neogeo', diff['updated']) - self.assertEqual(len(diff['removed']), 0) - self.assertEqual(diff['unchanged'], 0) + self.assertIn("pgm", diff["added"]) + self.assertIn("neogeo", diff["updated"]) + self.assertEqual(len(diff["removed"]), 0) + self.assertEqual(diff["unchanged"], 0) def test_diff_mame_out_of_scope(self) -> None: """Items in profile but not in scraper output = out of scope, not removed.""" hashes = _make_mame_hashes() - hashes['bios_sets'] = {} + hashes["bios_sets"] = {} with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'mame.yml', _make_mame_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "mame.yml", _make_mame_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) - diff = compute_diff(profile_path, hashes_path, mode='mame') + diff = compute_diff(profile_path, hashes_path, mode="mame") - self.assertEqual(diff['removed'], []) - self.assertEqual(diff['out_of_scope'], 1) - self.assertEqual(len(diff['added']), 0) + self.assertEqual(diff["removed"], []) + self.assertEqual(diff["out_of_scope"], 1) + self.assertEqual(len(diff["added"]), 0) def test_diff_fbneo_detects_changes(self) -> None: hashes = _make_fbneo_hashes() - hashes['bios_sets']['neogeo']['roms'].append({ - 'name': 'sp-s3.sp1', - 'size': 131072, - 'crc32': '91b64be3', - }) + hashes["bios_sets"]["neogeo"]["roms"].append( + { + "name": "sp-s3.sp1", + "size": 131072, + "crc32": "91b64be3", + } + ) with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', _make_fbneo_profile()) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "fbneo.yml", _make_fbneo_profile()) + hashes_path = _write_json(p / "hashes.json", hashes) - diff = compute_diff(profile_path, hashes_path, mode='fbneo') + diff = compute_diff(profile_path, hashes_path, mode="fbneo") - self.assertIn('neogeo.zip:sp-s3.sp1', diff['added']) - self.assertIn('neogeo.zip:sp-s2.sp1', diff['updated']) - self.assertEqual(len(diff['removed']), 0) + self.assertIn("neogeo.zip:sp-s3.sp1", diff["added"]) + self.assertIn("neogeo.zip:sp-s2.sp1", diff["updated"]) + self.assertEqual(len(diff["removed"]), 0) def test_diff_fbneo_unchanged(self) -> None: profile = _make_fbneo_profile() - profile['files'][0]['crc32'] = '9036d879' - profile['files'][0]['size'] = 131072 + profile["files"][0]["crc32"] = "9036d879" + profile["files"][0]["size"] = 131072 hashes = _make_fbneo_hashes() with tempfile.TemporaryDirectory() as td: p = Path(td) - profile_path = _write_yaml(p / 'fbneo.yml', profile) - hashes_path = _write_json(p / 'hashes.json', hashes) + profile_path = _write_yaml(p / "fbneo.yml", profile) + hashes_path = _write_json(p / "hashes.json", hashes) - diff = compute_diff(profile_path, hashes_path, mode='fbneo') + diff = compute_diff(profile_path, hashes_path, mode="fbneo") - self.assertEqual(diff['unchanged'], 1) - self.assertEqual(len(diff['added']), 0) - self.assertEqual(len(diff['updated']), 0) + self.assertEqual(diff["unchanged"], 1) + self.assertEqual(len(diff["added"]), 0) + self.assertEqual(len(diff["updated"]), 0) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_mame_parser.py b/tests/test_mame_parser.py index 779e2ea1..b74baced 100644 --- a/tests/test_mame_parser.py +++ b/tests/test_mame_parser.py @@ -86,101 +86,101 @@ class TestFindBiosRootSets(unittest.TestCase): """Tests for find_bios_root_sets.""" def test_detects_neogeo_from_game_macro(self) -> None: - result = find_bios_root_sets(NEOGEO_FIXTURE, 'src/mame/snk/neogeo.cpp') - self.assertIn('neogeo', result) - self.assertEqual(result['neogeo']['source_file'], 'src/mame/snk/neogeo.cpp') - self.assertIsInstance(result['neogeo']['source_line'], int) + result = find_bios_root_sets(NEOGEO_FIXTURE, "src/mame/snk/neogeo.cpp") + self.assertIn("neogeo", result) + self.assertEqual(result["neogeo"]["source_file"], "src/mame/snk/neogeo.cpp") + self.assertIsInstance(result["neogeo"]["source_line"], int) def test_detects_from_comp_macro(self) -> None: - result = find_bios_root_sets(DEVICE_FIXTURE, 'src/mame/acorn/bbc.cpp') - self.assertIn('bbcb', result) + result = find_bios_root_sets(DEVICE_FIXTURE, "src/mame/acorn/bbc.cpp") + self.assertIn("bbcb", result) def test_detects_from_cons_macro(self) -> None: - result = find_bios_root_sets(CONS_FIXTURE, 'src/mame/sega/megadriv.cpp') - self.assertIn('megadriv', result) + result = find_bios_root_sets(CONS_FIXTURE, "src/mame/sega/megadriv.cpp") + self.assertIn("megadriv", result) def test_ignores_non_bios_games(self) -> None: - result = find_bios_root_sets(NON_BIOS_FIXTURE, 'src/mame/pacman/pacman.cpp') + result = find_bios_root_sets(NON_BIOS_FIXTURE, "src/mame/pacman/pacman.cpp") self.assertEqual(result, {}) def test_detects_from_nodump_fixture(self) -> None: - result = find_bios_root_sets(NODUMP_FIXTURE, 'test.cpp') - self.assertIn('testnd', result) + result = find_bios_root_sets(NODUMP_FIXTURE, "test.cpp") + self.assertIn("testnd", result) def test_detects_from_baddump_fixture(self) -> None: - result = find_bios_root_sets(BADDUMP_FIXTURE, 'test.cpp') - self.assertIn('testbd', result) + result = find_bios_root_sets(BADDUMP_FIXTURE, "test.cpp") + self.assertIn("testbd", result) class TestParseRomBlock(unittest.TestCase): """Tests for parse_rom_block.""" def test_extracts_rom_names(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - names = [r['name'] for r in roms] - self.assertIn('sp-s2.sp1', names) - self.assertIn('vs-bios.rom', names) - self.assertIn('sm1.sm1', names) + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + names = [r["name"] for r in roms] + self.assertIn("sp-s2.sp1", names) + self.assertIn("vs-bios.rom", names) + self.assertIn("sm1.sm1", names) def test_extracts_crc32_and_sha1(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - sp_s2 = next(r for r in roms if r['name'] == 'sp-s2.sp1') - self.assertEqual(sp_s2['crc32'], '9036d879') - self.assertEqual(sp_s2['sha1'], '4f5ed7105b7128794654ce82b51723e16e389543') + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + sp_s2 = next(r for r in roms if r["name"] == "sp-s2.sp1") + self.assertEqual(sp_s2["crc32"], "9036d879") + self.assertEqual(sp_s2["sha1"], "4f5ed7105b7128794654ce82b51723e16e389543") def test_extracts_size(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - sp_s2 = next(r for r in roms if r['name'] == 'sp-s2.sp1') - self.assertEqual(sp_s2['size'], 0x020000) + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + sp_s2 = next(r for r in roms if r["name"] == "sp-s2.sp1") + self.assertEqual(sp_s2["size"], 0x020000) def test_extracts_bios_metadata(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - sp_s2 = next(r for r in roms if r['name'] == 'sp-s2.sp1') - self.assertEqual(sp_s2['bios_index'], 0) - self.assertEqual(sp_s2['bios_label'], 'euro') - self.assertEqual(sp_s2['bios_description'], 'Europe MVS (Ver. 2)') + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + sp_s2 = next(r for r in roms if r["name"] == "sp-s2.sp1") + self.assertEqual(sp_s2["bios_index"], 0) + self.assertEqual(sp_s2["bios_label"], "euro") + self.assertEqual(sp_s2["bios_description"], "Europe MVS (Ver. 2)") def test_non_bios_rom_has_no_bios_fields(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - sm1 = next(r for r in roms if r['name'] == 'sm1.sm1') - self.assertNotIn('bios_index', sm1) - self.assertNotIn('bios_label', sm1) + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + sm1 = next(r for r in roms if r["name"] == "sm1.sm1") + self.assertNotIn("bios_index", sm1) + self.assertNotIn("bios_label", sm1) def test_skips_no_dump(self) -> None: - roms = parse_rom_block(NODUMP_FIXTURE, 'testnd') - names = [r['name'] for r in roms] - self.assertIn('good.rom', names) - self.assertNotIn('missing.rom', names) + roms = parse_rom_block(NODUMP_FIXTURE, "testnd") + names = [r["name"] for r in roms] + self.assertIn("good.rom", names) + self.assertNotIn("missing.rom", names) def test_includes_bad_dump_with_flag(self) -> None: - roms = parse_rom_block(BADDUMP_FIXTURE, 'testbd') + roms = parse_rom_block(BADDUMP_FIXTURE, "testbd") self.assertEqual(len(roms), 1) - self.assertEqual(roms[0]['name'], 'badrom.bin') - self.assertTrue(roms[0]['bad_dump']) - self.assertEqual(roms[0]['crc32'], 'deadbeef') - self.assertEqual(roms[0]['sha1'], '0123456789abcdef0123456789abcdef01234567') + self.assertEqual(roms[0]["name"], "badrom.bin") + self.assertTrue(roms[0]["bad_dump"]) + self.assertEqual(roms[0]["crc32"], "deadbeef") + self.assertEqual(roms[0]["sha1"], "0123456789abcdef0123456789abcdef01234567") def test_handles_rom_load16_word(self) -> None: - roms = parse_rom_block(CONS_FIXTURE, 'megadriv') + roms = parse_rom_block(CONS_FIXTURE, "megadriv") self.assertEqual(len(roms), 1) - self.assertEqual(roms[0]['name'], 'epr-6209.ic7') - self.assertEqual(roms[0]['crc32'], 'cafebabe') + self.assertEqual(roms[0]["name"], "epr-6209.ic7") + self.assertEqual(roms[0]["crc32"], "cafebabe") def test_tracks_rom_region(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'neogeo') - sp_s2 = next(r for r in roms if r['name'] == 'sp-s2.sp1') - sm1 = next(r for r in roms if r['name'] == 'sm1.sm1') - self.assertEqual(sp_s2['region'], 'mainbios') - self.assertEqual(sm1['region'], 'audiocpu') + roms = parse_rom_block(NEOGEO_FIXTURE, "neogeo") + sp_s2 = next(r for r in roms if r["name"] == "sp-s2.sp1") + sm1 = next(r for r in roms if r["name"] == "sm1.sm1") + self.assertEqual(sp_s2["region"], "mainbios") + self.assertEqual(sm1["region"], "audiocpu") def test_returns_empty_for_unknown_set(self) -> None: - roms = parse_rom_block(NEOGEO_FIXTURE, 'nonexistent') + roms = parse_rom_block(NEOGEO_FIXTURE, "nonexistent") self.assertEqual(roms, []) def test_good_rom_not_flagged_bad_dump(self) -> None: - roms = parse_rom_block(NODUMP_FIXTURE, 'testnd') - good = next(r for r in roms if r['name'] == 'good.rom') - self.assertFalse(good['bad_dump']) + roms = parse_rom_block(NODUMP_FIXTURE, "testnd") + good = next(r for r in roms if r["name"] == "good.rom") + self.assertFalse(good["bad_dump"]) def test_crc32_sha1_lowercase(self) -> None: fixture = """\ @@ -189,9 +189,9 @@ ROM_START( upper ) ROM_LOAD( "test.rom", 0x00000, 0x4000, CRC(AABBCCDD) SHA1(AABBCCDDEEFF00112233AABBCCDDEEFF00112233) ) ROM_END """ - roms = parse_rom_block(fixture, 'upper') - self.assertEqual(roms[0]['crc32'], 'aabbccdd') - self.assertEqual(roms[0]['sha1'], 'aabbccddeeff00112233aabbccddeeff00112233') + roms = parse_rom_block(fixture, "upper") + self.assertEqual(roms[0]["crc32"], "aabbccdd") + self.assertEqual(roms[0]["sha1"], "aabbccddeeff00112233aabbccddeeff00112233") class TestParseMameSourceTree(unittest.TestCase): @@ -199,26 +199,26 @@ class TestParseMameSourceTree(unittest.TestCase): def test_walks_source_tree(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: - mame_dir = os.path.join(tmpdir, 'src', 'mame', 'snk') + mame_dir = os.path.join(tmpdir, "src", "mame", "snk") os.makedirs(mame_dir) - filepath = os.path.join(mame_dir, 'neogeo.cpp') - with open(filepath, 'w') as f: + filepath = os.path.join(mame_dir, "neogeo.cpp") + with open(filepath, "w") as f: f.write(NEOGEO_FIXTURE) results = parse_mame_source_tree(tmpdir) - self.assertIn('neogeo', results) - self.assertEqual(len(results['neogeo']['roms']), 3) + self.assertIn("neogeo", results) + self.assertEqual(len(results["neogeo"]["roms"]), 3) self.assertEqual( - results['neogeo']['source_file'], - 'src/mame/snk/neogeo.cpp', + results["neogeo"]["source_file"], + "src/mame/snk/neogeo.cpp", ) def test_ignores_non_source_files(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: - mame_dir = os.path.join(tmpdir, 'src', 'mame') + mame_dir = os.path.join(tmpdir, "src", "mame") os.makedirs(mame_dir) # Write a .txt file that should be ignored - with open(os.path.join(mame_dir, 'notes.txt'), 'w') as f: + with open(os.path.join(mame_dir, "notes.txt"), "w") as f: f.write(NEOGEO_FIXTURE) results = parse_mame_source_tree(tmpdir) @@ -226,13 +226,13 @@ class TestParseMameSourceTree(unittest.TestCase): def test_scans_devices_dir(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: - dev_dir = os.path.join(tmpdir, 'src', 'devices', 'bus') + dev_dir = os.path.join(tmpdir, "src", "devices", "bus") os.makedirs(dev_dir) - with open(os.path.join(dev_dir, 'test.cpp'), 'w') as f: + with open(os.path.join(dev_dir, "test.cpp"), "w") as f: f.write(DEVICE_FIXTURE) results = parse_mame_source_tree(tmpdir) - self.assertIn('bbcb', results) + self.assertIn("bbcb", results) def test_empty_tree(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: @@ -240,5 +240,5 @@ class TestParseMameSourceTree(unittest.TestCase): self.assertEqual(results, {}) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_pack_integrity.py b/tests/test_pack_integrity.py index deb57e85..43184f94 100644 --- a/tests/test_pack_integrity.py +++ b/tests/test_pack_integrity.py @@ -25,11 +25,11 @@ def _platform_has_pack(platform_name: str) -> bool: return False sys.path.insert(0, os.path.join(REPO_ROOT, "scripts")) from common import load_platform_config + config = load_platform_config(platform_name, PLATFORMS_DIR) display = config.get("platform", platform_name).replace(" ", "_") return any( - f.endswith("_BIOS_Pack.zip") and display in f - for f in os.listdir(DIST_DIR) + f.endswith("_BIOS_Pack.zip") and display in f for f in os.listdir(DIST_DIR) ) @@ -40,10 +40,18 @@ class PackIntegrityTest(unittest.TestCase): if not _platform_has_pack(platform_name): self.skipTest(f"no pack found for {platform_name}") result = subprocess.run( - [sys.executable, "scripts/generate_pack.py", - "--platform", platform_name, - "--verify-packs", "--output-dir", "dist/"], - capture_output=True, text=True, cwd=REPO_ROOT, + [ + sys.executable, + "scripts/generate_pack.py", + "--platform", + platform_name, + "--verify-packs", + "--output-dir", + "dist/", + ], + capture_output=True, + text=True, + cwd=REPO_ROOT, ) if result.returncode != 0: self.fail(