Files
libretro/scripts/generate_pack.py
Abdessamad Derraz 6a21a99c22 feat: platform-core registry for exact pack generation
resolve_platform_cores() links platforms to their cores via
three strategies: all_libretro, explicit list, system ID
fallback. Pack generation always includes core requirements
beyond platform baseline. Case-insensitive dedup prevents
conflicts on Windows/macOS. Data dir strip_components fixes
doubled paths for Dolphin and PPSSPP caches.
2026-03-19 16:10:43 +01:00

546 lines
22 KiB
Python

#!/usr/bin/env python3
"""Generate platform-specific BIOS ZIP packs.
Usage:
python scripts/generate_pack.py --platform retroarch [--output-dir dist/]
python scripts/generate_pack.py --all [--output-dir dist/]
Reads platform YAML config + database.json -> creates ZIP with correct
file layout for each platform. Handles inheritance, shared groups, variants,
and 3-tier storage (embedded/external/user_provided).
"""
from __future__ import annotations
import argparse
import hashlib
import json
import os
import sys
import tempfile
import urllib.request
import urllib.error
import zipfile
from pathlib import Path
sys.path.insert(0, os.path.dirname(__file__))
from common import (
build_zip_contents_index, check_inside_zip, compute_hashes,
group_identical_platforms, load_database, load_data_dir_registry,
load_emulator_profiles, load_platform_config, md5_composite,
resolve_local_file,
)
try:
import yaml
except ImportError:
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
sys.exit(1)
DEFAULT_PLATFORMS_DIR = "platforms"
DEFAULT_DB_FILE = "database.json"
DEFAULT_OUTPUT_DIR = "dist"
DEFAULT_BIOS_DIR = "bios"
LARGE_FILES_RELEASE = "large-files"
LARGE_FILES_REPO = "Abdess/retrobios"
MAX_ENTRY_SIZE = 512 * 1024 * 1024 # 512MB
def _verify_file_hash(path: str, expected_sha1: str = "",
expected_md5: str = "") -> bool:
if not expected_sha1 and not expected_md5:
return True
hashes = compute_hashes(path)
if expected_sha1:
return hashes["sha1"].lower() == expected_sha1.lower()
md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
return hashes["md5"].lower() in md5_list
def fetch_large_file(name: str, dest_dir: str = ".cache/large",
expected_sha1: str = "", expected_md5: str = "") -> str | None:
"""Download a large file from the 'large-files' GitHub release if not cached."""
cached = os.path.join(dest_dir, name)
if os.path.exists(cached):
if expected_sha1 or expected_md5:
if _verify_file_hash(cached, expected_sha1, expected_md5):
return cached
os.unlink(cached)
else:
return cached
encoded_name = urllib.request.quote(name)
url = f"https://github.com/{LARGE_FILES_REPO}/releases/download/{LARGE_FILES_RELEASE}/{encoded_name}"
try:
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack/1.0"})
with urllib.request.urlopen(req, timeout=300) as resp:
os.makedirs(dest_dir, exist_ok=True)
with open(cached, "wb") as f:
while True:
chunk = resp.read(65536)
if not chunk:
break
f.write(chunk)
except (urllib.error.URLError, urllib.error.HTTPError):
return None
if expected_sha1 or expected_md5:
if not _verify_file_hash(cached, expected_sha1, expected_md5):
os.unlink(cached)
return None
return cached
def _sanitize_path(raw: str) -> str:
"""Strip path traversal components from a relative path."""
raw = raw.replace("\\", "/")
parts = [p for p in raw.split("/") if p and p not in ("..", ".")]
return "/".join(parts)
def resolve_file(file_entry: dict, db: dict, bios_dir: str,
zip_contents: dict | None = None) -> tuple[str | None, str]:
"""Resolve a BIOS file with storage tiers and release asset fallback.
Wraps common.resolve_local_file() with pack-specific logic for
storage tiers (external/user_provided) and large file release assets.
"""
storage = file_entry.get("storage", "embedded")
if storage == "user_provided":
return None, "user_provided"
if storage == "external":
return None, "external"
path, status = resolve_local_file(file_entry, db, zip_contents)
if path:
return path, status
# Last resort: large files from GitHub release assets
name = file_entry.get("name", "")
sha1 = file_entry.get("sha1")
md5_raw = file_entry.get("md5", "")
md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else []
first_md5 = md5_list[0] if md5_list else ""
cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5)
if cached:
return cached, "release_asset"
return None, "not_found"
def download_external(file_entry: dict, dest_path: str) -> bool:
"""Download an external BIOS file, verify hash, save to dest_path."""
url = file_entry.get("source_url")
if not url:
return False
sha256 = file_entry.get("sha256")
sha1 = file_entry.get("sha1")
md5 = file_entry.get("md5")
if not (sha256 or sha1 or md5):
print(f" WARNING: no hash for {file_entry['name']}, skipping unverifiable download")
return False
try:
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack-gen/1.0"})
with urllib.request.urlopen(req, timeout=120) as resp:
data = resp.read()
except urllib.error.URLError as e:
print(f" WARNING: Failed to download {url}: {e}")
return False
if sha256:
actual = hashlib.sha256(data).hexdigest()
if actual != sha256:
print(f" WARNING: SHA256 mismatch for {file_entry['name']}")
return False
elif sha1:
actual = hashlib.sha1(data).hexdigest()
if actual != sha1:
print(f" WARNING: SHA1 mismatch for {file_entry['name']}")
return False
elif md5:
actual = hashlib.md5(data).hexdigest()
if actual != md5:
print(f" WARNING: MD5 mismatch for {file_entry['name']}")
return False
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
with open(dest_path, "wb") as f:
f.write(data)
return True
def _collect_emulator_extras(
config: dict,
emulators_dir: str,
db: dict,
seen: set,
base_dest: str,
emu_profiles: dict | None = None,
) -> list[dict]:
"""Collect core requirement files from emulator profiles not in the platform pack.
Uses the same system-overlap matching as verify.py cross-reference:
- Matches emulators by shared system IDs with the platform
- Filters mode: standalone, type: launcher, type: alias
- Respects data_directories coverage
- Only returns files that exist in the repo (packable)
Works for ANY platform (RetroArch, Batocera, Recalbox, etc.)
"""
from verify import find_undeclared_files
undeclared = find_undeclared_files(config, emulators_dir, db, emu_profiles)
extras = []
for u in undeclared:
if not u["in_repo"]:
continue
name = u["name"]
dest = name
full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen:
continue
extras.append({
"name": name,
"destination": dest,
"required": u.get("required", False),
"hle_fallback": u.get("hle_fallback", False),
"source_emulator": u.get("emulator", ""),
})
return extras
def generate_pack(
platform_name: str,
platforms_dir: str,
db: dict,
bios_dir: str,
output_dir: str,
include_extras: bool = False,
emulators_dir: str = "emulators",
zip_contents: dict | None = None,
data_registry: dict | None = None,
) -> str | None:
"""Generate a ZIP pack for a platform.
Returns the path to the generated ZIP, or None on failure.
"""
config = load_platform_config(platform_name, platforms_dir)
if zip_contents is None:
zip_contents = {}
verification_mode = config.get("verification_mode", "existence")
platform_display = config.get("platform", platform_name)
base_dest = config.get("base_destination", "")
zip_name = f"{platform_display.replace(' ', '_')}_BIOS_Pack.zip"
zip_path = os.path.join(output_dir, zip_name)
os.makedirs(output_dir, exist_ok=True)
total_files = 0
missing_files = []
user_provided = []
seen_destinations: set[str] = set()
seen_lower: set[str] = set() # case-insensitive dedup for Windows/macOS
# Per-file status: worst status wins (missing > untested > ok)
file_status: dict[str, str] = {}
file_reasons: dict[str, str] = {}
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
for sys_id, system in sorted(config.get("systems", {}).items()):
for file_entry in system.get("files", []):
dest = _sanitize_path(file_entry.get("destination", file_entry["name"]))
if not dest:
# EmuDeck-style entries (system:md5 whitelist, no filename).
fkey = f"{sys_id}/{file_entry.get('name', '')}"
md5 = file_entry.get("md5", "")
if md5 and md5 in db.get("indexes", {}).get("by_md5", {}):
file_status.setdefault(fkey, "ok")
else:
file_status[fkey] = "missing"
continue
if base_dest:
full_dest = f"{base_dest}/{dest}"
else:
full_dest = dest
dedup_key = full_dest
already_packed = dedup_key in seen_destinations
storage = file_entry.get("storage", "embedded")
if storage == "user_provided":
if already_packed:
continue
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
file_status.setdefault(dedup_key, "ok")
instructions = file_entry.get("instructions", "Please provide this file manually.")
instr_name = f"INSTRUCTIONS_{file_entry['name']}.txt"
instr_path = f"{base_dest}/{instr_name}" if base_dest else instr_name
zf.writestr(instr_path, f"File needed: {file_entry['name']}\n\n{instructions}\n")
user_provided.append(file_entry["name"])
total_files += 1
continue
local_path, status = resolve_file(file_entry, db, bios_dir, zip_contents)
if status == "external":
file_ext = os.path.splitext(file_entry["name"])[1] or ""
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
tmp_path = tmp.name
try:
if download_external(file_entry, tmp_path):
extract = file_entry.get("extract", False)
if extract and tmp_path.endswith(".zip"):
_extract_zip_to_archive(tmp_path, full_dest, zf)
else:
zf.write(tmp_path, full_dest)
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
file_status.setdefault(dedup_key, "ok")
total_files += 1
else:
missing_files.append(file_entry["name"])
file_status[dedup_key] = "missing"
finally:
if os.path.exists(tmp_path):
os.unlink(tmp_path)
continue
if status == "not_found":
if not already_packed:
missing_files.append(file_entry["name"])
file_status[dedup_key] = "missing"
continue
if status == "hash_mismatch" and verification_mode != "existence":
zf_name = file_entry.get("zipped_file")
if zf_name and local_path:
inner_md5_raw = file_entry.get("md5", "")
inner_md5_list = (
[m.strip() for m in inner_md5_raw.split(",") if m.strip()]
if inner_md5_raw else [""]
)
zip_ok = False
last_result = "not_in_zip"
for md5_candidate in inner_md5_list:
last_result = check_inside_zip(local_path, zf_name, md5_candidate)
if last_result == "ok":
zip_ok = True
break
if zip_ok:
file_status.setdefault(dedup_key, "ok")
elif last_result == "not_in_zip":
file_status[dedup_key] = "untested"
file_reasons[dedup_key] = f"{zf_name} not found inside ZIP"
elif last_result == "error":
file_status[dedup_key] = "untested"
file_reasons[dedup_key] = "cannot read ZIP"
else:
file_status[dedup_key] = "untested"
file_reasons[dedup_key] = f"{zf_name} MD5 mismatch inside ZIP"
else:
file_status[dedup_key] = "untested"
file_reasons[dedup_key] = "hash mismatch"
else:
file_status.setdefault(dedup_key, "ok")
if already_packed:
continue
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
extract = file_entry.get("extract", False)
if extract and local_path.endswith(".zip"):
_extract_zip_to_archive(local_path, full_dest, zf)
else:
zf.write(local_path, full_dest)
total_files += 1
# Core requirements: files platform's cores need but YAML doesn't declare
emu_profiles = load_emulator_profiles(emulators_dir)
core_files = _collect_emulator_extras(
config, emulators_dir, db,
seen_destinations, base_dest, emu_profiles,
)
core_count = 0
for fe in core_files:
dest = _sanitize_path(fe.get("destination", fe["name"]))
if not dest:
continue
full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen_destinations:
continue
# Skip case-insensitive duplicates (Windows/macOS FS safety)
if full_dest.lower() in seen_lower:
continue
local_path, status = resolve_file(fe, db, bios_dir, zip_contents)
if status in ("not_found", "external", "user_provided"):
continue
zf.write(local_path, full_dest)
seen_destinations.add(full_dest)
seen_lower.add(full_dest.lower())
core_count += 1
total_files += 1
# Data directories from _data_dirs.yml
for sys_id, system in sorted(config.get("systems", {}).items()):
for dd in system.get("data_directories", []):
ref_key = dd.get("ref", "")
if not ref_key or not data_registry or ref_key not in data_registry:
continue
entry = data_registry[ref_key]
allowed = entry.get("for_platforms")
if allowed and platform_name not in allowed:
continue
local_path = entry.get("local_cache", "")
if not local_path or not os.path.isdir(local_path):
print(f" WARNING: data directory '{ref_key}' not cached at {local_path} — run refresh_data_dirs.py")
continue
dd_dest = dd.get("destination", "")
dd_prefix = f"{base_dest}/{dd_dest}" if base_dest else dd_dest
for root, _dirs, filenames in os.walk(local_path):
for fname in filenames:
src = os.path.join(root, fname)
rel = os.path.relpath(src, local_path)
full = f"{dd_prefix}/{rel}"
if full in seen_destinations or full.lower() in seen_lower:
continue
seen_destinations.add(full)
seen_lower.add(full.lower())
zf.write(src, full)
total_files += 1
files_ok = sum(1 for s in file_status.values() if s == "ok")
files_untested = sum(1 for s in file_status.values() if s == "untested")
files_miss = sum(1 for s in file_status.values() if s == "missing")
total_checked = len(file_status)
parts = [f"{files_ok}/{total_checked} files OK"]
if files_untested:
parts.append(f"{files_untested} untested")
if files_miss:
parts.append(f"{files_miss} missing")
baseline = total_files - core_count
print(f" {zip_path}: {total_files} files packed ({baseline} baseline + {core_count} from cores), {', '.join(parts)} [{verification_mode}]")
for key, reason in sorted(file_reasons.items()):
status = file_status.get(key, "")
label = "UNTESTED"
print(f" {label}: {key}{reason}")
for name in missing_files:
print(f" MISSING: {name}")
return zip_path
def _extract_zip_to_archive(source_zip: str, dest_prefix: str, target_zf: zipfile.ZipFile):
"""Extract contents of a source ZIP into target ZIP under dest_prefix."""
with zipfile.ZipFile(source_zip, "r") as src:
for info in src.infolist():
if info.is_dir():
continue
clean_name = _sanitize_path(info.filename)
if not clean_name:
continue
data = src.read(info.filename)
target_path = f"{dest_prefix}/{clean_name}" if dest_prefix else clean_name
target_zf.writestr(target_path, data)
def list_platforms(platforms_dir: str) -> list[str]:
"""List available platform names from YAML files."""
platforms = []
for f in sorted(Path(platforms_dir).glob("*.yml")):
if f.name.startswith("_"):
continue
platforms.append(f.stem)
return platforms
def main():
parser = argparse.ArgumentParser(description="Generate platform BIOS ZIP packs")
parser.add_argument("--platform", "-p", help="Platform name (e.g., retroarch)")
parser.add_argument("--all", action="store_true", help="Generate packs for all active platforms")
parser.add_argument("--include-archived", action="store_true", help="Include archived platforms")
parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
parser.add_argument("--db", default=DEFAULT_DB_FILE, help="Path to database.json")
parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
parser.add_argument("--output-dir", "-o", default=DEFAULT_OUTPUT_DIR)
# --include-extras is now a no-op: core requirements are always included
parser.add_argument("--include-extras", action="store_true",
help="(no-op) Core requirements are always included")
parser.add_argument("--emulators-dir", default="emulators")
parser.add_argument("--offline", action="store_true",
help="Skip data directory freshness check, use cache only")
parser.add_argument("--refresh-data", action="store_true",
help="Force re-download all data directories")
parser.add_argument("--list", action="store_true", help="List available platforms")
args = parser.parse_args()
if args.list:
platforms = list_platforms(args.platforms_dir)
for p in platforms:
print(p)
return
if args.all:
sys.path.insert(0, os.path.dirname(__file__))
from list_platforms import list_platforms as _list_active
platforms = _list_active(include_archived=args.include_archived)
elif args.platform:
platforms = [args.platform]
else:
parser.error("Specify --platform or --all")
return
db = load_database(args.db)
zip_contents = build_zip_contents_index(db)
data_registry = load_data_dir_registry(args.platforms_dir)
if data_registry and not args.offline:
from refresh_data_dirs import refresh_all, load_registry
registry = load_registry(os.path.join(args.platforms_dir, "_data_dirs.yml"))
results = refresh_all(registry, force=args.refresh_data)
updated = sum(1 for v in results.values() if v)
if updated:
print(f"Refreshed {updated} data director{'ies' if updated > 1 else 'y'}")
groups = group_identical_platforms(platforms, args.platforms_dir)
for group_platforms, representative in groups:
if len(group_platforms) > 1:
names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
combined_name = " + ".join(names)
print(f"\nGenerating shared pack for {combined_name}...")
else:
print(f"\nGenerating pack for {representative}...")
try:
zip_path = generate_pack(
representative, args.platforms_dir, db, args.bios_dir, args.output_dir,
include_extras=args.include_extras, emulators_dir=args.emulators_dir,
zip_contents=zip_contents, data_registry=data_registry,
)
if zip_path and len(group_platforms) > 1:
# Rename ZIP to include all platform names
names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
combined_filename = "_".join(n.replace(" ", "") for n in names) + "_BIOS_Pack.zip"
new_path = os.path.join(os.path.dirname(zip_path), combined_filename)
if new_path != zip_path:
os.rename(zip_path, new_path)
print(f" Renamed -> {os.path.basename(new_path)}")
except (FileNotFoundError, OSError, yaml.YAMLError) as e:
print(f" ERROR: {e}")
if __name__ == "__main__":
main()