mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
414 lines
14 KiB
Python
414 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""Auto-fetch missing BIOS files from multiple sources.
|
|
|
|
Pipeline:
|
|
1. Cross-reference database.json (already exists under different name/path?)
|
|
2. Scan old branches (git show origin/branch:path)
|
|
3. Search public BIOS repos on GitHub
|
|
4. Search archive.org collections
|
|
5. Create GitHub Issue for community help
|
|
|
|
Usage:
|
|
python scripts/auto_fetch.py --platform retroarch [--dry-run]
|
|
python scripts/auto_fetch.py --all [--dry-run]
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import urllib.request
|
|
import urllib.error
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from common import load_database, load_platform_config
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
DEFAULT_DB = "database.json"
|
|
DEFAULT_PLATFORMS_DIR = "platforms"
|
|
DEFAULT_BIOS_DIR = "bios"
|
|
|
|
LEGACY_BRANCHES = ["libretro", "RetroArch", "RetroPie", "Recalbox", "batocera", "Other"]
|
|
|
|
PUBLIC_REPOS = [
|
|
# archtaurus/RetroPieBIOS - most complete verified collection
|
|
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/{name}",
|
|
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/pcsx2/bios/{name}",
|
|
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/ep128emu/roms/{name}",
|
|
"https://raw.githubusercontent.com/archtaurus/RetroPieBIOS/master/BIOS/fuse/{name}",
|
|
# prefetchnta/retroarch-bios - alternative verified collection
|
|
"https://raw.githubusercontent.com/prefetchnta/retroarch-bios/main/system/{name}",
|
|
"https://raw.githubusercontent.com/prefetchnta/retroarch-bios/main/system/pcsx2/bios/{name}",
|
|
# BatoceraPLUS - Batocera-specific
|
|
"https://raw.githubusercontent.com/BatoceraPLUS/Batocera.PLUS-bios/main/{name}",
|
|
]
|
|
|
|
ARCHIVE_ORG_COLLECTIONS = [
|
|
"RetroarchSystemFiles",
|
|
"retroarch_bios",
|
|
"retroarch-ultimate-bios-pack_20250824",
|
|
"system_20240621",
|
|
"full-pack-bios-batocera-39",
|
|
]
|
|
|
|
|
|
def find_missing(config: dict, db: dict) -> list[dict]:
|
|
"""Find BIOS files required by platform but not in database."""
|
|
missing = []
|
|
|
|
for sys_id, system in config.get("systems", {}).items():
|
|
for file_entry in system.get("files", []):
|
|
storage = file_entry.get("storage", "embedded")
|
|
if storage != "embedded":
|
|
continue
|
|
|
|
sha1 = file_entry.get("sha1")
|
|
md5 = file_entry.get("md5")
|
|
name = file_entry.get("name", "")
|
|
|
|
found = False
|
|
if sha1 and sha1 in db.get("files", {}):
|
|
found = True
|
|
elif md5 and md5 in db.get("indexes", {}).get("by_md5", {}):
|
|
found = True
|
|
|
|
if not found:
|
|
missing.append({
|
|
"name": name,
|
|
"system": sys_id,
|
|
"sha1": sha1,
|
|
"md5": md5,
|
|
"size": file_entry.get("size"),
|
|
"destination": file_entry.get("destination", name),
|
|
})
|
|
|
|
return missing
|
|
|
|
|
|
def verify_content(data: bytes, expected: dict) -> bool:
|
|
"""Verify downloaded content matches expected hashes."""
|
|
if expected.get("sha1"):
|
|
actual = hashlib.sha1(data).hexdigest()
|
|
return actual == expected["sha1"]
|
|
if expected.get("md5"):
|
|
actual = hashlib.md5(data).hexdigest()
|
|
return actual == expected["md5"]
|
|
return False
|
|
|
|
|
|
def step1_crossref_db(entry: dict, db: dict) -> str | None:
|
|
"""Check if file exists under different name/path in database."""
|
|
sha1 = entry.get("sha1")
|
|
md5 = entry.get("md5")
|
|
|
|
if sha1 and sha1 in db.get("files", {}):
|
|
return db["files"][sha1]["path"]
|
|
|
|
if md5:
|
|
sha1_match = db.get("indexes", {}).get("by_md5", {}).get(md5)
|
|
if sha1_match and sha1_match in db["files"]:
|
|
return db["files"][sha1_match]["path"]
|
|
|
|
return None
|
|
|
|
|
|
def step2_scan_branches(entry: dict) -> bytes | None:
|
|
"""Search old git branches for the file by hash."""
|
|
name = entry["name"]
|
|
|
|
for branch in LEGACY_BRANCHES:
|
|
ref = f"origin/{branch}"
|
|
try:
|
|
subprocess.run(
|
|
["git", "rev-parse", "--verify", ref],
|
|
capture_output=True, check=True,
|
|
)
|
|
except subprocess.CalledProcessError:
|
|
continue
|
|
|
|
result = subprocess.run(
|
|
["git", "ls-tree", "-r", "--name-only", ref],
|
|
capture_output=True, text=True,
|
|
)
|
|
|
|
for filepath in result.stdout.strip().split("\n"):
|
|
if filepath.endswith(f"/{name}") or filepath == name or filepath.endswith(name):
|
|
try:
|
|
blob = subprocess.run(
|
|
["git", "show", f"{ref}:{filepath}"],
|
|
capture_output=True, check=True,
|
|
)
|
|
if verify_content(blob.stdout, entry):
|
|
return blob.stdout
|
|
except subprocess.CalledProcessError:
|
|
continue
|
|
|
|
return None
|
|
|
|
|
|
def step3_search_public_repos(entry: dict) -> bytes | None:
|
|
"""Search public GitHub BIOS repos."""
|
|
name = entry["name"]
|
|
destination = entry.get("destination", name)
|
|
|
|
for url_template in PUBLIC_REPOS:
|
|
url = url_template.format(name=name)
|
|
try:
|
|
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"})
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
data = resp.read()
|
|
if verify_content(data, entry):
|
|
return data
|
|
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
continue
|
|
|
|
if "/" in destination:
|
|
url = url_template.format(name=destination)
|
|
try:
|
|
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"})
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
data = resp.read()
|
|
if verify_content(data, entry):
|
|
return data
|
|
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
continue
|
|
|
|
return None
|
|
|
|
|
|
def step4_search_archive_org(entry: dict) -> bytes | None:
|
|
"""Search archive.org firmware collections by direct download."""
|
|
name = entry["name"]
|
|
|
|
for collection_id in ARCHIVE_ORG_COLLECTIONS:
|
|
for path in [name, f"system/{name}", f"bios/{name}"]:
|
|
url = f"https://archive.org/download/{collection_id}/{path}"
|
|
try:
|
|
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-fetch/1.0"})
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
data = resp.read()
|
|
if verify_content(data, entry):
|
|
return data
|
|
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
continue
|
|
|
|
sha1 = entry.get("sha1", "")
|
|
if not sha1:
|
|
return None
|
|
|
|
search_url = (
|
|
f"https://archive.org/advancedsearch.php?"
|
|
f"q=sha1:{sha1}&output=json&rows=1"
|
|
)
|
|
|
|
try:
|
|
req = urllib.request.Request(search_url, headers={"User-Agent": "retrobios-fetch/1.0"})
|
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
result = json.loads(resp.read())
|
|
docs = result.get("response", {}).get("docs", [])
|
|
if docs:
|
|
identifier = docs[0].get("identifier")
|
|
if identifier:
|
|
dl_url = f"https://archive.org/download/{identifier}/{name}"
|
|
try:
|
|
req2 = urllib.request.Request(dl_url, headers={"User-Agent": "retrobios-fetch/1.0"})
|
|
with urllib.request.urlopen(req2, timeout=30) as resp2:
|
|
data = resp2.read()
|
|
if verify_content(data, entry):
|
|
return data
|
|
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
pass
|
|
except (urllib.error.URLError, json.JSONDecodeError):
|
|
pass
|
|
|
|
return None
|
|
|
|
|
|
def place_file(data: bytes, entry: dict, bios_dir: str, db: dict) -> str:
|
|
"""Place a fetched BIOS file in the correct location."""
|
|
name = entry["name"]
|
|
system = entry["system"]
|
|
|
|
dest_dir = Path(bios_dir)
|
|
|
|
for manufacturer_dir in dest_dir.iterdir():
|
|
if not manufacturer_dir.is_dir():
|
|
continue
|
|
for console_dir in manufacturer_dir.iterdir():
|
|
if not console_dir.is_dir():
|
|
continue
|
|
dir_path = f"{manufacturer_dir.name}/{console_dir.name}".lower()
|
|
if any(part in dir_path for part in system.split("-") if len(part) > 2):
|
|
dest = console_dir / name
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(dest, "wb") as f:
|
|
f.write(data)
|
|
return str(dest)
|
|
|
|
dest = dest_dir / "Unknown" / system / name
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(dest, "wb") as f:
|
|
f.write(data)
|
|
return str(dest)
|
|
|
|
|
|
def fetch_missing(
|
|
missing: list[dict],
|
|
db: dict,
|
|
bios_dir: str,
|
|
dry_run: bool = False,
|
|
) -> dict:
|
|
"""Run the 5-step auto-fetch pipeline for missing files."""
|
|
stats = {"found": 0, "not_found": 0, "errors": 0}
|
|
still_missing = []
|
|
|
|
for entry in missing:
|
|
name = entry["name"]
|
|
print(f"\n Searching: {name} ({entry['system']})")
|
|
|
|
existing = step1_crossref_db(entry, db)
|
|
if existing:
|
|
print(f" [1] Found in database at: {existing}")
|
|
stats["found"] += 1
|
|
continue
|
|
|
|
if dry_run:
|
|
print(f" [DRY RUN] Would search branches, repos, archive.org")
|
|
still_missing.append(entry)
|
|
stats["not_found"] += 1
|
|
continue
|
|
|
|
data = step2_scan_branches(entry)
|
|
if data:
|
|
path = place_file(data, entry, bios_dir, db)
|
|
print(f" [2] Found in branch, saved to: {path}")
|
|
stats["found"] += 1
|
|
continue
|
|
|
|
data = step3_search_public_repos(entry)
|
|
if data:
|
|
path = place_file(data, entry, bios_dir, db)
|
|
print(f" [3] Found in public repo, saved to: {path}")
|
|
stats["found"] += 1
|
|
continue
|
|
|
|
data = step4_search_archive_org(entry)
|
|
if data:
|
|
path = place_file(data, entry, bios_dir, db)
|
|
print(f" [4] Found on archive.org, saved to: {path}")
|
|
stats["found"] += 1
|
|
continue
|
|
|
|
print(f" [5] Not found - needs community contribution")
|
|
still_missing.append(entry)
|
|
stats["not_found"] += 1
|
|
|
|
return {"stats": stats, "still_missing": still_missing}
|
|
|
|
|
|
def generate_issue_body(missing: list[dict], platform: str) -> str:
|
|
"""Generate a GitHub Issue body for missing BIOS files."""
|
|
lines = [
|
|
f"## Missing BIOS Files for {platform}",
|
|
"",
|
|
"The following BIOS files are required but not available in the repository.",
|
|
"If you have any of these files, please submit a Pull Request!",
|
|
"",
|
|
"| File | System | SHA1 | MD5 |",
|
|
"|------|--------|------|-----|",
|
|
]
|
|
|
|
for entry in missing:
|
|
sha1 = entry.get("sha1", "N/A")
|
|
md5 = entry.get("md5", "N/A")
|
|
lines.append(f"| `{entry['name']}` | {entry['system']} | `{sha1[:12]}...` | `{md5[:12]}...` |")
|
|
|
|
lines.extend([
|
|
"",
|
|
"### How to Contribute",
|
|
"",
|
|
"1. Fork this repository",
|
|
"2. Add the BIOS file to `bios/Manufacturer/Console/`",
|
|
"3. Create a Pull Request - checksums are verified automatically",
|
|
])
|
|
|
|
return "\n".join(lines)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Auto-fetch missing BIOS files")
|
|
parser.add_argument("--platform", "-p", help="Platform to check")
|
|
parser.add_argument("--all", action="store_true", help="Check all platforms")
|
|
parser.add_argument("--dry-run", action="store_true", help="Don't download, just report")
|
|
parser.add_argument("--db", default=DEFAULT_DB)
|
|
parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
|
|
parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
|
|
parser.add_argument("--create-issues", action="store_true", help="Output GitHub Issue bodies")
|
|
args = parser.parse_args()
|
|
|
|
if not os.path.exists(args.db):
|
|
print(f"Error: {args.db} not found. Run generate_db.py first.", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
db = load_database(args.db)
|
|
|
|
if args.all:
|
|
platforms = []
|
|
for f in Path(args.platforms_dir).glob("*.yml"):
|
|
if not f.name.startswith("_"):
|
|
platforms.append(f.stem)
|
|
elif args.platform:
|
|
platforms = [args.platform]
|
|
else:
|
|
parser.error("Specify --platform or --all")
|
|
return
|
|
|
|
all_still_missing = {}
|
|
|
|
for platform in sorted(platforms):
|
|
print(f"\n{'='*60}")
|
|
print(f"Platform: {platform}")
|
|
print(f"{'='*60}")
|
|
|
|
try:
|
|
config = load_platform_config(platform, args.platforms_dir)
|
|
except FileNotFoundError:
|
|
print(f" Config not found, skipping")
|
|
continue
|
|
|
|
missing = find_missing(config, db)
|
|
if not missing:
|
|
print(f" All BIOS files present!")
|
|
continue
|
|
|
|
print(f" {len(missing)} missing files")
|
|
result = fetch_missing(missing, db, args.bios_dir, args.dry_run)
|
|
|
|
if result["still_missing"]:
|
|
all_still_missing[platform] = result["still_missing"]
|
|
|
|
stats = result["stats"]
|
|
print(f"\n Results: {stats['found']} found, {stats['not_found']} not found")
|
|
|
|
if args.create_issues and all_still_missing:
|
|
print(f"\n{'='*60}")
|
|
print("GitHub Issue Bodies")
|
|
print(f"{'='*60}")
|
|
for platform, missing in all_still_missing.items():
|
|
print(f"\n--- Issue for {platform} ---\n")
|
|
print(generate_issue_body(missing, platform))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|