mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
generate_db.py now reads aliases from emulator YAMLs and indexes them in database.json by_name. resolve_local_file in common.py tries all alias names when the primary name fails to match. beetle_psx alt_names renamed to aliases (was not indexed before). snes9x BS-X.bios, np2kai FONT.ROM/ide.rom/pci.rom fallbacks, all now formally declared as aliases and indexed. verify --all and generate_pack --all pass with 0 regressions.
361 lines · 12 KiB · Python
#!/usr/bin/env python3
|
|
"""Scan bios/ directory and generate multi-indexed database.json.
|
|
|
|
Usage:
|
|
python scripts/generate_db.py [--force] [--bios-dir DIR] [--output FILE]
|
|
|
|
Supports incremental mode via .cache/db_cache.json (mtime-based).
|
|
Use --force to rehash all files.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
from common import compute_hashes
|
|
|
|
CACHE_DIR = ".cache"
|
|
CACHE_FILE = os.path.join(CACHE_DIR, "db_cache.json")
|
|
DEFAULT_BIOS_DIR = "bios"
|
|
DEFAULT_OUTPUT = "database.json"
|
|
|
|
SKIP_PATTERNS = {".git", ".github", "__pycache__", ".cache", ".DS_Store", "desktop.ini"}
|
|
|
|
|
|
def should_skip(path: Path) -> bool:
|
|
"""Check if a path should be skipped. Allows .variants/ directories."""
|
|
for part in path.parts:
|
|
if part in SKIP_PATTERNS:
|
|
return True
|
|
if part.startswith(".") and part != ".variants":
|
|
return True
|
|
return False
|
|
|
|
|
|
def _canonical_name(filepath: Path) -> str:
|
|
"""Get canonical filename, stripping .variants/ hash suffix."""
|
|
name = filepath.name
|
|
if "/.variants/" in str(filepath) or "\\.variants\\" in str(filepath):
|
|
# naomi2.zip.da79eca4 -> naomi2.zip
|
|
parts = name.rsplit(".", 1)
|
|
if len(parts) == 2 and len(parts[1]) == 8 and all(c in "0123456789abcdef" for c in parts[1]):
|
|
return parts[0]
|
|
return name
|
|
|
|
|
|
def scan_bios_dir(bios_dir: Path, cache: dict, force: bool) -> dict:
|
|
"""Scan bios directory and compute hashes, using cache when possible."""
|
|
files = {}
|
|
aliases = {}
|
|
new_cache = {}
|
|
|
|
for filepath in sorted(bios_dir.rglob("*")):
|
|
if not filepath.is_file():
|
|
continue
|
|
if should_skip(filepath.relative_to(bios_dir)):
|
|
continue
|
|
|
|
rel_path = str(filepath.relative_to(bios_dir.parent))
|
|
stat = filepath.stat()
|
|
mtime = stat.st_mtime
|
|
size = stat.st_size
|
|
cache_key = rel_path
|
|
|
|
if not force and cache_key in cache:
|
|
cached = cache[cache_key]
|
|
if cached.get("mtime") == mtime and cached.get("size") == size:
|
|
hashes = {
|
|
"sha1": cached["sha1"],
|
|
"md5": cached["md5"],
|
|
"sha256": cached["sha256"],
|
|
"crc32": cached["crc32"],
|
|
}
|
|
sha1 = hashes["sha1"]
|
|
if sha1 in files:
|
|
if sha1 not in aliases:
|
|
aliases[sha1] = []
|
|
aliases[sha1].append({"name": _canonical_name(filepath), "path": rel_path})
|
|
else:
|
|
entry = {
|
|
"path": rel_path,
|
|
"name": _canonical_name(filepath),
|
|
"size": size,
|
|
**hashes,
|
|
}
|
|
files[sha1] = entry
|
|
new_cache[cache_key] = {**hashes, "mtime": mtime, "size": size}
|
|
continue
|
|
|
|
hashes = compute_hashes(filepath)
|
|
sha1 = hashes["sha1"]
|
|
if sha1 in files:
|
|
if sha1 not in aliases:
|
|
aliases[sha1] = []
|
|
aliases[sha1].append({"name": _canonical_name(filepath), "path": rel_path})
|
|
else:
|
|
entry = {
|
|
"path": rel_path,
|
|
"name": _canonical_name(filepath),
|
|
"size": size,
|
|
**hashes,
|
|
}
|
|
files[sha1] = entry
|
|
new_cache[cache_key] = {**hashes, "mtime": mtime, "size": size}
|
|
|
|
return files, aliases, new_cache
|
|
|
|
|
|
def build_indexes(files: dict, aliases: dict) -> dict:
|
|
"""Build secondary indexes for fast lookup."""
|
|
by_md5 = {}
|
|
by_name = {}
|
|
by_crc32 = {}
|
|
|
|
for sha1, entry in files.items():
|
|
by_md5[entry["md5"]] = sha1
|
|
|
|
name = entry["name"]
|
|
if name not in by_name:
|
|
by_name[name] = []
|
|
by_name[name].append(sha1)
|
|
|
|
by_crc32[entry["crc32"]] = sha1
|
|
|
|
# Add alias names to by_name index (aliases have different filenames for same SHA1)
|
|
for sha1, alias_list in aliases.items():
|
|
for alias in alias_list:
|
|
name = alias["name"]
|
|
if name not in by_name:
|
|
by_name[name] = []
|
|
if sha1 not in by_name[name]:
|
|
by_name[name].append(sha1)
|
|
|
|
return {
|
|
"by_md5": by_md5,
|
|
"by_name": by_name,
|
|
"by_crc32": by_crc32,
|
|
}
|
|
|
|
|
|
def load_cache(cache_path: str) -> dict:
|
|
"""Load cache file if it exists."""
|
|
try:
|
|
with open(cache_path) as f:
|
|
return json.load(f)
|
|
except (FileNotFoundError, json.JSONDecodeError):
|
|
return {}
|
|
|
|
|
|
def save_cache(cache_path: str, cache: dict):
|
|
"""Save cache to disk."""
|
|
os.makedirs(os.path.dirname(cache_path), exist_ok=True)
|
|
with open(cache_path, "w") as f:
|
|
json.dump(cache, f)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Generate multi-indexed BIOS database")
|
|
parser.add_argument("--force", action="store_true", help="Force rehash all files")
|
|
parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR, help="BIOS directory path")
|
|
parser.add_argument("--output", "-o", default=DEFAULT_OUTPUT, help="Output JSON file")
|
|
args = parser.parse_args()
|
|
|
|
bios_dir = Path(args.bios_dir)
|
|
if not bios_dir.is_dir():
|
|
print(f"Error: BIOS directory '{bios_dir}' not found", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
cache = {} if args.force else load_cache(CACHE_FILE)
|
|
|
|
print(f"Scanning {bios_dir}/ ...")
|
|
files, aliases, new_cache = scan_bios_dir(bios_dir, cache, args.force)
|
|
|
|
if not files:
|
|
print("Warning: No BIOS files found", file=sys.stderr)
|
|
|
|
platform_aliases = _collect_all_aliases(files)
|
|
for sha1, name_list in platform_aliases.items():
|
|
for alias_entry in name_list:
|
|
if sha1 not in aliases:
|
|
aliases[sha1] = []
|
|
aliases[sha1].append(alias_entry)
|
|
|
|
indexes = build_indexes(files, aliases)
|
|
total_size = sum(entry["size"] for entry in files.values())
|
|
|
|
database = {
|
|
"generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
"total_files": len(files),
|
|
"total_size": total_size,
|
|
"files": files,
|
|
"indexes": indexes,
|
|
}
|
|
|
|
with open(args.output, "w") as f:
|
|
json.dump(database, f, indent=2)
|
|
|
|
save_cache(CACHE_FILE, new_cache)
|
|
|
|
alias_count = sum(len(v) for v in aliases.values())
|
|
name_count = len(indexes["by_name"])
|
|
print(f"Generated {args.output}: {len(files)} files, {total_size:,} bytes total")
|
|
print(f" Name index: {name_count} names ({alias_count} aliases)")
|
|
return 0
|
|
|
|
|
|
def _collect_all_aliases(files: dict) -> dict:
|
|
"""Collect alternate filenames from platform YAMLs, core-info, and known aliases.
|
|
|
|
Registers alternate names so generate_pack can resolve files stored under different names.
|
|
"""
|
|
md5_to_sha1 = {}
|
|
name_to_sha1 = {}
|
|
for sha1, entry in files.items():
|
|
md5_to_sha1[entry["md5"]] = sha1
|
|
name_to_sha1[entry["name"]] = sha1
|
|
|
|
aliases = {}
|
|
|
|
def _add_alias(name: str, matched_sha1: str):
|
|
if not name or name in name_to_sha1:
|
|
return
|
|
if matched_sha1 not in aliases:
|
|
aliases[matched_sha1] = []
|
|
existing = {a["name"] for a in aliases[matched_sha1]}
|
|
if name not in existing:
|
|
aliases[matched_sha1].append({"name": name, "path": ""})
|
|
|
|
platforms_dir = Path("platforms")
|
|
if platforms_dir.is_dir():
|
|
try:
|
|
import yaml
|
|
for config_file in platforms_dir.glob("*.yml"):
|
|
if config_file.name.startswith("_"):
|
|
continue
|
|
try:
|
|
with open(config_file) as f:
|
|
config = yaml.safe_load(f) or {}
|
|
except (yaml.YAMLError, OSError) as e:
|
|
print(f"Warning: {config_file.name}: {e}", file=sys.stderr)
|
|
continue
|
|
|
|
for sys_id, system in config.get("systems", {}).items():
|
|
for file_entry in system.get("files", []):
|
|
name = file_entry.get("name", "")
|
|
sha1 = file_entry.get("sha1", "")
|
|
md5 = file_entry.get("md5", "")
|
|
|
|
matched = None
|
|
if sha1 and sha1 in files:
|
|
matched = sha1
|
|
elif md5 and md5 in md5_to_sha1:
|
|
matched = md5_to_sha1[md5]
|
|
|
|
if matched:
|
|
_add_alias(name, matched)
|
|
except ImportError:
|
|
pass
|
|
|
|
try:
|
|
sys.path.insert(0, "scripts")
|
|
from scraper.coreinfo_scraper import Scraper as CoreInfoScraper
|
|
ci_reqs = CoreInfoScraper().fetch_requirements()
|
|
for r in ci_reqs:
|
|
basename = r.name
|
|
# Try to match by MD5 or by known canonical names
|
|
matched = None
|
|
if r.md5 and r.md5 in md5_to_sha1:
|
|
matched = md5_to_sha1[r.md5]
|
|
if matched:
|
|
_add_alias(basename, matched)
|
|
except (ImportError, ConnectionError, OSError):
|
|
pass
|
|
|
|
# Collect aliases from emulator YAMLs (aliases field on file entries)
|
|
emulators_dir = Path("emulators")
|
|
if emulators_dir.is_dir():
|
|
try:
|
|
import yaml
|
|
for emu_file in emulators_dir.glob("*.yml"):
|
|
try:
|
|
with open(emu_file) as f:
|
|
emu_config = yaml.safe_load(f) or {}
|
|
except (yaml.YAMLError, OSError):
|
|
continue
|
|
for file_entry in emu_config.get("files", []):
|
|
entry_aliases = file_entry.get("aliases", [])
|
|
if not entry_aliases:
|
|
continue
|
|
entry_name = file_entry.get("name", "")
|
|
sha1 = file_entry.get("sha1", "")
|
|
md5 = file_entry.get("md5", "")
|
|
matched = None
|
|
if sha1 and sha1 in files:
|
|
matched = sha1
|
|
elif md5 and md5 in md5_to_sha1:
|
|
matched = md5_to_sha1[md5]
|
|
elif entry_name and entry_name in name_to_sha1:
|
|
matched = name_to_sha1[entry_name]
|
|
if matched:
|
|
for alias_name in entry_aliases:
|
|
_add_alias(alias_name, matched)
|
|
except ImportError:
|
|
pass
|
|
|
|
# Identical content named differently across platforms/cores
|
|
KNOWN_ALIAS_GROUPS = [
|
|
# ColecoVision - all these are the same 8KB BIOS
|
|
["colecovision.rom", "coleco.rom", "BIOS.col", "bioscv.rom"],
|
|
# Game Boy - DMG boot ROM
|
|
["gb_bios.bin", "dmg_boot.bin", "dmg_rom.bin", "dmg0_rom.bin"],
|
|
# Game Boy Color - CGB boot ROM
|
|
["gbc_bios.bin", "cgb_boot.bin", "cgb0_boot.bin", "cgb_agb_boot.bin"],
|
|
# Super Game Boy
|
|
["sgb_bios.bin", "sgb_boot.bin", "sgb.boot.rom"],
|
|
["sgb2_bios.bin", "sgb2_boot.bin", "sgb2.boot.rom"],
|
|
["sgb1.program.rom", "SGB1.sfc/program.rom"],
|
|
["sgb2.program.rom", "SGB2.sfc/program.rom"],
|
|
# Nintendo DS
|
|
["bios7.bin", "nds7.bin"],
|
|
["bios9.bin", "nds9.bin"],
|
|
["dsi_sd_card.bin", "nds_sd_card.bin"],
|
|
# MSX
|
|
["MSX.ROM", "MSX.rom", "Machines/Shared Roms/MSX.rom"],
|
|
# NEC PC-98
|
|
["N88KNJ1.ROM", "n88knj1.rom", "quasi88/n88knj1.rom"],
|
|
# Enterprise
|
|
["zt19uk.rom", "zt19hfnt.rom", "ep128emu/roms/zt19hfnt.rom"],
|
|
# ZX Spectrum
|
|
["48.rom", "zx48.rom"],
|
|
# SquirrelJME - all JARs are the same
|
|
["squirreljme.sqc", "squirreljme.jar", "squirreljme-fast.jar",
|
|
"squirreljme-slow.jar", "squirreljme-slow-test.jar",
|
|
"squirreljme-0.3.0.jar", "squirreljme-0.3.0-fast.jar",
|
|
"squirreljme-0.3.0-slow.jar", "squirreljme-0.3.0-slow-test.jar"],
|
|
# Arcade - FBNeo spectrum
|
|
["spectrum.zip", "fbneo/spectrum.zip", "spec48k.zip"],
|
|
]
|
|
|
|
for group in KNOWN_ALIAS_GROUPS:
|
|
matched_sha1 = None
|
|
for name in group:
|
|
if name in name_to_sha1:
|
|
matched_sha1 = name_to_sha1[name]
|
|
break
|
|
if not matched_sha1:
|
|
continue
|
|
for name in group:
|
|
_add_alias(name, matched_sha1)
|
|
|
|
return aliases
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main() or 0)
|