refactor: fix cross-reference archive grouping and path resolution

Group archived files by archive unit in find_undeclared_files instead
of reporting individual ROMs. Add path-based fallback for descriptive
names (e.g. "SeaBIOS (128 KB)" resolves via path: bios.bin). Update
_collect_emulator_extras to use the archive name for pack resolution. Regenerate
database with new bios files. 6 new E2E tests covering archive
in_repo, missing archives, descriptive names, and pack extras.
This commit is contained in:
Abdessamad Derraz
2026-03-28 14:00:08 +01:00
parent 1ee8623b2e
commit 7dc8428ac1
7 changed files with 820 additions and 107 deletions

View File

@@ -2,7 +2,7 @@
Complete BIOS and firmware packs for Batocera, BizHawk, EmuDeck, Lakka, Recalbox, RetroArch, RetroBat, RetroDECK, RetroPie, and RomM. Complete BIOS and firmware packs for Batocera, BizHawk, EmuDeck, Lakka, Recalbox, RetroArch, RetroBat, RetroDECK, RetroPie, and RomM.
**6,816** verified files across **352** systems, ready to extract into your emulator's BIOS directory. **6,845** verified files across **352** systems, ready to extract into your emulator's BIOS directory.
## Download BIOS packs ## Download BIOS packs
@@ -11,7 +11,7 @@ Pick your platform, download the ZIP, extract to the BIOS path.
| Platform | BIOS files | Extract to | Download | | Platform | BIOS files | Extract to | Download |
|----------|-----------|-----------|----------| |----------|-----------|-----------|----------|
| Batocera | 359 | `/userdata/bios/` | [Download](../../releases/latest) | | Batocera | 359 | `/userdata/bios/` | [Download](../../releases/latest) |
| BizHawk | 118 | | [Download](../../releases/latest) | | BizHawk | 118 | `Firmware/` | [Download](../../releases/latest) |
| EmuDeck | 161 | `Emulation/bios/` | [Download](../../releases/latest) | | EmuDeck | 161 | `Emulation/bios/` | [Download](../../releases/latest) |
| Lakka | 448 | `system/` | [Download](../../releases/latest) | | Lakka | 448 | `system/` | [Download](../../releases/latest) |
| Recalbox | 346 | `/recalbox/share/bios/` | [Download](../../releases/latest) | | Recalbox | 346 | `/recalbox/share/bios/` | [Download](../../releases/latest) |
@@ -29,8 +29,8 @@ Each file is checked against the emulator's source code to match what the code a
- **10 platforms** supported with platform-specific verification - **10 platforms** supported with platform-specific verification
- **328 emulators** profiled from source (RetroArch cores + standalone) - **328 emulators** profiled from source (RetroArch cores + standalone)
- **352 systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...) - **352 systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...)
- **6,816 files** verified with MD5, SHA1, CRC32 checksums - **6,845 files** verified with MD5, SHA1, CRC32 checksums
- **6863 MB** total collection size - **6866 MB** total collection size
## Supported systems ## Supported systems
@@ -113,4 +113,4 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
This repository provides BIOS files for personal backup and archival purposes. This repository provides BIOS files for personal backup and archival purposes.
*Auto-generated on 2026-03-28T10:48:52Z* *Auto-generated on 2026-03-28T12:55:41Z*

File diff suppressed because it is too large Load Diff

View File

@@ -122,6 +122,7 @@ nav:
- Synertek: systems/synertek.md - Synertek: systems/synertek.md
- Tandy: systems/tandy.md - Tandy: systems/tandy.md
- Texas Instruments: systems/texas-instruments.md - Texas Instruments: systems/texas-instruments.md
- Thomson: systems/thomson.md
- Tiger: systems/tiger.md - Tiger: systems/tiger.md
- Timex: systems/timex.md - Timex: systems/timex.md
- Tomy: systems/tomy.md - Tomy: systems/tomy.md

View File

@@ -337,8 +337,10 @@ def _collect_emulator_extras(
for u in undeclared: for u in undeclared:
if not u["in_repo"]: if not u["in_repo"]:
continue continue
name = u["name"] # For archive entries, use the archive name for resolution
dest = u.get("path") or name archive = u.get("archive")
name = archive if archive else u["name"]
dest = archive if archive else (u.get("path") or u["name"])
full_dest = f"{base_dest}/{dest}" if base_dest else dest full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen: if full_dest in seen:
continue continue

View File

@@ -128,6 +128,7 @@ def generate_readme(db: dict, platforms_dir: str) -> str:
"RetroArch": "`system/`", "RetroArch": "`system/`",
"Lakka": "`system/`", "Lakka": "`system/`",
"Batocera": "`/userdata/bios/`", "Batocera": "`/userdata/bios/`",
"BizHawk": "`Firmware/`",
"Recalbox": "`/recalbox/share/bios/`", "Recalbox": "`/recalbox/share/bios/`",
"RetroBat": "`bios/`", "RetroBat": "`bios/`",
"RetroPie": "`BIOS/`", "RetroPie": "`BIOS/`",

View File

@@ -246,6 +246,18 @@ def _build_expected(file_entry: dict, checks: list[str]) -> dict:
expected["adler32"] = adler_val expected["adler32"] = adler_val
return expected return expected
def _name_in_index(name: str, by_name: dict, by_path_suffix: dict | None = None) -> bool:
"""Check if a name is resolvable in the database indexes."""
if name in by_name:
return True
basename = name.rsplit("/", 1)[-1]
if basename != name and basename in by_name:
return True
if by_path_suffix and name in by_path_suffix:
return True
return False
def find_undeclared_files( def find_undeclared_files(
config: dict, config: dict,
emulators_dir: str, emulators_dir: str,
@@ -271,12 +283,16 @@ def find_undeclared_files(
declared_dd.add(ref) declared_dd.add(ref)
by_name = db.get("indexes", {}).get("by_name", {}) by_name = db.get("indexes", {}).get("by_name", {})
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir) profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir)
relevant = resolve_platform_cores(config, profiles, target_cores=target_cores) relevant = resolve_platform_cores(config, profiles, target_cores=target_cores)
standalone_set = set(str(c) for c in config.get("standalone_cores", [])) standalone_set = set(str(c) for c in config.get("standalone_cores", []))
undeclared = [] undeclared = []
seen = set() seen_files: set[str] = set()
# Track archives: archive_name -> {in_repo, emulator, files: [...], ...}
archive_entries: dict[str, dict] = {}
for emu_name, profile in sorted(profiles.items()): for emu_name, profile in sorted(profiles.items()):
if profile.get("type") in ("launcher", "alias"): if profile.get("type") in ("launcher", "alias"):
continue continue
@@ -290,7 +306,7 @@ def find_undeclared_files(
for f in profile.get("files", []): for f in profile.get("files", []):
fname = f.get("name", "") fname = f.get("name", "")
if not fname or fname in seen: if not fname or fname in seen_files:
continue continue
# Skip pattern placeholders (e.g., <user-selected>.bin) # Skip pattern placeholders (e.g., <user-selected>.bin)
if "<" in fname or ">" in fname or "*" in fname: if "<" in fname or ">" in fname or "*" in fname:
@@ -301,7 +317,44 @@ def find_undeclared_files(
continue continue
if file_mode == "libretro" and is_standalone: if file_mode == "libretro" and is_standalone:
continue continue
archive = f.get("archive")
# Skip files declared by the platform (by name or archive)
if fname in declared_names: if fname in declared_names:
seen_files.add(fname)
continue
if archive and archive in declared_names:
seen_files.add(fname)
continue
seen_files.add(fname)
# Archived files are grouped by archive
if archive:
if archive not in archive_entries:
in_repo = _name_in_index(archive, by_name, by_path_suffix)
archive_entries[archive] = {
"emulator": profile.get("emulator", emu_name),
"name": archive,
"archive": archive,
"path": archive,
"required": False,
"hle_fallback": False,
"category": f.get("category", "bios"),
"in_repo": in_repo,
"note": "",
"checks": [],
"source_ref": None,
"expected": {},
"archive_file_count": 0,
"archive_required_count": 0,
}
entry = archive_entries[archive]
entry["archive_file_count"] += 1
if f.get("required", False):
entry["archive_required_count"] += 1
entry["required"] = True
continue continue
# Determine destination path based on mode # Determine destination path based on mode
@@ -310,8 +363,12 @@ def find_undeclared_files(
else: else:
dest = f.get("path") or fname dest = f.get("path") or fname
in_repo = fname in by_name or fname.rsplit("/", 1)[-1] in by_name # Resolution: try name, then path basename, then path_suffix
seen.add(fname) in_repo = _name_in_index(fname, by_name, by_path_suffix)
if not in_repo and dest != fname:
path_base = dest.rsplit("/", 1)[-1]
in_repo = _name_in_index(path_base, by_name, by_path_suffix)
checks = _parse_validation(f.get("validation")) checks = _parse_validation(f.get("validation"))
undeclared.append({ undeclared.append({
"emulator": profile.get("emulator", emu_name), "emulator": profile.get("emulator", emu_name),
@@ -327,6 +384,10 @@ def find_undeclared_files(
"expected": _build_expected(f, checks), "expected": _build_expected(f, checks),
}) })
# Append grouped archive entries
for entry in sorted(archive_entries.values(), key=lambda e: e["name"]):
undeclared.append(entry)
return undeclared return undeclared
@@ -715,7 +776,12 @@ def print_platform_result(result: dict, group: list[str], verbose: bool = False)
# Required NOT in repo = critical # Required NOT in repo = critical
if req_not_in_repo: if req_not_in_repo:
for u in req_not_in_repo: for u in req_not_in_repo:
print(f" MISSING (required): {u['emulator']} needs {u['name']}") arc_count = u.get("archive_file_count")
if arc_count:
label = f"{u['name']} ({arc_count} file{'s' if arc_count != 1 else ''})"
else:
label = u["name"]
print(f" MISSING (required): {u['emulator']} needs {label}")
checks = u.get("checks", []) checks = u.get("checks", [])
if checks: if checks:
if verbose: if verbose:
@@ -733,7 +799,12 @@ def print_platform_result(result: dict, group: list[str], verbose: bool = False)
print(f" [{checks_label}]") print(f" [{checks_label}]")
if req_hle_not_in_repo: if req_hle_not_in_repo:
for u in req_hle_not_in_repo: for u in req_hle_not_in_repo:
print(f" MISSING (required, HLE fallback): {u['emulator']} needs {u['name']}") arc_count = u.get("archive_file_count")
if arc_count:
label = f"{u['name']} ({arc_count} file{'s' if arc_count != 1 else ''})"
else:
label = u["name"]
print(f" MISSING (required, HLE fallback): {u['emulator']} needs {label}")
checks = u.get("checks", []) checks = u.get("checks", [])
if checks: if checks:
if verbose: if verbose:

View File

@@ -103,6 +103,11 @@ class TestE2E(unittest.TestCase):
self._make_zip("composite.zip", {"b.rom": b"BBBB", "a.rom": b"AAAA"}) self._make_zip("composite.zip", {"b.rom": b"BBBB", "a.rom": b"AAAA"})
# ZIP for multi-hash # ZIP for multi-hash
self._make_zip("multi.zip", {"rom.bin": b"MULTI_HASH_DATA"}) self._make_zip("multi.zip", {"rom.bin": b"MULTI_HASH_DATA"})
# Archive BIOS ZIP (like neogeo.zip) containing multiple ROMs
self._make_zip("test_archive.zip", {
"rom_a.bin": b"ARCHIVE_ROM_A",
"rom_b.bin": b"ARCHIVE_ROM_B",
})
# -- Build synthetic database -- # -- Build synthetic database --
self.db = self._build_db() self.db = self._build_db()
@@ -371,6 +376,35 @@ class TestE2E(unittest.TestCase):
with open(os.path.join(self.emulators_dir, "test_emu_dd.yml"), "w") as fh: with open(os.path.join(self.emulators_dir, "test_emu_dd.yml"), "w") as fh:
yaml.dump(emu_dd, fh) yaml.dump(emu_dd, fh)
# Emulator with archived files (like FBNeo with neogeo.zip)
emu_archive = {
"emulator": "TestArchiveEmu",
"type": "libretro",
"systems": ["console-a"],
"files": [
{"name": "rom_a.bin", "required": True, "archive": "test_archive.zip"},
{"name": "rom_b.bin", "required": False, "archive": "test_archive.zip"},
{"name": "missing_rom.bin", "required": True, "archive": "missing_archive.zip"},
],
}
with open(os.path.join(self.emulators_dir, "test_archive_emu.yml"), "w") as fh:
yaml.dump(emu_archive, fh)
# Emulator with descriptive name and path (like QEMU SeaBIOS)
emu_descriptive = {
"emulator": "TestDescriptive",
"type": "libretro",
"systems": ["console-a"],
"files": [
{"name": "Descriptive BIOS Name", "required": True,
"path": "present_req.bin"},
{"name": "Missing Descriptive", "required": True,
"path": "nonexistent_path.bin"},
],
}
with open(os.path.join(self.emulators_dir, "test_descriptive.yml"), "w") as fh:
yaml.dump(emu_descriptive, fh)
# Emulator with validation checks (size, crc32) # Emulator with validation checks (size, crc32)
emu_val = { emu_val = {
"emulator": "TestValidation", "emulator": "TestValidation",
@@ -2288,5 +2322,102 @@ class TestE2E(unittest.TestCase):
self.assertFalse(any("nested.rom" in n for n in names)) self.assertFalse(any("nested.rom" in n for n in names))
# ---------------------------------------------------------------
# Archive cross-reference and descriptive name tests
# ---------------------------------------------------------------
def test_159_cross_ref_archive_in_repo(self):
    """Files sharing an archive collapse into one entry flagged in_repo."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    # Expect exactly one record carrying archive == "test_archive.zip".
    grouped = [
        item for item in results
        if item.get("archive") == "test_archive.zip"
    ]
    self.assertEqual(len(grouped), 1)

    record = grouped[0]
    self.assertTrue(record["in_repo"])
    self.assertEqual(record["name"], "test_archive.zip")
    self.assertEqual(record["archive_file_count"], 2)
    # The grouped entry is expected to be marked required.
    self.assertTrue(record["required"])
def test_160_cross_ref_archive_missing(self):
    """An archive absent from the repo is one entry with in_repo False."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    absent = [
        item for item in results
        if item.get("archive") == "missing_archive.zip"
    ]
    self.assertEqual(len(absent), 1)

    record = absent[0]
    self.assertFalse(record["in_repo"])
    self.assertEqual(record["name"], "missing_archive.zip")
    self.assertEqual(record["archive_file_count"], 1)
def test_161_cross_ref_archive_not_individual_roms(self):
    """ROM names that live inside an archive must not be listed on their own."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    reported_names = set()
    for item in results:
        reported_names.add(item["name"])

    # None of the per-ROM names may surface as an undeclared entry.
    self.assertNotIn("rom_a.bin", reported_names)
    self.assertNotIn("rom_b.bin", reported_names)
    self.assertNotIn("missing_rom.bin", reported_names)
def test_162_cross_ref_descriptive_name_resolved_by_path(self):
    """A descriptive name is in_repo only when its path resolves."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    # Index the TestDescriptive entries by their reported name.
    by_label = {}
    for item in results:
        if item["emulator"] == "TestDescriptive":
            by_label[item["name"]] = item

    # Entry whose path is "present_req.bin": expected to resolve.
    self.assertIn("Descriptive BIOS Name", by_label)
    self.assertTrue(by_label["Descriptive BIOS Name"]["in_repo"])

    # Entry whose path is "nonexistent_path.bin": expected not to resolve.
    self.assertIn("Missing Descriptive", by_label)
    self.assertFalse(by_label["Missing Descriptive"]["in_repo"])
def test_163_cross_ref_archive_declared_by_platform_skipped(self):
    """No archive entry is reported when the platform declares the archive."""
    # Platform definition that lists test_archive.zip as one of its files.
    declared_file = {
        "name": "test_archive.zip",
        "destination": "test_archive.zip",
        "required": True,
    }
    platform_def = {
        "platform": "TestArchivePlatform",
        "verification_mode": "existence",
        "systems": {
            "console-a": {
                "files": [declared_file],
            },
        },
    }
    platform_path = os.path.join(self.platforms_dir, "test_archive_platform.yml")
    with open(platform_path, "w") as fh:
        yaml.dump(platform_def, fh)

    platform_cfg = load_platform_config("test_archive_platform", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    # With the archive declared, nothing should be grouped under it.
    grouped = [
        item for item in results
        if item.get("archive") == "test_archive.zip"
    ]
    self.assertEqual(len(grouped), 0)
def test_164_pack_extras_use_archive_name(self):
    """Pack extras list the archive name instead of the ROMs inside it."""
    from generate_pack import _collect_emulator_extras

    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    collected = _collect_emulator_extras(
        platform_cfg,
        self.emulators_dir,
        self.db,
        set(),
        "",
        emu_profiles,
    )

    collected_names = set()
    for extra in collected:
        collected_names.add(extra["name"])

    # The archive is listed; its member ROMs are not.
    self.assertIn("test_archive.zip", collected_names)
    self.assertNotIn("rom_a.bin", collected_names)
    self.assertNotIn("rom_b.bin", collected_names)
    # The archive that is not in the repo must be left out of the extras.
    self.assertNotIn("missing_archive.zip", collected_names)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()