refactor: fix cross-reference archive grouping and path resolution

Group archived files by archive unit in find_undeclared_files instead
of reporting individual ROMs. Add path-based fallback for descriptive
names (e.g. "SeaBIOS (128 KB)" resolves via path: bios.bin). Update
_collect_extras to use archive name for pack resolution. Regenerate
database with new BIOS files. Add 6 new E2E tests covering archive
in_repo, missing archives, descriptive names, and pack extras.
This commit is contained in:
Abdessamad Derraz
2026-03-28 14:00:08 +01:00
parent 1ee8623b2e
commit 7dc8428ac1
7 changed files with 820 additions and 107 deletions

View File

@@ -2,7 +2,7 @@
Complete BIOS and firmware packs for Batocera, BizHawk, EmuDeck, Lakka, Recalbox, RetroArch, RetroBat, RetroDECK, RetroPie, and RomM.
**6,816** verified files across **352** systems, ready to extract into your emulator's BIOS directory.
**6,845** verified files across **352** systems, ready to extract into your emulator's BIOS directory.
## Download BIOS packs
@@ -11,7 +11,7 @@ Pick your platform, download the ZIP, extract to the BIOS path.
| Platform | BIOS files | Extract to | Download |
|----------|-----------|-----------|----------|
| Batocera | 359 | `/userdata/bios/` | [Download](../../releases/latest) |
| BizHawk | 118 | | [Download](../../releases/latest) |
| BizHawk | 118 | `Firmware/` | [Download](../../releases/latest) |
| EmuDeck | 161 | `Emulation/bios/` | [Download](../../releases/latest) |
| Lakka | 448 | `system/` | [Download](../../releases/latest) |
| Recalbox | 346 | `/recalbox/share/bios/` | [Download](../../releases/latest) |
@@ -29,8 +29,8 @@ Each file is checked against the emulator's source code to match what the code a
- **10 platforms** supported with platform-specific verification
- **328 emulators** profiled from source (RetroArch cores + standalone)
- **352 systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...)
- **6,816 files** verified with MD5, SHA1, CRC32 checksums
- **6863 MB** total collection size
- **6,845 files** verified with MD5, SHA1, CRC32 checksums
- **6866 MB** total collection size
## Supported systems
@@ -113,4 +113,4 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
This repository provides BIOS files for personal backup and archival purposes.
*Auto-generated on 2026-03-28T10:48:52Z*
*Auto-generated on 2026-03-28T12:55:41Z*

File diff suppressed because it is too large Load Diff

View File

@@ -122,6 +122,7 @@ nav:
- Synertek: systems/synertek.md
- Tandy: systems/tandy.md
- Texas Instruments: systems/texas-instruments.md
- Thomson: systems/thomson.md
- Tiger: systems/tiger.md
- Timex: systems/timex.md
- Tomy: systems/tomy.md

View File

@@ -337,8 +337,10 @@ def _collect_emulator_extras(
for u in undeclared:
if not u["in_repo"]:
continue
name = u["name"]
dest = u.get("path") or name
# For archive entries, use the archive name for resolution
archive = u.get("archive")
name = archive if archive else u["name"]
dest = archive if archive else (u.get("path") or u["name"])
full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen:
continue

View File

@@ -128,6 +128,7 @@ def generate_readme(db: dict, platforms_dir: str) -> str:
"RetroArch": "`system/`",
"Lakka": "`system/`",
"Batocera": "`/userdata/bios/`",
"BizHawk": "`Firmware/`",
"Recalbox": "`/recalbox/share/bios/`",
"RetroBat": "`bios/`",
"RetroPie": "`BIOS/`",

View File

@@ -246,6 +246,18 @@ def _build_expected(file_entry: dict, checks: list[str]) -> dict:
expected["adler32"] = adler_val
return expected
def _name_in_index(name: str, by_name: dict, by_path_suffix: dict | None = None) -> bool:
"""Check if a name is resolvable in the database indexes."""
if name in by_name:
return True
basename = name.rsplit("/", 1)[-1]
if basename != name and basename in by_name:
return True
if by_path_suffix and name in by_path_suffix:
return True
return False
def find_undeclared_files(
config: dict,
emulators_dir: str,
@@ -271,12 +283,16 @@ def find_undeclared_files(
declared_dd.add(ref)
by_name = db.get("indexes", {}).get("by_name", {})
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir)
relevant = resolve_platform_cores(config, profiles, target_cores=target_cores)
standalone_set = set(str(c) for c in config.get("standalone_cores", []))
undeclared = []
seen = set()
seen_files: set[str] = set()
# Track archives: archive_name -> {in_repo, emulator, files: [...], ...}
archive_entries: dict[str, dict] = {}
for emu_name, profile in sorted(profiles.items()):
if profile.get("type") in ("launcher", "alias"):
continue
@@ -290,7 +306,7 @@ def find_undeclared_files(
for f in profile.get("files", []):
fname = f.get("name", "")
if not fname or fname in seen:
if not fname or fname in seen_files:
continue
# Skip pattern placeholders (e.g., <user-selected>.bin)
if "<" in fname or ">" in fname or "*" in fname:
@@ -301,7 +317,44 @@ def find_undeclared_files(
continue
if file_mode == "libretro" and is_standalone:
continue
archive = f.get("archive")
# Skip files declared by the platform (by name or archive)
if fname in declared_names:
seen_files.add(fname)
continue
if archive and archive in declared_names:
seen_files.add(fname)
continue
seen_files.add(fname)
# Archived files are grouped by archive
if archive:
if archive not in archive_entries:
in_repo = _name_in_index(archive, by_name, by_path_suffix)
archive_entries[archive] = {
"emulator": profile.get("emulator", emu_name),
"name": archive,
"archive": archive,
"path": archive,
"required": False,
"hle_fallback": False,
"category": f.get("category", "bios"),
"in_repo": in_repo,
"note": "",
"checks": [],
"source_ref": None,
"expected": {},
"archive_file_count": 0,
"archive_required_count": 0,
}
entry = archive_entries[archive]
entry["archive_file_count"] += 1
if f.get("required", False):
entry["archive_required_count"] += 1
entry["required"] = True
continue
# Determine destination path based on mode
@@ -310,8 +363,12 @@ def find_undeclared_files(
else:
dest = f.get("path") or fname
in_repo = fname in by_name or fname.rsplit("/", 1)[-1] in by_name
seen.add(fname)
# Resolution: try name, then path basename, then path_suffix
in_repo = _name_in_index(fname, by_name, by_path_suffix)
if not in_repo and dest != fname:
path_base = dest.rsplit("/", 1)[-1]
in_repo = _name_in_index(path_base, by_name, by_path_suffix)
checks = _parse_validation(f.get("validation"))
undeclared.append({
"emulator": profile.get("emulator", emu_name),
@@ -327,6 +384,10 @@ def find_undeclared_files(
"expected": _build_expected(f, checks),
})
# Append grouped archive entries
for entry in sorted(archive_entries.values(), key=lambda e: e["name"]):
undeclared.append(entry)
return undeclared
@@ -715,7 +776,12 @@ def print_platform_result(result: dict, group: list[str], verbose: bool = False)
# Required NOT in repo = critical
if req_not_in_repo:
for u in req_not_in_repo:
print(f" MISSING (required): {u['emulator']} needs {u['name']}")
arc_count = u.get("archive_file_count")
if arc_count:
label = f"{u['name']} ({arc_count} file{'s' if arc_count != 1 else ''})"
else:
label = u["name"]
print(f" MISSING (required): {u['emulator']} needs {label}")
checks = u.get("checks", [])
if checks:
if verbose:
@@ -733,7 +799,12 @@ def print_platform_result(result: dict, group: list[str], verbose: bool = False)
print(f" [{checks_label}]")
if req_hle_not_in_repo:
for u in req_hle_not_in_repo:
print(f" MISSING (required, HLE fallback): {u['emulator']} needs {u['name']}")
arc_count = u.get("archive_file_count")
if arc_count:
label = f"{u['name']} ({arc_count} file{'s' if arc_count != 1 else ''})"
else:
label = u["name"]
print(f" MISSING (required, HLE fallback): {u['emulator']} needs {label}")
checks = u.get("checks", [])
if checks:
if verbose:

View File

@@ -103,6 +103,11 @@ class TestE2E(unittest.TestCase):
self._make_zip("composite.zip", {"b.rom": b"BBBB", "a.rom": b"AAAA"})
# ZIP for multi-hash
self._make_zip("multi.zip", {"rom.bin": b"MULTI_HASH_DATA"})
# Archive BIOS ZIP (like neogeo.zip) containing multiple ROMs
self._make_zip("test_archive.zip", {
"rom_a.bin": b"ARCHIVE_ROM_A",
"rom_b.bin": b"ARCHIVE_ROM_B",
})
# -- Build synthetic database --
self.db = self._build_db()
@@ -371,6 +376,35 @@ class TestE2E(unittest.TestCase):
with open(os.path.join(self.emulators_dir, "test_emu_dd.yml"), "w") as fh:
yaml.dump(emu_dd, fh)
# Emulator with archived files (like FBNeo with neogeo.zip)
emu_archive = {
"emulator": "TestArchiveEmu",
"type": "libretro",
"systems": ["console-a"],
"files": [
{"name": "rom_a.bin", "required": True, "archive": "test_archive.zip"},
{"name": "rom_b.bin", "required": False, "archive": "test_archive.zip"},
{"name": "missing_rom.bin", "required": True, "archive": "missing_archive.zip"},
],
}
with open(os.path.join(self.emulators_dir, "test_archive_emu.yml"), "w") as fh:
yaml.dump(emu_archive, fh)
# Emulator with descriptive name and path (like QEMU SeaBIOS)
emu_descriptive = {
"emulator": "TestDescriptive",
"type": "libretro",
"systems": ["console-a"],
"files": [
{"name": "Descriptive BIOS Name", "required": True,
"path": "present_req.bin"},
{"name": "Missing Descriptive", "required": True,
"path": "nonexistent_path.bin"},
],
}
with open(os.path.join(self.emulators_dir, "test_descriptive.yml"), "w") as fh:
yaml.dump(emu_descriptive, fh)
# Emulator with validation checks (size, crc32)
emu_val = {
"emulator": "TestValidation",
@@ -2288,5 +2322,102 @@ class TestE2E(unittest.TestCase):
self.assertFalse(any("nested.rom" in n for n in names))
# ---------------------------------------------------------------
# Archive cross-reference and descriptive name tests
# ---------------------------------------------------------------
def test_159_cross_ref_archive_in_repo(self):
    """A present archive yields exactly one grouped entry with in_repo=True."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    # Both ROMs of test_archive.zip must collapse into a single entry.
    grouped = [item for item in results if item.get("archive") == "test_archive.zip"]
    self.assertEqual(len(grouped), 1)

    entry = grouped[0]
    self.assertTrue(entry["in_repo"])
    self.assertEqual(entry["name"], "test_archive.zip")
    self.assertEqual(entry["archive_file_count"], 2)
    # One required member is enough to mark the whole archive required.
    self.assertTrue(entry["required"])
def test_160_cross_ref_archive_missing(self):
    """An absent archive is reported once, flagged in_repo=False."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    absent = [item for item in results if item.get("archive") == "missing_archive.zip"]
    self.assertEqual(len(absent), 1)

    entry = absent[0]
    self.assertFalse(entry["in_repo"])
    self.assertEqual(entry["name"], "missing_archive.zip")
    self.assertEqual(entry["archive_file_count"], 1)
def test_161_cross_ref_archive_not_individual_roms(self):
    """ROMs that live inside an archive must not be listed on their own."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    reported = {item["name"] for item in results}
    # None of the archived member names may surface as standalone entries.
    for rom_name in ("rom_a.bin", "rom_b.bin", "missing_rom.bin"):
        self.assertNotIn(rom_name, reported)
def test_162_cross_ref_descriptive_name_resolved_by_path(self):
    """A descriptive name falls back to its path basename for resolution."""
    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    by_entry_name = {}
    for item in results:
        if item["emulator"] == "TestDescriptive":
            by_entry_name[item["name"]] = item

    # path "present_req.bin" is expected to be indexed -> resolvable.
    self.assertIn("Descriptive BIOS Name", by_entry_name)
    self.assertTrue(by_entry_name["Descriptive BIOS Name"]["in_repo"])

    # path "nonexistent_path.bin" is expected to be absent -> unresolved.
    self.assertIn("Missing Descriptive", by_entry_name)
    self.assertFalse(by_entry_name["Missing Descriptive"]["in_repo"])
def test_163_cross_ref_archive_declared_by_platform_skipped(self):
    """Members of an archive the platform already declares are not reported."""
    # Platform definition that declares test_archive.zip itself.
    declared_file = {
        "name": "test_archive.zip",
        "destination": "test_archive.zip",
        "required": True,
    }
    platform_def = {
        "platform": "TestArchivePlatform",
        "verification_mode": "existence",
        "systems": {"console-a": {"files": [declared_file]}},
    }
    yml_path = os.path.join(self.platforms_dir, "test_archive_platform.yml")
    with open(yml_path, "w") as fh:
        yaml.dump(platform_def, fh)

    platform_cfg = load_platform_config("test_archive_platform", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    results = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles,
    )

    # The archive is declared, so no grouped entry may exist for it.
    grouped = [item for item in results if item.get("archive") == "test_archive.zip"]
    self.assertEqual(len(grouped), 0)
def test_164_pack_extras_use_archive_name(self):
    """Pack extras list the archive itself rather than its member ROMs."""
    from generate_pack import _collect_emulator_extras

    platform_cfg = load_platform_config("test_existence", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    extras = _collect_emulator_extras(
        platform_cfg, self.emulators_dir, self.db,
        set(), "", emu_profiles,
    )
    collected = {extra["name"] for extra in extras}

    # The archive appears under its own name; its members do not.
    self.assertIn("test_archive.zip", collected)
    for rom_name in ("rom_a.bin", "rom_b.bin"):
        self.assertNotIn(rom_name, collected)
    # An archive that is not in the repo must not be packed.
    self.assertNotIn("missing_archive.zip", collected)
if __name__ == "__main__":
unittest.main()