fix: eliminate cross-reference false positives

Skip placeholder names (e.g. <bios>.bin), resolve alias files by
MD5/SHA1 hash match, fix basename extraction for directory entries
with a trailing slash, and index bios/ directory names for
directory-type file entries. Reported missing files drop from
1011 to 113 (true missing). 149/149 tests pass.
This commit is contained in:
Abdessamad Derraz
2026-03-28 19:24:16 +01:00
parent 1825fff893
commit ddf2937f41
3 changed files with 49 additions and 13 deletions

View File

@@ -2,7 +2,7 @@
Complete BIOS and firmware packs for Batocera, BizHawk, EmuDeck, Lakka, Recalbox, RetroArch, RetroBat, RetroDECK, RetroPie, and RomM. Complete BIOS and firmware packs for Batocera, BizHawk, EmuDeck, Lakka, Recalbox, RetroArch, RetroBat, RetroDECK, RetroPie, and RomM.
**7,577** verified files across **352** systems, ready to extract into your emulator's BIOS directory. **7,581** verified files across **352** systems, ready to extract into your emulator's BIOS directory.
## Quick Install ## Quick Install
@@ -46,8 +46,8 @@ Each file is checked against the emulator's source code to match what the code a
- **10 platforms** supported with platform-specific verification - **10 platforms** supported with platform-specific verification
- **328 emulators** profiled from source (RetroArch cores + standalone) - **328 emulators** profiled from source (RetroArch cores + standalone)
- **352 systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...) - **352 systems** covered (NES, SNES, PlayStation, Saturn, Dreamcast, ...)
- **7,577 files** verified with MD5, SHA1, CRC32 checksums - **7,581 files** verified with MD5, SHA1, CRC32 checksums
- **8211 MB** total collection size - **8212 MB** total collection size
## Supported systems ## Supported systems
@@ -130,4 +130,4 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
This repository provides BIOS files for personal backup and archival purposes. This repository provides BIOS files for personal backup and archival purposes.
*Auto-generated on 2026-03-28T18:07:20Z* *Auto-generated on 2026-03-28T18:23:17Z*

View File

@@ -1,5 +1,5 @@
{ {
"generated_at": "2026-03-28T18:09:42Z", "generated_at": "2026-03-28T18:22:52Z",
"total_files": 7581, "total_files": 7581,
"total_size": 8610392265, "total_size": 8610392265,
"files": { "files": {

View File

@@ -52,16 +52,33 @@ def load_platform_files(platforms_dir: str) -> tuple[dict[str, set[str]], dict[s
def _build_supplemental_index(data_root: str = "data", def _build_supplemental_index(data_root: str = "data",
bios_root: str = "bios") -> set[str]: bios_root: str = "bios") -> set[str]:
"""Build a set of filenames in data/ directories and inside bios/ ZIPs.""" """Build a set of filenames and directory names in data/ and inside bios/ ZIPs."""
names: set[str] = set() names: set[str] = set()
root_path = Path(data_root) root_path = Path(data_root)
if root_path.is_dir(): if root_path.is_dir():
for fpath in root_path.rglob("*"): for fpath in root_path.rglob("*"):
if fpath.is_file() and not fpath.name.startswith("."): if fpath.name.startswith("."):
names.add(fpath.name) continue
names.add(fpath.name.lower()) names.add(fpath.name)
names.add(fpath.name.lower())
if fpath.is_dir():
# Also index relative path from data/subdir/ for directory entries
parts = fpath.relative_to(root_path).parts
if len(parts) > 1:
rel = "/".join(parts[1:])
names.add(rel)
names.add(rel + "/")
names.add(rel.lower())
names.add(rel.lower() + "/")
bios_path = Path(bios_root) bios_path = Path(bios_root)
if bios_path.is_dir(): if bios_path.is_dir():
# Index directory names for directory-type entries (e.g., "nestopia/samples/moepro/")
for dpath in bios_path.rglob("*"):
if dpath.is_dir() and not dpath.name.startswith("."):
names.add(dpath.name)
names.add(dpath.name.lower())
names.add(dpath.name + "/")
names.add(dpath.name.lower() + "/")
import zipfile import zipfile
for zpath in bios_path.rglob("*.zip"): for zpath in bios_path.rglob("*.zip"):
try: try:
@@ -80,7 +97,9 @@ def _find_in_repo(fname: str, by_name: dict[str, list], by_name_lower: dict[str,
data_names: set[str] | None = None) -> bool: data_names: set[str] | None = None) -> bool:
if fname in by_name: if fname in by_name:
return True return True
basename = fname.rsplit("/", 1)[-1] if "/" in fname else None # For directory entries or paths, extract the meaningful basename
stripped = fname.rstrip("/")
basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None
if basename and basename in by_name: if basename and basename in by_name:
return True return True
key = fname.lower() key = fname.lower()
@@ -127,14 +146,17 @@ def cross_reference(
gaps = [] gaps = []
covered = [] covered = []
by_md5 = db.get("indexes", {}).get("by_md5", {})
for f in emu_files: for f in emu_files:
fname = f.get("name", "") fname = f.get("name", "")
if not fname: if not fname:
continue continue
# Skip standalone-only files when comparing against libretro # Skip pattern placeholders (e.g., <bios>.bin, <user-selected>.bin)
# platforms (RetroArch, Lakka, etc.). These files are embedded if "<" in fname or ">" in fname:
# in the core and don't need to be in the platform pack. continue
# Skip standalone-only files
file_mode = f.get("mode", "both") file_mode = f.get("mode", "both")
if file_mode == "standalone": if file_mode == "standalone":
continue continue
@@ -145,6 +167,20 @@ def cross_reference(
path_field = f.get("path", "") path_field = f.get("path", "")
if path_field and path_field != fname: if path_field and path_field != fname:
in_repo = _find_in_repo(path_field, by_name, by_name_lower, data_names) in_repo = _find_in_repo(path_field, by_name, by_name_lower, data_names)
# Try MD5 hash match (handles files that exist under different names)
if not in_repo:
md5_raw = f.get("md5", "")
if md5_raw:
for md5_val in md5_raw.split(","):
md5_val = md5_val.strip().lower()
if md5_val and by_md5.get(md5_val):
in_repo = True
break
# Try SHA1 hash match
if not in_repo:
sha1 = f.get("sha1", "")
if sha1 and sha1 in db.get("files", {}):
in_repo = True
entry = { entry = {
"name": fname, "name": fname,