feat: verify_pack checks data dirs and rebuilt ZIPs

verify_pack now verifies files against data directory caches and
validates rebuilt MAME ZIPs by comparing inner ROM CRC32s against
source. Reduces false untracked count from 6242 to 0 for RetroArch.
This commit is contained in:
Abdessamad Derraz
2026-03-29 14:04:46 +02:00
parent dee37c2530
commit ae71a41b32

View File

@@ -1790,13 +1790,15 @@ def main():
# Skip platform conformance for filtered/split/custom packs # Skip platform conformance for filtered/split/custom packs
skip_conf = bool(system_filter or args.split) skip_conf = bool(system_filter or args.split)
all_ok = verify_and_finalize_packs(args.output_dir, db, all_ok = verify_and_finalize_packs(args.output_dir, db,
skip_conformance=skip_conf) skip_conformance=skip_conf,
data_registry=data_registry)
# Also verify split subdirectories # Also verify split subdirectories
if args.split: if args.split:
for entry in os.listdir(args.output_dir): for entry in os.listdir(args.output_dir):
sub = os.path.join(args.output_dir, entry) sub = os.path.join(args.output_dir, entry)
if os.path.isdir(sub) and entry.endswith("_Split"): if os.path.isdir(sub) and entry.endswith("_Split"):
ok = verify_and_finalize_packs(sub, db, skip_conformance=True) ok = verify_and_finalize_packs(sub, db, skip_conformance=True,
data_registry=data_registry)
all_ok = all_ok and ok all_ok = all_ok and ok
if not all_ok: if not all_ok:
print("WARNING: some packs have verification errors") print("WARNING: some packs have verification errors")
@@ -2037,16 +2039,33 @@ def generate_manifest(
# Post-generation pack verification + manifest + SHA256SUMS # Post-generation pack verification + manifest + SHA256SUMS
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def verify_pack(zip_path: str, db: dict) -> tuple[bool, dict]: def verify_pack(zip_path: str, db: dict,
data_registry: dict | None = None) -> tuple[bool, dict]:
"""Verify a generated pack ZIP by re-hashing every file inside. """Verify a generated pack ZIP by re-hashing every file inside.
Opens the ZIP, computes SHA1 for each file, and checks against Checks against database.json, data directory caches, and verifies
database.json. Returns (all_ok, manifest_dict). rebuilt ZIP content by comparing inner CRC32s against source.
Returns (all_ok, manifest_dict).
The manifest contains per-file metadata for self-documentation.
""" """
files_db = db.get("files", {}) # SHA1 -> file_info files_db = db.get("files", {}) # SHA1 -> file_info
by_md5 = db.get("indexes", {}).get("by_md5", {}) # MD5 -> SHA1 by_md5 = db.get("indexes", {}).get("by_md5", {}) # MD5 -> SHA1
by_name = db.get("indexes", {}).get("by_name", {}) # name -> [SHA1]
# Data directory file index
_data_index: dict[str, list[str]] = {}
_data_path_index: dict[str, str] = {}
if data_registry:
for _dk, _de in data_registry.items():
cache = _de.get("local_cache", "")
if not cache or not os.path.isdir(cache):
continue
for _r, _d, _fns in os.walk(cache):
for _fn in _fns:
_fp = os.path.join(_r, _fn)
_rel = os.path.relpath(_fp, cache)
_data_path_index[_rel] = _fp
_data_index.setdefault(_fn, []).append(_fp)
manifest = { manifest = {
"version": 1, "version": 1,
"generator": "retrobios generate_pack.py", "generator": "retrobios generate_pack.py",
@@ -2094,6 +2113,59 @@ def verify_pack(zip_path: str, db: dict) -> tuple[bool, dict]:
else: else:
status = "untracked" status = "untracked"
# Rebuilt ZIP: verify inner ROM CRC32s match source
if status == "untracked" and name.endswith(".zip"):
_bn = os.path.basename(name)
for _src_sha1 in by_name.get(_bn, []):
if _src_sha1 not in files_db:
continue
_src_path = files_db[_src_sha1]["path"]
if not os.path.exists(_src_path):
continue
try:
import io as _io
with zipfile.ZipFile(_src_path) as _sz:
_sc = {i.filename: i.CRC for i in _sz.infolist() if not i.is_dir()}
with zipfile.ZipFile(_io.BytesIO(zf.read(name))) as _pz:
_pc = {i.filename: i.CRC for i in _pz.infolist() if not i.is_dir()}
if _sc == _pc:
status = "verified_rebuild"
file_name = _bn
break
except (zipfile.BadZipFile, OSError):
continue
# Data directory: check against cached files
if status == "untracked" and _data_index:
_bn = os.path.basename(name)
_pr = name[len("system/"):] if name.startswith("system/") else name
_cands = []
if _pr in _data_path_index:
_cands.append(_data_path_index[_pr])
for _dp in _data_index.get(_bn, []):
if _dp not in _cands:
_cands.append(_dp)
for _dp in _cands:
if not os.path.exists(_dp):
continue
if os.path.getsize(_dp) == size:
status = "verified_data"
file_name = _bn
break
if name.endswith(".zip") and _dp.endswith(".zip"):
try:
import io as _io2
with zipfile.ZipFile(_io2.BytesIO(zf.read(name))) as _pz2:
_pc2 = {i.filename: i.CRC for i in _pz2.infolist() if not i.is_dir()}
with zipfile.ZipFile(_dp) as _dz:
_dc = {i.filename: i.CRC for i in _dz.infolist() if not i.is_dir()}
if _pc2 == _dc:
status = "verified_data"
file_name = _bn
break
except (zipfile.BadZipFile, OSError):
continue
manifest["files"].append({ manifest["files"].append({
"path": name, "path": name,
"sha1": sha1, "sha1": sha1,
@@ -2111,7 +2183,7 @@ def verify_pack(zip_path: str, db: dict) -> tuple[bool, dict]:
if expected_sha1 and expected_sha1.lower() != sha1.lower(): if expected_sha1 and expected_sha1.lower() != sha1.lower():
errors.append(f"{name}: SHA1 mismatch (expected {expected_sha1}, got {sha1})") errors.append(f"{name}: SHA1 mismatch (expected {expected_sha1}, got {sha1})")
verified = sum(1 for f in manifest["files"] if f["status"] == "verified") verified = sum(1 for f in manifest["files"] if f["status"].startswith("verified"))
untracked = sum(1 for f in manifest["files"] if f["status"] == "untracked") untracked = sum(1 for f in manifest["files"] if f["status"] == "untracked")
total = len(manifest["files"]) total = len(manifest["files"])
manifest["summary"] = { manifest["summary"] = {
@@ -2275,7 +2347,8 @@ def verify_pack_against_platform(
def verify_and_finalize_packs(output_dir: str, db: dict, def verify_and_finalize_packs(output_dir: str, db: dict,
platforms_dir: str = "platforms", platforms_dir: str = "platforms",
skip_conformance: bool = False) -> bool: skip_conformance: bool = False,
data_registry: dict | None = None) -> bool:
"""Verify all packs, inject manifests, generate SHA256SUMS. """Verify all packs, inject manifests, generate SHA256SUMS.
Two-stage verification: Two-stage verification:
@@ -2303,7 +2376,7 @@ def verify_and_finalize_packs(output_dir: str, db: dict,
zip_path = os.path.join(output_dir, name) zip_path = os.path.join(output_dir, name)
# Stage 1: database integrity # Stage 1: database integrity
ok, manifest = verify_pack(zip_path, db) ok, manifest = verify_pack(zip_path, db, data_registry=data_registry)
summary = manifest["summary"] summary = manifest["summary"]
status = "OK" if ok else "ERRORS" status = "OK" if ok else "ERRORS"
print(f" verify {name}: {summary['verified']}/{summary['total_files']} verified, " print(f" verify {name}: {summary['verified']}/{summary['total_files']} verified, "