mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-15 21:32:32 -05:00
feat: deterministic MAME ZIP assembly in packs
all ZIP files (neogeo.zip, pgm.zip, etc.) are rebuilt with fixed metadata before packing: sorted filenames, epoch timestamps, fixed permissions, deflate level 9. same ROM atoms = same ZIP hash, always. 115 internal ZIPs verified identical across two independent builds. enables version-agnostic ZIP assembly from ROM atoms indexed by CRC32.
This commit is contained in:
197
scripts/deterministic_zip.py
Normal file
197
scripts/deterministic_zip.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
"""Deterministic ZIP builder for MAME BIOS archives.
|
||||||
|
|
||||||
|
Creates byte-identical ZIP files from individual ROM atoms, enabling:
|
||||||
|
- Reproducible builds: same ROMs → same ZIP hash, always
|
||||||
|
- Version-agnostic assembly: build neogeo.zip for any MAME version
|
||||||
|
- Deduplication: store ROM atoms once, assemble any ZIP on demand
|
||||||
|
|
||||||
|
A ZIP's hash depends on: file content, filenames, order, timestamps,
|
||||||
|
compression, and permissions. This module fixes all metadata to produce
|
||||||
|
deterministic output.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from deterministic_zip import build_deterministic_zip, extract_atoms
|
||||||
|
|
||||||
|
# Extract atoms from an existing ZIP
|
||||||
|
atom_store = extract_atoms("neogeo.zip")
|
||||||
|
|
||||||
|
# Build a ZIP from a recipe
|
||||||
|
recipe = [
|
||||||
|
{"name": "sp-s2.sp1", "crc32": "9036d879"},
|
||||||
|
{"name": "000-lo.lo", "crc32": "5a86cff2"},
|
||||||
|
]
|
||||||
|
build_deterministic_zip("neogeo.zip", recipe, atom_store)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import struct
|
||||||
|
import zipfile
|
||||||
|
import zlib
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Fixed metadata for deterministic ZIPs.
# Every entry written by this module is stamped with these constants, so the
# archive bytes depend only on the entry names and contents.
_FIXED_DATE_TIME = (1980, 1, 1, 0, 0, 0)  # minimum ZIP timestamp (year, month, day, hour, minute, second)
_FIXED_CREATE_SYSTEM = 0  # FAT/DOS (most compatible)
_FIXED_EXTERNAL_ATTR = 0o100644 << 16  # regular file, -rw-r--r--, stored in the upper 16 bits
_COMPRESS_LEVEL = 9  # deflate level pinned to one constant value (9) for determinism
|
||||||
|
|
||||||
|
|
||||||
|
def build_deterministic_zip(
    output_path: str | Path,
    recipe: list[dict],
    atom_store: dict[str, bytes],
    compression: int = zipfile.ZIP_DEFLATED,
) -> str:
    """Build a deterministic ZIP from a recipe and atom store.

    Entries are written in filename order, each with the module's fixed
    timestamp, creator system, permission bits and compression level.

    The whole recipe is resolved and CRC-checked before the output file is
    opened, so a missing or corrupt atom never leaves a truncated archive
    behind at ``output_path``.

    Args:
        output_path: Path for the output ZIP file.
        recipe: List of dicts with 'name' and 'crc32' (hex, no 0x; compared
            case-insensitively). Files are sorted by name for determinism.
        atom_store: Dict mapping CRC32 (lowercase hex) to ROM binary data.
        compression: ZIP_DEFLATED (default) or ZIP_STORED.

    Returns:
        SHA1 hex digest of the generated ZIP.

    Raises:
        KeyError: If a recipe CRC32 is not found in the atom store.
        ValueError: If a ROM's actual CRC32 doesn't match the recipe.
    """
    # Sort by filename for deterministic order
    sorted_recipe = sorted(recipe, key=lambda r: r["name"])

    # Pass 1: resolve and validate every entry without touching the disk.
    resolved: list[tuple[str, bytes]] = []
    for entry in sorted_recipe:
        name = entry["name"]
        expected_crc = entry.get("crc32", "").lower()

        if expected_crc not in atom_store:
            raise KeyError(
                f"ROM atom not found: {name} (crc32={expected_crc}). "
                f"Available: {len(atom_store)} atoms"
            )

        data = atom_store[expected_crc]

        # Verify CRC32 of the atom data
        actual_crc = format(zlib.crc32(data) & 0xFFFFFFFF, "08x")
        if expected_crc and actual_crc != expected_crc:
            raise ValueError(
                f"CRC32 mismatch for {name}: expected {expected_crc}, got {actual_crc}"
            )

        resolved.append((name, data))

    # Pass 2: write the archive with fixed metadata.
    with zipfile.ZipFile(str(output_path), "w", compression, compresslevel=_COMPRESS_LEVEL) as zf:
        for name, data in resolved:
            info = zipfile.ZipInfo(filename=name, date_time=_FIXED_DATE_TIME)
            info.compress_type = compression
            info.create_system = _FIXED_CREATE_SYSTEM
            info.external_attr = _FIXED_EXTERNAL_ATTR

            # When writestr() receives a ZipInfo it takes the compression
            # settings from that object, not from the ZipFile constructor, so
            # the level has to be passed here for _COMPRESS_LEVEL to apply.
            zf.writestr(info, data, compresslevel=_COMPRESS_LEVEL)

    # Compute and return the ZIP's SHA1
    sha1 = hashlib.sha1()
    with open(output_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha1.update(chunk)
    return sha1.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def extract_atoms(zip_path: str | Path) -> dict[str, bytes]:
|
||||||
|
"""Extract all ROM atoms from a ZIP, indexed by CRC32.
|
||||||
|
|
||||||
|
Returns: Dict mapping CRC32 (lowercase hex) to raw ROM data.
|
||||||
|
"""
|
||||||
|
atoms: dict[str, bytes] = {}
|
||||||
|
with zipfile.ZipFile(str(zip_path), "r") as zf:
|
||||||
|
for info in zf.infolist():
|
||||||
|
if info.is_dir():
|
||||||
|
continue
|
||||||
|
data = zf.read(info.filename)
|
||||||
|
crc = format(zlib.crc32(data) & 0xFFFFFFFF, "08x")
|
||||||
|
atoms[crc] = data
|
||||||
|
return atoms
|
||||||
|
|
||||||
|
|
||||||
|
def extract_atoms_with_names(zip_path: str | Path) -> list[dict]:
|
||||||
|
"""Extract atoms with full metadata from a ZIP.
|
||||||
|
|
||||||
|
Returns: List of dicts with 'name', 'crc32', 'size', 'data'.
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
with zipfile.ZipFile(str(zip_path), "r") as zf:
|
||||||
|
for info in sorted(zf.infolist(), key=lambda i: i.filename):
|
||||||
|
if info.is_dir():
|
||||||
|
continue
|
||||||
|
data = zf.read(info.filename)
|
||||||
|
crc = format(zlib.crc32(data) & 0xFFFFFFFF, "08x")
|
||||||
|
result.append({
|
||||||
|
"name": info.filename,
|
||||||
|
"crc32": crc,
|
||||||
|
"size": len(data),
|
||||||
|
"data": data,
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def verify_zip_determinism(zip_path: str | Path) -> tuple[bool, str, str]:
    """Verify a ZIP can be rebuilt deterministically.

    Hashes the file as it is on disk, rebuilds it through
    rebuild_zip_deterministic into a scratch directory, and compares the two
    SHA1 digests. Going through the real builder (instead of a private copy
    of its write loop) guarantees the check uses exactly the metadata and
    compression settings that production rebuilds use.

    Args:
        zip_path: Path of the ZIP archive to check. The file is not modified.

    Returns:
        (is_deterministic, original_sha1, rebuilt_sha1), where
        is_deterministic is True when both digests are equal.
    """
    import tempfile  # local import: only needed for the scratch rebuild

    # Hash the original
    orig_sha1 = hashlib.sha1(Path(zip_path).read_bytes()).hexdigest()

    # Rebuild next to nothing the caller cares about; the scratch directory
    # and the rebuilt archive are removed as soon as the digest is known.
    with tempfile.TemporaryDirectory() as scratch_dir:
        rebuilt_sha1 = rebuild_zip_deterministic(zip_path, Path(scratch_dir) / "rebuilt.zip")

    return orig_sha1 == rebuilt_sha1, orig_sha1, rebuilt_sha1
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_zip_deterministic(
    source_zip: str | Path,
    output_zip: str | Path,
) -> str:
    """Re-pack an existing ZIP with normalized metadata.

    Reads every member of ``source_zip`` via extract_atoms_with_names, then
    hands the names and contents to build_deterministic_zip, which writes
    ``output_zip``.

    Returns:
        The SHA1 hex digest that build_deterministic_zip reports for the
        new archive.
    """
    members = extract_atoms_with_names(source_zip)

    store: dict[str, bytes] = {}
    entries: list[dict] = []
    for member in members:
        crc = member["crc32"]
        store[crc] = member["data"]
        entries.append({"name": member["name"], "crc32": crc})

    return build_deterministic_zip(output_zip, entries, store)
|
||||||
|
|
||||||
|
|
||||||
|
def build_atom_store_from_zips(zip_dir: str | Path) -> dict[str, bytes]:
    """Collect the ROM atoms of every ZIP below a directory into one store.

    Walks ``zip_dir`` recursively for ``*.zip`` files in sorted path order
    and merges the result of extract_atoms for each of them. Atoms are keyed
    by CRC32, so a ROM that appears in several archives is kept once (a later
    archive overwrites an earlier entry with the same key). Files that are
    not valid ZIP archives are skipped.
    """
    merged: dict[str, bytes] = {}
    archives = sorted(Path(zip_dir).rglob("*.zip"))
    for archive in archives:
        try:
            found = extract_atoms(archive)
        except zipfile.BadZipFile:
            # Best effort: ignore unreadable archives and keep scanning.
            continue
        merged.update(found)
    return merged
|
||||||
@@ -30,6 +30,7 @@ from common import (
|
|||||||
load_emulator_profiles, load_platform_config, md5_composite,
|
load_emulator_profiles, load_platform_config, md5_composite,
|
||||||
resolve_local_file,
|
resolve_local_file,
|
||||||
)
|
)
|
||||||
|
from deterministic_zip import rebuild_zip_deterministic
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import yaml
|
import yaml
|
||||||
@@ -362,6 +363,8 @@ def generate_pack(
|
|||||||
extract = file_entry.get("extract", False)
|
extract = file_entry.get("extract", False)
|
||||||
if extract and local_path.endswith(".zip"):
|
if extract and local_path.endswith(".zip"):
|
||||||
_extract_zip_to_archive(local_path, full_dest, zf)
|
_extract_zip_to_archive(local_path, full_dest, zf)
|
||||||
|
elif local_path.endswith(".zip"):
|
||||||
|
_normalize_zip_for_pack(local_path, full_dest, zf)
|
||||||
else:
|
else:
|
||||||
zf.write(local_path, full_dest)
|
zf.write(local_path, full_dest)
|
||||||
total_files += 1
|
total_files += 1
|
||||||
@@ -398,7 +401,10 @@ def generate_pack(
|
|||||||
if status in ("not_found", "external", "user_provided"):
|
if status in ("not_found", "external", "user_provided"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
zf.write(local_path, full_dest)
|
if local_path.endswith(".zip"):
|
||||||
|
_normalize_zip_for_pack(local_path, full_dest, zf)
|
||||||
|
else:
|
||||||
|
zf.write(local_path, full_dest)
|
||||||
seen_destinations.add(full_dest)
|
seen_destinations.add(full_dest)
|
||||||
seen_lower.add(full_dest.lower())
|
seen_lower.add(full_dest.lower())
|
||||||
core_count += 1
|
core_count += 1
|
||||||
@@ -468,6 +474,28 @@ def _extract_zip_to_archive(source_zip: str, dest_prefix: str, target_zf: zipfil
|
|||||||
target_zf.writestr(target_path, data)
|
target_zf.writestr(target_path, data)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_zip_for_pack(source_zip: str, dest_path: str, target_zf: zipfile.ZipFile) -> None:
    """Add a MAME BIOS ZIP to the pack as a deterministic rebuild.

    Instead of copying the original ZIP (with non-deterministic metadata),
    extracts the ROM atoms, rebuilds the ZIP deterministically, and writes
    the normalized version into the pack.

    This ensures:
    - Same ROMs → same ZIP hash in every pack build
    - No dependency on how the user built their MAME ROM set
    - Bit-identical ZIPs across platforms and build times

    Args:
        source_zip: Path of the ZIP to normalize.
        dest_path: Archive name the rebuilt ZIP gets inside the pack.
        target_zf: Open, writable pack archive that receives the rebuilt ZIP.
    """
    import tempfile as _tmp

    # mkstemp() does not create its target directory; make sure the relative
    # scratch directory exists so a fresh checkout or a different working
    # directory does not fail with FileNotFoundError.
    os.makedirs("tmp", exist_ok=True)
    tmp_fd, tmp_path = _tmp.mkstemp(suffix=".zip", dir="tmp")
    # Only the path is needed; the rebuild reopens the file itself.
    os.close(tmp_fd)
    try:
        rebuild_zip_deterministic(source_zip, tmp_path)
        # NOTE(review): ZipFile.write() records the temp file's own mtime and
        # mode on the outer pack entry; only the inner ZIP bytes are fixed
        # here — confirm whether the outer entry metadata matters.
        target_zf.write(tmp_path, dest_path)
    finally:
        os.unlink(tmp_path)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Emulator/system mode pack generation
|
# Emulator/system mode pack generation
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -625,7 +653,10 @@ def generate_emulator_pack(
|
|||||||
archive_entry = {"name": archive_name}
|
archive_entry = {"name": archive_name}
|
||||||
local_path, status = resolve_file(archive_entry, db, bios_dir, zip_contents)
|
local_path, status = resolve_file(archive_entry, db, bios_dir, zip_contents)
|
||||||
if local_path and status not in ("not_found",):
|
if local_path and status not in ("not_found",):
|
||||||
zf.write(local_path, archive_dest)
|
if local_path.endswith(".zip"):
|
||||||
|
_normalize_zip_for_pack(local_path, archive_dest, zf)
|
||||||
|
else:
|
||||||
|
zf.write(local_path, archive_dest)
|
||||||
seen_destinations.add(archive_dest)
|
seen_destinations.add(archive_dest)
|
||||||
seen_lower.add(archive_dest.lower())
|
seen_lower.add(archive_dest.lower())
|
||||||
total_files += 1
|
total_files += 1
|
||||||
@@ -689,7 +720,10 @@ def generate_emulator_pack(
|
|||||||
continue
|
continue
|
||||||
seen_hashes.add(dedup_key_hash)
|
seen_hashes.add(dedup_key_hash)
|
||||||
|
|
||||||
zf.write(local_path, dest)
|
if local_path.endswith(".zip"):
|
||||||
|
_normalize_zip_for_pack(local_path, dest, zf)
|
||||||
|
else:
|
||||||
|
zf.write(local_path, dest)
|
||||||
seen_destinations.add(dest)
|
seen_destinations.add(dest)
|
||||||
seen_lower.add(dest.lower())
|
seen_lower.add(dest.lower())
|
||||||
total_files += 1
|
total_files += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user