fix: clone resolution in common.py, move clone map to root

moved _mame_clones.json out of bios/ (it was being indexed by generate_db.py
as a BIOS file). clone resolution now lives in common.py resolve_local_file
so all tools (verify, pack, cross_reference) resolve clones
transparently. removed duplicate clone code from generate_pack.py.
added error handling on os.remove in dedup.py. consistency check
now passes for Batocera/EmuDeck/Lakka/RetroArch (4/6 platforms).
This commit is contained in:
Abdessamad Derraz
2026-03-24 21:57:49 +01:00
parent 85308edd73
commit ae4846550f
5 changed files with 39 additions and 55 deletions

View File

@@ -1,7 +1,7 @@
{ {
"generated_at": "2026-03-24T20:29:18Z", "generated_at": "2026-03-24T20:51:56Z",
"total_files": 6734, "total_files": 6733,
"total_size": 5288666957, "total_size": 5288644732,
"files": { "files": {
"520d3d1b5897800af47f92efd2444a26b7a7dead": { "520d3d1b5897800af47f92efd2444a26b7a7dead": {
"path": "bios/3DO Company/3DO/3do_arcade_saot.bin", "path": "bios/3DO Company/3DO/3do_arcade_saot.bin",
@@ -67323,16 +67323,6 @@
"crc32": "0a4e2e07", "crc32": "0a4e2e07",
"adler32": "4d032232" "adler32": "4d032232"
}, },
"065c00ac8244d46e75448f667bbdfcf64dfc37c4": {
"path": "bios/_mame_clones.json",
"name": "_mame_clones.json",
"size": 22225,
"sha1": "065c00ac8244d46e75448f667bbdfcf64dfc37c4",
"md5": "178e3d546f24924ccf71160799a09bc4",
"sha256": "098e8ee3af19d9c290a082a03b923f25b0c333b52c689a99ac75cee69f569af3",
"crc32": "834c7566",
"adler32": "12de26b9"
},
"ecfc092fe6371dbf38e238a8ba5f90785b5db52d": { "ecfc092fe6371dbf38e238a8ba5f90785b5db52d": {
"path": "bios/xrick/data.zip", "path": "bios/xrick/data.zip",
"name": "data.zip", "name": "data.zip",
@@ -74078,7 +74068,6 @@
"6f68e4baf89c8ee4623c19617319184b": "cee76080884af97c20059da0eb1ca956a835f3d0", "6f68e4baf89c8ee4623c19617319184b": "cee76080884af97c20059da0eb1ca956a835f3d0",
"2010e5b85f9e1d60685ccb3d84a17115": "c7cc306fb921754ba00794153292d533cf0765ef", "2010e5b85f9e1d60685ccb3d84a17115": "c7cc306fb921754ba00794153292d533cf0765ef",
"39e5bc84ce9aac3a2d297d8aeb2a0d05": "22bcfeb5b6c6481569b90db96aa3f4b5f06c8848", "39e5bc84ce9aac3a2d297d8aeb2a0d05": "22bcfeb5b6c6481569b90db96aa3f4b5f06c8848",
"178e3d546f24924ccf71160799a09bc4": "065c00ac8244d46e75448f667bbdfcf64dfc37c4",
"a471e64e9f69afbe59c10cc94ed1b184": "ecfc092fe6371dbf38e238a8ba5f90785b5db52d" "a471e64e9f69afbe59c10cc94ed1b184": "ecfc092fe6371dbf38e238a8ba5f90785b5db52d"
}, },
"by_name": { "by_name": {
@@ -92631,9 +92620,6 @@
"default.sf2": [ "default.sf2": [
"22bcfeb5b6c6481569b90db96aa3f4b5f06c8848" "22bcfeb5b6c6481569b90db96aa3f4b5f06c8848"
], ],
"_mame_clones.json": [
"065c00ac8244d46e75448f667bbdfcf64dfc37c4"
],
"data.zip": [ "data.zip": [
"ecfc092fe6371dbf38e238a8ba5f90785b5db52d" "ecfc092fe6371dbf38e238a8ba5f90785b5db52d"
], ],
@@ -102100,7 +102086,6 @@
"8419990c": "cee76080884af97c20059da0eb1ca956a835f3d0", "8419990c": "cee76080884af97c20059da0eb1ca956a835f3d0",
"3cacb086": "c7cc306fb921754ba00794153292d533cf0765ef", "3cacb086": "c7cc306fb921754ba00794153292d533cf0765ef",
"0a4e2e07": "22bcfeb5b6c6481569b90db96aa3f4b5f06c8848", "0a4e2e07": "22bcfeb5b6c6481569b90db96aa3f4b5f06c8848",
"834c7566": "065c00ac8244d46e75448f667bbdfcf64dfc37c4",
"74b76447": "ecfc092fe6371dbf38e238a8ba5f90785b5db52d" "74b76447": "ecfc092fe6371dbf38e238a8ba5f90785b5db52d"
}, },
"by_path_suffix": { "by_path_suffix": {

View File

@@ -292,9 +292,38 @@ def resolve_local_file(
if os.path.exists(path): if os.path.exists(path):
return path, "zip_exact" return path, "zip_exact"
# MAME clone fallback: if a file was deduped, resolve via canonical
clone_map = _get_mame_clone_map()
canonical = clone_map.get(name)
if canonical and canonical != name:
canonical_entry = {"name": canonical}
result = resolve_local_file(canonical_entry, db, zip_contents, dest_hint)
if result[0]:
return result[0], "mame_clone"
return None, "not_found" return None, "not_found"
def _get_mame_clone_map() -> dict[str, str]:
"""Load and cache the MAME clone map (clone_name -> canonical_name)."""
if not hasattr(_get_mame_clone_map, "_cache"):
clone_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"_mame_clones.json",
)
if os.path.exists(clone_path):
import json as _json
with open(clone_path) as f:
data = _json.load(f)
_get_mame_clone_map._cache = {}
for canonical, info in data.items():
for clone in info.get("clones", []):
_get_mame_clone_map._cache[clone] = canonical
else:
_get_mame_clone_map._cache = {}
return _get_mame_clone_map._cache
def check_inside_zip(container: str, file_name: str, expected_md5: str) -> str: def check_inside_zip(container: str, file_name: str, expected_md5: str) -> str:
"""Check a ROM inside a ZIP — replicates Batocera checkInsideZip(). """Check a ROM inside a ZIP — replicates Batocera checkInsideZip().

View File

@@ -170,7 +170,11 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
if dry_run: if dry_run:
print(f" WOULD REMOVE: {dup}") print(f" WOULD REMOVE: {dup}")
else: else:
try:
os.remove(dup) os.remove(dup)
except OSError as e:
print(f" WARNING: cannot remove {dup}: {e}")
continue
# Clean up empty .variants/ directories # Clean up empty .variants/ directories
parent = os.path.dirname(dup) parent = os.path.dirname(dup)
if os.path.basename(parent) == ".variants" and not os.listdir(parent): if os.path.basename(parent) == ".variants" and not os.listdir(parent):
@@ -204,7 +208,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
# Write MAME clone mapping # Write MAME clone mapping
if mame_clones: if mame_clones:
clone_path = os.path.join(bios_dir, "_mame_clones.json") clone_path = "_mame_clones.json"
if dry_run: if dry_run:
print(f"\nWould write MAME clone map: {clone_path}") print(f"\nWould write MAME clone map: {clone_path}")
print(f" {len(mame_clones)} canonical ZIPs with " print(f" {len(mame_clones)} canonical ZIPs with "

View File

@@ -100,31 +100,6 @@ def _sanitize_path(raw: str) -> str:
return "/".join(parts) return "/".join(parts)
def _load_mame_clones(bios_dir: str) -> dict[str, str]:
"""Load MAME clone mapping: clone_name -> canonical_name."""
clone_path = os.path.join(bios_dir, "_mame_clones.json")
if not os.path.exists(clone_path):
return {}
with open(clone_path) as f:
data = json.load(f)
# Invert: clone_name -> canonical_name
result = {}
for canonical, info in data.items():
for clone in info.get("clones", []):
result[clone] = canonical
return result
# Process-wide cache for the inverted clone map; populated lazily on first use.
_MAME_CLONE_MAP: dict[str, str] | None = None


def _get_mame_clone_map(bios_dir: str) -> dict[str, str]:
    """Return the clone->canonical map, loading it at most once per process."""
    global _MAME_CLONE_MAP
    if _MAME_CLONE_MAP is not None:
        return _MAME_CLONE_MAP
    _MAME_CLONE_MAP = _load_mame_clones(bios_dir)
    return _MAME_CLONE_MAP
def resolve_file(file_entry: dict, db: dict, bios_dir: str, def resolve_file(file_entry: dict, db: dict, bios_dir: str,
zip_contents: dict | None = None, zip_contents: dict | None = None,
dest_hint: str = "") -> tuple[str | None, str]: dest_hint: str = "") -> tuple[str | None, str]:
@@ -145,17 +120,8 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
if path: if path:
return path, status return path, status
# MAME clone fallback: if the file was deduped, resolve via canonical
name = file_entry.get("name", "")
clone_map = _get_mame_clone_map(bios_dir)
canonical = clone_map.get(name)
if canonical:
canonical_entry = {"name": canonical}
cpath, cstatus = resolve_local_file(canonical_entry, db, zip_contents)
if cpath:
return cpath, "mame_clone"
# Last resort: large files from GitHub release assets # Last resort: large files from GitHub release assets
name = file_entry.get("name", "")
sha1 = file_entry.get("sha1") sha1 = file_entry.get("sha1")
md5_raw = file_entry.get("md5", "") md5_raw = file_entry.get("md5", "")
md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else []