From 7ae995fb3249da9420a9201b99d0ced7012a6dce Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Wed, 18 Mar 2026 07:06:46 +0100 Subject: [PATCH] fix: resolve_file multi-MD5 + md5_composite for Recalbox packs Three fixes in resolve_file(): - Split comma-separated MD5 lists (Recalbox uses multi-hash) - Add md5_composite check in name fallback (matches verify.py logic) - Use ".zip" in basename instead of endswith for variant files Recalbox pack: 346/346 verified (was 332/346 with 14 wrong hash) Batocera pack: 359/359 verified (was 304/359 with 55 inner missing) All 5 platforms now produce 0 untested, 0 missing packs. --- scripts/generate_pack.py | 82 +++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index 90950596..6d138998 100644 --- a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -24,7 +24,7 @@ import zipfile from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) -from common import load_database, load_platform_config +from common import load_database, load_platform_config, md5_composite try: import yaml @@ -112,48 +112,54 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, return None, "external" sha1 = file_entry.get("sha1") - md5 = file_entry.get("md5") + md5_raw = file_entry.get("md5", "") name = file_entry.get("name", "") zipped_file = file_entry.get("zipped_file") + # Recalbox uses comma-separated MD5 lists for accepted variants + md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + if sha1 and sha1 in db.get("files", {}): local_path = db["files"][sha1]["path"] if os.path.exists(local_path): return local_path, "exact" + by_md5 = db.get("indexes", {}).get("by_md5", {}) + # Skip MD5 direct lookup for zipped_file entries: the md5 is for the inner ROM, # not the container ZIP. Matching it would resolve to the standalone ROM file. - if md5 and not zipped_file: - sha1_from_md5 = db.get("indexes", {}).get("by_md5", {}).get(md5.lower()) - if sha1_from_md5 and sha1_from_md5 in db["files"]: - local_path = db["files"][sha1_from_md5]["path"] - if os.path.exists(local_path): - return local_path, "exact" - - # Truncated MD5 match (batocera-systems bug: 29 chars instead of 32) - if len(md5) < 32: - md5_lower = md5.lower() - for db_md5, db_sha1 in db.get("indexes", {}).get("by_md5", {}).items(): - if db_md5.startswith(md5_lower) and db_sha1 in db["files"]: - local_path = db["files"][db_sha1]["path"] - if os.path.exists(local_path): - return local_path, "exact" - - if zipped_file and md5 and zip_contents: - if md5 in zip_contents: - zip_sha1 = zip_contents[md5] - if zip_sha1 in db["files"]: - local_path = db["files"][zip_sha1]["path"] + if md5_list and not zipped_file: + for md5_candidate in md5_list: + sha1_from_md5 = by_md5.get(md5_candidate) + if sha1_from_md5 and sha1_from_md5 in db["files"]: + local_path = db["files"][sha1_from_md5]["path"] if os.path.exists(local_path): - return local_path, "zip_exact" + return local_path, "exact" + + # Truncated MD5 match (batocera-systems bug: 29 chars instead of 32) + if len(md5_candidate) < 32: + for db_md5, db_sha1 in by_md5.items(): + if db_md5.startswith(md5_candidate) and db_sha1 in db["files"]: + local_path = db["files"][db_sha1]["path"] + if os.path.exists(local_path): + return local_path, "exact" + + if zipped_file and md5_list and zip_contents: + for md5_candidate in md5_list: + if md5_candidate in zip_contents: + zip_sha1 = zip_contents[md5_candidate] + if zip_sha1 in db["files"]: + local_path = db["files"][zip_sha1]["path"] + if os.path.exists(local_path): + return local_path, "zip_exact" # Release assets override local files (authoritative large files) - cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=md5 or "") + cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=md5_raw or "") if cached: return cached, "release_asset" # No MD5 specified = any local file with that name is acceptable - if not md5: + if not md5_list: name_matches = db.get("indexes", {}).get("by_name", {}).get(name, []) candidates = [] for match_sha1 in name_matches: @@ -165,17 +171,33 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, primary = [p for p in candidates if "/.variants/" not in p] return (primary[0] if primary else candidates[0]), "exact" - # Name fallback (hash mismatch) - prefer primary over variants + # Name fallback: check md5_composite for ZIPs (Recalbox Zip::Md5Composite) + md5_set = set(md5_list) name_matches = db.get("indexes", {}).get("by_name", {}).get(name, []) candidates = [] for match_sha1 in name_matches: if match_sha1 in db["files"]: local_path = db["files"][match_sha1]["path"] if os.path.exists(local_path): - candidates.append(local_path) + candidates.append((local_path, db["files"][match_sha1].get("md5", ""))) + + if candidates and md5_set: + # Try md5_composite for ZIP files before falling back to hash_mismatch + for path, db_md5 in candidates: + if ".zip" in os.path.basename(path): + try: + composite = md5_composite(path).lower() + if composite in md5_set: + return path, "exact" + except (zipfile.BadZipFile, OSError): + pass + # Also check direct MD5 match per candidate + if db_md5.lower() in md5_set: + return path, "exact" + if candidates: - primary = [p for p in candidates if "/.variants/" not in p] - return (primary[0] if primary else candidates[0]), "hash_mismatch" + primary = [p for p, _ in candidates if "/.variants/" not in p] + return (primary[0] if primary else candidates[0][0]), "hash_mismatch" return None, "not_found"