From 7b1c6a723e0ee87a3c7c478da95bd3ae75aaa1c9 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Wed, 18 Mar 2026 07:13:11 +0100 Subject: [PATCH] refactor: review fixes - resolve coherence + cleanup 1. fetch_large_file moved to last resort (avoids HTTP before name lookup) 2. fetch_large_file receives first MD5 only (not comma-separated string) 3. verify.py MD5 lookup now splits comma-separated + lowercases (matches generate_pack) 4. seen_destinations simplified to set (stored hash was dead data) 5. Variable suffix shadowing renamed to file_ext --- scripts/generate_pack.py | 21 +++++++++++---------- scripts/verify.py | 28 ++++++++++++++++------------ 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index 6d138998..3e51ee5a 100644 --- a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -153,11 +153,6 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, if os.path.exists(local_path): return local_path, "zip_exact" - # Release assets override local files (authoritative large files) - cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=md5_raw or "") - if cached: - return cached, "release_asset" - # No MD5 specified = any local file with that name is acceptable if not md5_list: name_matches = db.get("indexes", {}).get("by_name", {}).get(name, []) @@ -199,6 +194,12 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, primary = [p for p, _ in candidates if "/.variants/" not in p] return (primary[0] if primary else candidates[0][0]), "hash_mismatch" + # Last resort: large files from GitHub release assets + first_md5 = md5_list[0] if md5_list else "" + cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5) + if cached: + return cached, "release_asset" + return None, "not_found" @@ -375,7 +376,7 @@ def generate_pack( missing_files = [] untested_files = [] user_provided = [] - seen_destinations = {} + seen_destinations = set() with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for sys_id, system in sorted(config.get("systems", {}).items()): @@ -391,7 +392,7 @@ def generate_pack( dedup_key = full_dest if dedup_key in seen_destinations: continue - seen_destinations[dedup_key] = file_entry.get("sha1") or file_entry.get("md5") or "" + seen_destinations.add(dedup_key) storage = file_entry.get("storage", "embedded") @@ -407,8 +408,8 @@ def generate_pack( local_path, status = resolve_file(file_entry, db, bios_dir, zip_contents) if status == "external": - suffix = os.path.splitext(file_entry["name"])[1] or "" - with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + file_ext = os.path.splitext(file_entry["name"])[1] or "" + with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp: tmp_path = tmp.name try: @@ -461,7 +462,7 @@ def generate_pack( continue zf.write(local_path, full_dest) - seen_destinations[full_dest] = fe.get("sha1") or fe.get("md5") or "" + seen_destinations.add(full_dest) extra_count += 1 total_files += 1 diff --git a/scripts/verify.py b/scripts/verify.py index a60c9007..25b6e6b8 100644 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -92,24 +92,28 @@ def resolve_to_local_path(file_entry: dict, db: dict) -> str | None: if os.path.exists(path): return path + # Split comma-separated MD5 lists (Recalbox uses multi-hash) + md5_candidates = [m.strip().lower() for m in md5.split(",") if m.strip()] if md5 else [] + # Skip MD5 lookup for zipped_file entries: the md5 is for the inner ROM, # not the container ZIP, so matching it would resolve to the wrong file. if not has_zipped_file: - if md5 and md5 in by_md5: - sha1_match = by_md5[md5] - if sha1_match in files_db: - path = files_db[sha1_match]["path"] - if os.path.exists(path): - return path - - # Truncated MD5 (batocera-systems bug: 29 chars instead of 32) - if md5 and len(md5) < 32: - for db_md5, db_sha1 in by_md5.items(): - if db_md5.startswith(md5) and db_sha1 in files_db: - path = files_db[db_sha1]["path"] + for md5_candidate in md5_candidates: + if md5_candidate in by_md5: + sha1_match = by_md5[md5_candidate] + if sha1_match in files_db: + path = files_db[sha1_match]["path"] if os.path.exists(path): return path + # Truncated MD5 (batocera-systems bug: 29 chars instead of 32) + if len(md5_candidate) < 32: + for db_md5, db_sha1 in by_md5.items(): + if db_md5.startswith(md5_candidate) and db_sha1 in files_db: + path = files_db[db_sha1]["path"] + if os.path.exists(path): + return path + if name in by_name: # Prefer the candidate whose MD5 matches the expected hash candidates = []