From 7b1c6a723e0ee87a3c7c478da95bd3ae75aaa1c9 Mon Sep 17 00:00:00 2001
From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com>
Date: Wed, 18 Mar 2026 07:13:11 +0100
Subject: [PATCH] refactor: review fixes - resolve coherence + cleanup

1. fetch_large_file moved to last resort (avoids HTTP before name lookup)
2. fetch_large_file receives first MD5 only (not comma-separated string)
3. verify.py MD5 lookup now splits comma-separated + lowercases (matches generate_pack)
4. seen_destinations simplified to set (stored hash was dead data)
5. Variable suffix shadowing renamed to file_ext
---
 scripts/generate_pack.py | 21 +++++++++++----------
 scripts/verify.py        | 28 ++++++++++++++++------------
 2 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py
index 6d138998..3e51ee5a 100644
--- a/scripts/generate_pack.py
+++ b/scripts/generate_pack.py
@@ -153,11 +153,6 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
                     if os.path.exists(local_path):
                         return local_path, "zip_exact"
 
-    # Release assets override local files (authoritative large files)
-    cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=md5_raw or "")
-    if cached:
-        return cached, "release_asset"
-
     # No MD5 specified = any local file with that name is acceptable
     if not md5_list:
         name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
@@ -199,6 +194,12 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
         primary = [p for p, _ in candidates if "/.variants/" not in p]
         return (primary[0] if primary else candidates[0][0]), "hash_mismatch"
 
+    # Last resort: large files from GitHub release assets
+    first_md5 = md5_list[0] if md5_list else ""
+    cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5)
+    if cached:
+        return cached, "release_asset"
+
     return None, "not_found"
 
 
@@ -375,7 +376,7 @@ def generate_pack(
     missing_files = []
     untested_files = []
     user_provided = []
-    seen_destinations = {}
+    seen_destinations = set()
 
     with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
         for sys_id, system in sorted(config.get("systems", {}).items()):
@@ -391,7 +392,7 @@ def generate_pack(
                 dedup_key = full_dest
                 if dedup_key in seen_destinations:
                     continue
-                seen_destinations[dedup_key] = file_entry.get("sha1") or file_entry.get("md5") or ""
+                seen_destinations.add(dedup_key)
 
                 storage = file_entry.get("storage", "embedded")
 
@@ -407,8 +408,8 @@ def generate_pack(
                 local_path, status = resolve_file(file_entry, db, bios_dir, zip_contents)
 
                 if status == "external":
-                    suffix = os.path.splitext(file_entry["name"])[1] or ""
-                    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                    file_ext = os.path.splitext(file_entry["name"])[1] or ""
+                    with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
                         tmp_path = tmp.name
 
                     try:
@@ -461,7 +462,7 @@ def generate_pack(
                     continue
 
                 zf.write(local_path, full_dest)
-                seen_destinations[full_dest] = fe.get("sha1") or fe.get("md5") or ""
+                seen_destinations.add(full_dest)
                 extra_count += 1
                 total_files += 1
 
diff --git a/scripts/verify.py b/scripts/verify.py
index a60c9007..25b6e6b8 100644
--- a/scripts/verify.py
+++ b/scripts/verify.py
@@ -92,24 +92,28 @@ def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
         if os.path.exists(path):
             return path
 
+    # Split comma-separated MD5 lists (Recalbox uses multi-hash)
+    md5_candidates = [m.strip().lower() for m in md5.split(",") if m.strip()] if md5 else []
+
     # Skip MD5 lookup for zipped_file entries: the md5 is for the inner ROM,
     # not the container ZIP, so matching it would resolve to the wrong file.
     if not has_zipped_file:
-        if md5 and md5 in by_md5:
-            sha1_match = by_md5[md5]
-            if sha1_match in files_db:
-                path = files_db[sha1_match]["path"]
-                if os.path.exists(path):
-                    return path
-
-        # Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
-        if md5 and len(md5) < 32:
-            for db_md5, db_sha1 in by_md5.items():
-                if db_md5.startswith(md5) and db_sha1 in files_db:
-                    path = files_db[db_sha1]["path"]
+        for md5_candidate in md5_candidates:
+            if md5_candidate in by_md5:
+                sha1_match = by_md5[md5_candidate]
+                if sha1_match in files_db:
+                    path = files_db[sha1_match]["path"]
                     if os.path.exists(path):
                         return path
 
+            # Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
+            if len(md5_candidate) < 32:
+                for db_md5, db_sha1 in by_md5.items():
+                    if db_md5.startswith(md5_candidate) and db_sha1 in files_db:
+                        path = files_db[db_sha1]["path"]
+                        if os.path.exists(path):
+                            return path
+
     if name in by_name:
         # Prefer the candidate whose MD5 matches the expected hash
         candidates = []