refactor: review fixes - resolve coherence + cleanup

1. fetch_large_file moved to last resort (avoids HTTP before name lookup)
2. fetch_large_file receives first MD5 only (not comma-separated string)
3. verify.py MD5 lookup now splits comma-separated + lowercases (matches generate_pack)
4. seen_destinations simplified to set (stored hash was dead data)
5. Variable suffix shadowing renamed to file_ext
This commit is contained in:
Abdessamad Derraz
2026-03-18 07:13:11 +01:00
parent 7ae995fb32
commit 7b1c6a723e
2 changed files with 27 additions and 22 deletions

View File

@@ -153,11 +153,6 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
if os.path.exists(local_path):
return local_path, "zip_exact"
# Release assets override local files (authoritative large files)
cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=md5_raw or "")
if cached:
return cached, "release_asset"
# No MD5 specified = any local file with that name is acceptable
if not md5_list:
name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
@@ -199,6 +194,12 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
primary = [p for p, _ in candidates if "/.variants/" not in p]
return (primary[0] if primary else candidates[0][0]), "hash_mismatch"
# Last resort: large files from GitHub release assets
first_md5 = md5_list[0] if md5_list else ""
cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5)
if cached:
return cached, "release_asset"
return None, "not_found"
@@ -375,7 +376,7 @@ def generate_pack(
missing_files = []
untested_files = []
user_provided = []
seen_destinations = {}
seen_destinations = set()
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
for sys_id, system in sorted(config.get("systems", {}).items()):
@@ -391,7 +392,7 @@ def generate_pack(
dedup_key = full_dest
if dedup_key in seen_destinations:
continue
seen_destinations[dedup_key] = file_entry.get("sha1") or file_entry.get("md5") or ""
seen_destinations.add(dedup_key)
storage = file_entry.get("storage", "embedded")
@@ -407,8 +408,8 @@ def generate_pack(
local_path, status = resolve_file(file_entry, db, bios_dir, zip_contents)
if status == "external":
suffix = os.path.splitext(file_entry["name"])[1] or ""
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
file_ext = os.path.splitext(file_entry["name"])[1] or ""
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
tmp_path = tmp.name
try:
@@ -461,7 +462,7 @@ def generate_pack(
continue
zf.write(local_path, full_dest)
seen_destinations[full_dest] = fe.get("sha1") or fe.get("md5") or ""
seen_destinations.add(full_dest)
extra_count += 1
total_files += 1

View File

@@ -92,24 +92,28 @@ def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
if os.path.exists(path):
return path
# Split comma-separated MD5 lists (Recalbox uses multi-hash)
md5_candidates = [m.strip().lower() for m in md5.split(",") if m.strip()] if md5 else []
# Skip MD5 lookup for zipped_file entries: the md5 is for the inner ROM,
# not the container ZIP, so matching it would resolve to the wrong file.
if not has_zipped_file:
if md5 and md5 in by_md5:
sha1_match = by_md5[md5]
if sha1_match in files_db:
path = files_db[sha1_match]["path"]
if os.path.exists(path):
return path
# Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
if md5 and len(md5) < 32:
for db_md5, db_sha1 in by_md5.items():
if db_md5.startswith(md5) and db_sha1 in files_db:
path = files_db[db_sha1]["path"]
for md5_candidate in md5_candidates:
if md5_candidate in by_md5:
sha1_match = by_md5[md5_candidate]
if sha1_match in files_db:
path = files_db[sha1_match]["path"]
if os.path.exists(path):
return path
# Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
if len(md5_candidate) < 32:
for db_md5, db_sha1 in by_md5.items():
if db_md5.startswith(md5_candidate) and db_sha1 in files_db:
path = files_db[db_sha1]["path"]
if os.path.exists(path):
return path
if name in by_name:
# Prefer the candidate whose MD5 matches the expected hash
candidates = []