diff --git a/scripts/common.py b/scripts/common.py index b7c2575c..c8f95b17 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -117,6 +117,106 @@ def load_platform_config(platform_name: str, platforms_dir: str = "platforms") - return config +def resolve_local_file( + file_entry: dict, + db: dict, + zip_contents: dict | None = None, +) -> tuple[str | None, str]: + """Resolve a BIOS file to its local path using database.json. + + Single source of truth for file resolution, used by both verify.py + and generate_pack.py. Does NOT handle storage tiers (external/user_provided) + or release assets - callers handle those. + + Returns (local_path, status) where status is one of: + exact, zip_exact, hash_mismatch, not_found. + """ + sha1 = file_entry.get("sha1") + md5_raw = file_entry.get("md5", "") + name = file_entry.get("name", "") + zipped_file = file_entry.get("zipped_file") + + md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] + files_db = db.get("files", {}) + by_md5 = db.get("indexes", {}).get("by_md5", {}) + by_name = db.get("indexes", {}).get("by_name", {}) + + # 1. SHA1 exact match + if sha1 and sha1 in files_db: + path = files_db[sha1]["path"] + if os.path.exists(path): + return path, "exact" + + # 2. MD5 direct lookup (skip for zipped_file: md5 is inner ROM, not container) + if md5_list and not zipped_file: + for md5_candidate in md5_list: + sha1_match = by_md5.get(md5_candidate) + if sha1_match and sha1_match in files_db: + path = files_db[sha1_match]["path"] + if os.path.exists(path): + return path, "exact" + if len(md5_candidate) < 32: + for db_md5, db_sha1 in by_md5.items(): + if db_md5.startswith(md5_candidate) and db_sha1 in files_db: + path = files_db[db_sha1]["path"] + if os.path.exists(path): + return path, "exact" + + # 3. 
zipped_file content match via pre-built index + if zipped_file and md5_list and zip_contents: + for md5_candidate in md5_list: + if md5_candidate in zip_contents: + zip_sha1 = zip_contents[md5_candidate] + if zip_sha1 in files_db: + path = files_db[zip_sha1]["path"] + if os.path.exists(path): + return path, "zip_exact" + + # 4. No MD5 = any file with that name (existence check) + if not md5_list: + candidates = [] + for match_sha1 in by_name.get(name, []): + if match_sha1 in files_db: + path = files_db[match_sha1]["path"] + if os.path.exists(path): + candidates.append(path) + if candidates: + if zipped_file: + candidates = [p for p in candidates if ".zip" in os.path.basename(p)] + primary = [p for p in candidates if "/.variants/" not in p] + if primary or candidates: + return (primary[0] if primary else candidates[0]), "exact" + + # 5. Name fallback with md5_composite + direct MD5 per candidate + md5_set = set(md5_list) + candidates = [] + for match_sha1 in by_name.get(name, []): + if match_sha1 in files_db: + entry = files_db[match_sha1] + path = entry["path"] + if os.path.exists(path): + candidates.append((path, entry.get("md5", ""))) + + if candidates: + if zipped_file: + candidates = [(p, m) for p, m in candidates if ".zip" in os.path.basename(p)] + if md5_set: + for path, db_md5 in candidates: + if ".zip" in os.path.basename(path): + try: + composite = md5_composite(path).lower() + if composite in md5_set: + return path, "exact" + except (zipfile.BadZipFile, OSError): + pass + if db_md5.lower() in md5_set: + return path, "exact" + primary = [p for p, _ in candidates if "/.variants/" not in p] + return (primary[0] if primary else candidates[0][0]), "hash_mismatch" + + return None, "not_found" + + def safe_extract_zip(zip_path: str, dest_dir: str) -> None: """Extract a ZIP file safely, preventing zip-slip path traversal.""" dest = os.path.realpath(dest_dir) diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index 3e51ee5a..2743f598 100644 --- 
a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -24,7 +24,7 @@ import zipfile from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) -from common import load_database, load_platform_config, md5_composite +from common import load_database, load_platform_config, md5_composite, resolve_local_file try: import yaml @@ -100,10 +100,10 @@ def _sanitize_path(raw: str) -> str: def resolve_file(file_entry: dict, db: dict, bios_dir: str, zip_contents: dict | None = None) -> tuple[str | None, str]: - """Resolve a BIOS file to its local path using database.json. + """Resolve a BIOS file with storage tiers and release asset fallback. - Returns (local_path, status) where status is one of: - exact, zip_exact, hash_mismatch, external, user_provided, not_found. + Wraps common.resolve_local_file() with pack-specific logic for + storage tiers (external/user_provided) and large file release assets. """ storage = file_entry.get("storage", "embedded") if storage == "user_provided": @@ -111,90 +111,15 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str, if storage == "external": return None, "external" - sha1 = file_entry.get("sha1") - md5_raw = file_entry.get("md5", "") - name = file_entry.get("name", "") - zipped_file = file_entry.get("zipped_file") - - # Recalbox uses comma-separated MD5 lists for accepted variants - md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else [] - - if sha1 and sha1 in db.get("files", {}): - local_path = db["files"][sha1]["path"] - if os.path.exists(local_path): - return local_path, "exact" - - by_md5 = db.get("indexes", {}).get("by_md5", {}) - - # Skip MD5 direct lookup for zipped_file entries: the md5 is for the inner ROM, - # not the container ZIP. Matching it would resolve to the standalone ROM file. 
- if md5_list and not zipped_file: - for md5_candidate in md5_list: - sha1_from_md5 = by_md5.get(md5_candidate) - if sha1_from_md5 and sha1_from_md5 in db["files"]: - local_path = db["files"][sha1_from_md5]["path"] - if os.path.exists(local_path): - return local_path, "exact" - - # Truncated MD5 match (batocera-systems bug: 29 chars instead of 32) - if len(md5_candidate) < 32: - for db_md5, db_sha1 in by_md5.items(): - if db_md5.startswith(md5_candidate) and db_sha1 in db["files"]: - local_path = db["files"][db_sha1]["path"] - if os.path.exists(local_path): - return local_path, "exact" - - if zipped_file and md5_list and zip_contents: - for md5_candidate in md5_list: - if md5_candidate in zip_contents: - zip_sha1 = zip_contents[md5_candidate] - if zip_sha1 in db["files"]: - local_path = db["files"][zip_sha1]["path"] - if os.path.exists(local_path): - return local_path, "zip_exact" - - # No MD5 specified = any local file with that name is acceptable - if not md5_list: - name_matches = db.get("indexes", {}).get("by_name", {}).get(name, []) - candidates = [] - for match_sha1 in name_matches: - if match_sha1 in db["files"]: - local_path = db["files"][match_sha1]["path"] - if os.path.exists(local_path): - candidates.append(local_path) - if candidates: - primary = [p for p in candidates if "/.variants/" not in p] - return (primary[0] if primary else candidates[0]), "exact" - - # Name fallback: check md5_composite for ZIPs (Recalbox Zip::Md5Composite) - md5_set = set(md5_list) - name_matches = db.get("indexes", {}).get("by_name", {}).get(name, []) - candidates = [] - for match_sha1 in name_matches: - if match_sha1 in db["files"]: - local_path = db["files"][match_sha1]["path"] - if os.path.exists(local_path): - candidates.append((local_path, db["files"][match_sha1].get("md5", ""))) - - if candidates and md5_set: - # Try md5_composite for ZIP files before falling back to hash_mismatch - for path, db_md5 in candidates: - if ".zip" in os.path.basename(path): - try: - 
def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
    """Return the local path for a BIOS entry, or None when nothing resolves.

    Thin wrapper over common.resolve_local_file: the resolution status is
    discarded and no ZIP content index is supplied.
    """
    return resolve_local_file(file_entry, db)[0]