mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-16 13:52:32 -05:00
refactor: extract resolve_local_file to common.py (DRY)
Single source of truth for file resolution logic:
- common.py:resolve_local_file() = 80 lines (core resolution)
- verify.py:resolve_to_local_path() = 3 lines (thin wrapper)
- generate_pack.py:resolve_file() = 20 lines (adds storage tiers + release assets)

Before: 103 + 73 = 176 lines of duplicated logic with subtle divergences
After: 80 lines shared + 23 lines wrappers = 103 lines total (-41%)

Resolution chain: SHA1 -> MD5 multi-hash -> truncated MD5 -> zipped_file index -> name existence -> name composite -> name fallback -> (pack only) release assets
This commit is contained in:
@@ -117,6 +117,106 @@ def load_platform_config(platform_name: str, platforms_dir: str = "platforms") -
|
|||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_local_file(
|
||||||
|
file_entry: dict,
|
||||||
|
db: dict,
|
||||||
|
zip_contents: dict | None = None,
|
||||||
|
) -> tuple[str | None, str]:
|
||||||
|
"""Resolve a BIOS file to its local path using database.json.
|
||||||
|
|
||||||
|
Single source of truth for file resolution, used by both verify.py
|
||||||
|
and generate_pack.py. Does NOT handle storage tiers (external/user_provided)
|
||||||
|
or release assets - callers handle those.
|
||||||
|
|
||||||
|
Returns (local_path, status) where status is one of:
|
||||||
|
exact, zip_exact, hash_mismatch, not_found.
|
||||||
|
"""
|
||||||
|
sha1 = file_entry.get("sha1")
|
||||||
|
md5_raw = file_entry.get("md5", "")
|
||||||
|
name = file_entry.get("name", "")
|
||||||
|
zipped_file = file_entry.get("zipped_file")
|
||||||
|
|
||||||
|
md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else []
|
||||||
|
files_db = db.get("files", {})
|
||||||
|
by_md5 = db.get("indexes", {}).get("by_md5", {})
|
||||||
|
by_name = db.get("indexes", {}).get("by_name", {})
|
||||||
|
|
||||||
|
# 1. SHA1 exact match
|
||||||
|
if sha1 and sha1 in files_db:
|
||||||
|
path = files_db[sha1]["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
return path, "exact"
|
||||||
|
|
||||||
|
# 2. MD5 direct lookup (skip for zipped_file: md5 is inner ROM, not container)
|
||||||
|
if md5_list and not zipped_file:
|
||||||
|
for md5_candidate in md5_list:
|
||||||
|
sha1_match = by_md5.get(md5_candidate)
|
||||||
|
if sha1_match and sha1_match in files_db:
|
||||||
|
path = files_db[sha1_match]["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
return path, "exact"
|
||||||
|
if len(md5_candidate) < 32:
|
||||||
|
for db_md5, db_sha1 in by_md5.items():
|
||||||
|
if db_md5.startswith(md5_candidate) and db_sha1 in files_db:
|
||||||
|
path = files_db[db_sha1]["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
return path, "exact"
|
||||||
|
|
||||||
|
# 3. zipped_file content match via pre-built index
|
||||||
|
if zipped_file and md5_list and zip_contents:
|
||||||
|
for md5_candidate in md5_list:
|
||||||
|
if md5_candidate in zip_contents:
|
||||||
|
zip_sha1 = zip_contents[md5_candidate]
|
||||||
|
if zip_sha1 in files_db:
|
||||||
|
path = files_db[zip_sha1]["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
return path, "zip_exact"
|
||||||
|
|
||||||
|
# 4. No MD5 = any file with that name (existence check)
|
||||||
|
if not md5_list:
|
||||||
|
candidates = []
|
||||||
|
for match_sha1 in by_name.get(name, []):
|
||||||
|
if match_sha1 in files_db:
|
||||||
|
path = files_db[match_sha1]["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
candidates.append(path)
|
||||||
|
if candidates:
|
||||||
|
if zipped_file:
|
||||||
|
candidates = [p for p in candidates if ".zip" in os.path.basename(p)]
|
||||||
|
primary = [p for p in candidates if "/.variants/" not in p]
|
||||||
|
if primary or candidates:
|
||||||
|
return (primary[0] if primary else candidates[0]), "exact"
|
||||||
|
|
||||||
|
# 5. Name fallback with md5_composite + direct MD5 per candidate
|
||||||
|
md5_set = set(md5_list)
|
||||||
|
candidates = []
|
||||||
|
for match_sha1 in by_name.get(name, []):
|
||||||
|
if match_sha1 in files_db:
|
||||||
|
entry = files_db[match_sha1]
|
||||||
|
path = entry["path"]
|
||||||
|
if os.path.exists(path):
|
||||||
|
candidates.append((path, entry.get("md5", "")))
|
||||||
|
|
||||||
|
if candidates:
|
||||||
|
if zipped_file:
|
||||||
|
candidates = [(p, m) for p, m in candidates if ".zip" in os.path.basename(p)]
|
||||||
|
if md5_set:
|
||||||
|
for path, db_md5 in candidates:
|
||||||
|
if ".zip" in os.path.basename(path):
|
||||||
|
try:
|
||||||
|
composite = md5_composite(path).lower()
|
||||||
|
if composite in md5_set:
|
||||||
|
return path, "exact"
|
||||||
|
except (zipfile.BadZipFile, OSError):
|
||||||
|
pass
|
||||||
|
if db_md5.lower() in md5_set:
|
||||||
|
return path, "exact"
|
||||||
|
primary = [p for p, _ in candidates if "/.variants/" not in p]
|
||||||
|
return (primary[0] if primary else candidates[0][0]), "hash_mismatch"
|
||||||
|
|
||||||
|
return None, "not_found"
|
||||||
|
|
||||||
|
|
||||||
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
|
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
|
||||||
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
|
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
|
||||||
dest = os.path.realpath(dest_dir)
|
dest = os.path.realpath(dest_dir)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import zipfile
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
from common import load_database, load_platform_config, md5_composite
|
from common import load_database, load_platform_config, md5_composite, resolve_local_file
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import yaml
|
import yaml
|
||||||
@@ -100,10 +100,10 @@ def _sanitize_path(raw: str) -> str:
|
|||||||
|
|
||||||
def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
||||||
zip_contents: dict | None = None) -> tuple[str | None, str]:
|
zip_contents: dict | None = None) -> tuple[str | None, str]:
|
||||||
"""Resolve a BIOS file to its local path using database.json.
|
"""Resolve a BIOS file with storage tiers and release asset fallback.
|
||||||
|
|
||||||
Returns (local_path, status) where status is one of:
|
Wraps common.resolve_local_file() with pack-specific logic for
|
||||||
exact, zip_exact, hash_mismatch, external, user_provided, not_found.
|
storage tiers (external/user_provided) and large file release assets.
|
||||||
"""
|
"""
|
||||||
storage = file_entry.get("storage", "embedded")
|
storage = file_entry.get("storage", "embedded")
|
||||||
if storage == "user_provided":
|
if storage == "user_provided":
|
||||||
@@ -111,90 +111,15 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
|||||||
if storage == "external":
|
if storage == "external":
|
||||||
return None, "external"
|
return None, "external"
|
||||||
|
|
||||||
sha1 = file_entry.get("sha1")
|
path, status = resolve_local_file(file_entry, db, zip_contents)
|
||||||
md5_raw = file_entry.get("md5", "")
|
if path:
|
||||||
name = file_entry.get("name", "")
|
return path, status
|
||||||
zipped_file = file_entry.get("zipped_file")
|
|
||||||
|
|
||||||
# Recalbox uses comma-separated MD5 lists for accepted variants
|
|
||||||
md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else []
|
|
||||||
|
|
||||||
if sha1 and sha1 in db.get("files", {}):
|
|
||||||
local_path = db["files"][sha1]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
return local_path, "exact"
|
|
||||||
|
|
||||||
by_md5 = db.get("indexes", {}).get("by_md5", {})
|
|
||||||
|
|
||||||
# Skip MD5 direct lookup for zipped_file entries: the md5 is for the inner ROM,
|
|
||||||
# not the container ZIP. Matching it would resolve to the standalone ROM file.
|
|
||||||
if md5_list and not zipped_file:
|
|
||||||
for md5_candidate in md5_list:
|
|
||||||
sha1_from_md5 = by_md5.get(md5_candidate)
|
|
||||||
if sha1_from_md5 and sha1_from_md5 in db["files"]:
|
|
||||||
local_path = db["files"][sha1_from_md5]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
return local_path, "exact"
|
|
||||||
|
|
||||||
# Truncated MD5 match (batocera-systems bug: 29 chars instead of 32)
|
|
||||||
if len(md5_candidate) < 32:
|
|
||||||
for db_md5, db_sha1 in by_md5.items():
|
|
||||||
if db_md5.startswith(md5_candidate) and db_sha1 in db["files"]:
|
|
||||||
local_path = db["files"][db_sha1]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
return local_path, "exact"
|
|
||||||
|
|
||||||
if zipped_file and md5_list and zip_contents:
|
|
||||||
for md5_candidate in md5_list:
|
|
||||||
if md5_candidate in zip_contents:
|
|
||||||
zip_sha1 = zip_contents[md5_candidate]
|
|
||||||
if zip_sha1 in db["files"]:
|
|
||||||
local_path = db["files"][zip_sha1]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
return local_path, "zip_exact"
|
|
||||||
|
|
||||||
# No MD5 specified = any local file with that name is acceptable
|
|
||||||
if not md5_list:
|
|
||||||
name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
|
|
||||||
candidates = []
|
|
||||||
for match_sha1 in name_matches:
|
|
||||||
if match_sha1 in db["files"]:
|
|
||||||
local_path = db["files"][match_sha1]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
candidates.append(local_path)
|
|
||||||
if candidates:
|
|
||||||
primary = [p for p in candidates if "/.variants/" not in p]
|
|
||||||
return (primary[0] if primary else candidates[0]), "exact"
|
|
||||||
|
|
||||||
# Name fallback: check md5_composite for ZIPs (Recalbox Zip::Md5Composite)
|
|
||||||
md5_set = set(md5_list)
|
|
||||||
name_matches = db.get("indexes", {}).get("by_name", {}).get(name, [])
|
|
||||||
candidates = []
|
|
||||||
for match_sha1 in name_matches:
|
|
||||||
if match_sha1 in db["files"]:
|
|
||||||
local_path = db["files"][match_sha1]["path"]
|
|
||||||
if os.path.exists(local_path):
|
|
||||||
candidates.append((local_path, db["files"][match_sha1].get("md5", "")))
|
|
||||||
|
|
||||||
if candidates and md5_set:
|
|
||||||
# Try md5_composite for ZIP files before falling back to hash_mismatch
|
|
||||||
for path, db_md5 in candidates:
|
|
||||||
if ".zip" in os.path.basename(path):
|
|
||||||
try:
|
|
||||||
composite = md5_composite(path).lower()
|
|
||||||
if composite in md5_set:
|
|
||||||
return path, "exact"
|
|
||||||
except (zipfile.BadZipFile, OSError):
|
|
||||||
pass
|
|
||||||
# Also check direct MD5 match per candidate
|
|
||||||
if db_md5.lower() in md5_set:
|
|
||||||
return path, "exact"
|
|
||||||
|
|
||||||
if candidates:
|
|
||||||
primary = [p for p, _ in candidates if "/.variants/" not in p]
|
|
||||||
return (primary[0] if primary else candidates[0][0]), "hash_mismatch"
|
|
||||||
|
|
||||||
# Last resort: large files from GitHub release assets
|
# Last resort: large files from GitHub release assets
|
||||||
|
name = file_entry.get("name", "")
|
||||||
|
sha1 = file_entry.get("sha1")
|
||||||
|
md5_raw = file_entry.get("md5", "")
|
||||||
|
md5_list = [m.strip().lower() for m in md5_raw.split(",") if m.strip()] if md5_raw else []
|
||||||
first_md5 = md5_list[0] if md5_list else ""
|
first_md5 = md5_list[0] if md5_list else ""
|
||||||
cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5)
|
cached = fetch_large_file(name, expected_sha1=sha1 or "", expected_md5=first_md5)
|
||||||
if cached:
|
if cached:
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ except ImportError:
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
from common import load_platform_config, md5sum, md5_composite
|
from common import load_platform_config, md5sum, md5_composite, resolve_local_file
|
||||||
|
|
||||||
DEFAULT_DB = "database.json"
|
DEFAULT_DB = "database.json"
|
||||||
DEFAULT_PLATFORMS_DIR = "platforms"
|
DEFAULT_PLATFORMS_DIR = "platforms"
|
||||||
@@ -72,78 +72,9 @@ def check_inside_zip(container: str, file_name: str, expected_md5: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
|
def resolve_to_local_path(file_entry: dict, db: dict) -> str | None:
|
||||||
"""Find the local file path for a BIOS entry using database.json.
|
"""Find the local file path for a BIOS entry. Delegates to common.resolve_local_file."""
|
||||||
|
path, _ = resolve_local_file(file_entry, db)
|
||||||
Tries: SHA1 -> MD5 -> name index. Returns the first existing path found.
|
return path
|
||||||
For zipped_file entries, the md5 refers to the inner ROM, not the ZIP
|
|
||||||
container, so MD5-based lookup is skipped to avoid resolving to a
|
|
||||||
standalone ROM file instead of the ZIP.
|
|
||||||
"""
|
|
||||||
sha1 = file_entry.get("sha1")
|
|
||||||
md5 = file_entry.get("md5")
|
|
||||||
name = file_entry.get("name", "")
|
|
||||||
has_zipped_file = bool(file_entry.get("zipped_file"))
|
|
||||||
files_db = db.get("files", {})
|
|
||||||
by_md5 = db.get("indexes", {}).get("by_md5", {})
|
|
||||||
by_name = db.get("indexes", {}).get("by_name", {})
|
|
||||||
|
|
||||||
if sha1 and sha1 in files_db:
|
|
||||||
path = files_db[sha1]["path"]
|
|
||||||
if os.path.exists(path):
|
|
||||||
return path
|
|
||||||
|
|
||||||
# Split comma-separated MD5 lists (Recalbox uses multi-hash)
|
|
||||||
md5_candidates = [m.strip().lower() for m in md5.split(",") if m.strip()] if md5 else []
|
|
||||||
|
|
||||||
# Skip MD5 lookup for zipped_file entries: the md5 is for the inner ROM,
|
|
||||||
# not the container ZIP, so matching it would resolve to the wrong file.
|
|
||||||
if not has_zipped_file:
|
|
||||||
for md5_candidate in md5_candidates:
|
|
||||||
if md5_candidate in by_md5:
|
|
||||||
sha1_match = by_md5[md5_candidate]
|
|
||||||
if sha1_match in files_db:
|
|
||||||
path = files_db[sha1_match]["path"]
|
|
||||||
if os.path.exists(path):
|
|
||||||
return path
|
|
||||||
|
|
||||||
# Truncated MD5 (batocera-systems bug: 29 chars instead of 32)
|
|
||||||
if len(md5_candidate) < 32:
|
|
||||||
for db_md5, db_sha1 in by_md5.items():
|
|
||||||
if db_md5.startswith(md5_candidate) and db_sha1 in files_db:
|
|
||||||
path = files_db[db_sha1]["path"]
|
|
||||||
if os.path.exists(path):
|
|
||||||
return path
|
|
||||||
|
|
||||||
if name in by_name:
|
|
||||||
# Prefer the candidate whose MD5 matches the expected hash
|
|
||||||
candidates = []
|
|
||||||
for match_sha1 in by_name[name]:
|
|
||||||
if match_sha1 in files_db:
|
|
||||||
entry = files_db[match_sha1]
|
|
||||||
path = entry["path"]
|
|
||||||
if os.path.exists(path):
|
|
||||||
candidates.append((path, entry.get("md5", "")))
|
|
||||||
if candidates:
|
|
||||||
if has_zipped_file:
|
|
||||||
candidates = [(p, m) for p, m in candidates if p.endswith(".zip")]
|
|
||||||
if md5 and not has_zipped_file:
|
|
||||||
md5_lower = md5.lower()
|
|
||||||
for path, db_md5 in candidates:
|
|
||||||
if db_md5.lower() == md5_lower:
|
|
||||||
return path
|
|
||||||
# Try composite MD5 for ZIP files (Recalbox uses Zip::Md5Composite)
|
|
||||||
for path, _ in candidates:
|
|
||||||
if ".zip" in os.path.basename(path):
|
|
||||||
try:
|
|
||||||
if md5_composite(path).lower() == md5_lower:
|
|
||||||
return path
|
|
||||||
except (zipfile.BadZipFile, OSError):
|
|
||||||
pass
|
|
||||||
if candidates:
|
|
||||||
primary = [p for p, _ in candidates if "/.variants/" not in p]
|
|
||||||
return primary[0] if primary else candidates[0][0]
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def verify_entry_existence(file_entry: dict, local_path: str | None) -> dict:
|
def verify_entry_existence(file_entry: dict, local_path: str | None) -> dict:
|
||||||
|
|||||||
Reference in New Issue
Block a user