mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
refactor: move fetch_large_file to common, auto-download on db rebuild
This commit is contained in:
@@ -9,6 +9,8 @@ from __future__ import annotations
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zipfile
|
||||
import zlib
|
||||
from pathlib import Path
|
||||
@@ -694,6 +696,59 @@ def filter_files_by_mode(files: list[dict], standalone: bool) -> list[dict]:
|
||||
return result
|
||||
|
||||
|
||||
# Tag of the GitHub release whose assets are the large files.
LARGE_FILES_RELEASE = "large-files"
# "owner/repo" slug used to build the release download URL.
LARGE_FILES_REPO = "Abdess/retrobios"
# Default local directory downloaded files are stored in.
LARGE_FILES_CACHE = ".cache/large"


def _large_file_hashes_match(path: str, expected_sha1: str,
                             expected_md5: str) -> bool:
    """Return True when *path* satisfies every expected hash that was given.

    ``expected_md5`` may hold several comma-separated digests; any one of
    them is accepted.  With no expectation at all the file is trusted as-is
    and is not hashed.
    """
    if not (expected_sha1 or expected_md5):
        return True
    hashes = compute_hashes(path)
    if expected_sha1 and hashes["sha1"].lower() != expected_sha1.lower():
        return False
    if expected_md5:
        accepted = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
        return hashes["md5"].lower() in accepted
    return True


def fetch_large_file(name: str, dest_dir: str = LARGE_FILES_CACHE,
                     expected_sha1: str = "", expected_md5: str = "") -> str | None:
    """Download a large file from the 'large-files' GitHub release if not cached.

    Args:
        name: Asset file name inside the release; also the cache file name.
        dest_dir: Directory holding the cached copy.
        expected_sha1: Optional SHA-1 the file must match (case-insensitive).
        expected_md5: Optional MD5, or comma-separated list of acceptable
            MD5s, the file must match (case-insensitive).

    Returns:
        Path of the cached file, or ``None`` when the download fails or the
        downloaded data does not match the expected hashes.
    """
    import http.client
    from urllib.parse import quote

    cached = os.path.join(dest_dir, name)
    if os.path.exists(cached):
        if _large_file_hashes_match(cached, expected_sha1, expected_md5):
            return cached
        # Stale or corrupt copy: drop it and fall through to a fresh download.
        os.unlink(cached)

    url = (
        f"https://github.com/{LARGE_FILES_REPO}/releases/download/"
        f"{LARGE_FILES_RELEASE}/{quote(name)}"
    )
    request = urllib.request.Request(url, headers={"User-Agent": "retrobios/1.0"})

    # Stream into a sibling ".part" file and only move it into place once it
    # is complete and verified, so an interrupted transfer can never be
    # mistaken for a valid cached file by a later call.
    partial = cached + ".part"
    try:
        with urllib.request.urlopen(request, timeout=300) as resp:
            os.makedirs(dest_dir, exist_ok=True)
            with open(partial, "wb") as out:
                for chunk in iter(lambda: resp.read(65536), b""):
                    out.write(chunk)
        if not _large_file_hashes_match(partial, expected_sha1, expected_md5):
            return None
        os.replace(partial, cached)
    except (OSError, http.client.HTTPException):
        # URLError/HTTPError are OSError subclasses; read timeouts and
        # connection resets surface as plain OSError, and a truncated body
        # as http.client.IncompleteRead.  All mean "file not available".
        return None
    finally:
        # Best-effort cleanup; after a successful os.replace nothing is left.
        try:
            os.unlink(partial)
        except OSError:
            pass
    return cached
|
||||
|
||||
|
||||
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
|
||||
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
|
||||
dest = os.path.realpath(dest_dir)
|
||||
|
||||
@@ -216,17 +216,18 @@ def save_cache(cache_path: str, cache: dict):
|
||||
json.dump(cache, f)
|
||||
|
||||
|
||||
def _load_gitignored_large_files() -> dict[str, str]:
    """Read .gitignore and return {filename: bios_path} for large files.

    Only lines that start with ``bios/`` (after stripping surrounding
    whitespace) are considered.  The key is the final path component of the
    line and the value is the full stripped line.  When two entries share a
    file name, the later one wins.

    Returns:
        The mapping described above, or an empty dict when ``.gitignore``
        does not exist in the current working directory.
    """
    try:
        # Decode explicitly as UTF-8 so non-ASCII file names do not depend on
        # the platform's locale encoding.
        text = Path(".gitignore").read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}

    entries: dict[str, str] = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # A line starting with "bios/" cannot be a "#" comment, so no
        # separate comment check is needed.
        if line.startswith("bios/"):
            entries[Path(line).name] = line
    return entries
|
||||
|
||||
|
||||
def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
||||
@@ -234,14 +235,14 @@ def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
||||
|
||||
Large files (>50 MB) are stored as GitHub release assets and listed
|
||||
in .gitignore. When generate_db runs locally without them, their
|
||||
entries would be lost. This reads the existing database and re-adds
|
||||
entries whose paths match .gitignore bios/ entries.
|
||||
|
||||
If the file exists in .cache/large/, the path is updated so that
|
||||
resolve_local_file can find it for verify and pack generation.
|
||||
entries would be lost. This reads the existing database, downloads
|
||||
missing files from the release, and re-adds entries with paths
|
||||
pointing to the local cache.
|
||||
"""
|
||||
gitignored = _load_gitignored_bios_paths()
|
||||
if not gitignored:
|
||||
from common import fetch_large_file
|
||||
|
||||
large_files = _load_gitignored_large_files()
|
||||
if not large_files:
|
||||
return 0
|
||||
|
||||
try:
|
||||
@@ -250,18 +251,24 @@ def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
return 0
|
||||
|
||||
cache_dir = Path(".cache/large")
|
||||
count = 0
|
||||
for sha1, entry in existing_db.get("files", {}).items():
|
||||
if sha1 in files:
|
||||
continue
|
||||
name = entry.get("name", "")
|
||||
path = entry.get("path", "")
|
||||
if path in gitignored and sha1 not in files:
|
||||
# Point to cached copy if available
|
||||
name = entry.get("name", "")
|
||||
cached = cache_dir / name
|
||||
if cached.exists():
|
||||
entry = {**entry, "path": str(cached)}
|
||||
files[sha1] = entry
|
||||
count += 1
|
||||
# Match by gitignored bios/ path OR by filename of a known large file
|
||||
if path not in large_files.values() and name not in large_files:
|
||||
continue
|
||||
cached = fetch_large_file(
|
||||
name,
|
||||
expected_sha1=entry.get("sha1", ""),
|
||||
expected_md5=entry.get("md5", ""),
|
||||
)
|
||||
if cached:
|
||||
entry = {**entry, "path": cached}
|
||||
files[sha1] = entry
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ from pathlib import Path
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from common import (
|
||||
_build_validation_index, build_zip_contents_index, check_file_validation,
|
||||
check_inside_zip, compute_hashes, filter_files_by_mode,
|
||||
check_inside_zip, compute_hashes, fetch_large_file, filter_files_by_mode,
|
||||
group_identical_platforms, list_emulator_profiles, list_system_ids,
|
||||
load_database, load_data_dir_registry, load_emulator_profiles,
|
||||
load_platform_config, md5_composite, resolve_local_file,
|
||||
@@ -43,57 +43,9 @@ DEFAULT_PLATFORMS_DIR = "platforms"
|
||||
DEFAULT_DB_FILE = "database.json"
|
||||
DEFAULT_OUTPUT_DIR = "dist"
|
||||
DEFAULT_BIOS_DIR = "bios"
|
||||
LARGE_FILES_RELEASE = "large-files"
|
||||
LARGE_FILES_REPO = "Abdess/retrobios"
|
||||
|
||||
MAX_ENTRY_SIZE = 512 * 1024 * 1024 # 512MB
|
||||
|
||||
|
||||
def _verify_file_hash(path: str, expected_sha1: str = "",
                      expected_md5: str = "") -> bool:
    """Check the file at *path* against an expected SHA-1 or MD5.

    With no expectation the file is accepted without being hashed.  When a
    SHA-1 is given it alone decides the result; otherwise the file's MD5 must
    equal one of the comma-separated digests in ``expected_md5``.  All
    comparisons are case-insensitive.
    """
    if not (expected_sha1 or expected_md5):
        return True

    digests = compute_hashes(path)
    if not expected_sha1:
        accepted = [
            candidate.strip().lower()
            for candidate in expected_md5.split(",")
            if candidate.strip()
        ]
        return digests["md5"].lower() in accepted
    return digests["sha1"].lower() == expected_sha1.lower()
|
||||
|
||||
|
||||
def fetch_large_file(name: str, dest_dir: str = ".cache/large",
                     expected_sha1: str = "", expected_md5: str = "") -> str | None:
    """Download a large file from the 'large-files' GitHub release if not cached.

    Args:
        name: Asset file name inside the release; also the cache file name.
        dest_dir: Directory holding the cached copy.
        expected_sha1: Optional SHA-1 passed to ``_verify_file_hash``.
        expected_md5: Optional MD5 (or comma-separated MD5s) passed to
            ``_verify_file_hash``.

    Returns:
        Path of the cached file, or ``None`` when the download fails or the
        downloaded data does not pass hash verification.
    """
    import http.client
    from urllib.parse import quote

    must_verify = bool(expected_sha1 or expected_md5)

    cached = os.path.join(dest_dir, name)
    if os.path.exists(cached):
        if not must_verify or _verify_file_hash(cached, expected_sha1, expected_md5):
            return cached
        # Stale or corrupt copy: drop it and fall through to a fresh download.
        os.unlink(cached)

    url = (
        f"https://github.com/{LARGE_FILES_REPO}/releases/download/"
        f"{LARGE_FILES_RELEASE}/{quote(name)}"
    )
    request = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack/1.0"})

    # Stream into a sibling ".part" file and only move it into place once it
    # is complete and verified, so an interrupted transfer can never be
    # mistaken for a valid cached file by a later call.
    partial = cached + ".part"
    try:
        with urllib.request.urlopen(request, timeout=300) as resp:
            os.makedirs(dest_dir, exist_ok=True)
            with open(partial, "wb") as out:
                for chunk in iter(lambda: resp.read(65536), b""):
                    out.write(chunk)
        if must_verify and not _verify_file_hash(partial, expected_sha1, expected_md5):
            return None
        os.replace(partial, cached)
    except (OSError, http.client.HTTPException):
        # URLError/HTTPError are OSError subclasses; read timeouts and
        # connection resets surface as plain OSError, and a truncated body
        # as http.client.IncompleteRead.  All mean "file not available".
        return None
    finally:
        # Best-effort cleanup; after a successful os.replace nothing is left.
        try:
            os.unlink(partial)
        except OSError:
            pass
    return cached
|
||||
|
||||
|
||||
def _find_candidate_satisfying_both(
|
||||
file_entry: dict,
|
||||
db: dict,
|
||||
|
||||
Reference in New Issue
Block a user