mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-15 13:22:31 -05:00
refactor: move fetch_large_file to common, auto-download on db rebuild
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"generated_at": "2026-03-25T11:51:38Z",
|
"generated_at": "2026-03-25T12:18:43Z",
|
||||||
"total_files": 6733,
|
"total_files": 6733,
|
||||||
"total_size": 5288644732,
|
"total_size": 5288644732,
|
||||||
"files": {
|
"files": {
|
||||||
@@ -67274,7 +67274,7 @@
|
|||||||
"adler32": "701e6531"
|
"adler32": "701e6531"
|
||||||
},
|
},
|
||||||
"ac4b78d53c7a97da2451ca35498395d8dd1e3024": {
|
"ac4b78d53c7a97da2451ca35498395d8dd1e3024": {
|
||||||
"path": "bios/Arcade/Arcade/Firmware.19.0.0.zip",
|
"path": ".cache/large/Firmware.19.0.0.zip",
|
||||||
"name": "Firmware.19.0.0.zip",
|
"name": "Firmware.19.0.0.zip",
|
||||||
"size": 338076508,
|
"size": 338076508,
|
||||||
"sha1": "ac4b78d53c7a97da2451ca35498395d8dd1e3024",
|
"sha1": "ac4b78d53c7a97da2451ca35498395d8dd1e3024",
|
||||||
@@ -67284,7 +67284,7 @@
|
|||||||
"adler32": "471a3291"
|
"adler32": "471a3291"
|
||||||
},
|
},
|
||||||
"add40c002084e8e25768671877b2aa603aaf5cb1": {
|
"add40c002084e8e25768671877b2aa603aaf5cb1": {
|
||||||
"path": "bios/Arcade/Arcade/maclc3.zip",
|
"path": ".cache/large/maclc3.zip",
|
||||||
"name": "maclc3.zip",
|
"name": "maclc3.zip",
|
||||||
"size": 189428461,
|
"size": 189428461,
|
||||||
"sha1": "add40c002084e8e25768671877b2aa603aaf5cb1",
|
"sha1": "add40c002084e8e25768671877b2aa603aaf5cb1",
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ from __future__ import annotations
|
|||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
import zipfile
|
import zipfile
|
||||||
import zlib
|
import zlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -694,6 +696,59 @@ def filter_files_by_mode(files: list[dict], standalone: bool) -> list[dict]:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
LARGE_FILES_RELEASE = "large-files"
LARGE_FILES_REPO = "Abdess/retrobios"
LARGE_FILES_CACHE = ".cache/large"


def _large_file_matches(path: str, expected_sha1: str, expected_md5: str) -> bool:
    """Return True when *path* satisfies every expected hash that was supplied.

    With no expected hash the file is accepted without being read.
    *expected_md5* may hold several comma-separated candidates; matching
    any one of them is enough.
    """
    if not expected_sha1 and not expected_md5:
        return True
    hashes = compute_hashes(path)
    if expected_sha1 and hashes["sha1"].lower() != expected_sha1.lower():
        return False
    if expected_md5:
        md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
        if hashes["md5"].lower() not in md5_list:
            return False
    return True


def _discard_file(path: str) -> None:
    """Delete *path* if it exists; a missing file is not an error."""
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass


def fetch_large_file(name: str, dest_dir: str = LARGE_FILES_CACHE,
                     expected_sha1: str = "", expected_md5: str = "") -> str | None:
    """Download a large file from the 'large-files' GitHub release if not cached.

    Args:
        name: Asset file name inside the release; also the cache file name.
        dest_dir: Directory used as the local cache.
        expected_sha1: Optional SHA-1 the file must have (case-insensitive).
        expected_md5: Optional MD5, or comma-separated list of acceptable
            MD5 values (case-insensitive).

    Returns:
        The path of the cached file, or None when the download fails or the
        downloaded data does not match the expected hashes.
    """
    cached = os.path.join(dest_dir, name)
    if os.path.exists(cached):
        if _large_file_matches(cached, expected_sha1, expected_md5):
            return cached
        # Stale or corrupt cache entry: drop it and download again.
        os.unlink(cached)

    encoded_name = urllib.request.quote(name)
    url = f"https://github.com/{LARGE_FILES_REPO}/releases/download/{LARGE_FILES_RELEASE}/{encoded_name}"

    # Stream into a sibling ".part" file so an interrupted transfer can never
    # leave a truncated file at the final cache path, where a later call
    # without expected hashes would accept it as a valid cache hit.
    partial = cached + ".part"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "retrobios/1.0"})
        with urllib.request.urlopen(req, timeout=300) as resp:
            os.makedirs(dest_dir, exist_ok=True)
            with open(partial, "wb") as f:
                while True:
                    chunk = resp.read(65536)
                    if not chunk:
                        break
                    f.write(chunk)
    except (urllib.error.URLError, ConnectionError, TimeoutError):
        # HTTPError is a URLError subclass. Resets and timeouts raised while
        # reading the body are not wrapped in URLError, so list them too.
        _discard_file(partial)
        return None
    except BaseException:
        # Any other failure (disk error, Ctrl-C, ...) still propagates, but
        # must not leave a partial download behind.
        _discard_file(partial)
        raise

    # Verify before publishing: the final path only ever holds accepted data.
    if not _large_file_matches(partial, expected_sha1, expected_md5):
        os.unlink(partial)
        return None
    os.replace(partial, cached)
    return cached
|
||||||
|
|
||||||
|
|
||||||
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
|
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
|
||||||
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
|
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
|
||||||
dest = os.path.realpath(dest_dir)
|
dest = os.path.realpath(dest_dir)
|
||||||
|
|||||||
@@ -216,17 +216,18 @@ def save_cache(cache_path: str, cache: dict):
|
|||||||
json.dump(cache, f)
|
json.dump(cache, f)
|
||||||
|
|
||||||
|
|
||||||
def _load_gitignored_bios_paths() -> set[str]:
|
def _load_gitignored_large_files() -> dict[str, str]:
|
||||||
"""Read .gitignore and return bios/ paths that are listed (large files)."""
|
"""Read .gitignore and return {filename: bios_path} for large files."""
|
||||||
gitignore = Path(".gitignore")
|
gitignore = Path(".gitignore")
|
||||||
if not gitignore.exists():
|
if not gitignore.exists():
|
||||||
return set()
|
return {}
|
||||||
paths = set()
|
entries = {}
|
||||||
for line in gitignore.read_text().splitlines():
|
for line in gitignore.read_text().splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line.startswith("bios/") and not line.startswith("#"):
|
if line.startswith("bios/") and not line.startswith("#"):
|
||||||
paths.add(line)
|
name = Path(line).name
|
||||||
return paths
|
entries[name] = line
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
||||||
@@ -234,14 +235,14 @@ def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
|||||||
|
|
||||||
Large files (>50 MB) are stored as GitHub release assets and listed
|
Large files (>50 MB) are stored as GitHub release assets and listed
|
||||||
in .gitignore. When generate_db runs locally without them, their
|
in .gitignore. When generate_db runs locally without them, their
|
||||||
entries would be lost. This reads the existing database and re-adds
|
entries would be lost. This reads the existing database, downloads
|
||||||
entries whose paths match .gitignore bios/ entries.
|
missing files from the release, and re-adds entries with paths
|
||||||
|
pointing to the local cache.
|
||||||
If the file exists in .cache/large/, the path is updated so that
|
|
||||||
resolve_local_file can find it for verify and pack generation.
|
|
||||||
"""
|
"""
|
||||||
gitignored = _load_gitignored_bios_paths()
|
from common import fetch_large_file
|
||||||
if not gitignored:
|
|
||||||
|
large_files = _load_gitignored_large_files()
|
||||||
|
if not large_files:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -250,18 +251,24 @@ def _preserve_large_file_entries(files: dict, db_path: str) -> int:
|
|||||||
except (FileNotFoundError, json.JSONDecodeError):
|
except (FileNotFoundError, json.JSONDecodeError):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
cache_dir = Path(".cache/large")
|
|
||||||
count = 0
|
count = 0
|
||||||
for sha1, entry in existing_db.get("files", {}).items():
|
for sha1, entry in existing_db.get("files", {}).items():
|
||||||
|
if sha1 in files:
|
||||||
|
continue
|
||||||
|
name = entry.get("name", "")
|
||||||
path = entry.get("path", "")
|
path = entry.get("path", "")
|
||||||
if path in gitignored and sha1 not in files:
|
# Match by gitignored bios/ path OR by filename of a known large file
|
||||||
# Point to cached copy if available
|
if path not in large_files.values() and name not in large_files:
|
||||||
name = entry.get("name", "")
|
continue
|
||||||
cached = cache_dir / name
|
cached = fetch_large_file(
|
||||||
if cached.exists():
|
name,
|
||||||
entry = {**entry, "path": str(cached)}
|
expected_sha1=entry.get("sha1", ""),
|
||||||
files[sha1] = entry
|
expected_md5=entry.get("md5", ""),
|
||||||
count += 1
|
)
|
||||||
|
if cached:
|
||||||
|
entry = {**entry, "path": cached}
|
||||||
|
files[sha1] = entry
|
||||||
|
count += 1
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ from pathlib import Path
|
|||||||
sys.path.insert(0, os.path.dirname(__file__))
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
from common import (
|
from common import (
|
||||||
_build_validation_index, build_zip_contents_index, check_file_validation,
|
_build_validation_index, build_zip_contents_index, check_file_validation,
|
||||||
check_inside_zip, compute_hashes, filter_files_by_mode,
|
check_inside_zip, compute_hashes, fetch_large_file, filter_files_by_mode,
|
||||||
group_identical_platforms, list_emulator_profiles, list_system_ids,
|
group_identical_platforms, list_emulator_profiles, list_system_ids,
|
||||||
load_database, load_data_dir_registry, load_emulator_profiles,
|
load_database, load_data_dir_registry, load_emulator_profiles,
|
||||||
load_platform_config, md5_composite, resolve_local_file,
|
load_platform_config, md5_composite, resolve_local_file,
|
||||||
@@ -43,57 +43,9 @@ DEFAULT_PLATFORMS_DIR = "platforms"
|
|||||||
DEFAULT_DB_FILE = "database.json"
|
DEFAULT_DB_FILE = "database.json"
|
||||||
DEFAULT_OUTPUT_DIR = "dist"
|
DEFAULT_OUTPUT_DIR = "dist"
|
||||||
DEFAULT_BIOS_DIR = "bios"
|
DEFAULT_BIOS_DIR = "bios"
|
||||||
LARGE_FILES_RELEASE = "large-files"
|
|
||||||
LARGE_FILES_REPO = "Abdess/retrobios"
|
|
||||||
|
|
||||||
MAX_ENTRY_SIZE = 512 * 1024 * 1024 # 512MB
|
MAX_ENTRY_SIZE = 512 * 1024 * 1024 # 512MB
|
||||||
|
|
||||||
|
|
||||||
def _verify_file_hash(path: str, expected_sha1: str = "",
|
|
||||||
expected_md5: str = "") -> bool:
|
|
||||||
if not expected_sha1 and not expected_md5:
|
|
||||||
return True
|
|
||||||
hashes = compute_hashes(path)
|
|
||||||
if expected_sha1:
|
|
||||||
return hashes["sha1"].lower() == expected_sha1.lower()
|
|
||||||
md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
|
|
||||||
return hashes["md5"].lower() in md5_list
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_large_file(name: str, dest_dir: str = ".cache/large",
|
|
||||||
expected_sha1: str = "", expected_md5: str = "") -> str | None:
|
|
||||||
"""Download a large file from the 'large-files' GitHub release if not cached."""
|
|
||||||
cached = os.path.join(dest_dir, name)
|
|
||||||
if os.path.exists(cached):
|
|
||||||
if expected_sha1 or expected_md5:
|
|
||||||
if _verify_file_hash(cached, expected_sha1, expected_md5):
|
|
||||||
return cached
|
|
||||||
os.unlink(cached)
|
|
||||||
else:
|
|
||||||
return cached
|
|
||||||
|
|
||||||
encoded_name = urllib.request.quote(name)
|
|
||||||
url = f"https://github.com/{LARGE_FILES_REPO}/releases/download/{LARGE_FILES_RELEASE}/{encoded_name}"
|
|
||||||
try:
|
|
||||||
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-pack/1.0"})
|
|
||||||
with urllib.request.urlopen(req, timeout=300) as resp:
|
|
||||||
os.makedirs(dest_dir, exist_ok=True)
|
|
||||||
with open(cached, "wb") as f:
|
|
||||||
while True:
|
|
||||||
chunk = resp.read(65536)
|
|
||||||
if not chunk:
|
|
||||||
break
|
|
||||||
f.write(chunk)
|
|
||||||
except (urllib.error.URLError, urllib.error.HTTPError):
|
|
||||||
return None
|
|
||||||
|
|
||||||
if expected_sha1 or expected_md5:
|
|
||||||
if not _verify_file_hash(cached, expected_sha1, expected_md5):
|
|
||||||
os.unlink(cached)
|
|
||||||
return None
|
|
||||||
return cached
|
|
||||||
|
|
||||||
|
|
||||||
def _find_candidate_satisfying_both(
|
def _find_candidate_satisfying_both(
|
||||||
file_entry: dict,
|
file_entry: dict,
|
||||||
db: dict,
|
db: dict,
|
||||||
|
|||||||
Reference in New Issue
Block a user