mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
fix: pack naming, large file preservation, discrepancy reporting
This commit is contained in:
@@ -416,16 +416,23 @@ def group_identical_platforms(
|
||||
"""Group platforms that produce identical packs (same files + base_destination).
|
||||
|
||||
Returns [(group_of_platform_names, representative), ...].
|
||||
The representative is the root platform (one that does not inherit).
|
||||
"""
|
||||
fingerprints: dict[str, list[str]] = {}
|
||||
representatives: dict[str, str] = {}
|
||||
inherits: dict[str, bool] = {}
|
||||
|
||||
for platform in platforms:
|
||||
try:
|
||||
raw_path = os.path.join(platforms_dir, f"{platform}.yml")
|
||||
with open(raw_path) as f:
|
||||
raw = yaml.safe_load(f) or {}
|
||||
inherits[platform] = "inherits" in raw
|
||||
config = load_platform_config(platform, platforms_dir)
|
||||
except FileNotFoundError:
|
||||
fingerprints.setdefault(platform, []).append(platform)
|
||||
representatives.setdefault(platform, platform)
|
||||
inherits[platform] = False
|
||||
continue
|
||||
|
||||
base_dest = config.get("base_destination", "")
|
||||
@@ -440,9 +447,16 @@ def group_identical_platforms(
|
||||
|
||||
fp = hashlib.sha1("|".join(sorted(entries)).encode()).hexdigest()
|
||||
fingerprints.setdefault(fp, []).append(platform)
|
||||
representatives.setdefault(fp, platform)
|
||||
# Prefer the root platform (no inherits) as representative
|
||||
if fp not in representatives or (not inherits[platform] and inherits.get(representatives[fp], False)):
|
||||
representatives[fp] = platform
|
||||
|
||||
return [(group, representatives[fp]) for fp, group in fingerprints.items()]
|
||||
result = []
|
||||
for fp, group in fingerprints.items():
|
||||
rep = representatives[fp]
|
||||
ordered = [rep] + [p for p in group if p != rep]
|
||||
result.append((ordered, rep))
|
||||
return result
|
||||
|
||||
|
||||
def resolve_platform_cores(
|
||||
@@ -537,8 +551,10 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
|
||||
"min_size": None, "max_size": None,
|
||||
"crc32": set(), "md5": set(), "sha1": set(), "sha256": set(),
|
||||
"adler32": set(), "crypto_only": set(),
|
||||
"emulators": set(),
|
||||
}
|
||||
sources[fname] = {}
|
||||
index[fname]["emulators"].add(emu_name)
|
||||
index[fname]["checks"].update(checks)
|
||||
# Track non-reproducible crypto checks
|
||||
index[fname]["crypto_only"].update(
|
||||
@@ -584,6 +600,7 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
|
||||
for v in index.values():
|
||||
v["checks"] = sorted(v["checks"])
|
||||
v["crypto_only"] = sorted(v["crypto_only"])
|
||||
v["emulators"] = sorted(v["emulators"])
|
||||
# Keep hash sets as frozensets for O(1) lookup in check_file_validation
|
||||
return index
|
||||
|
||||
|
||||
@@ -216,6 +216,46 @@ def save_cache(cache_path: str, cache: dict):
|
||||
json.dump(cache, f)
|
||||
|
||||
|
||||
def _load_gitignored_bios_paths() -> set[str]:
    """Return the ``bios/`` entries listed in ``.gitignore``.

    ``.gitignore`` is looked up relative to the current working directory.
    Every line is stripped of surrounding whitespace and kept verbatim when
    it starts with ``bios/``.  Comment lines start with ``#`` and therefore
    can never start with ``bios/``, so no separate comment check is needed.

    Returns:
        The set of matching lines (by this project's convention, the large
        files kept out of the repository); an empty set when ``.gitignore``
        does not exist.
    """
    try:
        # EAFP: read directly rather than exists() + read, which is
        # race-prone.  The encoding is pinned so non-ASCII paths decode the
        # same way regardless of the platform locale.
        text = Path(".gitignore").read_text(encoding="utf-8")
    except FileNotFoundError:
        return set()

    stripped = (line.strip() for line in text.splitlines())
    return {line for line in stripped if line.startswith("bios/")}
|
||||
|
||||
|
||||
def _preserve_large_file_entries(files: dict, db_path: str) -> int:
    """Preserve database entries for large files not on disk.

    Large files (>50 MB) are stored as GitHub release assets and listed
    in .gitignore. When generate_db runs locally without them, their
    entries would be lost. This reads the existing database and re-adds
    entries whose paths match .gitignore bios/ entries.

    Args:
        files: Freshly generated entries keyed by SHA1; mutated in place.
            Existing keys are never overwritten.
        db_path: Path of the previously written JSON database.

    Returns:
        The number of entries copied into ``files``.  ``0`` when there are
        no matching .gitignore entries, or when the existing database is
        missing, is not valid JSON, or does not have the expected shape.
    """
    gitignored = _load_gitignored_bios_paths()
    if not gitignored:
        return 0

    try:
        with open(db_path) as f:
            existing_db = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return 0

    # A database that parses but has the wrong shape is treated like a
    # corrupt one: nothing to preserve, rather than an AttributeError.
    previous = existing_db.get("files") if isinstance(existing_db, dict) else None
    if not isinstance(previous, dict):
        return 0

    count = 0
    for sha1, entry in previous.items():
        if not isinstance(entry, dict):
            continue
        if entry.get("path", "") in gitignored and sha1 not in files:
            files[sha1] = entry
            count += 1
    return count
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Generate multi-indexed BIOS database")
|
||||
parser.add_argument("--force", action="store_true", help="Force rehash all files")
|
||||
@@ -236,6 +276,11 @@ def main():
|
||||
if not files:
|
||||
print("Warning: No BIOS files found", file=sys.stderr)
|
||||
|
||||
# Preserve entries for large files stored as release assets (.gitignore)
|
||||
preserved = _preserve_large_file_entries(files, args.output)
|
||||
if preserved:
|
||||
print(f" Preserved {preserved} large file entries from existing database")
|
||||
|
||||
platform_aliases = _collect_all_aliases(files)
|
||||
for sha1, name_list in platform_aliases.items():
|
||||
for alias_entry in name_list:
|
||||
|
||||
@@ -94,6 +94,47 @@ def fetch_large_file(name: str, dest_dir: str = ".cache/large",
|
||||
return cached
|
||||
|
||||
|
||||
def _find_candidate_satisfying_both(
|
||||
file_entry: dict,
|
||||
db: dict,
|
||||
local_path: str,
|
||||
validation_index: dict,
|
||||
bios_dir: str,
|
||||
) -> str | None:
|
||||
"""Search for a repo file that satisfies both platform MD5 and emulator validation.
|
||||
|
||||
When the current file passes platform verification but fails emulator checks,
|
||||
search all candidates with the same name for one that passes both.
|
||||
Returns a better path, or None if no upgrade found.
|
||||
"""
|
||||
fname = file_entry.get("name", "")
|
||||
if not fname:
|
||||
return None
|
||||
entry = validation_index.get(fname)
|
||||
if not entry:
|
||||
return None
|
||||
|
||||
md5_expected = file_entry.get("md5", "")
|
||||
md5_set = {m.strip().lower() for m in md5_expected.split(",") if m.strip()} if md5_expected else set()
|
||||
|
||||
by_name = db.get("indexes", {}).get("by_name", {})
|
||||
files_db = db.get("files", {})
|
||||
|
||||
for sha1 in by_name.get(fname, []):
|
||||
candidate = files_db.get(sha1, {})
|
||||
path = candidate.get("path", "")
|
||||
if not path or not os.path.exists(path) or os.path.realpath(path) == os.path.realpath(local_path):
|
||||
continue
|
||||
# Must still satisfy platform MD5
|
||||
if md5_set and candidate.get("md5", "").lower() not in md5_set:
|
||||
continue
|
||||
# Check emulator validation
|
||||
reason = check_file_validation(path, fname, validation_index, bios_dir)
|
||||
if reason is None:
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def _sanitize_path(raw: str) -> str:
|
||||
"""Strip path traversal components from a relative path."""
|
||||
raw = raw.replace("\\", "/")
|
||||
@@ -118,10 +159,11 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
||||
|
||||
path, status = resolve_local_file(file_entry, db, zip_contents,
|
||||
dest_hint=dest_hint)
|
||||
if path:
|
||||
if path and status != "hash_mismatch":
|
||||
return path, status
|
||||
|
||||
# Last resort: large files from GitHub release assets
|
||||
# Large files from GitHub release assets — tried when local file is
|
||||
# missing OR has a hash mismatch (wrong variant on disk)
|
||||
name = file_entry.get("name", "")
|
||||
sha1 = file_entry.get("sha1")
|
||||
md5_raw = file_entry.get("md5", "")
|
||||
@@ -131,6 +173,10 @@ def resolve_file(file_entry: dict, db: dict, bios_dir: str,
|
||||
if cached:
|
||||
return cached, "release_asset"
|
||||
|
||||
# Fall back to hash_mismatch local file if release asset unavailable
|
||||
if path:
|
||||
return path, status
|
||||
|
||||
return None, "not_found"
|
||||
|
||||
|
||||
@@ -362,20 +408,28 @@ def generate_pack(
|
||||
else:
|
||||
file_status.setdefault(dedup_key, "ok")
|
||||
|
||||
# Emulator-level validation (matches verify.py behavior)
|
||||
# In existence mode: validation is informational (warning, not downgrade)
|
||||
# In md5 mode: validation downgrades OK to UNTESTED
|
||||
# Emulator-level validation: informational only for platform packs.
|
||||
# Platform verification (existence/md5) is the authority for pack status.
|
||||
# Emulator checks are supplementary — logged but don't downgrade.
|
||||
# When a discrepancy is found, try to find a file satisfying both.
|
||||
if (file_status.get(dedup_key) == "ok"
|
||||
and local_path and validation_index):
|
||||
fname = file_entry.get("name", "")
|
||||
reason = check_file_validation(local_path, fname, validation_index)
|
||||
reason = check_file_validation(local_path, fname, validation_index,
|
||||
bios_dir)
|
||||
if reason:
|
||||
if verification_mode == "existence":
|
||||
# Existence mode: file present = OK, validation is extra info
|
||||
file_reasons.setdefault(dedup_key, reason)
|
||||
better = _find_candidate_satisfying_both(
|
||||
file_entry, db, local_path, validation_index, bios_dir,
|
||||
)
|
||||
if better:
|
||||
local_path = better
|
||||
else:
|
||||
file_status[dedup_key] = "untested"
|
||||
file_reasons[dedup_key] = reason
|
||||
ventry = validation_index.get(fname, {})
|
||||
emus = ", ".join(ventry.get("emulators", []))
|
||||
file_reasons.setdefault(
|
||||
dedup_key,
|
||||
f"{platform_display} says OK but {emus} says {reason}",
|
||||
)
|
||||
|
||||
if already_packed:
|
||||
continue
|
||||
@@ -475,7 +529,7 @@ def generate_pack(
|
||||
|
||||
for key, reason in sorted(file_reasons.items()):
|
||||
status = file_status.get(key, "")
|
||||
label = "UNTESTED"
|
||||
label = "UNTESTED" if status == "untested" else "DISCREPANCY"
|
||||
print(f" {label}: {key} — {reason}")
|
||||
for name in missing_files:
|
||||
print(f" MISSING: {name}")
|
||||
@@ -915,10 +969,11 @@ def main():
|
||||
groups = group_identical_platforms(platforms, args.platforms_dir)
|
||||
|
||||
for group_platforms, representative in groups:
|
||||
if len(group_platforms) > 1:
|
||||
names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
|
||||
combined_name = " + ".join(names)
|
||||
print(f"\nGenerating shared pack for {combined_name}...")
|
||||
variants = [p for p in group_platforms if p != representative]
|
||||
if variants:
|
||||
all_names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
|
||||
label = " / ".join(all_names)
|
||||
print(f"\nGenerating pack for {label}...")
|
||||
else:
|
||||
print(f"\nGenerating pack for {representative}...")
|
||||
|
||||
@@ -929,10 +984,10 @@ def main():
|
||||
zip_contents=zip_contents, data_registry=data_registry,
|
||||
emu_profiles=emu_profiles,
|
||||
)
|
||||
if zip_path and len(group_platforms) > 1:
|
||||
names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
|
||||
combined_filename = "_".join(n.replace(" ", "") for n in names) + "_BIOS_Pack.zip"
|
||||
new_path = os.path.join(os.path.dirname(zip_path), combined_filename)
|
||||
if zip_path and variants:
|
||||
all_names = [load_platform_config(p, args.platforms_dir).get("platform", p) for p in group_platforms]
|
||||
combined = "_".join(n.replace(" ", "") for n in all_names) + "_BIOS_Pack.zip"
|
||||
new_path = os.path.join(os.path.dirname(zip_path), combined)
|
||||
if new_path != zip_path:
|
||||
os.rename(zip_path, new_path)
|
||||
print(f" Renamed -> {os.path.basename(new_path)}")
|
||||
|
||||
Reference in New Issue
Block a user