Files
libretro/scripts/verify.py
2026-04-01 14:29:31 +02:00

1535 lines
54 KiB
Python

#!/usr/bin/env python3
"""Platform-native BIOS verification engine.
Replicates the exact verification logic of each platform:
- RetroArch/Lakka/RetroPie: file existence only (core_info.c path_is_valid)
- Batocera: MD5 + checkInsideZip, no required distinction (batocera-systems:1062-1091)
- Recalbox: MD5 + mandatory/hashMatchMandatory, 3-color severity (Bios.cpp:109-130)
- RetroBat: same as Batocera
- EmuDeck: MD5 whitelist per system
- BizHawk: SHA1 firmware hash verification
Cross-references emulator profiles to detect undeclared files used by available cores.
Usage:
python scripts/verify.py --all
python scripts/verify.py --platform batocera
python scripts/verify.py --all --include-archived
python scripts/verify.py --all --json
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import zipfile
sys.path.insert(0, os.path.dirname(__file__))
from common import (
build_target_cores_cache,
build_zip_contents_index,
check_inside_zip,
compute_hashes,
expand_platform_declared_names,
filter_systems_by_target,
group_identical_platforms,
list_emulator_profiles,
list_system_ids,
load_data_dir_registry,
load_emulator_profiles,
load_platform_config,
md5_composite,
md5sum,
require_yaml,
resolve_local_file,
resolve_platform_cores,
)
yaml = require_yaml()
from validation import (
_build_validation_index,
_parse_validation,
build_ground_truth,
check_file_validation,
filter_files_by_mode,
)
DEFAULT_DB = "database.json"
DEFAULT_PLATFORMS_DIR = "platforms"
DEFAULT_EMULATORS_DIR = "emulators"
# Status model -aligned with Batocera BiosStatus (batocera-systems:967-969)
class Status:
    """Per-file verification outcome; plain string constants compared with ==."""

    OK = "ok"  # file present (and hash confirmed when the mode checks hashes)
    UNTESTED = "untested" # file present, hash not confirmed
    MISSING = "missing"  # file could not be resolved locally
# Severity for per-file required/optional distinction
class Severity:
    """Report severity derived from (status, required, mode, hle_fallback)."""

    CRITICAL = "critical" # required file missing or bad hash (Recalbox RED)
    WARNING = "warning" # optional missing or hash mismatch (Recalbox YELLOW)
    INFO = "info" # optional missing on existence-only platform
    OK = "ok" # file verified
# Rank maps used when several entries aggregate onto one destination:
# the higher rank (worse status/severity) wins the comparison.
_STATUS_ORDER = {Status.OK: 0, Status.UNTESTED: 1, Status.MISSING: 2}
_SEVERITY_ORDER = {
    Severity.OK: 0,
    Severity.INFO: 1,
    Severity.WARNING: 2,
    Severity.CRITICAL: 3,
}
# Verification functions
def verify_entry_existence(
    file_entry: dict,
    local_path: str | None,
    validation_index: dict[str, dict] | None = None,
) -> dict:
    """RetroArch verification: path_is_valid() -file exists = OK.

    When a validation index is supplied and the file is present, an
    emulator-level mismatch is recorded as an informational "discrepancy"
    (the file still counts as OK on existence-only platforms).
    """
    entry_name = file_entry.get("name", "")
    is_required = file_entry.get("required", True)
    if not local_path:
        return {"name": entry_name, "status": Status.MISSING, "required": is_required}
    outcome = {"name": entry_name, "status": Status.OK, "required": is_required}
    if validation_index:
        mismatch = check_file_validation(local_path, entry_name, validation_index)
        if mismatch:
            vmeta = validation_index.get(entry_name, {})
            emus = ", ".join(vmeta.get("emulators", []))
            outcome["discrepancy"] = f"file present (OK) but {emus} says {mismatch}"
    return outcome
def verify_entry_md5(
    file_entry: dict,
    local_path: str | None,
    resolve_status: str = "",
) -> dict:
    """MD5 verification -Batocera md5sum + Recalbox multi-hash + Md5Composite.

    file_entry keys used: name, md5 (single hash, comma-separated
    alternatives, or a hex prefix shorter than 32 chars), zipped_file,
    required.  resolve_status == "md5_exact" means the resolver already
    matched the MD5, so re-hashing is skipped.  Returns a dict with
    name/required/status, plus path when the file exists and reason when
    status is "untested".
    """
    name = file_entry.get("name", "")
    expected_md5 = file_entry.get("md5", "")
    zipped_file = file_entry.get("zipped_file")
    required = file_entry.get("required", True)
    base = {"name": name, "required": required}
    # Accept a comma-separated list of alternative MD5s.
    if expected_md5 and "," in expected_md5:
        md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
    else:
        md5_list = [expected_md5] if expected_md5 else []
    if not local_path:
        return {**base, "status": Status.MISSING}
    if zipped_file:
        # Batocera checkInsideZip: the declared member must exist inside the
        # resolved ZIP and match one of the candidate MD5s (when any given).
        found_in_zip = False
        had_error = False
        for md5_candidate in md5_list or [""]:
            result = check_inside_zip(local_path, zipped_file, md5_candidate)
            if result == Status.OK:
                return {**base, "status": Status.OK, "path": local_path}
            if result == "error":
                had_error = True
            elif result != "not_in_zip":
                # Member present in the ZIP but with a different hash.
                found_in_zip = True
        # Read error only counts when no candidate ever located the member.
        if had_error and not found_in_zip:
            return {
                **base,
                "status": Status.UNTESTED,
                "path": local_path,
                "reason": f"{local_path} read error",
            }
        if not found_in_zip:
            return {
                **base,
                "status": Status.UNTESTED,
                "path": local_path,
                "reason": f"{zipped_file} not found inside ZIP",
            }
        return {
            **base,
            "status": Status.UNTESTED,
            "path": local_path,
            "reason": f"{zipped_file} MD5 mismatch inside ZIP",
        }
    if not md5_list:
        # No reference hash declared: presence alone is sufficient.
        return {**base, "status": Status.OK, "path": local_path}
    if resolve_status == "md5_exact":
        # Resolver already matched the MD5; avoid hashing the file twice.
        return {**base, "status": Status.OK, "path": local_path}
    actual_md5 = md5sum(local_path)
    actual_lower = actual_md5.lower()
    for expected in md5_list:
        if actual_lower == expected.lower():
            return {**base, "status": Status.OK, "path": local_path}
        # Prefix match for entries shorter than a full 32-char MD5
        # (presumably truncated hashes in some platform DBs -- TODO confirm).
        if len(expected) < 32 and actual_lower.startswith(expected.lower()):
            return {**base, "status": Status.OK, "path": local_path}
    if ".zip" in os.path.basename(local_path):
        # Fallback: composite hash over the archive contents (Md5Composite,
        # per the docstring above) for ZIPs whose outer hash differs.
        try:
            composite = md5_composite(local_path)
            for expected in md5_list:
                if composite.lower() == expected.lower():
                    return {**base, "status": Status.OK, "path": local_path}
        except (zipfile.BadZipFile, OSError):
            pass
    return {
        **base,
        "status": Status.UNTESTED,
        "path": local_path,
        "reason": f"expected {md5_list[0][:12]}… got {actual_md5[:12]}",
    }
def verify_entry_sha1(
    file_entry: dict,
    local_path: str | None,
) -> dict:
    """SHA1 verification -BizHawk firmware hash check.

    Missing file -> "missing"; no declared sha1 -> presence is enough;
    otherwise the computed SHA1 must match (case-insensitive).
    """
    base = {
        "name": file_entry.get("name", ""),
        "required": file_entry.get("required", True),
    }
    expected_sha1 = file_entry.get("sha1", "")
    if not local_path:
        return {**base, "status": Status.MISSING}
    if not expected_sha1:
        # No reference hash declared: presence alone is sufficient.
        return {**base, "status": Status.OK, "path": local_path}
    actual_sha1 = compute_hashes(local_path)["sha1"].lower()
    if actual_sha1 == expected_sha1.lower():
        return {**base, "status": Status.OK, "path": local_path}
    return {
        **base,
        "status": Status.UNTESTED,
        "path": local_path,
        "reason": f"expected {expected_sha1[:12]}… got {actual_sha1[:12]}",
    }
# Severity mapping per platform
def compute_severity(
    status: str,
    required: bool,
    mode: str,
    hle_fallback: bool = False,
) -> str:
    """Map (status, required, verification_mode, hle_fallback) -> severity.

    Based on native platform behavior + emulator HLE capability:
    - RetroArch (existence): required+missing = warning, optional+missing = info
    - Batocera/Recalbox/RetroBat/EmuDeck (md5): hash-based verification
    - BizHawk (sha1): same severity rules as md5
    - hle_fallback: core works without this file via HLE -> always INFO when missing
    """
    if status == Status.OK:
        return Severity.OK
    # HLE fallback: core works without this file regardless of platform requirement
    if hle_fallback and status == Status.MISSING:
        return Severity.INFO
    if mode == "existence":
        if status != Status.MISSING:
            return Severity.OK
        return Severity.WARNING if required else Severity.INFO
    # md5 mode (Batocera, Recalbox, RetroBat, EmuDeck)
    if status == Status.MISSING:
        return Severity.CRITICAL if required else Severity.WARNING
    return Severity.WARNING if status == Status.UNTESTED else Severity.OK
# ZIP content index
# Cross-reference: undeclared files used by cores
def _build_expected(file_entry: dict, checks: list[str]) -> dict:
"""Extract expected validation values from an emulator profile file entry."""
expected: dict = {}
if not checks:
return expected
if "size" in checks:
for key in ("size", "min_size", "max_size"):
if file_entry.get(key) is not None:
expected[key] = file_entry[key]
for hash_type in ("crc32", "md5", "sha1", "sha256"):
if hash_type in checks and file_entry.get(hash_type):
expected[hash_type] = file_entry[hash_type]
adler_val = file_entry.get("known_hash_adler32") or file_entry.get("adler32")
if adler_val:
expected["adler32"] = adler_val
return expected
def _name_in_index(
name: str,
by_name: dict,
by_path_suffix: dict | None = None,
data_names: set[str] | None = None,
) -> bool:
"""Check if a name is resolvable in the database indexes or data directories."""
if name in by_name:
return True
basename = name.rsplit("/", 1)[-1]
if basename != name and basename in by_name:
return True
if by_path_suffix and name in by_path_suffix:
return True
if data_names:
if name in data_names or name.lower() in data_names:
return True
if basename != name and (
basename in data_names or basename.lower() in data_names
):
return True
return False
def find_undeclared_files(
    config: dict,
    emulators_dir: str,
    db: dict,
    emu_profiles: dict | None = None,
    target_cores: set[str] | None = None,
    data_names: set[str] | None = None,
    include_all: bool = False,
) -> list[dict]:
    """Find files needed by cores but not declared in platform config.

    Returns one dict per undeclared file (emulator, name, path, required,
    hle_fallback, category, in_repo, checks, expected, ...).  Files that
    live inside an archive are collapsed into a single per-archive entry,
    appended after the plain entries.  With include_all=True, files the
    platform does declare are reported as well.
    """
    # Collect all filenames declared by this platform, enriched with
    # canonical names from DB via MD5 (handles platform renaming)
    declared_names = expand_platform_declared_names(config, db)
    # Collect data_directory refs
    # NOTE(review): declared_dd is built but never read below -- confirm
    # whether this is leftover or was meant to filter data-directory files.
    declared_dd: set[str] = set()
    for sys_id, system in config.get("systems", {}).items():
        for dd in system.get("data_directories", []):
            ref = dd.get("ref", "")
            if ref:
                declared_dd.add(ref)
    by_name = db.get("indexes", {}).get("by_name", {})
    by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
    profiles = (
        emu_profiles
        if emu_profiles is not None
        else load_emulator_profiles(emulators_dir)
    )
    relevant = resolve_platform_cores(config, profiles, target_cores=target_cores)
    standalone_set = set(str(c) for c in config.get("standalone_cores", []))
    undeclared = []
    seen_files: set[str] = set()
    # Track archives: archive_name -> {in_repo, emulator, files: [...], ...}
    archive_entries: dict[str, dict] = {}
    for emu_name, profile in sorted(profiles.items()):
        if profile.get("type") in ("launcher", "alias"):
            continue
        if emu_name not in relevant:
            continue
        # Skip agnostic profiles entirely (filename-agnostic BIOS detection)
        if profile.get("bios_mode") == "agnostic":
            continue
        # Check if this profile is standalone: match profile name or any cores: alias
        is_standalone = emu_name in standalone_set or bool(
            standalone_set & {str(c) for c in profile.get("cores", [])}
        )
        for f in profile.get("files", []):
            fname = f.get("name", "")
            if not fname or fname in seen_files:
                continue
            # Skip pattern placeholders (e.g., <user-selected>.bin)
            if "<" in fname or ">" in fname or "*" in fname:
                continue
            # Skip UI-imported files with explicit path: null (not resolvable by pack)
            if "path" in f and f["path"] is None:
                continue
            # Mode filtering: skip files incompatible with platform's usage
            file_mode = f.get("mode")
            if file_mode == "standalone" and not is_standalone:
                continue
            if file_mode == "libretro" and is_standalone:
                continue
            # Skip files loaded from non-system directories (save_dir, content_dir)
            load_from = f.get("load_from", "")
            if load_from and load_from != "system_dir":
                continue
            # Skip agnostic files (filename-agnostic, handled by agnostic scan)
            if f.get("agnostic"):
                continue
            archive = f.get("archive")
            # Skip files declared by the platform (by name or archive)
            if not include_all:
                if fname in declared_names:
                    seen_files.add(fname)
                    continue
                if archive and archive in declared_names:
                    seen_files.add(fname)
                    continue
            seen_files.add(fname)
            # Archived files are grouped by archive
            if archive:
                if archive not in archive_entries:
                    in_repo = _name_in_index(
                        archive, by_name, by_path_suffix, data_names
                    )
                    archive_entries[archive] = {
                        "emulator": profile.get("emulator", emu_name),
                        "name": archive,
                        "archive": archive,
                        "path": archive,
                        "required": False,
                        "hle_fallback": False,
                        "category": f.get("category", "bios"),
                        "in_repo": in_repo,
                        "note": "",
                        "checks": [],
                        "source_ref": None,
                        "expected": {},
                        "archive_file_count": 0,
                        "archive_required_count": 0,
                    }
                entry = archive_entries[archive]
                entry["archive_file_count"] += 1
                # Archive becomes required as soon as any member file is.
                if f.get("required", False):
                    entry["archive_required_count"] += 1
                    entry["required"] = True
                continue
            # Determine destination path based on mode
            if is_standalone:
                dest = f.get("standalone_path") or f.get("path") or fname
            else:
                dest = f.get("path") or fname
            # Resolution: try name, then path basename, then path_suffix
            in_repo = _name_in_index(fname, by_name, by_path_suffix, data_names)
            if not in_repo and dest != fname:
                path_base = dest.rsplit("/", 1)[-1]
                in_repo = _name_in_index(path_base, by_name, by_path_suffix, data_names)
            checks = _parse_validation(f.get("validation"))
            undeclared.append(
                {
                    "emulator": profile.get("emulator", emu_name),
                    "name": fname,
                    "path": dest,
                    "required": f.get("required", False),
                    "hle_fallback": f.get("hle_fallback", False),
                    "category": f.get("category", "bios"),
                    "in_repo": in_repo,
                    "note": f.get("note", ""),
                    "checks": sorted(checks) if checks else [],
                    "source_ref": f.get("source_ref"),
                    "expected": _build_expected(f, checks),
                }
            )
    # Append grouped archive entries
    for entry in sorted(archive_entries.values(), key=lambda e: e["name"]):
        undeclared.append(entry)
    return undeclared
def find_exclusion_notes(
    config: dict,
    emulators_dir: str,
    emu_profiles: dict | None = None,
    target_cores: set[str] | None = None,
) -> list[dict]:
    """Document why certain emulator files are intentionally excluded.

    Reports:
    - Launchers (BIOS managed by standalone emulator)
    - Standalone-only files (not needed in libretro mode)
    - Frozen snapshots with files: [] (code doesn't load .info firmware)
    - Files covered by data_directories

    Returns a list of {"emulator", "reason", "detail"} dicts, one per
    excluded profile, in sorted profile-name order.
    """
    profiles = (
        emu_profiles
        if emu_profiles is not None
        else load_emulator_profiles(emulators_dir)
    )
    platform_systems = set(config.get("systems", {}))
    relevant = resolve_platform_cores(config, profiles, target_cores=target_cores)
    # Hoisted out of the loop: depends only on config, not on the profile.
    standalone_set = set(str(c) for c in config.get("standalone_cores", []))
    notes = []
    for emu_name, profile in sorted(profiles.items()):
        emu_systems = set(profile.get("systems", []))
        # Match by core resolution OR system intersection (documents all potential emulators)
        if emu_name not in relevant and not (emu_systems & platform_systems):
            continue
        emu_display = profile.get("emulator", emu_name)
        # Launcher excluded entirely
        if profile.get("type") == "launcher":
            notes.append(
                {
                    "emulator": emu_display,
                    "reason": "launcher",
                    "detail": profile.get(
                        "exclusion_note", "BIOS managed by standalone emulator"
                    ),
                }
            )
            continue
        # Profile-level exclusion note (frozen snapshots, etc.)
        exclusion_note = profile.get("exclusion_note")
        if exclusion_note:
            notes.append(
                {
                    "emulator": emu_display,
                    "reason": "exclusion_note",
                    "detail": exclusion_note,
                }
            )
            continue
        # Count standalone-only files -but only report as excluded if the
        # platform does NOT use this emulator in standalone mode
        is_standalone = emu_name in standalone_set or bool(
            standalone_set & {str(c) for c in profile.get("cores", [])}
        )
        if not is_standalone:
            standalone_files = [
                f for f in profile.get("files", []) if f.get("mode") == "standalone"
            ]
            if standalone_files:
                # Show at most three filenames, then a "+N" overflow marker.
                names = [f["name"] for f in standalone_files[:3]]
                more = (
                    f" +{len(standalone_files) - 3}"
                    if len(standalone_files) > 3
                    else ""
                )
                notes.append(
                    {
                        "emulator": emu_display,
                        "reason": "standalone_only",
                        "detail": f"{len(standalone_files)} files for standalone mode only ({', '.join(names)}{more})",
                    }
                )
    return notes
# Platform verification
def _find_best_variant(
file_entry: dict,
db: dict,
current_path: str,
validation_index: dict,
) -> str | None:
"""Search for a repo file that passes both platform MD5 and emulator validation."""
fname = file_entry.get("name", "")
if not fname or fname not in validation_index:
return None
md5_expected = file_entry.get("md5", "")
md5_set = (
{m.strip().lower() for m in md5_expected.split(",") if m.strip()}
if md5_expected
else set()
)
by_name = db.get("indexes", {}).get("by_name", {})
files_db = db.get("files", {})
for sha1 in by_name.get(fname, []):
candidate = files_db.get(sha1, {})
path = candidate.get("path", "")
if (
not path
or not os.path.exists(path)
or os.path.realpath(path) == os.path.realpath(current_path)
):
continue
if md5_set and candidate.get("md5", "").lower() not in md5_set:
continue
reason = check_file_validation(path, fname, validation_index)
if reason is None:
return path
return None
def verify_platform(
    config: dict,
    db: dict,
    emulators_dir: str = DEFAULT_EMULATORS_DIR,
    emu_profiles: dict | None = None,
    target_cores: set[str] | None = None,
    data_dir_registry: dict | None = None,
    supplemental_names: set[str] | None = None,
) -> dict:
    """Verify all BIOS files for a platform, including cross-reference gaps.

    Dispatches every declared file to the existence/sha1/md5 checker chosen
    by config["verification_mode"], aggregates per-destination status and
    severity (worst wins), then attaches cross-reference results, exclusion
    notes, and ground-truth coverage.  Returns the report dict assembled at
    the end of this function.
    """
    mode = config.get("verification_mode", "existence")
    platform = config.get("platform", "unknown")
    # Only build the ZIP contents index when some entry needs zipped lookup.
    # (The genexp variable `sys` shadows the sys module inside this
    # expression only -- harmless, but easy to misread.)
    has_zipped = any(
        fe.get("zipped_file")
        for sys in config.get("systems", {}).values()
        for fe in sys.get("files", [])
    )
    zip_contents = build_zip_contents_index(db) if has_zipped else {}
    # Build HLE + validation indexes from emulator profiles
    profiles = (
        emu_profiles
        if emu_profiles is not None
        else load_emulator_profiles(emulators_dir)
    )
    hle_index: dict[str, bool] = {}
    for profile in profiles.values():
        for f in profile.get("files", []):
            if f.get("hle_fallback"):
                hle_index[f.get("name", "")] = True
    validation_index = _build_validation_index(profiles)
    # Filter systems by target
    plat_cores = resolve_platform_cores(config, profiles) if target_cores else None
    verify_systems = filter_systems_by_target(
        config.get("systems", {}),
        profiles,
        target_cores,
        platform_cores=plat_cores,
    )
    # Per-entry results
    details = []
    # Per-destination aggregation
    file_status: dict[str, str] = {}
    file_required: dict[str, bool] = {}
    file_severity: dict[str, str] = {}
    for sys_id, system in verify_systems.items():
        for file_entry in system.get("files", []):
            local_path, resolve_status = resolve_local_file(
                file_entry,
                db,
                zip_contents,
                data_dir_registry=data_dir_registry,
            )
            if mode == "existence":
                result = verify_entry_existence(
                    file_entry,
                    local_path,
                    validation_index,
                )
            elif mode == "sha1":
                result = verify_entry_sha1(file_entry, local_path)
            else:
                result = verify_entry_md5(file_entry, local_path, resolve_status)
            # Emulator-level validation: informational for platform packs.
            # Platform verification (MD5) is the authority. Emulator
            # mismatches are reported as discrepancies, not failures.
            if result["status"] == Status.OK and local_path and validation_index:
                fname = file_entry.get("name", "")
                reason = check_file_validation(local_path, fname, validation_index)
                if reason:
                    # Before flagging, look for another repo copy that
                    # satisfies both the platform MD5 and the emulator check.
                    better = _find_best_variant(
                        file_entry,
                        db,
                        local_path,
                        validation_index,
                    )
                    if not better:
                        ventry = validation_index.get(fname, {})
                        emus = ", ".join(ventry.get("emulators", []))
                        result["discrepancy"] = (
                            f"{platform} says OK but {emus} says {reason}"
                        )
            result["system"] = sys_id
            result["hle_fallback"] = hle_index.get(file_entry.get("name", ""), False)
            result["ground_truth"] = build_ground_truth(
                file_entry.get("name", ""),
                validation_index,
            )
            details.append(result)
            # Aggregate by destination
            dest = file_entry.get("destination", file_entry.get("name", ""))
            if not dest:
                dest = f"{sys_id}/{file_entry.get('name', '')}"
            required = file_entry.get("required", True)
            cur = result["status"]
            prev = file_status.get(dest)
            # Worst status per destination wins (see _STATUS_ORDER).
            if prev is None or _STATUS_ORDER.get(cur, 0) > _STATUS_ORDER.get(prev, 0):
                file_status[dest] = cur
            file_required[dest] = required
            hle = hle_index.get(file_entry.get("name", ""), False)
            sev = compute_severity(cur, required, mode, hle)
            prev_sev = file_severity.get(dest)
            # Worst severity per destination wins (see _SEVERITY_ORDER).
            if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get(
                prev_sev, 0
            ):
                file_severity[dest] = sev
    # Count by severity
    counts = {
        Severity.OK: 0,
        Severity.INFO: 0,
        Severity.WARNING: 0,
        Severity.CRITICAL: 0,
    }
    for s in file_severity.values():
        counts[s] = counts.get(s, 0) + 1
    # Count by file status (ok/untested/missing)
    status_counts: dict[str, int] = {}
    for s in file_status.values():
        status_counts[s] = status_counts.get(s, 0) + 1
    # Cross-reference undeclared files
    if supplemental_names is None:
        # Imported lazily -- presumably to avoid a circular import; confirm.
        from cross_reference import _build_supplemental_index
        supplemental_names = _build_supplemental_index()
    undeclared = find_undeclared_files(
        config,
        emulators_dir,
        db,
        emu_profiles,
        target_cores=target_cores,
        data_names=supplemental_names,
    )
    exclusions = find_exclusion_notes(
        config, emulators_dir, emu_profiles, target_cores=target_cores
    )
    # Ground truth coverage
    gt_filenames = set(validation_index)
    dest_to_name: dict[str, str] = {}
    for sys_id, system in verify_systems.items():
        for fe in system.get("files", []):
            dest = fe.get("destination", fe.get("name", ""))
            if not dest:
                dest = f"{sys_id}/{fe.get('name', '')}"
            dest_to_name.setdefault(dest, fe.get("name", ""))
    with_validation = sum(
        1 for dest in file_status if dest_to_name.get(dest, "") in gt_filenames
    )
    total = len(file_status)
    return {
        "platform": platform,
        "verification_mode": mode,
        "total_files": total,
        "severity_counts": counts,
        "status_counts": status_counts,
        "undeclared_files": undeclared,
        "exclusion_notes": exclusions,
        "details": details,
        "ground_truth_coverage": {
            "with_validation": with_validation,
            "platform_only": total - with_validation,
            "total": total,
        },
    }
# Output
def _format_ground_truth_aggregate(ground_truth: list[dict]) -> str:
"""Format ground truth as a single aggregated line.
Example: beetle_psx [md5], pcsx_rearmed [existence]
"""
parts = []
for gt in ground_truth:
checks_label = "+".join(gt["checks"]) if gt["checks"] else "existence"
parts.append(f"{gt['emulator']} [{checks_label}]")
return ", ".join(parts)
def _format_ground_truth_verbose(ground_truth: list[dict]) -> list[str]:
"""Format ground truth as one line per core with expected values and source ref.
Example: handy validates size=512,crc32=0d973c9d [rom.h:48-49]
"""
lines = []
for gt in ground_truth:
checks_label = "+".join(gt["checks"]) if gt["checks"] else "existence"
expected = gt.get("expected", {})
if expected:
vals = ",".join(f"{k}={v}" for k, v in sorted(expected.items()))
part = f"{gt['emulator']} validates {vals}"
else:
part = f"{gt['emulator']} validates {checks_label}"
if gt.get("source_ref"):
part += f" [{gt['source_ref']}]"
lines.append(part)
return lines
def _print_ground_truth(gt: list[dict], verbose: bool) -> None:
"""Print ground truth lines for a file entry."""
if not gt:
return
if verbose:
for line in _format_ground_truth_verbose(gt):
print(f" {line}")
else:
print(f" {_format_ground_truth_aggregate(gt)}")
def _print_detail_entries(details: list[dict], seen: set[str], verbose: bool) -> None:
    """Print UNTESTED, MISSING, and DISCREPANCY entries from verification details.

    `seen` deduplicates entries across the passes by their "system/name" key
    and is mutated in place (callers can share it across sections).  Output
    order: all UNTESTED, then MISSING, then DISCREPANCY, and -- verbose
    only -- OK entries that carry ground truth.
    """
    for d in details:
        if d["status"] == Status.UNTESTED:
            key = f"{d['system']}/{d['name']}"
            if key in seen:
                continue
            seen.add(key)
            req = "required" if d.get("required", True) else "optional"
            hle = ", HLE available" if d.get("hle_fallback") else ""
            reason = d.get("reason", "")
            print(f" UNTESTED ({req}{hle}): {key} -{reason}")
            _print_ground_truth(d.get("ground_truth", []), verbose)
    for d in details:
        if d["status"] == Status.MISSING:
            key = f"{d['system']}/{d['name']}"
            if key in seen:
                continue
            seen.add(key)
            req = "required" if d.get("required", True) else "optional"
            hle = ", HLE available" if d.get("hle_fallback") else ""
            print(f" MISSING ({req}{hle}): {key}")
            _print_ground_truth(d.get("ground_truth", []), verbose)
    for d in details:
        disc = d.get("discrepancy")
        if disc:
            key = f"{d['system']}/{d['name']}"
            if key in seen:
                continue
            seen.add(key)
            print(f" DISCREPANCY: {key} -{disc}")
            _print_ground_truth(d.get("ground_truth", []), verbose)
    if verbose:
        # Verbose mode also lists OK files that have emulator ground truth.
        for d in details:
            if d["status"] == Status.OK:
                key = f"{d['system']}/{d['name']}"
                if key in seen:
                    continue
                seen.add(key)
                gt = d.get("ground_truth", [])
                if gt:
                    req = "required" if d.get("required", True) else "optional"
                    print(f" OK ({req}): {key}")
                    for line in _format_ground_truth_verbose(gt):
                        print(f" {line}")
def _print_undeclared_entry(u: dict, prefix: str, verbose: bool) -> None:
"""Print a single undeclared file entry with its validation checks."""
arc_count = u.get("archive_file_count")
if arc_count:
name_label = f"{u['name']} ({arc_count} file{'s' if arc_count != 1 else ''})"
else:
name_label = u["name"]
print(f" {prefix}: {u['emulator']} needs {name_label}")
checks = u.get("checks", [])
if checks:
if verbose:
expected = u.get("expected", {})
if expected:
vals = ",".join(f"{k}={v}" for k, v in sorted(expected.items()))
ref_part = f" [{u['source_ref']}]" if u.get("source_ref") else ""
print(f" validates {vals}{ref_part}")
else:
checks_label = "+".join(checks)
ref_part = f" [{u['source_ref']}]" if u.get("source_ref") else ""
print(f" validates {checks_label}{ref_part}")
else:
print(f" [{'+'.join(checks)}]")
def _print_undeclared_section(result: dict, verbose: bool) -> None:
"""Print cross-reference section for undeclared files used by cores."""
undeclared = result.get("undeclared_files", [])
if not undeclared:
return
bios_files = [u for u in undeclared if u.get("category", "bios") == "bios"]
game_data = [u for u in undeclared if u.get("category", "bios") == "game_data"]
req_not_in_repo = [
u
for u in bios_files
if u["required"] and not u["in_repo"] and not u.get("hle_fallback")
]
req_hle_not_in_repo = [
u
for u in bios_files
if u["required"] and not u["in_repo"] and u.get("hle_fallback")
]
req_in_repo = [u for u in bios_files if u["required"] and u["in_repo"]]
opt_in_repo = [u for u in bios_files if not u["required"] and u["in_repo"]]
opt_not_in_repo = [u for u in bios_files if not u["required"] and not u["in_repo"]]
core_in_pack = len(req_in_repo) + len(opt_in_repo)
core_missing_req = len(req_not_in_repo) + len(req_hle_not_in_repo)
core_missing_opt = len(opt_not_in_repo)
print(
f" Core files: {core_in_pack} in pack, {core_missing_req} required missing, {core_missing_opt} optional missing"
)
for u in req_not_in_repo:
_print_undeclared_entry(u, "MISSING (required)", verbose)
for u in req_hle_not_in_repo:
_print_undeclared_entry(u, "MISSING (required, HLE fallback)", verbose)
if game_data:
gd_missing = [u for u in game_data if not u["in_repo"]]
gd_present = [u for u in game_data if u["in_repo"]]
if gd_missing or gd_present:
print(f" Game data: {len(gd_present)} in pack, {len(gd_missing)} missing")
def print_platform_result(
    result: dict, group: list[str], verbose: bool = False
) -> None:
    """Print a human-readable report for one platform result.

    `group` lists the platform names that share this identical result; they
    are joined into the summary label.
    """
    mode = result["verification_mode"]
    total = result["total_files"]
    c = result["severity_counts"]
    label = " / ".join(group)
    ok_count = c[Severity.OK]
    problems = total - ok_count
    # Summary line
    if mode == "existence":
        if problems:
            # On existence-only platforms WARNING/CRITICAL both mean "missing".
            missing = c.get(Severity.WARNING, 0) + c.get(Severity.CRITICAL, 0)
            optional_missing = c.get(Severity.INFO, 0)
            parts = [f"{ok_count}/{total} present"]
            if missing:
                parts.append(f"{missing} missing")
            if optional_missing:
                parts.append(f"{optional_missing} optional missing")
        else:
            parts = [f"{ok_count}/{total} present"]
    else:
        # Hash-based modes report raw status counts (ok/untested/missing).
        sc = result.get("status_counts", {})
        untested = sc.get(Status.UNTESTED, 0)
        missing = sc.get(Status.MISSING, 0)
        parts = [f"{ok_count}/{total} OK"]
        if untested:
            parts.append(f"{untested} untested")
        if missing:
            parts.append(f"{missing} missing")
    print(f"{label}: {', '.join(parts)} [{mode}]")
    seen_details: set[str] = set()
    _print_detail_entries(result["details"], seen_details, verbose)
    _print_undeclared_section(result, verbose)
    exclusions = result.get("exclusion_notes", [])
    if exclusions:
        print(f" No external files ({len(exclusions)}):")
        for ex in exclusions:
            print(f" {ex['emulator']} -{ex['detail']} [{ex['reason']}]")
    gt_cov = result.get("ground_truth_coverage")
    if gt_cov and gt_cov["total"] > 0:
        # Integer (floor) percentage of files backed by emulator validation.
        pct = gt_cov["with_validation"] * 100 // gt_cov["total"]
        print(
            f" Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)"
        )
        if gt_cov["platform_only"]:
            print(f" {gt_cov['platform_only']} platform-only (no emulator profile)")
# Emulator/system mode verification
def _effective_validation_label(details: list[dict], validation_index: dict) -> str:
"""Determine the bracket label for the report.
Returns the union of all check types used, e.g. [crc32+existence+size].
"""
all_checks: set[str] = set()
has_files = False
for d in details:
fname = d.get("name", "")
if d.get("note"):
continue # skip informational entries (empty profiles)
has_files = True
entry = validation_index.get(fname)
if entry:
all_checks.update(entry["checks"])
else:
all_checks.add("existence")
if not has_files:
return "existence"
return "+".join(sorted(all_checks))
def verify_emulator(
    profile_names: list[str],
    emulators_dir: str,
    db: dict,
    standalone: bool = False,
) -> dict:
    """Verify files for specific emulator profiles.

    Prints to stderr and exits with code 1 when a requested profile is
    unknown, an alias, a launcher, or lacks standalone support while
    standalone=True.  Returns a report dict shaped like verify_platform()'s
    but keyed by "emulators" and including "data_dir_notices".
    """
    # NOTE(review): this first call's result is discarded -- presumably it
    # warms a cache inside load_emulator_profiles; confirm, else drop it.
    load_emulator_profiles(emulators_dir)
    zip_contents = build_zip_contents_index(db)
    # Also load aliases for redirect messages
    all_profiles = load_emulator_profiles(emulators_dir, skip_aliases=False)
    # Resolve profile names, reject alias/launcher
    selected: list[tuple[str, dict]] = []
    for name in profile_names:
        if name not in all_profiles:
            available = sorted(
                k
                for k, v in all_profiles.items()
                if v.get("type") not in ("alias", "test")
            )
            print(f"Error: emulator '{name}' not found", file=sys.stderr)
            print(f"Available: {', '.join(available[:10])}...", file=sys.stderr)
            sys.exit(1)
        p = all_profiles[name]
        if p.get("type") == "alias":
            alias_of = p.get("alias_of", "?")
            print(
                f"Error: {name} is an alias of {alias_of} -use --emulator {alias_of}",
                file=sys.stderr,
            )
            sys.exit(1)
        if p.get("type") == "launcher":
            print(
                f"Error: {name} is a launcher -use the emulator it launches",
                file=sys.stderr,
            )
            sys.exit(1)
        # Check standalone capability
        ptype = p.get("type", "libretro")
        if standalone and "standalone" not in ptype:
            print(
                f"Error: {name} ({ptype}) does not support --standalone",
                file=sys.stderr,
            )
            sys.exit(1)
        selected.append((name, p))
    # Build validation index from selected profiles only
    selected_profiles = {n: p for n, p in selected}
    validation_index = _build_validation_index(selected_profiles)
    data_registry = load_data_dir_registry(
        os.path.join(os.path.dirname(__file__), "..", "platforms")
    )
    details = []
    file_status: dict[str, str] = {}
    file_severity: dict[str, str] = {}
    dest_to_name: dict[str, str] = {}
    data_dir_notices: list[str] = []
    for emu_name, profile in selected:
        files = filter_files_by_mode(profile.get("files", []), standalone)
        # Check data directories (only notice if not cached)
        for dd in profile.get("data_directories", []):
            ref = dd.get("ref", "")
            if not ref:
                continue
            if data_registry and ref in data_registry:
                cache_path = data_registry[ref].get("local_cache", "")
                if cache_path and os.path.isdir(cache_path):
                    continue # cached, no notice needed
            data_dir_notices.append(ref)
        if not files:
            # Keep an informational row so the report still names the emulator.
            details.append(
                {
                    "name": f"({emu_name})",
                    "status": Status.OK,
                    "required": False,
                    "system": "",
                    "note": f"No files needed for {profile.get('emulator', emu_name)}",
                    "ground_truth": [],
                }
            )
            continue
        # Verify archives as units (e.g., neogeo.zip, aes.zip)
        seen_archives: set[str] = set()
        for file_entry in files:
            archive = file_entry.get("archive")
            if archive and archive not in seen_archives:
                seen_archives.add(archive)
                archive_entry = {"name": archive}
                local_path, _ = resolve_local_file(
                    archive_entry,
                    db,
                    zip_contents,
                    data_dir_registry=data_registry,
                )
                # The archive is required when any member file is required.
                required = any(
                    f.get("archive") == archive and f.get("required", True)
                    for f in files
                )
                if local_path:
                    result = {
                        "name": archive,
                        "status": Status.OK,
                        "required": required,
                        "path": local_path,
                    }
                else:
                    result = {
                        "name": archive,
                        "status": Status.MISSING,
                        "required": required,
                    }
                result["system"] = file_entry.get("system", "")
                result["hle_fallback"] = False
                result["ground_truth"] = build_ground_truth(archive, validation_index)
                details.append(result)
                dest = archive
                dest_to_name[dest] = archive
                cur = result["status"]
                prev = file_status.get(dest)
                # Worst status/severity per destination wins.
                if prev is None or _STATUS_ORDER.get(cur, 0) > _STATUS_ORDER.get(
                    prev, 0
                ):
                    file_status[dest] = cur
                sev = compute_severity(cur, required, "existence", False)
                prev_sev = file_severity.get(dest)
                if prev_sev is None or _SEVERITY_ORDER.get(
                    sev, 0
                ) > _SEVERITY_ORDER.get(prev_sev, 0):
                    file_severity[dest] = sev
        for file_entry in files:
            # Skip archived files (verified as archive units above)
            if file_entry.get("archive"):
                continue
            dest_hint = file_entry.get("path", "")
            local_path, resolve_status = resolve_local_file(
                file_entry,
                db,
                zip_contents,
                dest_hint=dest_hint,
                data_dir_registry=data_registry,
            )
            name = file_entry.get("name", "")
            required = file_entry.get("required", True)
            hle = file_entry.get("hle_fallback", False)
            if not local_path:
                result = {"name": name, "status": Status.MISSING, "required": required}
            else:
                # Apply emulator validation
                reason = check_file_validation(local_path, name, validation_index)
                if reason:
                    result = {
                        "name": name,
                        "status": Status.UNTESTED,
                        "required": required,
                        "path": local_path,
                        "reason": reason,
                    }
                else:
                    result = {
                        "name": name,
                        "status": Status.OK,
                        "required": required,
                        "path": local_path,
                    }
            result["system"] = file_entry.get("system", "")
            result["hle_fallback"] = hle
            result["ground_truth"] = build_ground_truth(name, validation_index)
            details.append(result)
            # Aggregate by destination (path if available, else name)
            dest = file_entry.get("path", "") or name
            dest_to_name[dest] = name
            cur = result["status"]
            prev = file_status.get(dest)
            if prev is None or _STATUS_ORDER.get(cur, 0) > _STATUS_ORDER.get(prev, 0):
                file_status[dest] = cur
            sev = compute_severity(cur, required, "existence", hle)
            prev_sev = file_severity.get(dest)
            if prev_sev is None or _SEVERITY_ORDER.get(sev, 0) > _SEVERITY_ORDER.get(
                prev_sev, 0
            ):
                file_severity[dest] = sev
    counts = {
        Severity.OK: 0,
        Severity.INFO: 0,
        Severity.WARNING: 0,
        Severity.CRITICAL: 0,
    }
    for s in file_severity.values():
        counts[s] = counts.get(s, 0) + 1
    status_counts: dict[str, int] = {}
    for s in file_status.values():
        status_counts[s] = status_counts.get(s, 0) + 1
    label = _effective_validation_label(details, validation_index)
    gt_filenames = set(validation_index)
    total = len(file_status)
    with_validation = sum(
        1 for dest in file_status if dest_to_name.get(dest, "") in gt_filenames
    )
    return {
        "emulators": [n for n, _ in selected],
        "verification_mode": label,
        "total_files": total,
        "severity_counts": counts,
        "status_counts": status_counts,
        "details": details,
        "data_dir_notices": sorted(set(data_dir_notices)),
        "ground_truth_coverage": {
            "with_validation": with_validation,
            "platform_only": total - with_validation,
            "total": total,
        },
    }
def verify_system(
    system_ids: list[str],
    emulators_dir: str,
    db: dict,
    standalone: bool = False,
) -> dict:
    """Verify files for all emulators supporting given system IDs.

    Args:
        system_ids: system identifiers to match against each profile's
            ``systems`` list (any overlap selects the profile).
        emulators_dir: directory containing emulator profile definitions.
        db: loaded database.json contents, passed through to
            ``verify_emulator``.
        standalone: when True, only profiles whose ``type`` contains
            "standalone" are considered.

    Returns:
        The aggregated result dict from ``verify_emulator``.

    Exits the process with status 1 (after printing a hint with the
    available system IDs to stderr) when no matching emulator is found.
    """
    profiles = load_emulator_profiles(emulators_dir)
    wanted = set(system_ids)  # hoisted: loop-invariant set build
    matching = []
    for name, profile in sorted(profiles.items()):
        # Launchers, aliases and test profiles are not real emulators.
        if profile.get("type") in ("launcher", "alias", "test"):
            continue
        if set(profile.get("systems", [])) & wanted:
            ptype = profile.get("type", "libretro")
            if standalone and "standalone" not in ptype:
                continue  # skip non-standalone in standalone mode
            matching.append(name)
    if not matching:
        # Build the full system list so the error message can suggest IDs.
        all_systems: set[str] = set()
        for p in profiles.values():
            all_systems.update(p.get("systems", []))
        mode_label = "standalone emulators" if standalone else "emulators"
        print(
            f"No {mode_label} found for system(s): {', '.join(system_ids)}",
            file=sys.stderr,
        )
        # Only show an ellipsis when the list is actually truncated.
        shown = sorted(all_systems)[:20]
        suffix = "..." if len(all_systems) > 20 else ""
        print(
            f"Available systems: {', '.join(shown)}{suffix}",
            file=sys.stderr,
        )
        sys.exit(1)
    return verify_emulator(matching, emulators_dir, db, standalone)
def _print_ground_truth(gt: list, verbose: bool) -> None:
    """Print ground-truth lines for one file entry (shared by all sections).

    Verbose mode prints one indented line per validation detail;
    otherwise a single aggregated summary line. No-op when *gt* is empty.
    """
    if not gt:
        return
    if verbose:
        for line in _format_ground_truth_verbose(gt):
            print(f"      {line}")
    else:
        print(f"      {_format_ground_truth_aggregate(gt)}")


def print_emulator_result(result: dict, verbose: bool = False) -> None:
    """Print verification result for emulator/system mode.

    Output order: one summary line, UNTESTED entries, MISSING entries,
    free-form notes, (verbose only) OK entries with ground truth,
    data-dir notices, then a ground-truth coverage footer. Entries are
    de-duplicated by file name across the UNTESTED/MISSING/OK sections
    via the shared ``seen`` set.
    """
    label = " + ".join(result["emulators"])
    mode = result["verification_mode"]
    total = result["total_files"]
    c = result["severity_counts"]
    ok_count = c[Severity.OK]
    sc = result.get("status_counts", {})
    untested = sc.get(Status.UNTESTED, 0)
    missing = sc.get(Status.MISSING, 0)
    # Summary line: always shows OK ratio; untested/missing only when non-zero.
    parts = [f"{ok_count}/{total} OK"]
    if untested:
        parts.append(f"{untested} untested")
    if missing:
        parts.append(f"{missing} missing")
    print(f"{label}: {', '.join(parts)} [{mode}]")
    seen = set()
    for d in result["details"]:
        if d["status"] == Status.UNTESTED:
            if d["name"] in seen:
                continue
            seen.add(d["name"])
            req = "required" if d.get("required", True) else "optional"
            hle = ", HLE available" if d.get("hle_fallback") else ""
            reason = d.get("reason", "")
            print(f"  UNTESTED ({req}{hle}): {d['name']} -{reason}")
            _print_ground_truth(d.get("ground_truth", []), verbose)
    for d in result["details"]:
        if d["status"] == Status.MISSING:
            if d["name"] in seen:
                continue
            seen.add(d["name"])
            req = "required" if d.get("required", True) else "optional"
            hle = ", HLE available" if d.get("hle_fallback") else ""
            print(f"  MISSING ({req}{hle}): {d['name']}")
            _print_ground_truth(d.get("ground_truth", []), verbose)
    for d in result["details"]:
        if d.get("note"):
            print(f"  {d['note']}")
    if verbose:
        # OK entries are only listed in verbose mode, and only when they
        # carry ground-truth data worth showing.
        for d in result["details"]:
            if d["status"] == Status.OK:
                if d["name"] in seen:
                    continue
                seen.add(d["name"])
                gt = d.get("ground_truth", [])
                if gt:
                    req = "required" if d.get("required", True) else "optional"
                    print(f"  OK ({req}): {d['name']}")
                    _print_ground_truth(gt, verbose)
    for ref in result.get("data_dir_notices", []):
        print(
            f"  Note: data directory '{ref}' required but not included (use refresh_data_dirs.py)"
        )
    # Ground truth coverage footer
    gt_cov = result.get("ground_truth_coverage")
    if gt_cov and gt_cov["total"] > 0:
        # Integer percentage (floor), matching the rest of the report style.
        pct = gt_cov["with_validation"] * 100 // gt_cov["total"]
        print(
            f"  Ground truth: {gt_cov['with_validation']}/{gt_cov['total']} files have emulator validation ({pct}%)"
        )
        if gt_cov["platform_only"]:
            print(f"    {gt_cov['platform_only']} platform-only (no emulator profile)")
def main():
    """CLI entry point: parse arguments and dispatch to one of four modes
    (listing, emulator, system, or platform verification)."""
    parser = argparse.ArgumentParser(description="Platform-native BIOS verification")
    parser.add_argument("--platform", "-p", help="Platform name")
    parser.add_argument(
        "--all", action="store_true", help="Verify all active platforms"
    )
    parser.add_argument(
        "--emulator", "-e", help="Emulator profile name(s), comma-separated"
    )
    parser.add_argument("--system", "-s", help="System ID(s), comma-separated")
    parser.add_argument("--standalone", action="store_true", help="Use standalone mode")
    parser.add_argument(
        "--list-emulators", action="store_true", help="List available emulators"
    )
    parser.add_argument(
        "--list-systems", action="store_true", help="List available systems"
    )
    parser.add_argument("--include-archived", action="store_true")
    parser.add_argument("--target", "-t", help="Hardware target (e.g., switch, rpi4)")
    parser.add_argument(
        "--list-targets",
        action="store_true",
        help="List available targets for the platform",
    )
    parser.add_argument("--db", default=DEFAULT_DB)
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--emulators-dir", default=DEFAULT_EMULATORS_DIR)
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show emulator ground truth details",
    )
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()
    # Listing modes return early, before the database is loaded.
    if args.list_emulators:
        list_emulator_profiles(args.emulators_dir)
        return
    if args.list_systems:
        list_system_ids(args.emulators_dir)
        return
    if args.list_targets:
        if not args.platform:
            parser.error("--list-targets requires --platform")
        # Imported lazily so the common module's target machinery is only
        # loaded when this mode is actually used.
        from common import list_available_targets
        targets = list_available_targets(args.platform, args.platforms_dir)
        if not targets:
            print(f"No targets configured for platform '{args.platform}'")
            return
        for t in targets:
            aliases = f" (aliases: {', '.join(t['aliases'])})" if t["aliases"] else ""
            print(
                f"  {t['name']:30s} {t['architecture']:10s} {t['core_count']:>4d} cores{aliases}"
            )
        return
    # Mutual exclusion
    modes = sum(1 for x in (args.platform, args.all, args.emulator, args.system) if x)
    if modes == 0:
        parser.error("Specify --platform, --all, --emulator, or --system")
    if modes > 1:
        parser.error(
            "--platform, --all, --emulator, and --system are mutually exclusive"
        )
    if args.standalone and not (args.emulator or args.system):
        parser.error("--standalone requires --emulator or --system")
    if args.target and not (args.platform or args.all):
        parser.error("--target requires --platform or --all")
    if args.target and (args.emulator or args.system):
        parser.error("--target is incompatible with --emulator and --system")
    with open(args.db) as f:
        db = json.load(f)
    # Emulator mode
    if args.emulator:
        names = [n.strip() for n in args.emulator.split(",") if n.strip()]
        result = verify_emulator(names, args.emulators_dir, db, args.standalone)
        if args.json:
            # JSON output drops OK entries to keep machine output focused
            # on problems.
            result["details"] = [
                d for d in result["details"] if d["status"] != Status.OK
            ]
            print(json.dumps(result, indent=2))
        else:
            print_emulator_result(result, verbose=args.verbose)
        return
    # System mode
    if args.system:
        system_ids = [s.strip() for s in args.system.split(",") if s.strip()]
        result = verify_system(system_ids, args.emulators_dir, db, args.standalone)
        if args.json:
            result["details"] = [
                d for d in result["details"] if d["status"] != Status.OK
            ]
            print(json.dumps(result, indent=2))
        else:
            print_emulator_result(result, verbose=args.verbose)
        return
    # Platform mode (existing)
    if args.all:
        from list_platforms import list_platforms as _list_platforms
        platforms = _list_platforms(include_archived=args.include_archived)
    elif args.platform:
        platforms = [args.platform]
    else:
        # Defensive: unreachable given the mode check above (modes == 0
        # already calls parser.error), kept as a safety net.
        parser.error("Specify --platform or --all")
        return
    # Load emulator profiles once for cross-reference (not per-platform)
    emu_profiles = load_emulator_profiles(args.emulators_dir)
    data_registry = load_data_dir_registry(args.platforms_dir)
    target_cores_cache: dict[str, set[str] | None] = {}
    if args.target:
        try:
            # May also narrow `platforms` to those supporting the target.
            target_cores_cache, platforms = build_target_cores_cache(
                platforms,
                args.target,
                args.platforms_dir,
                is_all=args.all,
            )
        except (FileNotFoundError, ValueError) as e:
            print(f"ERROR: {e}", file=sys.stderr)
            sys.exit(1)
    # Group identical platforms (same function as generate_pack)
    groups = group_identical_platforms(
        platforms, args.platforms_dir, target_cores_cache if args.target else None
    )
    from cross_reference import _build_supplemental_index
    suppl_names = _build_supplemental_index()
    all_results = {}
    group_results: list[tuple[dict, list[str]]] = []
    for group_platforms, representative in groups:
        # Verify the representative once; the result applies to the whole
        # group of identical platforms.
        config = load_platform_config(representative, args.platforms_dir)
        tc = target_cores_cache.get(representative) if args.target else None
        result = verify_platform(
            config,
            db,
            args.emulators_dir,
            emu_profiles,
            target_cores=tc,
            data_dir_registry=data_registry,
            supplemental_names=suppl_names,
        )
        names = [
            load_platform_config(p, args.platforms_dir).get("platform", p)
            for p in group_platforms
        ]
        group_results.append((result, names))
        for p in group_platforms:
            all_results[p] = result
    if not args.json:
        for result, group in group_results:
            print_platform_result(result, group, verbose=args.verbose)
            print()
    if args.json:
        # Same OK-filtering as emulator/system JSON output. Note: results
        # are shared between grouped platforms, so each shared dict is
        # filtered once per group member (idempotent).
        for r in all_results.values():
            r["details"] = [d for d in r["details"] if d["status"] != Status.OK]
        print(json.dumps(all_results, indent=2))
# Script entry point (guarded so the module can be imported without running).
if __name__ == "__main__":
    main()