feat: add per-emulator ground truth to validation index

This commit is contained in:
Abdessamad Derraz
2026-03-27 23:25:42 +01:00
parent 3672912de7
commit 6d959ff2b0
2 changed files with 95 additions and 4 deletions
+54 -4
View File
@@ -759,16 +759,18 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
Returns {filename: {"checks": [str], "size": int|None, "min_size": int|None, Returns {filename: {"checks": [str], "size": int|None, "min_size": int|None,
"max_size": int|None, "crc32": str|None, "md5": str|None, "sha1": str|None, "max_size": int|None, "crc32": str|None, "md5": str|None, "sha1": str|None,
"adler32": str|None, "crypto_only": [str]}}. "adler32": str|None, "crypto_only": [str], "per_emulator": {emu: detail}}}.
``crypto_only`` lists validation types we cannot reproduce (signature, crypto) ``crypto_only`` lists validation types we cannot reproduce (signature, crypto)
so callers can report them as non-verifiable rather than silently skipping. so callers can report them as non-verifiable rather than silently skipping.
``per_emulator`` preserves each core's individual checks, source_ref, and
expected values before merging, for ground truth reporting.
When multiple emulators reference the same file, merges checks (union). When multiple emulators reference the same file, merges checks (union).
Raises ValueError if two profiles declare conflicting values. Raises ValueError if two profiles declare conflicting values.
""" """
index: dict[str, dict] = {} index: dict[str, dict] = {}
sources: dict[str, dict[str, str]] = {}
for emu_name, profile in profiles.items(): for emu_name, profile in profiles.items():
if profile.get("type") in ("launcher", "alias"): if profile.get("type") in ("launcher", "alias"):
continue continue
@@ -785,9 +787,8 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
"min_size": None, "max_size": None, "min_size": None, "max_size": None,
"crc32": set(), "md5": set(), "sha1": set(), "sha256": set(), "crc32": set(), "md5": set(), "sha1": set(), "sha256": set(),
"adler32": set(), "crypto_only": set(), "adler32": set(), "crypto_only": set(),
"emulators": set(), "emulators": set(), "per_emulator": {},
} }
sources[fname] = {}
index[fname]["emulators"].add(emu_name) index[fname]["emulators"].add(emu_name)
index[fname]["checks"].update(checks) index[fname]["checks"].update(checks)
# Track non-reproducible crypto checks # Track non-reproducible crypto checks
@@ -830,6 +831,34 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
if norm.startswith("0x"): if norm.startswith("0x"):
norm = norm[2:] norm = norm[2:]
index[fname]["adler32"].add(norm) index[fname]["adler32"].add(norm)
# Per-emulator ground truth detail
expected: dict = {}
if "size" in checks:
for key in ("size", "min_size", "max_size"):
if f.get(key) is not None:
expected[key] = f[key]
for hash_type in ("crc32", "md5", "sha1", "sha256"):
if hash_type in checks and f.get(hash_type):
expected[hash_type] = f[hash_type]
adler_val_pe = f.get("known_hash_adler32") or f.get("adler32")
if adler_val_pe:
expected["adler32"] = adler_val_pe
pe_entry = {
"checks": sorted(checks),
"source_ref": f.get("source_ref"),
"expected": expected,
}
pe = index[fname]["per_emulator"]
if emu_name in pe:
# Merge checks from multiple file entries for same emulator
existing = pe[emu_name]
merged_checks = sorted(set(existing["checks"]) | set(pe_entry["checks"]))
existing["checks"] = merged_checks
existing["expected"].update(pe_entry["expected"])
if pe_entry["source_ref"] and not existing["source_ref"]:
existing["source_ref"] = pe_entry["source_ref"]
else:
pe[emu_name] = pe_entry
# Convert sets to sorted tuples/lists for determinism # Convert sets to sorted tuples/lists for determinism
for v in index.values(): for v in index.values():
v["checks"] = sorted(v["checks"]) v["checks"] = sorted(v["checks"])
@@ -839,6 +868,27 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
return index return index
def build_ground_truth(filename: str, validation_index: dict[str, dict]) -> list[dict]:
    """Format per-emulator ground truth for a file from the validation index.

    Args:
        filename: Key to look up in ``validation_index``.
        validation_index: Mapping of filename to index entry. Only the
            entry's ``"per_emulator"`` mapping (emulator name -> detail
            dict) is read.

    Returns:
        A list of ``{"emulator", "checks", "source_ref", "expected"}`` dicts
        sorted by emulator name, or ``[]`` when the file is unknown or has
        no per-emulator data. ``checks`` and ``expected`` are shallow
        copies, so mutating the result does not alter the index; a detail
        lacking either field yields an empty list / dict for it.
    """
    entry = validation_index.get(filename)
    per_emulator = entry.get("per_emulator") if entry else None
    if not per_emulator:
        return []
    return [
        {
            "emulator": emu_name,
            # Copy the containers: the index is shared with other callers,
            # and report code must not be able to modify it in place.
            "checks": list(detail.get("checks") or []),
            "source_ref": detail.get("source_ref"),
            "expected": dict(detail.get("expected") or {}),
        }
        for emu_name, detail in sorted(
            per_emulator.items(), key=lambda item: item[0]
        )
    ]
def check_file_validation( def check_file_validation(
local_path: str, filename: str, validation_index: dict[str, dict], local_path: str, filename: str, validation_index: dict[str, dict],
bios_dir: str = "bios", bios_dir: str = "bios",
+41
View File
@@ -1353,5 +1353,46 @@ class TestE2E(unittest.TestCase):
self.assertNotIn("bios_b.bin", names) self.assertNotIn("bios_b.bin", names)
# ---------------------------------------------------------------
# Validation index per-emulator ground truth (Task: ground truth)
# ---------------------------------------------------------------
def test_111_validation_index_per_emulator(self):
    """The validation index keeps a per-emulator breakdown for each file."""
    validation_index = _build_validation_index(
        load_emulator_profiles(self.emulators_dir)
    )
    file_entry = validation_index["present_req.bin"]
    self.assertIn("per_emulator", file_entry)
    by_emulator = file_entry["per_emulator"]
    self.assertIn("test_validation", by_emulator)
    # The emulator's own checks and expected values must survive merging.
    emu_detail = by_emulator["test_validation"]
    self.assertIn("size", emu_detail["checks"])
    self.assertEqual(emu_detail["expected"]["size"], 16)
def test_112_build_ground_truth(self):
    """build_ground_truth lists per-emulator detail for a known filename."""
    from common import build_ground_truth

    validation_index = _build_validation_index(
        load_emulator_profiles(self.emulators_dir)
    )
    ground_truth = build_ground_truth("present_req.bin", validation_index)
    self.assertIsInstance(ground_truth, list)
    self.assertTrue(len(ground_truth) >= 1)
    self.assertIn(
        "test_validation", {item["emulator"] for item in ground_truth}
    )
    # Only the test_validation record(s) are inspected in detail.
    matching = [
        item for item in ground_truth if item["emulator"] == "test_validation"
    ]
    for item in matching:
        self.assertIn("size", item["checks"])
        self.assertIn("source_ref", item)
        self.assertIn("expected", item)
def test_113_build_ground_truth_empty(self):
    """An unknown filename yields an empty ground-truth list."""
    from common import build_ground_truth

    validation_index = _build_validation_index(
        load_emulator_profiles(self.emulators_dir)
    )
    self.assertEqual(
        build_ground_truth("nonexistent.bin", validation_index), []
    )
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()