From dee37c253008537b966fa1f896dfbbcc116213d2 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Sun, 29 Mar 2026 13:41:49 +0200 Subject: [PATCH] fix: truth generation uses platform config not registry --- scripts/common.py | 54 ++++++++++++++++++++++++++++++--------- scripts/generate_truth.py | 12 ++++++++- tests/test_e2e.py | 44 +++++++++++++------------------ 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/scripts/common.py b/scripts/common.py index 93762666..6531cd0c 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -1156,6 +1156,8 @@ def _determine_core_mode( return "standalone" return "libretro" ptype = profile.get("type", "libretro") + if "standalone" in ptype and "libretro" in ptype: + return "both" if "standalone" in ptype: return "standalone" return "libretro" @@ -1245,35 +1247,41 @@ def _merge_file_into_system( def generate_platform_truth( platform_name: str, - registry: dict, + config: dict, + registry_entry: dict, profiles: dict[str, dict], db: dict | None = None, target_cores: set[str] | None = None, ) -> dict: """Generate ground-truth system data for a platform from emulator profiles. + Args: + platform_name: platform identifier + config: loaded platform config (via load_platform_config), has cores, + systems, standalone_cores with inheritance resolved + registry_entry: registry metadata for hash_type, verification_mode, etc. + profiles: all loaded emulator profiles + db: optional database for hash enrichment + target_cores: optional hardware target core filter + Returns a dict with platform metadata, systems, and per-file details including which cores reference each file. """ - plat_entry = registry.get(platform_name, {}) - cores_config = plat_entry.get("cores") - - # Build a synthetic config dict for resolve_platform_cores - synthetic_config: dict = {"cores": cores_config} - if "systems" in plat_entry: - synthetic_config["systems"] = plat_entry["systems"] + cores_config = config.get("cores") # Resolve standalone set for mode determination standalone_set: set[str] | None = None - standalone_cores = plat_entry.get("standalone_cores") + standalone_cores = config.get("standalone_cores") if isinstance(standalone_cores, list): standalone_set = {str(c) for c in standalone_cores} - resolved = resolve_platform_cores(synthetic_config, profiles, target_cores) + resolved = resolve_platform_cores(config, profiles, target_cores) systems: dict[str, dict] = {} cores_profiled: set[str] = set() cores_unprofiled: set[str] = set() + # Track which cores contribute to each system + system_cores: dict[str, dict[str, set[str]]] = {} for emu_name in sorted(resolved): profile = profiles.get(emu_name) @@ -1284,7 +1292,10 @@ def generate_platform_truth( mode = _determine_core_mode(emu_name, profile, cores_config, standalone_set) raw_files = profile.get("files", []) - filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone")) + if mode == "both": + filtered = raw_files + else: + filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone")) for fe in filtered: sys_id = fe.get("system", "") @@ -1293,12 +1304,31 @@ def generate_platform_truth( sys_id = sys_ids[0] if sys_ids else "unknown" system = systems.setdefault(sys_id, {}) _merge_file_into_system(system, fe, emu_name, db) + # Track core contribution per system + sys_cov = system_cores.setdefault(sys_id, { + "profiled": set(), "unprofiled": set(), + }) + sys_cov["profiled"].add(emu_name) + + # Track unprofiled cores per system based on profile system lists + for emu_name in cores_unprofiled: + for sys_id in systems: + sys_cov = system_cores.setdefault(sys_id, { + "profiled": set(), "unprofiled": set(), + }) + sys_cov["unprofiled"].add(emu_name) # Convert sets to sorted lists for serialization - for sys_data in systems.values(): + for sys_id, sys_data in systems.items(): for fe in sys_data.get("files", []): fe["_cores"] = sorted(fe.get("_cores", set())) fe["_source_refs"] = sorted(fe.get("_source_refs", set())) + # Add per-system coverage + cov = system_cores.get(sys_id, {}) + sys_data["_coverage"] = { + "cores_profiled": sorted(cov.get("profiled", set())), + "cores_unprofiled": sorted(cov.get("unprofiled", set())), + } return { "platform": platform_name, diff --git a/scripts/generate_truth.py b/scripts/generate_truth.py index 352b90a5..6fb6692b 100644 --- a/scripts/generate_truth.py +++ b/scripts/generate_truth.py @@ -18,6 +18,7 @@ from common import ( list_registered_platforms, load_database, load_emulator_profiles, + load_platform_config, load_target_config, ) @@ -98,8 +99,17 @@ def main(argv: list[str] | None = None) -> None: print(f" {name}: no target config, skipped") continue + # Load platform config (with inheritance) and registry entry + try: + config = load_platform_config(name, args.platforms_dir) + except FileNotFoundError: + print(f" {name}: no platform config, skipped") + continue + registry_entry = registry.get(name, {}) + result = generate_platform_truth( - name, registry, profiles, db=db, target_cores=target_cores, + name, config, registry_entry, profiles, + db=db, target_cores=target_cores, ) out_path = os.path.join(args.output_dir, f"{name}.yml") diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 7e2fb30b..bd6926c3 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -2526,14 +2526,10 @@ class TestE2E(unittest.TestCase): _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) - registry = { - "testplat": { - "cores": ["testcore"], - }, - } + config = {"cores": ["testcore"]} result = generate_platform_truth( - "testplat", registry, profiles, db=None, + "testplat", config, {}, profiles, db=None, ) self.assertEqual(result["platform"], "testplat") @@ -2573,9 +2569,9 @@ class TestE2E(unittest.TestCase): _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) - registry = {"testplat": {"cores": "all_libretro"}} + config = {"cores": "all_libretro"} - result = generate_platform_truth("testplat", registry, profiles) + result = generate_platform_truth("testplat", config, {}, profiles) names = {fe["name"] for fe in result["systems"]["test-system"]["files"]} self.assertIn("both.bin", names) @@ -2605,14 +2601,12 @@ class TestE2E(unittest.TestCase): _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) - registry = { - "testplat": { - "cores": ["dualcore"], - "standalone_cores": ["dualcore"], - }, + config = { + "cores": ["dualcore"], + "standalone_cores": ["dualcore"], } - result = generate_platform_truth("testplat", registry, profiles) + result = generate_platform_truth("testplat", config, {}, profiles) names = {fe["name"] for fe in result["systems"]["test-system"]["files"]} self.assertIn("sa_file.bin", names) @@ -2649,9 +2643,9 @@ class TestE2E(unittest.TestCase): _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) - registry = {"testplat": {"cores": ["core_a", "core_b"]}} + config = {"cores": ["core_a", "core_b"]} - result = generate_platform_truth("testplat", registry, profiles) + result = generate_platform_truth("testplat", config, {}, profiles) sys_files = result["systems"]["test-system"]["files"] self.assertEqual(len(sys_files), 1) @@ -2682,9 +2676,9 @@ class TestE2E(unittest.TestCase): _emulator_profiles_cache.clear() profiles = load_emulator_profiles(self.emulators_dir) - registry = {"testplat": {"cores": ["profiled_core", "unprofiled_core"]}} + config = {"cores": ["profiled_core", "unprofiled_core"]} - result = generate_platform_truth("testplat", registry, profiles) + result = generate_platform_truth("testplat", config, {}, profiles) cov = result["_coverage"] self.assertEqual(cov["cores_profiled"], 1) @@ -3113,13 +3107,11 @@ class TestE2E(unittest.TestCase): def test_truth_diff_integration(self): """Full chain: generate truth from profiles, diff against scraped data.""" - # Registry: one platform with two cores, only core_a has a profile - registry = { - "testplat": { - "cores": ["core_a", "core_b"], - "hash_type": "md5", - "verification_mode": "md5", - }, + # Config: platform with two cores, only core_a has a profile + config = {"cores": ["core_a", "core_b"]} + registry_entry = { + "hash_type": "md5", + "verification_mode": "md5", } # Emulator profile for core_a with 2 files @@ -3161,7 +3153,7 @@ class TestE2E(unittest.TestCase): self.assertNotIn("core_b", profiles) # Generate truth - truth = generate_platform_truth("testplat", registry, profiles, db=None) + truth = generate_platform_truth("testplat", config, registry_entry, profiles, db=None) # Verify truth structure self.assertIn("test-system", truth["systems"])