From dee37c253008537b966fa1f896dfbbcc116213d2 Mon Sep 17 00:00:00 2001
From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com>
Date: Sun, 29 Mar 2026 13:41:49 +0200
Subject: [PATCH] fix: truth generation uses platform config not registry

---
 scripts/common.py         | 54 ++++++++++++++++++++++++++++++---------
 scripts/generate_truth.py | 12 ++++++++-
 tests/test_e2e.py         | 44 +++++++++++++------------------
 3 files changed, 71 insertions(+), 39 deletions(-)

diff --git a/scripts/common.py b/scripts/common.py
index 93762666..6531cd0c 100644
--- a/scripts/common.py
+++ b/scripts/common.py
@@ -1156,6 +1156,8 @@ def _determine_core_mode(
             return "standalone"
         return "libretro"
     ptype = profile.get("type", "libretro")
+    if "standalone" in ptype and "libretro" in ptype:
+        return "both"
     if "standalone" in ptype:
         return "standalone"
     return "libretro"
@@ -1245,35 +1247,41 @@ def _merge_file_into_system(
 
 def generate_platform_truth(
     platform_name: str,
-    registry: dict,
+    config: dict,
+    registry_entry: dict,
     profiles: dict[str, dict],
     db: dict | None = None,
     target_cores: set[str] | None = None,
 ) -> dict:
     """Generate ground-truth system data for a platform from emulator profiles.
 
+    Args:
+        platform_name: platform identifier
+        config: loaded platform config (via load_platform_config), has cores,
+                systems, standalone_cores with inheritance resolved
+        registry_entry: registry metadata for hash_type, verification_mode, etc.
+        profiles: all loaded emulator profiles
+        db: optional database for hash enrichment
+        target_cores: optional hardware target core filter
+
     Returns a dict with platform metadata, systems, and per-file details
     including which cores reference each file.
     """
-    plat_entry = registry.get(platform_name, {})
-    cores_config = plat_entry.get("cores")
-
-    # Build a synthetic config dict for resolve_platform_cores
-    synthetic_config: dict = {"cores": cores_config}
-    if "systems" in plat_entry:
-        synthetic_config["systems"] = plat_entry["systems"]
+    cores_config = config.get("cores")
 
     # Resolve standalone set for mode determination
     standalone_set: set[str] | None = None
-    standalone_cores = plat_entry.get("standalone_cores")
+    standalone_cores = config.get("standalone_cores")
     if isinstance(standalone_cores, list):
         standalone_set = {str(c) for c in standalone_cores}
 
-    resolved = resolve_platform_cores(synthetic_config, profiles, target_cores)
+    resolved = resolve_platform_cores(config, profiles, target_cores)
 
     systems: dict[str, dict] = {}
     cores_profiled: set[str] = set()
     cores_unprofiled: set[str] = set()
+    # Track which cores contribute to each system
+    system_cores: dict[str, dict[str, set[str]]] = {}
 
     for emu_name in sorted(resolved):
         profile = profiles.get(emu_name)
@@ -1284,7 +1292,10 @@ def generate_platform_truth(
 
         mode = _determine_core_mode(emu_name, profile, cores_config, standalone_set)
         raw_files = profile.get("files", [])
-        filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone"))
+        if mode == "both":
+            filtered = raw_files
+        else:
+            filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone"))
 
         for fe in filtered:
             sys_id = fe.get("system", "")
@@ -1293,12 +1304,31 @@ def generate_platform_truth(
                 sys_id = sys_ids[0] if sys_ids else "unknown"
             system = systems.setdefault(sys_id, {})
             _merge_file_into_system(system, fe, emu_name, db)
+            # Track core contribution per system
+            sys_cov = system_cores.setdefault(sys_id, {
+                "profiled": set(), "unprofiled": set(),
+            })
+            sys_cov["profiled"].add(emu_name)
+
+    # Unprofiled cores have no profile to map them to systems, so list each one under every system
+    for emu_name in cores_unprofiled:
+        for sys_id in systems:
+            sys_cov = system_cores.setdefault(sys_id, {
+                "profiled": set(), "unprofiled": set(),
+            })
+            sys_cov["unprofiled"].add(emu_name)
 
     # Convert sets to sorted lists for serialization
-    for sys_data in systems.values():
+    for sys_id, sys_data in systems.items():
         for fe in sys_data.get("files", []):
             fe["_cores"] = sorted(fe.get("_cores", set()))
             fe["_source_refs"] = sorted(fe.get("_source_refs", set()))
+        # Add per-system coverage
+        cov = system_cores.get(sys_id, {})
+        sys_data["_coverage"] = {
+            "cores_profiled": sorted(cov.get("profiled", set())),
+            "cores_unprofiled": sorted(cov.get("unprofiled", set())),
+        }
 
     return {
         "platform": platform_name,
diff --git a/scripts/generate_truth.py b/scripts/generate_truth.py
index 352b90a5..6fb6692b 100644
--- a/scripts/generate_truth.py
+++ b/scripts/generate_truth.py
@@ -18,6 +18,7 @@ from common import (
     list_registered_platforms,
     load_database,
     load_emulator_profiles,
+    load_platform_config,
     load_target_config,
 )
 
@@ -98,8 +99,17 @@ def main(argv: list[str] | None = None) -> None:
                 print(f"  {name}: no target config, skipped")
                 continue
 
+        # Load platform config (with inheritance) and registry entry
+        try:
+            config = load_platform_config(name, args.platforms_dir)
+        except FileNotFoundError:
+            print(f"  {name}: no platform config, skipped")
+            continue
+        registry_entry = registry.get(name, {})
+
         result = generate_platform_truth(
-            name, registry, profiles, db=db, target_cores=target_cores,
+            name, config, registry_entry, profiles,
+            db=db, target_cores=target_cores,
         )
 
         out_path = os.path.join(args.output_dir, f"{name}.yml")
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index 7e2fb30b..bd6926c3 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -2526,14 +2526,10 @@ class TestE2E(unittest.TestCase):
         _emulator_profiles_cache.clear()
 
         profiles = load_emulator_profiles(self.emulators_dir)
-        registry = {
-            "testplat": {
-                "cores": ["testcore"],
-            },
-        }
+        config = {"cores": ["testcore"]}
 
         result = generate_platform_truth(
-            "testplat", registry, profiles, db=None,
+            "testplat", config, {}, profiles, db=None,
         )
 
         self.assertEqual(result["platform"], "testplat")
@@ -2573,9 +2569,9 @@ class TestE2E(unittest.TestCase):
 
         _emulator_profiles_cache.clear()
         profiles = load_emulator_profiles(self.emulators_dir)
-        registry = {"testplat": {"cores": "all_libretro"}}
+        config = {"cores": "all_libretro"}
 
-        result = generate_platform_truth("testplat", registry, profiles)
+        result = generate_platform_truth("testplat", config, {}, profiles)
         names = {fe["name"] for fe in result["systems"]["test-system"]["files"]}
 
         self.assertIn("both.bin", names)
@@ -2605,14 +2601,12 @@ class TestE2E(unittest.TestCase):
 
         _emulator_profiles_cache.clear()
         profiles = load_emulator_profiles(self.emulators_dir)
-        registry = {
-            "testplat": {
-                "cores": ["dualcore"],
-                "standalone_cores": ["dualcore"],
-            },
+        config = {
+            "cores": ["dualcore"],
+            "standalone_cores": ["dualcore"],
         }
 
-        result = generate_platform_truth("testplat", registry, profiles)
+        result = generate_platform_truth("testplat", config, {}, profiles)
         names = {fe["name"] for fe in result["systems"]["test-system"]["files"]}
 
         self.assertIn("sa_file.bin", names)
@@ -2649,9 +2643,9 @@ class TestE2E(unittest.TestCase):
 
         _emulator_profiles_cache.clear()
         profiles = load_emulator_profiles(self.emulators_dir)
-        registry = {"testplat": {"cores": ["core_a", "core_b"]}}
+        config = {"cores": ["core_a", "core_b"]}
 
-        result = generate_platform_truth("testplat", registry, profiles)
+        result = generate_platform_truth("testplat", config, {}, profiles)
         sys_files = result["systems"]["test-system"]["files"]
         self.assertEqual(len(sys_files), 1)
 
@@ -2682,9 +2676,9 @@ class TestE2E(unittest.TestCase):
 
         _emulator_profiles_cache.clear()
         profiles = load_emulator_profiles(self.emulators_dir)
-        registry = {"testplat": {"cores": ["profiled_core", "unprofiled_core"]}}
+        config = {"cores": ["profiled_core", "unprofiled_core"]}
 
-        result = generate_platform_truth("testplat", registry, profiles)
+        result = generate_platform_truth("testplat", config, {}, profiles)
         cov = result["_coverage"]
 
         self.assertEqual(cov["cores_profiled"], 1)
@@ -3113,13 +3107,11 @@ class TestE2E(unittest.TestCase):
 
     def test_truth_diff_integration(self):
         """Full chain: generate truth from profiles, diff against scraped data."""
-        # Registry: one platform with two cores, only core_a has a profile
-        registry = {
-            "testplat": {
-                "cores": ["core_a", "core_b"],
-                "hash_type": "md5",
-                "verification_mode": "md5",
-            },
+        # Config: platform with two cores, only core_a has a profile
+        config = {"cores": ["core_a", "core_b"]}
+        registry_entry = {
+            "hash_type": "md5",
+            "verification_mode": "md5",
         }
 
         # Emulator profile for core_a with 2 files
@@ -3161,7 +3153,7 @@ class TestE2E(unittest.TestCase):
         self.assertNotIn("core_b", profiles)
 
         # Generate truth
-        truth = generate_platform_truth("testplat", registry, profiles, db=None)
+        truth = generate_platform_truth("testplat", config, registry_entry, profiles, db=None)
 
         # Verify truth structure
         self.assertIn("test-system", truth["systems"])