diff --git a/scripts/common.py b/scripts/common.py index c2b28130..059400da 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -1136,3 +1136,171 @@ def list_platform_system_ids(platform_name: str, platforms_dir: str) -> None: mfr = systems[sys_id].get("manufacturer", "") mfr_display = f" [{mfr.split('|')[0]}]" if mfr else "" print(f" {sys_id:35s} ({file_count} file{'s' if file_count != 1 else ''}){mfr_display}") + + +# --------------------------------------------------------------- +# Truth generation — build ground-truth YAML from emulator profiles +# --------------------------------------------------------------- + +def _determine_core_mode( + emu_name: str, profile: dict, + cores_config: str | list | None, + standalone_set: set[str] | None, +) -> str: + """Determine effective mode (libretro/standalone) for a resolved core.""" + if cores_config == "all_libretro": + return "libretro" + if standalone_set is not None: + profile_names = {emu_name} | {str(c) for c in profile.get("cores", [])} + if profile_names & standalone_set: + return "standalone" + return "libretro" + ptype = profile.get("type", "libretro") + if "standalone" in ptype: + return "standalone" + return "libretro" + + +def _enrich_hashes(entry: dict, db: dict) -> None: + """Fill missing hash fields from the database.""" + sha1 = entry.get("sha1", "") + md5 = entry.get("md5", "") + + record = None + if sha1 and db.get("files"): + record = db["files"].get(sha1) + if record is None and md5: + by_md5 = db.get("by_md5", {}) + ref_sha1 = by_md5.get(md5.lower()) + if ref_sha1 and db.get("files"): + record = db["files"].get(ref_sha1) + if record is None: + return + + for field in ("sha1", "md5", "sha256", "crc32"): + if not entry.get(field) and record.get(field): + entry[field] = record[field] + + +def _merge_file_into_system( + system: dict, file_entry: dict, emu_name: str, db: dict | None, +) -> None: + """Merge a file entry into a system's file list, deduplicating by name.""" + files = 
system.setdefault("files", []) + name_lower = file_entry["name"].lower() + + existing = None + for f in files: + if f["name"].lower() == name_lower: + existing = f + break + + if existing is not None: + existing["_cores"] = existing.get("_cores", set()) | {emu_name} + existing["_source_refs"] = existing.get("_source_refs", set()) | ( + {file_entry["source_ref"]} if file_entry.get("source_ref") else set() + ) + if file_entry.get("required") and not existing.get("required"): + existing["required"] = True + for h in ("sha1", "md5", "sha256", "crc32"): + theirs = file_entry.get(h, "") + ours = existing.get(h, "") + if theirs and ours and theirs.lower() != ours.lower(): + import sys as _sys + print( + f"WARNING: hash conflict for {file_entry['name']} " + f"({h}: {ours} vs {theirs}, core {emu_name})", + file=_sys.stderr, + ) + elif theirs and not ours: + existing[h] = theirs + return + + entry: dict = {"name": file_entry["name"]} + if file_entry.get("required") is not None: + entry["required"] = file_entry["required"] + for field in ("sha1", "md5", "sha256", "crc32", "size", "path", + "description", "hle_fallback", "category", "note", + "validation", "min_size", "max_size", "aliases"): + val = file_entry.get(field) + if val is not None: + entry[field] = val + entry["_cores"] = {emu_name} + if file_entry.get("source_ref"): + entry["_source_refs"] = {file_entry["source_ref"]} + else: + entry["_source_refs"] = set() + + if db: + _enrich_hashes(entry, db) + + files.append(entry) + + +def generate_platform_truth( + platform_name: str, + registry: dict, + profiles: dict[str, dict], + db: dict | None = None, + target_cores: set[str] | None = None, +) -> dict: + """Generate ground-truth system data for a platform from emulator profiles. + + Returns a dict with platform metadata, systems, and per-file details + including which cores reference each file. 
+ """ + plat_entry = registry.get(platform_name, {}) + cores_config = plat_entry.get("cores") + + # Build a synthetic config dict for resolve_platform_cores + synthetic_config: dict = {"cores": cores_config} + if "systems" in plat_entry: + synthetic_config["systems"] = plat_entry["systems"] + + # Resolve standalone set for mode determination + standalone_set: set[str] | None = None + standalone_cores = plat_entry.get("standalone_cores") + if isinstance(standalone_cores, list): + standalone_set = {str(c) for c in standalone_cores} + + resolved = resolve_platform_cores(synthetic_config, profiles, target_cores) + + systems: dict[str, dict] = {} + cores_profiled: set[str] = set() + cores_unprofiled: set[str] = set() + + for emu_name in sorted(resolved): + profile = profiles.get(emu_name) + if not profile: + cores_unprofiled.add(emu_name) + continue + cores_profiled.add(emu_name) + + mode = _determine_core_mode(emu_name, profile, cores_config, standalone_set) + raw_files = profile.get("files", []) + filtered = filter_files_by_mode(raw_files, standalone=(mode == "standalone")) + + for fe in filtered: + sys_id = fe.get("system", "") + if not sys_id: + sys_ids = profile.get("systems", []) + sys_id = sys_ids[0] if sys_ids else "unknown" + system = systems.setdefault(sys_id, {}) + _merge_file_into_system(system, fe, emu_name, db) + + # Convert sets to sorted lists for serialization + for sys_data in systems.values(): + for fe in sys_data.get("files", []): + fe["_cores"] = sorted(fe.get("_cores", set())) + fe["_source_refs"] = sorted(fe.get("_source_refs", set())) + + return { + "platform": platform_name, + "generated": True, + "systems": systems, + "_coverage": { + "cores_resolved": len(resolved), + "cores_profiled": len(cores_profiled), + "cores_unprofiled": sorted(cores_unprofiled), + }, + } diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 5b8cab83..a857da36 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -32,6 +32,7 @@ import yaml from common import ( 
def test_168_generate_truth_basic(self):
    """generate_platform_truth resolves cores and builds system truth."""
    import yaml as _yaml
    from common import _emulator_profiles_cache

    bios_entry = {
        "name": "bios.bin",
        "system": "test-system",
        "required": True,
        "sha1": "aabbccdd" * 5,
        "md5": "11223344" * 4,
        "size": 1024,
        "path": "TestConsole/bios.bin",
        "source_ref": "main.cpp:42",
    }
    core_profile = {
        "emulator": "TestCore",
        "type": "libretro",
        "systems": ["test-system"],
        "cores": ["testcore"],
        "files": [bios_entry],
    }

    # Write the synthetic profile into the emulators directory.
    target = os.path.join(self.emulators_dir, "testcore.yml")
    with open(target, "w") as handle:
        _yaml.dump(core_profile, handle)

    # Clear profile cache so fresh load picks up our file
    _emulator_profiles_cache.clear()
    loaded_profiles = load_emulator_profiles(self.emulators_dir)

    platform_registry = {"testplat": {"cores": ["testcore"]}}
    truth = generate_platform_truth(
        "testplat", platform_registry, loaded_profiles, db=None,
    )

    # Platform-level metadata.
    self.assertEqual(truth["platform"], "testplat")
    self.assertTrue(truth["generated"])

    # Exactly one file, filed under the system named by the entry.
    self.assertIn("test-system", truth["systems"])
    listed = truth["systems"]["test-system"]["files"]
    self.assertEqual(len(listed), 1)

    only_file = listed[0]
    self.assertEqual(only_file["name"], "bios.bin")
    self.assertTrue(only_file["required"])
    self.assertEqual(only_file["sha1"], "aabbccdd" * 5)
    self.assertIn("testcore", only_file["_cores"])
    self.assertIn("main.cpp:42", only_file["_source_refs"])
accessible.""" import yaml