# scripts/common.py - defined directly after filter_systems_by_target().
def expand_platform_declared_names(config: dict, db: dict) -> set[str]:
    """Build the set of file names declared by a platform config.

    Every non-empty ``name`` listed under ``config["systems"][*]["files"]``
    is included. The set is then enriched with the canonical name and the
    aliases stored in the database: each platform file's MD5 is resolved
    through ``db["indexes"]["by_md5"]`` to a SHA1 key, which is looked up in
    ``db["files"]``. This handles cases where a platform declares a file
    under a different name than the emulator profile (e.g. Batocera ROM1
    vs gsplus ROM).

    Args:
        config: Platform config mapping; only ``systems`` -> ``files`` ->
            ``name`` / ``md5`` / ``zippedFile`` are read.
        db: Database mapping; only ``indexes.by_md5`` and ``files`` are read.

    Returns:
        The declared names plus any database names/aliases reachable via MD5.
        Missing or null sections contribute nothing instead of raising.
    """
    # ``x.get(key) or default`` (rather than ``x.get(key, default)``) also
    # covers keys that are present but null, e.g. an empty ``files:`` in YAML.
    by_md5 = (db.get("indexes") or {}).get("by_md5") or {}
    files_db = db.get("files") or {}
    declared: set[str] = set()

    for system in (config.get("systems") or {}).values():
        for fe in (system or {}).get("files") or []:
            name = fe.get("name", "")
            if name:
                declared.add(name)

            md5 = fe.get("md5", "")
            # A non-string hash (missing, null, or parsed as a number) cannot
            # be matched against the index.
            if not isinstance(md5, str):
                continue
            # The index lookup uses the lowercase form; surrounding
            # whitespace would make it miss.
            md5 = md5.strip().lower()
            # Skip multi-hash and zippedFile entries (inner ROM MD5, not file MD5)
            if not md5 or "," in md5 or fe.get("zippedFile"):
                continue

            sha1 = by_md5.get(md5)
            if not sha1:
                continue

            entry = files_db.get(sha1) or {}
            db_name = entry.get("name", "")
            if db_name:
                declared.add(db_name)
            # Empty aliases are dropped, consistent with the name checks above.
            declared.update(
                alias for alias in entry.get("aliases") or [] if alias
            )
    return declared


# Validation and mode filtering - extracted to validation.py for SoC.
# Re-exported below for backward compatibility.
diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index 6d3b51f3..8b861959 100644 --- a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(__file__)) from common import ( MANUFACTURER_PREFIXES, build_target_cores_cache, build_zip_contents_index, check_inside_zip, - compute_hashes, fetch_large_file, group_identical_platforms, + compute_hashes, expand_platform_declared_names, fetch_large_file, group_identical_platforms, list_emulator_profiles, list_platform_system_ids, list_registered_platforms, filter_systems_by_target, list_system_ids, load_database, load_data_dir_registry, load_emulator_profiles, load_platform_config, @@ -371,12 +371,8 @@ def _collect_emulator_extras( by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {}) # Build set of filenames already covered (platform baseline + first pass extras) - covered_names: set[str] = set() - for sys_id, system in config.get("systems", {}).items(): - for fe in system.get("files", []): - n = fe.get("name", "") - if n: - covered_names.add(n) + # Enriched with canonical names from DB via MD5 (handles platform renaming) + covered_names = expand_platform_declared_names(config, db) for e in extras: covered_names.add(e["name"]) diff --git a/scripts/verify.py b/scripts/verify.py index 7e5a4ad2..3ba9e454 100644 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -31,10 +31,11 @@ from pathlib import Path sys.path.insert(0, os.path.dirname(__file__)) from common import ( build_target_cores_cache, build_zip_contents_index, check_inside_zip, - compute_hashes, filter_systems_by_target, group_identical_platforms, - list_emulator_profiles, list_system_ids, load_data_dir_registry, - load_emulator_profiles, load_platform_config, md5sum, md5_composite, - require_yaml, resolve_local_file, resolve_platform_cores, + compute_hashes, expand_platform_declared_names, filter_systems_by_target, + group_identical_platforms, list_emulator_profiles, list_system_ids, + 
load_data_dir_registry, load_emulator_profiles, load_platform_config, + md5sum, md5_composite, require_yaml, resolve_local_file, + resolve_platform_cores, ) yaml = require_yaml() @@ -261,13 +262,9 @@ def find_undeclared_files( data_names: set[str] | None = None, ) -> list[dict]: """Find files needed by cores but not declared in platform config.""" - # Collect all filenames declared by this platform - declared_names: set[str] = set() - for sys_id, system in config.get("systems", {}).items(): - for fe in system.get("files", []): - name = fe.get("name", "") - if name: - declared_names.add(name) + # Collect all filenames declared by this platform, enriched with + # canonical names from DB via MD5 (handles platform renaming) + declared_names = expand_platform_declared_names(config, db) # Collect data_directory refs declared_dd: set[str] = set() diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 112b2525..d9bad085 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -31,6 +31,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts")) import yaml from common import ( build_zip_contents_index, check_inside_zip, compute_hashes, + expand_platform_declared_names, group_identical_platforms, load_emulator_profiles, load_platform_config, md5_composite, md5sum, parse_md5_list, resolve_local_file, resolve_platform_cores, safe_extract_zip, @@ -269,6 +270,12 @@ class TestE2E(unittest.TestCase): {"ref": "test-data-dir", "destination": "TestData"}, ], }, + "sys-renamed": { + "files": [ + {"name": "renamed_file.bin", "destination": "renamed_file.bin", + "md5": f["correct_hash.bin"]["md5"], "required": True}, + ], + }, }, } with open(os.path.join(self.platforms_dir, "test_md5.yml"), "w") as fh: @@ -490,6 +497,19 @@ class TestE2E(unittest.TestCase): with open(os.path.join(self.emulators_dir, "test_subdir_core.yml"), "w") as fh: yaml.dump(emu_subdir, fh) + # Emulator whose file is declared by platform under a different name + # (e.g. 
gsplus ROM vs Batocera ROM1) — hash-based matching should resolve + emu_renamed = { + "emulator": "TestRenamed", + "type": "standalone", + "systems": ["sys-renamed"], + "files": [ + {"name": "correct_hash.bin", "required": True}, + ], + } + with open(os.path.join(self.emulators_dir, "test_renamed.yml"), "w") as fh: + yaml.dump(emu_renamed, fh) + # --------------------------------------------------------------- # THE TEST -one method per feature area, all using same fixtures # --------------------------------------------------------------- @@ -3251,6 +3271,25 @@ class TestE2E(unittest.TestCase): self.assertEqual(div["extra_unprofiled"][0]["name"], "phantom.bin") self.assertNotIn("extra_phantom", div) + def test_173_cross_ref_hash_matching(self): + """Platform file under different name matched by MD5 is not undeclared.""" + config = load_platform_config("test_md5", self.platforms_dir) + profiles = load_emulator_profiles(self.emulators_dir) + undeclared = find_undeclared_files(config, self.emulators_dir, self.db, profiles) + names = {u["name"] for u in undeclared} + # correct_hash.bin is declared by platform as renamed_file.bin with same MD5 + # hash-based matching should suppress it from undeclared + self.assertNotIn("correct_hash.bin", names) + + def test_174_expand_platform_declared_names(self): + """expand_platform_declared_names enriches with DB canonical names.""" + config = load_platform_config("test_md5", self.platforms_dir) + result = expand_platform_declared_names(config, self.db) + # renamed_file.bin is declared directly + self.assertIn("renamed_file.bin", result) + # correct_hash.bin is the DB canonical name for the same MD5 + self.assertIn("correct_hash.bin", result) + if __name__ == "__main__": unittest.main()