mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
feat: hash-based matching for cross-reference
expand_platform_declared_names resolves each platform file's MD5 through the database to recover its canonical name and aliases, eliminating false-positive "undeclared file" reports when a platform ships a file under a different name (e.g. Batocera's ROM1 vs. gsplus's ROM).
This commit is contained in:
@@ -915,6 +915,39 @@ def filter_systems_by_target(
|
|||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
|
|
||||||
|
def expand_platform_declared_names(config: dict, db: dict) -> set[str]:
    """Build the set of file names declared by a platform config.

    Every ``name`` found under ``config["systems"][*]["files"]`` is included.
    The set is then enriched with the canonical name and aliases stored in
    the database: each platform file's MD5 is resolved through
    ``db["indexes"]["by_md5"]`` to a key of ``db["files"]``, and that
    entry's ``name`` and ``aliases`` are added. This handles cases where a
    platform declares a file under a different name than the emulator
    profile (e.g. Batocera ROM1 vs gsplus ROM).

    Keys that are missing or explicitly set to ``None`` (for instance an
    empty ``files:`` key in a YAML document) are treated as empty instead
    of raising.

    Args:
        config: Platform configuration mapping.
        db: Database mapping holding ``indexes`` and ``files``.

    Returns:
        The declared file names plus any database names/aliases recovered
        through MD5 lookup.
    """
    declared: set[str] = set()
    # ``or {}`` (rather than a .get default) also covers keys present but None.
    by_md5 = (db.get("indexes") or {}).get("by_md5") or {}
    files_db = db.get("files") or {}

    for system in (config.get("systems") or {}).values():
        for fe in (system or {}).get("files") or []:
            name = fe.get("name", "")
            if name:
                declared.add(name)

            # zippedFile entries carry the MD5 of a ROM inside the archive,
            # not of the file itself, so they cannot be looked up by file MD5.
            if fe.get("zippedFile"):
                continue

            # Normalise before lookup: tolerate non-string values and
            # surrounding whitespace; the index is queried in lowercase.
            md5 = str(fe.get("md5") or "").strip().lower()
            # Multi-hash entries (comma-separated) are skipped as well.
            if not md5 or "," in md5:
                continue

            sha1 = by_md5.get(md5)
            if not sha1:
                continue

            entry = files_db.get(sha1) or {}
            db_name = entry.get("name", "")
            if db_name:
                declared.add(db_name)
            declared.update(alias for alias in entry.get("aliases") or [] if alias)

    return declared
|
||||||
|
|
||||||
|
|
||||||
# Validation and mode filtering -extracted to validation.py for SoC.
|
# Validation and mode filtering -extracted to validation.py for SoC.
|
||||||
# Re-exported below for backward compatibility.
|
# Re-exported below for backward compatibility.
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ sys.path.insert(0, os.path.dirname(__file__))
|
|||||||
from common import (
|
from common import (
|
||||||
MANUFACTURER_PREFIXES,
|
MANUFACTURER_PREFIXES,
|
||||||
build_target_cores_cache, build_zip_contents_index, check_inside_zip,
|
build_target_cores_cache, build_zip_contents_index, check_inside_zip,
|
||||||
compute_hashes, fetch_large_file, group_identical_platforms,
|
compute_hashes, expand_platform_declared_names, fetch_large_file, group_identical_platforms,
|
||||||
list_emulator_profiles, list_platform_system_ids, list_registered_platforms,
|
list_emulator_profiles, list_platform_system_ids, list_registered_platforms,
|
||||||
filter_systems_by_target, list_system_ids, load_database,
|
filter_systems_by_target, list_system_ids, load_database,
|
||||||
load_data_dir_registry, load_emulator_profiles, load_platform_config,
|
load_data_dir_registry, load_emulator_profiles, load_platform_config,
|
||||||
@@ -371,12 +371,8 @@ def _collect_emulator_extras(
|
|||||||
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
|
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
|
||||||
|
|
||||||
# Build set of filenames already covered (platform baseline + first pass extras)
|
# Build set of filenames already covered (platform baseline + first pass extras)
|
||||||
covered_names: set[str] = set()
|
# Enriched with canonical names from DB via MD5 (handles platform renaming)
|
||||||
for sys_id, system in config.get("systems", {}).items():
|
covered_names = expand_platform_declared_names(config, db)
|
||||||
for fe in system.get("files", []):
|
|
||||||
n = fe.get("name", "")
|
|
||||||
if n:
|
|
||||||
covered_names.add(n)
|
|
||||||
for e in extras:
|
for e in extras:
|
||||||
covered_names.add(e["name"])
|
covered_names.add(e["name"])
|
||||||
|
|
||||||
|
|||||||
@@ -31,10 +31,11 @@ from pathlib import Path
|
|||||||
sys.path.insert(0, os.path.dirname(__file__))
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
from common import (
|
from common import (
|
||||||
build_target_cores_cache, build_zip_contents_index, check_inside_zip,
|
build_target_cores_cache, build_zip_contents_index, check_inside_zip,
|
||||||
compute_hashes, filter_systems_by_target, group_identical_platforms,
|
compute_hashes, expand_platform_declared_names, filter_systems_by_target,
|
||||||
list_emulator_profiles, list_system_ids, load_data_dir_registry,
|
group_identical_platforms, list_emulator_profiles, list_system_ids,
|
||||||
load_emulator_profiles, load_platform_config, md5sum, md5_composite,
|
load_data_dir_registry, load_emulator_profiles, load_platform_config,
|
||||||
require_yaml, resolve_local_file, resolve_platform_cores,
|
md5sum, md5_composite, require_yaml, resolve_local_file,
|
||||||
|
resolve_platform_cores,
|
||||||
)
|
)
|
||||||
|
|
||||||
yaml = require_yaml()
|
yaml = require_yaml()
|
||||||
@@ -261,13 +262,9 @@ def find_undeclared_files(
|
|||||||
data_names: set[str] | None = None,
|
data_names: set[str] | None = None,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Find files needed by cores but not declared in platform config."""
|
"""Find files needed by cores but not declared in platform config."""
|
||||||
# Collect all filenames declared by this platform
|
# Collect all filenames declared by this platform, enriched with
|
||||||
declared_names: set[str] = set()
|
# canonical names from DB via MD5 (handles platform renaming)
|
||||||
for sys_id, system in config.get("systems", {}).items():
|
declared_names = expand_platform_declared_names(config, db)
|
||||||
for fe in system.get("files", []):
|
|
||||||
name = fe.get("name", "")
|
|
||||||
if name:
|
|
||||||
declared_names.add(name)
|
|
||||||
|
|
||||||
# Collect data_directory refs
|
# Collect data_directory refs
|
||||||
declared_dd: set[str] = set()
|
declared_dd: set[str] = set()
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
|
|||||||
import yaml
|
import yaml
|
||||||
from common import (
|
from common import (
|
||||||
build_zip_contents_index, check_inside_zip, compute_hashes,
|
build_zip_contents_index, check_inside_zip, compute_hashes,
|
||||||
|
expand_platform_declared_names,
|
||||||
group_identical_platforms, load_emulator_profiles, load_platform_config,
|
group_identical_platforms, load_emulator_profiles, load_platform_config,
|
||||||
md5_composite, md5sum, parse_md5_list, resolve_local_file,
|
md5_composite, md5sum, parse_md5_list, resolve_local_file,
|
||||||
resolve_platform_cores, safe_extract_zip,
|
resolve_platform_cores, safe_extract_zip,
|
||||||
@@ -269,6 +270,12 @@ class TestE2E(unittest.TestCase):
|
|||||||
{"ref": "test-data-dir", "destination": "TestData"},
|
{"ref": "test-data-dir", "destination": "TestData"},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"sys-renamed": {
|
||||||
|
"files": [
|
||||||
|
{"name": "renamed_file.bin", "destination": "renamed_file.bin",
|
||||||
|
"md5": f["correct_hash.bin"]["md5"], "required": True},
|
||||||
|
],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
with open(os.path.join(self.platforms_dir, "test_md5.yml"), "w") as fh:
|
with open(os.path.join(self.platforms_dir, "test_md5.yml"), "w") as fh:
|
||||||
@@ -490,6 +497,19 @@ class TestE2E(unittest.TestCase):
|
|||||||
with open(os.path.join(self.emulators_dir, "test_subdir_core.yml"), "w") as fh:
|
with open(os.path.join(self.emulators_dir, "test_subdir_core.yml"), "w") as fh:
|
||||||
yaml.dump(emu_subdir, fh)
|
yaml.dump(emu_subdir, fh)
|
||||||
|
|
||||||
|
# Emulator whose file is declared by platform under a different name
|
||||||
|
# (e.g. gsplus ROM vs Batocera ROM1) — hash-based matching should resolve
|
||||||
|
emu_renamed = {
|
||||||
|
"emulator": "TestRenamed",
|
||||||
|
"type": "standalone",
|
||||||
|
"systems": ["sys-renamed"],
|
||||||
|
"files": [
|
||||||
|
{"name": "correct_hash.bin", "required": True},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
with open(os.path.join(self.emulators_dir, "test_renamed.yml"), "w") as fh:
|
||||||
|
yaml.dump(emu_renamed, fh)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# THE TEST -one method per feature area, all using same fixtures
|
# THE TEST -one method per feature area, all using same fixtures
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
@@ -3251,6 +3271,25 @@ class TestE2E(unittest.TestCase):
|
|||||||
self.assertEqual(div["extra_unprofiled"][0]["name"], "phantom.bin")
|
self.assertEqual(div["extra_unprofiled"][0]["name"], "phantom.bin")
|
||||||
self.assertNotIn("extra_phantom", div)
|
self.assertNotIn("extra_phantom", div)
|
||||||
|
|
||||||
|
def test_173_cross_ref_hash_matching(self):
    """A file the platform declares under another name is matched by MD5.

    The platform declares ``renamed_file.bin`` with the MD5 of
    ``correct_hash.bin``; hash-based matching must therefore keep
    ``correct_hash.bin`` out of the undeclared list.
    """
    platform_cfg = load_platform_config("test_md5", self.platforms_dir)
    emu_profiles = load_emulator_profiles(self.emulators_dir)
    missing = find_undeclared_files(
        platform_cfg, self.emulators_dir, self.db, emu_profiles
    )
    reported: set = set()
    for item in missing:
        reported.add(item["name"])
    self.assertNotIn("correct_hash.bin", reported)
|
||||||
|
|
||||||
|
def test_174_expand_platform_declared_names(self):
    """The expanded name set holds both the declared and the DB canonical name."""
    platform_cfg = load_platform_config("test_md5", self.platforms_dir)
    expanded = expand_platform_declared_names(platform_cfg, self.db)
    # First the name the platform declares directly, then the DB canonical
    # name recovered for the same MD5.
    for expected in ("renamed_file.bin", "correct_hash.bin"):
        self.assertIn(expected, expanded)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user