mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
fix: normalize system IDs in diff matching
This commit is contained in:
@@ -1455,7 +1455,12 @@ def _update_summary(summary: dict, sys_div: dict) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def diff_platform_truth(truth: dict, scraped: dict) -> dict:
|
def diff_platform_truth(truth: dict, scraped: dict) -> dict:
|
||||||
"""Compare truth YAML against scraped YAML, returning divergences."""
|
"""Compare truth YAML against scraped YAML, returning divergences.
|
||||||
|
|
||||||
|
System IDs are matched using normalized forms (via _norm_system_id) to
|
||||||
|
handle naming differences between emulator profiles and scraped platforms
|
||||||
|
(e.g. 'sega-game-gear' vs 'sega-gamegear').
|
||||||
|
"""
|
||||||
truth_systems = truth.get("systems", {})
|
truth_systems = truth.get("systems", {})
|
||||||
scraped_systems = scraped.get("systems", {})
|
scraped_systems = scraped.get("systems", {})
|
||||||
|
|
||||||
@@ -1474,30 +1479,46 @@ def diff_platform_truth(truth: dict, scraped: dict) -> dict:
|
|||||||
divergences: dict[str, dict] = {}
|
divergences: dict[str, dict] = {}
|
||||||
uncovered_systems: list[str] = []
|
uncovered_systems: list[str] = []
|
||||||
|
|
||||||
all_sys_ids = sorted(set(truth_systems) | set(scraped_systems))
|
# Build normalized-ID lookup for truth systems
|
||||||
|
norm_to_truth: dict[str, str] = {}
|
||||||
|
for sid in truth_systems:
|
||||||
|
norm_to_truth[_norm_system_id(sid)] = sid
|
||||||
|
|
||||||
for sys_id in all_sys_ids:
|
# Match scraped systems to truth via normalized IDs
|
||||||
in_truth = sys_id in truth_systems
|
matched_truth: set[str] = set()
|
||||||
in_scraped = sys_id in scraped_systems
|
|
||||||
|
|
||||||
if in_scraped and not in_truth:
|
for s_sid in sorted(scraped_systems):
|
||||||
uncovered_systems.append(sys_id)
|
norm = _norm_system_id(s_sid)
|
||||||
summary["systems_uncovered"] += 1
|
t_sid = norm_to_truth.get(norm)
|
||||||
continue
|
|
||||||
|
|
||||||
|
if t_sid is None:
|
||||||
|
# Also try exact match (in case normalization is lossy)
|
||||||
|
if s_sid in truth_systems:
|
||||||
|
t_sid = s_sid
|
||||||
|
else:
|
||||||
|
uncovered_systems.append(s_sid)
|
||||||
|
summary["systems_uncovered"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
matched_truth.add(t_sid)
|
||||||
summary["systems_compared"] += 1
|
summary["systems_compared"] += 1
|
||||||
|
sys_div = _diff_system(truth_systems[t_sid], scraped_systems[s_sid])
|
||||||
if in_truth and not in_scraped:
|
|
||||||
# All truth files are missing
|
|
||||||
truth_sys = truth_systems[sys_id]
|
|
||||||
sys_div = _diff_system(truth_sys, {"files": []})
|
|
||||||
else:
|
|
||||||
truth_sys = truth_systems[sys_id]
|
|
||||||
scraped_sys = scraped_systems[sys_id]
|
|
||||||
sys_div = _diff_system(truth_sys, scraped_sys)
|
|
||||||
|
|
||||||
if _has_divergences(sys_div):
|
if _has_divergences(sys_div):
|
||||||
divergences[sys_id] = sys_div
|
divergences[s_sid] = sys_div
|
||||||
|
_update_summary(summary, sys_div)
|
||||||
|
summary["systems_partially_covered"] += 1
|
||||||
|
else:
|
||||||
|
summary["systems_fully_covered"] += 1
|
||||||
|
|
||||||
|
# Truth systems not matched by any scraped system — all files missing
|
||||||
|
for t_sid in sorted(truth_systems):
|
||||||
|
if t_sid in matched_truth:
|
||||||
|
continue
|
||||||
|
summary["systems_compared"] += 1
|
||||||
|
sys_div = _diff_system(truth_systems[t_sid], {"files": []})
|
||||||
|
if _has_divergences(sys_div):
|
||||||
|
divergences[t_sid] = sys_div
|
||||||
_update_summary(summary, sys_div)
|
_update_summary(summary, sys_div)
|
||||||
summary["systems_partially_covered"] += 1
|
summary["systems_partially_covered"] += 1
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -3007,6 +3007,36 @@ class TestE2E(unittest.TestCase):
|
|||||||
self.assertEqual(hm["scraped_md5"], "scraped_hash")
|
self.assertEqual(hm["scraped_md5"], "scraped_hash")
|
||||||
|
|
||||||
|
|
||||||
|
def test_104_diff_truth_normalized_system_ids(self):
    """Systems whose IDs differ only in formatting are paired by the diff.

    The truth side names the system 'sega-gamegear' while the scraped
    side names it 'sega-game-gear'. Both list the same file with the
    same MD5, so the summary must report one compared system, no
    uncovered system and no missing file.
    """
    from common import diff_platform_truth

    shared_md5 = "a" * 32

    # Truth entries carry the extra bookkeeping keys; scraped ones do not.
    truth_file = {
        "name": "bios.gg",
        "required": True,
        "md5": shared_md5,
        "_cores": ["c"],
        "_source_refs": [],
    }
    scraped_file = {"name": "bios.gg", "required": True, "md5": shared_md5}

    truth = {
        "systems": {
            "sega-gamegear": {
                "_coverage": {"cores_profiled": ["c"], "cores_unprofiled": []},
                "files": [truth_file],
            },
        },
    }
    scraped = {
        "systems": {
            "sega-game-gear": {
                "files": [scraped_file],
            },
        },
    }

    summary = diff_platform_truth(truth, scraped)["summary"]

    self.assertEqual(summary["systems_uncovered"], 0)
    self.assertEqual(summary["total_missing"], 0)
    self.assertEqual(summary["systems_compared"], 1)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# native_id preservation
|
# native_id preservation
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user