# -------------------------------------------------------------------
# Platform truth diffing
# -------------------------------------------------------------------

def _index_truth_files(truth_files: list) -> dict:
    """Map lower-cased file names and aliases to their truth entries.

    A primary name always takes precedence over an alias declared by a
    different entry, so a scraped file that matches a truth file's own name
    is never attributed to another file through that file's alias.
    """
    by_alias = {
        alias.lower(): entry
        for entry in truth_files
        for alias in entry.get("aliases") or []
    }
    by_name = {entry["name"].lower(): entry for entry in truth_files}
    # In a dict merge the right-hand side wins, so names beat colliding aliases.
    return {**by_alias, **by_name}


def _diff_system(truth_sys: dict, scraped_sys: dict) -> dict:
    """Compare files between truth and scraped for a single system.

    File names are matched case-insensitively; a scraped name may also match
    one of a truth entry's ``aliases``.

    Args:
        truth_sys: Truth system mapping. Reads ``files`` (entries with
            ``name`` and optional ``aliases``, ``sha1``/``md5``/``crc32``,
            ``required``, ``_cores``, ``_source_refs``) and
            ``_coverage["cores_unprofiled"]``.
        scraped_sys: Scraped system mapping. Reads ``files`` (entries with
            ``name`` and optional hashes / ``required``).

    Returns:
        A dict holding only the non-empty categories among ``missing``,
        ``hash_mismatch``, ``required_mismatch``, ``extra_phantom`` and
        ``extra_unprofiled``. An empty dict means no divergence.
    """
    # ``or []`` / ``or {}`` also covers keys that are present but set to None.
    truth_files = truth_sys.get("files") or []
    truth_index = _index_truth_files(truth_files)

    # Scraped index: name.lower() -> entry (a later duplicate replaces an
    # earlier one).
    scraped_index: dict[str, dict] = {
        fe["name"].lower(): fe for fe in scraped_sys.get("files") or []
    }

    hash_mismatch: list[dict] = []
    required_mismatch: list[dict] = []
    extras: list[dict] = []
    matched_truth_names: set[str] = set()

    for s_key, s_entry in scraped_index.items():
        t_entry = truth_index.get(s_key)
        if t_entry is None:
            # Scraped file unknown to truth; classified after the loop.
            extras.append({"name": s_entry["name"]})
            continue
        matched_truth_names.add(t_entry["name"].lower())

        # Report only the first differing hash type, and only when both
        # sides provide a non-empty value for it.
        for h in ("sha1", "md5", "crc32"):
            t_hash = t_entry.get(h, "")
            s_hash = s_entry.get(h, "")
            if t_hash and s_hash and t_hash.lower() != s_hash.lower():
                hash_mismatch.append({
                    "name": s_entry["name"],
                    "hash_type": h,
                    f"truth_{h}": t_hash,
                    f"scraped_{h}": s_hash,
                    "truth_cores": list(t_entry.get("_cores", [])),
                })
                break

        # A "required" flag missing on either side is not a mismatch.
        t_req = t_entry.get("required")
        s_req = s_entry.get("required")
        if t_req is not None and s_req is not None and t_req != s_req:
            required_mismatch.append({
                "name": s_entry["name"],
                "truth_required": t_req,
                "scraped_required": s_req,
            })

    # Truth files that no scraped name or alias matched.
    missing = [
        {
            "name": fe["name"],
            "cores": list(fe.get("_cores", [])),
            "source_refs": list(fe.get("_source_refs", [])),
        }
        for fe in truth_files
        if fe["name"].lower() not in matched_truth_names
    ]

    # With at least one unprofiled core, extras go to "extra_unprofiled";
    # otherwise they are reported as "extra_phantom".
    coverage = truth_sys.get("_coverage") or {}
    has_unprofiled = bool(coverage.get("cores_unprofiled"))

    categories = {
        "missing": missing,
        "hash_mismatch": hash_mismatch,
        "required_mismatch": required_mismatch,
        "extra_phantom": [] if has_unprofiled else extras,
        "extra_unprofiled": extras if has_unprofiled else [],
    }
    return {kind: items for kind, items in categories.items() if items}


def _has_divergences(sys_div: dict) -> bool:
    """Check if a system divergence dict contains any actual divergences."""
    # _diff_system omits empty categories, so a non-empty dict is a divergence.
    return bool(sys_div)


def _update_summary(summary: dict, sys_div: dict) -> None:
    """Add the per-category counts of ``sys_div`` to the ``total_*`` counters.

    ``summary`` is mutated in place and must already contain every
    ``total_<category>`` key.
    """
    for kind in (
        "missing",
        "extra_phantom",
        "extra_unprofiled",
        "hash_mismatch",
        "required_mismatch",
    ):
        summary[f"total_{kind}"] += len(sys_div.get(kind, []))


def diff_platform_truth(truth: dict, scraped: dict) -> dict:
    """Compare truth data against scraped data, returning divergences.

    Systems are visited in sorted id order:

    * present only in ``scraped``: listed in ``uncovered_systems`` and
      counted in ``systems_uncovered``; not compared.
    * present in ``truth``: counted in ``systems_compared`` and diffed
      file by file. A system absent from ``scraped`` is diffed against an
      empty file list, so all its truth files are reported as missing.
      Systems with any divergence count as ``systems_partially_covered``,
      the others as ``systems_fully_covered``.

    Args:
        truth: Mapping with a ``systems`` dict keyed by system id.
        scraped: Mapping with a ``systems`` dict keyed by system id.

    Returns:
        A dict that always has ``summary`` (the counters), plus
        ``divergences`` (system id -> per-category lists) and
        ``uncovered_systems`` (list of ids) when they are non-empty.
    """
    # ``or {}`` tolerates a "systems" key that is present but None.
    truth_systems = truth.get("systems") or {}
    scraped_systems = scraped.get("systems") or {}

    summary = {
        "systems_compared": 0,
        "systems_fully_covered": 0,
        "systems_partially_covered": 0,
        "systems_uncovered": 0,
        "total_missing": 0,
        "total_extra_phantom": 0,
        "total_extra_unprofiled": 0,
        "total_hash_mismatch": 0,
        "total_required_mismatch": 0,
    }

    divergences: dict[str, dict] = {}
    uncovered_systems: list[str] = []

    for sys_id in sorted(set(truth_systems) | set(scraped_systems)):
        if sys_id not in truth_systems:
            uncovered_systems.append(sys_id)
            summary["systems_uncovered"] += 1
            continue

        summary["systems_compared"] += 1

        # A system missing from scraped (or set to None) has no files.
        sys_div = _diff_system(
            truth_systems[sys_id] or {},
            scraped_systems.get(sys_id) or {},
        )

        if _has_divergences(sys_div):
            divergences[sys_id] = sys_div
            _update_summary(summary, sys_div)
            summary["systems_partially_covered"] += 1
        else:
            summary["systems_fully_covered"] += 1

    result: dict = {"summary": summary}
    if divergences:
        result["divergences"] = divergences
    if uncovered_systems:
        result["uncovered_systems"] = uncovered_systems
    return result
check_inside_zip, compute_hashes, filter_files_by_mode, + check_inside_zip, compute_hashes, diff_platform_truth, + filter_files_by_mode, generate_platform_truth, group_identical_platforms, load_emulator_profiles, load_platform_config, md5_composite, md5sum, parse_md5_list, resolve_local_file, @@ -2885,5 +2886,132 @@ class TestE2E(unittest.TestCase): self.assertFalse((missing_dir / "prod.keys").exists()) + # --------------------------------------------------------------- + # diff_platform_truth tests + # --------------------------------------------------------------- + + def test_98_diff_truth_missing(self): + """Truth has 2 files, scraped has 1 -> 1 missing with cores/source_refs.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + {"name": "bios_a.bin", "required": True, "md5": "aaa", + "_cores": ["core_a"], "_source_refs": ["src/a.c:10"]}, + {"name": "bios_b.bin", "required": False, "md5": "bbb", + "_cores": ["core_a"], "_source_refs": ["src/b.c:20"]}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [{"name": "bios_a.bin", "md5": "aaa"}], + }}} + result = diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_missing"], 1) + div = result["divergences"]["test-sys"] + self.assertEqual(len(div["missing"]), 1) + m = div["missing"][0] + self.assertEqual(m["name"], "bios_b.bin") + self.assertEqual(m["cores"], ["core_a"]) + self.assertEqual(m["source_refs"], ["src/b.c:20"]) + + def test_99_diff_truth_extra_phantom(self): + """All cores profiled, scraped has extra file -> extra_phantom.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + {"name": "bios.bin", "md5": "aaa", + "_cores": ["core_a"], "_source_refs": []}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [ + {"name": "bios.bin", "md5": "aaa"}, + {"name": "phantom.bin", "md5": "zzz"}, + ], + }}} + result = 
diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_extra_phantom"], 1) + div = result["divergences"]["test-sys"] + self.assertEqual(len(div["extra_phantom"]), 1) + self.assertEqual(div["extra_phantom"][0]["name"], "phantom.bin") + + def test_100_diff_truth_extra_unprofiled(self): + """Some cores unprofiled, scraped has extra -> extra_unprofiled.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], + "cores_unprofiled": ["core_b"]}, + "files": [ + {"name": "bios.bin", "md5": "aaa", + "_cores": ["core_a"], "_source_refs": []}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [ + {"name": "bios.bin", "md5": "aaa"}, + {"name": "extra.bin", "md5": "yyy"}, + ], + }}} + result = diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_extra_unprofiled"], 1) + div = result["divergences"]["test-sys"] + self.assertEqual(len(div["extra_unprofiled"]), 1) + self.assertEqual(div["extra_unprofiled"][0]["name"], "extra.bin") + + def test_101_diff_truth_alias_matching(self): + """Truth file with aliases, scraped uses alias -> not extra or missing.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + {"name": "bios.bin", "md5": "aaa", "aliases": ["alt.bin"], + "_cores": ["core_a"], "_source_refs": []}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [{"name": "alt.bin", "md5": "aaa"}], + }}} + result = diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_missing"], 0) + self.assertEqual(result["summary"]["total_extra_phantom"], 0) + self.assertNotIn("test-sys", result.get("divergences", {})) + + def test_102_diff_truth_case_insensitive(self): + """Truth 'BIOS.ROM', scraped 'bios.rom' -> match, no missing.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + {"name": "BIOS.ROM", "md5": "aaa", + "_cores": 
["core_a"], "_source_refs": []}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [{"name": "bios.rom", "md5": "aaa"}], + }}} + result = diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_missing"], 0) + self.assertNotIn("test-sys", result.get("divergences", {})) + + def test_103_diff_truth_hash_mismatch(self): + """Same file, different md5 -> hash_mismatch with truth_cores.""" + truth = {"systems": {"test-sys": { + "_coverage": {"cores_profiled": ["core_a"], "cores_unprofiled": []}, + "files": [ + {"name": "bios.bin", "md5": "truth_hash", + "_cores": ["core_a", "core_b"], + "_source_refs": ["src/x.c:5"]}, + ], + }}} + scraped = {"systems": {"test-sys": { + "files": [{"name": "bios.bin", "md5": "scraped_hash"}], + }}} + result = diff_platform_truth(truth, scraped) + self.assertEqual(result["summary"]["total_hash_mismatch"], 1) + div = result["divergences"]["test-sys"] + self.assertEqual(len(div["hash_mismatch"]), 1) + hm = div["hash_mismatch"][0] + self.assertEqual(hm["name"], "bios.bin") + self.assertEqual(hm["truth_cores"], ["core_a", "core_b"]) + self.assertEqual(hm["truth_md5"], "truth_hash") + self.assertEqual(hm["scraped_md5"], "scraped_hash") + + if __name__ == "__main__": unittest.main()