mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
feat: unify gap analysis with verify results and source provenance
Single source of truth for gap page: verification status from verify.py (verified/untested/missing/mismatch), file provenance from cross_reference (bios/data/large_file/missing). cross_reference.py: _find_in_repo -> _resolve_source returning source category, stop skipping storage: release/large_file, add by_path_suffix lookup, all_declared param for global check. generate_site.py: gap page now shows verification by platform, 18 hash mismatches, and core complement with provenance breakdown.
This commit is contained in:
@@ -20,6 +20,8 @@ theme:
|
|||||||
icon: material/brightness-4
|
icon: material/brightness-4
|
||||||
name: Switch to auto
|
name: Switch to auto
|
||||||
font: false
|
font: false
|
||||||
|
icon:
|
||||||
|
logo: material/chip
|
||||||
features:
|
features:
|
||||||
- navigation.tabs
|
- navigation.tabs
|
||||||
- navigation.sections
|
- navigation.sections
|
||||||
@@ -29,6 +31,8 @@ theme:
|
|||||||
- search.highlight
|
- search.highlight
|
||||||
- content.tabs.link
|
- content.tabs.link
|
||||||
- toc.follow
|
- toc.follow
|
||||||
|
extra_css:
|
||||||
|
- stylesheets/extra.css
|
||||||
markdown_extensions:
|
markdown_extensions:
|
||||||
- tables
|
- tables
|
||||||
- admonition
|
- admonition
|
||||||
|
|||||||
@@ -103,32 +103,41 @@ def _build_supplemental_index(
|
|||||||
return names
|
return names
|
||||||
|
|
||||||
|
|
||||||
def _find_in_repo(
|
def _resolve_source(
|
||||||
fname: str,
|
fname: str,
|
||||||
by_name: dict[str, list],
|
by_name: dict[str, list],
|
||||||
by_name_lower: dict[str, str],
|
by_name_lower: dict[str, str],
|
||||||
data_names: set[str] | None = None,
|
data_names: set[str] | None = None,
|
||||||
) -> bool:
|
by_path_suffix: dict | None = None,
|
||||||
|
) -> str | None:
|
||||||
|
"""Return the source category for a file, or None if not found.
|
||||||
|
|
||||||
|
Returns ``"bios"`` (in database.json / bios/), ``"data"`` (in data/),
|
||||||
|
or ``None`` (not available anywhere).
|
||||||
|
"""
|
||||||
|
# bios/ via database.json by_name
|
||||||
if fname in by_name:
|
if fname in by_name:
|
||||||
return True
|
return "bios"
|
||||||
# For directory entries or paths, extract the meaningful basename
|
|
||||||
stripped = fname.rstrip("/")
|
stripped = fname.rstrip("/")
|
||||||
basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None
|
basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None
|
||||||
if basename and basename in by_name:
|
if basename and basename in by_name:
|
||||||
return True
|
return "bios"
|
||||||
key = fname.lower()
|
key = fname.lower()
|
||||||
if key in by_name_lower:
|
if key in by_name_lower:
|
||||||
return True
|
return "bios"
|
||||||
if basename:
|
if basename:
|
||||||
key = basename.lower()
|
if basename.lower() in by_name_lower:
|
||||||
if key in by_name_lower:
|
return "bios"
|
||||||
return True
|
# bios/ via by_path_suffix (regional variants)
|
||||||
|
if by_path_suffix and fname in by_path_suffix:
|
||||||
|
return "bios"
|
||||||
|
# data/ supplemental index
|
||||||
if data_names:
|
if data_names:
|
||||||
if fname in data_names or key in data_names:
|
if fname in data_names or key in data_names:
|
||||||
return True
|
return "data"
|
||||||
if basename and (basename in data_names or basename.lower() in data_names):
|
if basename and (basename in data_names or basename.lower() in data_names):
|
||||||
return True
|
return "data"
|
||||||
return False
|
return None
|
||||||
|
|
||||||
|
|
||||||
def cross_reference(
|
def cross_reference(
|
||||||
@@ -137,30 +146,44 @@ def cross_reference(
|
|||||||
db: dict,
|
db: dict,
|
||||||
platform_data_dirs: dict[str, set[str]] | None = None,
|
platform_data_dirs: dict[str, set[str]] | None = None,
|
||||||
data_names: set[str] | None = None,
|
data_names: set[str] | None = None,
|
||||||
|
all_declared: set[str] | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Compare emulator profiles against platform declarations.
|
"""Compare emulator profiles against platform declarations.
|
||||||
|
|
||||||
Returns a report with gaps (files emulators need but platforms don't list)
|
Returns a report with gaps (files emulators need but platforms don't list)
|
||||||
and coverage stats. Files covered by matching data_directories between
|
and coverage stats. Each gap entry carries a ``source`` field indicating
|
||||||
emulator profile and platform config are not reported as gaps.
|
where the file is available: ``"bios"`` (bios/ via database.json),
|
||||||
Checks both bios/ (via database) and data/ (via data_names index).
|
``"data"`` (data/ directory), ``"large_file"`` (GitHub release asset),
|
||||||
|
or ``"missing"`` (not available anywhere).
|
||||||
|
|
||||||
|
The boolean ``in_repo`` is derived: ``source != "missing"``.
|
||||||
|
|
||||||
|
When *all_declared* is provided (flat set of every filename declared by
|
||||||
|
any platform for any system), it is used for the ``in_platform`` check
|
||||||
|
instead of the per-system lookup. This is appropriate for the global
|
||||||
|
gap analysis page where "undeclared" means "no platform declares it at all".
|
||||||
"""
|
"""
|
||||||
platform_data_dirs = platform_data_dirs or {}
|
platform_data_dirs = platform_data_dirs or {}
|
||||||
by_name = db.get("indexes", {}).get("by_name", {})
|
by_name = db.get("indexes", {}).get("by_name", {})
|
||||||
by_name_lower = {k.lower(): k for k in by_name}
|
by_name_lower = {k.lower(): k for k in by_name}
|
||||||
|
by_md5 = db.get("indexes", {}).get("by_md5", {})
|
||||||
|
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
|
||||||
|
db_files = db.get("files", {})
|
||||||
report = {}
|
report = {}
|
||||||
|
|
||||||
for emu_name, profile in profiles.items():
|
for emu_name, profile in profiles.items():
|
||||||
emu_files = profile.get("files", [])
|
emu_files = profile.get("files", [])
|
||||||
systems = profile.get("systems", [])
|
systems = profile.get("systems", [])
|
||||||
|
|
||||||
|
if all_declared is not None:
|
||||||
|
platform_names = all_declared
|
||||||
|
else:
|
||||||
platform_names = set()
|
platform_names = set()
|
||||||
for sys_id in systems:
|
for sys_id in systems:
|
||||||
platform_names.update(declared.get(sys_id, set()))
|
platform_names.update(declared.get(sys_id, set()))
|
||||||
|
|
||||||
gaps = []
|
gaps = []
|
||||||
covered = []
|
covered = []
|
||||||
by_md5 = db.get("indexes", {}).get("by_md5", {})
|
|
||||||
for f in emu_files:
|
for f in emu_files:
|
||||||
fname = f.get("name", "")
|
fname = f.get("name", "")
|
||||||
if not fname:
|
if not fname:
|
||||||
@@ -174,37 +197,45 @@ def cross_reference(
|
|||||||
if "path" in f and f["path"] is None:
|
if "path" in f and f["path"] is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip release asset files (stored in GitHub releases, not bios/)
|
|
||||||
if f.get("storage") == "release":
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Skip standalone-only files
|
# Skip standalone-only files
|
||||||
file_mode = f.get("mode", "both")
|
file_mode = f.get("mode", "both")
|
||||||
if file_mode == "standalone":
|
if file_mode == "standalone":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
in_platform = fname in platform_names
|
# --- resolve source provenance ---
|
||||||
in_repo = _find_in_repo(fname, by_name, by_name_lower, data_names)
|
storage = f.get("storage", "")
|
||||||
if not in_repo:
|
if storage in ("release", "large_file"):
|
||||||
|
source = "large_file"
|
||||||
|
else:
|
||||||
|
source = _resolve_source(
|
||||||
|
fname, by_name, by_name_lower, data_names, by_path_suffix
|
||||||
|
)
|
||||||
|
if source is None:
|
||||||
path_field = f.get("path", "")
|
path_field = f.get("path", "")
|
||||||
if path_field and path_field != fname:
|
if path_field and path_field != fname:
|
||||||
in_repo = _find_in_repo(
|
source = _resolve_source(
|
||||||
path_field, by_name, by_name_lower, data_names
|
path_field, by_name, by_name_lower,
|
||||||
|
data_names, by_path_suffix,
|
||||||
)
|
)
|
||||||
# Try MD5 hash match (handles files that exist under different names)
|
# Try MD5 hash match
|
||||||
if not in_repo:
|
if source is None:
|
||||||
md5_raw = f.get("md5", "")
|
md5_raw = f.get("md5", "")
|
||||||
if md5_raw:
|
if md5_raw:
|
||||||
for md5_val in md5_raw.split(","):
|
for md5_val in md5_raw.split(","):
|
||||||
md5_val = md5_val.strip().lower()
|
md5_val = md5_val.strip().lower()
|
||||||
if md5_val and by_md5.get(md5_val):
|
if md5_val and by_md5.get(md5_val):
|
||||||
in_repo = True
|
source = "bios"
|
||||||
break
|
break
|
||||||
# Try SHA1 hash match
|
# Try SHA1 hash match
|
||||||
if not in_repo:
|
if source is None:
|
||||||
sha1 = f.get("sha1", "")
|
sha1 = f.get("sha1", "")
|
||||||
if sha1 and sha1 in db.get("files", {}):
|
if sha1 and sha1 in db_files:
|
||||||
in_repo = True
|
source = "bios"
|
||||||
|
if source is None:
|
||||||
|
source = "missing"
|
||||||
|
|
||||||
|
in_repo = source != "missing"
|
||||||
|
in_platform = fname in platform_names
|
||||||
|
|
||||||
entry = {
|
entry = {
|
||||||
"name": fname,
|
"name": fname,
|
||||||
@@ -213,6 +244,7 @@ def cross_reference(
|
|||||||
"source_ref": f.get("source_ref", ""),
|
"source_ref": f.get("source_ref", ""),
|
||||||
"in_platform": in_platform,
|
"in_platform": in_platform,
|
||||||
"in_repo": in_repo,
|
"in_repo": in_repo,
|
||||||
|
"source": source,
|
||||||
}
|
}
|
||||||
|
|
||||||
if not in_platform:
|
if not in_platform:
|
||||||
@@ -227,7 +259,10 @@ def cross_reference(
|
|||||||
"platform_covered": len(covered),
|
"platform_covered": len(covered),
|
||||||
"gaps": len(gaps),
|
"gaps": len(gaps),
|
||||||
"gap_in_repo": sum(1 for g in gaps if g["in_repo"]),
|
"gap_in_repo": sum(1 for g in gaps if g["in_repo"]),
|
||||||
"gap_missing": sum(1 for g in gaps if not g["in_repo"]),
|
"gap_missing": sum(1 for g in gaps if g["source"] == "missing"),
|
||||||
|
"gap_bios": sum(1 for g in gaps if g["source"] == "bios"),
|
||||||
|
"gap_data": sum(1 for g in gaps if g["source"] == "data"),
|
||||||
|
"gap_large_file": sum(1 for g in gaps if g["source"] == "large_file"),
|
||||||
"gap_details": gaps,
|
"gap_details": gaps,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -240,15 +275,19 @@ def print_report(report: dict) -> None:
|
|||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
|
|
||||||
total_gaps = 0
|
total_gaps = 0
|
||||||
total_in_repo = 0
|
totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
|
||||||
total_missing = 0
|
|
||||||
|
|
||||||
for emu_name, data in sorted(report.items()):
|
for emu_name, data in sorted(report.items()):
|
||||||
gaps = data["gaps"]
|
gaps = data["gaps"]
|
||||||
if gaps == 0:
|
if gaps == 0:
|
||||||
status = "OK"
|
continue
|
||||||
else:
|
|
||||||
status = f"{data['gap_in_repo']} in repo, {data['gap_missing']} missing"
|
parts = []
|
||||||
|
for key in ("bios", "data", "large_file", "missing"):
|
||||||
|
count = data.get(f"gap_{key}", 0)
|
||||||
|
if count:
|
||||||
|
parts.append(f"{count} {key}")
|
||||||
|
status = ", ".join(parts) if parts else "OK"
|
||||||
|
|
||||||
print(f"\n{data['emulator']} ({', '.join(data['systems'])})")
|
print(f"\n{data['emulator']} ({', '.join(data['systems'])})")
|
||||||
print(
|
print(
|
||||||
@@ -256,23 +295,24 @@ def print_report(report: dict) -> None:
|
|||||||
f"{data['platform_covered']} declared by platforms, "
|
f"{data['platform_covered']} declared by platforms, "
|
||||||
f"{gaps} undeclared"
|
f"{gaps} undeclared"
|
||||||
)
|
)
|
||||||
|
|
||||||
if gaps > 0:
|
|
||||||
print(f" Gaps: {status}")
|
print(f" Gaps: {status}")
|
||||||
|
|
||||||
for g in data["gap_details"]:
|
for g in data["gap_details"]:
|
||||||
req = "*" if g["required"] else " "
|
req = "*" if g["required"] else " "
|
||||||
loc = "repo" if g["in_repo"] else "MISSING"
|
src = g.get("source", "missing").upper()
|
||||||
note = f" -- {g['note']}" if g["note"] else ""
|
note = f" -- {g['note']}" if g["note"] else ""
|
||||||
print(f" {req} {g['name']} [{loc}]{note}")
|
print(f" {req} {g['name']} [{src}]{note}")
|
||||||
|
|
||||||
total_gaps += gaps
|
total_gaps += gaps
|
||||||
total_in_repo += data["gap_in_repo"]
|
for key in totals:
|
||||||
total_missing += data["gap_missing"]
|
totals[key] += data.get(f"gap_{key}", 0)
|
||||||
|
|
||||||
print(f"\n{'=' * 60}")
|
print(f"\n{'=' * 60}")
|
||||||
print(f"Total: {total_gaps} undeclared files across all emulators")
|
print(f"Total: {total_gaps} undeclared files across all emulators")
|
||||||
print(f" {total_in_repo} already in repo (can be added to packs)")
|
available = totals["bios"] + totals["data"] + totals["large_file"]
|
||||||
print(f" {total_missing} missing from repo (need to be sourced)")
|
print(f" {available} available (bios: {totals['bios']}, data: {totals['data']}, "
|
||||||
|
f"large_file: {totals['large_file']})")
|
||||||
|
print(f" {totals['missing']} missing (need to be sourced)")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user