diff --git a/mkdocs.yml b/mkdocs.yml index 1549d12e..5da4d6bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,6 +20,8 @@ theme: icon: material/brightness-4 name: Switch to auto font: false + icon: + logo: material/chip features: - navigation.tabs - navigation.sections @@ -29,6 +31,8 @@ theme: - search.highlight - content.tabs.link - toc.follow +extra_css: +- stylesheets/extra.css markdown_extensions: - tables - admonition diff --git a/scripts/cross_reference.py b/scripts/cross_reference.py index 57a69aa5..83e7e8d4 100644 --- a/scripts/cross_reference.py +++ b/scripts/cross_reference.py @@ -103,32 +103,41 @@ def _build_supplemental_index( return names -def _find_in_repo( +def _resolve_source( fname: str, by_name: dict[str, list], by_name_lower: dict[str, str], data_names: set[str] | None = None, -) -> bool: + by_path_suffix: dict | None = None, +) -> str | None: + """Return the source category for a file, or None if not found. + + Returns ``"bios"`` (in database.json / bios/), ``"data"`` (in data/), + or ``None`` (not available anywhere). + """ + # bios/ via database.json by_name if fname in by_name: - return True - # For directory entries or paths, extract the meaningful basename + return "bios" stripped = fname.rstrip("/") basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None if basename and basename in by_name: - return True + return "bios" key = fname.lower() if key in by_name_lower: - return True + return "bios" if basename: - key = basename.lower() - if key in by_name_lower: - return True + if basename.lower() in by_name_lower: + return "bios" + # bios/ via by_path_suffix (regional variants) + if by_path_suffix and fname in by_path_suffix: + return "bios" + # data/ supplemental index if data_names: if fname in data_names or key in data_names: - return True + return "data" if basename and (basename in data_names or basename.lower() in data_names): - return True - return False + return "data" + return None def cross_reference( @@ -137,30 +146,44 @@ def cross_reference( db: dict, platform_data_dirs: dict[str, set[str]] | None = None, data_names: set[str] | None = None, + all_declared: set[str] | None = None, ) -> dict: """Compare emulator profiles against platform declarations. Returns a report with gaps (files emulators need but platforms don't list) - and coverage stats. Files covered by matching data_directories between - emulator profile and platform config are not reported as gaps. - Checks both bios/ (via database) and data/ (via data_names index). + and coverage stats. Each gap entry carries a ``source`` field indicating + where the file is available: ``"bios"`` (bios/ via database.json), + ``"data"`` (data/ directory), ``"large_file"`` (GitHub release asset), + or ``"missing"`` (not available anywhere). + + The boolean ``in_repo`` is derived: ``source != "missing"``. + + When *all_declared* is provided (flat set of every filename declared by + any platform for any system), it is used for the ``in_platform`` check + instead of the per-system lookup. This is appropriate for the global + gap analysis page where "undeclared" means "no platform declares it at all". """ platform_data_dirs = platform_data_dirs or {} by_name = db.get("indexes", {}).get("by_name", {}) by_name_lower = {k.lower(): k for k in by_name} + by_md5 = db.get("indexes", {}).get("by_md5", {}) + by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {}) + db_files = db.get("files", {}) report = {} for emu_name, profile in profiles.items(): emu_files = profile.get("files", []) systems = profile.get("systems", []) - platform_names = set() - for sys_id in systems: - platform_names.update(declared.get(sys_id, set())) + if all_declared is not None: + platform_names = all_declared + else: + platform_names = set() + for sys_id in systems: + platform_names.update(declared.get(sys_id, set())) gaps = [] covered = [] - by_md5 = db.get("indexes", {}).get("by_md5", {}) for f in emu_files: fname = f.get("name", "") if not fname: @@ -174,37 +197,45 @@ def cross_reference( if "path" in f and f["path"] is None: continue - # Skip release asset files (stored in GitHub releases, not bios/) - if f.get("storage") == "release": - continue - # Skip standalone-only files file_mode = f.get("mode", "both") if file_mode == "standalone": continue + # --- resolve source provenance --- + storage = f.get("storage", "") + if storage in ("release", "large_file"): + source = "large_file" + else: + source = _resolve_source( + fname, by_name, by_name_lower, data_names, by_path_suffix + ) + if source is None: + path_field = f.get("path", "") + if path_field and path_field != fname: + source = _resolve_source( + path_field, by_name, by_name_lower, + data_names, by_path_suffix, + ) + # Try MD5 hash match + if source is None: + md5_raw = f.get("md5", "") + if md5_raw: + for md5_val in md5_raw.split(","): + md5_val = md5_val.strip().lower() + if md5_val and by_md5.get(md5_val): + source = "bios" + break + # Try SHA1 hash match + if source is None: + sha1 = f.get("sha1", "") + if sha1 and sha1 in db_files: + source = "bios" + if source is None: + source = "missing" + + in_repo = source != "missing" in_platform = fname in platform_names - in_repo = _find_in_repo(fname, by_name, by_name_lower, data_names) - if not in_repo: - path_field = f.get("path", "") - if path_field and path_field != fname: - in_repo = _find_in_repo( - path_field, by_name, by_name_lower, data_names - ) - # Try MD5 hash match (handles files that exist under different names) - if not in_repo: - md5_raw = f.get("md5", "") - if md5_raw: - for md5_val in md5_raw.split(","): - md5_val = md5_val.strip().lower() - if md5_val and by_md5.get(md5_val): - in_repo = True - break - # Try SHA1 hash match - if not in_repo: - sha1 = f.get("sha1", "") - if sha1 and sha1 in db.get("files", {}): - in_repo = True entry = { "name": fname, @@ -213,6 +244,7 @@ def cross_reference( "source_ref": f.get("source_ref", ""), "in_platform": in_platform, "in_repo": in_repo, + "source": source, } if not in_platform: @@ -227,7 +259,10 @@ def cross_reference( "platform_covered": len(covered), "gaps": len(gaps), "gap_in_repo": sum(1 for g in gaps if g["in_repo"]), - "gap_missing": sum(1 for g in gaps if not g["in_repo"]), + "gap_missing": sum(1 for g in gaps if g["source"] == "missing"), + "gap_bios": sum(1 for g in gaps if g["source"] == "bios"), + "gap_data": sum(1 for g in gaps if g["source"] == "data"), + "gap_large_file": sum(1 for g in gaps if g["source"] == "large_file"), "gap_details": gaps, } @@ -240,15 +275,19 @@ def print_report(report: dict) -> None: print("=" * 60) total_gaps = 0 - total_in_repo = 0 - total_missing = 0 + totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0} for emu_name, data in sorted(report.items()): gaps = data["gaps"] if gaps == 0: - status = "OK" - else: - status = f"{data['gap_in_repo']} in repo, {data['gap_missing']} missing" + continue + + parts = [] + for key in ("bios", "data", "large_file", "missing"): + count = data.get(f"gap_{key}", 0) + if count: + parts.append(f"{count} {key}") + status = ", ".join(parts) if parts else "OK" print(f"\n{data['emulator']} ({', '.join(data['systems'])})") print( @@ -256,23 +295,24 @@ def print_report(report: dict) -> None: f"{data['platform_covered']} declared by platforms, " f"{gaps} undeclared" ) + print(f" Gaps: {status}") - if gaps > 0: - print(f" Gaps: {status}") - for g in data["gap_details"]: - req = "*" if g["required"] else " " - loc = "repo" if g["in_repo"] else "MISSING" - note = f" -- {g['note']}" if g["note"] else "" - print(f" {req} {g['name']} [{loc}]{note}") + for g in data["gap_details"]: + req = "*" if g["required"] else " " + src = g.get("source", "missing").upper() + note = f" -- {g['note']}" if g["note"] else "" + print(f" {req} {g['name']} [{src}]{note}") total_gaps += gaps - total_in_repo += data["gap_in_repo"] - total_missing += data["gap_missing"] + for key in totals: + totals[key] += data.get(f"gap_{key}", 0) print(f"\n{'=' * 60}") print(f"Total: {total_gaps} undeclared files across all emulators") - print(f" {total_in_repo} already in repo (can be added to packs)") - print(f" {total_missing} missing from repo (need to be sourced)") + available = totals["bios"] + totals["data"] + totals["large_file"] + print(f" {available} available (bios: {totals['bios']}, data: {totals['data']}, " + f"large_file: {totals['large_file']})") + print(f" {totals['missing']} missing (need to be sourced)") def main(): diff --git a/scripts/generate_site.py b/scripts/generate_site.py index e2252f95..7eadc2b9 100644 --- a/scripts/generate_site.py +++ b/scripts/generate_site.py @@ -38,6 +38,19 @@ GENERATED_DIRS = ["platforms", "systems", "emulators"] WIKI_SRC_DIR = "wiki" # manually maintained wiki sources SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png" +CLS_LABELS = { + "official_port": "Official ports", + "community_fork": "Community forks", + "pure_libretro": "Pure libretro", + "game_engine": "Game engines", + "enhanced_fork": "Enhanced forks", + "frozen_snapshot": "Frozen snapshots", + "embedded_hle": "Embedded HLE", + "launcher": "Launchers", + "unclassified": "Unclassified", + "other": "Other", +} + # Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking _system_page_map: dict[str, tuple[str, str]] = {} @@ -229,80 +242,93 @@ def generate_home( cls = p.get("core_classification", "unclassified") classifications[cls] = classifications.get(cls, 0) + 1 + # Count total systems across all profiles + all_systems = set() + for p in unique.values(): + all_systems.update(p.get("systems", [])) + lines = [ + '
', + "", f"# {SITE_NAME}", "", "Source-verified BIOS and firmware packs for retrogaming platforms.", "", - "## Quick start", + "
", "", - "1. Find your platform in the table below", - "2. Click **Pack** to download the ZIP", - "3. Extract to your emulator's BIOS directory", + '
', "", - "| Platform | Extract to |", - "|----------|-----------|", - "| RetroArch / Lakka | `system/` |", - "| Batocera | `/userdata/bios/` |", - "| Recalbox | `/recalbox/share/bios/` |", - "| RetroBat | `bios/` |", - "| RetroDECK | `~/retrodeck/bios/` |", - "| EmuDeck | `Emulation/bios/` |", + '
', + f'{total_files:,}', + 'Files', + "
", "", - "---", + '
', + f'{len(coverages)}', + 'Platforms', + "
", "", - "## Methodology", + '
', + f'{emulator_count}', + 'Emulators profiled', + "
", "", - "Documentation and metadata can drift from what emulators actually load at runtime.", - "To keep packs accurate, each file here is checked against the emulator's source code.", + '
', + f'{_fmt_size(total_size)}', + 'Total size', + "
", "", - "The source code is the primary reference because it reflects actual behavior.", - "Other sources remain useful but are verified against it:", - "", - "1. **Upstream emulator source** - what the original project loads (Dolphin, PCSX2, Mednafen...)", - "2. **Libretro core source** - the RetroArch port, which may adapt paths or add files", - "3. **`.info` declarations** - metadata that platforms rely on, checked for accuracy", - "", - f"**{emulator_count}** emulators profiled. " - f"Each profile documents what the code loads, what it validates, " - f"and where the port differs from the original.", - "", - f"**{total_files:,}** files | **{len(coverages)}** platforms | " - f"**{emulator_count}** emulator profiles | **{_fmt_size(total_size)}** total", - "", - "---", + "
", "", ] - # Platform table + # Platforms FIRST (main action) lines.extend( [ "## Platforms", "", - "| | Platform | Coverage | Verified | Download |", - "|---|----------|----------|----------|----------|", + "| | Platform | Files | Verification | Download |", + "|---|----------|-------|-------------|----------|", ] ) + mode_icons = {"md5": "MD5", "sha1": "SHA1", "existence": "exists"} + for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): display = cov["platform"] - pct = _pct(cov["present"], cov["total"]) logo_url = (registry or {}).get(name, {}).get("logo", "") logo_md = ( f"![{display}]({logo_url}){{ width=20 loading=lazy }}" if logo_url else "" ) + mode_label = mode_icons.get(cov["mode"], cov["mode"]) lines.append( f"| {logo_md} | [{display}](platforms/{name}.md) | " - f"{cov['present']}/{cov['total']} ({pct}) | " - f"{cov['verified']} | " - f"[Pack]({RELEASE_URL}) |" + f"{cov['present']:,} | {mode_label} | " + f"[Pack]({RELEASE_URL}){{ .md-button .md-button--primary }} |" ) + # Quick start (collapsible -- secondary info) + lines.extend( + [ + "", + '??? info "Where to extract"', + "", + " | Platform | Extract to |", + " |----------|-----------|", + " | RetroArch / Lakka | `system/` |", + " | Batocera | `/userdata/bios/` |", + " | Recalbox | `/recalbox/share/bios/` |", + " | RetroBat | `bios/` |", + " | RetroDECK | `~/retrodeck/bios/` |", + " | EmuDeck | `Emulation/bios/` |", + "", + ] + ) + # Emulator classification breakdown lines.extend( [ - "", "## Emulator profiles", "", "| Classification | Count |", @@ -310,21 +336,41 @@ def generate_home( ] ) for cls, count in sorted(classifications.items(), key=lambda x: -x[1]): - lines.append(f"| {cls} | {count} |") + label = CLS_LABELS.get(cls, cls) + lines.append(f"| [{label}](emulators/index.md#{cls}) | {count} |") + + # Methodology (collapsible) + lines.extend( + [ + "", + '??? abstract "Methodology"', + "", + " Each file is checked against the emulator's source code. " + "Documentation and metadata can drift from actual runtime behavior, " + "so the source is the primary reference.", + "", + " 1. **Upstream emulator source** -- what the original project " + "loads (Dolphin, PCSX2, Mednafen...)", + " 2. **Libretro core source** -- the RetroArch port, which may " + "adapt paths or add files", + " 3. **`.info` declarations** -- metadata that platforms rely on, " + "checked for accuracy", + "", + ] + ) # Quick links lines.extend( [ - "", "---", "", - "[Systems](systems/){ .md-button } " - "[Emulators](emulators/){ .md-button } " + "[Systems](systems/index.md){ .md-button } " + "[Emulators](emulators/index.md){ .md-button } " "[Cross-reference](cross-reference.md){ .md-button } " "[Gap Analysis](gaps.md){ .md-button } " "[Contributing](contributing.md){ .md-button .md-button--primary }", "", - f"*Generated on {ts}.*", + f'
Generated on {ts}.
', ] ) @@ -335,24 +381,61 @@ def generate_home( def generate_platform_index(coverages: dict) -> str: + total_files = sum(c["total"] for c in coverages.values()) + total_present = sum(c["present"] for c in coverages.values()) + total_verified = sum(c["verified"] for c in coverages.values()) + lines = [ f"# Platforms - {SITE_NAME}", "", + f"{len(coverages)} supported platforms, " + f"{total_present:,}/{total_files:,} files present, " + f"{total_verified:,} verified.", + "", "| Platform | Coverage | Verification | Status |", "|----------|----------|-------------|--------|", ] + mode_labels = { + "md5": 'MD5', + "sha1": 'SHA1', + "existence": 'existence', + } + for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): display = cov["platform"] + pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0 pct = _pct(cov["present"], cov["total"]) plat_status = cov["config"].get("status", "active") - status = ( - "archived" if plat_status == "archived" else _status_icon(cov["percentage"]) + + badge_cls = ( + "rb-badge-success" + if pct_val >= 95 + else "rb-badge-warning" + if pct_val >= 70 + else "rb-badge-danger" ) + coverage_str = ( + f'{cov["present"]}/{cov["total"]} ' + f'{pct}' + ) + + mode_html = mode_labels.get( + cov["mode"], + f'{cov["mode"]}', + ) + + if plat_status == "archived": + status = 'archived' + elif pct_val >= 100: + status = 'complete' + elif pct_val >= 95: + status = 'near' + else: + status = 'partial' + lines.append( - f"| [{display}]({name}.md) | " - f"{cov['present']}/{cov['total']} ({pct}) | " - f"{cov['mode']} | {status} |" + f"| [{display}]({name}.md) | {coverage_str} | {mode_html} | {status} |" ) return "\n".join(lines) + "\n" @@ -379,14 +462,58 @@ def generate_platform_page( hash_type = config.get("hash_type", "") base_dest = config.get("base_destination", "") + pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0 + coverage_badge = ( + "rb-badge-success" + if pct_val >= 95 + else "rb-badge-warning" + if pct_val >= 70 + else "rb-badge-danger" + ) + mode_badge = ( + "rb-badge-success" if mode in ("md5", "sha1") else "rb-badge-info" + ) + lines = [ f"# {display} - {SITE_NAME}", "", - logo_md + "| | |", - "|---|---|", - f"| Verification | {mode} |", - f"| Hash type | {hash_type} |", + logo_md, ] + + # Stat cards + lines.extend( + [ + '
', + "", + '
', + f'{cov["present"]}/{cov["total"]}', + f'Coverage ({pct})', + "
", + "", + '
', + f'{cov["verified"]}', + 'Verified', + "
", + "", + '
', + f'{cov["missing"]}', + 'Missing', + "
", + "", + '
', + f'' + f'{mode}', + 'Verification', + "
", + "", + "
", + "", + "| | |", + "|---|---|", + ] + ) + if hash_type: + lines.append(f"| Hash type | {hash_type} |") if version: lines.append(f"| Version | {version} |") if base_dest: @@ -396,10 +523,8 @@ def generate_platform_page( lines.extend( [ "", - f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | " - f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}", - "", - f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}", + f"[Download {display} Pack]({RELEASE_URL})" + "{ .md-button .md-button--primary }", "", ] ) @@ -431,7 +556,13 @@ def generate_platform_page( ok_count = sum(1 for f in files if f["status"] == "ok") total = len(files) non_ok = total - ok_count - status = "OK" if non_ok == 0 else f"{non_ok} issue{'s' if non_ok > 1 else ''}" + if non_ok == 0: + status = 'OK' + else: + status = ( + f'' + f'{non_ok} issue{"s" if non_ok > 1 else ""}' + ) sys_emus = [] if emulator_files: for emu_name, emu_data in emulator_files.items(): @@ -537,9 +668,18 @@ def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]: def generate_systems_index(manufacturers: dict) -> str: + total_mfr = len(manufacturers) + total_consoles = sum(len(c) for c in manufacturers.values()) + total_files = sum( + len(files) for consoles in manufacturers.values() for files in consoles.values() + ) + lines = [ f"# Systems - {SITE_NAME}", "", + f"{total_mfr} manufacturers, {total_consoles} consoles, " + f"{total_files:,} files in the repository.", + "", "| Manufacturer | Consoles | Files |", "|-------------|----------|-------|", ] @@ -671,18 +811,22 @@ def generate_emulators_index(profiles: dict) -> str: entries = by_class.get(cls, []) if not entries: continue + label = CLS_LABELS.get(cls, cls) desc = cls_desc.get(cls, "") - lines.append(f"| [{cls}](#{cls}) | {len(entries)} | {desc} |") + lines.append(f"| [{label}](#{cls}) | {len(entries)} | {desc} |") lines.append("") - # Per-classification sections for cls in cls_order: entries = by_class.get(cls, []) if not entries: continue + label = CLS_LABELS.get(cls, cls) + desc = cls_desc.get(cls, "") lines.extend( [ - f"## {cls}", + f'## {label} {{ #{cls} }}', + "", + f"*{desc}* -- {len(entries)} profiles", "", "| Engine | Systems | Files |", "|--------|---------|-------|", @@ -696,7 +840,9 @@ def generate_emulators_index(profiles: dict) -> str: sys_str = ", ".join(systems[:3]) if len(systems) > 3: sys_str += f" +{len(systems) - 3}" - lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {len(files)} |") + file_count = len(files) + file_str = str(file_count) if file_count else "-" + lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {file_str} |") lines.append("") if aliases: @@ -750,7 +896,8 @@ def generate_emulator_page( f"| Type | {emu_type} |", ] if classification: - lines.append(f"| Classification | {classification} |") + cls_display = CLS_LABELS.get(classification, classification) + lines.append(f"| Classification | {cls_display} |") if source: lines.append(f"| Source | [{source}]({source}) |") if upstream and upstream != source: @@ -939,40 +1086,79 @@ def generate_emulator_page( size_options = f.get("size_options", []) size_range = f.get("size_range", "") - # Status badges + # Status badges (HTML) badges = [] if required: - badges.append("**required**") - else: - badges.append("optional") - if hle: - badges.append("HLE available") - if mode: - badges.append(mode) - if category and category != "bios": - badges.append(category) - if region: badges.append( + 'required' + ) + else: + badges.append( + 'optional' + ) + if not in_repo: + badges.append( + 'missing' + ) + elif in_repo: + badges.append( + 'in repo' + ) + if hle: + badges.append( + 'HLE fallback' + ) + if mode: + badges.append( + f'{mode}' + ) + if category and category != "bios": + badges.append( + f'{category}' + ) + if region: + region_str = ( ", ".join(region) if isinstance(region, list) else str(region) ) + badges.append( + f'{region_str}' + ) if storage and storage != "embedded": - badges.append(storage) + badges.append( + f'{storage}' + ) if bundled: - badges.append("bundled in binary") + badges.append( + 'bundled' + ) if embedded: - badges.append("embedded") + badges.append( + 'embedded' + ) if has_builtin: - badges.append("has built-in fallback") + badges.append( + 'built-in fallback' + ) if archive: - badges.append(f"in `{archive}`") + badges.append( + f'in {archive}' + ) if ftype and ftype != "bios": - badges.append(ftype) - if not in_repo: - badges.append("missing from repo") + badges.append( + f'{ftype}' + ) - lines.append(f"**`{fname}`** -{', '.join(badges)}") + badge_str = " ".join(badges) + border_cls = ( + "rb-file-entry-required" if required else "rb-file-entry-optional" + ) + lines.append( + f'
' + ) + lines.append("") + lines.append(f"**`{fname}`** {badge_str}") if desc: - lines.append(f": {desc}") + lines.append(f"
{desc}") lines.append("") details = [] @@ -1069,6 +1255,8 @@ def generate_emulator_page( if len(contents) > 10: lines.append(f" - ... and {len(contents) - 10} more") lines.append("") + lines.append("
") + lines.append("") # Data directories if data_dirs: @@ -1090,181 +1278,354 @@ def generate_gap_analysis( profiles: dict, coverages: dict, db: dict, + data_names: set[str] | None = None, ) -> str: - """Generate a global gap analysis page showing all missing/undeclared files.""" - by_name = db.get("indexes", {}).get("by_name", {}) - platform_files = _build_platform_file_index(coverages) + """Generate a unified gap analysis page. + + Combines verification results (from coverages/verify.py) with source + provenance (from cross_reference) into a single truth dashboard. + + Sections: + 1. Verification status -- aggregated across all platforms + 2. Problem files -- missing, untested, hash mismatch + 3. Core complement -- emulator files not declared by any platform + """ + from cross_reference import cross_reference as run_cross_reference + + from common import resolve_platform_cores + + # ---- Section 1: aggregate verify results across all platforms ---- + + total_verified = 0 + total_untested = 0 + total_missing_verify = 0 + total_files_verify = 0 + + platform_problems: list[dict] = [] + for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): + total_verified += cov["verified"] + total_untested += cov["untested"] + total_missing_verify += cov["missing"] + total_files_verify += cov["total"] + + for d in cov["details"]: + if d["status"] != "ok" or d.get("discrepancy"): + platform_problems.append({ + "platform": cov["platform"], + "platform_key": pname, + "name": d["name"], + "status": d["status"], + "required": d.get("required", True), + "reason": d.get("reason", ""), + "discrepancy": d.get("discrepancy", ""), + "system": d.get("system", ""), + }) + + pct_verified = ( + f"{total_verified / total_files_verify * 100:.0f}%" + if total_files_verify + else "0%" + ) lines = [ f"# Gap Analysis - {SITE_NAME}", "", - "Files that emulators load but platforms don't declare, and their availability.", + "Unified view of BIOS verification, file provenance, and coverage gaps.", + "", + '
', + "", + '
', + f'{total_files_verify:,}', + 'Total files (all platforms)', + "
", + "", + '
', + f'{total_verified:,}', + f'Verified ({pct_verified})', + "
", + "", + '
', + f'{total_untested:,}', + 'Untested', + "
", + "", + '
', + f'{total_missing_verify:,}', + 'Missing', + "
", + "", + "
", "", ] - # Global stats - total_undeclared = 0 - total_in_repo = 0 - total_missing = 0 + # ---- Verification per platform ---- - # Build global set of all platform-declared filenames (once) - all_platform_names = set() - for pfiles in platform_files.values(): - all_platform_names.update(pfiles) + lines.extend([ + "## Verification by Platform", + "", + "| Platform | Files | Verified | Untested | Missing | Mode |", + "|----------|------:|---------:|---------:|--------:|------|", + ]) - emulator_gaps = [] - for emu_name, profile in sorted(profiles.items()): - if profile.get("type") == "alias": - continue - files = profile.get("files", []) - if not files: - continue - - undeclared = [] - for f in files: - fname = f.get("name", "") - if not fname or fname.startswith("<"): - continue - if fname not in all_platform_names: - in_repo = fname in by_name - undeclared.append( - { - "name": fname, - "required": f.get("required", False), - "in_repo": in_repo, - "source_ref": f.get("source_ref", ""), - } - ) - total_undeclared += 1 - if in_repo: - total_in_repo += 1 - else: - total_missing += 1 - - if undeclared: - emulator_gaps.append( - (emu_name, profile.get("emulator", emu_name), undeclared) - ) - - lines.extend( - [ - "## Summary", - "", - "| Metric | Count |", - "|--------|-------|", - f"| Total undeclared files | {total_undeclared} |", - f"| Already in repo | {total_in_repo} |", - f"| Missing from repo | {total_missing} |", - f"| Emulators with gaps | {len(emulator_gaps)} |", - "", - ] - ) - - # Per-emulator breakdown - lines.extend( - [ - "## Per Emulator", - "", - "| Emulator | Undeclared | In Repo | Missing |", - "|----------|-----------|---------|---------|", - ] - ) - - for emu_name, display, gaps in sorted(emulator_gaps, key=lambda x: -len(x[2])): - in_repo = sum(1 for g in gaps if g["in_repo"]) - missing = len(gaps) - in_repo + for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]): + display = cov["platform"] + m = cov["missing"] + u = cov["untested"] + missing_str = ( + f'{m}' + if m > 0 + else '0' + ) + untested_str = ( + f'{u}' + if u > 0 + else str(u) + ) lines.append( - f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | {in_repo} | {missing} |" + f"| [{display}](platforms/{pname}.md) " + f"| {cov['total']} " + f"| {cov['verified']} " + f"| {untested_str} " + f"| {missing_str} " + f"| {cov['mode']} |" + ) + lines.append("") + + # ---- Section 2: Problem files ---- + + missing_files: dict[str, dict] = {} + untested_files: dict[str, dict] = {} + mismatch_files: dict[str, dict] = {} + + for p in platform_problems: + fname = p["name"] + if p["status"] == "missing": + entry = missing_files.setdefault(fname, { + "name": fname, "required": p["required"], + "platforms": [], "reason": p["reason"], + }) + entry["platforms"].append(p["platform"]) + if p["required"]: + entry["required"] = True + elif p["status"] == "untested": + entry = untested_files.setdefault(fname, { + "name": fname, "required": p["required"], + "platforms": [], "reason": p["reason"], + }) + entry["platforms"].append(p["platform"]) + if p.get("discrepancy"): + entry = mismatch_files.setdefault(fname, { + "name": fname, "platforms": [], + "discrepancy": p["discrepancy"], + }) + entry["platforms"].append(p["platform"]) + + total_problems = len(missing_files) + len(untested_files) + len(mismatch_files) + + if total_problems > 0: + lines.extend([ + "## Problem Files", + "", + f"{len(missing_files)} missing, {len(untested_files)} untested, " + f"{len(mismatch_files)} hash mismatch.", + "", + ]) + + if missing_files: + lines.extend([ + f'### Missing ' + f"{len(missing_files)} files", + "", + "| File | Required | Platforms |", + "|------|----------|-----------|", + ]) + for fname in sorted(missing_files): + f = missing_files[fname] + req = "yes" if f["required"] else "no" + plats = ", ".join(sorted(set(f["platforms"]))) + lines.append(f"| `{fname}` | {req} | {plats} |") + lines.append("") + + if untested_files: + lines.extend([ + f'### Untested ' + f"{len(untested_files)} files", + "", + "Present but hash not verified.", + "", + "| File | Platforms | Reason |", + "|------|----------|--------|", + ]) + for fname in sorted(untested_files): + f = untested_files[fname] + plats = ", ".join(sorted(set(f["platforms"]))) + lines.append(f"| `{fname}` | {plats} | {f['reason']} |") + lines.append("") + + if mismatch_files: + lines.extend([ + f'### Hash Mismatch ' + f"{len(mismatch_files)} files", + "", + "Platform says OK but emulator validation disagrees.", + "", + "| File | Platforms | Discrepancy |", + "|------|----------|-------------|", + ]) + for fname in sorted(mismatch_files): + f = mismatch_files[fname] + plats = ", ".join(sorted(set(f["platforms"]))) + lines.append(f"| `{fname}` | {plats} | {f['discrepancy']} |") + lines.append("") + + # ---- Section 3: Core complement (cross-reference provenance) ---- + + all_declared: set[str] = set() + declared: dict[str, set[str]] = {} + for _name, cov in coverages.items(): + config = cov["config"] + for sys_id, system in config.get("systems", {}).items(): + for fe in system.get("files", []): + fname = fe.get("name", "") + if fname: + declared.setdefault(sys_id, set()).add(fname) + all_declared.add(fname) + + active_profiles = { + k: v for k, v in profiles.items() if v.get("type") != "alias" + } + + report = run_cross_reference( + active_profiles, declared, db, + data_names=data_names, all_declared=all_declared, + ) + + src_totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0} + total_undeclared = 0 + emulator_gaps = [] + + for emu_name, data in sorted(report.items()): + if data["gaps"] == 0: + continue + total_undeclared += data["gaps"] + for key in src_totals: + src_totals[key] += data.get(f"gap_{key}", 0) + emulator_gaps.append((emu_name, data)) + + if total_undeclared > 0: + total_available = ( + src_totals["bios"] + src_totals["data"] + src_totals["large_file"] + ) + pct_available = ( + f"{total_available / total_undeclared * 100:.0f}%" + if total_undeclared + else "0%" ) - # Missing files detail (not in repo) - all_missing = set() - missing_details = [] - for emu_name, display, gaps in emulator_gaps: - for g in gaps: - if not g["in_repo"] and g["name"] not in all_missing: - all_missing.add(g["name"]) - missing_details.append( - { + lines.extend([ + "## Core Complement", + "", + f"Files loaded by emulators but not declared by any platform. " + f"{total_undeclared:,} files across {len(emulator_gaps)} emulators, " + f"{total_available:,} available ({pct_available}), " + f"{src_totals['missing']} to source.", + "", + "### Provenance", + "", + "| Source | Count | Description |", + "|--------|------:|-------------|", + f"| bios/ | {src_totals['bios']} | In repository (database.json) |", + f"| data/ | {src_totals['data']} | Data directories (buildbot, GitHub) |", + f"| release | {src_totals['large_file']} " + "| GitHub release assets (large files) |", + f"| missing | {src_totals['missing']} | Not available, needs sourcing |", + "", + "### Per Emulator", + "", + "| Emulator | Undeclared | bios | data | release | Missing |", + "|----------|----------:|-----:|-----:|--------:|--------:|", + ]) + + for emu_name, data in sorted(emulator_gaps, key=lambda x: -x[1]["gaps"]): + display = data["emulator"] + m = data.get("gap_missing", 0) + missing_str = ( + f'{m}' + if m > 0 + else '0' + ) + lines.append( + f"| [{display}](emulators/{emu_name}.md) " + f"| {data['gaps']} " + f"| {data.get('gap_bios', 0)} " + f"| {data.get('gap_data', 0)} " + f"| {data.get('gap_large_file', 0)} " + f"| {missing_str} |" + ) + lines.append("") + + # List truly missing files with platform impact + emu_to_platforms: dict[str, set[str]] = {} + unique_profiles = { + k: v + for k, v in profiles.items() + if v.get("type") not in ("alias", "test") + } + for pname in coverages: + config = coverages[pname]["config"] + matched = resolve_platform_cores(config, unique_profiles) + for emu_name in matched: + emu_to_platforms.setdefault(emu_name, set()).add(pname) + + all_src_missing: set[str] = set() + src_missing_details: list[dict] = [] + for emu_name, data in emulator_gaps: + for g in data["gap_details"]: + if g["source"] == "missing" and g["name"] not in all_src_missing: + all_src_missing.add(g["name"]) + src_missing_details.append({ "name": g["name"], - "emulator": display, + "emulator": data["emulator"], + "emu_key": emu_name, "required": g["required"], "source_ref": g["source_ref"], - } + }) + + if src_missing_details: + req_src = [m for m in src_missing_details if m["required"]] + lines.extend([ + f"### Files to Source ({len(src_missing_details)} unique, " + f"{len(req_src)} required)", + "", + "| File | Emulator | Required | Affects platforms | Source ref |", + "|------|----------|----------|------------------|-----------|", + ]) + for m in sorted( + src_missing_details, + key=lambda x: (not x["required"], x["name"]), + ): + plats = sorted(emu_to_platforms.get(m["emu_key"], set())) + plat_badges = ( + " ".join( + f'{p}' + for p in plats + ) + if plats + else "-" ) - - # Build reverse map: emulator -> platforms that use it (via cores: field) - from common import resolve_platform_cores - - emu_to_platforms: dict[str, set[str]] = {} - unique_profiles = { - k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test") - } - for pname in coverages: - config = coverages[pname]["config"] - matched = resolve_platform_cores(config, unique_profiles) - for emu_name in matched: - emu_to_platforms.setdefault(emu_name, set()).add(pname) - - if missing_details: - req_missing = [m for m in missing_details if m["required"]] - opt_missing = [m for m in missing_details if not m["required"]] - - lines.extend( - [ - "", - f"## Missing Files ({len(missing_details)} unique, {len(req_missing)} required)", - "", - "Files loaded by emulators but not available in the repository.", - "Adding these files would improve pack completeness.", - "", - ] - ) - - if req_missing: - lines.extend( - [ - "### Required (highest priority)", - "", - "These files are needed for the emulator to function.", - "", - "| File | Emulator | Affects platforms | Source |", - "|------|----------|------------------|--------|", - ] - ) - for m in sorted(req_missing, key=lambda x: x["name"]): - emu_key = next( - ( - k - for k, v in profiles.items() - if v.get("emulator") == m["emulator"] - ), - "", - ) - plats = sorted(emu_to_platforms.get(emu_key, set())) - plat_str = ", ".join(plats) if plats else "-" + req = "yes" if m["required"] else "no" lines.append( - f"| `{m['name']}` | {m['emulator']} | {plat_str} | {m['source_ref']} |" + f"| `{m['name']}` | {m['emulator']} | {req} | " + f"{plat_badges} | {m['source_ref']} |" ) lines.append("") - if opt_missing: - lines.extend( - [ - "### Optional", - "", - "| File | Emulator | Source |", - "|------|----------|--------|", - ] - ) - for m in sorted(opt_missing, key=lambda x: x["name"]): - lines.append(f"| `{m['name']}` | {m['emulator']} | {m['source_ref']} |") - lines.append("") - - lines.extend(["", f"*Generated on {_timestamp()}*"]) + lines.extend(["", f'
Generated on {_timestamp()}.
']) return "\n".join(lines) + "\n" + + def generate_cross_reference( coverages: dict, profiles: dict, @@ -1280,10 +1641,19 @@ def generate_cross_reference( for core in p.get("cores", [pname]): core_to_profile[str(core)] = pname + total_cores = len(unique) + total_upstreams = len({ + p.get("upstream", p.get("source", "")) + for p in unique.values() + if p.get("upstream") or p.get("source") + }) + lines = [ f"# Cross-reference - {SITE_NAME}", "", - "Platform >Core >Systems >Upstream emulator.", + f"Platform > Core > Systems > Upstream emulator. " + f"{total_cores} cores across {len(coverages)} platforms, " + f"tracing back to {total_upstreams} upstream projects.", "", "The libretro core is a port of the upstream emulator. " "Files, features, and validation may differ between the two.", @@ -1333,7 +1703,8 @@ def generate_cross_reference( for emu_name in sorted(matched.keys()): p = matched[emu_name] emu_display = p.get("emulator", emu_name) - cls = p.get("core_classification", "-") + cls_raw = p.get("core_classification", "-") + cls = CLS_LABELS.get(cls_raw, cls_raw) p.get("type", "") upstream = p.get("upstream", "") source = p.get("source", "") @@ -1413,7 +1784,8 @@ def generate_cross_reference( classifications = set() all_plats: set[str] = set() for c in cores: - classifications.add(unique[c].get("core_classification", "-")) + raw_cls = unique[c].get("core_classification", "-") + classifications.add(CLS_LABELS.get(raw_cls, raw_cls)) all_plats.update(platform_membership.get(c, set())) cls_str = ", ".join(sorted(classifications)) @@ -1799,7 +2171,8 @@ def main(): # Generate gap analysis page print("Generating gap analysis page...") write_if_changed( - str(docs / "gaps.md"), generate_gap_analysis(profiles, coverages, db) + str(docs / "gaps.md"), + generate_gap_analysis(profiles, coverages, db, suppl_names), ) # Wiki pages: copy manually maintained sources + generate dynamic ones @@ -1850,6 +2223,8 @@ theme: icon: material/brightness-4 name: Switch to auto font: false + icon: + logo: material/chip features: - navigation.tabs - navigation.sections @@ -1859,6 +2234,8 @@ theme: - search.highlight - content.tabs.link - toc.follow +extra_css: +- stylesheets/extra.css markdown_extensions: - tables - admonition