diff --git a/mkdocs.yml b/mkdocs.yml
index 1549d12e..5da4d6bd 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -20,6 +20,8 @@ theme:
icon: material/brightness-4
name: Switch to auto
font: false
+ icon:
+ logo: material/chip
features:
- navigation.tabs
- navigation.sections
@@ -29,6 +31,8 @@ theme:
- search.highlight
- content.tabs.link
- toc.follow
+extra_css:
+- stylesheets/extra.css
markdown_extensions:
- tables
- admonition
diff --git a/scripts/cross_reference.py b/scripts/cross_reference.py
index 57a69aa5..83e7e8d4 100644
--- a/scripts/cross_reference.py
+++ b/scripts/cross_reference.py
@@ -103,32 +103,41 @@ def _build_supplemental_index(
return names
-def _find_in_repo(
+def _resolve_source(
fname: str,
by_name: dict[str, list],
by_name_lower: dict[str, str],
data_names: set[str] | None = None,
-) -> bool:
+ by_path_suffix: dict | None = None,
+) -> str | None:
+ """Return the source category for a file, or None if not found.
+
+ Returns ``"bios"`` (in database.json / bios/), ``"data"`` (in data/),
+ or ``None`` (not available anywhere).
+ """
+ # bios/ via database.json by_name
if fname in by_name:
- return True
- # For directory entries or paths, extract the meaningful basename
+ return "bios"
stripped = fname.rstrip("/")
basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None
if basename and basename in by_name:
- return True
+ return "bios"
key = fname.lower()
if key in by_name_lower:
- return True
+ return "bios"
if basename:
- key = basename.lower()
- if key in by_name_lower:
- return True
+ if basename.lower() in by_name_lower:
+ return "bios"
+ # bios/ via by_path_suffix (regional variants)
+ if by_path_suffix and fname in by_path_suffix:
+ return "bios"
+ # data/ supplemental index
if data_names:
if fname in data_names or key in data_names:
- return True
+ return "data"
if basename and (basename in data_names or basename.lower() in data_names):
- return True
- return False
+ return "data"
+ return None
def cross_reference(
@@ -137,30 +146,44 @@ def cross_reference(
db: dict,
platform_data_dirs: dict[str, set[str]] | None = None,
data_names: set[str] | None = None,
+ all_declared: set[str] | None = None,
) -> dict:
"""Compare emulator profiles against platform declarations.
Returns a report with gaps (files emulators need but platforms don't list)
- and coverage stats. Files covered by matching data_directories between
- emulator profile and platform config are not reported as gaps.
- Checks both bios/ (via database) and data/ (via data_names index).
+ and coverage stats. Each gap entry carries a ``source`` field indicating
+ where the file is available: ``"bios"`` (bios/ via database.json),
+ ``"data"`` (data/ directory), ``"large_file"`` (GitHub release asset),
+ or ``"missing"`` (not available anywhere).
+
+ The boolean ``in_repo`` is derived: ``source != "missing"``.
+
+ When *all_declared* is provided (flat set of every filename declared by
+ any platform for any system), it is used for the ``in_platform`` check
+ instead of the per-system lookup. This is appropriate for the global
+ gap analysis page where "undeclared" means "no platform declares it at all".
"""
platform_data_dirs = platform_data_dirs or {}
by_name = db.get("indexes", {}).get("by_name", {})
by_name_lower = {k.lower(): k for k in by_name}
+ by_md5 = db.get("indexes", {}).get("by_md5", {})
+ by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
+ db_files = db.get("files", {})
report = {}
for emu_name, profile in profiles.items():
emu_files = profile.get("files", [])
systems = profile.get("systems", [])
- platform_names = set()
- for sys_id in systems:
- platform_names.update(declared.get(sys_id, set()))
+ if all_declared is not None:
+ platform_names = all_declared
+ else:
+ platform_names = set()
+ for sys_id in systems:
+ platform_names.update(declared.get(sys_id, set()))
gaps = []
covered = []
- by_md5 = db.get("indexes", {}).get("by_md5", {})
for f in emu_files:
fname = f.get("name", "")
if not fname:
@@ -174,37 +197,45 @@ def cross_reference(
if "path" in f and f["path"] is None:
continue
- # Skip release asset files (stored in GitHub releases, not bios/)
- if f.get("storage") == "release":
- continue
-
# Skip standalone-only files
file_mode = f.get("mode", "both")
if file_mode == "standalone":
continue
+ # --- resolve source provenance ---
+ storage = f.get("storage", "")
+ if storage in ("release", "large_file"):
+ source = "large_file"
+ else:
+ source = _resolve_source(
+ fname, by_name, by_name_lower, data_names, by_path_suffix
+ )
+ if source is None:
+ path_field = f.get("path", "")
+ if path_field and path_field != fname:
+ source = _resolve_source(
+ path_field, by_name, by_name_lower,
+ data_names, by_path_suffix,
+ )
+ # Try MD5 hash match
+ if source is None:
+ md5_raw = f.get("md5", "")
+ if md5_raw:
+ for md5_val in md5_raw.split(","):
+ md5_val = md5_val.strip().lower()
+ if md5_val and by_md5.get(md5_val):
+ source = "bios"
+ break
+ # Try SHA1 hash match
+ if source is None:
+ sha1 = f.get("sha1", "")
+ if sha1 and sha1 in db_files:
+ source = "bios"
+ if source is None:
+ source = "missing"
+
+ in_repo = source != "missing"
in_platform = fname in platform_names
- in_repo = _find_in_repo(fname, by_name, by_name_lower, data_names)
- if not in_repo:
- path_field = f.get("path", "")
- if path_field and path_field != fname:
- in_repo = _find_in_repo(
- path_field, by_name, by_name_lower, data_names
- )
- # Try MD5 hash match (handles files that exist under different names)
- if not in_repo:
- md5_raw = f.get("md5", "")
- if md5_raw:
- for md5_val in md5_raw.split(","):
- md5_val = md5_val.strip().lower()
- if md5_val and by_md5.get(md5_val):
- in_repo = True
- break
- # Try SHA1 hash match
- if not in_repo:
- sha1 = f.get("sha1", "")
- if sha1 and sha1 in db.get("files", {}):
- in_repo = True
entry = {
"name": fname,
@@ -213,6 +244,7 @@ def cross_reference(
"source_ref": f.get("source_ref", ""),
"in_platform": in_platform,
"in_repo": in_repo,
+ "source": source,
}
if not in_platform:
@@ -227,7 +259,10 @@ def cross_reference(
"platform_covered": len(covered),
"gaps": len(gaps),
"gap_in_repo": sum(1 for g in gaps if g["in_repo"]),
- "gap_missing": sum(1 for g in gaps if not g["in_repo"]),
+ "gap_missing": sum(1 for g in gaps if g["source"] == "missing"),
+ "gap_bios": sum(1 for g in gaps if g["source"] == "bios"),
+ "gap_data": sum(1 for g in gaps if g["source"] == "data"),
+ "gap_large_file": sum(1 for g in gaps if g["source"] == "large_file"),
"gap_details": gaps,
}
@@ -240,15 +275,19 @@ def print_report(report: dict) -> None:
print("=" * 60)
total_gaps = 0
- total_in_repo = 0
- total_missing = 0
+ totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
for emu_name, data in sorted(report.items()):
gaps = data["gaps"]
if gaps == 0:
- status = "OK"
- else:
- status = f"{data['gap_in_repo']} in repo, {data['gap_missing']} missing"
+ continue
+
+ parts = []
+ for key in ("bios", "data", "large_file", "missing"):
+ count = data.get(f"gap_{key}", 0)
+ if count:
+ parts.append(f"{count} {key}")
+ status = ", ".join(parts) if parts else "OK"
print(f"\n{data['emulator']} ({', '.join(data['systems'])})")
print(
@@ -256,23 +295,24 @@ def print_report(report: dict) -> None:
f"{data['platform_covered']} declared by platforms, "
f"{gaps} undeclared"
)
+ print(f" Gaps: {status}")
- if gaps > 0:
- print(f" Gaps: {status}")
- for g in data["gap_details"]:
- req = "*" if g["required"] else " "
- loc = "repo" if g["in_repo"] else "MISSING"
- note = f" -- {g['note']}" if g["note"] else ""
- print(f" {req} {g['name']} [{loc}]{note}")
+ for g in data["gap_details"]:
+ req = "*" if g["required"] else " "
+ src = g.get("source", "missing").upper()
+ note = f" -- {g['note']}" if g["note"] else ""
+ print(f" {req} {g['name']} [{src}]{note}")
total_gaps += gaps
- total_in_repo += data["gap_in_repo"]
- total_missing += data["gap_missing"]
+ for key in totals:
+ totals[key] += data.get(f"gap_{key}", 0)
print(f"\n{'=' * 60}")
print(f"Total: {total_gaps} undeclared files across all emulators")
- print(f" {total_in_repo} already in repo (can be added to packs)")
- print(f" {total_missing} missing from repo (need to be sourced)")
+ available = totals["bios"] + totals["data"] + totals["large_file"]
+ print(f" {available} available (bios: {totals['bios']}, data: {totals['data']}, "
+ f"large_file: {totals['large_file']})")
+ print(f" {totals['missing']} missing (need to be sourced)")
def main():
diff --git a/scripts/generate_site.py b/scripts/generate_site.py
index e2252f95..7eadc2b9 100644
--- a/scripts/generate_site.py
+++ b/scripts/generate_site.py
@@ -38,6 +38,19 @@ GENERATED_DIRS = ["platforms", "systems", "emulators"]
WIKI_SRC_DIR = "wiki" # manually maintained wiki sources
SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
+CLS_LABELS = {
+ "official_port": "Official ports",
+ "community_fork": "Community forks",
+ "pure_libretro": "Pure libretro",
+ "game_engine": "Game engines",
+ "enhanced_fork": "Enhanced forks",
+ "frozen_snapshot": "Frozen snapshots",
+ "embedded_hle": "Embedded HLE",
+ "launcher": "Launchers",
+ "unclassified": "Unclassified",
+ "other": "Other",
+}
+
# Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking
_system_page_map: dict[str, tuple[str, str]] = {}
@@ -229,80 +242,93 @@ def generate_home(
cls = p.get("core_classification", "unclassified")
classifications[cls] = classifications.get(cls, 0) + 1
+ # Count total systems across all profiles
+ all_systems = set()
+ for p in unique.values():
+ all_systems.update(p.get("systems", []))
+
lines = [
+ '
',
+ "",
f"# {SITE_NAME}",
"",
"Source-verified BIOS and firmware packs for retrogaming platforms.",
"",
- "## Quick start",
+ "
",
"",
- "1. Find your platform in the table below",
- "2. Click **Pack** to download the ZIP",
- "3. Extract to your emulator's BIOS directory",
+ '',
"",
- "| Platform | Extract to |",
- "|----------|-----------|",
- "| RetroArch / Lakka | `system/` |",
- "| Batocera | `/userdata/bios/` |",
- "| Recalbox | `/recalbox/share/bios/` |",
- "| RetroBat | `bios/` |",
- "| RetroDECK | `~/retrodeck/bios/` |",
- "| EmuDeck | `Emulation/bios/` |",
+ '
',
+ f'{total_files:,}',
+ 'Files',
+ "
",
"",
- "---",
+ '
',
+ f'{len(coverages)}',
+ 'Platforms',
+ "
",
"",
- "## Methodology",
+ '
',
+ f'{emulator_count}',
+ 'Emulators profiled',
+ "
",
"",
- "Documentation and metadata can drift from what emulators actually load at runtime.",
- "To keep packs accurate, each file here is checked against the emulator's source code.",
+ '
',
+ f'{_fmt_size(total_size)}',
+ 'Total size',
+ "
",
"",
- "The source code is the primary reference because it reflects actual behavior.",
- "Other sources remain useful but are verified against it:",
- "",
- "1. **Upstream emulator source** - what the original project loads (Dolphin, PCSX2, Mednafen...)",
- "2. **Libretro core source** - the RetroArch port, which may adapt paths or add files",
- "3. **`.info` declarations** - metadata that platforms rely on, checked for accuracy",
- "",
- f"**{emulator_count}** emulators profiled. "
- f"Each profile documents what the code loads, what it validates, "
- f"and where the port differs from the original.",
- "",
- f"**{total_files:,}** files | **{len(coverages)}** platforms | "
- f"**{emulator_count}** emulator profiles | **{_fmt_size(total_size)}** total",
- "",
- "---",
+ "
",
"",
]
- # Platform table
+ # Platforms FIRST (main action)
lines.extend(
[
"## Platforms",
"",
- "| | Platform | Coverage | Verified | Download |",
- "|---|----------|----------|----------|----------|",
+ "| | Platform | Files | Verification | Download |",
+ "|---|----------|-------|-------------|----------|",
]
)
+ mode_icons = {"md5": "MD5", "sha1": "SHA1", "existence": "exists"}
+
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
- pct = _pct(cov["present"], cov["total"])
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = (
f"{{ width=20 loading=lazy }}" if logo_url else ""
)
+ mode_label = mode_icons.get(cov["mode"], cov["mode"])
lines.append(
f"| {logo_md} | [{display}](platforms/{name}.md) | "
- f"{cov['present']}/{cov['total']} ({pct}) | "
- f"{cov['verified']} | "
- f"[Pack]({RELEASE_URL}) |"
+ f"{cov['present']:,} | {mode_label} | "
+ f"[Pack]({RELEASE_URL}){{ .md-button .md-button--primary }} |"
)
+ # Quick start (collapsible -- secondary info)
+ lines.extend(
+ [
+ "",
+ '??? info "Where to extract"',
+ "",
+ " | Platform | Extract to |",
+ " |----------|-----------|",
+ " | RetroArch / Lakka | `system/` |",
+ " | Batocera | `/userdata/bios/` |",
+ " | Recalbox | `/recalbox/share/bios/` |",
+ " | RetroBat | `bios/` |",
+ " | RetroDECK | `~/retrodeck/bios/` |",
+ " | EmuDeck | `Emulation/bios/` |",
+ "",
+ ]
+ )
+
# Emulator classification breakdown
lines.extend(
[
- "",
"## Emulator profiles",
"",
"| Classification | Count |",
@@ -310,21 +336,41 @@ def generate_home(
]
)
for cls, count in sorted(classifications.items(), key=lambda x: -x[1]):
- lines.append(f"| {cls} | {count} |")
+ label = CLS_LABELS.get(cls, cls)
+ lines.append(f"| [{label}](emulators/index.md#{cls}) | {count} |")
+
+ # Methodology (collapsible)
+ lines.extend(
+ [
+ "",
+ '??? abstract "Methodology"',
+ "",
+ " Each file is checked against the emulator's source code. "
+ "Documentation and metadata can drift from actual runtime behavior, "
+ "so the source is the primary reference.",
+ "",
+ " 1. **Upstream emulator source** -- what the original project "
+ "loads (Dolphin, PCSX2, Mednafen...)",
+ " 2. **Libretro core source** -- the RetroArch port, which may "
+ "adapt paths or add files",
+ " 3. **`.info` declarations** -- metadata that platforms rely on, "
+ "checked for accuracy",
+ "",
+ ]
+ )
# Quick links
lines.extend(
[
- "",
"---",
"",
- "[Systems](systems/){ .md-button } "
- "[Emulators](emulators/){ .md-button } "
+ "[Systems](systems/index.md){ .md-button } "
+ "[Emulators](emulators/index.md){ .md-button } "
"[Cross-reference](cross-reference.md){ .md-button } "
"[Gap Analysis](gaps.md){ .md-button } "
"[Contributing](contributing.md){ .md-button .md-button--primary }",
"",
- f"*Generated on {ts}.*",
+ f'Generated on {ts}.
',
]
)
@@ -335,24 +381,61 @@ def generate_home(
def generate_platform_index(coverages: dict) -> str:
+ total_files = sum(c["total"] for c in coverages.values())
+ total_present = sum(c["present"] for c in coverages.values())
+ total_verified = sum(c["verified"] for c in coverages.values())
+
lines = [
f"# Platforms - {SITE_NAME}",
"",
+ f"{len(coverages)} supported platforms, "
+ f"{total_present:,}/{total_files:,} files present, "
+ f"{total_verified:,} verified.",
+ "",
"| Platform | Coverage | Verification | Status |",
"|----------|----------|-------------|--------|",
]
+ mode_labels = {
+ "md5": 'MD5',
+ "sha1": 'SHA1',
+ "existence": 'existence',
+ }
+
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
+ pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
pct = _pct(cov["present"], cov["total"])
plat_status = cov["config"].get("status", "active")
- status = (
- "archived" if plat_status == "archived" else _status_icon(cov["percentage"])
+
+ badge_cls = (
+ "rb-badge-success"
+ if pct_val >= 95
+ else "rb-badge-warning"
+ if pct_val >= 70
+ else "rb-badge-danger"
)
+ coverage_str = (
+ f'{cov["present"]}/{cov["total"]} '
+ f'{pct}'
+ )
+
+ mode_html = mode_labels.get(
+ cov["mode"],
+ f'{cov["mode"]}',
+ )
+
+ if plat_status == "archived":
+ status = 'archived'
+ elif pct_val >= 100:
+ status = 'complete'
+ elif pct_val >= 95:
+ status = 'near'
+ else:
+ status = 'partial'
+
lines.append(
- f"| [{display}]({name}.md) | "
- f"{cov['present']}/{cov['total']} ({pct}) | "
- f"{cov['mode']} | {status} |"
+ f"| [{display}]({name}.md) | {coverage_str} | {mode_html} | {status} |"
)
return "\n".join(lines) + "\n"
@@ -379,14 +462,58 @@ def generate_platform_page(
hash_type = config.get("hash_type", "")
base_dest = config.get("base_destination", "")
+ pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
+ coverage_badge = (
+ "rb-badge-success"
+ if pct_val >= 95
+ else "rb-badge-warning"
+ if pct_val >= 70
+ else "rb-badge-danger"
+ )
+ mode_badge = (
+ "rb-badge-success" if mode in ("md5", "sha1") else "rb-badge-info"
+ )
+
lines = [
f"# {display} - {SITE_NAME}",
"",
- logo_md + "| | |",
- "|---|---|",
- f"| Verification | {mode} |",
- f"| Hash type | {hash_type} |",
+ logo_md,
]
+
+ # Stat cards
+ lines.extend(
+ [
+ '',
+ "",
+ '
',
+ f'{cov["present"]}/{cov["total"]}',
+ f'Coverage ({pct})',
+ "
",
+ "",
+ '
',
+ f'{cov["verified"]}',
+ 'Verified',
+ "
",
+ "",
+ '
',
+ f'{cov["missing"]}',
+ 'Missing',
+ "
",
+ "",
+ '
',
+ f''
+ f'{mode}',
+ 'Verification',
+ "
",
+ "",
+ "
",
+ "",
+ "| | |",
+ "|---|---|",
+ ]
+ )
+ if hash_type:
+ lines.append(f"| Hash type | {hash_type} |")
if version:
lines.append(f"| Version | {version} |")
if base_dest:
@@ -396,10 +523,8 @@ def generate_platform_page(
lines.extend(
[
"",
- f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | "
- f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}",
- "",
- f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}",
+ f"[Download {display} Pack]({RELEASE_URL})"
+ "{ .md-button .md-button--primary }",
"",
]
)
@@ -431,7 +556,13 @@ def generate_platform_page(
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
non_ok = total - ok_count
- status = "OK" if non_ok == 0 else f"{non_ok} issue{'s' if non_ok > 1 else ''}"
+ if non_ok == 0:
+ status = 'OK'
+ else:
+ status = (
+ f''
+ f'{non_ok} issue{"s" if non_ok > 1 else ""}'
+ )
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
@@ -537,9 +668,18 @@ def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]:
def generate_systems_index(manufacturers: dict) -> str:
+ total_mfr = len(manufacturers)
+ total_consoles = sum(len(c) for c in manufacturers.values())
+ total_files = sum(
+ len(files) for consoles in manufacturers.values() for files in consoles.values()
+ )
+
lines = [
f"# Systems - {SITE_NAME}",
"",
+ f"{total_mfr} manufacturers, {total_consoles} consoles, "
+ f"{total_files:,} files in the repository.",
+ "",
"| Manufacturer | Consoles | Files |",
"|-------------|----------|-------|",
]
@@ -671,18 +811,22 @@ def generate_emulators_index(profiles: dict) -> str:
entries = by_class.get(cls, [])
if not entries:
continue
+ label = CLS_LABELS.get(cls, cls)
desc = cls_desc.get(cls, "")
- lines.append(f"| [{cls}](#{cls}) | {len(entries)} | {desc} |")
+ lines.append(f"| [{label}](#{cls}) | {len(entries)} | {desc} |")
lines.append("")
- # Per-classification sections
for cls in cls_order:
entries = by_class.get(cls, [])
if not entries:
continue
+ label = CLS_LABELS.get(cls, cls)
+ desc = cls_desc.get(cls, "")
lines.extend(
[
- f"## {cls}",
+ f'## {label} {{ #{cls} }}',
+ "",
+ f"*{desc}* -- {len(entries)} profiles",
"",
"| Engine | Systems | Files |",
"|--------|---------|-------|",
@@ -696,7 +840,9 @@ def generate_emulators_index(profiles: dict) -> str:
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
- lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {len(files)} |")
+ file_count = len(files)
+ file_str = str(file_count) if file_count else "-"
+ lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {file_str} |")
lines.append("")
if aliases:
@@ -750,7 +896,8 @@ def generate_emulator_page(
f"| Type | {emu_type} |",
]
if classification:
- lines.append(f"| Classification | {classification} |")
+ cls_display = CLS_LABELS.get(classification, classification)
+ lines.append(f"| Classification | {cls_display} |")
if source:
lines.append(f"| Source | [{source}]({source}) |")
if upstream and upstream != source:
@@ -939,40 +1086,79 @@ def generate_emulator_page(
size_options = f.get("size_options", [])
size_range = f.get("size_range", "")
- # Status badges
+ # Status badges (HTML)
badges = []
if required:
- badges.append("**required**")
- else:
- badges.append("optional")
- if hle:
- badges.append("HLE available")
- if mode:
- badges.append(mode)
- if category and category != "bios":
- badges.append(category)
- if region:
badges.append(
+ 'required'
+ )
+ else:
+ badges.append(
+ 'optional'
+ )
+ if not in_repo:
+ badges.append(
+ 'missing'
+ )
+ elif in_repo:
+ badges.append(
+ 'in repo'
+ )
+ if hle:
+ badges.append(
+ 'HLE fallback'
+ )
+ if mode:
+ badges.append(
+ f'{mode}'
+ )
+ if category and category != "bios":
+ badges.append(
+ f'{category}'
+ )
+ if region:
+ region_str = (
", ".join(region) if isinstance(region, list) else str(region)
)
+ badges.append(
+ f'{region_str}'
+ )
if storage and storage != "embedded":
- badges.append(storage)
+ badges.append(
+ f'{storage}'
+ )
if bundled:
- badges.append("bundled in binary")
+ badges.append(
+ 'bundled'
+ )
if embedded:
- badges.append("embedded")
+ badges.append(
+ 'embedded'
+ )
if has_builtin:
- badges.append("has built-in fallback")
+ badges.append(
+ 'built-in fallback'
+ )
if archive:
- badges.append(f"in `{archive}`")
+ badges.append(
+ f'in {archive}'
+ )
if ftype and ftype != "bios":
- badges.append(ftype)
- if not in_repo:
- badges.append("missing from repo")
+ badges.append(
+ f'{ftype}'
+ )
- lines.append(f"**`{fname}`** -{', '.join(badges)}")
+ badge_str = " ".join(badges)
+ border_cls = (
+ "rb-file-entry-required" if required else "rb-file-entry-optional"
+ )
+ lines.append(
+ f''
+ )
+ lines.append("")
+ lines.append(f"**`{fname}`** {badge_str}")
if desc:
- lines.append(f": {desc}")
+ lines.append(f"
{desc}")
lines.append("")
details = []
@@ -1069,6 +1255,8 @@ def generate_emulator_page(
if len(contents) > 10:
lines.append(f" - ... and {len(contents) - 10} more")
lines.append("")
+ lines.append("
")
+ lines.append("")
# Data directories
if data_dirs:
@@ -1090,181 +1278,354 @@ def generate_gap_analysis(
profiles: dict,
coverages: dict,
db: dict,
+ data_names: set[str] | None = None,
) -> str:
- """Generate a global gap analysis page showing all missing/undeclared files."""
- by_name = db.get("indexes", {}).get("by_name", {})
- platform_files = _build_platform_file_index(coverages)
+ """Generate a unified gap analysis page.
+
+ Combines verification results (from coverages/verify.py) with source
+ provenance (from cross_reference) into a single truth dashboard.
+
+ Sections:
+ 1. Verification status -- aggregated across all platforms
+ 2. Problem files -- missing, untested, hash mismatch
+ 3. Core complement -- emulator files not declared by any platform
+ """
+ from cross_reference import cross_reference as run_cross_reference
+
+ from common import resolve_platform_cores
+
+ # ---- Section 1: aggregate verify results across all platforms ----
+
+ total_verified = 0
+ total_untested = 0
+ total_missing_verify = 0
+ total_files_verify = 0
+
+ platform_problems: list[dict] = []
+ for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
+ total_verified += cov["verified"]
+ total_untested += cov["untested"]
+ total_missing_verify += cov["missing"]
+ total_files_verify += cov["total"]
+
+ for d in cov["details"]:
+ if d["status"] != "ok" or d.get("discrepancy"):
+ platform_problems.append({
+ "platform": cov["platform"],
+ "platform_key": pname,
+ "name": d["name"],
+ "status": d["status"],
+ "required": d.get("required", True),
+ "reason": d.get("reason", ""),
+ "discrepancy": d.get("discrepancy", ""),
+ "system": d.get("system", ""),
+ })
+
+ pct_verified = (
+ f"{total_verified / total_files_verify * 100:.0f}%"
+ if total_files_verify
+ else "0%"
+ )
lines = [
f"# Gap Analysis - {SITE_NAME}",
"",
- "Files that emulators load but platforms don't declare, and their availability.",
+ "Unified view of BIOS verification, file provenance, and coverage gaps.",
+ "",
+ '',
+ "",
+ '
',
+ f'{total_files_verify:,}',
+ 'Total files (all platforms)',
+ "
",
+ "",
+ '
',
+ f'{total_verified:,}',
+ f'Verified ({pct_verified})',
+ "
",
+ "",
+ '
',
+ f'{total_untested:,}',
+ 'Untested',
+ "
",
+ "",
+ '
',
+ f'{total_missing_verify:,}',
+ 'Missing',
+ "
",
+ "",
+ "
",
"",
]
- # Global stats
- total_undeclared = 0
- total_in_repo = 0
- total_missing = 0
+ # ---- Verification per platform ----
- # Build global set of all platform-declared filenames (once)
- all_platform_names = set()
- for pfiles in platform_files.values():
- all_platform_names.update(pfiles)
+ lines.extend([
+ "## Verification by Platform",
+ "",
+ "| Platform | Files | Verified | Untested | Missing | Mode |",
+ "|----------|------:|---------:|---------:|--------:|------|",
+ ])
- emulator_gaps = []
- for emu_name, profile in sorted(profiles.items()):
- if profile.get("type") == "alias":
- continue
- files = profile.get("files", [])
- if not files:
- continue
-
- undeclared = []
- for f in files:
- fname = f.get("name", "")
- if not fname or fname.startswith("<"):
- continue
- if fname not in all_platform_names:
- in_repo = fname in by_name
- undeclared.append(
- {
- "name": fname,
- "required": f.get("required", False),
- "in_repo": in_repo,
- "source_ref": f.get("source_ref", ""),
- }
- )
- total_undeclared += 1
- if in_repo:
- total_in_repo += 1
- else:
- total_missing += 1
-
- if undeclared:
- emulator_gaps.append(
- (emu_name, profile.get("emulator", emu_name), undeclared)
- )
-
- lines.extend(
- [
- "## Summary",
- "",
- "| Metric | Count |",
- "|--------|-------|",
- f"| Total undeclared files | {total_undeclared} |",
- f"| Already in repo | {total_in_repo} |",
- f"| Missing from repo | {total_missing} |",
- f"| Emulators with gaps | {len(emulator_gaps)} |",
- "",
- ]
- )
-
- # Per-emulator breakdown
- lines.extend(
- [
- "## Per Emulator",
- "",
- "| Emulator | Undeclared | In Repo | Missing |",
- "|----------|-----------|---------|---------|",
- ]
- )
-
- for emu_name, display, gaps in sorted(emulator_gaps, key=lambda x: -len(x[2])):
- in_repo = sum(1 for g in gaps if g["in_repo"])
- missing = len(gaps) - in_repo
+ for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
+ display = cov["platform"]
+ m = cov["missing"]
+ u = cov["untested"]
+ missing_str = (
+ f'{m}'
+ if m > 0
+ else '0'
+ )
+ untested_str = (
+ f'{u}'
+ if u > 0
+ else str(u)
+ )
lines.append(
- f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | {in_repo} | {missing} |"
+ f"| [{display}](platforms/{pname}.md) "
+ f"| {cov['total']} "
+ f"| {cov['verified']} "
+ f"| {untested_str} "
+ f"| {missing_str} "
+ f"| {cov['mode']} |"
+ )
+ lines.append("")
+
+ # ---- Section 2: Problem files ----
+
+ missing_files: dict[str, dict] = {}
+ untested_files: dict[str, dict] = {}
+ mismatch_files: dict[str, dict] = {}
+
+ for p in platform_problems:
+ fname = p["name"]
+ if p["status"] == "missing":
+ entry = missing_files.setdefault(fname, {
+ "name": fname, "required": p["required"],
+ "platforms": [], "reason": p["reason"],
+ })
+ entry["platforms"].append(p["platform"])
+ if p["required"]:
+ entry["required"] = True
+ elif p["status"] == "untested":
+ entry = untested_files.setdefault(fname, {
+ "name": fname, "required": p["required"],
+ "platforms": [], "reason": p["reason"],
+ })
+ entry["platforms"].append(p["platform"])
+ if p.get("discrepancy"):
+ entry = mismatch_files.setdefault(fname, {
+ "name": fname, "platforms": [],
+ "discrepancy": p["discrepancy"],
+ })
+ entry["platforms"].append(p["platform"])
+
+ total_problems = len(missing_files) + len(untested_files) + len(mismatch_files)
+
+ if total_problems > 0:
+ lines.extend([
+ "## Problem Files",
+ "",
+ f"{len(missing_files)} missing, {len(untested_files)} untested, "
+ f"{len(mismatch_files)} hash mismatch.",
+ "",
+ ])
+
+ if missing_files:
+ lines.extend([
+ f'### Missing '
+ f"{len(missing_files)} files",
+ "",
+ "| File | Required | Platforms |",
+ "|------|----------|-----------|",
+ ])
+ for fname in sorted(missing_files):
+ f = missing_files[fname]
+ req = "yes" if f["required"] else "no"
+ plats = ", ".join(sorted(set(f["platforms"])))
+ lines.append(f"| `{fname}` | {req} | {plats} |")
+ lines.append("")
+
+ if untested_files:
+ lines.extend([
+ f'### Untested '
+ f"{len(untested_files)} files",
+ "",
+ "Present but hash not verified.",
+ "",
+ "| File | Platforms | Reason |",
+ "|------|----------|--------|",
+ ])
+ for fname in sorted(untested_files):
+ f = untested_files[fname]
+ plats = ", ".join(sorted(set(f["platforms"])))
+ lines.append(f"| `{fname}` | {plats} | {f['reason']} |")
+ lines.append("")
+
+ if mismatch_files:
+ lines.extend([
+ f'### Hash Mismatch '
+ f"{len(mismatch_files)} files",
+ "",
+ "Platform says OK but emulator validation disagrees.",
+ "",
+ "| File | Platforms | Discrepancy |",
+ "|------|----------|-------------|",
+ ])
+ for fname in sorted(mismatch_files):
+ f = mismatch_files[fname]
+ plats = ", ".join(sorted(set(f["platforms"])))
+ lines.append(f"| `{fname}` | {plats} | {f['discrepancy']} |")
+ lines.append("")
+
+ # ---- Section 3: Core complement (cross-reference provenance) ----
+
+ all_declared: set[str] = set()
+ declared: dict[str, set[str]] = {}
+ for _name, cov in coverages.items():
+ config = cov["config"]
+ for sys_id, system in config.get("systems", {}).items():
+ for fe in system.get("files", []):
+ fname = fe.get("name", "")
+ if fname:
+ declared.setdefault(sys_id, set()).add(fname)
+ all_declared.add(fname)
+
+ active_profiles = {
+ k: v for k, v in profiles.items() if v.get("type") != "alias"
+ }
+
+ report = run_cross_reference(
+ active_profiles, declared, db,
+ data_names=data_names, all_declared=all_declared,
+ )
+
+ src_totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
+ total_undeclared = 0
+ emulator_gaps = []
+
+ for emu_name, data in sorted(report.items()):
+ if data["gaps"] == 0:
+ continue
+ total_undeclared += data["gaps"]
+ for key in src_totals:
+ src_totals[key] += data.get(f"gap_{key}", 0)
+ emulator_gaps.append((emu_name, data))
+
+ if total_undeclared > 0:
+ total_available = (
+ src_totals["bios"] + src_totals["data"] + src_totals["large_file"]
+ )
+ pct_available = (
+ f"{total_available / total_undeclared * 100:.0f}%"
+ if total_undeclared
+ else "0%"
)
- # Missing files detail (not in repo)
- all_missing = set()
- missing_details = []
- for emu_name, display, gaps in emulator_gaps:
- for g in gaps:
- if not g["in_repo"] and g["name"] not in all_missing:
- all_missing.add(g["name"])
- missing_details.append(
- {
+ lines.extend([
+ "## Core Complement",
+ "",
+ f"Files loaded by emulators but not declared by any platform. "
+ f"{total_undeclared:,} files across {len(emulator_gaps)} emulators, "
+ f"{total_available:,} available ({pct_available}), "
+ f"{src_totals['missing']} to source.",
+ "",
+ "### Provenance",
+ "",
+ "| Source | Count | Description |",
+ "|--------|------:|-------------|",
+ f"| bios/ | {src_totals['bios']} | In repository (database.json) |",
+ f"| data/ | {src_totals['data']} | Data directories (buildbot, GitHub) |",
+ f"| release | {src_totals['large_file']} "
+ "| GitHub release assets (large files) |",
+ f"| missing | {src_totals['missing']} | Not available, needs sourcing |",
+ "",
+ "### Per Emulator",
+ "",
+ "| Emulator | Undeclared | bios | data | release | Missing |",
+ "|----------|----------:|-----:|-----:|--------:|--------:|",
+ ])
+
+ for emu_name, data in sorted(emulator_gaps, key=lambda x: -x[1]["gaps"]):
+ display = data["emulator"]
+ m = data.get("gap_missing", 0)
+ missing_str = (
+ f'{m}'
+ if m > 0
+ else '0'
+ )
+ lines.append(
+ f"| [{display}](emulators/{emu_name}.md) "
+ f"| {data['gaps']} "
+ f"| {data.get('gap_bios', 0)} "
+ f"| {data.get('gap_data', 0)} "
+ f"| {data.get('gap_large_file', 0)} "
+ f"| {missing_str} |"
+ )
+ lines.append("")
+
+ # List truly missing files with platform impact
+ emu_to_platforms: dict[str, set[str]] = {}
+ unique_profiles = {
+ k: v
+ for k, v in profiles.items()
+ if v.get("type") not in ("alias", "test")
+ }
+ for pname in coverages:
+ config = coverages[pname]["config"]
+ matched = resolve_platform_cores(config, unique_profiles)
+ for emu_name in matched:
+ emu_to_platforms.setdefault(emu_name, set()).add(pname)
+
+ all_src_missing: set[str] = set()
+ src_missing_details: list[dict] = []
+ for emu_name, data in emulator_gaps:
+ for g in data["gap_details"]:
+ if g["source"] == "missing" and g["name"] not in all_src_missing:
+ all_src_missing.add(g["name"])
+ src_missing_details.append({
"name": g["name"],
- "emulator": display,
+ "emulator": data["emulator"],
+ "emu_key": emu_name,
"required": g["required"],
"source_ref": g["source_ref"],
- }
+ })
+
+ if src_missing_details:
+ req_src = [m for m in src_missing_details if m["required"]]
+ lines.extend([
+ f"### Files to Source ({len(src_missing_details)} unique, "
+ f"{len(req_src)} required)",
+ "",
+ "| File | Emulator | Required | Affects platforms | Source ref |",
+ "|------|----------|----------|------------------|-----------|",
+ ])
+ for m in sorted(
+ src_missing_details,
+ key=lambda x: (not x["required"], x["name"]),
+ ):
+ plats = sorted(emu_to_platforms.get(m["emu_key"], set()))
+ plat_badges = (
+ " ".join(
+ f'{p}'
+ for p in plats
+ )
+ if plats
+ else "-"
)
-
- # Build reverse map: emulator -> platforms that use it (via cores: field)
- from common import resolve_platform_cores
-
- emu_to_platforms: dict[str, set[str]] = {}
- unique_profiles = {
- k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
- }
- for pname in coverages:
- config = coverages[pname]["config"]
- matched = resolve_platform_cores(config, unique_profiles)
- for emu_name in matched:
- emu_to_platforms.setdefault(emu_name, set()).add(pname)
-
- if missing_details:
- req_missing = [m for m in missing_details if m["required"]]
- opt_missing = [m for m in missing_details if not m["required"]]
-
- lines.extend(
- [
- "",
- f"## Missing Files ({len(missing_details)} unique, {len(req_missing)} required)",
- "",
- "Files loaded by emulators but not available in the repository.",
- "Adding these files would improve pack completeness.",
- "",
- ]
- )
-
- if req_missing:
- lines.extend(
- [
- "### Required (highest priority)",
- "",
- "These files are needed for the emulator to function.",
- "",
- "| File | Emulator | Affects platforms | Source |",
- "|------|----------|------------------|--------|",
- ]
- )
- for m in sorted(req_missing, key=lambda x: x["name"]):
- emu_key = next(
- (
- k
- for k, v in profiles.items()
- if v.get("emulator") == m["emulator"]
- ),
- "",
- )
- plats = sorted(emu_to_platforms.get(emu_key, set()))
- plat_str = ", ".join(plats) if plats else "-"
+ req = "yes" if m["required"] else "no"
lines.append(
- f"| `{m['name']}` | {m['emulator']} | {plat_str} | {m['source_ref']} |"
+ f"| `{m['name']}` | {m['emulator']} | {req} | "
+ f"{plat_badges} | {m['source_ref']} |"
)
lines.append("")
- if opt_missing:
- lines.extend(
- [
- "### Optional",
- "",
- "| File | Emulator | Source |",
- "|------|----------|--------|",
- ]
- )
- for m in sorted(opt_missing, key=lambda x: x["name"]):
- lines.append(f"| `{m['name']}` | {m['emulator']} | {m['source_ref']} |")
- lines.append("")
-
- lines.extend(["", f"*Generated on {_timestamp()}*"])
+ lines.extend(["", f'Generated on {_timestamp()}.
'])
return "\n".join(lines) + "\n"
+
+
def generate_cross_reference(
coverages: dict,
profiles: dict,
@@ -1280,10 +1641,19 @@ def generate_cross_reference(
for core in p.get("cores", [pname]):
core_to_profile[str(core)] = pname
+ total_cores = len(unique)
+ total_upstreams = len({
+ p.get("upstream", p.get("source", ""))
+ for p in unique.values()
+ if p.get("upstream") or p.get("source")
+ })
+
lines = [
f"# Cross-reference - {SITE_NAME}",
"",
- "Platform >Core >Systems >Upstream emulator.",
+ f"Platform > Core > Systems > Upstream emulator. "
+ f"{total_cores} cores across {len(coverages)} platforms, "
+ f"tracing back to {total_upstreams} upstream projects.",
"",
"The libretro core is a port of the upstream emulator. "
"Files, features, and validation may differ between the two.",
@@ -1333,7 +1703,8 @@ def generate_cross_reference(
for emu_name in sorted(matched.keys()):
p = matched[emu_name]
emu_display = p.get("emulator", emu_name)
- cls = p.get("core_classification", "-")
+ cls_raw = p.get("core_classification", "-")
+ cls = CLS_LABELS.get(cls_raw, cls_raw)
p.get("type", "")
upstream = p.get("upstream", "")
source = p.get("source", "")
@@ -1413,7 +1784,8 @@ def generate_cross_reference(
classifications = set()
all_plats: set[str] = set()
for c in cores:
- classifications.add(unique[c].get("core_classification", "-"))
+ raw_cls = unique[c].get("core_classification", "-")
+ classifications.add(CLS_LABELS.get(raw_cls, raw_cls))
all_plats.update(platform_membership.get(c, set()))
cls_str = ", ".join(sorted(classifications))
@@ -1799,7 +2171,8 @@ def main():
# Generate gap analysis page
print("Generating gap analysis page...")
write_if_changed(
- str(docs / "gaps.md"), generate_gap_analysis(profiles, coverages, db)
+ str(docs / "gaps.md"),
+ generate_gap_analysis(profiles, coverages, db, suppl_names),
)
# Wiki pages: copy manually maintained sources + generate dynamic ones
@@ -1850,6 +2223,8 @@ theme:
icon: material/brightness-4
name: Switch to auto
font: false
+ icon:
+ logo: material/chip
features:
- navigation.tabs
- navigation.sections
@@ -1859,6 +2234,8 @@ theme:
- search.highlight
- content.tabs.link
- toc.follow
+extra_css:
+- stylesheets/extra.css
markdown_extensions:
- tables
- admonition