#!/usr/bin/env python3
"""Generate MkDocs site pages from database.json, platform configs, and emulator profiles.
Reads the same data sources as verify.py and generate_pack.py to produce
a complete documentation site. Zero manual content.
Usage:
python scripts/generate_site.py
python scripts/generate_site.py --db database.json --platforms-dir platforms
"""
from __future__ import annotations
import argparse
import os
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
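# Make sibling modules in scripts/ (common, generate_readme, cross_reference) importable when run directly.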
sys.path.insert(0, os.path.dirname(__file__))
from common import (
list_registered_platforms,
load_database,
load_emulator_profiles,
require_yaml,
write_if_changed,
)
yaml = require_yaml()
from generate_readme import compute_coverage
DOCS_DIR = "docs"
SITE_NAME = "RetroBIOS"
REPO_URL = "https://github.com/Abdess/retrobios"
RELEASE_URL = f"{REPO_URL}/releases/latest"
GENERATED_DIRS = ["platforms", "systems", "emulators"]
WIKI_SRC_DIR = "wiki" # manually maintained wiki sources
SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
CLS_LABELS = {
"official_port": "Official ports",
"community_fork": "Community forks",
"pure_libretro": "Pure libretro",
"game_engine": "Game engines",
"enhanced_fork": "Enhanced forks",
"frozen_snapshot": "Frozen snapshots",
"embedded_hle": "Embedded HLE",
"launcher": "Launchers",
"unclassified": "Unclassified",
"other": "Other",
}
# Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking
_system_page_map: dict[str, tuple[str, str]] = {}
def _build_system_page_map_from_data(
manufacturers: dict,
coverages: dict,
db: dict,
) -> None:
"""Build system_id -> (manufacturer_slug, console_name) mapping.
Uses platform file paths to trace system_id -> bios directory -> manufacturer page.
"""
db.get("files", {})
db.get("indexes", {}).get("by_name", {})
# Build reverse index: filename -> (manufacturer, console) from bios/ structure
file_to_console: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
for console, entries in consoles.items():
for entry in entries:
file_to_console[entry["name"]] = (mfr, console)
# Build normalized console name index for fuzzy matching
console_norm: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
mfr_norm = mfr.lower().replace(" ", "-")
for console in consoles:
norm = console.lower().replace(" ", "-")
entry = (slug, console)
console_norm[norm] = entry
console_norm[f"{mfr_norm}-{norm}"] = entry
# Short aliases: strip common manufacturer prefix words
for prefix in (
f"{mfr_norm}-",
"nintendo-",
"sega-",
"sony-",
"snk-",
"nec-",
):
if norm.startswith(prefix.replace(f"{mfr_norm}-", "")):
pass # already covered by norm
key = f"{prefix}{norm}"
console_norm[key] = entry
# Map system_id -> (manufacturer, console) via platform file entries
for cov in coverages.values():
config = cov["config"]
for sys_id, system in config.get("systems", {}).items():
if sys_id in _system_page_map:
continue
# Strategy 1: trace via file paths in DB
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname in file_to_console:
mfr, console = file_to_console[fname]
slug = mfr.lower().replace(" ", "-")
_system_page_map[sys_id] = (slug, console)
break
if sys_id in _system_page_map:
continue
# Strategy 2: fuzzy match system_id against console directory names
if sys_id in console_norm:
_system_page_map[sys_id] = console_norm[sys_id]
else:
# Try partial match: "nintendo-wii" matches "Wii" under "Nintendo"
parts = sys_id.split("-")
for i in range(len(parts)):
suffix = "-".join(parts[i:])
if suffix in console_norm:
_system_page_map[sys_id] = console_norm[suffix]
break
def _system_link(sys_id: str, prefix: str = "") -> str:
"""Generate a markdown link to a system page with anchor."""
if sys_id in _system_page_map:
slug, console = _system_page_map[sys_id]
anchor = console.lower().replace(" ", "-").replace("/", "-")
return f"[{sys_id}]({prefix}systems/{slug}.md#{anchor})"
return sys_id
def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None:
"""Render any YAML value as indented markdown."""
pad = " " * indent
if isinstance(val, dict):
for k, v in val.items():
if isinstance(v, dict):
lines.append(f"{pad}**{k}:**")
lines.append("")
_render_yaml_value(lines, v, indent + 4)
elif isinstance(v, list):
lines.append(f"{pad}**{k}:**")
lines.append("")
for item in v:
if isinstance(item, dict):
parts = [
f"{ik}: {iv}"
for ik, iv in item.items()
if not isinstance(iv, (dict, list))
]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
lines.append("")
else:
# Truncate very long strings in tables
sv = str(v)
if len(sv) > 200:
sv = sv[:200] + "..."
lines.append(f"{pad}- **{k}:** {sv}")
elif isinstance(val, list):
for item in val:
if isinstance(item, dict):
parts = [
f"{ik}: {iv}"
for ik, iv in item.items()
if not isinstance(iv, (dict, list))
]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
elif isinstance(val, str) and "\n" in val:
for line in val.split("\n"):
lines.append(f"{pad}{line}")
else:
lines.append(f"{pad}{val}")
def _platform_link(name: str, display: str, prefix: str = "") -> str:
"""Generate a markdown link to a platform page."""
return f"[{display}]({prefix}platforms/{name}.md)"
def _emulator_link(name: str, prefix: str = "") -> str:
"""Generate a markdown link to an emulator page."""
return f"[{name}]({prefix}emulators/{name}.md)"
def _timestamp() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _fmt_size(size: int) -> str:
if size >= 1024 * 1024 * 1024:
return f"{size / (1024**3):.1f} GB"
if size >= 1024 * 1024:
return f"{size / (1024**2):.1f} MB"
if size >= 1024:
return f"{size / 1024:.1f} KB"
return f"{size} B"
def _pct(n: int, total: int) -> str:
if total == 0:
return "0%"
return f"{n / total * 100:.1f}%"
def _status_icon(pct: float) -> str:
if pct >= 100:
return "OK"
if pct >= 95:
return "~OK"
return "partial"
# Home page
def generate_home(
db: dict, coverages: dict, profiles: dict, registry: dict | None = None
) -> str:
total_files = db.get("total_files", 0)
total_size = db.get("total_size", 0)
ts = _timestamp()
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
emulator_count = len(unique)
# Classification stats
classifications: dict[str, int] = {}
for p in unique.values():
cls = p.get("core_classification", "unclassified")
classifications[cls] = classifications.get(cls, 0) + 1
# Count total systems across all profiles
all_systems = set()
for p in unique.values():
all_systems.update(p.get("systems", []))
lines = [
# hero banner + stat cards (rb-* CSS class names assumed)
'<div class="rb-hero" markdown>',
"",
f"# {SITE_NAME}",
"",
"Source-verified BIOS and firmware packs for retrogaming platforms.",
"",
"</div>",
"",
'<div class="rb-stat-grid" markdown>',
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_files:,}</span>',
'<span class="rb-stat-label">Files</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{len(coverages)}</span>',
'<span class="rb-stat-label">Platforms</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{emulator_count}</span>',
'<span class="rb-stat-label">Emulators profiled</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{_fmt_size(total_size)}</span>',
'<span class="rb-stat-label">Total size</span>',
"</div>",
"",
"</div>",
"",
]
# Platforms FIRST (main action)
lines.extend(
[
"## Platforms",
"",
"| | Platform | Files | Verification | Download |",
"|---|----------|-------|-------------|----------|",
]
)
mode_icons = {"md5": "MD5", "sha1": "SHA1", "existence": "exists"}
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = (
f"{{ width=20 loading=lazy }}" if logo_url else ""
)
mode_label = mode_icons.get(cov["mode"], cov["mode"])
lines.append(
f"| {logo_md} | [{display}](platforms/{name}.md) | "
f"{cov['present']:,} | {mode_label} | "
f"[Pack]({RELEASE_URL}){{ .md-button .md-button--primary }} |"
)
# Quick start (collapsible -- secondary info)
lines.extend(
[
"",
'??? info "Where to extract"',
"",
" | Platform | Extract to |",
" |----------|-----------|",
" | RetroArch / Lakka | `system/` |",
" | Batocera | `/userdata/bios/` |",
" | Recalbox | `/recalbox/share/bios/` |",
" | RetroBat | `bios/` |",
" | RetroDECK | `~/retrodeck/bios/` |",
" | EmuDeck | `Emulation/bios/` |",
"",
]
)
# Emulator classification breakdown
lines.extend(
[
"## Emulator profiles",
"",
"| Classification | Count |",
"|---------------|-------|",
]
)
for cls, count in sorted(classifications.items(), key=lambda x: -x[1]):
label = CLS_LABELS.get(cls, cls)
lines.append(f"| [{label}](emulators/index.md#{cls}) | {count} |")
# Methodology (collapsible)
lines.extend(
[
"",
'??? abstract "Methodology"',
"",
" Each file is checked against the emulator's source code. "
"Documentation and metadata can drift from actual runtime behavior, "
"so the source is the primary reference.",
"",
" 1. **Upstream emulator source** -- what the original project "
"loads (Dolphin, PCSX2, Mednafen...)",
" 2. **Libretro core source** -- the RetroArch port, which may "
"adapt paths or add files",
" 3. **`.info` declarations** -- metadata that platforms rely on, "
"checked for accuracy",
"",
]
)
# Quick links
lines.extend(
[
"---",
"",
"[Systems](systems/index.md){ .md-button } "
"[Emulators](emulators/index.md){ .md-button } "
"[Cross-reference](cross-reference.md){ .md-button } "
"[Gap Analysis](gaps.md){ .md-button } "
"[Contributing](contributing.md){ .md-button .md-button--primary }",
"",
f"Generated on {ts}.",
]
)
return "\n".join(lines) + "\n"
# Platform pages
def generate_platform_index(coverages: dict) -> str:
total_files = sum(c["total"] for c in coverages.values())
total_present = sum(c["present"] for c in coverages.values())
total_verified = sum(c["verified"] for c in coverages.values())
lines = [
f"# Platforms - {SITE_NAME}",
"",
f"{len(coverages)} supported platforms, "
f"{total_present:,}/{total_files:,} files present, "
f"{total_verified:,} verified.",
"",
"| Platform | Coverage | Verification | Status |",
"|----------|----------|-------------|--------|",
]
mode_labels = {
"md5": 'MD5',
"sha1": 'SHA1',
"existence": 'existence',
}
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
pct = _pct(cov["present"], cov["total"])
plat_status = cov["config"].get("status", "active")
badge_cls = (
"rb-badge-success"
if pct_val >= 95
else "rb-badge-warning"
if pct_val >= 70
else "rb-badge-danger"
)
coverage_str = (
f'{cov["present"]}/{cov["total"]} '
f'<span class="{badge_cls}">{pct}</span>'
)
mode_html = mode_labels.get(
cov["mode"],
f'<span class="rb-badge-info">{cov["mode"]}</span>',
)
if plat_status == "archived":
status = '<span class="rb-badge-info">archived</span>'
elif pct_val >= 100:
status = '<span class="rb-badge-success">complete</span>'
elif pct_val >= 95:
status = '<span class="rb-badge-success">near</span>'
else:
status = '<span class="rb-badge-warning">partial</span>'
lines.append(
f"| [{display}]({name}.md) | {coverage_str} | {mode_html} | {status} |"
)
return "\n".join(lines) + "\n"
def generate_platform_page(
name: str,
cov: dict,
registry: dict | None = None,
emulator_files: dict | None = None,
) -> str:
config = cov["config"]
display = cov["platform"]
mode = cov["mode"]
pct = _pct(cov["present"], cov["total"])
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = (
f"{{ width=48 align=right }}\n\n" if logo_url else ""
)
homepage = config.get("homepage", "")
version = config.get("version", "")
hash_type = config.get("hash_type", "")
base_dest = config.get("base_destination", "")
pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
coverage_badge = (
"rb-badge-success"
if pct_val >= 95
else "rb-badge-warning"
if pct_val >= 70
else "rb-badge-danger"
)
mode_badge = (
"rb-badge-success" if mode in ("md5", "sha1") else "rb-badge-info"
)
lines = [
f"# {display} - {SITE_NAME}",
"",
logo_md,
]
# Stat cards
lines.extend(
[
'<div class="rb-stat-grid" markdown>',  # stat cards (rb-* CSS class names assumed)
"",
f'<div class="rb-stat-card {coverage_badge}" markdown>',
f'<span class="rb-stat-value">{cov["present"]}/{cov["total"]}</span>',
f'<span class="rb-stat-label">Coverage ({pct})</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{cov["verified"]}</span>',
'<span class="rb-stat-label">Verified</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{cov["missing"]}</span>',
'<span class="rb-stat-label">Missing</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value {mode_badge}">'
f'{mode}</span>',
'<span class="rb-stat-label">Verification</span>',
"</div>",
"",
"</div>",
"",
"| | |",
"|---|---|",
]
)
if hash_type:
lines.append(f"| Hash type | {hash_type} |")
if version:
lines.append(f"| Version | {version} |")
if base_dest:
lines.append(f"| BIOS path | `{base_dest}/` |")
if homepage:
lines.append(f"| Homepage | [{homepage}]({homepage}) |")
lines.extend(
[
"",
f"[Download {display} Pack]({RELEASE_URL})"
"{ .md-button .md-button--primary }",
"",
]
)
# Build lookup from config file entries (has hashes/sizes)
config_files: dict[str, dict] = {}
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname:
config_files[fname] = fe
# Group details by system
by_system: dict[str, list] = {}
for d in cov["details"]:
sys_id = d.get("system", "unknown")
by_system.setdefault(sys_id, []).append(d)
# System summary table (quick navigation)
lines.extend(
[
"## Systems overview",
"",
"| System | Files | Status | Emulators |",
"|--------|-------|--------|-----------|",
]
)
for sys_id, files in sorted(by_system.items()):
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
non_ok = total - ok_count
if non_ok == 0:
status = '<span class="rb-badge-success">OK</span>'
else:
status = (
f'<span class="rb-badge-warning">'
f'{non_ok} issue{"s" if non_ok > 1 else ""}</span>'
)
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
if sys_id in emu_data.get("systems", set()):
sys_emus.append(emu_name)
emu_str = ", ".join(sys_emus[:3])
if len(sys_emus) > 3:
emu_str += f" +{len(sys_emus) - 3}"
anchor = sys_id.replace(" ", "-")
lines.append(
f"| [{sys_id}](#{anchor}) | {ok_count}/{total} | {status} | {emu_str} |"
)
lines.append("")
# Per-system detail sections (collapsible for large platforms)
use_collapsible = len(by_system) > 10
for sys_id, files in sorted(by_system.items()):
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
if sys_id in emu_data.get("systems", set()):
sys_emus.append(emu_name)
sys_link = _system_link(sys_id, "../")
anchor = sys_id.replace(" ", "-")
if use_collapsible:
status_tag = "OK" if ok_count == total else f"{total - ok_count} issues"
lines.append(f'<a id="{anchor}"></a>')  # anchor target (markup assumed) so overview links resolve
lines.append(f'??? note "{sys_id} ({ok_count}/{total} - {status_tag})"')
lines.append("")
pad = " "
else:
lines.append(f"## {sys_link}")
lines.append("")
pad = ""
lines.append(f"{pad}{ok_count}/{total} files verified")
if sys_emus:
emu_links = ", ".join(_emulator_link(e, "../") for e in sorted(sys_emus))
lines.append(f"{pad}Emulators: {emu_links}")
lines.append("")
# File listing
for f in sorted(files, key=lambda x: x["name"]):
status = f["status"]
fname = f["name"]
cfg_entry = config_files.get(fname, {})
sha1 = cfg_entry.get("sha1", f.get("sha1", ""))
md5 = cfg_entry.get("md5", f.get("expected_md5", ""))
size = cfg_entry.get("size", f.get("size", 0))
if status == "ok":
status_display = "OK"
elif status == "untested":
reason = f.get("reason", "")
status_display = f"untested: {reason}" if reason else "untested"
elif status == "missing":
status_display = "**missing**"
else:
status_display = status
size_str = _fmt_size(size) if size else ""
details = [status_display]
if size_str:
details.append(size_str)
lines.append(f"{pad}- `{fname}` - {', '.join(details)}")
# Show full hashes on a sub-line (useful for copy-paste)
if sha1 or md5:
hash_parts = []
if sha1:
hash_parts.append(f"SHA1: `{sha1}`")
if md5:
hash_parts.append(f"MD5: `{md5}`")
lines.append(f"{pad} {' | '.join(hash_parts)}")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# System pages
def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]:
"""Group files by manufacturer -> console -> files."""
manufacturers: dict[str, dict[str, list]] = {}
for sha1, entry in db.get("files", {}).items():
path = entry.get("path", "")
parts = path.split("/")
if len(parts) < 3 or parts[0] != "bios":
continue
manufacturer = parts[1]
console = parts[2]
manufacturers.setdefault(manufacturer, {}).setdefault(console, []).append(entry)
return manufacturers
def generate_systems_index(manufacturers: dict) -> str:
total_mfr = len(manufacturers)
total_consoles = sum(len(c) for c in manufacturers.values())
total_files = sum(
len(files) for consoles in manufacturers.values() for files in consoles.values()
)
lines = [
f"# Systems - {SITE_NAME}",
"",
f"{total_mfr} manufacturers, {total_consoles} consoles, "
f"{total_files:,} files in the repository.",
"",
"| Manufacturer | Consoles | Files |",
"|-------------|----------|-------|",
]
for mfr in sorted(manufacturers.keys()):
consoles = manufacturers[mfr]
file_count = sum(len(files) for files in consoles.values())
slug = mfr.lower().replace(" ", "-")
lines.append(f"| [{mfr}]({slug}.md) | {len(consoles)} | {file_count} |")
return "\n".join(lines) + "\n"
def generate_system_page(
manufacturer: str,
consoles: dict[str, list],
platform_files: dict[str, set],
emulator_files: dict[str, dict],
) -> str:
manufacturer.lower().replace(" ", "-")
lines = [
f"# {manufacturer} - {SITE_NAME}",
"",
]
for console_name in sorted(consoles.keys()):
files = consoles[console_name]
icon_name = f"{manufacturer} - {console_name}".replace("/", " ")
icon_url = f"{SYSTEM_ICON_BASE}/{icon_name.replace(' ', '%20')}.png"
lines.append(f"## {{ width=24 }} {console_name}")
lines.append("")
# Separate main files from variants
main_files = [f for f in files if "/.variants/" not in f["path"]]
variant_files = [f for f in files if "/.variants/" in f["path"]]
for f in sorted(main_files, key=lambda x: x["name"]):
name = f["name"]
sha1_full = f.get("sha1", "unknown")
md5_full = f.get("md5", "unknown")
size = _fmt_size(f.get("size", 0))
# Cross-reference: which platforms declare this file
plats = sorted(p for p, names in platform_files.items() if name in names)
# Cross-reference: which emulators load this file
emus = sorted(
e
for e, data in emulator_files.items()
if name in data.get("files", set())
)
lines.append(f"**`{name}`** ({size})")
lines.append("")
lines.append(f"- SHA1: `{sha1_full}`")
lines.append(f"- MD5: `{md5_full}`")
if plats:
plat_links = [_platform_link(p, p, "../") for p in plats]
lines.append(f"- Platforms: {', '.join(plat_links)}")
if emus:
emu_links = [_emulator_link(e, "../") for e in emus]
lines.append(f"- Emulators: {', '.join(emu_links)}")
lines.append("")
if variant_files:
lines.append("**Variants:**")
lines.append("")
for v in sorted(variant_files, key=lambda x: x["name"]):
vname = v["name"]
vmd5 = v.get("md5", "unknown")
lines.append(f"- `{vname}` MD5: `{vmd5}`")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# Emulator pages
def generate_emulators_index(profiles: dict) -> str:
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
aliases = {k: v for k, v in profiles.items() if v.get("type") == "alias"}
# Group by classification
by_class: dict[str, list[tuple[str, dict]]] = {}
for name in sorted(unique.keys()):
p = unique[name]
cls = p.get("core_classification", "other")
by_class.setdefault(cls, []).append((name, p))
total_files = sum(len(p.get("files", [])) for p in unique.values())
lines = [
f"# Emulators - {SITE_NAME}",
"",
f"**{len(unique)}** emulator profiles, **{total_files}** files total, **{len(aliases)}** aliases.",
"",
"| Classification | Count | Description |",
"|---------------|-------|-------------|",
]
cls_desc = {
"official_port": "Same author maintains both standalone and libretro",
"community_fork": "Third-party port to libretro",
"pure_libretro": "Built for libretro, no standalone version",
"game_engine": "Game engine reimplementation",
"enhanced_fork": "Fork with added features",
"frozen_snapshot": "Frozen at an old version",
"embedded_hle": "All ROMs compiled into binary",
"launcher": "Launches an external emulator",
"other": "Unclassified",
}
cls_order = [
"official_port",
"community_fork",
"pure_libretro",
"game_engine",
"enhanced_fork",
"frozen_snapshot",
"embedded_hle",
"launcher",
"other",
]
for cls in cls_order:
entries = by_class.get(cls, [])
if not entries:
continue
label = CLS_LABELS.get(cls, cls)
desc = cls_desc.get(cls, "")
lines.append(f"| [{label}](#{cls}) | {len(entries)} | {desc} |")
lines.append("")
for cls in cls_order:
entries = by_class.get(cls, [])
if not entries:
continue
label = CLS_LABELS.get(cls, cls)
desc = cls_desc.get(cls, "")
lines.extend(
[
f'## {label} {{ #{cls} }}',
"",
f"*{desc}* -- {len(entries)} profiles",
"",
"| Engine | Systems | Files |",
"|--------|---------|-------|",
]
)
for name, p in entries:
emu_name = p.get("emulator", name)
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
file_count = len(files)
file_str = str(file_count) if file_count else "-"
lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {file_str} |")
lines.append("")
if aliases:
lines.extend(["## Aliases", ""])
lines.append("| Core | Points to |")
lines.append("|------|-----------|")
for name in sorted(aliases.keys()):
parent = aliases[name].get(
"alias_of", aliases[name].get("bios_identical_to", "unknown")
)
lines.append(f"| {name} | [{parent}]({parent}.md) |")
lines.append("")
return "\n".join(lines) + "\n"
def generate_emulator_page(
name: str, profile: dict, db: dict, platform_files: dict | None = None
) -> str:
if profile.get("type") == "alias":
parent = profile.get("alias_of", profile.get("bios_identical_to", "unknown"))
return (
f"# {name} - {SITE_NAME}\n\n"
f"This core uses the same firmware as **{parent}**.\n\n"
f"See [{parent}]({parent}.md) for details.\n"
)
emu_name = profile.get("emulator", name)
emu_type = profile.get("type", "unknown")
classification = profile.get("core_classification", "")
source = profile.get("source", "")
upstream = profile.get("upstream", "")
version = profile.get("core_version", "unknown")
profile.get("display_name", emu_name)
profiled = profile.get("profiled_date", "unknown")
systems = profile.get("systems", [])
cores = profile.get("cores", [name])
files = profile.get("files", [])
notes_raw = profile.get("notes", profile.get("note", ""))
notes = (
str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else ""
)
exclusion = profile.get("exclusion_note", "")
data_dirs = profile.get("data_directories", [])
lines = [
f"# {emu_name} - {SITE_NAME}",
"",
"| | |",
"|---|---|",
f"| Type | {emu_type} |",
]
if classification:
cls_display = CLS_LABELS.get(classification, classification)
lines.append(f"| Classification | {cls_display} |")
if source:
lines.append(f"| Source | [{source}]({source}) |")
if upstream and upstream != source:
lines.append(f"| Upstream | [{upstream}]({upstream}) |")
lines.append(f"| Version | {version} |")
lines.append(f"| Profiled | {profiled} |")
if cores:
lines.append(f"| Cores | {', '.join(str(c) for c in cores)} |")
if systems:
sys_links = [_system_link(s, "../") for s in systems]
lines.append(f"| Systems | {', '.join(sys_links)} |")
mame_ver = profile.get("mame_version", "")
if mame_ver:
lines.append(f"| MAME version | {mame_ver} |")
author = profile.get("author", "")
if author:
lines.append(f"| Author | {author} |")
based_on = profile.get("based_on", "")
if based_on:
lines.append(f"| Based on | {based_on} |")
# Additional metadata fields (scalar values only - complex ones go to collapsible sections)
for field, label in [
("core", "Core ID"),
("core_name", "Core name"),
("bios_size", "BIOS size"),
("bios_directory", "BIOS directory"),
("bios_detection", "BIOS detection"),
("bios_selection", "BIOS selection"),
("firmware_file", "Firmware file"),
("firmware_source", "Firmware source"),
("firmware_install", "Firmware install"),
("firmware_detection", "Firmware detection"),
("resources_directory", "Resources directory"),
("rom_path", "ROM path"),
("game_count", "Game count"),
("verification", "Verification mode"),
("source_ref", "Source ref"),
("analysis_date", "Analysis date"),
("analysis_commit", "Analysis commit"),
]:
val = profile.get(field)
if val is None or val == "" or isinstance(val, (dict, list)):
continue
if isinstance(val, str) and val.startswith("http"):
lines.append(f"| {label} | [{val}]({val}) |")
else:
lines.append(f"| {label} | {val} |")
lines.append("")
# Platform-specific details (rich structured data)
platform_details = profile.get("platform_details")
if platform_details and isinstance(platform_details, dict):
lines.extend(['???+ info "Platform details"', ""])
for pk, pv in platform_details.items():
if isinstance(pv, dict):
lines.append(f" **{pk}:**")
for sk, sv in pv.items():
lines.append(f" - {sk}: {sv}")
elif isinstance(pv, list):
lines.append(f" **{pk}:** {', '.join(str(x) for x in pv)}")
else:
lines.append(f" **{pk}:** {pv}")
lines.append("")
# All remaining structured data blocks as collapsible sections
_structured_blocks = [
("analysis", "Source analysis"),
("memory_layout", "Memory layout"),
("regions", "Regions"),
("nvm_layout", "NVM layout"),
("model_kickstart_map", "Model kickstart map"),
("builtin_boot_roms", "Built-in boot ROMs"),
("common_bios_filenames", "Common BIOS filenames"),
("valid_bios_crc32", "Valid BIOS CRC32"),
("dev_flash", "dev_flash"),
("dev_flash2", "dev_flash2"),
("dev_flash3", "dev_flash3"),
("firmware_modules", "Firmware modules"),
("firmware_titles", "Firmware titles"),
("fallback_fonts", "Fallback fonts"),
("io_devices", "I/O devices"),
("partitions", "Partitions"),
("mlc_structure", "MLC structure"),
("machine_directories", "Machine directories"),
("machine_properties", "Machine properties"),
("whdload_kickstarts", "WHDLoad kickstarts"),
("bios_identical_to", "BIOS identical to"),
("pack_structure", "Pack structure"),
("firmware_version", "Firmware version"),
]
for field, label in _structured_blocks:
val = profile.get(field)
if val is None:
continue
lines.append(f'???+ abstract "{label}"')
lines.append("")
_render_yaml_value(lines, val, indent=4)
lines.append("")
# Notes
if notes:
indented = notes.replace("\n", "\n ")
lines.extend(['???+ note "Technical notes"', f" {indented}", ""])
if not files:
lines.append("No BIOS or firmware files required.")
if exclusion:
lines.extend(
[
"",
'!!! info "Why no files"',
f" {exclusion}",
]
)
else:
by_name = db.get("indexes", {}).get("by_name", {})
db.get("files", {})
# Stats by category
bios_files = [f for f in files if f.get("category", "bios") == "bios"]
game_data = [f for f in files if f.get("category") == "game_data"]
bios_zips = [f for f in files if f.get("category") == "bios_zip"]
in_repo_count = sum(1 for f in files if f.get("name", "") in by_name)
missing_count = len(files) - in_repo_count
req_count = sum(1 for f in files if f.get("required"))
opt_count = len(files) - req_count
hle_count = sum(1 for f in files if f.get("hle_fallback"))
parts = [f"**{len(files)} files**"]
parts.append(f"{req_count} required, {opt_count} optional")
parts.append(f"{in_repo_count} in repo, {missing_count} missing")
if hle_count:
parts.append(f"{hle_count} with HLE fallback")
lines.append(" | ".join(parts))
if game_data or bios_zips:
cats = []
if bios_files:
cats.append(f"{len(bios_files)} BIOS")
if game_data:
cats.append(f"{len(game_data)} game data")
if bios_zips:
cats.append(f"{len(bios_zips)} BIOS ZIPs")
lines.append(f"Categories: {', '.join(cats)}")
lines.append("")
# File table
for f in files:
fname = f.get("name", "")
required = f.get("required", False)
in_repo = fname in by_name
source_ref = f.get("source_ref", "")
mode = f.get("mode", "")
hle = f.get("hle_fallback", False)
aliases = f.get("aliases", [])
category = f.get("category", "")
validation = f.get("validation", [])
size = f.get("size")
fnote = f.get("note", f.get("notes", ""))
storage = f.get("storage", "")
fmd5 = f.get("md5", "")
fsha1 = f.get("sha1", "")
fcrc32 = f.get("crc32", "")
fsha256 = f.get("sha256", "")
fadler32 = f.get("known_hash_adler32", "")
fmin = f.get("min_size")
fmax = f.get("max_size")
desc = f.get("description", "")
region = f.get("region", "")
archive = f.get("archive", "")
fpath = f.get("path", "")
fsystem = f.get("system", "")
priority = f.get("priority")
fast_boot = f.get("fast_boot")
bundled = f.get("bundled", False)
embedded = f.get("embedded", False)
has_builtin = f.get("has_builtin", False)
contents = f.get("contents", [])
config_key = f.get("config_key", "")
dest = f.get("dest", f.get("destination", ""))
ftype = f.get("type", "")
fpattern = f.get("pattern", "")
region_check = f.get("region_check")
size_note = f.get("size_note", "")
size_options = f.get("size_options", [])
size_range = f.get("size_range", "")
# Status badges (HTML); the rb-badge-* class chosen per badge is assumed
badges = []
if required:
badges.append(
'<span class="rb-badge-danger">required</span>'
)
else:
badges.append(
'<span class="rb-badge-info">optional</span>'
)
if not in_repo:
badges.append(
'<span class="rb-badge-danger">missing</span>'
)
elif in_repo:
badges.append(
'<span class="rb-badge-success">in repo</span>'
)
if hle:
badges.append(
'<span class="rb-badge-info">HLE fallback</span>'
)
if mode:
badges.append(
f'<span class="rb-badge-info">{mode}</span>'
)
if category and category != "bios":
badges.append(
f'<span class="rb-badge-info">{category}</span>'
)
if region:
region_str = (
", ".join(region) if isinstance(region, list) else str(region)
)
badges.append(
f'<span class="rb-badge-info">{region_str}</span>'
)
if storage and storage != "embedded":
badges.append(
f'<span class="rb-badge-info">{storage}</span>'
)
if bundled:
badges.append(
'<span class="rb-badge-info">bundled</span>'
)
if embedded:
badges.append(
'<span class="rb-badge-info">embedded</span>'
)
if has_builtin:
badges.append(
'<span class="rb-badge-info">built-in fallback</span>'
)
if archive:
badges.append(
f'<span class="rb-badge-info">in {archive}</span>'
)
if ftype and ftype != "bios":
badges.append(
f'<span class="rb-badge-info">{ftype}</span>'
)
badge_str = " ".join(badges)
border_cls = (
"rb-file-entry-required" if required else "rb-file-entry-optional"
)
lines.append(
f'<div class="rb-file-entry {border_cls}" markdown>'
)
lines.append("")
lines.append(f"**`{fname}`** {badge_str}")
if desc:
lines.append(f"
{desc}")
lines.append("")
details = []
if fpath and fpath != fname:
details.append(f"Path: `{fpath}`")
if fsystem:
details.append(f"System: {_system_link(fsystem, '../')}")
if size:
size_str = _fmt_size(size)
if fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
size_str += f" ({', '.join(bounds)})"
details.append(f"Size: {size_str}")
elif fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
details.append(f"Size: {', '.join(bounds)}")
if fsha1:
details.append(f"SHA1: `{fsha1}`")
if fmd5:
details.append(f"MD5: `{fmd5}`")
if fcrc32:
details.append(f"CRC32: `{fcrc32}`")
if fsha256:
details.append(f"SHA256: `{fsha256}`")
if fadler32:
details.append(f"Adler32: `{fadler32}`")
if aliases:
details.append(f"Aliases: {', '.join(f'`{a}`' for a in aliases)}")
if priority is not None:
details.append(f"Priority: {priority}")
if fast_boot is not None:
details.append(f"Fast boot: {'yes' if fast_boot else 'no'}")
if validation:
if isinstance(validation, list):
details.append(f"Validation: {', '.join(validation)}")
elif isinstance(validation, dict):
for scope, checks in validation.items():
details.append(f"Validation ({scope}): {', '.join(checks)}")
if source_ref:
details.append(f"Source: `{source_ref}`")
if platform_files:
plats = sorted(
p for p, names in platform_files.items() if fname in names
)
if plats:
plat_links = [_platform_link(p, p, "../") for p in plats]
details.append(f"Platforms: {', '.join(plat_links)}")
if dest and dest != fname and dest != fpath:
details.append(f"Destination: `{dest}`")
if config_key:
details.append(f"Config key: `{config_key}`")
if fpattern:
details.append(f"Pattern: `{fpattern}`")
if region_check is not None:
details.append(f"Region check: {'yes' if region_check else 'no'}")
if size_note:
details.append(f"Size note: {size_note}")
if size_options:
details.append(
f"Size options: {', '.join(_fmt_size(s) for s in size_options)}"
)
if size_range:
details.append(f"Size range: {size_range}")
if details:
for d in details:
lines.append(f"- {d}")
if fnote:
lines.append(f"- {fnote}")
if contents:
lines.append(f"- Contents ({len(contents)} entries):")
for c in contents[:10]:
if isinstance(c, dict):
cname = c.get("name", "")
cdesc = c.get("description", "")
csize = c.get("size", "")
parts = [f"`{cname}`"]
if cdesc:
parts.append(cdesc)
if csize:
parts.append(_fmt_size(csize))
lines.append(f" - {' -'.join(parts)}")
else:
lines.append(f" - {c}")
if len(contents) > 10:
lines.append(f" - ... and {len(contents) - 10} more")
lines.append("")
lines.append("
")
lines.append("")
# Data directories
if data_dirs:
lines.extend(["## Data directories", ""])
for dd in data_dirs:
ref = dd.get("ref", "")
dest = dd.get("destination", "")
lines.append(f"- `{ref}` >`{dest}`")
lines.append("")
lines.extend([f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
# Contributing page
def generate_gap_analysis(
profiles: dict,
coverages: dict,
db: dict,
data_names: set[str] | None = None,
) -> str:
"""Generate a unified gap analysis page.
Combines verification results (from coverages/verify.py) with source
provenance (from cross_reference) into a single truth dashboard.
Sections:
1. Verification status -- aggregated across all platforms
2. Problem files -- missing, untested, hash mismatch
3. Core complement -- emulator files not declared by any platform
"""
from cross_reference import cross_reference as run_cross_reference
from common import resolve_platform_cores
# ---- Section 1: aggregate verify results across all platforms ----
total_verified = 0
total_untested = 0
total_missing_verify = 0
total_files_verify = 0
platform_problems: list[dict] = []
for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
total_verified += cov["verified"]
total_untested += cov["untested"]
total_missing_verify += cov["missing"]
total_files_verify += cov["total"]
for d in cov["details"]:
if d["status"] != "ok" or d.get("discrepancy"):
platform_problems.append({
"platform": cov["platform"],
"platform_key": pname,
"name": d["name"],
"status": d["status"],
"required": d.get("required", True),
"reason": d.get("reason", ""),
"discrepancy": d.get("discrepancy", ""),
"system": d.get("system", ""),
})
pct_verified = (
f"{total_verified / total_files_verify * 100:.0f}%"
if total_files_verify
else "0%"
)
lines = [
f"# Gap Analysis - {SITE_NAME}",
"",
"Unified view of BIOS verification, file provenance, and coverage gaps.",
"",
'<div class="rb-stat-grid" markdown>',  # stat cards (rb-* CSS class names assumed)
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_files_verify:,}</span>',
'<span class="rb-stat-label">Total files (all platforms)</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_verified:,}</span>',
f'<span class="rb-stat-label">Verified ({pct_verified})</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_untested:,}</span>',
'<span class="rb-stat-label">Untested</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_missing_verify:,}</span>',
'<span class="rb-stat-label">Missing</span>',
"</div>",
"",
"</div>",
"",
]
# ---- Verification per platform ----
lines.extend([
"## Verification by Platform",
"",
"| Platform | Files | Verified | Untested | Missing | Mode |",
"|----------|------:|---------:|---------:|--------:|------|",
])
for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
m = cov["missing"]
u = cov["untested"]
missing_str = (
f'<span class="rb-badge-danger">{m}</span>'
if m > 0
else '0'
)
untested_str = (
f'<span class="rb-badge-warning">{u}</span>'
if u > 0
else str(u)
)
lines.append(
f"| [{display}](platforms/{pname}.md) "
f"| {cov['total']} "
f"| {cov['verified']} "
f"| {untested_str} "
f"| {missing_str} "
f"| {cov['mode']} |"
)
lines.append("")
# ---- Section 2: Problem files ----
missing_files: dict[str, dict] = {}
untested_files: dict[str, dict] = {}
mismatch_files: dict[str, dict] = {}
for p in platform_problems:
fname = p["name"]
if p["status"] == "missing":
entry = missing_files.setdefault(fname, {
"name": fname, "required": p["required"],
"platforms": [], "reason": p["reason"],
})
entry["platforms"].append(p["platform"])
if p["required"]:
entry["required"] = True
elif p["status"] == "untested":
entry = untested_files.setdefault(fname, {
"name": fname, "required": p["required"],
"platforms": [], "reason": p["reason"],
})
entry["platforms"].append(p["platform"])
if p.get("discrepancy"):
entry = mismatch_files.setdefault(fname, {
"name": fname, "platforms": [],
"discrepancy": p["discrepancy"],
})
entry["platforms"].append(p["platform"])
total_problems = len(missing_files) + len(untested_files) + len(mismatch_files)
if total_problems > 0:
lines.extend([
"## Problem Files",
"",
f"{len(missing_files)} missing, {len(untested_files)} untested, "
f"{len(mismatch_files)} hash mismatch.",
"",
])
if missing_files:
lines.extend([
f"### Missing ({len(missing_files)} files)",
"",
"| File | Required | Platforms |",
"|------|----------|-----------|",
])
for fname in sorted(missing_files):
f = missing_files[fname]
req = "yes" if f["required"] else "no"
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {req} | {plats} |")
lines.append("")
if untested_files:
lines.extend([
f"### Untested ({len(untested_files)} files)",
"",
"Present but hash not verified.",
"",
"| File | Platforms | Reason |",
"|------|----------|--------|",
])
for fname in sorted(untested_files):
f = untested_files[fname]
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {plats} | {f['reason']} |")
lines.append("")
if mismatch_files:
lines.extend([
f"### Hash Mismatch ({len(mismatch_files)} files)",
"",
"Platform says OK but emulator validation disagrees.",
"",
"| File | Platforms | Discrepancy |",
"|------|----------|-------------|",
])
for fname in sorted(mismatch_files):
f = mismatch_files[fname]
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {plats} | {f['discrepancy']} |")
lines.append("")
# ---- Section 3: Core complement (cross-reference provenance) ----
all_declared: set[str] = set()
declared: dict[str, set[str]] = {}
for _name, cov in coverages.items():
config = cov["config"]
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname:
declared.setdefault(sys_id, set()).add(fname)
all_declared.add(fname)
active_profiles = {
k: v for k, v in profiles.items() if v.get("type") != "alias"
}
report = run_cross_reference(
active_profiles, declared, db,
data_names=data_names, all_declared=all_declared,
)
src_totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
total_undeclared = 0
emulator_gaps = []
for emu_name, data in sorted(report.items()):
if data["gaps"] == 0:
continue
total_undeclared += data["gaps"]
for key in src_totals:
src_totals[key] += data.get(f"gap_{key}", 0)
emulator_gaps.append((emu_name, data))
if total_undeclared > 0:
total_available = (
src_totals["bios"] + src_totals["data"] + src_totals["large_file"]
)
pct_available = (
f"{total_available / total_undeclared * 100:.0f}%"
if total_undeclared
else "0%"
)
lines.extend([
"## Core Complement",
"",
f"Files loaded by emulators but not declared by any platform. "
f"{total_undeclared:,} files across {len(emulator_gaps)} emulators, "
f"{total_available:,} available ({pct_available}), "
f"{src_totals['missing']} to source.",
"",
"### Provenance",
"",
"| Source | Count | Description |",
"|--------|------:|-------------|",
f"| bios/ | {src_totals['bios']} | In repository (database.json) |",
f"| data/ | {src_totals['data']} | Data directories (buildbot, GitHub) |",
f"| release | {src_totals['large_file']} "
"| GitHub release assets (large files) |",
f"| missing | {src_totals['missing']} | Not available, needs sourcing |",
"",
"### Per Emulator",
"",
"| Emulator | Undeclared | bios | data | release | Missing |",
"|----------|----------:|-----:|-----:|--------:|--------:|",
])
for emu_name, data in sorted(emulator_gaps, key=lambda x: -x[1]["gaps"]):
display = data["emulator"]
m = data.get("gap_missing", 0)
missing_str = (
f'<span class="rb-badge-danger">{m}</span>'
if m > 0
else '0'
)
lines.append(
f"| [{display}](emulators/{emu_name}.md) "
f"| {data['gaps']} "
f"| {data.get('gap_bios', 0)} "
f"| {data.get('gap_data', 0)} "
f"| {data.get('gap_large_file', 0)} "
f"| {missing_str} |"
)
lines.append("")
# List truly missing files with platform impact
emu_to_platforms: dict[str, set[str]] = {}
unique_profiles = {
k: v
for k, v in profiles.items()
if v.get("type") not in ("alias", "test")
}
for pname in coverages:
config = coverages[pname]["config"]
matched = resolve_platform_cores(config, unique_profiles)
for emu_name in matched:
emu_to_platforms.setdefault(emu_name, set()).add(pname)
all_src_missing: set[str] = set()
src_missing_details: list[dict] = []
for emu_name, data in emulator_gaps:
for g in data["gap_details"]:
if g["source"] == "missing" and g["name"] not in all_src_missing:
all_src_missing.add(g["name"])
src_missing_details.append({
"name": g["name"],
"emulator": data["emulator"],
"emu_key": emu_name,
"required": g["required"],
"source_ref": g["source_ref"],
})
if src_missing_details:
req_src = [m for m in src_missing_details if m["required"]]
lines.extend([
f"### Files to Source ({len(src_missing_details)} unique, "
f"{len(req_src)} required)",
"",
"| File | Emulator | Required | Affects platforms | Source ref |",
"|------|----------|----------|------------------|-----------|",
])
for m in sorted(
src_missing_details,
key=lambda x: (not x["required"], x["name"]),
):
plats = sorted(emu_to_platforms.get(m["emu_key"], set()))
plat_badges = (
" ".join(
f'<span class="rb-badge-info">{p}</span>'
for p in plats
)
if plats
else "-"
)
req = "yes" if m["required"] else "no"
lines.append(
f"| `{m['name']}` | {m['emulator']} | {req} | "
f"{plat_badges} | {m['source_ref']} |"
)
lines.append("")
lines.extend(["", f'Generated on {_timestamp()}.
'])
return "\n".join(lines) + "\n"
def generate_cross_reference(
coverages: dict,
profiles: dict,
) -> str:
"""Generate cross-reference: Platform -> Core -> Systems -> Upstream."""
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
# Build core -> profile lookup by core name
core_to_profile: dict[str, str] = {}
for pname, p in unique.items():
for core in p.get("cores", [pname]):
core_to_profile[str(core)] = pname
total_cores = len(unique)
total_upstreams = len({
p.get("upstream", p.get("source", ""))
for p in unique.values()
if p.get("upstream") or p.get("source")
})
lines = [
f"# Cross-reference - {SITE_NAME}",
"",
f"Platform > Core > Systems > Upstream emulator. "
f"{total_cores} cores across {len(coverages)} platforms, "
f"tracing back to {total_upstreams} upstream projects.",
"",
"The libretro core is a port of the upstream emulator. "
"Files, features, and validation may differ between the two.",
"",
]
# Per platform
for pname in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
cov = coverages[pname]
display = cov["platform"]
config = cov["config"]
platform_cores = config.get("cores", [])
lines.append(f'??? abstract "[{display}](platforms/{pname}.md)"')
lines.append("")
# Resolve which profiles this platform uses
if platform_cores == "all_libretro":
matched = {
k: v for k, v in unique.items() if "libretro" in v.get("type", "")
}
elif isinstance(platform_cores, list):
matched = {}
for cname in platform_cores:
cname_str = str(cname)
if cname_str in unique:
matched[cname_str] = unique[cname_str]
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
matched[pkey] = unique[pkey]
else:
# Fallback: system intersection
psystems = set(config.get("systems", {}).keys())
matched = {
k: v for k, v in unique.items() if set(v.get("systems", [])) & psystems
}
if platform_cores == "all_libretro":
lines.append(f" **{len(matched)} cores** (all libretro)")
else:
lines.append(f" **{len(matched)} cores**")
lines.append("")
lines.append(" | Core | Classification | Systems | Files | Upstream |")
lines.append(" |------|---------------|---------|-------|----------|")
for emu_name in sorted(matched.keys()):
p = matched[emu_name]
emu_display = p.get("emulator", emu_name)
cls_raw = p.get("core_classification", "-")
cls = CLS_LABELS.get(cls_raw, cls_raw)
p.get("type", "")
upstream = p.get("upstream", "")
source = p.get("source", "")
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
file_count = len(files)
# Count mode divergences
libretro_only = sum(1 for f in files if f.get("mode") == "libretro")
standalone_only = sum(1 for f in files if f.get("mode") == "standalone")
file_str = str(file_count)
if libretro_only or standalone_only:
parts = []
if libretro_only:
parts.append(f"{libretro_only} libretro-only")
if standalone_only:
parts.append(f"{standalone_only} standalone-only")
file_str += f" ({', '.join(parts)})"
upstream_display = "-"
if upstream:
upstream_short = upstream.replace("https://github.com/", "")
upstream_display = f"[{upstream_short}]({upstream})"
elif source:
source_short = source.replace("https://github.com/", "")
upstream_display = f"[{source_short}]({source})"
lines.append(
f" | [{emu_display}](emulators/{emu_name}.md) | {cls} | "
f"{sys_str} | {file_str} | {upstream_display} |"
)
lines.append("")
# Reverse view: by upstream emulator
lines.extend(
[
"## By upstream emulator",
"",
"| Upstream | Cores | Classification | Platforms |",
"|----------|-------|---------------|-----------|",
]
)
# Group profiles by upstream
by_upstream: dict[str, list[str]] = {}
for emu_name, p in sorted(unique.items()):
upstream = p.get("upstream", p.get("source", ""))
if upstream:
by_upstream.setdefault(upstream, []).append(emu_name)
# Build platform membership per core
platform_membership: dict[str, set[str]] = {}
for pname, cov in coverages.items():
config = cov["config"]
pcores = config.get("cores", [])
if pcores == "all_libretro":
for k, v in unique.items():
if "libretro" in v.get("type", ""):
platform_membership.setdefault(k, set()).add(pname)
elif isinstance(pcores, list):
for cname in pcores:
cname_str = str(cname)
if cname_str in unique:
platform_membership.setdefault(cname_str, set()).add(pname)
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
platform_membership.setdefault(pkey, set()).add(pname)
for upstream_url in sorted(by_upstream.keys()):
cores = by_upstream[upstream_url]
upstream_short = upstream_url.replace("https://github.com/", "")
classifications = set()
all_plats: set[str] = set()
for c in cores:
raw_cls = unique[c].get("core_classification", "-")
classifications.add(CLS_LABELS.get(raw_cls, raw_cls))
all_plats.update(platform_membership.get(c, set()))
cls_str = ", ".join(sorted(classifications))
plat_str = ", ".join(sorted(all_plats)) if all_plats else "-"
core_links = ", ".join(f"[{c}](emulators/{c}.md)" for c in sorted(cores))
lines.append(
f"| [{upstream_short}]({upstream_url}) | {core_links} | "
f"{cls_str} | {plat_str} |"
)
lines.extend(["", f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
def generate_contributing() -> str:
return """# Contributing - RetroBIOS
## Add a BIOS file
1. Fork this repository
2. Place the file in `bios/Manufacturer/Console/filename`
3. Variants (alternate hashes for the same file): place in `bios/Manufacturer/Console/.variants/`
4. Create a Pull Request - hashes are verified automatically
## Add a platform
1. Create a scraper in `scripts/scraper/` (inherit `BaseScraper`)
2. Read the platform's upstream source code to understand its BIOS check logic
3. Add entry to `platforms/_registry.yml`
4. Generate the platform YAML config
5. Test: `python scripts/verify.py --platform <name>`
## Add an emulator profile
1. Clone the emulator's source code
2. Search for BIOS/firmware loading (grep for `bios`, `rom`, `firmware`, `fopen`)
3. Document every file the emulator loads with source code references
4. Write YAML to `emulators/<name>.yml`
5. Test: `python scripts/cross_reference.py --emulator <name>`
## File conventions
- `bios/Manufacturer/Console/filename` for canonical files
- `bios/Manufacturer/Console/.variants/filename.sha1prefix` for alternate versions
- Files >50 MB go in GitHub release assets (`large-files` release)
- RPG Maker and ScummVM directories are excluded from deduplication
## PR validation
The CI automatically:
- Computes SHA1/MD5/CRC32 of new files
- Checks against known hashes in platform configs
- Reports coverage impact
"""
# Wiki pages
# index, architecture, tools, profiling are maintained as wiki/ sources
# and copied verbatim by main(). Only data-model is generated dynamically.
def generate_wiki_data_model(db: dict, profiles: dict) -> str:
"""Generate data model documentation from actual database structure."""
files_count = len(db.get("files", {}))
by_md5 = len(db.get("indexes", {}).get("by_md5", {}))
by_name = len(db.get("indexes", {}).get("by_name", {}))
by_crc32 = len(db.get("indexes", {}).get("by_crc32", {}))
by_path = len(db.get("indexes", {}).get("by_path_suffix", {}))
lines = [
f"# Data model - {SITE_NAME}",
"",
"## database.json",
"",
f"Primary key: SHA1. **{files_count}** file entries.",
"",
"Each entry:",
"",
"```json",
"{",
' "path": "bios/Nintendo/GameCube/GC/USA/IPL.bin",',
' "name": "IPL.bin",',
' "size": 2097152,',
' "sha1": "...",',
' "md5": "...",',
' "sha256": "...",',
' "crc32": "...",',
' "adler32": "..."',
"}",
"```",
"",
"### Indexes",
"",
"| Index | Entries | Purpose |",
"|-------|---------|---------|",
f"| `by_md5` | {by_md5} | MD5 to SHA1 lookup (Batocera, Recalbox verification) |",
f"| `by_name` | {by_name} | filename to SHA1 list (name-based resolution) |",
f"| `by_crc32` | {by_crc32} | CRC32 to SHA1 lookup |",
f"| `by_path_suffix` | {by_path} | relative path to SHA1 (regional variant disambiguation) |",
"",
"### File resolution order",
"",
"`resolve_local_file` tries these steps in order:",
"",
"1. Path suffix exact match (for regional variants with same filename)",
"2. SHA1 exact match",
"3. MD5 direct lookup (supports truncated Batocera 29-char MD5)",
"4. Name + alias lookup without hash (existence mode)",
"5. Name + alias with md5_composite / direct MD5 per candidate",
"6. zippedFile content match via inner ROM MD5 index",
"7. MAME clone fallback (deduped ZIP mapped to canonical name)",
"",
"## Platform YAML",
"",
"Scraped from upstream sources. Structure:",
"",
"```yaml",
"platform: Batocera",
"verification_mode: md5 # how the platform checks files",
"hash_type: md5 # hash type in file entries",
"base_destination: bios # root directory for BIOS files",
"systems:",
" system-id:",
" files:",
" - name: filename",
" destination: path/in/bios/dir",
" md5: expected_hash",
" sha1: expected_hash",
" required: true",
"```",
"",
"Supports inheritance (`inherits: retroarch`) and shared groups",
"(`includes: [group_name]` referencing `_shared.yml`).",
"",
"## Emulator YAML",
"",
f"**{len(profiles)}** profiles. Source-verified from emulator code.",
"",
"See the [profiling guide](profiling.md) for the full field reference.",
"",
]
return "\n".join(lines) + "\n"
# Build cross-reference indexes
def _build_platform_file_index(coverages: dict) -> dict[str, set]:
"""Map platform_name -> set of declared file names."""
index = {}
for name, cov in coverages.items():
names = set()
config = cov["config"]
for system in config.get("systems", {}).values():
for fe in system.get("files", []):
names.add(fe.get("name", ""))
index[name] = names
return index
def _build_emulator_file_index(profiles: dict) -> dict[str, dict]:
"""Map emulator_name -> {files: set, systems: set} for cross-reference."""
index = {}
for name, profile in profiles.items():
if profile.get("type") == "alias":
continue
index[name] = {
"files": {f.get("name", "") for f in profile.get("files", [])},
"systems": set(profile.get("systems", [])),
}
return index
# mkdocs.yml nav generator
def generate_mkdocs_nav(
coverages: dict,
manufacturers: dict,
profiles: dict,
) -> list:
"""Generate the nav section for mkdocs.yml."""
platform_nav = [{"Overview": "platforms/index.md"}]
for name in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
display = coverages[name]["platform"]
platform_nav.append({display: f"platforms/{name}.md"})
system_nav = [{"Overview": "systems/index.md"}]
for mfr in sorted(manufacturers.keys()):
slug = mfr.lower().replace(" ", "-")
system_nav.append({mfr: f"systems/{slug}.md"})
unique_profiles = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
# Group emulators by classification for nav
by_class: dict[str, list[tuple[str, str]]] = {}
for name in sorted(unique_profiles.keys()):
p = unique_profiles[name]
cls = p.get("core_classification", "other")
display = p.get("emulator", name)
by_class.setdefault(cls, []).append((display, f"emulators/{name}.md"))
# Classification display names
cls_labels = {
"pure_libretro": "Pure libretro",
"official_port": "Official ports",
"community_fork": "Community forks",
"frozen_snapshot": "Frozen snapshots",
"enhanced_fork": "Enhanced forks",
"game_engine": "Game engines",
"embedded_hle": "Embedded HLE",
"launcher": "Launchers",
"other": "Other",
}
emu_nav: list = [{"Overview": "emulators/index.md"}]
for cls in [
"official_port",
"community_fork",
"pure_libretro",
"game_engine",
"enhanced_fork",
"frozen_snapshot",
"embedded_hle",
"launcher",
"other",
]:
entries = by_class.get(cls, [])
if not entries:
continue
label = cls_labels.get(cls, cls)
sub = [{display: path} for display, path in entries]
emu_nav.append({f"{label} ({len(entries)})": sub})
wiki_nav = [
{"Overview": "wiki/index.md"},
{"Getting started": "wiki/getting-started.md"},
{"FAQ": "wiki/faq.md"},
{"Troubleshooting": "wiki/troubleshooting.md"},
{"Architecture": "wiki/architecture.md"},
{"Tools": "wiki/tools.md"},
{"Advanced usage": "wiki/advanced-usage.md"},
{"Verification modes": "wiki/verification-modes.md"},
{"Data model": "wiki/data-model.md"},
{"Profiling guide": "wiki/profiling.md"},
{"Adding a platform": "wiki/adding-a-platform.md"},
{"Adding a scraper": "wiki/adding-a-scraper.md"},
{"Testing guide": "wiki/testing-guide.md"},
{"Release process": "wiki/release-process.md"},
]
return [
{"Home": "index.md"},
{"Platforms": platform_nav},
{"Systems": system_nav},
{"Emulators": emu_nav},
{"Cross-reference": "cross-reference.md"},
{"Gap Analysis": "gaps.md"},
{"Wiki": wiki_nav},
{"Contributing": "contributing.md"},
]
# Main
def main():
parser = argparse.ArgumentParser(
description="Generate MkDocs site from project data"
)
parser.add_argument("--db", default="database.json")
parser.add_argument("--platforms-dir", default="platforms")
parser.add_argument("--emulators-dir", default="emulators")
parser.add_argument("--docs-dir", default=DOCS_DIR)
args = parser.parse_args()
db = load_database(args.db)
docs = Path(args.docs_dir)
# Clean generated dirs (preserve docs/superpowers/)
for d in GENERATED_DIRS:
target = docs / d
if target.exists():
shutil.rmtree(target)
# Ensure output dirs
for d in GENERATED_DIRS:
(docs / d).mkdir(parents=True, exist_ok=True)
registry_path = Path(args.platforms_dir) / "_registry.yml"
registry = {}
if registry_path.exists():
with open(registry_path) as f:
registry = (yaml.safe_load(f) or {}).get("platforms", {})
platform_names = list_registered_platforms(
args.platforms_dir, include_archived=True
)
from common import load_data_dir_registry
from cross_reference import _build_supplemental_index
data_registry = load_data_dir_registry(args.platforms_dir)
suppl_names = _build_supplemental_index()
print("Computing platform coverage...")
coverages = {}
for name in sorted(platform_names):
try:
cov = compute_coverage(
name, args.platforms_dir, db, data_registry, suppl_names
)
coverages[name] = cov
print(
f" {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})"
)
except FileNotFoundError as e:
print(f" {name}: skipped ({e})", file=sys.stderr)
print("Loading emulator profiles...")
profiles = load_emulator_profiles(args.emulators_dir, skip_aliases=False)
unique_count = sum(1 for p in profiles.values() if p.get("type") != "alias")
print(
f" {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)"
)
# Build cross-reference indexes
platform_files = _build_platform_file_index(coverages)
emulator_files = _build_emulator_file_index(profiles)
# Generate home
print("Generating home page...")
write_if_changed(
str(docs / "index.md"), generate_home(db, coverages, profiles, registry)
)
# Build system_id -> manufacturer page map (needed by all generators)
print("Building system cross-reference map...")
manufacturers = _group_by_manufacturer(db)
_build_system_page_map_from_data(manufacturers, coverages, db)
print(f" {len(_system_page_map)} system IDs mapped to pages")
# Generate platform pages
print("Generating platform pages...")
write_if_changed(
str(docs / "platforms" / "index.md"), generate_platform_index(coverages)
)
for name, cov in coverages.items():
write_if_changed(
str(docs / "platforms" / f"{name}.md"),
generate_platform_page(name, cov, registry, emulator_files),
)
# Generate system pages
print("Generating system pages...")
write_if_changed(
str(docs / "systems" / "index.md"), generate_systems_index(manufacturers)
)
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
page = generate_system_page(mfr, consoles, platform_files, emulator_files)
write_if_changed(str(docs / "systems" / f"{slug}.md"), page)
# Generate emulator pages
print("Generating emulator pages...")
write_if_changed(
str(docs / "emulators" / "index.md"), generate_emulators_index(profiles)
)
for name, profile in profiles.items():
page = generate_emulator_page(name, profile, db, platform_files)
write_if_changed(str(docs / "emulators" / f"{name}.md"), page)
# Generate cross-reference page
print("Generating cross-reference page...")
write_if_changed(
str(docs / "cross-reference.md"), generate_cross_reference(coverages, profiles)
)
# Generate gap analysis page
print("Generating gap analysis page...")
write_if_changed(
str(docs / "gaps.md"),
generate_gap_analysis(profiles, coverages, db, suppl_names),
)
# Wiki pages: copy manually maintained sources + generate dynamic ones
print("Generating wiki pages...")
wiki_dest = docs / "wiki"
wiki_dest.mkdir(parents=True, exist_ok=True)
wiki_src = Path(WIKI_SRC_DIR)
if wiki_src.is_dir():
for src_file in wiki_src.glob("*.md"):
shutil.copy2(src_file, wiki_dest / src_file.name)
# data-model.md is generated (contains live DB stats)
write_if_changed(
str(wiki_dest / "data-model.md"), generate_wiki_data_model(db, profiles)
)
# Generate contributing
print("Generating contributing page...")
write_if_changed(str(docs / "contributing.md"), generate_contributing())
# Rebuild mkdocs.yml from a static template plus the generated nav; only the nav goes through yaml.dump, avoiding round-trip quote mangling of the full config
print("Updating mkdocs.yml nav...")
nav = generate_mkdocs_nav(coverages, manufacturers, profiles)
nav_yaml = yaml.dump(
{"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True
)
# Rewrite mkdocs.yml entirely (static config + generated nav)
mkdocs_static = """\
site_name: RetroBIOS
site_url: https://abdess.github.io/retrobios/
repo_url: https://github.com/Abdess/retrobios
repo_name: Abdess/retrobios
theme:
name: material
palette:
- media: (prefers-color-scheme)
toggle:
icon: material/brightness-auto
name: Switch to light mode
- media: '(prefers-color-scheme: light)'
scheme: default
toggle:
icon: material/brightness-7
name: Switch to dark mode
- media: '(prefers-color-scheme: dark)'
scheme: slate
toggle:
icon: material/brightness-4
name: Switch to auto
font: false
icon:
logo: material/chip
features:
- navigation.tabs
- navigation.sections
- navigation.top
- navigation.indexes
- search.suggest
- search.highlight
- content.tabs.link
- toc.follow
extra_css:
- stylesheets/extra.css
markdown_extensions:
- tables
- admonition
- attr_list
- md_in_html
- toc:
permalink: true
- pymdownx.details
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.tabbed:
alternate_style: true
plugins:
- search
"""
write_if_changed("mkdocs.yml", mkdocs_static + nav_yaml)
total_pages = (
1 # home
+ 1
+ len(coverages) # platform index + detail
+ 1
+ len(manufacturers) # system index + detail
+ 1 # cross-reference
+ 1
+ len(profiles) # emulator index + detail
+ 1 # gap analysis
+ 14 # wiki pages (copied from wiki/ + generated data-model)
+ 1 # contributing
)
print(f"\nGenerated {total_pages} pages in {args.docs_dir}/")
if __name__ == "__main__":
main()