Files
libretro/scripts/generate_site.py

1435 lines
54 KiB
Python

#!/usr/bin/env python3
"""Generate MkDocs site pages from database.json, platform configs, and emulator profiles.
Reads the same data sources as verify.py and generate_pack.py to produce
a complete documentation site. Zero manual content.
Usage:
python scripts/generate_site.py
python scripts/generate_site.py --db database.json --platforms-dir platforms
"""
from __future__ import annotations
import argparse
import json
import os
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
sys.exit(1)
sys.path.insert(0, os.path.dirname(__file__))
from common import load_database, load_emulator_profiles, load_platform_config
from generate_readme import compute_coverage
from verify import verify_platform
DOCS_DIR = "docs"
SITE_NAME = "RetroBIOS"
REPO_URL = "https://github.com/Abdess/retrobios"
RELEASE_URL = f"{REPO_URL}/releases/latest"
GENERATED_DIRS = ["platforms", "systems", "emulators"]
SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
# Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking
_system_page_map: dict[str, tuple[str, str]] = {}
def _build_system_page_map_from_data(
manufacturers: dict, coverages: dict, db: dict,
) -> None:
"""Build system_id -> (manufacturer_slug, console_name) mapping.
Uses platform file paths to trace system_id -> bios directory -> manufacturer page.
"""
files_db = db.get("files", {})
by_name = db.get("indexes", {}).get("by_name", {})
# Build reverse index: filename -> (manufacturer, console) from bios/ structure
file_to_console: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
for console, entries in consoles.items():
for entry in entries:
file_to_console[entry["name"]] = (mfr, console)
# Build normalized console name index for fuzzy matching
console_norm: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
mfr_norm = mfr.lower().replace(" ", "-")
for console in consoles:
norm = console.lower().replace(" ", "-")
entry = (slug, console)
console_norm[norm] = entry
console_norm[f"{mfr_norm}-{norm}"] = entry
# Short aliases: strip common manufacturer prefix words
for prefix in (f"{mfr_norm}-", "nintendo-", "sega-", "sony-", "snk-", "nec-"):
if norm.startswith(prefix.replace(f"{mfr_norm}-", "")):
pass # already covered by norm
key = f"{prefix}{norm}"
console_norm[key] = entry
# Map system_id -> (manufacturer, console) via platform file entries
for cov in coverages.values():
config = cov["config"]
for sys_id, system in config.get("systems", {}).items():
if sys_id in _system_page_map:
continue
# Strategy 1: trace via file paths in DB
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname in file_to_console:
mfr, console = file_to_console[fname]
slug = mfr.lower().replace(" ", "-")
_system_page_map[sys_id] = (slug, console)
break
if sys_id in _system_page_map:
continue
# Strategy 2: fuzzy match system_id against console directory names
if sys_id in console_norm:
_system_page_map[sys_id] = console_norm[sys_id]
else:
# Try partial match: "nintendo-wii" matches "Wii" under "Nintendo"
parts = sys_id.split("-")
for i in range(len(parts)):
suffix = "-".join(parts[i:])
if suffix in console_norm:
_system_page_map[sys_id] = console_norm[suffix]
break
def _system_link(sys_id: str, prefix: str = "") -> str:
"""Generate a markdown link to a system page with anchor."""
if sys_id in _system_page_map:
slug, console = _system_page_map[sys_id]
anchor = console.lower().replace(" ", "-").replace("/", "-")
return f"[{sys_id}]({prefix}systems/{slug}.md#{anchor})"
return sys_id
def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None:
"""Render any YAML value as indented markdown."""
pad = " " * indent
if isinstance(val, dict):
for k, v in val.items():
if isinstance(v, dict):
lines.append(f"{pad}**{k}:**")
lines.append("")
_render_yaml_value(lines, v, indent + 4)
elif isinstance(v, list):
lines.append(f"{pad}**{k}:**")
lines.append("")
for item in v:
if isinstance(item, dict):
parts = [f"{ik}: {iv}" for ik, iv in item.items()
if not isinstance(iv, (dict, list))]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
lines.append("")
else:
# Truncate very long strings in tables
sv = str(v)
if len(sv) > 200:
sv = sv[:200] + "..."
lines.append(f"{pad}- **{k}:** {sv}")
elif isinstance(val, list):
for item in val:
if isinstance(item, dict):
parts = [f"{ik}: {iv}" for ik, iv in item.items()
if not isinstance(iv, (dict, list))]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
elif isinstance(val, str) and "\n" in val:
for line in val.split("\n"):
lines.append(f"{pad}{line}")
else:
lines.append(f"{pad}{val}")
def _platform_link(name: str, display: str, prefix: str = "") -> str:
"""Generate a markdown link to a platform page."""
return f"[{display}]({prefix}platforms/{name}.md)"
def _emulator_link(name: str, prefix: str = "") -> str:
"""Generate a markdown link to an emulator page."""
return f"[{name}]({prefix}emulators/{name}.md)"
def _timestamp() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _fmt_size(size: int) -> str:
if size >= 1024 * 1024 * 1024:
return f"{size / (1024**3):.1f} GB"
if size >= 1024 * 1024:
return f"{size / (1024**2):.1f} MB"
if size >= 1024:
return f"{size / 1024:.1f} KB"
return f"{size} B"
def _pct(n: int, total: int) -> str:
if total == 0:
return "0%"
return f"{n / total * 100:.1f}%"
def _status_icon(pct: float) -> str:
if pct >= 100:
return "OK"
if pct >= 95:
return "~OK"
return "partial"
# ---------------------------------------------------------------------------
# Home page
# ---------------------------------------------------------------------------
def generate_home(db: dict, coverages: dict, profiles: dict,
registry: dict | None = None) -> str:
total_files = db.get("total_files", 0)
total_size = db.get("total_size", 0)
ts = _timestamp()
unique = {k: v for k, v in profiles.items()
if v.get("type") not in ("alias", "test")}
emulator_count = len(unique)
# Classification stats
classifications: dict[str, int] = {}
for p in unique.values():
cls = p.get("core_classification", "unclassified")
classifications[cls] = classifications.get(cls, 0) + 1
lines = [
f"# {SITE_NAME}",
"",
"Source-verified BIOS and firmware packs for retrogaming platforms.",
"",
"---",
"",
"## Methodology",
"",
"Documentation and metadata can drift from what emulators actually load at runtime.",
"To keep packs accurate, each file here is checked against the emulator's source code.",
"",
"The source code is the primary reference because it reflects actual behavior.",
"Other sources remain useful but are verified against it:",
"",
"1. **Upstream emulator source** - what the original project loads (Dolphin, PCSX2, Mednafen...)",
"2. **Libretro core source** - the RetroArch port, which may adapt paths or add files",
"3. **`.info` declarations** - metadata that platforms rely on, checked for accuracy",
"",
f"**{emulator_count}** emulators profiled. "
f"Each profile documents what the code loads, what it validates, "
f"and where the port differs from the original.",
"",
f"**{total_files:,}** files | **{len(coverages)}** platforms | "
f"**{emulator_count}** emulator profiles | **{_fmt_size(total_size)}** total",
"",
"---",
"",
]
# Platform table
lines.extend([
"## Platforms",
"",
"| | Platform | Coverage | Verified | Download |",
"|---|----------|----------|----------|----------|",
])
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
pct = _pct(cov["present"], cov["total"])
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = f"![{display}]({logo_url}){{ width=20 loading=lazy }}" if logo_url else ""
lines.append(
f"| {logo_md} | [{display}](platforms/{name}.md) | "
f"{cov['present']}/{cov['total']} ({pct}) | "
f"{cov['verified']} | "
f"[Pack]({RELEASE_URL}) |"
)
# Emulator classification breakdown
lines.extend([
"",
"## Emulator profiles",
"",
"| Classification | Count |",
"|---------------|-------|",
])
for cls, count in sorted(classifications.items(), key=lambda x: -x[1]):
lines.append(f"| {cls} | {count} |")
# Quick links
lines.extend([
"",
"---",
"",
f"[Systems](systems/){{ .md-button }} "
f"[Emulators](emulators/){{ .md-button }} "
f"[Cross-reference](cross-reference.md){{ .md-button }} "
f"[Gap Analysis](gaps.md){{ .md-button }} "
f"[Contributing](contributing.md){{ .md-button .md-button--primary }}",
"",
f"*Generated on {ts}.*",
])
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# Platform pages
# ---------------------------------------------------------------------------
def generate_platform_index(coverages: dict) -> str:
lines = [
f"# Platforms - {SITE_NAME}",
"",
"| Platform | Coverage | Verification | Status |",
"|----------|----------|-------------|--------|",
]
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
pct = _pct(cov["present"], cov["total"])
plat_status = cov["config"].get("status", "active")
status = "archived" if plat_status == "archived" else _status_icon(cov["percentage"])
lines.append(
f"| [{display}]({name}.md) | "
f"{cov['present']}/{cov['total']} ({pct}) | "
f"{cov['mode']} | {status} |"
)
return "\n".join(lines) + "\n"
def generate_platform_page(name: str, cov: dict, registry: dict | None = None,
emulator_files: dict | None = None) -> str:
config = cov["config"]
display = cov["platform"]
mode = cov["mode"]
pct = _pct(cov["present"], cov["total"])
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = f"![{display}]({logo_url}){{ width=48 align=right }}\n\n" if logo_url else ""
homepage = config.get("homepage", "")
version = config.get("version", "")
hash_type = config.get("hash_type", "")
base_dest = config.get("base_destination", "")
lines = [
f"# {display} - {SITE_NAME}",
"",
logo_md + f"| | |",
"|---|---|",
f"| Verification | {mode} |",
f"| Hash type | {hash_type} |",
]
if version:
lines.append(f"| Version | {version} |")
if base_dest:
lines.append(f"| BIOS path | `{base_dest}/` |")
if homepage:
lines.append(f"| Homepage | [{homepage}]({homepage}) |")
lines.extend([
"",
f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | "
f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}",
"",
f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}",
"",
])
# Build lookup from config file entries (has hashes/sizes)
config_files: dict[str, dict] = {}
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname:
config_files[fname] = fe
# Group details by system
by_system: dict[str, list] = {}
for d in cov["details"]:
sys_id = d.get("system", "unknown")
by_system.setdefault(sys_id, []).append(d)
for sys_id, files in sorted(by_system.items()):
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
# Cross-ref: emulators that handle this system
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
if sys_id in emu_data.get("systems", set()):
sys_emus.append(emu_name)
sys_link = _system_link(sys_id, "../")
lines.append(f"## {sys_link}")
lines.append("")
lines.append(f"{ok_count}/{total} files verified")
if sys_emus:
emu_links = ", ".join(_emulator_link(e, "../") for e in sorted(sys_emus))
lines.append(f"Emulators: {emu_links}")
lines.append("")
# File table with hashes and sizes
lines.append("| File | Status | Size | SHA1 | MD5 |")
lines.append("|------|--------|------|------|-----|")
for f in sorted(files, key=lambda x: x["name"]):
status = f["status"]
fname = f["name"]
# Pull hashes/size from platform config entry
cfg_entry = config_files.get(fname, {})
sha1 = cfg_entry.get("sha1", f.get("sha1", ""))
md5 = cfg_entry.get("md5", f.get("expected_md5", ""))
size = cfg_entry.get("size", f.get("size", 0))
size_str = _fmt_size(size) if size else "-"
sha1_str = f"`{sha1[:12]}...`" if sha1 and len(sha1) > 12 else (f"`{sha1}`" if sha1 else "-")
md5_str = f"`{md5[:12]}...`" if md5 and len(md5) > 12 else (f"`{md5}`" if md5 else "-")
if status == "ok":
status_display = "OK"
elif status == "untested":
reason = f.get("reason", "")
status_display = f"Untested: {reason}" if reason else "Untested"
elif status == "missing":
status_display = "Missing"
else:
status_display = status
lines.append(f"| `{fname}` | {status_display} | {size_str} | {sha1_str} | {md5_str} |")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# System pages
# ---------------------------------------------------------------------------
def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]:
"""Group files by manufacturer -> console -> files."""
manufacturers: dict[str, dict[str, list]] = {}
for sha1, entry in db.get("files", {}).items():
path = entry.get("path", "")
parts = path.split("/")
if len(parts) < 3 or parts[0] != "bios":
continue
manufacturer = parts[1]
console = parts[2]
manufacturers.setdefault(manufacturer, {}).setdefault(console, []).append(entry)
return manufacturers
def generate_systems_index(manufacturers: dict) -> str:
lines = [
f"# Systems - {SITE_NAME}",
"",
"| Manufacturer | Consoles | Files |",
"|-------------|----------|-------|",
]
for mfr in sorted(manufacturers.keys()):
consoles = manufacturers[mfr]
file_count = sum(len(files) for files in consoles.values())
slug = mfr.lower().replace(" ", "-")
lines.append(f"| [{mfr}]({slug}.md) | {len(consoles)} | {file_count} |")
return "\n".join(lines) + "\n"
def generate_system_page(
manufacturer: str,
consoles: dict[str, list],
platform_files: dict[str, set],
emulator_files: dict[str, dict],
) -> str:
slug = manufacturer.lower().replace(" ", "-")
lines = [
f"# {manufacturer} - {SITE_NAME}",
"",
]
for console_name in sorted(consoles.keys()):
files = consoles[console_name]
icon_name = f"{manufacturer} - {console_name}".replace("/", " ")
icon_url = f"{SYSTEM_ICON_BASE}/{icon_name.replace(' ', '%20')}.png"
lines.append(f"## ![{console_name}]({icon_url}){{ width=24 }} {console_name}")
lines.append("")
# Separate main files from variants
main_files = [f for f in files if "/.variants/" not in f["path"]]
variant_files = [f for f in files if "/.variants/" in f["path"]]
for f in sorted(main_files, key=lambda x: x["name"]):
name = f["name"]
sha1_full = f.get("sha1", "unknown")
md5_full = f.get("md5", "unknown")
size = _fmt_size(f.get("size", 0))
# Cross-reference: which platforms declare this file
plats = sorted(p for p, names in platform_files.items() if name in names)
# Cross-reference: which emulators load this file
emus = sorted(e for e, data in emulator_files.items() if name in data.get("files", set()))
lines.append(f"**`{name}`** ({size})")
lines.append("")
lines.append(f"- SHA1: `{sha1_full}`")
lines.append(f"- MD5: `{md5_full}`")
if plats:
plat_links = [_platform_link(p, p, "../../") for p in plats]
lines.append(f"- Platforms: {', '.join(plat_links)}")
if emus:
emu_links = [_emulator_link(e, "../../") for e in emus]
lines.append(f"- Emulators: {', '.join(emu_links)}")
lines.append("")
if variant_files:
lines.append("**Variants:**")
lines.append("")
for v in sorted(variant_files, key=lambda x: x["name"]):
vname = v["name"]
vmd5 = v.get("md5", "unknown")
lines.append(f"- `{vname}` MD5: `{vmd5}`")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# Emulator pages
# ---------------------------------------------------------------------------
def generate_emulators_index(profiles: dict) -> str:
lines = [
f"# Emulators - {SITE_NAME}",
"",
"| Engine | Type | Systems | Files |",
"|--------|------|---------|-------|",
]
unique = {k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")}
test_cores = {k: v for k, v in profiles.items() if v.get("type") == "test"}
aliases = {k: v for k, v in profiles.items() if v.get("type") == "alias"}
for name in sorted(unique.keys()):
p = unique[name]
emu_name = p.get("emulator", name)
emu_type = p.get("type", "unknown")
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems)-3}"
lines.append(
f"| [{emu_name}]({name}.md) | {emu_type} | "
f"{sys_str} | {len(files)} |"
)
if aliases:
lines.extend(["", "## Aliases", ""])
lines.append("| Core | Points to |")
lines.append("|------|-----------|")
for name in sorted(aliases.keys()):
parent = aliases[name].get("alias_of", "unknown")
lines.append(f"| {name} | [{parent}]({parent}.md) |")
return "\n".join(lines) + "\n"
def generate_emulator_page(name: str, profile: dict, db: dict,
platform_files: dict | None = None) -> str:
if profile.get("type") == "alias":
parent = profile.get("alias_of", "unknown")
return (
f"# {name} - {SITE_NAME}\n\n"
f"This core uses the same firmware as **{parent}**.\n\n"
f"See [{parent}]({parent}.md) for details.\n"
)
emu_name = profile.get("emulator", name)
emu_type = profile.get("type", "unknown")
classification = profile.get("core_classification", "")
source = profile.get("source", "")
upstream = profile.get("upstream", "")
version = profile.get("core_version", "unknown")
display = profile.get("display_name", emu_name)
profiled = profile.get("profiled_date", "unknown")
systems = profile.get("systems", [])
cores = profile.get("cores", [name])
files = profile.get("files", [])
notes_raw = profile.get("notes", profile.get("note", ""))
notes = str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else ""
exclusion = profile.get("exclusion_note", "")
data_dirs = profile.get("data_directories", [])
lines = [
f"# {emu_name} - {SITE_NAME}",
"",
f"| | |",
f"|---|---|",
f"| Type | {emu_type} |",
]
if classification:
lines.append(f"| Classification | {classification} |")
if source:
lines.append(f"| Source | [{source}]({source}) |")
if upstream and upstream != source:
lines.append(f"| Upstream | [{upstream}]({upstream}) |")
lines.append(f"| Version | {version} |")
lines.append(f"| Profiled | {profiled} |")
if cores:
lines.append(f"| Cores | {', '.join(str(c) for c in cores)} |")
if systems:
sys_links = [_system_link(s, "../") for s in systems]
lines.append(f"| Systems | {', '.join(sys_links)} |")
mame_ver = profile.get("mame_version", "")
if mame_ver:
lines.append(f"| MAME version | {mame_ver} |")
author = profile.get("author", "")
if author:
lines.append(f"| Author | {author} |")
based_on = profile.get("based_on", "")
if based_on:
lines.append(f"| Based on | {based_on} |")
# Additional metadata fields (scalar values only -complex ones go to collapsible sections)
for field, label in [
("core", "Core ID"), ("core_name", "Core name"),
("bios_size", "BIOS size"), ("bios_directory", "BIOS directory"),
("bios_detection", "BIOS detection"), ("bios_selection", "BIOS selection"),
("firmware_file", "Firmware file"), ("firmware_source", "Firmware source"),
("firmware_install", "Firmware install"), ("firmware_detection", "Firmware detection"),
("resources_directory", "Resources directory"), ("rom_path", "ROM path"),
("game_count", "Game count"), ("verification", "Verification mode"),
("source_ref", "Source ref"), ("analysis_date", "Analysis date"),
("analysis_commit", "Analysis commit"),
]:
val = profile.get(field)
if val is None or val == "" or isinstance(val, (dict, list)):
continue
if isinstance(val, str) and val.startswith("http"):
lines.append(f"| {label} | [{val}]({val}) |")
else:
lines.append(f"| {label} | {val} |")
lines.append("")
# Platform-specific details (rich structured data)
platform_details = profile.get("platform_details")
if platform_details and isinstance(platform_details, dict):
lines.extend(["???+ info \"Platform details\"", ""])
for pk, pv in platform_details.items():
if isinstance(pv, dict):
lines.append(f" **{pk}:**")
for sk, sv in pv.items():
lines.append(f" - {sk}: {sv}")
elif isinstance(pv, list):
lines.append(f" **{pk}:** {', '.join(str(x) for x in pv)}")
else:
lines.append(f" **{pk}:** {pv}")
lines.append("")
# All remaining structured data blocks as collapsible sections
_structured_blocks = [
("analysis", "Source analysis"),
("memory_layout", "Memory layout"),
("regions", "Regions"),
("nvm_layout", "NVM layout"),
("model_kickstart_map", "Model kickstart map"),
("builtin_boot_roms", "Built-in boot ROMs"),
("common_bios_filenames", "Common BIOS filenames"),
("valid_bios_crc32", "Valid BIOS CRC32"),
("dev_flash", "dev_flash"),
("dev_flash2", "dev_flash2"),
("dev_flash3", "dev_flash3"),
("firmware_modules", "Firmware modules"),
("firmware_titles", "Firmware titles"),
("fallback_fonts", "Fallback fonts"),
("io_devices", "I/O devices"),
("partitions", "Partitions"),
("mlc_structure", "MLC structure"),
("machine_directories", "Machine directories"),
("machine_properties", "Machine properties"),
("whdload_kickstarts", "WHDLoad kickstarts"),
("bios_identical_to", "BIOS identical to"),
("pack_structure", "Pack structure"),
("firmware_version", "Firmware version"),
]
for field, label in _structured_blocks:
val = profile.get(field)
if val is None:
continue
lines.append(f"???+ abstract \"{label}\"")
lines.append("")
_render_yaml_value(lines, val, indent=4)
lines.append("")
# Notes
if notes:
indented = notes.replace("\n", "\n ")
lines.extend(["???+ note \"Technical notes\"",
f" {indented}",
""])
if not files:
lines.append("No BIOS or firmware files required.")
if exclusion:
lines.extend([
"",
f"!!! info \"Why no files\"",
f" {exclusion}",
])
else:
by_name = db.get("indexes", {}).get("by_name", {})
files_db = db.get("files", {})
# Stats by category
bios_files = [f for f in files if f.get("category", "bios") == "bios"]
game_data = [f for f in files if f.get("category") == "game_data"]
bios_zips = [f for f in files if f.get("category") == "bios_zip"]
in_repo_count = sum(1 for f in files if f.get("name", "") in by_name)
missing_count = len(files) - in_repo_count
req_count = sum(1 for f in files if f.get("required"))
opt_count = len(files) - req_count
hle_count = sum(1 for f in files if f.get("hle_fallback"))
parts = [f"**{len(files)} files**"]
parts.append(f"{req_count} required, {opt_count} optional")
parts.append(f"{in_repo_count} in repo, {missing_count} missing")
if hle_count:
parts.append(f"{hle_count} with HLE fallback")
lines.append(" | ".join(parts))
if game_data or bios_zips:
cats = []
if bios_files:
cats.append(f"{len(bios_files)} BIOS")
if game_data:
cats.append(f"{len(game_data)} game data")
if bios_zips:
cats.append(f"{len(bios_zips)} BIOS ZIPs")
lines.append(f"Categories: {', '.join(cats)}")
lines.append("")
# File table
for f in files:
fname = f.get("name", "")
required = f.get("required", False)
in_repo = fname in by_name
source_ref = f.get("source_ref", "")
mode = f.get("mode", "")
hle = f.get("hle_fallback", False)
aliases = f.get("aliases", [])
category = f.get("category", "")
validation = f.get("validation", [])
size = f.get("size")
fnote = f.get("note", f.get("notes", ""))
storage = f.get("storage", "")
fmd5 = f.get("md5", "")
fsha1 = f.get("sha1", "")
fcrc32 = f.get("crc32", "")
fsha256 = f.get("sha256", "")
fadler32 = f.get("known_hash_adler32", "")
fmin = f.get("min_size")
fmax = f.get("max_size")
desc = f.get("description", "")
region = f.get("region", "")
archive = f.get("archive", "")
fpath = f.get("path", "")
fsystem = f.get("system", "")
priority = f.get("priority")
fast_boot = f.get("fast_boot")
bundled = f.get("bundled", False)
embedded = f.get("embedded", False)
has_builtin = f.get("has_builtin", False)
contents = f.get("contents", [])
config_key = f.get("config_key", "")
dest = f.get("dest", f.get("destination", ""))
ftype = f.get("type", "")
fpattern = f.get("pattern", "")
region_check = f.get("region_check")
size_note = f.get("size_note", "")
size_options = f.get("size_options", [])
size_range = f.get("size_range", "")
# Status badges
badges = []
if required:
badges.append("**required**")
else:
badges.append("optional")
if hle:
badges.append("HLE available")
if mode:
badges.append(mode)
if category and category != "bios":
badges.append(category)
if region:
badges.append(", ".join(region) if isinstance(region, list) else str(region))
if storage and storage != "embedded":
badges.append(storage)
if bundled:
badges.append("bundled in binary")
if embedded:
badges.append("embedded")
if has_builtin:
badges.append("has built-in fallback")
if archive:
badges.append(f"in `{archive}`")
if ftype and ftype != "bios":
badges.append(ftype)
if not in_repo:
badges.append("missing from repo")
lines.append(f"**`{fname}`** -{', '.join(badges)}")
if desc:
lines.append(f": {desc}")
lines.append("")
details = []
if fpath and fpath != fname:
details.append(f"Path: `{fpath}`")
if fsystem:
details.append(f"System: {_system_link(fsystem, '../')}")
if size:
size_str = _fmt_size(size)
if fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
size_str += f" ({', '.join(bounds)})"
details.append(f"Size: {size_str}")
elif fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
details.append(f"Size: {', '.join(bounds)}")
if fsha1:
details.append(f"SHA1: `{fsha1}`")
if fmd5:
details.append(f"MD5: `{fmd5}`")
if fcrc32:
details.append(f"CRC32: `{fcrc32}`")
if fsha256:
details.append(f"SHA256: `{fsha256}`")
if fadler32:
details.append(f"Adler32: `{fadler32}`")
if aliases:
details.append(f"Aliases: {', '.join(f'`{a}`' for a in aliases)}")
if priority is not None:
details.append(f"Priority: {priority}")
if fast_boot is not None:
details.append(f"Fast boot: {'yes' if fast_boot else 'no'}")
if validation:
if isinstance(validation, list):
details.append(f"Validation: {', '.join(validation)}")
elif isinstance(validation, dict):
for scope, checks in validation.items():
details.append(f"Validation ({scope}): {', '.join(checks)}")
if source_ref:
details.append(f"Source: `{source_ref}`")
if platform_files:
plats = sorted(p for p, names in platform_files.items() if fname in names)
if plats:
plat_links = [_platform_link(p, p, "../") for p in plats]
details.append(f"Platforms: {', '.join(plat_links)}")
if dest and dest != fname and dest != fpath:
details.append(f"Destination: `{dest}`")
if config_key:
details.append(f"Config key: `{config_key}`")
if fpattern:
details.append(f"Pattern: `{fpattern}`")
if region_check is not None:
details.append(f"Region check: {'yes' if region_check else 'no'}")
if size_note:
details.append(f"Size note: {size_note}")
if size_options:
details.append(f"Size options: {', '.join(_fmt_size(s) for s in size_options)}")
if size_range:
details.append(f"Size range: {size_range}")
if details:
for d in details:
lines.append(f"- {d}")
if fnote:
lines.append(f"- {fnote}")
if contents:
lines.append(f"- Contents ({len(contents)} entries):")
for c in contents[:10]:
if isinstance(c, dict):
cname = c.get("name", "")
cdesc = c.get("description", "")
csize = c.get("size", "")
parts = [f"`{cname}`"]
if cdesc:
parts.append(cdesc)
if csize:
parts.append(_fmt_size(csize))
lines.append(f" - {' -'.join(parts)}")
else:
lines.append(f" - {c}")
if len(contents) > 10:
lines.append(f" - ... and {len(contents) - 10} more")
lines.append("")
# Data directories
if data_dirs:
lines.extend(["## Data directories", ""])
for dd in data_dirs:
ref = dd.get("ref", "")
dest = dd.get("destination", "")
lines.append(f"- `{ref}` >`{dest}`")
lines.append("")
lines.extend([f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# Contributing page
# ---------------------------------------------------------------------------
def generate_gap_analysis(
profiles: dict,
coverages: dict,
db: dict,
) -> str:
"""Generate a global gap analysis page showing all missing/undeclared files."""
by_name = db.get("indexes", {}).get("by_name", {})
platform_files = _build_platform_file_index(coverages)
lines = [
f"# Gap Analysis - {SITE_NAME}",
"",
"Files that emulators load but platforms don't declare, and their availability.",
"",
]
# Global stats
total_undeclared = 0
total_in_repo = 0
total_missing = 0
# Build global set of all platform-declared filenames (once)
all_platform_names = set()
for pfiles in platform_files.values():
all_platform_names.update(pfiles)
emulator_gaps = []
for emu_name, profile in sorted(profiles.items()):
if profile.get("type") == "alias":
continue
files = profile.get("files", [])
if not files:
continue
undeclared = []
for f in files:
fname = f.get("name", "")
if not fname or fname.startswith("<"):
continue
if fname not in all_platform_names:
in_repo = fname in by_name
undeclared.append({
"name": fname,
"required": f.get("required", False),
"in_repo": in_repo,
"source_ref": f.get("source_ref", ""),
})
total_undeclared += 1
if in_repo:
total_in_repo += 1
else:
total_missing += 1
if undeclared:
emulator_gaps.append((emu_name, profile.get("emulator", emu_name), undeclared))
lines.extend([
"## Summary",
"",
f"| Metric | Count |",
f"|--------|-------|",
f"| Total undeclared files | {total_undeclared} |",
f"| Already in repo | {total_in_repo} |",
f"| Missing from repo | {total_missing} |",
f"| Emulators with gaps | {len(emulator_gaps)} |",
"",
])
# Per-emulator breakdown
lines.extend([
"## Per Emulator",
"",
"| Emulator | Undeclared | In Repo | Missing |",
"|----------|-----------|---------|---------|",
])
for emu_name, display, gaps in sorted(emulator_gaps, key=lambda x: -len(x[2])):
in_repo = sum(1 for g in gaps if g["in_repo"])
missing = len(gaps) - in_repo
lines.append(f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | {in_repo} | {missing} |")
# Missing files detail (not in repo)
all_missing = set()
missing_details = []
for emu_name, display, gaps in emulator_gaps:
for g in gaps:
if not g["in_repo"] and g["name"] not in all_missing:
all_missing.add(g["name"])
missing_details.append({
"name": g["name"],
"emulator": display,
"required": g["required"],
"source_ref": g["source_ref"],
})
if missing_details:
lines.extend([
"",
f"## Missing Files ({len(missing_details)} unique)",
"",
"Files loaded by emulators but not available in the repository.",
"",
"| File | Emulator | Required | Source |",
"|------|----------|----------|--------|",
])
for m in sorted(missing_details, key=lambda x: x["name"]):
req = "yes" if m["required"] else "no"
lines.append(f"| `{m['name']}` | {m['emulator']} | {req} | {m['source_ref']} |")
lines.extend(["", f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
def generate_cross_reference(
coverages: dict,
profiles: dict,
) -> str:
"""Generate cross-reference: Platform -> Core -> Systems -> Upstream."""
unique = {k: v for k, v in profiles.items()
if v.get("type") not in ("alias", "test")}
# Build core -> profile lookup by core name
core_to_profile: dict[str, str] = {}
for pname, p in unique.items():
for core in p.get("cores", [pname]):
core_to_profile[str(core)] = pname
lines = [
f"# Cross-reference - {SITE_NAME}",
"",
"Platform >Core >Systems >Upstream emulator.",
"",
"The libretro core is a port of the upstream emulator. "
"Files, features, and validation may differ between the two.",
"",
]
# Per platform
for pname in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
cov = coverages[pname]
display = cov["platform"]
config = cov["config"]
platform_cores = config.get("cores", [])
lines.append(f"## [{display}](platforms/{pname}.md)")
lines.append("")
# Resolve which profiles this platform uses
if platform_cores == "all_libretro":
matched = {k: v for k, v in unique.items()
if "libretro" in v.get("type", "")}
elif isinstance(platform_cores, list):
matched = {}
for cname in platform_cores:
cname_str = str(cname)
if cname_str in unique:
matched[cname_str] = unique[cname_str]
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
matched[pkey] = unique[pkey]
else:
# Fallback: system intersection
psystems = set(config.get("systems", {}).keys())
matched = {k: v for k, v in unique.items()
if set(v.get("systems", [])) & psystems}
if platform_cores == "all_libretro":
lines.append(f"**{len(matched)} cores** (all libretro)")
else:
lines.append(f"**{len(matched)} cores**")
lines.append("")
lines.append("| Core | Classification | Systems | Files | Upstream |")
lines.append("|------|---------------|---------|-------|----------|")
for emu_name in sorted(matched.keys()):
p = matched[emu_name]
emu_display = p.get("emulator", emu_name)
cls = p.get("core_classification", "-")
emu_type = p.get("type", "")
upstream = p.get("upstream", "")
source = p.get("source", "")
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
file_count = len(files)
# Count mode divergences
libretro_only = sum(1 for f in files if f.get("mode") == "libretro")
standalone_only = sum(1 for f in files if f.get("mode") == "standalone")
file_str = str(file_count)
if libretro_only or standalone_only:
parts = []
if libretro_only:
parts.append(f"{libretro_only} libretro-only")
if standalone_only:
parts.append(f"{standalone_only} standalone-only")
file_str += f" ({', '.join(parts)})"
upstream_display = "-"
if upstream:
upstream_short = upstream.replace("https://github.com/", "")
upstream_display = f"[{upstream_short}]({upstream})"
elif source:
source_short = source.replace("https://github.com/", "")
upstream_display = f"[{source_short}]({source})"
lines.append(
f"| [{emu_display}](emulators/{emu_name}.md) | {cls} | "
f"{sys_str} | {file_str} | {upstream_display} |"
)
lines.append("")
# Reverse view: by upstream emulator
lines.extend([
"## By upstream emulator",
"",
"| Upstream | Cores | Classification | Platforms |",
"|----------|-------|---------------|-----------|",
])
# Group profiles by upstream
by_upstream: dict[str, list[str]] = {}
for emu_name, p in sorted(unique.items()):
upstream = p.get("upstream", p.get("source", ""))
if upstream:
by_upstream.setdefault(upstream, []).append(emu_name)
# Build platform membership per core
platform_membership: dict[str, set[str]] = {}
for pname, cov in coverages.items():
config = cov["config"]
pcores = config.get("cores", [])
if pcores == "all_libretro":
for k, v in unique.items():
if "libretro" in v.get("type", ""):
platform_membership.setdefault(k, set()).add(pname)
elif isinstance(pcores, list):
for cname in pcores:
cname_str = str(cname)
if cname_str in unique:
platform_membership.setdefault(cname_str, set()).add(pname)
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
platform_membership.setdefault(pkey, set()).add(pname)
for upstream_url in sorted(by_upstream.keys()):
cores = by_upstream[upstream_url]
upstream_short = upstream_url.replace("https://github.com/", "")
classifications = set()
all_plats: set[str] = set()
for c in cores:
classifications.add(unique[c].get("core_classification", "-"))
all_plats.update(platform_membership.get(c, set()))
cls_str = ", ".join(sorted(classifications))
plat_str = ", ".join(sorted(all_plats)) if all_plats else "-"
core_links = ", ".join(f"[{c}](emulators/{c}.md)" for c in sorted(cores))
lines.append(
f"| [{upstream_short}]({upstream_url}) | {core_links} | "
f"{cls_str} | {plat_str} |"
)
lines.extend(["", f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
def generate_contributing() -> str:
return """# Contributing - RetroBIOS
## Add a BIOS file
1. Fork this repository
2. Place the file in `bios/Manufacturer/Console/filename`
3. Variants (alternate hashes for the same file): place in `bios/Manufacturer/Console/.variants/`
4. Create a Pull Request - hashes are verified automatically
## Add a platform
1. Create a scraper in `scripts/scraper/` (inherit `BaseScraper`)
2. Read the platform's upstream source code to understand its BIOS check logic
3. Add entry to `platforms/_registry.yml`
4. Generate the platform YAML config
5. Test: `python scripts/verify.py --platform <name>`
## Add an emulator profile
1. Clone the emulator's source code
2. Search for BIOS/firmware loading (grep for `bios`, `rom`, `firmware`, `fopen`)
3. Document every file the emulator loads with source code references
4. Write YAML to `emulators/<name>.yml`
5. Test: `python scripts/cross_reference.py --emulator <name>`
## File conventions
- `bios/Manufacturer/Console/filename` for canonical files
- `bios/Manufacturer/Console/.variants/filename.sha1prefix` for alternate versions
- Files >50 MB go in GitHub release assets (`large-files` release)
- RPG Maker and ScummVM directories are excluded from deduplication
## PR validation
The CI automatically:
- Computes SHA1/MD5/CRC32 of new files
- Checks against known hashes in platform configs
- Reports coverage impact
"""
# ---------------------------------------------------------------------------
# Build cross-reference indexes
# ---------------------------------------------------------------------------
def _build_platform_file_index(coverages: dict) -> dict[str, set]:
"""Map platform_name -> set of declared file names."""
index = {}
for name, cov in coverages.items():
names = set()
config = cov["config"]
for system in config.get("systems", {}).values():
for fe in system.get("files", []):
names.add(fe.get("name", ""))
index[name] = names
return index
def _build_emulator_file_index(profiles: dict) -> dict[str, dict]:
"""Map emulator_name -> {files: set, systems: set} for cross-reference."""
index = {}
for name, profile in profiles.items():
if profile.get("type") == "alias":
continue
index[name] = {
"files": {f.get("name", "") for f in profile.get("files", [])},
"systems": set(profile.get("systems", [])),
}
return index
# ---------------------------------------------------------------------------
# mkdocs.yml nav generator
# ---------------------------------------------------------------------------
def generate_mkdocs_nav(
coverages: dict,
manufacturers: dict,
profiles: dict,
) -> list:
"""Generate the nav section for mkdocs.yml."""
platform_nav = [{"Overview": "platforms/index.md"}]
for name in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
display = coverages[name]["platform"]
platform_nav.append({display: f"platforms/{name}.md"})
system_nav = [{"Overview": "systems/index.md"}]
for mfr in sorted(manufacturers.keys()):
slug = mfr.lower().replace(" ", "-")
system_nav.append({mfr: f"systems/{slug}.md"})
unique_profiles = {k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")}
emu_nav = [{"Overview": "emulators/index.md"}]
for name in sorted(unique_profiles.keys()):
display = unique_profiles[name].get("emulator", name)
emu_nav.append({display: f"emulators/{name}.md"})
return [
{"Home": "index.md"},
{"Platforms": platform_nav},
{"Systems": system_nav},
{"Emulators": emu_nav},
{"Cross-reference": "cross-reference.md"},
{"Gap Analysis": "gaps.md"},
{"Contributing": "contributing.md"},
]
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(description="Generate MkDocs site from project data")
parser.add_argument("--db", default="database.json")
parser.add_argument("--platforms-dir", default="platforms")
parser.add_argument("--emulators-dir", default="emulators")
parser.add_argument("--docs-dir", default=DOCS_DIR)
args = parser.parse_args()
db = load_database(args.db)
docs = Path(args.docs_dir)
# Clean generated dirs (preserve docs/superpowers/)
for d in GENERATED_DIRS:
target = docs / d
if target.exists():
shutil.rmtree(target)
# Ensure output dirs
for d in GENERATED_DIRS:
(docs / d).mkdir(parents=True, exist_ok=True)
# Load registry for platform metadata (logos, etc.)
registry_path = Path(args.platforms_dir) / "_registry.yml"
registry = {}
if registry_path.exists():
with open(registry_path) as f:
registry = (yaml.safe_load(f) or {}).get("platforms", {})
# Load platform configs
platform_names = [
p.stem for p in Path(args.platforms_dir).glob("*.yml")
if not p.name.startswith("_")
]
print("Computing platform coverage...")
coverages = {}
for name in sorted(platform_names):
try:
cov = compute_coverage(name, args.platforms_dir, db)
coverages[name] = cov
print(f" {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})")
except FileNotFoundError as e:
print(f" {name}: skipped ({e})", file=sys.stderr)
# Load emulator profiles
print("Loading emulator profiles...")
profiles = load_emulator_profiles(args.emulators_dir, skip_aliases=False)
unique_count = sum(1 for p in profiles.values() if p.get("type") != "alias")
print(f" {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)")
# Build cross-reference indexes
platform_files = _build_platform_file_index(coverages)
emulator_files = _build_emulator_file_index(profiles)
# Generate home
print("Generating home page...")
(docs / "index.md").write_text(generate_home(db, coverages, profiles, registry))
# Build system_id -> manufacturer page map (needed by all generators)
print("Building system cross-reference map...")
manufacturers = _group_by_manufacturer(db)
_build_system_page_map_from_data(manufacturers, coverages, db)
print(f" {len(_system_page_map)} system IDs mapped to pages")
# Generate platform pages
print("Generating platform pages...")
(docs / "platforms" / "index.md").write_text(generate_platform_index(coverages))
for name, cov in coverages.items():
(docs / "platforms" / f"{name}.md").write_text(generate_platform_page(name, cov, registry, emulator_files))
# Generate system pages
print("Generating system pages...")
(docs / "systems" / "index.md").write_text(generate_systems_index(manufacturers))
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
page = generate_system_page(mfr, consoles, platform_files, emulator_files)
(docs / "systems" / f"{slug}.md").write_text(page)
# Generate emulator pages
print("Generating emulator pages...")
(docs / "emulators" / "index.md").write_text(generate_emulators_index(profiles))
for name, profile in profiles.items():
page = generate_emulator_page(name, profile, db, platform_files)
(docs / "emulators" / f"{name}.md").write_text(page)
# Generate cross-reference page
print("Generating cross-reference page...")
(docs / "cross-reference.md").write_text(
generate_cross_reference(coverages, profiles)
)
# Generate gap analysis page
print("Generating gap analysis page...")
(docs / "gaps.md").write_text(
generate_gap_analysis(profiles, coverages, db)
)
# Generate contributing
print("Generating contributing page...")
(docs / "contributing.md").write_text(generate_contributing())
# Update mkdocs.yml nav section only (avoid yaml.dump round-trip mangling quotes)
print("Updating mkdocs.yml nav...")
nav = generate_mkdocs_nav(coverages, manufacturers, profiles)
nav_yaml = yaml.dump({"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True)
with open("mkdocs.yml") as f:
content = f.read()
# Replace nav section (everything from \nnav: to the next top-level key or EOF)
import re
if "\nnav:" in content:
content = re.sub(r'\nnav:\n(?:[ \t]+.*\n?)*', '\n' + nav_yaml, content, count=1)
else:
content += "\n" + nav_yaml
with open("mkdocs.yml", "w") as f:
f.write(content)
total_pages = (
1 # home
+ 1 + len(coverages) # platform index + detail
+ 1 + len(manufacturers) # system index + detail
+ 1 # cross-reference
+ 1 + len(profiles) # emulator index + detail
+ 1 # gap analysis
+ 1 # contributing
)
print(f"\nGenerated {total_pages} pages in {args.docs_dir}/")
if __name__ == "__main__":
main()