Files
libretro/scripts/generate_site.py
2026-03-25 14:32:48 +01:00

1406 lines
52 KiB
Python

#!/usr/bin/env python3
"""Generate MkDocs site pages from database.json, platform configs, and emulator profiles.
Reads the same data sources as verify.py and generate_pack.py to produce
a complete documentation site. Zero manual content.
Usage:
python scripts/generate_site.py
python scripts/generate_site.py --db database.json --platforms-dir platforms
"""
from __future__ import annotations
import argparse
import json
import os
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
sys.exit(1)
sys.path.insert(0, os.path.dirname(__file__))
from common import load_database, load_emulator_profiles, load_platform_config
from generate_readme import compute_coverage
from verify import verify_platform
# NOTE(review): presumably the root directory the generated pages are written
# to -- the writer code is not visible here, confirm.
DOCS_DIR = "docs"
# Site title used in the H1 heading of the generated pages.
SITE_NAME = "RetroBIOS"
# Upstream repository; RELEASE_URL below is derived from it.
REPO_URL = "https://github.com/Abdess/retrobios"
# Target of the "Pack" / "Download ... Pack" links on the home and platform pages.
RELEASE_URL = f"{REPO_URL}/releases/latest"
# NOTE(review): looks like the docs sub-directories owned by this generator -- confirm.
GENERATED_DIRS = ["platforms", "systems", "emulators"]
# Base URL for the per-console PNG icons shown in system page headings.
SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
# Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking.
# Filled by _build_system_page_map_from_data() and read by _system_link().
_system_page_map: dict[str, tuple[str, str]] = {}
def _build_system_page_map_from_data(
    manufacturers: dict, coverages: dict, db: dict,
) -> None:
    """Populate the global ``_system_page_map`` (system_id -> page location).

    For every system declared by a platform config in ``coverages`` the
    function records ``(manufacturer_slug, console_name)``. System ids that
    are already present in the map are left untouched.

    Two strategies are tried in order:

    1. Trace one of the system's declared file names back to the
       manufacturer/console it was grouped under in ``manufacturers``.
    2. Match the system id (or any dash-separated suffix of it, longest
       first) against normalized console directory names.

    Args:
        manufacturers: manufacturer -> console -> list of file entries, each
            entry carrying a ``name`` key.
        coverages: platform key -> coverage dict; only ``cov["config"]`` and
            its ``systems`` mapping are read.
        db: accepted for interface compatibility; not used by the lookup.
    """
    # Reverse index: filename -> (manufacturer, console) from the bios/ grouping.
    file_to_console: dict[str, tuple[str, str]] = {}
    for mfr, consoles in manufacturers.items():
        for console, entries in consoles.items():
            for entry in entries:
                file_to_console[entry["name"]] = (mfr, console)

    # Normalized console names plus manufacturer-prefixed aliases. The fixed
    # prefixes are registered for every console regardless of manufacturer, so
    # later manufacturers overwrite earlier ones on a shared key.
    console_norm: dict[str, tuple[str, str]] = {}
    for mfr, consoles in manufacturers.items():
        slug = mfr.lower().replace(" ", "-")
        for console in consoles:
            norm = console.lower().replace(" ", "-")
            target = (slug, console)
            console_norm[norm] = target
            for prefix in (f"{slug}-", "nintendo-", "sega-", "sony-", "snk-", "nec-"):
                console_norm[f"{prefix}{norm}"] = target

    for cov in coverages.values():
        for sys_id, system in cov["config"].get("systems", {}).items():
            if sys_id in _system_page_map:
                continue

            # Strategy 1: first declared file that is known under bios/.
            for fe in system.get("files", []):
                hit = file_to_console.get(fe.get("name", ""))
                if hit is not None:
                    mfr, console = hit
                    _system_page_map[sys_id] = (mfr.lower().replace(" ", "-"), console)
                    break
            if sys_id in _system_page_map:
                continue

            # Strategy 2: the full id first, then progressively shorter
            # suffixes ("nintendo-wii" matches "wii").
            parts = sys_id.split("-")
            for start in range(len(parts)):
                suffix = "-".join(parts[start:])
                if suffix in console_norm:
                    _system_page_map[sys_id] = console_norm[suffix]
                    break
def _system_link(sys_id: str, prefix: str = "") -> str:
    """Return a markdown link to the system's section on its manufacturer page.

    Systems missing from ``_system_page_map`` are returned as the bare id.
    The anchor is the console name lower-cased with spaces and slashes
    replaced by dashes.
    """
    location = _system_page_map.get(sys_id)
    if location is None:
        return sys_id
    slug, console = location
    anchor = console.lower()
    for separator in (" ", "/"):
        anchor = anchor.replace(separator, "-")
    return f"[{sys_id}]({prefix}systems/{slug}.md#{anchor})"
def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None:
"""Render any YAML value as indented markdown."""
pad = " " * indent
if isinstance(val, dict):
for k, v in val.items():
if isinstance(v, dict):
lines.append(f"{pad}**{k}:**")
_render_yaml_value(lines, v, indent + 4)
elif isinstance(v, list):
lines.append(f"{pad}**{k}:**")
for item in v:
if isinstance(item, dict):
parts = [f"{ik}: {iv}" for ik, iv in item.items()]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
else:
lines.append(f"{pad}- **{k}:** {v}")
elif isinstance(val, list):
for item in val:
if isinstance(item, dict):
parts = [f"{ik}: {iv}" for ik, iv in item.items()]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
else:
lines.append(f"{pad}{val}")
def _platform_link(name: str, display: str, prefix: str = "") -> str:
"""Generate a markdown link to a platform page."""
return f"[{display}]({prefix}platforms/{name}.md)"
def _emulator_link(name: str, prefix: str = "") -> str:
"""Generate a markdown link to an emulator page."""
return f"[{name}]({prefix}emulators/{name}.md)"
def _timestamp() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _fmt_size(size: int) -> str:
if size >= 1024 * 1024 * 1024:
return f"{size / (1024**3):.1f} GB"
if size >= 1024 * 1024:
return f"{size / (1024**2):.1f} MB"
if size >= 1024:
return f"{size / 1024:.1f} KB"
return f"{size} B"
def _pct(n: int, total: int) -> str:
if total == 0:
return "0%"
return f"{n / total * 100:.1f}%"
def _status_icon(pct: float) -> str:
if pct >= 100:
return "OK"
if pct >= 95:
return "~OK"
return "partial"
# ---------------------------------------------------------------------------
# Home page
# ---------------------------------------------------------------------------
def generate_home(db: dict, coverages: dict, profiles: dict,
                  registry: dict | None = None) -> str:
    """Render the site landing page as markdown.

    The page shows the global totals read from ``db`` (``total_files`` and
    ``total_size``), one table row per entry of ``coverages`` sorted by its
    ``platform`` display name, a count of emulator profiles per
    ``core_classification`` (profiles of type ``alias`` or ``test`` are
    ignored), navigation buttons and a generation timestamp.

    ``registry`` optionally maps a platform key to a dict whose ``logo`` URL
    is rendered in the first column of the platform table.
    """
    file_total = db.get("total_files", 0)
    byte_total = db.get("total_size", 0)
    generated_at = _timestamp()
    logos = registry or {}

    # Aliases and test cores are not counted as emulator profiles.
    counted = [
        prof for prof in profiles.values()
        if prof.get("type") not in ("alias", "test")
    ]
    per_class: dict[str, int] = {}
    for prof in counted:
        label = prof.get("core_classification", "unclassified")
        per_class[label] = per_class.get(label, 0) + 1

    page = [
        f"# {SITE_NAME}",
        "",
        "Complete BIOS and firmware collection for retrogaming emulators.",
        "",
        "---",
        "",
        f"**{file_total:,}** files | **{len(coverages)}** platforms | "
        f"**{len(counted)}** emulator profiles | **{_fmt_size(byte_total)}** total",
        "",
        "## Platforms",
        "",
        "| | Platform | Coverage | Verified | Download |",
        "|---|----------|----------|----------|----------|",
    ]

    for key, cov in sorted(coverages.items(), key=lambda item: item[1]["platform"]):
        title = cov["platform"]
        share = _pct(cov["present"], cov["total"])
        logo_url = logos.get(key, {}).get("logo", "")
        if logo_url:
            logo_cell = f"![{title}]({logo_url}){{ width=20 loading=lazy }}"
        else:
            logo_cell = ""
        page.append(
            f"| {logo_cell} | [{title}](platforms/{key}.md) | "
            f"{cov['present']}/{cov['total']} ({share}) | "
            f"{cov['verified']} | "
            f"[Pack]({RELEASE_URL}) |"
        )

    page += [
        "",
        "## Emulator profiles",
        "",
        "| Classification | Count |",
        "|---------------|-------|",
    ]
    # Most frequent classification first; ties keep first-seen order.
    for label, count in sorted(per_class.items(), key=lambda item: -item[1]):
        page.append(f"| {label} | {count} |")

    page += [
        "",
        "---",
        "",
        f"[Systems](systems/){{ .md-button }} "
        f"[Emulators](emulators/){{ .md-button }} "
        f"[Cross-reference](cross-reference.md){{ .md-button }} "
        f"[Gap Analysis](gaps.md){{ .md-button }} "
        f"[Contributing](contributing.md){{ .md-button .md-button--primary }}",
        "",
        f"*Generated on {generated_at}.*",
    ]
    return "\n".join(page) + "\n"
# ---------------------------------------------------------------------------
# Platform pages
# ---------------------------------------------------------------------------
def generate_platform_index(coverages: dict) -> str:
    """Render the platforms index table as markdown.

    One row per entry of ``coverages``, ordered by the ``platform`` display
    name. The status column reads ``archived`` when the platform config's
    ``status`` is ``"archived"``; otherwise it is derived from the coverage
    ``percentage`` via ``_status_icon``.
    """
    rows = []
    for key, cov in sorted(coverages.items(), key=lambda item: item[1]["platform"]):
        share = _pct(cov["present"], cov["total"])
        if cov["config"].get("status", "active") == "archived":
            state = "archived"
        else:
            state = _status_icon(cov["percentage"])
        rows.append(
            f"| [{cov['platform']}]({key}.md) | "
            f"{cov['present']}/{cov['total']} ({share}) | "
            f"{cov['mode']} | {state} |"
        )

    header = [
        f"# Platforms - {SITE_NAME}",
        "",
        "| Platform | Coverage | Verification | Status |",
        "|----------|----------|-------------|--------|",
    ]
    return "\n".join(header + rows) + "\n"
def generate_platform_page(name: str, cov: dict, registry: dict | None = None,
                           emulator_files: dict | None = None) -> str:
    """Render the markdown page for a single platform.

    Args:
        name: platform key; used to look up the logo in ``registry``.
        cov: coverage dict for the platform. Reads ``config``, ``platform``,
            ``mode``, ``present``, ``total``, ``verified``, ``untested``,
            ``missing`` and ``details`` (a list of per-file dicts with
            ``name``/``status`` and optionally ``system``, ``sha1``,
            ``expected_md5``, ``size`` and ``reason``).
        registry: optional platform key -> dict with a ``logo`` URL.
        emulator_files: optional emulator name -> dict whose ``systems``
            collection is used to list the emulators handling each system.

    Returns:
        The page content, terminated by a newline.
    """
    config = cov["config"]
    display = cov["platform"]
    mode = cov["mode"]
    pct = _pct(cov["present"], cov["total"])
    logo_url = (registry or {}).get(name, {}).get("logo", "")
    logo_md = f"![{display}]({logo_url}){{ width=48 align=right }}\n\n" if logo_url else ""
    homepage = config.get("homepage", "")
    version = config.get("version", "")
    hash_type = config.get("hash_type", "")
    base_dest = config.get("base_destination", "")

    lines = [
        f"# {display} - {SITE_NAME}",
        "",
        logo_md + "| | |",
        "|---|---|",
        f"| Verification | {mode} |",
        f"| Hash type | {hash_type} |",
    ]
    if version:
        lines.append(f"| Version | {version} |")
    if base_dest:
        lines.append(f"| BIOS path | `{base_dest}/` |")
    if homepage:
        lines.append(f"| Homepage | [{homepage}]({homepage}) |")
    lines.extend([
        "",
        f"**Coverage:** {cov['present']}/{cov['total']} ({pct}) | "
        f"**Verified:** {cov['verified']} | **Untested:** {cov['untested']} | **Missing:** {cov['missing']}",
        "",
        f"[Download {display} Pack]({RELEASE_URL}){{ .md-button }}",
        "",
    ])

    # Config entries carry the expected hashes/sizes. The same filename may be
    # declared by several systems with different hashes, so the primary lookup
    # is per (system, filename); the name-only index is kept as a fallback for
    # detail rows whose system does not match a config system.
    cfg_by_system: dict[tuple[str, str], dict] = {}
    cfg_by_name: dict[str, dict] = {}
    for cfg_sys_id, system in config.get("systems", {}).items():
        for fe in system.get("files", []):
            cfg_name = fe.get("name", "")
            if cfg_name:
                cfg_by_system[(cfg_sys_id, cfg_name)] = fe
                cfg_by_name[cfg_name] = fe

    def short_hash(digest: str) -> str:
        # Show at most the first 12 characters of a digest; "-" when absent.
        if not digest:
            return "-"
        if len(digest) > 12:
            return f"`{digest[:12]}...`"
        return f"`{digest}`"

    # Group verification details by system.
    by_system: dict[str, list] = {}
    for d in cov["details"]:
        by_system.setdefault(d.get("system", "unknown"), []).append(d)

    for sys_id, files in sorted(by_system.items()):
        ok_count = sum(1 for f in files if f["status"] == "ok")
        total = len(files)

        # Cross-reference: emulators that handle this system.
        sys_emus = []
        if emulator_files:
            sys_emus = [
                emu_name for emu_name, emu_data in emulator_files.items()
                if sys_id in emu_data.get("systems", set())
            ]

        lines.append(f"## {_system_link(sys_id, '../')}")
        lines.append("")
        lines.append(f"{ok_count}/{total} files verified")
        if sys_emus:
            emu_links = ", ".join(_emulator_link(e, "../") for e in sorted(sys_emus))
            lines.append(f"Emulators: {emu_links}")
        lines.append("")

        lines.append("| File | Status | Size | SHA1 | MD5 |")
        lines.append("|------|--------|------|------|-----|")
        for f in sorted(files, key=lambda x: x["name"]):
            status = f["status"]
            fname = f["name"]

            cfg_entry = cfg_by_system.get((sys_id, fname))
            if cfg_entry is None:
                cfg_entry = cfg_by_name.get(fname, {})
            sha1 = cfg_entry.get("sha1", f.get("sha1", ""))
            md5 = cfg_entry.get("md5", f.get("expected_md5", ""))
            size = cfg_entry.get("size", f.get("size", 0))
            size_str = _fmt_size(size) if size else "-"

            if status == "ok":
                status_display = "OK"
            elif status == "untested":
                reason = f.get("reason", "")
                status_display = f"Untested: {reason}" if reason else "Untested"
            elif status == "missing":
                status_display = "Missing"
            else:
                status_display = status
            lines.append(
                f"| `{fname}` | {status_display} | {size_str} | "
                f"{short_hash(sha1)} | {short_hash(md5)} |"
            )
        lines.append("")

    lines.append(f"*Generated on {_timestamp()}*")
    return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# System pages
# ---------------------------------------------------------------------------
def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]:
"""Group files by manufacturer -> console -> files."""
manufacturers: dict[str, dict[str, list]] = {}
for sha1, entry in db.get("files", {}).items():
path = entry.get("path", "")
parts = path.split("/")
if len(parts) < 3 or parts[0] != "bios":
continue
manufacturer = parts[1]
console = parts[2]
manufacturers.setdefault(manufacturer, {}).setdefault(console, []).append(entry)
return manufacturers
def generate_systems_index(manufacturers: dict) -> str:
    """Render the systems index: one table row per manufacturer.

    Each row links to ``<slug>.md`` (manufacturer lower-cased, spaces turned
    into dashes) and shows the number of consoles and the total number of
    files across them. Manufacturers are listed in sorted order.
    """
    table = [
        f"# Systems - {SITE_NAME}",
        "",
        "| Manufacturer | Consoles | Files |",
        "|-------------|----------|-------|",
    ]
    for mfr, consoles in sorted(manufacturers.items(), key=lambda item: item[0]):
        slug = mfr.lower().replace(" ", "-")
        n_files = sum(map(len, consoles.values()))
        table.append(f"| [{mfr}]({slug}.md) | {len(consoles)} | {n_files} |")
    return "\n".join(table) + "\n"
def generate_system_page(
    manufacturer: str,
    consoles: dict[str, list],
    platform_files: dict[str, set],
    emulator_files: dict[str, dict],
) -> str:
    """Render the markdown page listing every console of one manufacturer.

    Args:
        manufacturer: display name, used in the title and the icon file name.
        consoles: console name -> list of file entries (``name``, ``path`` and
            optionally ``sha1``, ``md5``, ``size``).
        platform_files: platform key -> collection of declared file names.
        emulator_files: emulator name -> dict whose ``files`` collection holds
            the file names the emulator loads.

    Entries whose path contains ``/.variants/`` are listed in a compact
    "Variants" section after the main files of their console.

    Returns:
        The page content, terminated by a newline.
    """
    lines = [
        f"# {manufacturer} - {SITE_NAME}",
        "",
    ]
    for console_name in sorted(consoles):
        files = consoles[console_name]
        icon_name = f"{manufacturer} - {console_name}".replace("/", " ")
        icon_url = f"{SYSTEM_ICON_BASE}/{icon_name.replace(' ', '%20')}.png"
        lines.append(f"## ![{console_name}]({icon_url}){{ width=24 }} {console_name}")
        lines.append("")

        main_files = [f for f in files if "/.variants/" not in f["path"]]
        variant_files = [f for f in files if "/.variants/" in f["path"]]

        for f in sorted(main_files, key=lambda x: x["name"]):
            name = f["name"]
            sha1_full = f.get("sha1", "unknown")
            md5_full = f.get("md5", "unknown")
            size = _fmt_size(f.get("size", 0))

            # Which platforms declare this file / which emulators load it.
            plats = sorted(p for p, names in platform_files.items() if name in names)
            emus = sorted(
                e for e, data in emulator_files.items()
                if name in data.get("files", set())
            )

            lines.append(f"**`{name}`** ({size})")
            lines.append("")
            lines.append(f"- SHA1: `{sha1_full}`")
            lines.append(f"- MD5: `{md5_full}`")
            # The other generated pages that live one directory below the docs
            # root (platform and emulator pages) link to sibling sections with
            # "../"; system pages are linked as systems/<slug>.md, so the same
            # single-level prefix applies here.
            if plats:
                plat_links = [_platform_link(p, p, "../") for p in plats]
                lines.append(f"- Platforms: {', '.join(plat_links)}")
            if emus:
                emu_links = [_emulator_link(e, "../") for e in emus]
                lines.append(f"- Emulators: {', '.join(emu_links)}")
            lines.append("")

        if variant_files:
            lines.append("**Variants:**")
            lines.append("")
            for v in sorted(variant_files, key=lambda x: x["name"]):
                vname = v["name"]
                vmd5 = v.get("md5", "unknown")
                lines.append(f"- `{vname}` MD5: `{vmd5}`")
            lines.append("")

    lines.append(f"*Generated on {_timestamp()}*")
    return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# Emulator pages
# ---------------------------------------------------------------------------
def generate_emulators_index(profiles: dict) -> str:
    """Render the emulators index page.

    Profiles whose ``type`` is neither ``alias`` nor ``test`` get one table
    row each (sorted by profile key) showing the emulator name, type, up to
    three systems (with a ``+N`` suffix for the rest) and the file count.
    Alias profiles are listed in a separate "Aliases" table that links to
    their ``alias_of`` target. Test profiles are not listed.
    """
    regular: dict = {}
    aliases: dict = {}
    for key, prof in profiles.items():
        kind = prof.get("type")
        if kind == "alias":
            aliases[key] = prof
        elif kind != "test":
            regular[key] = prof

    page = [
        f"# Emulators - {SITE_NAME}",
        "",
        "| Engine | Type | Systems | Files |",
        "|--------|------|---------|-------|",
    ]
    for key in sorted(regular):
        prof = regular[key]
        systems = prof.get("systems", [])
        shown = ", ".join(systems[:3])
        if len(systems) > 3:
            shown += f" +{len(systems)-3}"
        page.append(
            f"| [{prof.get('emulator', key)}]({key}.md) | {prof.get('type', 'unknown')} | "
            f"{shown} | {len(prof.get('files', []))} |"
        )

    if aliases:
        page += ["", "## Aliases", "", "| Core | Points to |", "|------|-----------|"]
        for key in sorted(aliases):
            target = aliases[key].get("alias_of", "unknown")
            page.append(f"| {key} | [{target}]({target}.md) |")

    return "\n".join(page) + "\n"
def _emu_size_bounds(fmin, fmax) -> list[str]:
    """Return the ``min X`` / ``max Y`` fragments for whichever bounds are set."""
    bounds = []
    if fmin:
        bounds.append(f"min {_fmt_size(fmin)}")
    if fmax:
        bounds.append(f"max {_fmt_size(fmax)}")
    return bounds


def _emu_file_badges(f: dict, in_repo: bool) -> list[str]:
    """Return the short status badges shown next to a profile file name."""
    badges = ["**required**" if f.get("required", False) else "optional"]
    if f.get("hle_fallback", False):
        badges.append("HLE available")
    mode = f.get("mode", "")
    if mode:
        badges.append(str(mode))
    category = f.get("category", "")
    if category and category != "bios":
        badges.append(str(category))
    region = f.get("region", "")
    if region:
        if isinstance(region, list):
            badges.append(", ".join(str(r) for r in region))
        else:
            badges.append(str(region))
    storage = f.get("storage", "")
    if storage and storage != "embedded":
        badges.append(str(storage))
    if f.get("bundled", False):
        badges.append("bundled in binary")
    if f.get("embedded", False):
        badges.append("embedded")
    if f.get("has_builtin", False):
        badges.append("has built-in fallback")
    archive = f.get("archive", "")
    if archive:
        badges.append(f"in `{archive}`")
    ftype = f.get("type", "")
    if ftype and ftype != "bios":
        badges.append(str(ftype))
    if not in_repo:
        badges.append("missing from repo")
    return badges


def _emu_file_details(f: dict, fname: str, platform_files: dict | None) -> list[str]:
    """Return the detail bullets (path, size, hashes, ...) for a profile file."""
    details = []
    fpath = f.get("path", "")
    if fpath and fpath != fname:
        details.append(f"Path: `{fpath}`")
    fsystem = f.get("system", "")
    if fsystem:
        details.append(f"System: {_system_link(fsystem, '../')}")

    size = f.get("size")
    bounds = _emu_size_bounds(f.get("min_size"), f.get("max_size"))
    if size:
        size_str = _fmt_size(size)
        if bounds:
            size_str += f" ({', '.join(bounds)})"
        details.append(f"Size: {size_str}")
    elif bounds:
        details.append(f"Size: {', '.join(bounds)}")

    for key, label in (
        ("sha1", "SHA1"), ("md5", "MD5"), ("crc32", "CRC32"),
        ("sha256", "SHA256"), ("known_hash_adler32", "Adler32"),
    ):
        digest = f.get(key, "")
        if digest:
            details.append(f"{label}: `{digest}`")

    aliases = f.get("aliases", [])
    if aliases:
        details.append(f"Aliases: {', '.join(f'`{a}`' for a in aliases)}")
    priority = f.get("priority")
    if priority is not None:
        details.append(f"Priority: {priority}")
    fast_boot = f.get("fast_boot")
    if fast_boot is not None:
        details.append(f"Fast boot: {'yes' if fast_boot else 'no'}")

    validation = f.get("validation", [])
    if validation:
        if isinstance(validation, list):
            details.append(f"Validation: {', '.join(str(v) for v in validation)}")
        elif isinstance(validation, dict):
            for scope, checks in validation.items():
                # A scalar check would otherwise be joined character by character.
                if not isinstance(checks, (list, tuple)):
                    checks = [checks]
                details.append(
                    f"Validation ({scope}): {', '.join(str(c) for c in checks)}"
                )

    source_ref = f.get("source_ref", "")
    if source_ref:
        details.append(f"Source: `{source_ref}`")
    if platform_files:
        plats = sorted(p for p, names in platform_files.items() if fname in names)
        if plats:
            plat_links = [_platform_link(p, p, "../") for p in plats]
            details.append(f"Platforms: {', '.join(plat_links)}")

    dest = f.get("dest", f.get("destination", ""))
    if dest and dest != fname and dest != fpath:
        details.append(f"Destination: `{dest}`")
    config_key = f.get("config_key", "")
    if config_key:
        details.append(f"Config key: `{config_key}`")
    fpattern = f.get("pattern", "")
    if fpattern:
        details.append(f"Pattern: `{fpattern}`")
    region_check = f.get("region_check")
    if region_check is not None:
        details.append(f"Region check: {'yes' if region_check else 'no'}")
    size_note = f.get("size_note", "")
    if size_note:
        details.append(f"Size note: {size_note}")
    size_options = f.get("size_options", [])
    if size_options:
        details.append(f"Size options: {', '.join(_fmt_size(s) for s in size_options)}")
    size_range = f.get("size_range", "")
    if size_range:
        details.append(f"Size range: {size_range}")
    return details


def _emu_file_contents_lines(contents: list) -> list[str]:
    """Return the "Contents" bullet and up to ten nested entry bullets."""
    out = [f"- Contents ({len(contents)} entries):"]
    for c in contents[:10]:
        if isinstance(c, dict):
            parts = [f"`{c.get('name', '')}`"]
            cdesc = c.get("description", "")
            if cdesc:
                parts.append(str(cdesc))
            csize = c.get("size", "")
            if csize:
                # Only numeric sizes can be formatted; anything else is shown verbatim.
                if isinstance(csize, (int, float)):
                    parts.append(_fmt_size(csize))
                else:
                    parts.append(str(csize))
            # Nested bullets need a four-space indent to stay under "Contents".
            out.append(f"    - {' — '.join(parts)}")
        else:
            out.append(f"    - {c}")
    if len(contents) > 10:
        out.append(f"    - ... and {len(contents) - 10} more")
    return out


def generate_emulator_page(name: str, profile: dict, db: dict,
                           platform_files: dict | None = None) -> str:
    """Render the markdown page for one emulator profile.

    Alias profiles (``type == "alias"``) get a short stub pointing at their
    ``alias_of`` target. Every other profile gets a metadata table,
    collapsible admonitions for ``platform_details``, the known structured
    blocks and the notes, a per-file section (badges, details, contents),
    the data directories and a generation timestamp.

    Args:
        name: profile key; used as fallback emulator/core name.
        profile: the emulator profile dict.
        db: database dict; only ``indexes.by_name`` is read, to tell whether
            a file name is available in the repository.
        platform_files: optional platform key -> collection of declared file
            names, used for the per-file "Platforms" detail.

    Returns:
        The page content, terminated by a newline.
    """
    if profile.get("type") == "alias":
        parent = profile.get("alias_of", "unknown")
        return (
            f"# {name} - {SITE_NAME}\n\n"
            f"This core uses the same firmware as **{parent}**.\n\n"
            f"See [{parent}]({parent}.md) for details.\n"
        )

    emu_name = profile.get("emulator", name)
    emu_type = profile.get("type", "unknown")
    classification = profile.get("core_classification", "")
    source = profile.get("source", "")
    upstream = profile.get("upstream", "")
    version = profile.get("core_version", "unknown")
    profiled = profile.get("profiled_date", "unknown")
    systems = profile.get("systems", [])
    cores = profile.get("cores", [name])
    files = profile.get("files", [])
    notes_raw = profile.get("notes", profile.get("note", ""))
    notes = str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else ""
    exclusion = profile.get("exclusion_note", "")
    data_dirs = profile.get("data_directories", [])

    lines = [
        f"# {emu_name} - {SITE_NAME}",
        "",
        "| | |",
        "|---|---|",
        f"| Type | {emu_type} |",
    ]
    if classification:
        lines.append(f"| Classification | {classification} |")
    if source:
        lines.append(f"| Source | [{source}]({source}) |")
    if upstream and upstream != source:
        lines.append(f"| Upstream | [{upstream}]({upstream}) |")
    lines.append(f"| Version | {version} |")
    lines.append(f"| Profiled | {profiled} |")
    if cores:
        lines.append(f"| Cores | {', '.join(str(c) for c in cores)} |")
    if systems:
        sys_links = [_system_link(s, "../") for s in systems]
        lines.append(f"| Systems | {', '.join(sys_links)} |")

    # Rows shown only when the value is truthy.
    for field, label in (
        ("mame_version", "MAME version"), ("author", "Author"),
        ("based_on", "Based on"), ("firmware_version", "Firmware version"),
    ):
        val = profile.get(field, "")
        if val:
            lines.append(f"| {label} | {val} |")

    # Additional metadata rows: shown for any value except None / "" (so 0
    # and False are kept); http(s) strings are rendered as links.
    for field, label in (
        ("core", "Core ID"), ("core_name", "Core name"),
        ("bios_size", "BIOS size"), ("bios_directory", "BIOS directory"),
        ("bios_detection", "BIOS detection"), ("bios_selection", "BIOS selection"),
        ("firmware_file", "Firmware file"), ("firmware_source", "Firmware source"),
        ("firmware_install", "Firmware install"), ("firmware_detection", "Firmware detection"),
        ("resources_directory", "Resources directory"), ("rom_path", "ROM path"),
        ("game_count", "Game count"), ("verification", "Verification mode"),
        ("source_ref", "Source ref"), ("analysis_date", "Analysis date"),
        ("analysis_commit", "Analysis commit"),
    ):
        val = profile.get(field)
        if val is not None and val != "":
            if isinstance(val, str) and val.startswith("http"):
                lines.append(f"| {label} | [{val}]({val}) |")
            else:
                lines.append(f"| {label} | {val} |")
    lines.append("")

    # Admonition bodies must be indented by four spaces to belong to the block.
    platform_details = profile.get("platform_details")
    if platform_details and isinstance(platform_details, dict):
        lines.extend(["???+ info \"Platform details\"", ""])
        for pk, pv in platform_details.items():
            if isinstance(pv, dict):
                lines.append(f"    **{pk}:**")
                for sk, sv in pv.items():
                    lines.append(f"    - {sk}: {sv}")
            elif isinstance(pv, list):
                lines.append(f"    **{pk}:** {', '.join(str(x) for x in pv)}")
            else:
                lines.append(f"    **{pk}:** {pv}")
        lines.append("")

    # Remaining structured data blocks, each as its own collapsible section.
    structured_blocks = (
        ("analysis", "Source analysis"),
        ("memory_layout", "Memory layout"),
        ("regions", "Regions"),
        ("nvm_layout", "NVM layout"),
        ("model_kickstart_map", "Model kickstart map"),
        ("builtin_boot_roms", "Built-in boot ROMs"),
        ("common_bios_filenames", "Common BIOS filenames"),
        ("valid_bios_crc32", "Valid BIOS CRC32"),
        ("dev_flash", "dev_flash"),
        ("dev_flash2", "dev_flash2"),
        ("dev_flash3", "dev_flash3"),
        ("firmware_modules", "Firmware modules"),
        ("firmware_titles", "Firmware titles"),
        ("fallback_fonts", "Fallback fonts"),
        ("io_devices", "I/O devices"),
        ("partitions", "Partitions"),
        ("mlc_structure", "MLC structure"),
        ("machine_directories", "Machine directories"),
        ("machine_properties", "Machine properties"),
        ("whdload_kickstarts", "WHDLoad kickstarts"),
        ("bios_identical_to", "BIOS identical to"),
        ("pack_structure", "Pack structure"),
    )
    for field, label in structured_blocks:
        val = profile.get(field)
        if val is None:
            continue
        lines.append(f"???+ abstract \"{label}\"")
        lines.append("")
        _render_yaml_value(lines, val, indent=4)
        lines.append("")

    if notes:
        # Every line of a multi-line note needs the admonition indent.
        indented = notes.replace("\n", "\n    ")
        lines.extend(["???+ note \"Technical notes\"",
                      f"    {indented}",
                      ""])

    if not files:
        lines.append("No BIOS or firmware files required.")
        if exclusion:
            lines.extend([
                "",
                "!!! info \"Why no files\"",
                f"    {exclusion}",
            ])
    else:
        by_name = db.get("indexes", {}).get("by_name", {})

        # Stats by category.
        bios_files = [f for f in files if f.get("category", "bios") == "bios"]
        game_data = [f for f in files if f.get("category") == "game_data"]
        bios_zips = [f for f in files if f.get("category") == "bios_zip"]
        in_repo_count = sum(1 for f in files if f.get("name", "") in by_name)
        missing_count = len(files) - in_repo_count
        req_count = sum(1 for f in files if f.get("required"))
        opt_count = len(files) - req_count
        hle_count = sum(1 for f in files if f.get("hle_fallback"))

        summary = [
            f"**{len(files)} files**",
            f"{req_count} required, {opt_count} optional",
            f"{in_repo_count} in repo, {missing_count} missing",
        ]
        if hle_count:
            summary.append(f"{hle_count} with HLE fallback")
        lines.append(" | ".join(summary))

        if game_data or bios_zips:
            cats = []
            if bios_files:
                cats.append(f"{len(bios_files)} BIOS")
            if game_data:
                cats.append(f"{len(game_data)} game data")
            if bios_zips:
                cats.append(f"{len(bios_zips)} BIOS ZIPs")
            lines.append(f"Categories: {', '.join(cats)}")
        lines.append("")

        # One section per declared file.
        for f in files:
            fname = f.get("name", "")
            badges = _emu_file_badges(f, fname in by_name)
            lines.append(f"**`{fname}`** — {', '.join(badges)}")
            desc = f.get("description", "")
            if desc:
                lines.append(f": {desc}")
            lines.append("")

            lines.extend(f"- {d}" for d in _emu_file_details(f, fname, platform_files))
            fnote = f.get("note", f.get("notes", ""))
            if fnote:
                lines.append(f"- {fnote}")
            contents = f.get("contents", [])
            if contents:
                lines.extend(_emu_file_contents_lines(contents))
            lines.append("")

    if data_dirs:
        if lines[-1] != "":
            lines.append("")
        lines.extend(["## Data directories", ""])
        for dd in data_dirs:
            ref = dd.get("ref", "")
            dest = dd.get("destination", "")
            lines.append(f"- `{ref}` → `{dest}`")
        lines.append("")

    # Keep the footer in its own paragraph: without a blank line it would be
    # absorbed by the preceding paragraph or admonition.
    if lines[-1] != "":
        lines.append("")
    lines.append(f"*Generated on {_timestamp()}*")
    return "\n".join(lines) + "\n"
# ---------------------------------------------------------------------------
# Contributing page
# ---------------------------------------------------------------------------
def generate_gap_analysis(
    profiles: dict,
    coverages: dict,
    db: dict,
) -> str:
    """Render the gap analysis page: emulator files that no platform declares.

    For every non-alias profile, each named file (names starting with ``<``
    are skipped) that is absent from all platform file sets counts as
    undeclared. The page reports global totals, a per-emulator table ordered
    by number of undeclared files, and a de-duplicated list of the files
    that are also missing from the repository (``db["indexes"]["by_name"]``).
    """
    by_name = db.get("indexes", {}).get("by_name", {})
    platform_files = _build_platform_file_index(coverages)

    # Every filename declared by at least one platform.
    declared: set = set().union(*platform_files.values())

    emulator_gaps = []
    for emu_name, profile in sorted(profiles.items()):
        if profile.get("type") == "alias":
            continue
        gaps = []
        for f in profile.get("files", []) or []:
            fname = f.get("name", "")
            if not fname or fname.startswith("<") or fname in declared:
                continue
            gaps.append({
                "name": fname,
                "required": f.get("required", False),
                "in_repo": fname in by_name,
                "source_ref": f.get("source_ref", ""),
            })
        if gaps:
            emulator_gaps.append((emu_name, profile.get("emulator", emu_name), gaps))

    # Totals count one hit per (emulator, file) pair, not per unique file.
    total_undeclared = sum(len(gaps) for _, _, gaps in emulator_gaps)
    total_in_repo = sum(
        1 for _, _, gaps in emulator_gaps for g in gaps if g["in_repo"]
    )
    total_missing = total_undeclared - total_in_repo

    lines = [
        f"# Gap Analysis - {SITE_NAME}",
        "",
        "Files that emulators load but platforms don't declare, and their availability.",
        "",
        "## Summary",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Total undeclared files | {total_undeclared} |",
        f"| Already in repo | {total_in_repo} |",
        f"| Missing from repo | {total_missing} |",
        f"| Emulators with gaps | {len(emulator_gaps)} |",
        "",
        "## Per Emulator",
        "",
        "| Emulator | Undeclared | In Repo | Missing |",
        "|----------|-----------|---------|---------|",
    ]
    for emu_name, display, gaps in sorted(emulator_gaps, key=lambda item: -len(item[2])):
        present = sum(1 for g in gaps if g["in_repo"])
        lines.append(
            f"| [{display}](emulators/{emu_name}.md) | {len(gaps)} | "
            f"{present} | {len(gaps) - present} |"
        )

    # Unique files absent from the repo, attributed to the first emulator
    # (in profile-key order) that references them.
    seen: set = set()
    missing_details = []
    for _, display, gaps in emulator_gaps:
        for g in gaps:
            if g["in_repo"] or g["name"] in seen:
                continue
            seen.add(g["name"])
            missing_details.append({
                "name": g["name"],
                "emulator": display,
                "required": g["required"],
                "source_ref": g["source_ref"],
            })

    if missing_details:
        lines += [
            "",
            f"## Missing Files ({len(missing_details)} unique)",
            "",
            "Files loaded by emulators but not available in the repository.",
            "",
            "| File | Emulator | Required | Source |",
            "|------|----------|----------|--------|",
        ]
        for m in sorted(missing_details, key=lambda item: item["name"]):
            req = "yes" if m["required"] else "no"
            lines.append(f"| `{m['name']}` | {m['emulator']} | {req} | {m['source_ref']} |")

    lines += ["", f"*Generated on {_timestamp()}*"]
    return "\n".join(lines) + "\n"
def _xref_upstream_url(profile: dict) -> str:
    """Return the profile's ``upstream`` URL, else its ``source`` URL, else ``""``.

    An empty or null ``upstream`` value falls through to ``source`` so that
    every view of the cross-reference links a profile the same way.
    """
    return profile.get("upstream") or profile.get("source") or ""


def _xref_platform_profiles(
    config: dict,
    unique: dict,
    core_to_profile: dict[str, str],
) -> dict:
    """Return the ``{profile_name: profile}`` subset of *unique* used by a platform.

    Resolution depends on the platform config's ``cores`` value:

    * ``"all_libretro"``: every profile whose ``type`` contains ``"libretro"``.
    * a list: each entry is matched first as a profile name, then as a core
      name through *core_to_profile*; unknown entries are ignored.
    * anything else: every profile sharing at least one system id with the
      platform's ``systems`` mapping.
    """
    platform_cores = config.get("cores", [])
    if platform_cores == "all_libretro":
        return {k: v for k, v in unique.items()
                if "libretro" in (v.get("type") or "")}
    if isinstance(platform_cores, list):
        matched: dict = {}
        for cname in platform_cores:
            cname_str = str(cname)
            if cname_str in unique:
                matched[cname_str] = unique[cname_str]
            elif cname_str in core_to_profile:
                pkey = core_to_profile[cname_str]
                matched[pkey] = unique[pkey]
        return matched
    # Fallback: system intersection
    psystems = set(config.get("systems", {}).keys())
    return {k: v for k, v in unique.items()
            if set(v.get("systems", [])) & psystems}


def generate_cross_reference(
    coverages: dict,
    profiles: dict,
) -> str:
    """Generate cross-reference: Platform -> Core -> Systems -> Upstream.

    The page has two parts: one table per platform listing the cores it
    uses, then a reverse table grouping cores by upstream emulator URL.

    Args:
        coverages: Mapping of platform name to a coverage dict; only its
            ``"platform"`` (display name) and ``"config"`` entries are read.
        profiles: Mapping of emulator profile name to profile dict. Profiles
            whose ``type`` is ``"alias"`` or ``"test"`` are left out.

    Returns:
        The Markdown page as a single string ending with a newline.
    """
    unique = {k: v for k, v in profiles.items()
              if v.get("type") not in ("alias", "test")}

    # Build core -> profile lookup by core name
    core_to_profile: dict[str, str] = {}
    for pname, p in unique.items():
        for core in p.get("cores", [pname]):
            core_to_profile[str(core)] = pname

    # Resolve each platform's profiles once so that the per-platform tables
    # and the reverse "platforms" column can never disagree.
    platform_profiles = {
        pname: _xref_platform_profiles(cov["config"], unique, core_to_profile)
        for pname, cov in coverages.items()
    }

    lines = [
        f"# Cross-reference - {SITE_NAME}",
        "",
        "Platform → Core → Systems → Upstream emulator.",
        "",
        "The libretro core is a port of the upstream emulator. "
        "Files, features, and validation may differ between the two.",
        "",
    ]

    # Per platform
    for pname in sorted(coverages, key=lambda x: coverages[x]["platform"]):
        cov = coverages[pname]
        display = cov["platform"]
        matched = platform_profiles[pname]

        lines.append(f"## [{display}](platforms/{pname}.md)")
        lines.append("")

        if cov["config"].get("cores", []) == "all_libretro":
            lines.append(f"**{len(matched)} cores** (all libretro)")
        else:
            lines.append(f"**{len(matched)} cores**")
        lines.append("")

        lines.append("| Core | Classification | Systems | Files | Upstream |")
        lines.append("|------|---------------|---------|-------|----------|")

        for emu_name in sorted(matched):
            p = matched[emu_name]
            emu_display = p.get("emulator", emu_name)
            cls = p.get("core_classification", "-")
            systems = p.get("systems", [])
            files = p.get("files", [])

            # Show at most three system ids, then a "+N" overflow marker
            sys_str = ", ".join(systems[:3])
            if len(systems) > 3:
                sys_str += f" +{len(systems) - 3}"

            # Count mode divergences
            libretro_only = sum(1 for f in files if f.get("mode") == "libretro")
            standalone_only = sum(1 for f in files if f.get("mode") == "standalone")
            file_str = str(len(files))
            if libretro_only or standalone_only:
                parts = []
                if libretro_only:
                    parts.append(f"{libretro_only} libretro-only")
                if standalone_only:
                    parts.append(f"{standalone_only} standalone-only")
                file_str += f" ({', '.join(parts)})"

            upstream_display = "-"
            upstream_url = _xref_upstream_url(p)
            if upstream_url:
                upstream_short = upstream_url.replace("https://github.com/", "")
                upstream_display = f"[{upstream_short}]({upstream_url})"

            lines.append(
                f"| [{emu_display}](emulators/{emu_name}.md) | {cls} | "
                f"{sys_str} | {file_str} | {upstream_display} |"
            )
        lines.append("")

    # Reverse view: by upstream emulator
    lines.extend([
        "## By upstream emulator",
        "",
        "| Upstream | Cores | Classification | Platforms |",
        "|----------|-------|---------------|-----------|",
    ])

    # Group profiles by upstream (same upstream/source fallback as above)
    by_upstream: dict[str, list[str]] = {}
    for emu_name in sorted(unique):
        upstream_url = _xref_upstream_url(unique[emu_name])
        if upstream_url:
            by_upstream.setdefault(upstream_url, []).append(emu_name)

    # Build platform membership per core
    platform_membership: dict[str, set[str]] = {}
    for pname, matched in platform_profiles.items():
        for emu_name in matched:
            platform_membership.setdefault(emu_name, set()).add(pname)

    for upstream_url in sorted(by_upstream):
        cores = by_upstream[upstream_url]
        upstream_short = upstream_url.replace("https://github.com/", "")
        classifications = set()
        all_plats: set[str] = set()
        for c in cores:
            classifications.add(unique[c].get("core_classification", "-"))
            all_plats.update(platform_membership.get(c, set()))

        cls_str = ", ".join(sorted(classifications))
        plat_str = ", ".join(sorted(all_plats)) if all_plats else "-"
        core_links = ", ".join(f"[{c}](emulators/{c}.md)" for c in sorted(cores))

        lines.append(
            f"| [{upstream_short}]({upstream_url}) | {core_links} | "
            f"{cls_str} | {plat_str} |"
        )

    lines.extend(["", f"*Generated on {_timestamp()}*"])
    return "\n".join(lines) + "\n"
def generate_contributing() -> str:
return """# Contributing - RetroBIOS
## Add a BIOS file
1. Fork this repository
2. Place the file in `bios/Manufacturer/Console/filename`
3. Variants (alternate hashes for the same file): place in `bios/Manufacturer/Console/.variants/`
4. Create a Pull Request - hashes are verified automatically
## Add a platform
1. Create a scraper in `scripts/scraper/` (inherit `BaseScraper`)
2. Read the platform's upstream source code to understand its BIOS check logic
3. Add entry to `platforms/_registry.yml`
4. Generate the platform YAML config
5. Test: `python scripts/verify.py --platform <name>`
## Add an emulator profile
1. Clone the emulator's source code
2. Search for BIOS/firmware loading (grep for `bios`, `rom`, `firmware`, `fopen`)
3. Document every file the emulator loads with source code references
4. Write YAML to `emulators/<name>.yml`
5. Test: `python scripts/cross_reference.py --emulator <name>`
## File conventions
- `bios/Manufacturer/Console/filename` for canonical files
- `bios/Manufacturer/Console/.variants/filename.sha1prefix` for alternate versions
- Files >50 MB go in GitHub release assets (`large-files` release)
- RPG Maker and ScummVM directories are excluded from deduplication
## PR validation
The CI automatically:
- Computes SHA1/MD5/CRC32 of new files
- Checks against known hashes in platform configs
- Reports coverage impact
"""
# ---------------------------------------------------------------------------
# Build cross-reference indexes
# ---------------------------------------------------------------------------
def _build_platform_file_index(coverages: dict) -> dict[str, set]:
"""Map platform_name -> set of declared file names."""
index = {}
for name, cov in coverages.items():
names = set()
config = cov["config"]
for system in config.get("systems", {}).values():
for fe in system.get("files", []):
names.add(fe.get("name", ""))
index[name] = names
return index
def _build_emulator_file_index(profiles: dict) -> dict[str, dict]:
"""Map emulator_name -> {files: set, systems: set} for cross-reference."""
index = {}
for name, profile in profiles.items():
if profile.get("type") == "alias":
continue
index[name] = {
"files": {f.get("name", "") for f in profile.get("files", [])},
"systems": set(profile.get("systems", [])),
}
return index
# ---------------------------------------------------------------------------
# mkdocs.yml nav generator
# ---------------------------------------------------------------------------
def generate_mkdocs_nav(
    coverages: dict,
    manufacturers: dict,
    profiles: dict,
) -> list:
    """Build the ``nav`` structure for mkdocs.yml.

    Platforms are ordered by display name, manufacturers and emulator
    profiles by key. Profiles of type ``"alias"`` or ``"test"`` get no
    navigation entry.
    """
    # Platforms: overview first, then one entry per platform by display name
    platform_keys = sorted(coverages.keys(), key=lambda key: coverages[key]["platform"])
    platform_nav = [{"Overview": "platforms/index.md"}] + [
        {coverages[key]["platform"]: f"platforms/{key}.md"}
        for key in platform_keys
    ]

    # Systems: one page per manufacturer, file name derived from its slug
    system_nav = [{"Overview": "systems/index.md"}]
    for maker in sorted(manufacturers.keys()):
        page_slug = maker.lower().replace(" ", "-")
        system_nav.append({maker: f"systems/{page_slug}.md"})

    # Emulators: only real profiles, labelled with their display name
    listed = {
        key: profile
        for key, profile in profiles.items()
        if profile.get("type") not in ("alias", "test")
    }
    emu_nav = [{"Overview": "emulators/index.md"}] + [
        {listed[key].get("emulator", key): f"emulators/{key}.md"}
        for key in sorted(listed.keys())
    ]

    nav = [{"Home": "index.md"}]
    nav.append({"Platforms": platform_nav})
    nav.append({"Systems": system_nav})
    nav.append({"Emulators": emu_nav})
    nav.append({"Cross-reference": "cross-reference.md"})
    nav.append({"Gap Analysis": "gaps.md"})
    nav.append({"Contributing": "contributing.md"})
    return nav
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def _write_utf8(path: Path, text: str) -> None:
    """Write *text* to *path* as UTF-8 instead of the locale's default encoding."""
    path.write_text(text, encoding="utf-8")


def _is_nav_body_line(line: str) -> bool:
    """Tell whether *line* continues a top-level ``nav:`` block in mkdocs.yml.

    Both indented lines and column-0 list items count, because ``yaml.dump``
    writes the items of a top-level list without indentation.
    """
    return line[:1] in (" ", "\t") or line.startswith("- ") or line.rstrip() == "-"


def _splice_nav_section(content: str, nav_yaml: str) -> str:
    """Return *content* with its top-level ``nav:`` block replaced by *nav_yaml*.

    The block runs from the first line starting with ``nav:`` up to the next
    line that is neither indented nor a list item. When there is no such
    block, *nav_yaml* is appended after a separating newline.
    """
    lines = content.splitlines(keepends=True)
    start = next((i for i, line in enumerate(lines) if line.startswith("nav:")), None)
    if start is None:
        return content + "\n" + nav_yaml
    end = start + 1
    while end < len(lines) and _is_nav_body_line(lines[end]):
        end += 1
    return "".join(lines[:start]) + nav_yaml + "".join(lines[end:])


def main():
    """Regenerate the documentation site and refresh the ``nav`` of mkdocs.yml.

    Command-line options select the database file and the platforms,
    emulators and docs directories. The generated sub-directories listed in
    ``GENERATED_DIRS`` are deleted and rebuilt; every page is written as
    UTF-8. ``mkdocs.yml`` is read from and written to the current working
    directory, and only its ``nav`` section is changed.
    """
    parser = argparse.ArgumentParser(description="Generate MkDocs site from project data")
    parser.add_argument("--db", default="database.json")
    parser.add_argument("--platforms-dir", default="platforms")
    parser.add_argument("--emulators-dir", default="emulators")
    parser.add_argument("--docs-dir", default=DOCS_DIR)
    args = parser.parse_args()

    db = load_database(args.db)
    docs = Path(args.docs_dir)

    # Clean generated dirs (preserve docs/superpowers/)
    for d in GENERATED_DIRS:
        target = docs / d
        if target.exists():
            shutil.rmtree(target)

    # Ensure output dirs
    for d in GENERATED_DIRS:
        (docs / d).mkdir(parents=True, exist_ok=True)

    # Load registry for platform metadata (logos, etc.)
    registry_path = Path(args.platforms_dir) / "_registry.yml"
    registry = {}
    if registry_path.exists():
        with open(registry_path, encoding="utf-8") as f:
            registry = (yaml.safe_load(f) or {}).get("platforms", {})

    # Load platform configs (files starting with "_" are not platforms)
    platform_names = [
        p.stem for p in Path(args.platforms_dir).glob("*.yml")
        if not p.name.startswith("_")
    ]

    print("Computing platform coverage...")
    coverages = {}
    for name in sorted(platform_names):
        try:
            cov = compute_coverage(name, args.platforms_dir, db)
            coverages[name] = cov
            print(f"  {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})")
        except FileNotFoundError as e:
            # A platform with a missing input is reported and left out
            print(f"  {name}: skipped ({e})", file=sys.stderr)

    # Load emulator profiles
    print("Loading emulator profiles...")
    profiles = load_emulator_profiles(args.emulators_dir, skip_aliases=False)
    unique_count = sum(1 for p in profiles.values() if p.get("type") != "alias")
    print(f"  {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)")

    # Build cross-reference indexes
    platform_files = _build_platform_file_index(coverages)
    emulator_files = _build_emulator_file_index(profiles)

    # Generate home
    print("Generating home page...")
    _write_utf8(docs / "index.md", generate_home(db, coverages, profiles, registry))

    # Build system_id -> manufacturer page map (needed by all generators)
    print("Building system cross-reference map...")
    manufacturers = _group_by_manufacturer(db)
    _build_system_page_map_from_data(manufacturers, coverages, db)
    print(f"  {len(_system_page_map)} system IDs mapped to pages")

    # Generate platform pages
    print("Generating platform pages...")
    _write_utf8(docs / "platforms" / "index.md", generate_platform_index(coverages))
    for name, cov in coverages.items():
        _write_utf8(
            docs / "platforms" / f"{name}.md",
            generate_platform_page(name, cov, registry, emulator_files),
        )

    # Generate system pages
    print("Generating system pages...")
    _write_utf8(docs / "systems" / "index.md", generate_systems_index(manufacturers))
    for mfr, consoles in manufacturers.items():
        slug = mfr.lower().replace(" ", "-")
        page = generate_system_page(mfr, consoles, platform_files, emulator_files)
        _write_utf8(docs / "systems" / f"{slug}.md", page)

    # Generate emulator pages
    print("Generating emulator pages...")
    _write_utf8(docs / "emulators" / "index.md", generate_emulators_index(profiles))
    for name, profile in profiles.items():
        page = generate_emulator_page(name, profile, db, platform_files)
        _write_utf8(docs / "emulators" / f"{name}.md", page)

    # Generate cross-reference page
    print("Generating cross-reference page...")
    _write_utf8(docs / "cross-reference.md", generate_cross_reference(coverages, profiles))

    # Generate gap analysis page
    print("Generating gap analysis page...")
    _write_utf8(docs / "gaps.md", generate_gap_analysis(profiles, coverages, db))

    # Generate contributing
    print("Generating contributing page...")
    _write_utf8(docs / "contributing.md", generate_contributing())

    # Update mkdocs.yml nav section only (avoid yaml.dump round-trip mangling quotes)
    print("Updating mkdocs.yml nav...")
    nav = generate_mkdocs_nav(coverages, manufacturers, profiles)
    nav_yaml = yaml.dump({"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True)

    # The splice also consumes the column-0 "- ..." items that yaml.dump
    # wrote on a previous run, so re-running never duplicates nav entries.
    mkdocs_path = Path("mkdocs.yml")
    content = mkdocs_path.read_text(encoding="utf-8")
    _write_utf8(mkdocs_path, _splice_nav_section(content, nav_yaml))

    total_pages = (
        1  # home
        + 1 + len(coverages)  # platform index + detail
        + 1 + len(manufacturers)  # system index + detail
        + 1  # cross-reference
        + 1 + len(profiles)  # emulator index + detail
        + 1  # gap analysis
        + 1  # contributing
    )
    print(f"\nGenerated {total_pages} pages in {args.docs_dir}/")
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()