#!/usr/bin/env python3
"""Generate MkDocs site pages from database.json, platform configs, and emulator profiles.
Reads the same data sources as verify.py and generate_pack.py to produce
a complete documentation site. Zero manual content.
Usage:
python scripts/generate_site.py
python scripts/generate_site.py --db database.json --platforms-dir platforms
"""
from __future__ import annotations
import argparse
import os
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
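# Make sibling modules in scripts/ (common, generate_readme, cross_reference) importable when run directly.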
sys.path.insert(0, os.path.dirname(__file__))
from common import (
list_registered_platforms,
load_database,
load_emulator_profiles,
require_yaml,
write_if_changed,
)
yaml = require_yaml()
from generate_readme import compute_coverage
DOCS_DIR = "docs"
SITE_NAME = "RetroBIOS"
REPO_URL = "https://github.com/Abdess/retrobios"
RELEASE_URL = f"{REPO_URL}/releases/latest"
GENERATED_DIRS = ["platforms", "systems", "emulators"]
WIKI_SRC_DIR = "wiki" # manually maintained wiki sources
SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
CLS_LABELS = {
"official_port": "Official ports",
"community_fork": "Community forks",
"pure_libretro": "Pure libretro",
"game_engine": "Game engines",
"enhanced_fork": "Enhanced forks",
"frozen_snapshot": "Frozen snapshots",
"embedded_hle": "Embedded HLE",
"launcher": "Launchers",
"unclassified": "Unclassified",
"other": "Other",
}
# Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking
_system_page_map: dict[str, tuple[str, str]] = {}
def _build_system_page_map_from_data(
manufacturers: dict,
coverages: dict,
db: dict,
) -> None:
"""Build system_id -> (manufacturer_slug, console_name) mapping.
Uses platform file paths to trace system_id -> bios directory -> manufacturer page.
"""
db.get("files", {})
db.get("indexes", {}).get("by_name", {})
# Build reverse index: filename -> (manufacturer, console) from bios/ structure
file_to_console: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
for console, entries in consoles.items():
for entry in entries:
file_to_console[entry["name"]] = (mfr, console)
# Build normalized console name index for fuzzy matching
console_norm: dict[str, tuple[str, str]] = {}
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
mfr_norm = mfr.lower().replace(" ", "-")
for console in consoles:
norm = console.lower().replace(" ", "-")
entry = (slug, console)
console_norm[norm] = entry
console_norm[f"{mfr_norm}-{norm}"] = entry
# Short aliases: strip common manufacturer prefix words
for prefix in (
f"{mfr_norm}-",
"nintendo-",
"sega-",
"sony-",
"snk-",
"nec-",
):
if norm.startswith(prefix.replace(f"{mfr_norm}-", "")):
pass # already covered by norm
key = f"{prefix}{norm}"
console_norm[key] = entry
# Map system_id -> (manufacturer, console) via platform file entries
for cov in coverages.values():
config = cov["config"]
for sys_id, system in config.get("systems", {}).items():
if sys_id in _system_page_map:
continue
# Strategy 1: trace via file paths in DB
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname in file_to_console:
mfr, console = file_to_console[fname]
slug = mfr.lower().replace(" ", "-")
_system_page_map[sys_id] = (slug, console)
break
if sys_id in _system_page_map:
continue
# Strategy 2: fuzzy match system_id against console directory names
if sys_id in console_norm:
_system_page_map[sys_id] = console_norm[sys_id]
else:
# Try partial match: "nintendo-wii" matches "Wii" under "Nintendo"
parts = sys_id.split("-")
for i in range(len(parts)):
suffix = "-".join(parts[i:])
if suffix in console_norm:
_system_page_map[sys_id] = console_norm[suffix]
break
def _system_link(sys_id: str, prefix: str = "") -> str:
"""Generate a markdown link to a system page with anchor."""
if sys_id in _system_page_map:
slug, console = _system_page_map[sys_id]
anchor = console.lower().replace(" ", "-").replace("/", "-")
return f"[{sys_id}]({prefix}systems/{slug}.md#{anchor})"
return sys_id
def _render_yaml_value(lines: list[str], val, indent: int = 4) -> None:
"""Render any YAML value as indented markdown."""
pad = " " * indent
if isinstance(val, dict):
for k, v in val.items():
if isinstance(v, dict):
lines.append(f"{pad}**{k}:**")
lines.append("")
_render_yaml_value(lines, v, indent + 4)
elif isinstance(v, list):
lines.append(f"{pad}**{k}:**")
lines.append("")
for item in v:
if isinstance(item, dict):
parts = [
f"{ik}: {iv}"
for ik, iv in item.items()
if not isinstance(iv, (dict, list))
]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
lines.append("")
else:
# Truncate very long strings in tables
sv = str(v)
if len(sv) > 200:
sv = sv[:200] + "..."
lines.append(f"{pad}- **{k}:** {sv}")
elif isinstance(val, list):
for item in val:
if isinstance(item, dict):
parts = [
f"{ik}: {iv}"
for ik, iv in item.items()
if not isinstance(iv, (dict, list))
]
lines.append(f"{pad}- {', '.join(parts)}")
else:
lines.append(f"{pad}- {item}")
elif isinstance(val, str) and "\n" in val:
for line in val.split("\n"):
lines.append(f"{pad}{line}")
else:
lines.append(f"{pad}{val}")
def _platform_link(name: str, display: str, prefix: str = "") -> str:
"""Generate a markdown link to a platform page."""
return f"[{display}]({prefix}platforms/{name}.md)"
def _emulator_link(name: str, prefix: str = "") -> str:
"""Generate a markdown link to an emulator page."""
return f"[{name}]({prefix}emulators/{name}.md)"
def _timestamp() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _fmt_size(size: int) -> str:
if size >= 1024 * 1024 * 1024:
return f"{size / (1024**3):.1f} GB"
if size >= 1024 * 1024:
return f"{size / (1024**2):.1f} MB"
if size >= 1024:
return f"{size / 1024:.1f} KB"
return f"{size} B"
def _pct(n: int, total: int) -> str:
if total == 0:
return "0%"
return f"{n / total * 100:.1f}%"
def _status_icon(pct: float) -> str:
if pct >= 100:
return "OK"
if pct >= 95:
return "~OK"
return "partial"
# Home page
def generate_home(
db: dict, coverages: dict, profiles: dict, registry: dict | None = None
) -> str:
total_files = db.get("total_files", 0)
total_size = db.get("total_size", 0)
ts = _timestamp()
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
emulator_count = len(unique)
# Classification stats
classifications: dict[str, int] = {}
for p in unique.values():
cls = p.get("core_classification", "unclassified")
classifications[cls] = classifications.get(cls, 0) + 1
# Count total systems across all profiles
all_systems = set()
for p in unique.values():
all_systems.update(p.get("systems", []))
lines = [
# hero banner + stat cards (rb-* CSS class names assumed)
'<div class="rb-hero" markdown>',
"",
f"# {SITE_NAME}",
"",
"Source-verified BIOS and firmware packs for retrogaming platforms.",
"",
"</div>",
"",
'<div class="rb-stat-grid" markdown>',
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_files:,}</span>',
'<span class="rb-stat-label">Files</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{len(coverages)}</span>',
'<span class="rb-stat-label">Platforms</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{emulator_count}</span>',
'<span class="rb-stat-label">Emulators profiled</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{_fmt_size(total_size)}</span>',
'<span class="rb-stat-label">Total size</span>',
"</div>",
"",
"</div>",
"",
]
# Platforms FIRST (main action)
lines.extend(
[
"## Platforms",
"",
"| | Platform | Files | Verification | Download |",
"|---|----------|-------|-------------|----------|",
]
)
mode_icons = {"md5": "MD5", "sha1": "SHA1", "existence": "exists"}
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = (
f"{{ width=20 loading=lazy }}" if logo_url else ""
)
mode_label = mode_icons.get(cov["mode"], cov["mode"])
lines.append(
f"| {logo_md} | [{display}](platforms/{name}.md) | "
f"{cov['present']:,} | {mode_label} | "
f"[Pack]({RELEASE_URL}){{ .md-button .md-button--primary }} |"
)
# Quick start (collapsible -- secondary info)
lines.extend(
[
"",
'??? info "Where to extract"',
"",
" | Platform | Extract to |",
" |----------|-----------|",
" | RetroArch / Lakka | `system/` |",
" | Batocera | `/userdata/bios/` |",
" | Recalbox | `/recalbox/share/bios/` |",
" | RetroBat | `bios/` |",
" | RetroDECK | `~/retrodeck/bios/` |",
" | EmuDeck | `Emulation/bios/` |",
"",
]
)
# Emulator classification breakdown
lines.extend(
[
"## Emulator profiles",
"",
"| Classification | Count |",
"|---------------|-------|",
]
)
for cls, count in sorted(classifications.items(), key=lambda x: -x[1]):
label = CLS_LABELS.get(cls, cls)
lines.append(f"| [{label}](emulators/index.md#{cls}) | {count} |")
# Methodology (collapsible)
lines.extend(
[
"",
'??? abstract "Methodology"',
"",
" Each file is checked against the emulator's source code. "
"Documentation and metadata can drift from actual runtime behavior, "
"so the source is the primary reference.",
"",
" 1. **Upstream emulator source** -- what the original project "
"loads (Dolphin, PCSX2, Mednafen...)",
" 2. **Libretro core source** -- the RetroArch port, which may "
"adapt paths or add files",
" 3. **`.info` declarations** -- metadata that platforms rely on, "
"checked for accuracy",
"",
]
)
# Quick links
lines.extend(
[
"---",
"",
"[Systems](systems/index.md){ .md-button } "
"[Emulators](emulators/index.md){ .md-button } "
"[Cross-reference](cross-reference.md){ .md-button } "
"[Gap Analysis](gaps.md){ .md-button } "
"[Contributing](contributing.md){ .md-button .md-button--primary }",
"",
f"Generated on {ts}.",
]
)
return "\n".join(lines) + "\n"
# Platform pages
def generate_platform_index(coverages: dict) -> str:
total_files = sum(c["total"] for c in coverages.values())
total_present = sum(c["present"] for c in coverages.values())
total_verified = sum(c["verified"] for c in coverages.values())
lines = [
f"# Platforms - {SITE_NAME}",
"",
f"{len(coverages)} supported platforms, "
f"{total_present:,}/{total_files:,} files present, "
f"{total_verified:,} verified.",
"",
"| Platform | Coverage | Verification | Status |",
"|----------|----------|-------------|--------|",
]
mode_labels = {
"md5": 'MD5',
"sha1": 'SHA1',
"existence": 'existence',
}
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
pct = _pct(cov["present"], cov["total"])
plat_status = cov["config"].get("status", "active")
badge_cls = (
"rb-badge-success"
if pct_val >= 95
else "rb-badge-warning"
if pct_val >= 70
else "rb-badge-danger"
)
coverage_str = (
f'{cov["present"]}/{cov["total"]} '
f'<span class="{badge_cls}">{pct}</span>'
)
mode_html = mode_labels.get(
cov["mode"],
f'<span class="rb-badge-info">{cov["mode"]}</span>',
)
if plat_status == "archived":
status = '<span class="rb-badge-info">archived</span>'
elif pct_val >= 100:
status = '<span class="rb-badge-success">complete</span>'
elif pct_val >= 95:
status = '<span class="rb-badge-success">near</span>'
else:
status = '<span class="rb-badge-warning">partial</span>'
lines.append(
f"| [{display}]({name}.md) | {coverage_str} | {mode_html} | {status} |"
)
return "\n".join(lines) + "\n"
def generate_platform_page(
name: str,
cov: dict,
registry: dict | None = None,
emulator_files: dict | None = None,
) -> str:
config = cov["config"]
display = cov["platform"]
mode = cov["mode"]
pct = _pct(cov["present"], cov["total"])
logo_url = (registry or {}).get(name, {}).get("logo", "")
logo_md = (
f"{{ width=48 align=right }}\n\n" if logo_url else ""
)
homepage = config.get("homepage", "")
version = config.get("version", "")
hash_type = config.get("hash_type", "")
base_dest = config.get("base_destination", "")
pct_val = cov["present"] / cov["total"] * 100 if cov["total"] else 0
coverage_badge = (
"rb-badge-success"
if pct_val >= 95
else "rb-badge-warning"
if pct_val >= 70
else "rb-badge-danger"
)
mode_badge = (
"rb-badge-success" if mode in ("md5", "sha1") else "rb-badge-info"
)
lines = [
f"# {display} - {SITE_NAME}",
"",
logo_md,
]
# Stat cards
lines.extend(
[
'<div class="rb-stat-grid" markdown>',  # stat cards (rb-* CSS class names assumed)
"",
f'<div class="rb-stat-card {coverage_badge}" markdown>',
f'<span class="rb-stat-value">{cov["present"]}/{cov["total"]}</span>',
f'<span class="rb-stat-label">Coverage ({pct})</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{cov["verified"]}</span>',
'<span class="rb-stat-label">Verified</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{cov["missing"]}</span>',
'<span class="rb-stat-label">Missing</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value {mode_badge}">'
f'{mode}</span>',
'<span class="rb-stat-label">Verification</span>',
"</div>",
"",
"</div>",
"",
"| | |",
"|---|---|",
]
)
if hash_type:
lines.append(f"| Hash type | {hash_type} |")
if version:
lines.append(f"| Version | {version} |")
if base_dest:
lines.append(f"| BIOS path | `{base_dest}/` |")
if homepage:
lines.append(f"| Homepage | [{homepage}]({homepage}) |")
lines.extend(
[
"",
f"[Download {display} Pack]({RELEASE_URL})"
"{ .md-button .md-button--primary }",
"",
]
)
# Build lookup from config file entries (has hashes/sizes)
config_files: dict[str, dict] = {}
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname:
config_files[fname] = fe
# Group details by system
by_system: dict[str, list] = {}
for d in cov["details"]:
sys_id = d.get("system", "unknown")
by_system.setdefault(sys_id, []).append(d)
# System summary table (quick navigation)
lines.extend(
[
"## Systems overview",
"",
"| System | Files | Status | Emulators |",
"|--------|-------|--------|-----------|",
]
)
for sys_id, files in sorted(by_system.items()):
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
non_ok = total - ok_count
if non_ok == 0:
status = '<span class="rb-badge-success">OK</span>'
else:
status = (
f'<span class="rb-badge-warning">'
f'{non_ok} issue{"s" if non_ok > 1 else ""}</span>'
)
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
if sys_id in emu_data.get("systems", set()):
sys_emus.append(emu_name)
emu_str = ", ".join(sys_emus[:3])
if len(sys_emus) > 3:
emu_str += f" +{len(sys_emus) - 3}"
anchor = sys_id.replace(" ", "-")
lines.append(
f"| [{sys_id}](#{anchor}) | {ok_count}/{total} | {status} | {emu_str} |"
)
lines.append("")
# Per-system detail sections (collapsible for large platforms)
use_collapsible = len(by_system) > 10
for sys_id, files in sorted(by_system.items()):
ok_count = sum(1 for f in files if f["status"] == "ok")
total = len(files)
sys_emus = []
if emulator_files:
for emu_name, emu_data in emulator_files.items():
if sys_id in emu_data.get("systems", set()):
sys_emus.append(emu_name)
sys_link = _system_link(sys_id, "../")
anchor = sys_id.replace(" ", "-")
if use_collapsible:
status_tag = "OK" if ok_count == total else f"{total - ok_count} issues"
lines.append(f'<a id="{anchor}"></a>')  # anchor target (markup assumed) so overview links resolve
lines.append(f'??? note "{sys_id} ({ok_count}/{total} - {status_tag})"')
lines.append("")
pad = " "
else:
lines.append(f"## {sys_link}")
lines.append("")
pad = ""
lines.append(f"{pad}{ok_count}/{total} files verified")
if sys_emus:
emu_links = ", ".join(_emulator_link(e, "../") for e in sorted(sys_emus))
lines.append(f"{pad}Emulators: {emu_links}")
lines.append("")
# File listing
for f in sorted(files, key=lambda x: x["name"]):
status = f["status"]
fname = f["name"]
cfg_entry = config_files.get(fname, {})
sha1 = cfg_entry.get("sha1", f.get("sha1", ""))
md5 = cfg_entry.get("md5", f.get("expected_md5", ""))
size = cfg_entry.get("size", f.get("size", 0))
if status == "ok":
status_display = "OK"
elif status == "untested":
reason = f.get("reason", "")
status_display = f"untested: {reason}" if reason else "untested"
elif status == "missing":
status_display = "**missing**"
else:
status_display = status
size_str = _fmt_size(size) if size else ""
details = [status_display]
if size_str:
details.append(size_str)
lines.append(f"{pad}- `{fname}` - {', '.join(details)}")
# Show full hashes on a sub-line (useful for copy-paste)
if sha1 or md5:
hash_parts = []
if sha1:
hash_parts.append(f"SHA1: `{sha1}`")
if md5:
hash_parts.append(f"MD5: `{md5}`")
lines.append(f"{pad} {' | '.join(hash_parts)}")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# System pages
def _group_by_manufacturer(db: dict) -> dict[str, dict[str, list]]:
"""Group files by manufacturer -> console -> files."""
manufacturers: dict[str, dict[str, list]] = {}
for sha1, entry in db.get("files", {}).items():
path = entry.get("path", "")
parts = path.split("/")
if len(parts) < 3 or parts[0] != "bios":
continue
manufacturer = parts[1]
console = parts[2]
manufacturers.setdefault(manufacturer, {}).setdefault(console, []).append(entry)
return manufacturers
def generate_systems_index(manufacturers: dict) -> str:
total_mfr = len(manufacturers)
total_consoles = sum(len(c) for c in manufacturers.values())
total_files = sum(
len(files) for consoles in manufacturers.values() for files in consoles.values()
)
lines = [
f"# Systems - {SITE_NAME}",
"",
f"{total_mfr} manufacturers, {total_consoles} consoles, "
f"{total_files:,} files in the repository.",
"",
"| Manufacturer | Consoles | Files |",
"|-------------|----------|-------|",
]
for mfr in sorted(manufacturers.keys()):
consoles = manufacturers[mfr]
file_count = sum(len(files) for files in consoles.values())
slug = mfr.lower().replace(" ", "-")
lines.append(f"| [{mfr}]({slug}.md) | {len(consoles)} | {file_count} |")
return "\n".join(lines) + "\n"
def generate_system_page(
manufacturer: str,
consoles: dict[str, list],
platform_files: dict[str, set],
emulator_files: dict[str, dict],
) -> str:
manufacturer.lower().replace(" ", "-")
lines = [
f"# {manufacturer} - {SITE_NAME}",
"",
]
for console_name in sorted(consoles.keys()):
files = consoles[console_name]
icon_name = f"{manufacturer} - {console_name}".replace("/", " ")
icon_url = f"{SYSTEM_ICON_BASE}/{icon_name.replace(' ', '%20')}.png"
lines.append(f"## {{ width=24 }} {console_name}")
lines.append("")
# Separate main files from variants
main_files = [f for f in files if "/.variants/" not in f["path"]]
variant_files = [f for f in files if "/.variants/" in f["path"]]
for f in sorted(main_files, key=lambda x: x["name"]):
name = f["name"]
sha1_full = f.get("sha1", "unknown")
md5_full = f.get("md5", "unknown")
size = _fmt_size(f.get("size", 0))
# Cross-reference: which platforms declare this file
plats = sorted(p for p, names in platform_files.items() if name in names)
# Cross-reference: which emulators load this file
emus = sorted(
e
for e, data in emulator_files.items()
if name in data.get("files", set())
)
lines.append(f"**`{name}`** ({size})")
lines.append("")
lines.append(f"- SHA1: `{sha1_full}`")
lines.append(f"- MD5: `{md5_full}`")
if plats:
plat_links = [_platform_link(p, p, "../") for p in plats]
lines.append(f"- Platforms: {', '.join(plat_links)}")
if emus:
emu_links = [_emulator_link(e, "../") for e in emus]
lines.append(f"- Emulators: {', '.join(emu_links)}")
lines.append("")
if variant_files:
lines.append("**Variants:**")
lines.append("")
for v in sorted(variant_files, key=lambda x: x["name"]):
vname = v["name"]
vmd5 = v.get("md5", "unknown")
lines.append(f"- `{vname}` MD5: `{vmd5}`")
lines.append("")
lines.append(f"*Generated on {_timestamp()}*")
return "\n".join(lines) + "\n"
# Emulator pages
def generate_emulators_index(profiles: dict) -> str:
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
aliases = {k: v for k, v in profiles.items() if v.get("type") == "alias"}
# Group by classification
by_class: dict[str, list[tuple[str, dict]]] = {}
for name in sorted(unique.keys()):
p = unique[name]
cls = p.get("core_classification", "other")
by_class.setdefault(cls, []).append((name, p))
total_files = sum(len(p.get("files", [])) for p in unique.values())
lines = [
f"# Emulators - {SITE_NAME}",
"",
f"**{len(unique)}** emulator profiles, **{total_files}** files total, **{len(aliases)}** aliases.",
"",
"| Classification | Count | Description |",
"|---------------|-------|-------------|",
]
cls_desc = {
"official_port": "Same author maintains both standalone and libretro",
"community_fork": "Third-party port to libretro",
"pure_libretro": "Built for libretro, no standalone version",
"game_engine": "Game engine reimplementation",
"enhanced_fork": "Fork with added features",
"frozen_snapshot": "Frozen at an old version",
"embedded_hle": "All ROMs compiled into binary",
"launcher": "Launches an external emulator",
"other": "Unclassified",
}
cls_order = [
"official_port",
"community_fork",
"pure_libretro",
"game_engine",
"enhanced_fork",
"frozen_snapshot",
"embedded_hle",
"launcher",
"other",
]
for cls in cls_order:
entries = by_class.get(cls, [])
if not entries:
continue
label = CLS_LABELS.get(cls, cls)
desc = cls_desc.get(cls, "")
lines.append(f"| [{label}](#{cls}) | {len(entries)} | {desc} |")
lines.append("")
for cls in cls_order:
entries = by_class.get(cls, [])
if not entries:
continue
label = CLS_LABELS.get(cls, cls)
desc = cls_desc.get(cls, "")
lines.extend(
[
f'## {label} {{ #{cls} }}',
"",
f"*{desc}* -- {len(entries)} profiles",
"",
"| Engine | Systems | Files |",
"|--------|---------|-------|",
]
)
for name, p in entries:
emu_name = p.get("emulator", name)
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
file_count = len(files)
file_str = str(file_count) if file_count else "-"
lines.append(f"| [{emu_name}]({name}.md) | {sys_str} | {file_str} |")
lines.append("")
if aliases:
lines.extend(["## Aliases", ""])
lines.append("| Core | Points to |")
lines.append("|------|-----------|")
for name in sorted(aliases.keys()):
parent = aliases[name].get(
"alias_of", aliases[name].get("bios_identical_to", "unknown")
)
lines.append(f"| {name} | [{parent}]({parent}.md) |")
lines.append("")
return "\n".join(lines) + "\n"
def generate_emulator_page(
name: str, profile: dict, db: dict, platform_files: dict | None = None
) -> str:
if profile.get("type") == "alias":
parent = profile.get("alias_of", profile.get("bios_identical_to", "unknown"))
return (
f"# {name} - {SITE_NAME}\n\n"
f"This core uses the same firmware as **{parent}**.\n\n"
f"See [{parent}]({parent}.md) for details.\n"
)
emu_name = profile.get("emulator", name)
emu_type = profile.get("type", "unknown")
classification = profile.get("core_classification", "")
source = profile.get("source", "")
upstream = profile.get("upstream", "")
version = profile.get("core_version", "unknown")
profile.get("display_name", emu_name)
profiled = profile.get("profiled_date", "unknown")
systems = profile.get("systems", [])
cores = profile.get("cores", [name])
files = profile.get("files", [])
notes_raw = profile.get("notes", profile.get("note", ""))
notes = (
str(notes_raw).strip() if notes_raw and not isinstance(notes_raw, dict) else ""
)
exclusion = profile.get("exclusion_note", "")
data_dirs = profile.get("data_directories", [])
lines = [
f"# {emu_name} - {SITE_NAME}",
"",
"| | |",
"|---|---|",
f"| Type | {emu_type} |",
]
if classification:
cls_display = CLS_LABELS.get(classification, classification)
lines.append(f"| Classification | {cls_display} |")
if source:
lines.append(f"| Source | [{source}]({source}) |")
if upstream and upstream != source:
lines.append(f"| Upstream | [{upstream}]({upstream}) |")
lines.append(f"| Version | {version} |")
lines.append(f"| Profiled | {profiled} |")
if cores:
lines.append(f"| Cores | {', '.join(str(c) for c in cores)} |")
if systems:
sys_links = [_system_link(s, "../") for s in systems]
lines.append(f"| Systems | {', '.join(sys_links)} |")
mame_ver = profile.get("mame_version", "")
if mame_ver:
lines.append(f"| MAME version | {mame_ver} |")
author = profile.get("author", "")
if author:
lines.append(f"| Author | {author} |")
based_on = profile.get("based_on", "")
if based_on:
lines.append(f"| Based on | {based_on} |")
# Additional metadata fields (scalar values only - complex ones go to collapsible sections)
for field, label in [
("core", "Core ID"),
("core_name", "Core name"),
("bios_size", "BIOS size"),
("bios_directory", "BIOS directory"),
("bios_detection", "BIOS detection"),
("bios_selection", "BIOS selection"),
("firmware_file", "Firmware file"),
("firmware_source", "Firmware source"),
("firmware_install", "Firmware install"),
("firmware_detection", "Firmware detection"),
("resources_directory", "Resources directory"),
("rom_path", "ROM path"),
("game_count", "Game count"),
("verification", "Verification mode"),
("source_ref", "Source ref"),
("analysis_date", "Analysis date"),
("analysis_commit", "Analysis commit"),
]:
val = profile.get(field)
if val is None or val == "" or isinstance(val, (dict, list)):
continue
if isinstance(val, str) and val.startswith("http"):
lines.append(f"| {label} | [{val}]({val}) |")
else:
lines.append(f"| {label} | {val} |")
lines.append("")
# Platform-specific details (rich structured data)
platform_details = profile.get("platform_details")
if platform_details and isinstance(platform_details, dict):
lines.extend(['???+ info "Platform details"', ""])
for pk, pv in platform_details.items():
if isinstance(pv, dict):
lines.append(f" **{pk}:**")
for sk, sv in pv.items():
lines.append(f" - {sk}: {sv}")
elif isinstance(pv, list):
lines.append(f" **{pk}:** {', '.join(str(x) for x in pv)}")
else:
lines.append(f" **{pk}:** {pv}")
lines.append("")
# All remaining structured data blocks as collapsible sections
_structured_blocks = [
("analysis", "Source analysis"),
("memory_layout", "Memory layout"),
("regions", "Regions"),
("nvm_layout", "NVM layout"),
("model_kickstart_map", "Model kickstart map"),
("builtin_boot_roms", "Built-in boot ROMs"),
("common_bios_filenames", "Common BIOS filenames"),
("valid_bios_crc32", "Valid BIOS CRC32"),
("dev_flash", "dev_flash"),
("dev_flash2", "dev_flash2"),
("dev_flash3", "dev_flash3"),
("firmware_modules", "Firmware modules"),
("firmware_titles", "Firmware titles"),
("fallback_fonts", "Fallback fonts"),
("io_devices", "I/O devices"),
("partitions", "Partitions"),
("mlc_structure", "MLC structure"),
("machine_directories", "Machine directories"),
("machine_properties", "Machine properties"),
("whdload_kickstarts", "WHDLoad kickstarts"),
("bios_identical_to", "BIOS identical to"),
("pack_structure", "Pack structure"),
("firmware_version", "Firmware version"),
]
for field, label in _structured_blocks:
val = profile.get(field)
if val is None:
continue
lines.append(f'???+ abstract "{label}"')
lines.append("")
_render_yaml_value(lines, val, indent=4)
lines.append("")
# Notes
if notes:
indented = notes.replace("\n", "\n ")
lines.extend(['???+ note "Technical notes"', f" {indented}", ""])
if not files:
lines.append("No BIOS or firmware files required.")
if exclusion:
lines.extend(
[
"",
'!!! info "Why no files"',
f" {exclusion}",
]
)
else:
by_name = db.get("indexes", {}).get("by_name", {})
db.get("files", {})
# Stats by category
bios_files = [f for f in files if f.get("category", "bios") == "bios"]
game_data = [f for f in files if f.get("category") == "game_data"]
bios_zips = [f for f in files if f.get("category") == "bios_zip"]
in_repo_count = sum(1 for f in files if f.get("name", "") in by_name)
missing_count = len(files) - in_repo_count
req_count = sum(1 for f in files if f.get("required"))
opt_count = len(files) - req_count
hle_count = sum(1 for f in files if f.get("hle_fallback"))
parts = [f"**{len(files)} files**"]
parts.append(f"{req_count} required, {opt_count} optional")
parts.append(f"{in_repo_count} in repo, {missing_count} missing")
if hle_count:
parts.append(f"{hle_count} with HLE fallback")
lines.append(" | ".join(parts))
if game_data or bios_zips:
cats = []
if bios_files:
cats.append(f"{len(bios_files)} BIOS")
if game_data:
cats.append(f"{len(game_data)} game data")
if bios_zips:
cats.append(f"{len(bios_zips)} BIOS ZIPs")
lines.append(f"Categories: {', '.join(cats)}")
lines.append("")
# File table
for f in files:
fname = f.get("name", "")
required = f.get("required", False)
in_repo = fname in by_name
source_ref = f.get("source_ref", "")
mode = f.get("mode", "")
hle = f.get("hle_fallback", False)
aliases = f.get("aliases", [])
category = f.get("category", "")
validation = f.get("validation", [])
size = f.get("size")
fnote = f.get("note", f.get("notes", ""))
storage = f.get("storage", "")
fmd5 = f.get("md5", "")
fsha1 = f.get("sha1", "")
fcrc32 = f.get("crc32", "")
fsha256 = f.get("sha256", "")
fadler32 = f.get("known_hash_adler32", "")
fmin = f.get("min_size")
fmax = f.get("max_size")
desc = f.get("description", "")
region = f.get("region", "")
archive = f.get("archive", "")
fpath = f.get("path", "")
fsystem = f.get("system", "")
priority = f.get("priority")
fast_boot = f.get("fast_boot")
bundled = f.get("bundled", False)
embedded = f.get("embedded", False)
has_builtin = f.get("has_builtin", False)
contents = f.get("contents", [])
config_key = f.get("config_key", "")
dest = f.get("dest", f.get("destination", ""))
ftype = f.get("type", "")
fpattern = f.get("pattern", "")
region_check = f.get("region_check")
size_note = f.get("size_note", "")
size_options = f.get("size_options", [])
size_range = f.get("size_range", "")
# Status badges (HTML); the rb-badge-* class chosen per badge is assumed
badges = []
if required:
badges.append(
'<span class="rb-badge-danger">required</span>'
)
else:
badges.append(
'<span class="rb-badge-info">optional</span>'
)
if not in_repo:
badges.append(
'<span class="rb-badge-danger">missing</span>'
)
elif in_repo:
badges.append(
'<span class="rb-badge-success">in repo</span>'
)
if hle:
badges.append(
'<span class="rb-badge-info">HLE fallback</span>'
)
if mode:
badges.append(
f'<span class="rb-badge-info">{mode}</span>'
)
if category and category != "bios":
badges.append(
f'<span class="rb-badge-info">{category}</span>'
)
if region:
region_str = (
", ".join(region) if isinstance(region, list) else str(region)
)
badges.append(
f'<span class="rb-badge-info">{region_str}</span>'
)
if storage and storage != "embedded":
badges.append(
f'<span class="rb-badge-info">{storage}</span>'
)
if bundled:
badges.append(
'<span class="rb-badge-info">bundled</span>'
)
if embedded:
badges.append(
'<span class="rb-badge-info">embedded</span>'
)
if has_builtin:
badges.append(
'<span class="rb-badge-info">built-in fallback</span>'
)
if archive:
badges.append(
f'<span class="rb-badge-info">in {archive}</span>'
)
if ftype and ftype != "bios":
badges.append(
f'<span class="rb-badge-info">{ftype}</span>'
)
badge_str = " ".join(badges)
border_cls = (
"rb-file-entry-required" if required else "rb-file-entry-optional"
)
lines.append(
f'<div class="rb-file-entry {border_cls}" markdown>'
)
lines.append("")
lines.append(f"**`{fname}`** {badge_str}")
if desc:
lines.append(f"
{desc}")
lines.append("")
details = []
if fpath and fpath != fname:
details.append(f"Path: `{fpath}`")
if fsystem:
details.append(f"System: {_system_link(fsystem, '../')}")
if size:
size_str = _fmt_size(size)
if fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
size_str += f" ({', '.join(bounds)})"
details.append(f"Size: {size_str}")
elif fmin or fmax:
bounds = []
if fmin:
bounds.append(f"min {_fmt_size(fmin)}")
if fmax:
bounds.append(f"max {_fmt_size(fmax)}")
details.append(f"Size: {', '.join(bounds)}")
if fsha1:
details.append(f"SHA1: `{fsha1}`")
if fmd5:
details.append(f"MD5: `{fmd5}`")
if fcrc32:
details.append(f"CRC32: `{fcrc32}`")
if fsha256:
details.append(f"SHA256: `{fsha256}`")
if fadler32:
details.append(f"Adler32: `{fadler32}`")
if aliases:
details.append(f"Aliases: {', '.join(f'`{a}`' for a in aliases)}")
if priority is not None:
details.append(f"Priority: {priority}")
if fast_boot is not None:
details.append(f"Fast boot: {'yes' if fast_boot else 'no'}")
if validation:
if isinstance(validation, list):
details.append(f"Validation: {', '.join(validation)}")
elif isinstance(validation, dict):
for scope, checks in validation.items():
details.append(f"Validation ({scope}): {', '.join(checks)}")
if source_ref:
details.append(f"Source: `{source_ref}`")
if platform_files:
plats = sorted(
p for p, names in platform_files.items() if fname in names
)
if plats:
plat_links = [_platform_link(p, p, "../") for p in plats]
details.append(f"Platforms: {', '.join(plat_links)}")
if dest and dest != fname and dest != fpath:
details.append(f"Destination: `{dest}`")
if config_key:
details.append(f"Config key: `{config_key}`")
if fpattern:
details.append(f"Pattern: `{fpattern}`")
if region_check is not None:
details.append(f"Region check: {'yes' if region_check else 'no'}")
if size_note:
details.append(f"Size note: {size_note}")
if size_options:
details.append(
f"Size options: {', '.join(_fmt_size(s) for s in size_options)}"
)
if size_range:
details.append(f"Size range: {size_range}")
if details:
for d in details:
lines.append(f"- {d}")
if fnote:
lines.append(f"- {fnote}")
if contents:
lines.append(f"- Contents ({len(contents)} entries):")
for c in contents[:10]:
if isinstance(c, dict):
cname = c.get("name", "")
cdesc = c.get("description", "")
csize = c.get("size", "")
parts = [f"`{cname}`"]
if cdesc:
parts.append(cdesc)
if csize:
parts.append(_fmt_size(csize))
lines.append(f" - {' -'.join(parts)}")
else:
lines.append(f" - {c}")
if len(contents) > 10:
lines.append(f" - ... and {len(contents) - 10} more")
lines.append("")
lines.append("
")
lines.append("")
# Data directories
if data_dirs:
lines.extend(["## Data directories", ""])
for dd in data_dirs:
ref = dd.get("ref", "")
dest = dd.get("destination", "")
lines.append(f"- `{ref}` >`{dest}`")
lines.append("")
lines.extend([f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
# Contributing page
def generate_gap_analysis(
profiles: dict,
coverages: dict,
db: dict,
data_names: set[str] | None = None,
) -> str:
"""Generate a unified gap analysis page.
Combines verification results (from coverages/verify.py) with source
provenance (from cross_reference) into a single truth dashboard.
Sections:
1. Verification status -- aggregated across all platforms
2. Problem files -- missing, untested, hash mismatch
3. Core complement -- emulator files not declared by any platform
"""
from cross_reference import cross_reference as run_cross_reference
from common import resolve_platform_cores
# ---- Section 1: aggregate verify results across all platforms ----
total_verified = 0
total_untested = 0
total_missing_verify = 0
total_files_verify = 0
platform_problems: list[dict] = []
for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
total_verified += cov["verified"]
total_untested += cov["untested"]
total_missing_verify += cov["missing"]
total_files_verify += cov["total"]
for d in cov["details"]:
if d["status"] != "ok" or d.get("discrepancy"):
platform_problems.append({
"platform": cov["platform"],
"platform_key": pname,
"name": d["name"],
"status": d["status"],
"required": d.get("required", True),
"reason": d.get("reason", ""),
"discrepancy": d.get("discrepancy", ""),
"system": d.get("system", ""),
})
pct_verified = (
f"{total_verified / total_files_verify * 100:.0f}%"
if total_files_verify
else "0%"
)
lines = [
f"# Gap Analysis - {SITE_NAME}",
"",
"Unified view of BIOS verification, file provenance, and coverage gaps.",
"",
'<div class="rb-stat-grid" markdown>',  # stat cards (rb-* CSS class names assumed)
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_files_verify:,}</span>',
'<span class="rb-stat-label">Total files (all platforms)</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_verified:,}</span>',
f'<span class="rb-stat-label">Verified ({pct_verified})</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_untested:,}</span>',
'<span class="rb-stat-label">Untested</span>',
"</div>",
"",
'<div class="rb-stat-card" markdown>',
f'<span class="rb-stat-value">{total_missing_verify:,}</span>',
'<span class="rb-stat-label">Missing</span>',
"</div>",
"",
"</div>",
"",
]
# ---- Verification per platform ----
lines.extend([
"## Verification by Platform",
"",
"| Platform | Files | Verified | Untested | Missing | Mode |",
"|----------|------:|---------:|---------:|--------:|------|",
])
for pname, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):
display = cov["platform"]
m = cov["missing"]
u = cov["untested"]
missing_str = (
f'<span class="rb-badge-danger">{m}</span>'
if m > 0
else '0'
)
untested_str = (
f'<span class="rb-badge-warning">{u}</span>'
if u > 0
else str(u)
)
lines.append(
f"| [{display}](platforms/{pname}.md) "
f"| {cov['total']} "
f"| {cov['verified']} "
f"| {untested_str} "
f"| {missing_str} "
f"| {cov['mode']} |"
)
lines.append("")
# ---- Section 2: Problem files ----
missing_files: dict[str, dict] = {}
untested_files: dict[str, dict] = {}
mismatch_files: dict[str, dict] = {}
for p in platform_problems:
fname = p["name"]
if p["status"] == "missing":
entry = missing_files.setdefault(fname, {
"name": fname, "required": p["required"],
"platforms": [], "reason": p["reason"],
})
entry["platforms"].append(p["platform"])
if p["required"]:
entry["required"] = True
elif p["status"] == "untested":
entry = untested_files.setdefault(fname, {
"name": fname, "required": p["required"],
"platforms": [], "reason": p["reason"],
})
entry["platforms"].append(p["platform"])
if p.get("discrepancy"):
entry = mismatch_files.setdefault(fname, {
"name": fname, "platforms": [],
"discrepancy": p["discrepancy"],
})
entry["platforms"].append(p["platform"])
total_problems = len(missing_files) + len(untested_files) + len(mismatch_files)
if total_problems > 0:
lines.extend([
"## Problem Files",
"",
f"{len(missing_files)} missing, {len(untested_files)} untested, "
f"{len(mismatch_files)} hash mismatch.",
"",
])
if missing_files:
lines.extend([
f"### Missing ({len(missing_files)} files)",
"",
"| File | Required | Platforms |",
"|------|----------|-----------|",
])
for fname in sorted(missing_files):
f = missing_files[fname]
req = "yes" if f["required"] else "no"
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {req} | {plats} |")
lines.append("")
if untested_files:
lines.extend([
f"### Untested ({len(untested_files)} files)",
"",
"Present but hash not verified.",
"",
"| File | Platforms | Reason |",
"|------|----------|--------|",
])
for fname in sorted(untested_files):
f = untested_files[fname]
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {plats} | {f['reason']} |")
lines.append("")
if mismatch_files:
lines.extend([
f"### Hash Mismatch ({len(mismatch_files)} files)",
"",
"Platform says OK but emulator validation disagrees.",
"",
"| File | Platforms | Discrepancy |",
"|------|----------|-------------|",
])
for fname in sorted(mismatch_files):
f = mismatch_files[fname]
plats = ", ".join(sorted(set(f["platforms"])))
lines.append(f"| `{fname}` | {plats} | {f['discrepancy']} |")
lines.append("")
# ---- Section 3: Core complement (cross-reference provenance) ----
all_declared: set[str] = set()
declared: dict[str, set[str]] = {}
for _name, cov in coverages.items():
config = cov["config"]
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
fname = fe.get("name", "")
if fname:
declared.setdefault(sys_id, set()).add(fname)
all_declared.add(fname)
active_profiles = {
k: v for k, v in profiles.items() if v.get("type") != "alias"
}
report = run_cross_reference(
active_profiles, declared, db,
data_names=data_names, all_declared=all_declared,
)
src_totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
total_undeclared = 0
emulator_gaps = []
for emu_name, data in sorted(report.items()):
if data["gaps"] == 0:
continue
total_undeclared += data["gaps"]
for key in src_totals:
src_totals[key] += data.get(f"gap_{key}", 0)
emulator_gaps.append((emu_name, data))
if total_undeclared > 0:
total_available = (
src_totals["bios"] + src_totals["data"] + src_totals["large_file"]
)
pct_available = (
f"{total_available / total_undeclared * 100:.0f}%"
if total_undeclared
else "0%"
)
lines.extend([
"## Core Complement",
"",
f"Files loaded by emulators but not declared by any platform. "
f"{total_undeclared:,} files across {len(emulator_gaps)} emulators, "
f"{total_available:,} available ({pct_available}), "
f"{src_totals['missing']} to source.",
"",
"### Provenance",
"",
"| Source | Count | Description |",
"|--------|------:|-------------|",
f"| bios/ | {src_totals['bios']} | In repository (database.json) |",
f"| data/ | {src_totals['data']} | Data directories (buildbot, GitHub) |",
f"| release | {src_totals['large_file']} "
"| GitHub release assets (large files) |",
f"| missing | {src_totals['missing']} | Not available, needs sourcing |",
"",
"### Per Emulator",
"",
"| Emulator | Undeclared | bios | data | release | Missing |",
"|----------|----------:|-----:|-----:|--------:|--------:|",
])
for emu_name, data in sorted(emulator_gaps, key=lambda x: -x[1]["gaps"]):
display = data["emulator"]
m = data.get("gap_missing", 0)
missing_str = (
f'<span class="rb-badge-danger">{m}</span>'
if m > 0
else '0'
)
lines.append(
f"| [{display}](emulators/{emu_name}.md) "
f"| {data['gaps']} "
f"| {data.get('gap_bios', 0)} "
f"| {data.get('gap_data', 0)} "
f"| {data.get('gap_large_file', 0)} "
f"| {missing_str} |"
)
lines.append("")
# List truly missing files with platform impact
emu_to_platforms: dict[str, set[str]] = {}
unique_profiles = {
k: v
for k, v in profiles.items()
if v.get("type") not in ("alias", "test")
}
for pname in coverages:
config = coverages[pname]["config"]
matched = resolve_platform_cores(config, unique_profiles)
for emu_name in matched:
emu_to_platforms.setdefault(emu_name, set()).add(pname)
all_src_missing: set[str] = set()
src_missing_details: list[dict] = []
for emu_name, data in emulator_gaps:
for g in data["gap_details"]:
if g["source"] == "missing" and g["name"] not in all_src_missing:
all_src_missing.add(g["name"])
src_missing_details.append({
"name": g["name"],
"emulator": data["emulator"],
"emu_key": emu_name,
"required": g["required"],
"source_ref": g["source_ref"],
})
if src_missing_details:
req_src = [m for m in src_missing_details if m["required"]]
lines.extend([
f"### Files to Source ({len(src_missing_details)} unique, "
f"{len(req_src)} required)",
"",
"| File | Emulator | Required | Affects platforms | Source ref |",
"|------|----------|----------|------------------|-----------|",
])
for m in sorted(
src_missing_details,
key=lambda x: (not x["required"], x["name"]),
):
plats = sorted(emu_to_platforms.get(m["emu_key"], set()))
plat_badges = (
" ".join(
f'<span class="rb-badge-info">{p}</span>'
for p in plats
)
if plats
else "-"
)
req = "yes" if m["required"] else "no"
lines.append(
f"| `{m['name']}` | {m['emulator']} | {req} | "
f"{plat_badges} | {m['source_ref']} |"
)
lines.append("")
lines.extend(["", f'Generated on {_timestamp()}.
'])
return "\n".join(lines) + "\n"
def generate_cross_reference(
coverages: dict,
profiles: dict,
) -> str:
"""Generate cross-reference: Platform -> Core -> Systems -> Upstream."""
unique = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
# Build core -> profile lookup by core name
core_to_profile: dict[str, str] = {}
for pname, p in unique.items():
for core in p.get("cores", [pname]):
core_to_profile[str(core)] = pname
total_cores = len(unique)
total_upstreams = len({
p.get("upstream", p.get("source", ""))
for p in unique.values()
if p.get("upstream") or p.get("source")
})
lines = [
f"# Cross-reference - {SITE_NAME}",
"",
f"Platform > Core > Systems > Upstream emulator. "
f"{total_cores} cores across {len(coverages)} platforms, "
f"tracing back to {total_upstreams} upstream projects.",
"",
"The libretro core is a port of the upstream emulator. "
"Files, features, and validation may differ between the two.",
"",
]
# Per platform
for pname in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
cov = coverages[pname]
display = cov["platform"]
config = cov["config"]
platform_cores = config.get("cores", [])
lines.append(f'??? abstract "[{display}](platforms/{pname}.md)"')
lines.append("")
# Resolve which profiles this platform uses
if platform_cores == "all_libretro":
matched = {
k: v for k, v in unique.items() if "libretro" in v.get("type", "")
}
elif isinstance(platform_cores, list):
matched = {}
for cname in platform_cores:
cname_str = str(cname)
if cname_str in unique:
matched[cname_str] = unique[cname_str]
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
matched[pkey] = unique[pkey]
else:
# Fallback: system intersection
psystems = set(config.get("systems", {}).keys())
matched = {
k: v for k, v in unique.items() if set(v.get("systems", [])) & psystems
}
if platform_cores == "all_libretro":
lines.append(f" **{len(matched)} cores** (all libretro)")
else:
lines.append(f" **{len(matched)} cores**")
lines.append("")
lines.append(" | Core | Classification | Systems | Files | Upstream |")
lines.append(" |------|---------------|---------|-------|----------|")
for emu_name in sorted(matched.keys()):
p = matched[emu_name]
emu_display = p.get("emulator", emu_name)
cls_raw = p.get("core_classification", "-")
cls = CLS_LABELS.get(cls_raw, cls_raw)
p.get("type", "")
upstream = p.get("upstream", "")
source = p.get("source", "")
systems = p.get("systems", [])
files = p.get("files", [])
sys_str = ", ".join(systems[:3])
if len(systems) > 3:
sys_str += f" +{len(systems) - 3}"
file_count = len(files)
# Count mode divergences
libretro_only = sum(1 for f in files if f.get("mode") == "libretro")
standalone_only = sum(1 for f in files if f.get("mode") == "standalone")
file_str = str(file_count)
if libretro_only or standalone_only:
parts = []
if libretro_only:
parts.append(f"{libretro_only} libretro-only")
if standalone_only:
parts.append(f"{standalone_only} standalone-only")
file_str += f" ({', '.join(parts)})"
upstream_display = "-"
if upstream:
upstream_short = upstream.replace("https://github.com/", "")
upstream_display = f"[{upstream_short}]({upstream})"
elif source:
source_short = source.replace("https://github.com/", "")
upstream_display = f"[{source_short}]({source})"
lines.append(
f" | [{emu_display}](emulators/{emu_name}.md) | {cls} | "
f"{sys_str} | {file_str} | {upstream_display} |"
)
lines.append("")
# Reverse view: by upstream emulator
lines.extend(
[
"## By upstream emulator",
"",
"| Upstream | Cores | Classification | Platforms |",
"|----------|-------|---------------|-----------|",
]
)
# Group profiles by upstream
by_upstream: dict[str, list[str]] = {}
for emu_name, p in sorted(unique.items()):
upstream = p.get("upstream", p.get("source", ""))
if upstream:
by_upstream.setdefault(upstream, []).append(emu_name)
# Build platform membership per core
platform_membership: dict[str, set[str]] = {}
for pname, cov in coverages.items():
config = cov["config"]
pcores = config.get("cores", [])
if pcores == "all_libretro":
for k, v in unique.items():
if "libretro" in v.get("type", ""):
platform_membership.setdefault(k, set()).add(pname)
elif isinstance(pcores, list):
for cname in pcores:
cname_str = str(cname)
if cname_str in unique:
platform_membership.setdefault(cname_str, set()).add(pname)
elif cname_str in core_to_profile:
pkey = core_to_profile[cname_str]
platform_membership.setdefault(pkey, set()).add(pname)
for upstream_url in sorted(by_upstream.keys()):
cores = by_upstream[upstream_url]
upstream_short = upstream_url.replace("https://github.com/", "")
classifications = set()
all_plats: set[str] = set()
for c in cores:
raw_cls = unique[c].get("core_classification", "-")
classifications.add(CLS_LABELS.get(raw_cls, raw_cls))
all_plats.update(platform_membership.get(c, set()))
cls_str = ", ".join(sorted(classifications))
plat_str = ", ".join(sorted(all_plats)) if all_plats else "-"
core_links = ", ".join(f"[{c}](emulators/{c}.md)" for c in sorted(cores))
lines.append(
f"| [{upstream_short}]({upstream_url}) | {core_links} | "
f"{cls_str} | {plat_str} |"
)
lines.extend(["", f"*Generated on {_timestamp()}*"])
return "\n".join(lines) + "\n"
def generate_contributing() -> str:
return """# Contributing - RetroBIOS
## Add a BIOS file
1. Fork this repository
2. Place the file in `bios/Manufacturer/Console/filename`
3. Variants (alternate hashes for the same file): place in `bios/Manufacturer/Console/.variants/`
4. Create a Pull Request - hashes are verified automatically
## Add a platform
1. Create a scraper in `scripts/scraper/` (inherit `BaseScraper`)
2. Read the platform's upstream source code to understand its BIOS check logic
3. Add entry to `platforms/_registry.yml`
4. Generate the platform YAML config
5. Test: `python scripts/verify.py --platform <name>`
## Add an emulator profile
1. Clone the emulator's source code
2. Search for BIOS/firmware loading (grep for `bios`, `rom`, `firmware`, `fopen`)
3. Document every file the emulator loads with source code references
4. Write YAML to `emulators/<name>.yml`
5. Test: `python scripts/cross_reference.py --emulator <name>`
## File conventions
- `bios/Manufacturer/Console/filename` for canonical files
- `bios/Manufacturer/Console/.variants/filename.sha1prefix` for alternate versions
- Files >50 MB go in GitHub release assets (`large-files` release)
- RPG Maker and ScummVM directories are excluded from deduplication
## PR validation
The CI automatically:
- Computes SHA1/MD5/CRC32 of new files
- Checks against known hashes in platform configs
- Reports coverage impact
"""
# Wiki pages
# index, architecture, tools, profiling are maintained as wiki/ sources
# and copied verbatim by main(). Only data-model is generated dynamically.
def generate_wiki_data_model(db: dict, profiles: dict) -> str:
"""Generate data model documentation from actual database structure."""
files_count = len(db.get("files", {}))
by_md5 = len(db.get("indexes", {}).get("by_md5", {}))
by_name = len(db.get("indexes", {}).get("by_name", {}))
by_crc32 = len(db.get("indexes", {}).get("by_crc32", {}))
by_path = len(db.get("indexes", {}).get("by_path_suffix", {}))
lines = [
f"# Data model - {SITE_NAME}",
"",
"## database.json",
"",
f"Primary key: SHA1. **{files_count}** file entries.",
"",
"Each entry:",
"",
"```json",
"{",
' "path": "bios/Nintendo/GameCube/GC/USA/IPL.bin",',
' "name": "IPL.bin",',
' "size": 2097152,',
' "sha1": "...",',
' "md5": "...",',
' "sha256": "...",',
' "crc32": "...",',
' "adler32": "..."',
"}",
"```",
"",
"### Indexes",
"",
"| Index | Entries | Purpose |",
"|-------|---------|---------|",
f"| `by_md5` | {by_md5} | MD5 to SHA1 lookup (Batocera, Recalbox verification) |",
f"| `by_name` | {by_name} | filename to SHA1 list (name-based resolution) |",
f"| `by_crc32` | {by_crc32} | CRC32 to SHA1 lookup |",
f"| `by_path_suffix` | {by_path} | relative path to SHA1 (regional variant disambiguation) |",
"",
"### File resolution order",
"",
"`resolve_local_file` tries these steps in order:",
"",
"1. Path suffix exact match (for regional variants with same filename)",
"2. SHA1 exact match",
"3. MD5 direct lookup (supports truncated Batocera 29-char MD5)",
"4. Name + alias lookup without hash (existence mode)",
"5. Name + alias with md5_composite / direct MD5 per candidate",
"6. zippedFile content match via inner ROM MD5 index",
"7. MAME clone fallback (deduped ZIP mapped to canonical name)",
"",
"## Platform YAML",
"",
"Scraped from upstream sources. Structure:",
"",
"```yaml",
"platform: Batocera",
"verification_mode: md5 # how the platform checks files",
"hash_type: md5 # hash type in file entries",
"base_destination: bios # root directory for BIOS files",
"systems:",
" system-id:",
" files:",
" - name: filename",
" destination: path/in/bios/dir",
" md5: expected_hash",
" sha1: expected_hash",
" required: true",
"```",
"",
"Supports inheritance (`inherits: retroarch`) and shared groups",
"(`includes: [group_name]` referencing `_shared.yml`).",
"",
"## Emulator YAML",
"",
f"**{len(profiles)}** profiles. Source-verified from emulator code.",
"",
"See the [profiling guide](profiling.md) for the full field reference.",
"",
]
return "\n".join(lines) + "\n"
# Build cross-reference indexes
def _build_platform_file_index(coverages: dict) -> dict[str, set]:
"""Map platform_name -> set of declared file names."""
index = {}
for name, cov in coverages.items():
names = set()
config = cov["config"]
for system in config.get("systems", {}).values():
for fe in system.get("files", []):
names.add(fe.get("name", ""))
index[name] = names
return index
def _build_emulator_file_index(profiles: dict) -> dict[str, dict]:
"""Map emulator_name -> {files: set, systems: set} for cross-reference."""
index = {}
for name, profile in profiles.items():
if profile.get("type") == "alias":
continue
index[name] = {
"files": {f.get("name", "") for f in profile.get("files", [])},
"systems": set(profile.get("systems", [])),
}
return index
# mkdocs.yml nav generator
def generate_mkdocs_nav(
coverages: dict,
manufacturers: dict,
profiles: dict,
) -> list:
"""Generate the nav section for mkdocs.yml."""
platform_nav = [{"Overview": "platforms/index.md"}]
for name in sorted(coverages.keys(), key=lambda x: coverages[x]["platform"]):
display = coverages[name]["platform"]
platform_nav.append({display: f"platforms/{name}.md"})
system_nav = [{"Overview": "systems/index.md"}]
for mfr in sorted(manufacturers.keys()):
slug = mfr.lower().replace(" ", "-")
system_nav.append({mfr: f"systems/{slug}.md"})
unique_profiles = {
k: v for k, v in profiles.items() if v.get("type") not in ("alias", "test")
}
# Group emulators by classification for nav
by_class: dict[str, list[tuple[str, str]]] = {}
for name in sorted(unique_profiles.keys()):
p = unique_profiles[name]
cls = p.get("core_classification", "other")
display = p.get("emulator", name)
by_class.setdefault(cls, []).append((display, f"emulators/{name}.md"))
# Classification display names
cls_labels = {
"pure_libretro": "Pure libretro",
"official_port": "Official ports",
"community_fork": "Community forks",
"frozen_snapshot": "Frozen snapshots",
"enhanced_fork": "Enhanced forks",
"game_engine": "Game engines",
"embedded_hle": "Embedded HLE",
"launcher": "Launchers",
"other": "Other",
}
emu_nav: list = [{"Overview": "emulators/index.md"}]
for cls in [
"official_port",
"community_fork",
"pure_libretro",
"game_engine",
"enhanced_fork",
"frozen_snapshot",
"embedded_hle",
"launcher",
"other",
]:
entries = by_class.get(cls, [])
if not entries:
continue
label = cls_labels.get(cls, cls)
sub = [{display: path} for display, path in entries]
emu_nav.append({f"{label} ({len(entries)})": sub})
wiki_nav = [
{"Overview": "wiki/index.md"},
{"Getting started": "wiki/getting-started.md"},
{"FAQ": "wiki/faq.md"},
{"Troubleshooting": "wiki/troubleshooting.md"},
{"Architecture": "wiki/architecture.md"},
{"Tools": "wiki/tools.md"},
{"Advanced usage": "wiki/advanced-usage.md"},
{"Verification modes": "wiki/verification-modes.md"},
{"Data model": "wiki/data-model.md"},
{"Profiling guide": "wiki/profiling.md"},
{"Adding a platform": "wiki/adding-a-platform.md"},
{"Adding a scraper": "wiki/adding-a-scraper.md"},
{"Testing guide": "wiki/testing-guide.md"},
{"Release process": "wiki/release-process.md"},
]
return [
{"Home": "index.md"},
{"Platforms": platform_nav},
{"Systems": system_nav},
{"Emulators": emu_nav},
{"Cross-reference": "cross-reference.md"},
{"Gap Analysis": "gaps.md"},
{"Wiki": wiki_nav},
{"Contributing": "contributing.md"},
]
# Main
def main():
parser = argparse.ArgumentParser(
description="Generate MkDocs site from project data"
)
parser.add_argument("--db", default="database.json")
parser.add_argument("--platforms-dir", default="platforms")
parser.add_argument("--emulators-dir", default="emulators")
parser.add_argument("--docs-dir", default=DOCS_DIR)
args = parser.parse_args()
db = load_database(args.db)
docs = Path(args.docs_dir)
# Clean generated dirs (preserve docs/superpowers/)
for d in GENERATED_DIRS:
target = docs / d
if target.exists():
shutil.rmtree(target)
# Ensure output dirs
for d in GENERATED_DIRS:
(docs / d).mkdir(parents=True, exist_ok=True)
registry_path = Path(args.platforms_dir) / "_registry.yml"
registry = {}
if registry_path.exists():
with open(registry_path) as f:
registry = (yaml.safe_load(f) or {}).get("platforms", {})
platform_names = list_registered_platforms(
args.platforms_dir, include_archived=True
)
from common import load_data_dir_registry
from cross_reference import _build_supplemental_index
data_registry = load_data_dir_registry(args.platforms_dir)
suppl_names = _build_supplemental_index()
print("Computing platform coverage...")
coverages = {}
for name in sorted(platform_names):
try:
cov = compute_coverage(
name, args.platforms_dir, db, data_registry, suppl_names
)
coverages[name] = cov
print(
f" {cov['platform']}: {cov['present']}/{cov['total']} ({_pct(cov['present'], cov['total'])})"
)
except FileNotFoundError as e:
print(f" {name}: skipped ({e})", file=sys.stderr)
print("Loading emulator profiles...")
profiles = load_emulator_profiles(args.emulators_dir, skip_aliases=False)
unique_count = sum(1 for p in profiles.values() if p.get("type") != "alias")
print(
f" {len(profiles)} profiles ({unique_count} unique, {len(profiles) - unique_count} aliases)"
)
# Build cross-reference indexes
platform_files = _build_platform_file_index(coverages)
emulator_files = _build_emulator_file_index(profiles)
# Generate home
print("Generating home page...")
write_if_changed(
str(docs / "index.md"), generate_home(db, coverages, profiles, registry)
)
# Build system_id -> manufacturer page map (needed by all generators)
print("Building system cross-reference map...")
manufacturers = _group_by_manufacturer(db)
_build_system_page_map_from_data(manufacturers, coverages, db)
print(f" {len(_system_page_map)} system IDs mapped to pages")
# Generate platform pages
print("Generating platform pages...")
write_if_changed(
str(docs / "platforms" / "index.md"), generate_platform_index(coverages)
)
for name, cov in coverages.items():
write_if_changed(
str(docs / "platforms" / f"{name}.md"),
generate_platform_page(name, cov, registry, emulator_files),
)
# Generate system pages
print("Generating system pages...")
write_if_changed(
str(docs / "systems" / "index.md"), generate_systems_index(manufacturers)
)
for mfr, consoles in manufacturers.items():
slug = mfr.lower().replace(" ", "-")
page = generate_system_page(mfr, consoles, platform_files, emulator_files)
write_if_changed(str(docs / "systems" / f"{slug}.md"), page)
# Generate emulator pages
print("Generating emulator pages...")
write_if_changed(
str(docs / "emulators" / "index.md"), generate_emulators_index(profiles)
)
for name, profile in profiles.items():
page = generate_emulator_page(name, profile, db, platform_files)
write_if_changed(str(docs / "emulators" / f"{name}.md"), page)
# Generate cross-reference page
print("Generating cross-reference page...")
write_if_changed(
str(docs / "cross-reference.md"), generate_cross_reference(coverages, profiles)
)
# Generate gap analysis page
print("Generating gap analysis page...")
write_if_changed(
str(docs / "gaps.md"),
generate_gap_analysis(profiles, coverages, db, suppl_names),
)
# Wiki pages: copy manually maintained sources + generate dynamic ones
print("Generating wiki pages...")
wiki_dest = docs / "wiki"
wiki_dest.mkdir(parents=True, exist_ok=True)
wiki_src = Path(WIKI_SRC_DIR)
if wiki_src.is_dir():
for src_file in wiki_src.glob("*.md"):
shutil.copy2(src_file, wiki_dest / src_file.name)
# data-model.md is generated (contains live DB stats)
write_if_changed(
str(wiki_dest / "data-model.md"), generate_wiki_data_model(db, profiles)
)
# Generate contributing
print("Generating contributing page...")
write_if_changed(str(docs / "contributing.md"), generate_contributing())
# Rebuild mkdocs.yml from a static template plus the generated nav; only the nav goes through yaml.dump, avoiding round-trip quote mangling of the full config
print("Updating mkdocs.yml nav...")
nav = generate_mkdocs_nav(coverages, manufacturers, profiles)
nav_yaml = yaml.dump(
{"nav": nav}, default_flow_style=False, sort_keys=False, allow_unicode=True
)
# Rewrite mkdocs.yml entirely (static config + generated nav)
mkdocs_static = """\
site_name: RetroBIOS
site_url: https://abdess.github.io/retrobios/
repo_url: https://github.com/Abdess/retrobios
repo_name: Abdess/retrobios
theme:
name: material
palette:
- media: (prefers-color-scheme)
toggle:
icon: material/brightness-auto
name: Switch to light mode
- media: '(prefers-color-scheme: light)'
scheme: default
toggle:
icon: material/brightness-7
name: Switch to dark mode
- media: '(prefers-color-scheme: dark)'
scheme: slate
toggle:
icon: material/brightness-4
name: Switch to auto
font: false
icon:
logo: material/chip
features:
- navigation.tabs
- navigation.sections
- navigation.top
- navigation.indexes
- search.suggest
- search.highlight
- content.tabs.link
- toc.follow
extra_css:
- stylesheets/extra.css
markdown_extensions:
- tables
- admonition
- attr_list
- md_in_html
- toc:
permalink: true
- pymdownx.details
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.tabbed:
alternate_style: true
plugins:
- search
"""
write_if_changed("mkdocs.yml", mkdocs_static + nav_yaml)
total_pages = (
1 # home
+ 1
+ len(coverages) # platform index + detail
+ 1
+ len(manufacturers) # system index + detail
+ 1 # cross-reference
+ 1
+ len(profiles) # emulator index + detail
+ 1 # gap analysis
+ 14 # wiki pages (copied from wiki/ + generated data-model)
+ 1 # contributing
)
print(f"\nGenerated {total_pages} pages in {args.docs_dir}/")
if __name__ == "__main__":
main()