From f8a325260f48cea6cb11ef4f0f5247021250391d Mon Sep 17 00:00:00 2001
From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com>
Date: Wed, 25 Mar 2026 14:56:37 +0100
Subject: [PATCH] feat: add wiki pages (architecture, tools, profiling, data model)

---
 scripts/generate_site.py | 398 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 397 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_site.py b/scripts/generate_site.py
index 7ba4b55a..b629cb24 100644
--- a/scripts/generate_site.py
+++ b/scripts/generate_site.py
@@ -34,7 +34,7 @@ DOCS_DIR = "docs"
 SITE_NAME = "RetroBIOS"
 REPO_URL = "https://github.com/Abdess/retrobios"
 RELEASE_URL = f"{REPO_URL}/releases/latest"
-GENERATED_DIRS = ["platforms", "systems", "emulators"]
+GENERATED_DIRS = ["platforms", "systems", "emulators", "wiki"]
 SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png"
 
 # Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking
@@ -1229,6 +1229,386 @@ The CI automatically:
 """
 
 
+# ---------------------------------------------------------------------------
+# Wiki pages
+# ---------------------------------------------------------------------------
+
+def generate_wiki_architecture() -> str:
+    """Generate the architecture overview page (static text; the profile count is hard-coded)."""
+    lines = [
+        f"# Architecture - {SITE_NAME}",
+        "",
+        "## Directory structure",
+        "",
+        "```",
+        "bios/ BIOS and firmware files, organized by Manufacturer/Console/",
+        " Manufacturer/Console/ canonical files (one per unique content)",
+        " .variants/ alternate versions (different hash, same purpose)",
+        "emulators/ one YAML profile per core (285 profiles)",
+        "platforms/ one YAML config per platform (scraped from upstream)",
+        " _shared.yml shared file groups across platforms",
+        " _registry.yml platform metadata (logos, scrapers, status)",
+        " _data_dirs.yml data directory definitions (Dolphin Sys, PPSSPP...)",
+        "scripts/ all tooling (Python, pyyaml only dependency)",
+        " scraper/ upstream scrapers (libretro, batocera, recalbox...)",
+        "data/ cached data directories (not BIOS, fetched at build)",
+        "schemas/ JSON schemas for validation",
+        "tests/ E2E test suite with synthetic fixtures",
+        "dist/ generated packs (gitignored)",
+        ".cache/ hash cache and large file downloads (gitignored)",
+        "```",
+        "",
+        "## Data flow",
+        "",
+        "```",
+        "Upstream sources Scrapers parse generate_db.py scans",
+        " System.dat (libretro) + fetch versions bios/ on disk",
+        " batocera-systems builds database.json",
+        " es_bios.xml (recalbox) (SHA1 primary key,",
+        " core-info .info files indexes: by_md5, by_name,",
+        " by_crc32, by_path_suffix)",
+        "",
+        "emulators/*.yml verify.py checks generate_pack.py resolves",
+        " source-verified platform-native files by hash, builds ZIP",
+        " from code verification packs per platform",
+        "```",
+        "",
+        "## Three layers of data",
+        "",
+        "| Layer | Source | Role |",
+        "|-------|--------|------|",
+        "| Platform YAML | Scraped from upstream | What the platform declares it needs |",
+        "| `_shared.yml` | Curated | Shared files across platforms, reflects actual behavior |",
+        "| Emulator profiles | Source-verified | What the code actually loads. Used for cross-reference and gap detection |",
+        "",
+        "The pack combines platform baseline (layer 1) with core requirements (layer 3).",
+        "Neither too much (no files from unused cores) nor too few (no missing files for active cores).",
+        "",
+    ]
+    return "\n".join(lines) + "\n"
+
+
+def generate_wiki_tools() -> str:
+    """Generate the tool reference page from static text (nothing is read from the scripts)."""
+    lines = [
+        f"# Tools - {SITE_NAME}",
+        "",
+        "All tools are Python scripts in `scripts/`. Single dependency: `pyyaml`.",
+        "",
+        "## Pipeline",
+        "",
+        "Run everything in sequence:",
+        "",
+        "```bash",
+        "python scripts/pipeline.py --offline # DB + verify + packs + readme + site",
+        "python scripts/pipeline.py --offline --skip-packs # DB + verify only",
+        "python scripts/pipeline.py --skip-docs # skip readme + site generation",
+        "```",
+        "",
+        "## Individual tools",
+        "",
+        "### generate_db.py",
+        "",
+        "Scan `bios/` and build `database.json` with multi-indexed lookups.",
+        "Large files in `.gitignore` are preserved from the existing database",
+        "and downloaded from GitHub release assets if not cached locally.",
+        "",
+        "```bash",
+        "python scripts/generate_db.py --force --bios-dir bios --output database.json",
+        "```",
+        "",
+        "### verify.py",
+        "",
+        "Check BIOS coverage for each platform using its native verification mode.",
+        "",
+        "```bash",
+        "python scripts/verify.py --all # all platforms",
+        "python scripts/verify.py --platform batocera # single platform",
+        "python scripts/verify.py --emulator dolphin # single emulator",
+        "python scripts/verify.py --system atari-lynx # single system",
+        "```",
+        "",
+        "Verification modes per platform:",
+        "",
+        "| Platform | Mode | Logic |",
+        "|----------|------|-------|",
+        "| RetroArch, Lakka, RetroPie | existence | file present = OK |",
+        "| Batocera, RetroBat | md5 | MD5 hash match |",
+        "| Recalbox | md5 | MD5 multi-hash, 3 severity levels |",
+        "| EmuDeck | md5 | MD5 whitelist per system |",
+        "",
+        "### generate_pack.py",
+        "",
+        "Build platform-specific BIOS ZIP packs.",
+        "",
+        "```bash",
+        "python scripts/generate_pack.py --all --output-dir dist/",
+        "python scripts/generate_pack.py --platform batocera",
+        "python scripts/generate_pack.py --emulator dolphin",
+        "python scripts/generate_pack.py --system atari-lynx",
+        "```",
+        "",
+        "Packs include platform baseline files plus files required by the platform's cores.",
+        "When a file passes platform verification but fails emulator validation,",
+        "the tool searches for a variant that satisfies both.",
+        "If none exists, the platform version is kept and the discrepancy is reported.",
+        "",
+        "### cross_reference.py",
+        "",
+        "Compare emulator profiles against platform configs.",
+        "Reports files that cores need but platforms don't declare.",
+        "",
+        "```bash",
+        "python scripts/cross_reference.py # all",
+        "python scripts/cross_reference.py --emulator dolphin # single",
+        "```",
+        "",
+        "### refresh_data_dirs.py",
+        "",
+        "Fetch data directories (Dolphin Sys, PPSSPP assets, blueMSX databases)",
+        "from upstream repositories into `data/`.",
+        "",
+        "```bash",
+        "python scripts/refresh_data_dirs.py",
+        "python scripts/refresh_data_dirs.py --key dolphin-sys --force",
+        "```",
+        "",
+        "### Other tools",
+        "",
+        "| Script | Purpose |",
+        "|--------|---------|",
+        "| `dedup.py` | Deduplicate `bios/`, move duplicates to `.variants/` |",
+        "| `validate_pr.py` | Validate BIOS files in pull requests |",
+        "| `auto_fetch.py` | Fetch missing BIOS files from known sources |",
+        "| `list_platforms.py` | List active platforms (used by CI) |",
+        "| `download.py` | Download packs from GitHub releases |",
+        "",
+    ]
+    return "\n".join(lines) + "\n"
+
+
+def generate_wiki_profiling() -> str:
+    """Generate the emulator profiling methodology guide (static text, including the YAML field reference)."""
+    lines = [
+        f"# Profiling guide - {SITE_NAME}",
+        "",
+        "How to create an emulator profile from source code.",
+        "",
+        "## Approach",
+        "",
+        "A profile documents what an emulator loads at runtime.",
+        "The source code is the reference because it reflects actual behavior.",
+        "Documentation, .info files, and wikis are useful starting points",
+        "but are verified against the code.",
+        "",
+        "## Steps",
+        "",
+        "### 1. Find the source code",
+        "",
+        "Check these locations in order:",
+        "",
+        "1. Upstream original (the emulator's own repository)",
+        "2. Libretro fork (may have adapted paths or added files)",
+        "3. If not on GitHub: GitLab, Codeberg, SourceForge, archive.org",
+        "",
+        "Always clone both upstream and libretro port to compare.",
+        "",
+        "### 2. Trace file loading",
+        "",
+        "Read the code flow. Don't grep keywords by assumption.",
+        "Each emulator has its own way of loading files.",
+        "",
+        "Look for:",
+        "",
+        "- `fopen`, `open`, `read_file`, `load_rom`, `load_bios` calls",
+        "- `retro_system_directory` / `system_dir` in libretro cores",
+        "- File existence checks (`path_is_valid`, `file_exists`)",
+        "- Hash validation (MD5, CRC32, SHA1 comparisons in code)",
+        "- Size validation (`fseek`/`ftell`, `stat`, fixed buffer sizes)",
+        "",
+        "### 3. Determine required vs optional",
+        "",
+        "This is decided by code behavior, not by judgment:",
+        "",
+        "- **required**: the core does not start or function without the file",
+        "- **optional**: the core works with degraded functionality without it",
+        "- **hle_fallback: true**: the core has a high-level emulation path when the file is missing",
+        "",
+        "### 4. Document divergences",
+        "",
+        "When the libretro port differs from the upstream:",
+        "",
+        "- `mode: libretro` - file only used by the libretro core",
+        "- `mode: standalone` - file only used in standalone mode",
+        "- `mode: both` - used by both (default, can be omitted)",
+        "",
+        "Path differences (current dir vs system_dir) are normal adaptation,",
+        "not a divergence. Name changes (e.g. `naomi2_` to `n2_`) may be intentional",
+        "to avoid conflicts in the shared system directory.",
+        "",
+        "### 5. Write the YAML profile",
+        "",
+        "```yaml",
+        "emulator: Dolphin",
+        "type: standalone + libretro",
+        "core_classification: community_fork",
+        "source: https://github.com/libretro/dolphin",
+        "upstream: https://github.com/dolphin-emu/dolphin",
+        "profiled_date: 2026-03-25",
+        "core_version: 5.0-21264",
+        "systems:",
+        "  - nintendo-gamecube",
+        "  - nintendo-wii",
+        "",
+        "files:",
+        "  - name: GC/USA/IPL.bin",
+        "    system: nintendo-gamecube",
+        "    required: false",
+        "    hle_fallback: true",
+        "    size: 2097152",
+        "    validation: [size, adler32]",
+        "    known_hash_adler32: 0x4f1f6f5c",
+        "    region: north-america",
+        "    source_ref: Source/Core/Core/Boot/Boot_BS2Emu.cpp:42",
+        "```",
+        "",
+        "### 6. Validate",
+        "",
+        "```bash",
+        "python scripts/cross_reference.py --emulator dolphin --json",
+        "python scripts/verify.py --emulator dolphin",
+        "```",
+        "",
+        "## YAML field reference",
+        "",
+        "### Profile fields",
+        "",
+        "| Field | Required | Description |",
+        "|-------|----------|-------------|",
+        "| `emulator` | yes | display name |",
+        "| `type` | yes | `libretro`, `standalone`, `standalone + libretro`, `alias`, `launcher` |",
+        "| `core_classification` | no | `pure_libretro`, `official_port`, `community_fork`, `frozen_snapshot`, `enhanced_fork`, `game_engine`, `embedded_hle`, `alias`, `launcher` |",
+        "| `source` | yes | libretro core repository URL |",
+        "| `upstream` | no | original emulator repository URL |",
+        "| `profiled_date` | yes | date of source analysis |",
+        "| `core_version` | yes | version analyzed |",
+        "| `systems` | yes | list of system IDs this core handles |",
+        "| `cores` | no | list of core names (default: profile filename) |",
+        "| `files` | yes | list of file entries |",
+        "| `notes` | no | free-form technical notes |",
+        "| `exclusion_note` | no | why the profile has no files |",
+        "| `data_directories` | no | references to data dirs in `_data_dirs.yml` |",
+        "",
+        "### File entry fields",
+        "",
+        "| Field | Description |",
+        "|-------|-------------|",
+        "| `name` | filename as the core expects it |",
+        "| `required` | true if the core needs this file to function |",
+        "| `system` | system ID this file belongs to |",
+        "| `size` | expected size in bytes |",
+        "| `md5`, `sha1`, `crc32`, `sha256` | expected hashes from source code |",
+        "| `validation` | list of checks the code performs: `size`, `crc32`, `md5`, `sha1`, `adler32` |",
+        "| `aliases` | alternate filenames for the same file |",
+        "| `mode` | `libretro`, `standalone`, or `both` |",
+        "| `hle_fallback` | true if a high-level emulation path exists |",
+        "| `category` | `bios` (default), `game_data`, `bios_zip` |",
+        "| `region` | geographic region (e.g. `north-america`, `japan`) |",
+        "| `source_ref` | source file and line number |",
+        "| `path` | path relative to system directory |",
+        "| `description` | what this file is |",
+        "| `note` | additional context |",
+        "| `archive` | parent ZIP if this file is inside an archive |",
+        "| `contents` | structure of files inside a BIOS ZIP |",
+        "| `storage` | `embedded` (default), `external`, `user_provided` |",
+        "",
+    ]
+    return "\n".join(lines) + "\n"
+
+
+def generate_wiki_data_model(db: dict, profiles: dict) -> str:
+    """Generate the data model page; counts come from db["files"], db["indexes"] and len(profiles)."""
+    files_count = len(db.get("files", {}))
+    by_md5 = len(db.get("indexes", {}).get("by_md5", {}))
+    by_name = len(db.get("indexes", {}).get("by_name", {}))
+    by_crc32 = len(db.get("indexes", {}).get("by_crc32", {}))
+    by_path = len(db.get("indexes", {}).get("by_path_suffix", {}))
+
+    lines = [
+        f"# Data model - {SITE_NAME}",
+        "",
+        "## database.json",
+        "",
+        f"Primary key: SHA1. **{files_count}** file entries.",
+        "",
+        "Each entry:",
+        "",
+        "```json",
+        '{',
+        '  "path": "bios/Nintendo/GameCube/GC/USA/IPL.bin",',
+        '  "name": "IPL.bin",',
+        '  "size": 2097152,',
+        '  "sha1": "...",',
+        '  "md5": "...",',
+        '  "sha256": "...",',
+        '  "crc32": "...",',
+        '  "adler32": "..."',
+        '}',
+        "```",
+        "",
+        "### Indexes",
+        "",
+        "| Index | Entries | Purpose |",
+        "|-------|---------|---------|",
+        f"| `by_md5` | {by_md5} | MD5 to SHA1 lookup (Batocera, Recalbox verification) |",
+        f"| `by_name` | {by_name} | filename to SHA1 list (name-based resolution) |",
+        f"| `by_crc32` | {by_crc32} | CRC32 to SHA1 lookup |",
+        f"| `by_path_suffix` | {by_path} | relative path to SHA1 (regional variant disambiguation) |",
+        "",
+        "### File resolution order",
+        "",
+        "`resolve_local_file` tries these steps in order:",
+        "",
+        "1. Path suffix exact match (for regional variants with same filename)",
+        "2. SHA1 exact match",
+        "3. MD5 direct lookup (supports truncated Batocera 29-char MD5)",
+        "4. Name + alias lookup without hash (existence mode)",
+        "5. Name + alias with md5_composite / direct MD5 per candidate",
+        "6. zippedFile content match via inner ROM MD5 index",
+        "7. MAME clone fallback (deduped ZIP mapped to canonical name)",
+        "",
+        "## Platform YAML",
+        "",
+        "Scraped from upstream sources. Structure:",
+        "",
+        "```yaml",
+        "platform: Batocera",
+        "verification_mode: md5 # how the platform checks files",
+        "hash_type: md5 # hash type in file entries",
+        "base_destination: bios # root directory for BIOS files",
+        "systems:",
+        "  system-id:",
+        "    files:",
+        "      - name: filename",
+        "        destination: path/in/bios/dir",
+        "        md5: expected_hash",
+        "        sha1: expected_hash",
+        "        required: true",
+        "```",
+        "",
+        "Supports inheritance (`inherits: retroarch`) and shared groups",
+        "(`includes: [group_name]` referencing `_shared.yml`).",
+        "",
+        "## Emulator YAML",
+        "",
+        f"**{len(profiles)}** profiles. Source-verified from emulator code.",
+        "",
+        "See the [profiling guide](profiling.md) for the full field reference.",
+        "",
+    ]
+    return "\n".join(lines) + "\n"
+
+
 # ---------------------------------------------------------------------------
 # Build cross-reference indexes
 # ---------------------------------------------------------------------------
@@ -1285,6 +1665,13 @@ def generate_mkdocs_nav(
         display = unique_profiles[name].get("emulator", name)
         emu_nav.append({display: f"emulators/{name}.md"})
 
+    wiki_nav = [
+        {"Architecture": "wiki/architecture.md"},
+        {"Tools": "wiki/tools.md"},
+        {"Profiling guide": "wiki/profiling.md"},
+        {"Data model": "wiki/data-model.md"},
+    ]
+
     return [
         {"Home": "index.md"},
         {"Platforms": platform_nav},
@@ -1292,6 +1679,7 @@ def generate_mkdocs_nav(
         {"Emulators": emu_nav},
         {"Cross-reference": "cross-reference.md"},
         {"Gap Analysis": "gaps.md"},
+        {"Wiki": wiki_nav},
         {"Contributing": "contributing.md"},
     ]
 
@@ -1398,6 +1786,13 @@ def main():
         generate_gap_analysis(profiles, coverages, db)
     )
 
+    # Generate wiki pages
+    print("Generating wiki pages...")
+    (docs / "wiki" / "architecture.md").write_text(generate_wiki_architecture())
+    (docs / "wiki" / "tools.md").write_text(generate_wiki_tools())
+    (docs / "wiki" / "profiling.md").write_text(generate_wiki_profiling())
+    (docs / "wiki" / "data-model.md").write_text(generate_wiki_data_model(db, profiles))
+
     # Generate contributing
     print("Generating contributing page...")
     (docs / "contributing.md").write_text(generate_contributing())
@@ -1425,6 +1820,7 @@ def main():
         + 1  # cross-reference
         + 1 + len(profiles)  # emulator index + detail
         + 1  # gap analysis
+        + 4  # wiki (architecture, tools, profiling, data model)
         + 1  # contributing
     )
     print(f"\nGenerated {total_pages} pages in {args.docs_dir}/")