From 6eca4c416a53a08bf1820033085976ffd353285e Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Wed, 1 Apr 2026 13:05:34 +0200 Subject: [PATCH] chore: remove dead wiki generator functions, update docs Remove 4 unused functions from generate_site.py (generate_wiki_index, generate_wiki_architecture, generate_wiki_tools, generate_wiki_profiling) that contained stale data. Wiki pages are sourced from wiki/ directory. Update generate_site.py contributing section with correct test counts (249 total, 186 E2E, 8 pack integrity) and pack integrity documentation. --- scripts/generate_site.py | 481 --------------------------------------- 1 file changed, 481 deletions(-) diff --git a/scripts/generate_site.py b/scripts/generate_site.py index 0b99cc72..357da191 100644 --- a/scripts/generate_site.py +++ b/scripts/generate_site.py @@ -1361,487 +1361,6 @@ The CI automatically: # index, architecture, tools, profiling are maintained as wiki/ sources # and copied verbatim by main(). Only data-model is generated dynamically. -def generate_wiki_index() -> str: - """Generate wiki landing page.""" - return f"""# Wiki - {SITE_NAME} - -Technical documentation for the RetroBIOS toolchain. - -## Pages - -- **[Architecture](architecture.md)** - directory structure, data flow, platform inheritance, pack grouping, security, edge cases, CI workflows -- **[Tools](tools.md)** - CLI reference for every script, pipeline usage, scrapers -- **[Profiling guide](profiling.md)** - how to create an emulator profile from source code, step by step, with YAML field reference -- **[Data model](data-model.md)** - database.json structure, indexes, file resolution order, YAML formats - -## For users - -If you just want to download BIOS packs, see the [home page](../index.md). - -## For contributors - -Start with the [profiling guide](profiling.md) to understand how emulator profiles are built, -then see [contributing](../contributing.md) for submission guidelines. -""" - - -def generate_wiki_architecture() -> str: - """Generate architecture overview from codebase structure.""" - lines = [ - f"# Architecture - {SITE_NAME}", - "", - "## Directory structure", - "", - "```", - "bios/ BIOS and firmware files, organized by Manufacturer/Console/", - " Manufacturer/Console/ canonical files (one per unique content)", - " .variants/ alternate versions (different hash, same purpose)", - "emulators/ one YAML profile per core (285 profiles)", - "platforms/ one YAML config per platform (scraped from upstream)", - " _shared.yml shared file groups across platforms", - " _registry.yml platform metadata (logos, scrapers, status)", - " _data_dirs.yml data directory definitions (Dolphin Sys, PPSSPP...)", - "scripts/ all tooling (Python, pyyaml only dependency)", - " scraper/ upstream scrapers (libretro, batocera, recalbox...)", - "data/ cached data directories (not BIOS, fetched at build)", - "schemas/ JSON schemas for validation", - "tests/ E2E test suite with synthetic fixtures", - "dist/ generated packs (gitignored)", - ".cache/ hash cache and large file downloads (gitignored)", - "```", - "", - "## Data flow", - "", - "```", - "Upstream sources Scrapers parse generate_db.py scans", - " System.dat (libretro) + fetch versions bios/ on disk", - " batocera-systems builds database.json", - " es_bios.xml (recalbox) (SHA1 primary key,", - " core-info .info files indexes: by_md5, by_name,", - " by_crc32, by_path_suffix)", - "", - "emulators/*.yml verify.py checks generate_pack.py resolves", - " source-verified platform-native files by hash, builds ZIP", - " from code verification packs per platform", - "```", - "", - "## Three layers of data", - "", - "| Layer | Source | Role |", - "|-------|--------|------|", - "| Platform YAML | Scraped from upstream | What the platform declares it needs |", - "| `_shared.yml` | Curated | Shared files across platforms, reflects actual behavior |", - "| Emulator profiles | Source-verified | What the code actually loads. Used for cross-reference and gap detection |", - "", - "The pack combines platform baseline (layer 1) with core requirements (layer 3).", - "Neither too much (no files from unused cores) nor too few (no missing files for active cores).", - "", - "## Pack grouping", - "", - "Platforms that produce identical packs are grouped automatically.", - "RetroArch and Lakka share the same files and `base_destination` (`system/`),", - "so they produce one combined pack (`RetroArch_Lakka_BIOS_Pack.zip`).", - "RetroPie uses `BIOS/` as base path, so it gets a separate pack.", - "", - "## Storage tiers", - "", - "| Tier | Meaning |", - "|------|---------|", - "| `embedded` (default) | file is in the `bios/` directory, included in packs |", - "| `external` | file has a `source_url`, downloaded at pack build time |", - "| `user_provided` | user must provide the file (instructions included in pack) |", - "", - "## Verification severity", - "", - "How missing or mismatched files are reported:", - "", - "| Mode | required + missing | optional + missing | hash mismatch |", - "|------|-------------------|-------------------|--------------|", - "| existence | WARNING | INFO | N/A |", - "| md5 | CRITICAL | WARNING | UNTESTED |", - "", - "Files with `hle_fallback: true` are downgraded to INFO when missing", - "(the emulator has a software fallback).", - "", - "## Discrepancy detection", - "", - "When a file passes platform verification (MD5 match) but fails", - "emulator-level validation (wrong CRC32, wrong size), a DISCREPANCY is reported.", - "The pack generator searches the repo for a variant that satisfies both.", - "If none exists, the platform version is kept.", - "", - "## Security", - "", - "- `safe_extract_zip()` prevents zip-slip path traversal attacks", - "- `deterministic_zip` rebuilds MAME ZIPs so same ROMs always produce the same hash", - "- `crypto_verify.py` and `sect233r1.py` verify 3DS RSA-2048 signatures and AES-128-CBC integrity", - "- ZIP inner ROM verification via `checkInsideZip()` replicates Batocera's behavior", - "- `md5_composite()` replicates Recalbox's composite ZIP hash", - "", - "## Edge cases", - "", - "| Case | Handling |", - "|------|---------|", - "| Batocera truncated MD5 (29 chars) | prefix match in resolution |", - "| `zippedFile` entries | MD5 is of the ROM inside the ZIP, not the ZIP itself |", - "| Regional variants (same filename) | `by_path_suffix` index disambiguates |", - "| MAME BIOS ZIPs | `contents` field documents inner structure |", - "| RPG Maker/ScummVM | excluded from dedup (NODEDUP) to preserve directory structure |", - "| `strip_components` in data dirs | flattens cache prefix to match expected path |", - "| case-insensitive dedup | prevents `font.rom` + `FONT.ROM` conflicts on Windows/macOS |", - "", - "## Platform inheritance", - "", - "Platform configs support `inherits:` to share definitions.", - "Lakka inherits from RetroArch, RetroPie inherits from RetroArch with `base_destination: BIOS`.", - "`overrides:` allows child platforms to modify specific systems from the parent.", - "", - "Core resolution (`resolve_platform_cores`) uses three strategies:", - "", - "- `cores: all_libretro` - include all profiles with `libretro` in their type", - "- `cores: [list]` - include only named profiles", - "- `cores:` absent - fallback to system ID intersection between platform and profiles", - "", - "## MAME clone map", - "", - "`_mame_clones.json` at repo root maps MAME clone ROM names to their canonical parent.", - "When a clone ZIP was deduplicated, `resolve_local_file` uses this map to find the canonical file.", - "", - "## Tests", - "", - "`tests/test_e2e.py` contains 186 end-to-end tests with synthetic fixtures.", - "Covers: file resolution, verification, severity, cross-reference, aliases,", - "inheritance, shared groups, data dirs, storage tiers, HLE, launchers,", - "platform grouping, core resolution (3 strategies + alias exclusion),", - "target filtering, ground truth validation.", - "", - "```bash", - "python -m unittest tests.test_e2e -v", - "```", - "", - "`tests/test_pack_integrity.py` contains 8 pack integrity tests (1 per platform).", - "Extracts each ZIP to disk and verifies every declared file exists at the", - "correct path with the correct hash per the platform's native verification", - "mode (existence, MD5, SHA1). Handles inner ZIP verification for MAME/FBNeo", - "ROM sets. Integrated as pipeline step 6/8.", - "", - "```bash", - "python -m unittest tests.test_pack_integrity -v", - "```", - "", - "## CI workflows", - "", - "| Workflow | File | Trigger | Role |", - "|----------|------|---------|------|", - "| Build & Release | `build.yml` | `workflow_dispatch` (manual) | restore large files, build packs, deploy site, create GitHub release |", - "| PR Validation | `validate.yml` | pull request on `bios/`/`platforms/` | validate BIOS hashes, schema check, run tests, auto-label PR |", - "| Weekly Sync | `watch.yml` | cron (Monday 6 AM UTC) + manual | scrape upstream sources, detect changes, create update PR |", - "", - "Build workflow has a 7-day rate limit between releases and keeps the 3 most recent.", - "", - "## License", - "", - "See `LICENSE` at repo root. Files are provided for personal backup and archival.", - "", - ] - return "\n".join(lines) + "\n" - - -def generate_wiki_tools() -> str: - """Generate tool reference from script docstrings and argparse.""" - lines = [ - f"# Tools - {SITE_NAME}", - "", - "All tools are Python scripts in `scripts/`. Single dependency: `pyyaml`.", - "", - "## Pipeline", - "", - "Run everything in sequence:", - "", - "```bash", - "python scripts/pipeline.py --offline # DB + verify + packs + readme + site", - "python scripts/pipeline.py --offline --skip-packs # DB + verify only", - "python scripts/pipeline.py --skip-docs # skip readme + site generation", - "```", - "", - "## Individual tools", - "", - "### generate_db.py", - "", - "Scan `bios/` and build `database.json` with multi-indexed lookups.", - "Large files in `.gitignore` are preserved from the existing database", - "and downloaded from GitHub release assets if not cached locally.", - "", - "```bash", - "python scripts/generate_db.py --force --bios-dir bios --output database.json", - "```", - "", - "### verify.py", - "", - "Check BIOS coverage for each platform using its native verification mode.", - "", - "```bash", - "python scripts/verify.py --all # all platforms", - "python scripts/verify.py --platform batocera # single platform", - "python scripts/verify.py --emulator dolphin # single emulator", - "python scripts/verify.py --system atari-lynx # single system", - "```", - "", - "Verification modes per platform:", - "", - "| Platform | Mode | Logic |", - "|----------|------|-------|", - "| RetroArch, Lakka, RetroPie | existence | file present = OK |", - "| Batocera, RetroBat | md5 | MD5 hash match |", - "| Recalbox | md5 | MD5 multi-hash, 3 severity levels |", - "| EmuDeck | md5 | MD5 whitelist per system |", - "", - "### generate_pack.py", - "", - "Build platform-specific BIOS ZIP packs.", - "", - "```bash", - "# Full platform packs", - "python scripts/generate_pack.py --all --output-dir dist/", - "python scripts/generate_pack.py --platform batocera", - "python scripts/generate_pack.py --emulator dolphin", - "python scripts/generate_pack.py --system atari-lynx", - "", - "# Granular options", - "python scripts/generate_pack.py --platform retroarch --system sony-playstation", - "python scripts/generate_pack.py --platform batocera --required-only", - "python scripts/generate_pack.py --platform retroarch --split", - "python scripts/generate_pack.py --platform retroarch --split --group-by manufacturer", - "", - "# Hash-based lookup and custom packs", - "python scripts/generate_pack.py --from-md5 d8f1206299c48946e6ec5ef96d014eaa", - "python scripts/generate_pack.py --platform batocera --from-md5-file missing.txt", - "python scripts/generate_pack.py --platform retroarch --list-systems", - "```", - "", - "Packs include platform baseline files plus files required by the platform's cores.", - "When a file passes platform verification but fails emulator validation,", - "the tool searches for a variant that satisfies both.", - "If none exists, the platform version is kept and the discrepancy is reported.", - "", - "**Granular options:**", - "", - "- `--system` with `--platform`: filter to specific systems within a platform pack", - "- `--required-only`: exclude optional files, keep only required", - "- `--split`: generate one ZIP per system instead of one big pack", - "- `--split --group-by manufacturer`: group split packs by manufacturer (Sony, Nintendo, Sega...)", - "- `--from-md5`: look up a hash in the database, or build a custom pack with `--platform`/`--emulator`", - "- `--from-md5-file`: same, reading hashes from a file (one per line, comments with #)", - "", - "### cross_reference.py", - "", - "Compare emulator profiles against platform configs.", - "Reports files that cores need but platforms don't declare.", - "", - "```bash", - "python scripts/cross_reference.py # all", - "python scripts/cross_reference.py --emulator dolphin # single", - "```", - "", - "### refresh_data_dirs.py", - "", - "Fetch data directories (Dolphin Sys, PPSSPP assets, blueMSX databases)", - "from upstream repositories into `data/`.", - "", - "```bash", - "python scripts/refresh_data_dirs.py", - "python scripts/refresh_data_dirs.py --key dolphin-sys --force", - "```", - "", - "### Other tools", - "", - "| Script | Purpose |", - "|--------|---------|", - "| `dedup.py` | Deduplicate `bios/`, move duplicates to `.variants/`. RPG Maker and ScummVM excluded (NODEDUP) |", - "| `validate_pr.py` | Validate BIOS files in pull requests |", - "| `auto_fetch.py` | Fetch missing BIOS files from known sources |", - "| `list_platforms.py` | List active platforms (used by CI) |", - "| `download.py` | Download packs from GitHub releases |", - "| `common.py` | Shared library: hash computation, file resolution, platform config loading, emulator profiles |", - "| `generate_readme.py` | Generate README.md and CONTRIBUTING.md from database |", - "| `generate_site.py` | Generate all MkDocs site pages (this documentation) |", - "| `deterministic_zip.py` | Rebuild MAME BIOS ZIPs deterministically (same ROMs = same hash) |", - "| `crypto_verify.py` | 3DS RSA signature and AES crypto verification |", - "| `sect233r1.py` | Pure Python ECDSA verification on sect233r1 curve (3DS OTP cert) |", - "| `batch_profile.py` | Batch profiling automation for libretro cores |", - "| `migrate.py` | Migrate flat bios structure to Manufacturer/Console/ hierarchy |", - "", - "## Large files", - "", - "Files over 50 MB are stored as assets on the `large-files` GitHub release.", - "They are listed in `.gitignore` so they don't bloat the git repository.", - "`generate_db.py` downloads them from the release when rebuilding the database,", - "using `fetch_large_file()` from `common.py`. The same function is used by", - "`generate_pack.py` when a file has a hash mismatch with the local variant.", - "", - "## Scrapers", - "", - "Located in `scripts/scraper/`. Each inherits `BaseScraper` and implements `fetch_requirements()`.", - "", - "| Scraper | Source | Format |", - "|---------|--------|--------|", - "| `libretro_scraper` | System.dat + core-info .info files | clrmamepro DAT |", - "| `batocera_scraper` | batocera-systems script | Python dict |", - "| `recalbox_scraper` | es_bios.xml | XML |", - "| `retrobat_scraper` | batocera-systems.json | JSON |", - "| `emudeck_scraper` | checkBIOS.sh | Bash + CSV |", - "| `retrodeck_scraper` | component manifests | JSON per component |", - "| `coreinfo_scraper` | .info files from libretro-core-info | INI-like |", - "", - "Internal modules: `base_scraper.py` (abstract base with `_fetch_raw()` caching", - "and shared CLI), `dat_parser.py` (clrmamepro DAT format parser).", - "", - "Adding a scraper: inherit `BaseScraper`, implement `fetch_requirements()`,", - "call `scraper_cli(YourScraper)` in `__main__`.", - "", - ] - return "\n".join(lines) + "\n" - - -def generate_wiki_profiling() -> str: - """Generate the emulator profiling methodology guide.""" - lines = [ - f"# Profiling guide - {SITE_NAME}", - "", - "How to create an emulator profile from source code.", - "", - "## Approach", - "", - "A profile documents what an emulator loads at runtime.", - "The source code is the reference because it reflects actual behavior.", - "Documentation, .info files, and wikis are useful starting points", - "but are verified against the code.", - "", - "## Steps", - "", - "### 1. Find the source code", - "", - "Check these locations in order:", - "", - "1. Upstream original (the emulator's own repository)", - "2. Libretro fork (may have adapted paths or added files)", - "3. If not on GitHub: GitLab, Codeberg, SourceForge, archive.org", - "", - "Always clone both upstream and libretro port to compare.", - "", - "### 2. Trace file loading", - "", - "Read the code flow. Don't grep keywords by assumption.", - "Each emulator has its own way of loading files.", - "", - "Look for:", - "", - "- `fopen`, `open`, `read_file`, `load_rom`, `load_bios` calls", - "- `retro_system_directory` / `system_dir` in libretro cores", - "- File existence checks (`path_is_valid`, `file_exists`)", - "- Hash validation (MD5, CRC32, SHA1 comparisons in code)", - "- Size validation (`fseek`/`ftell`, `stat`, fixed buffer sizes)", - "", - "### 3. Determine required vs optional", - "", - "This is decided by code behavior, not by judgment:", - "", - "- **required**: the core does not start or function without the file", - "- **optional**: the core works with degraded functionality without it", - "- **hle_fallback: true**: the core has a high-level emulation path when the file is missing", - "", - "### 4. Document divergences", - "", - "When the libretro port differs from the upstream:", - "", - "- `mode: libretro` - file only used by the libretro core", - "- `mode: standalone` - file only used in standalone mode", - "- `mode: both` - used by both (default, can be omitted)", - "", - "Path differences (current dir vs system_dir) are normal adaptation,", - "not a divergence. Name changes (e.g. `naomi2_` to `n2_`) may be intentional", - "to avoid conflicts in the shared system directory.", - "", - "### 5. Write the YAML profile", - "", - "```yaml", - "emulator: Dolphin", - "type: standalone + libretro", - "core_classification: community_fork", - "source: https://github.com/libretro/dolphin", - "upstream: https://github.com/dolphin-emu/dolphin", - "profiled_date: 2026-03-25", - "core_version: 5.0-21264", - "systems:", - " - nintendo-gamecube", - " - nintendo-wii", - "", - "files:", - " - name: GC/USA/IPL.bin", - " system: nintendo-gamecube", - " required: false", - " hle_fallback: true", - " size: 2097152", - " validation: [size, adler32]", - " known_hash_adler32: 0x4f1f6f5c", - " region: north-america", - " source_ref: Source/Core/Core/Boot/Boot_BS2Emu.cpp:42", - "```", - "", - "### 6. Validate", - "", - "```bash", - "python scripts/cross_reference.py --emulator dolphin --json", - "python scripts/verify.py --emulator dolphin", - "```", - "", - "## YAML field reference", - "", - "### Profile fields", - "", - "| Field | Required | Description |", - "|-------|----------|-------------|", - "| `emulator` | yes | display name |", - "| `type` | yes | `libretro`, `standalone`, `standalone + libretro`, `alias`, `launcher` |", - "| `core_classification` | no | `pure_libretro`, `official_port`, `community_fork`, `frozen_snapshot`, `enhanced_fork`, `game_engine`, `embedded_hle`, `alias`, `launcher` |", - "| `source` | yes | libretro core repository URL |", - "| `upstream` | no | original emulator repository URL |", - "| `profiled_date` | yes | date of source analysis |", - "| `core_version` | yes | version analyzed |", - "| `systems` | yes | list of system IDs this core handles |", - "| `cores` | no | list of core names (default: profile filename) |", - "| `files` | yes | list of file entries |", - "| `notes` | no | free-form technical notes |", - "| `exclusion_note` | no | why the profile has no files |", - "| `data_directories` | no | references to data dirs in `_data_dirs.yml` |", - "", - "### File entry fields", - "", - "| Field | Description |", - "|-------|-------------|", - "| `name` | filename as the core expects it |", - "| `required` | true if the core needs this file to function |", - "| `system` | system ID this file belongs to |", - "| `size` | expected size in bytes |", - "| `md5`, `sha1`, `crc32`, `sha256` | expected hashes from source code |", - "| `validation` | list of checks the code performs: `size`, `crc32`, `md5`, `sha1` |", - "| `aliases` | alternate filenames for the same file |", - "| `mode` | `libretro`, `standalone`, or `both` |", - "| `hle_fallback` | true if a high-level emulation path exists |", - "| `category` | `bios` (default), `game_data`, `bios_zip` |", - "| `region` | geographic region (e.g. `north-america`, `japan`) |", - "| `source_ref` | source file and line number |", - "| `path` | path relative to system directory |", - "| `description` | what this file is |", - "| `note` | additional context |", - "| `archive` | parent ZIP if this file is inside an archive |", - "| `contents` | structure of files inside a BIOS ZIP |", - "| `storage` | `embedded` (default), `external`, `user_provided` |", - "", - ] - return "\n".join(lines) + "\n" - - def generate_wiki_data_model(db: dict, profiles: dict) -> str: """Generate data model documentation from actual database structure.""" files_count = len(db.get("files", {}))