From 851f53ba7f48a68deb9418fc905fbacf03141718 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Wed, 25 Mar 2026 16:02:11 +0100 Subject: [PATCH] refactor: extract wiki to source files, use deploy-pages action --- .github/workflows/build.yml | 9 -- .github/workflows/deploy-site.yml | 22 ++++- .gitignore | 9 +- scripts/generate_site.py | 18 ++-- wiki/architecture.md | 144 ++++++++++++++++++++++++++++++ wiki/index.md | 19 ++++ wiki/profiling.md | 132 +++++++++++++++++++++++++++ wiki/tools.md | 128 ++++++++++++++++++++++++++ 8 files changed, 455 insertions(+), 26 deletions(-) create mode 100644 wiki/architecture.md create mode 100644 wiki/index.md create mode 100644 wiki/profiling.md create mode 100644 wiki/tools.md diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2d966f79..fb0528d5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,6 @@ jobs: runs-on: ubuntu-latest permissions: contents: write - pages: write steps: - uses: actions/checkout@v6 @@ -81,14 +80,6 @@ jobs: if: steps.rate.outputs.skip != 'true' run: python scripts/generate_pack.py --all --output-dir dist/ - - name: Deploy site to GitHub Pages - if: steps.rate.outputs.skip != 'true' - run: | - pip install mkdocs-material - python scripts/generate_readme.py --db database.json --platforms-dir platforms - python scripts/generate_site.py - mkdocs gh-deploy --force --clean - - name: Release if: steps.rate.outputs.skip != 'true' run: | diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index b038fbdd..27a5ed1a 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -6,6 +6,7 @@ on: paths: - "platforms/**" - "emulators/**" + - "wiki/**" - "scripts/generate_site.py" - "scripts/generate_readme.py" - "scripts/verify.py" @@ -15,15 +16,16 @@ on: workflow_dispatch: permissions: - contents: write + contents: read pages: write + id-token: write 
concurrency: group: deploy-site cancel-in-progress: true jobs: - deploy: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -38,6 +40,20 @@ jobs: run: | python scripts/generate_site.py python scripts/generate_readme.py --db database.json --platforms-dir platforms + mkdocs build --strict + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: site/ + + deploy: + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: - name: Deploy to GitHub Pages - run: mkdocs gh-deploy --force --clean + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 001757db..676c2c16 100644 --- a/.gitignore +++ b/.gitignore @@ -11,13 +11,8 @@ site/ *.log node_modules/ -# Generated site pages (built in CI) -docs/index.md -docs/platforms/ -docs/systems/ -docs/emulators/ -docs/contributing.md -docs/gaps.md +# Generated site pages (built by generate_site.py, deployed in CI) +docs/ # Large files stored as GitHub Release assets (> 50MB) bios/Arcade/Arcade/Firmware.19.0.0.zip diff --git a/scripts/generate_site.py b/scripts/generate_site.py index f80025b9..7363d8f9 100644 --- a/scripts/generate_site.py +++ b/scripts/generate_site.py @@ -34,7 +34,8 @@ DOCS_DIR = "docs" SITE_NAME = "RetroBIOS" REPO_URL = "https://github.com/Abdess/retrobios" RELEASE_URL = f"{REPO_URL}/releases/latest" -GENERATED_DIRS = ["platforms", "systems", "emulators", "wiki"] +GENERATED_DIRS = ["platforms", "systems", "emulators"] +WIKI_SRC_DIR = "wiki" # manually maintained wiki sources SYSTEM_ICON_BASE = "https://raw.githubusercontent.com/libretro/retroarch-assets/master/xmb/systematic/png" # Global index: maps system_id -> (manufacturer_slug, console_name) for cross-linking @@ -2111,13 +2112,16 @@ def main(): generate_gap_analysis(profiles, coverages, db) ) - # Generate wiki pages + # Wiki pages: copy manually maintained sources + generate dynamic ones print("Generating 
wiki pages...") - (docs / "wiki" / "index.md").write_text(generate_wiki_index()) - (docs / "wiki" / "architecture.md").write_text(generate_wiki_architecture()) - (docs / "wiki" / "tools.md").write_text(generate_wiki_tools()) - (docs / "wiki" / "profiling.md").write_text(generate_wiki_profiling()) - (docs / "wiki" / "data-model.md").write_text(generate_wiki_data_model(db, profiles)) + wiki_dest = docs / "wiki" + wiki_dest.mkdir(parents=True, exist_ok=True) + wiki_src = Path(WIKI_SRC_DIR) + if wiki_src.is_dir(): + for src_file in wiki_src.glob("*.md"): + shutil.copy2(src_file, wiki_dest / src_file.name) + # data-model.md is generated (contains live DB stats) + (wiki_dest / "data-model.md").write_text(generate_wiki_data_model(db, profiles)) # Generate contributing print("Generating contributing page...") diff --git a/wiki/architecture.md b/wiki/architecture.md new file mode 100644 index 00000000..280f48e1 --- /dev/null +++ b/wiki/architecture.md @@ -0,0 +1,144 @@ +# Architecture - RetroBIOS + +## Directory structure + +``` +bios/ BIOS and firmware files, organized by Manufacturer/Console/ + Manufacturer/Console/ canonical files (one per unique content) + .variants/ alternate versions (different hash, same purpose) +emulators/ one YAML profile per core (285 profiles) +platforms/ one YAML config per platform (scraped from upstream) + _shared.yml shared file groups across platforms + _registry.yml platform metadata (logos, scrapers, status) + _data_dirs.yml data directory definitions (Dolphin Sys, PPSSPP...) +scripts/ all tooling (Python, pyyaml only dependency) + scraper/ upstream scrapers (libretro, batocera, recalbox...) 
+data/ cached data directories (not BIOS, fetched at build) +schemas/ JSON schemas for validation +tests/ E2E test suite with synthetic fixtures +dist/ generated packs (gitignored) +.cache/ hash cache and large file downloads (gitignored) +``` + +## Data flow + +``` +Upstream sources Scrapers parse generate_db.py scans + System.dat (libretro) + fetch versions bios/ on disk + batocera-systems builds database.json + es_bios.xml (recalbox) (SHA1 primary key, + core-info .info files indexes: by_md5, by_name, + by_crc32, by_path_suffix) + +emulators/*.yml verify.py checks generate_pack.py resolves + source-verified platform-native files by hash, builds ZIP + from code verification packs per platform +``` + +## Three layers of data + +| Layer | Source | Role | +|-------|--------|------| +| Platform YAML | Scraped from upstream | What the platform declares it needs | +| `_shared.yml` | Curated | Shared files across platforms, reflects actual behavior | +| Emulator profiles | Source-verified | What the code actually loads. Used for cross-reference and gap detection | + +The pack combines platform baseline (layer 1) with core requirements (layer 3). +The result contains neither too many files (none from unused cores) nor too few (none missing for active cores). + +## Pack grouping + +Platforms that produce identical packs are grouped automatically. +RetroArch and Lakka share the same files and `base_destination` (`system/`), +so they produce one combined pack (`RetroArch_Lakka_BIOS_Pack.zip`). +RetroPie uses `BIOS/` as base path, so it gets a separate pack. 
+ +## Storage tiers + +| Tier | Meaning | +|------|---------| +| `embedded` (default) | file is in the `bios/` directory, included in packs | +| `external` | file has a `source_url`, downloaded at pack build time | +| `user_provided` | user must provide the file (instructions included in pack) | + +## Verification severity + +How missing or mismatched files are reported: + +| Mode | required + missing | optional + missing | hash mismatch | +|------|-------------------|-------------------|--------------| +| existence | WARNING | INFO | N/A | +| md5 | CRITICAL | WARNING | UNTESTED | + +Files with `hle_fallback: true` are downgraded to INFO when missing +(the emulator has a software fallback). + +## Discrepancy detection + +When a file passes platform verification (MD5 match) but fails +emulator-level validation (wrong CRC32, wrong size), a DISCREPANCY is reported. +The pack generator searches the repo for a variant that satisfies both. +If none exists, the platform version is kept. + +## Security + +- `safe_extract_zip()` prevents zip-slip path traversal attacks +- `deterministic_zip` rebuilds MAME ZIPs so same ROMs always produce the same hash +- `crypto_verify.py` and `sect233r1.py` verify 3DS RSA-2048 signatures and AES-128-CBC integrity +- ZIP inner ROM verification via `checkInsideZip()` replicates Batocera's behavior +- `md5_composite()` replicates Recalbox's composite ZIP hash + +## Edge cases + +| Case | Handling | +|------|---------| +| Batocera truncated MD5 (29 chars) | prefix match in resolution | +| `zippedFile` entries | MD5 is of the ROM inside the ZIP, not the ZIP itself | +| Regional variants (same filename) | `by_path_suffix` index disambiguates | +| MAME BIOS ZIPs | `contents` field documents inner structure | +| RPG Maker/ScummVM | excluded from dedup (NODEDUP) to preserve directory structure | +| `strip_components` in data dirs | flattens cache prefix to match expected path | +| case-insensitive dedup | prevents `font.rom` + `FONT.ROM` conflicts 
on Windows/macOS | + +## Platform inheritance + +Platform configs support `inherits:` to share definitions. +Lakka inherits from RetroArch, RetroPie inherits from RetroArch with `base_destination: BIOS`. +`overrides:` allows child platforms to modify specific systems from the parent. + +Core resolution (`resolve_platform_cores`) uses three strategies: + +- `cores: all_libretro` - include all profiles with `libretro` in their type +- `cores: [list]` - include only named profiles +- `cores:` absent - fallback to system ID intersection between platform and profiles + +## MAME clone map + +`_mame_clones.json` at repo root maps MAME clone ROM names to their canonical parent. +When a clone ZIP was deduplicated, `resolve_local_file` uses this map to find the canonical file. + +## Tests + +`tests/test_e2e.py` contains 75 end-to-end tests with synthetic fixtures. +Covers: file resolution, verification, severity, cross-reference, aliases, +inheritance, shared groups, data dirs, storage tiers, HLE, launchers, +platform grouping, core resolution (3 strategies + alias exclusion). + +```bash +python -m unittest tests.test_e2e -v +``` + +## CI workflows + +| Workflow | File | Trigger | Role | +|----------|------|---------|------| +| Build & Release | `build.yml` | `workflow_dispatch` (manual) | restore large files, build packs, create GitHub release (site deployment is handled separately by `deploy-site.yml`) | +| PR Validation | `validate.yml` | pull request on `bios/`/`platforms/` | validate BIOS hashes, schema check, run tests, auto-label PR | +| Weekly Sync | `watch.yml` | cron (Monday 6 AM UTC) + manual | scrape upstream sources, detect changes, create update PR | + +Build workflow has a 7-day rate limit between releases and keeps the 3 most recent. + +## License + +See `LICENSE` at repo root. Files are provided for personal backup and archival. 
+ diff --git a/wiki/index.md b/wiki/index.md new file mode 100644 index 00000000..9692b662 --- /dev/null +++ b/wiki/index.md @@ -0,0 +1,19 @@ +# Wiki - RetroBIOS + +Technical documentation for the RetroBIOS toolchain. + +## Pages + +- **[Architecture](architecture.md)** - directory structure, data flow, platform inheritance, pack grouping, security, edge cases, CI workflows +- **[Tools](tools.md)** - CLI reference for every script, pipeline usage, scrapers +- **[Profiling guide](profiling.md)** - how to create an emulator profile from source code, step by step, with YAML field reference +- **[Data model](data-model.md)** - database.json structure, indexes, file resolution order, YAML formats + +## For users + +If you just want to download BIOS packs, see the [home page](../index.md). + +## For contributors + +Start with the [profiling guide](profiling.md) to understand how emulator profiles are built, +then see [contributing](../contributing.md) for submission guidelines. diff --git a/wiki/profiling.md b/wiki/profiling.md new file mode 100644 index 00000000..0a0eca33 --- /dev/null +++ b/wiki/profiling.md @@ -0,0 +1,132 @@ +# Profiling guide - RetroBIOS + +How to create an emulator profile from source code. + +## Approach + +A profile documents what an emulator loads at runtime. +The source code is the reference because it reflects actual behavior. +Documentation, .info files, and wikis are useful starting points +but must always be verified against the code. + +## Steps + +### 1. Find the source code + +Check these locations in order: + +1. Upstream original (the emulator's own repository) +2. Libretro fork (may have adapted paths or added files) +3. If not on GitHub: GitLab, Codeberg, SourceForge, archive.org + +Always clone both the upstream repository and the libretro port to compare them. + +### 2. Trace file loading + +Read the code flow. Don't just grep for keywords you assume are used. +Each emulator has its own way of loading files. 
+ +Look for: + +- `fopen`, `open`, `read_file`, `load_rom`, `load_bios` calls +- `retro_system_directory` / `system_dir` in libretro cores +- File existence checks (`path_is_valid`, `file_exists`) +- Hash validation (MD5, CRC32, SHA1 comparisons in code) +- Size validation (`fseek`/`ftell`, `stat`, fixed buffer sizes) + +### 3. Determine required vs optional + +This is decided by code behavior, not by judgment: + +- **required**: the core does not start or function without the file +- **optional**: the core works with degraded functionality without it +- **hle_fallback: true**: the core has a high-level emulation path when the file is missing + +### 4. Document divergences + +When the libretro port differs from the upstream: + +- `mode: libretro` - file only used by the libretro core +- `mode: standalone` - file only used in standalone mode +- `mode: both` - used by both (default, can be omitted) + +Path differences (current dir vs system_dir) are normal adaptation, +not a divergence. Name changes (e.g. `naomi2_` to `n2_`) may be intentional +to avoid conflicts in the shared system directory. + +### 5. Write the YAML profile + +```yaml +emulator: Dolphin +type: standalone + libretro +core_classification: community_fork +source: https://github.com/libretro/dolphin +upstream: https://github.com/dolphin-emu/dolphin +profiled_date: 2026-03-25 +core_version: 5.0-21264 +systems: + - nintendo-gamecube + - nintendo-wii + +files: + - name: GC/USA/IPL.bin + system: nintendo-gamecube + required: false + hle_fallback: true + size: 2097152 + validation: [size, adler32] + known_hash_adler32: 0x4f1f6f5c + region: north-america + source_ref: Source/Core/Core/Boot/Boot_BS2Emu.cpp:42 +``` + +### 6. 
Validate + +```bash +python scripts/cross_reference.py --emulator dolphin --json +python scripts/verify.py --emulator dolphin +``` + +## YAML field reference + +### Profile fields + +| Field | Required | Description | +|-------|----------|-------------| +| `emulator` | yes | display name | +| `type` | yes | `libretro`, `standalone`, `standalone + libretro`, `alias`, `launcher` | +| `core_classification` | no | `pure_libretro`, `official_port`, `community_fork`, `frozen_snapshot`, `enhanced_fork`, `game_engine`, `embedded_hle`, `alias`, `launcher` | +| `source` | yes | libretro core repository URL | +| `upstream` | no | original emulator repository URL | +| `profiled_date` | yes | date of source analysis | +| `core_version` | yes | version analyzed | +| `systems` | yes | list of system IDs this core handles | +| `cores` | no | list of core names (default: profile filename) | +| `files` | yes | list of file entries | +| `notes` | no | free-form technical notes | +| `exclusion_note` | no | why the profile has no files | +| `data_directories` | no | references to data dirs in `_data_dirs.yml` | + +### File entry fields + +| Field | Description | +|-------|-------------| +| `name` | filename as the core expects it | +| `required` | true if the core needs this file to function | +| `system` | system ID this file belongs to | +| `size` | expected size in bytes | +| `md5`, `sha1`, `crc32`, `sha256` | expected hashes from source code | +| `validation` | list of checks the code performs: `size`, `crc32`, `md5`, `sha1`, `adler32` | +| `aliases` | alternate filenames for the same file | +| `mode` | `libretro`, `standalone`, or `both` (default) | +| `hle_fallback` | true if a high-level emulation path exists | +| `category` | `bios` (default), `game_data`, `bios_zip` | +| `region` | geographic region (e.g. 
`north-america`, `japan`) | +| `source_ref` | source file and line number | +| `path` | path relative to system directory | +| `description` | what this file is | +| `note` | additional context | +| `archive` | parent ZIP if this file is inside an archive | +| `contents` | structure of files inside a BIOS ZIP | +| `storage` | `embedded` (default), `external`, `user_provided` | + diff --git a/wiki/tools.md b/wiki/tools.md new file mode 100644 index 00000000..0bd68d0f --- /dev/null +++ b/wiki/tools.md @@ -0,0 +1,128 @@ +# Tools - RetroBIOS + +All tools are Python scripts in `scripts/`. Single dependency: `pyyaml`. + +## Pipeline + +Run everything in sequence: + +```bash +python scripts/pipeline.py --offline # DB + verify + packs + readme + site +python scripts/pipeline.py --offline --skip-packs # DB + verify only +python scripts/pipeline.py --skip-docs # skip readme + site generation +``` + +## Individual tools + +### generate_db.py + +Scan `bios/` and build `database.json` with multi-indexed lookups. +Large files in `.gitignore` are preserved from the existing database +and downloaded from GitHub release assets if not cached locally. + +```bash +python scripts/generate_db.py --force --bios-dir bios --output database.json +``` + +### verify.py + +Check BIOS coverage for each platform using its native verification mode. + +```bash +python scripts/verify.py --all # all platforms +python scripts/verify.py --platform batocera # single platform +python scripts/verify.py --emulator dolphin # single emulator +python scripts/verify.py --system atari-lynx # single system +``` + +Verification modes per platform: + +| Platform | Mode | Logic | +|----------|------|-------| +| RetroArch, Lakka, RetroPie | existence | file present = OK | +| Batocera, RetroBat | md5 | MD5 hash match | +| Recalbox | md5 | MD5 multi-hash, 3 severity levels | +| EmuDeck | md5 | MD5 whitelist per system | + +### generate_pack.py + +Build platform-specific BIOS ZIP packs. 
+ +```bash +python scripts/generate_pack.py --all --output-dir dist/ +python scripts/generate_pack.py --platform batocera +python scripts/generate_pack.py --emulator dolphin +python scripts/generate_pack.py --system atari-lynx +``` + +Packs include platform baseline files plus files required by the platform's cores. +When a file passes platform verification but fails emulator validation, +the tool searches for a variant that satisfies both. +If none exists, the platform version is kept and the discrepancy is reported. + +### cross_reference.py + +Compare emulator profiles against platform configs. +Reports files that cores need but platforms don't declare. + +```bash +python scripts/cross_reference.py # all +python scripts/cross_reference.py --emulator dolphin # single +``` + +### refresh_data_dirs.py + +Fetch data directories (Dolphin Sys, PPSSPP assets, blueMSX databases) +from upstream repositories into `data/`. + +```bash +python scripts/refresh_data_dirs.py +python scripts/refresh_data_dirs.py --key dolphin-sys --force +``` + +### Other tools + +| Script | Purpose | +|--------|---------| +| `dedup.py` | Deduplicate `bios/`, move duplicates to `.variants/`. 
RPG Maker and ScummVM excluded (NODEDUP) | +| `validate_pr.py` | Validate BIOS files in pull requests | +| `auto_fetch.py` | Fetch missing BIOS files from known sources | +| `list_platforms.py` | List active platforms (used by CI) | +| `download.py` | Download packs from GitHub releases | +| `common.py` | Shared library: hash computation, file resolution, platform config loading, emulator profiles | +| `generate_readme.py` | Generate README.md and CONTRIBUTING.md from database | +| `generate_site.py` | Generate all MkDocs site pages (this documentation) | +| `deterministic_zip.py` | Rebuild MAME BIOS ZIPs deterministically (same ROMs = same hash) | +| `crypto_verify.py` | 3DS RSA signature and AES crypto verification | +| `sect233r1.py` | Pure Python ECDSA verification on sect233r1 curve (3DS OTP cert) | +| `batch_profile.py` | Batch profiling automation for libretro cores | +| `migrate.py` | Migrate flat bios structure to Manufacturer/Console/ hierarchy | + +## Large files + +Files over 50 MB are stored as assets on the `large-files` GitHub release. +They are listed in `.gitignore` so they don't bloat the git repository. +`generate_db.py` downloads them from the release when rebuilding the database, +using `fetch_large_file()` from `common.py`. The same function is used by +`generate_pack.py` when a file has a hash mismatch with the local variant. + +## Scrapers + +Located in `scripts/scraper/`. Each inherits `BaseScraper` and implements `fetch_requirements()`. 
+ +| Scraper | Source | Format | +|---------|--------|--------| +| `libretro_scraper` | System.dat + core-info .info files | clrmamepro DAT | +| `batocera_scraper` | batocera-systems script | Python dict | +| `recalbox_scraper` | es_bios.xml | XML | +| `retrobat_scraper` | batocera-systems.json | JSON | +| `emudeck_scraper` | checkBIOS.sh | Bash + CSV | +| `retrodeck_scraper` | component manifests | JSON per component | +| `coreinfo_scraper` | .info files from libretro-core-info | INI-like | + +Internal modules: `base_scraper.py` (abstract base with `_fetch_raw()` caching +and shared CLI), `dat_parser.py` (clrmamepro DAT format parser). + +Adding a scraper: inherit `BaseScraper`, implement `fetch_requirements()`, +call `scraper_cli(YourScraper)` in `__main__`. +