feat: add doom engine wad files, emulatorjs base config

This commit is contained in:
Abdessamad Derraz
2026-03-25 23:00:11 +01:00
parent 32b391ef69
commit d2cc9b8f29
32 changed files with 2349 additions and 1607 deletions

View File

@@ -26,7 +26,7 @@ import urllib.error
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from common import load_database, load_platform_config
from common import list_registered_platforms, load_database, load_platform_config
try:
import yaml
@@ -381,10 +381,9 @@ def main():
db = load_database(args.db)
if args.all:
platforms = []
for f in Path(args.platforms_dir).glob("*.yml"):
if not f.name.startswith("_"):
platforms.append(f.stem)
platforms = list_registered_platforms(
args.platforms_dir, include_archived=True,
)
elif args.platform:
platforms = [args.platform]
else:

View File

@@ -167,6 +167,31 @@ def load_data_dir_registry(platforms_dir: str = "platforms") -> dict:
return data.get("data_directories", {})
def list_registered_platforms(
    platforms_dir: str = "platforms",
    include_archived: bool = False,
) -> list[str]:
    """List platforms registered in _registry.yml.

    Only registered platforms generate packs and appear in CI.
    Unregistered YAMLs (e.g., emulatorjs.yml) are base configs for inheritance.

    Args:
        platforms_dir: Directory containing ``_registry.yml`` and the
            per-platform config files.
        include_archived: When true, platforms whose ``status`` is
            ``"archived"`` are returned as well.

    Returns:
        Registered platform names in sorted order, limited to entries whose
        config file exists under ``platforms_dir``. An empty list when the
        registry file is missing.
    """
    registry_path = os.path.join(platforms_dir, "_registry.yml")
    if not os.path.exists(registry_path):
        return []
    # YAML is UTF-8 by convention; do not depend on the locale default.
    with open(registry_path, encoding="utf-8") as f:
        registry = yaml.safe_load(f) or {}

    # A bare "platforms:" key parses as None rather than an empty mapping.
    entries = registry.get("platforms") or {}
    platforms = []
    for name, meta in sorted(entries.items()):
        # A bare "name:" entry parses as None; treat it as "all defaults".
        meta = meta or {}
        if meta.get("status", "active") == "archived" and not include_archived:
            continue
        # Fall back to "<name>.yml" when "config" is absent, null or empty.
        config_name = meta.get("config") or f"{name}.yml"
        if os.path.exists(os.path.join(platforms_dir, config_name)):
            platforms.append(name)
    return platforms
def resolve_local_file(
file_entry: dict,
db: dict,

View File

@@ -25,7 +25,7 @@ except ImportError:
sys.exit(1)
sys.path.insert(0, os.path.dirname(__file__))
from common import load_database, load_emulator_profiles, load_platform_config
from common import list_registered_platforms, load_database, load_emulator_profiles, load_platform_config
DEFAULT_EMULATORS_DIR = "emulators"
DEFAULT_PLATFORMS_DIR = "platforms"
@@ -36,10 +36,8 @@ def load_platform_files(platforms_dir: str) -> tuple[dict[str, set[str]], dict[s
"""Load all platform configs and collect declared filenames + data_directories per system."""
declared = {}
platform_data_dirs = {}
for f in sorted(Path(platforms_dir).glob("*.yml")):
if f.name.startswith("_"):
continue
config = load_platform_config(f.stem, platforms_dir)
for platform_name in list_registered_platforms(platforms_dir, include_archived=True):
config = load_platform_config(platform_name, platforms_dir)
for sys_id, system in config.get("systems", {}).items():
for fe in system.get("files", []):
name = fe.get("name", "")

View File

@@ -18,7 +18,7 @@ from datetime import datetime, timezone
from pathlib import Path
sys.path.insert(0, os.path.dirname(__file__))
from common import compute_hashes
from common import compute_hashes, list_registered_platforms
CACHE_DIR = ".cache"
CACHE_FILE = os.path.join(CACHE_DIR, "db_cache.json")
@@ -353,9 +353,8 @@ def _collect_all_aliases(files: dict) -> dict:
if platforms_dir.is_dir():
try:
import yaml
for config_file in platforms_dir.glob("*.yml"):
if config_file.name.startswith("_"):
continue
for platform_name in list_registered_platforms(str(platforms_dir), include_archived=True):
config_file = platforms_dir / f"{platform_name}.yml"
try:
with open(config_file) as f:
config = yaml.safe_load(f) or {}

View File

@@ -27,9 +27,10 @@ sys.path.insert(0, os.path.dirname(__file__))
from common import (
_build_validation_index, build_zip_contents_index, check_file_validation,
check_inside_zip, compute_hashes, fetch_large_file, filter_files_by_mode,
group_identical_platforms, list_emulator_profiles, list_system_ids,
load_database, load_data_dir_registry, load_emulator_profiles,
load_platform_config, md5_composite, resolve_local_file,
group_identical_platforms, list_emulator_profiles, list_registered_platforms,
list_system_ids, load_database, load_data_dir_registry,
load_emulator_profiles, load_platform_config, md5_composite,
resolve_local_file,
)
from deterministic_zip import rebuild_zip_deterministic
@@ -820,13 +821,8 @@ def generate_system_pack(
def list_platforms(platforms_dir: str) -> list[str]:
"""List available platform names from YAML files."""
platforms = []
for f in sorted(Path(platforms_dir).glob("*.yml")):
if f.name.startswith("_"):
continue
platforms.append(f.stem)
return platforms
"""List available platform names from registry."""
return list_registered_platforms(platforms_dir, include_archived=True)
def main():
@@ -901,9 +897,9 @@ def main():
# Platform mode (existing)
if args.all:
sys.path.insert(0, os.path.dirname(__file__))
from list_platforms import list_platforms as _list_active
platforms = _list_active(include_archived=args.include_archived)
platforms = list_registered_platforms(
args.platforms_dir, include_archived=args.include_archived,
)
elif args.platform:
platforms = [args.platform]
else:

View File

@@ -18,7 +18,7 @@ from datetime import datetime, timezone
from pathlib import Path
sys.path.insert(0, os.path.dirname(__file__))
from common import load_database, load_platform_config
from common import list_registered_platforms, load_database, load_platform_config
from verify import verify_platform
def compute_coverage(platform_name: str, platforms_dir: str, db: dict) -> dict:
@@ -80,10 +80,7 @@ def generate_readme(db: dict, platforms_dir: str) -> str:
size_mb = total_size / (1024 * 1024)
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
platform_names = sorted(
p.stem for p in Path(platforms_dir).glob("*.yml")
if not p.name.startswith("_")
)
platform_names = list_registered_platforms(platforms_dir, include_archived=True)
coverages = {}
for name in platform_names:
@@ -135,6 +132,7 @@ def generate_readme(db: dict, platforms_dir: str) -> str:
"RetroPie": "`BIOS/`",
"RetroDECK": "`~/retrodeck/bios/`",
"EmuDeck": "`Emulation/bios/`",
"RomM": "`bios/{platform_slug}/`",
}
for name, cov in sorted(coverages.items(), key=lambda x: x[1]["platform"]):

View File

@@ -26,7 +26,7 @@ except ImportError:
sys.exit(1)
sys.path.insert(0, os.path.dirname(__file__))
from common import load_database, load_emulator_profiles, load_platform_config
from common import list_registered_platforms, load_database, load_emulator_profiles, load_platform_config
from generate_readme import compute_coverage
from verify import verify_platform
@@ -2044,10 +2044,7 @@ def main():
registry = (yaml.safe_load(f) or {}).get("platforms", {})
# Load platform configs
platform_names = [
p.stem for p in Path(args.platforms_dir).glob("*.yml")
if not p.name.startswith("_")
]
platform_names = list_registered_platforms(args.platforms_dir, include_archived=True)
print("Computing platform coverage...")
coverages = {}

View File

@@ -17,45 +17,16 @@ import argparse
import json
import os
import sys
from pathlib import Path
try:
import yaml
except ImportError:
yaml = None
sys.path.insert(0, os.path.dirname(__file__))
from common import list_registered_platforms
PLATFORMS_DIR = "platforms"
def _load_registry(platforms_dir: str = PLATFORMS_DIR) -> dict:
"""Load _registry.yml if available."""
registry_path = Path(platforms_dir) / "_registry.yml"
if yaml and registry_path.exists():
with open(registry_path) as f:
return yaml.safe_load(f) or {}
return {}
def list_platforms(include_archived: bool = False) -> list[str]:
"""List platform config files, filtering by status from _registry.yml."""
platforms_dir = Path(PLATFORMS_DIR)
if not platforms_dir.is_dir():
return []
registry = _load_registry(str(platforms_dir))
registry_platforms = registry.get("platforms", {})
platforms = []
for f in sorted(platforms_dir.glob("*.yml")):
if f.name.startswith("_"):
continue
name = f.stem
status = registry_platforms.get(name, {}).get("status", "active")
if status == "archived" and not include_archived:
continue
platforms.append(name)
return platforms
return list_registered_platforms(PLATFORMS_DIR, include_archived=include_archived)
def main():

View File

@@ -2,48 +2,116 @@
"""Scraper for RomM BIOS requirements.
Source: https://github.com/rommapp/romm
Format: known_bios_files.json in backend/models/fixtures/
Hash: MD5 (primary), SHA1, CRC
Format: JSON fixture mapping "slug:filename" to {size, crc, md5, sha1}
Hash: SHA1 primary (all four hashes available per entry)
RomM stores BIOS requirements in known_bios_files.json,
it contains bios files for all emulators, and is formatted as a mapping of "<console>:<bios_file>": { "size": "<size_in_bytes>", "crc": "<crc>", "md5": "<md5>", "sha1": "<sha1>" }.
RomM stores known BIOS hashes in known_bios_files.json. At startup, the
fixture is loaded into Redis. When scanning or uploading firmware, RomM
verifies: file size must match AND at least one hash (MD5, SHA1, CRC32)
must match (firmware.py:verify_file_hashes).
RomM hashes files as opaque blobs (no ZIP content inspection). Arcade
BIOS ZIPs are matched by their container hash, which varies by MAME
version and ZIP tool. This is a known limitation (rommapp/romm#2888).
Folder structure: {library}/bios/{platform_slug}/{filename} (flat).
Slugs are IGDB-style platform identifiers.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
try:
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
except ImportError:
sys.path.insert(0, str(Path(__file__).parent.parent))
from scraper.base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
from base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
PLATFORM_NAME = "RomM"
PLATFORM_NAME = "romm"
BIOS_REPO = "rommapp/romm"
BIOS_BRANCH = "master"
BIOS_FILE = "backend/models/fixtures/known_bios_files.json"
BIOS_URL = (
f"https://raw.githubusercontent.com/{BIOS_REPO}/refs/heads/{BIOS_BRANCH}/{BIOS_FILE}"
SOURCE_URL = (
"https://raw.githubusercontent.com/rommapp/romm/"
"master/backend/models/fixtures/known_bios_files.json"
)
GITHUB_REPO = "rommapp/romm"
# IGDB slug -> retrobios system ID
# Keys are the part of each fixture key before the first ":" ("slug:filename").
# fetch_requirements() looks slugs up here; a slug with no entry is skipped
# with a warning on stderr, so new upstream platforms must be added explicitly.
# Several slugs may share one system ID (e.g. "msx" and "msx2").
SLUG_MAP: dict[str, str] = {
"3do": "3do",
"64dd": "nintendo-64dd",
"acpc": "amstrad-cpc",
"amiga": "commodore-amiga",
"arcade": "arcade",
"atari-st": "atari-st",
"atari5200": "atari-5200",
"atari7800": "atari-7800",
"atari8bit": "atari-400-800",
"colecovision": "coleco-colecovision",
"dc": "sega-dreamcast",
"doom": "doom",
"enterprise": "enterprise-64-128",
"fairchild-channel-f": "fairchild-channel-f",
"fds": "nintendo-fds",
"gamegear": "sega-game-gear",
"gb": "nintendo-gb",
"gba": "nintendo-gba",
"gbc": "nintendo-gbc",
"genesis": "sega-mega-drive",
"intellivision": "mattel-intellivision",
"j2me": "j2me",
"lynx": "atari-lynx",
"mac": "apple-macintosh-ii",
"msx": "microsoft-msx",
"msx2": "microsoft-msx",
"nds": "nintendo-ds",
"neo-geo-cd": "snk-neogeo-cd",
"nes": "nintendo-nes",
"ngc": "nintendo-gamecube",
"odyssey-2-slash-videopac-g7000": "magnavox-odyssey2",
"pc-9800-series": "nec-pc-98",
"pc-fx": "nec-pc-fx",
"pokemon-mini": "nintendo-pokemon-mini",
"ps2": "sony-playstation-2",
"psp": "sony-psp",
"psx": "sony-playstation",
"satellaview": "nintendo-satellaview",
"saturn": "sega-saturn",
"scummvm": "scummvm",
"segacd": "sega-mega-cd",
"sharp-x68000": "sharp-x68000",
"sms": "sega-master-system",
"snes": "nintendo-snes",
"sufami-turbo": "nintendo-sufami-turbo",
"super-gb": "nintendo-sgb",
"tg16": "nec-pc-engine",
"tvc": "videoton-tvc",
"videopac-g7400": "philips-videopac",
"wolfenstein": "wolfenstein-3d",
"x1": "sharp-x1",
"xbox": "microsoft-xbox",
"zxs": "sinclair-zx-spectrum",
}
class Scraper(BaseScraper):
"""RomM BIOS scraper from known_bios_files.json."""
"""Scraper for RomM known_bios_files.json."""
def __init__(self, url = BIOS_URL):
super().__init__(url)
def __init__(self, url: str = SOURCE_URL):
super().__init__(url=url)
self._parsed: dict | None = None
def fetch_metadata(self) -> dict:
version = fetch_github_latest_version(BIOS_REPO) or "unknown"
return {
"name": PLATFORM_NAME,
"version": version,
"homepage": "https://romm.app",
"source": self.url,
}
# Return the decoded fixture, fetching and parsing it at most once per
# instance: the result is stored on self._parsed and reused by later calls.
# Raises ValueError (chained to the JSONDecodeError) when the payload is not
# valid JSON.
def _parse_json(self) -> dict:
# Cache hit: a previous call already parsed the payload.
if self._parsed is not None:
return self._parsed
raw = self._fetch_raw()
try:
self._parsed = json.loads(raw)
except json.JSONDecodeError as e:
# Re-raise as ValueError, keeping the decoder error as the cause.
raise ValueError(f"Failed to parse JSON: {e}") from e
return self._parsed
def fetch_requirements(self) -> list[BiosRequirement]:
"""Parse known_bios_files.json and return BIOS requirements."""
@@ -52,43 +120,61 @@ class Scraper(BaseScraper):
if not self.validate_format(raw):
raise ValueError("known_bios_files.json format validation failed")
roms = json.loads(raw)
data = self._parse_json()
requirements = []
for key, info in roms.items():
for key, entry in data.items():
if ":" not in key:
continue
system, name = key.split(":", 1)
igdb_slug, filename = key.split(":", 1)
system = SLUG_MAP.get(igdb_slug)
if not system:
print(f"Warning: unmapped IGDB slug '{igdb_slug}'", file=sys.stderr)
continue
sha1 = (entry.get("sha1") or "").strip() or None
md5 = (entry.get("md5") or "").strip() or None
crc32 = (entry.get("crc") or "").strip() or None
size = int(entry["size"]) if entry.get("size") else None
requirements.append(BiosRequirement(
name=name,
name=filename,
system=system,
size=int(info.get("size", 0)),
crc32=info.get("crc"),
md5=info.get("md5"),
sha1=info.get("sha1"),
sha1=sha1,
md5=md5,
crc32=crc32,
size=size,
destination=f"{igdb_slug}/{filename}",
required=True,
))
return requirements
def validate_format(self, raw_data: str) -> bool:
"""Validate that the raw data is a JSON object with the expected structure."""
"""Validate that raw_data is a JSON dict with slug:filename keys."""
try:
data = json.loads(raw_data)
if not isinstance(data, dict):
return False
for key, value in data.items():
if ":" not in key or not isinstance(value, dict):
return False
if not all(k in value for k in ("size", "crc", "md5", "sha1")):
return False
return True
except json.JSONDecodeError:
except (json.JSONDecodeError, TypeError):
return False
if not isinstance(data, dict):
return False
for key in list(data.keys())[:5]:
if ":" not in key:
return False
_, entry = key.split(":", 1), data[key]
if not isinstance(data[key], dict):
return False
if "md5" not in data[key] and "sha1" not in data[key]:
return False
return len(data) > 0
def generate_platform_yaml(self) -> dict:
"""Generate platform YAML content for RomM."""
"""Generate a platform YAML config dict from scraped data."""
requirements = self.fetch_requirements()
metadata = self.fetch_metadata()
systems: dict[str, dict] = {}
for req in requirements:
@@ -97,34 +183,42 @@ class Scraper(BaseScraper):
entry: dict = {
"name": req.name,
"destination": f"{req.system}/{req.name}",
"size": req.size,
"crc": req.crc32,
"md5": req.md5,
"sha1": req.sha1,
"destination": req.destination,
"required": req.required,
}
if req.sha1:
entry["sha1"] = req.sha1
if req.md5:
entry["md5"] = req.md5
if req.crc32:
entry["crc32"] = req.crc32
if req.size:
entry["size"] = req.size
systems[req.system]["files"].append(entry)
version = ""
tag = fetch_github_latest_version(GITHUB_REPO)
if tag:
version = tag
return {
"platform": metadata["name"],
"version": metadata["version"],
"homepage": metadata["homepage"],
"source": metadata["source"],
"inherits": "emulatorjs",
"platform": "RomM",
"version": version,
"homepage": "https://romm.app",
"source": SOURCE_URL,
"base_destination": "bios",
"hash_type": "md5",
"hash_type": "sha1",
"verification_mode": "md5",
"cores": [],
"systems": systems,
}
def main():
try:
from .base_scraper import scraper_cli
except ImportError:
sys.path.insert(0, str(Path(__file__).parent.parent))
from scraper.base_scraper import scraper_cli
from scripts.scraper.base_scraper import scraper_cli
scraper_cli(Scraper, "Scrape RomM BIOS requirements")
if __name__ == "__main__":
main()
main()

View File

@@ -25,7 +25,7 @@ import sys
from pathlib import Path
sys.path.insert(0, os.path.dirname(__file__))
from common import compute_hashes, load_database as _load_database
from common import compute_hashes, list_registered_platforms, load_database as _load_database
try:
import yaml
@@ -107,9 +107,8 @@ def load_platform_hashes(platforms_dir: str) -> dict:
if not os.path.isdir(platforms_dir) or yaml is None:
return known
for f in Path(platforms_dir).glob("*.yml"):
if f.name.startswith("_"):
continue
for name in list_registered_platforms(platforms_dir, include_archived=True):
f = Path(platforms_dir) / f"{name}.yml"
with open(f) as fh:
try:
config = yaml.safe_load(fh) or {}