From 00d7b57884f9d5376b7f29a303f5f86530fc792b Mon Sep 17 00:00:00 2001
From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com>
Date: Mon, 30 Mar 2026 18:29:06 +0200
Subject: [PATCH] feat: add FBNeo source parser for BIOS sets

---
 scripts/scraper/fbneo_parser.py | 166 ++++++++++++++++++++++++
 tests/test_fbneo_parser.py      | 220 ++++++++++++++++++++++++++++++++
 2 files changed, 386 insertions(+)
 create mode 100644 scripts/scraper/fbneo_parser.py
 create mode 100644 tests/test_fbneo_parser.py

diff --git a/scripts/scraper/fbneo_parser.py b/scripts/scraper/fbneo_parser.py
new file mode 100644
index 00000000..03e82443
--- /dev/null
+++ b/scripts/scraper/fbneo_parser.py
@@ -0,0 +1,166 @@
+"""Parser for FBNeo source files to extract BIOS sets and ROM definitions.
+
+Parses BurnRomInfo structs (static ROM arrays) and BurnDriver structs
+(driver registration) from FBNeo C source files. BIOS sets are identified
+by the BDF_BOARDROM flag in BurnDriver definitions.
+
+C/C++ comments are blanked out before matching, so commented-out drivers,
+flags and ROM entries are never reported.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+
+_ROM_ENTRY_RE = re.compile(
+    r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}',
+)
+
+_BURN_DRIVER_RE = re.compile(
+    r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
+    re.DOTALL,
+)
+
+_ROM_DESC_RE = re.compile(
+    r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
+    re.DOTALL,
+)
+
+_QUOTED_RE = re.compile(r'"([^"]+)"')
+
+# Literals come first in the alternation so that comment markers inside a
+# string (e.g. "//" in a title) are not mistaken for the start of a comment.
+_LITERAL_OR_COMMENT_RE = re.compile(
+    r'"(?:\\.|[^"\\\n])*"'
+    r"|'(?:\\.|[^'\\\n])*'"
+    r'|//[^\n]*'
+    r'|/\*.*?\*/',
+    re.DOTALL,
+)
+
+_NON_NEWLINE_RE = re.compile(r'[^\n]')
+
+_BIOS_FLAG = 'BDF_BOARDROM'
+
+
+def _blank_comments(source: str) -> str:
+    """Replace C/C++ comments with spaces, keeping offsets and newlines."""
+    if '//' not in source and '/*' not in source:
+        return source
+
+    def _blank(match: re.Match[str]) -> str:
+        text = match.group(0)
+        if text[0] in '"\'':
+            return text  # string/char literal: keep verbatim
+        return _NON_NEWLINE_RE.sub(' ', text)
+
+    return _LITERAL_OR_COMMENT_RE.sub(_blank, source)
+
+
+def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
+    """Find BDF_BOARDROM drivers in source code.
+
+    Returns a dict mapping set name to metadata:
+        {set_name: {"source_file": str, "source_line": int}}
+    source_file is filename as given; source_line is the 1-based line on
+    which the BurnDriver struct starts. A flag that only appears inside a
+    comment does not count.
+    """
+    results: dict[str, dict] = {}
+
+    # Cheap pre-check: skip the regex work for files without the flag.
+    if _BIOS_FLAG not in source:
+        return results
+
+    code = _blank_comments(source)
+
+    for match in _BURN_DRIVER_RE.finditer(code):
+        body = match.group(2)
+        if _BIOS_FLAG not in body:
+            continue
+
+        # Set name is the first quoted string in the struct body
+        name_match = _QUOTED_RE.search(body)
+        if not name_match:
+            continue
+
+        results[name_match.group(1)] = {
+            'source_file': filename,
+            # Blanking preserves newlines, so offsets map to original lines.
+            'source_line': code.count('\n', 0, match.start()) + 1,
+        }
+
+    return results
+
+
+def parse_rom_info(source: str, set_name: str) -> list[dict]:
+    """Parse a BurnRomInfo array for the given set name.
+
+    Returns a list of dicts with keys: name, size, crc32 (size is an int,
+    crc32 is 8 lowercase hex digits), or [] when source has no
+    <set_name>RomDesc array. Sentinel entries (empty name) and
+    commented-out entries are skipped.
+    """
+    pattern = re.compile(
+        r'static\s+struct\s+BurnRomInfo\s+'
+        + re.escape(set_name)
+        + r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
+        re.DOTALL,
+    )
+    match = pattern.search(_blank_comments(source))
+    if not match:
+        return []
+
+    # _ROM_ENTRY_RE requires a non-empty name and hex size/CRC, so the
+    # { "", 0, 0, 0 } sentinel never matches and needs no explicit check.
+    return [
+        {
+            'name': entry.group(1),
+            'size': int(entry.group(2), 16),
+            'crc32': format(int(entry.group(3), 16), '08x'),
+        }
+        for entry in _ROM_ENTRY_RE.finditer(match.group(1))
+    ]
+
+
+def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
+    """Walk the FBNeo driver source tree and extract all BIOS sets.
+
+    Scans .cpp files under src/burn/drv/ for BDF_BOARDROM drivers,
+    then parses their associated BurnRomInfo arrays. Directories and files
+    are visited in sorted order, so when a set name occurs in several files
+    the winner does not depend on filesystem enumeration order.
+
+    Returns a dict mapping set name to:
+        {source_file, source_line, roms: [{name, size, crc32}, ...]}
+    source_file is relative to base_path and uses forward slashes.
+    Returns {} when src/burn/drv/ does not exist.
+    """
+    drv_path = Path(base_path) / 'src' / 'burn' / 'drv'
+    if not drv_path.is_dir():
+        return {}
+
+    results: dict[str, dict] = {}
+
+    for root, dirs, files in os.walk(drv_path):
+        dirs.sort()  # in place, so os.walk descends in a stable order
+        for fname in sorted(files):
+            if not fname.endswith('.cpp'):
+                continue
+
+            filepath = Path(root) / fname
+            source = filepath.read_text(encoding='utf-8', errors='replace')
+            # POSIX separators keep the metadata identical on every OS.
+            rel_path = filepath.relative_to(base_path).as_posix()
+
+            for set_name, meta in find_bios_sets(source, rel_path).items():
+                results[set_name] = {
+                    'source_file': meta['source_file'],
+                    'source_line': meta['source_line'],
+                    'roms': parse_rom_info(source, set_name),
+                }
+
+    return results
diff --git a/tests/test_fbneo_parser.py b/tests/test_fbneo_parser.py
new file mode 100644
index 00000000..8852a5c3
--- /dev/null
+++ b/tests/test_fbneo_parser.py
@@ -0,0 +1,220 @@
+"""Tests for the FBNeo source parser."""
+
+from __future__ import annotations
+
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from scripts.scraper.fbneo_parser import (
+    find_bios_sets,
+    parse_fbneo_source_tree,
+    parse_rom_info,
+)
+
+NEOGEO_FIXTURE = """\
+static struct BurnRomInfo neogeoRomDesc[] = {
+    { "sp-s2.sp1", 0x020000, 0x9036d879, BRF_ESS | BRF_BIOS },
+    { "sp-s.sp1", 0x020000, 0xc7f2fa45, BRF_ESS | BRF_BIOS },
+    { "asia-s3.rom", 0x020000, 0x91b64be3, BRF_ESS | BRF_BIOS },
+    { "vs-bios.rom", 0x020000, 0xf0e8f27d, BRF_ESS | BRF_BIOS },
+    { "uni-bios.rom", 0x020000, 0x2d50996a, BRF_ESS | BRF_BIOS },
+    { "", 0, 0, 0 }
+};
+
+STD_ROM_FN(neogeo)
+
+struct BurnDriver BurnDrvneogeo = {
+    "neogeo", NULL, NULL, NULL, "1990",
+    "Neo Geo\\0", "BIOS only", "SNK", "Neo Geo MVS",
+    NULL, NULL, NULL, NULL,
+    BDF_BOARDROM, 0, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
+    GBF_BIOS, 0,
+    NULL, neogeoRomInfo, neogeoRomName, NULL, NULL, NULL, NULL,
+    neogeoInputInfo, neogeoDIPInfo,
+    NULL, NULL, NULL, NULL, 0x1000,
+    304, 224, 4, 3
+};
+"""
+
+PGM_FIXTURE = """\
+static struct BurnRomInfo pgmRomDesc[] = {
+    { "pgm_t01s.rom", 0x200000, 0x1a7123a0, BRF_GRA },
+    { "pgm_m01s.rom", 0x200000, 0x45ae7159, BRF_SND },
+    { "pgm_p01s.rom", 0x020000, 0xe42b166e, BRF_ESS | BRF_BIOS },
+    { "", 0, 0, 0 }
+};
+
+STD_ROM_FN(pgm)
+
+struct BurnDriver BurnDrvpgm = {
+    "pgm", NULL, NULL, NULL, "1997",
+    "PGM (Polygame Master)\\0", "BIOS only", "IGS", "PGM",
+    NULL, NULL, NULL, NULL,
+    BDF_BOARDROM, 0, HARDWARE_IGS_PGM,
+    GBF_BIOS, 0,
+    NULL, pgmRomInfo, pgmRomName, NULL, NULL, NULL, NULL,
+    pgmInputInfo, pgmDIPInfo,
+    NULL, NULL, NULL, NULL, 0x900,
+    448, 224, 4, 3
+};
+"""
+
+NON_BIOS_FIXTURE = """\
+static struct BurnRomInfo mslugRomDesc[] = {
+    { "201-p1.p1", 0x100000, 0x08d8daa5, BRF_ESS | BRF_PRG },
+    { "", 0, 0, 0 }
+};
+
+STD_ROM_FN(mslug)
+
+struct BurnDriver BurnDrvmslug = {
+    "mslug", NULL, "neogeo", NULL, "1996",
+    "Metal Slug\\0", NULL, "Nazca", "Neo Geo MVS",
+    NULL, NULL, NULL, NULL,
+    BDF_GAME_WORKING, 2, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
+    GBF_PLATFORM | GBF_HORSHOOT, 0,
+    NULL, mslugRomInfo, mslugRomName, NULL, NULL, NULL, NULL,
+    neogeoInputInfo, neogeoDIPInfo,
+    NULL, NULL, NULL, NULL, 0x1000,
+    304, 224, 4, 3
+};
+"""
+
+
+class TestFindBiosSets(unittest.TestCase):
+
+    def test_detects_neogeo(self) -> None:
+        result = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
+        self.assertIn('neogeo', result)
+        self.assertEqual(result['neogeo']['source_file'], 'd_neogeo.cpp')
+
+    def test_detects_pgm(self) -> None:
+        result = find_bios_sets(PGM_FIXTURE, 'd_pgm.cpp')
+        self.assertIn('pgm', result)
+        self.assertEqual(result['pgm']['source_file'], 'd_pgm.cpp')
+
+    def test_ignores_non_bios(self) -> None:
+        result = find_bios_sets(NON_BIOS_FIXTURE, 'd_neogeo.cpp')
+        self.assertEqual(result, {})
+
+    def test_source_line_positive(self) -> None:
+        result = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
+        self.assertGreater(result['neogeo']['source_line'], 0)
+
+    def test_source_line_points_at_driver_struct(self) -> None:
+        result = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
+        self.assertEqual(result['neogeo']['source_line'], 12)
+
+    def test_ignores_flag_inside_comment(self) -> None:
+        source = NON_BIOS_FIXTURE.replace(
+            'BDF_GAME_WORKING,', 'BDF_GAME_WORKING, /* BDF_BOARDROM */',
+        )
+        self.assertEqual(find_bios_sets(source, 'd_neogeo.cpp'), {})
+
+
+class TestParseRomInfo(unittest.TestCase):
+
+    def test_neogeo_rom_count(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
+        self.assertEqual(len(roms), 5)
+
+    def test_sentinel_skipped(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
+        names = [r['name'] for r in roms]
+        self.assertNotIn('', names)
+
+    def test_crc32_lowercase_hex(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
+        first = roms[0]
+        self.assertEqual(first['crc32'], '9036d879')
+        self.assertRegex(first['crc32'], r'^[0-9a-f]{8}$')
+
+    def test_no_sha1(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
+        for rom in roms:
+            self.assertNotIn('sha1', rom)
+
+    def test_neogeo_first_rom(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
+        first = roms[0]
+        self.assertEqual(first['name'], 'sp-s2.sp1')
+        self.assertEqual(first['size'], 0x020000)
+        self.assertEqual(first['crc32'], '9036d879')
+
+    def test_pgm_rom_count(self) -> None:
+        roms = parse_rom_info(PGM_FIXTURE, 'pgm')
+        self.assertEqual(len(roms), 3)
+
+    def test_pgm_bios_entry(self) -> None:
+        roms = parse_rom_info(PGM_FIXTURE, 'pgm')
+        bios = roms[2]
+        self.assertEqual(bios['name'], 'pgm_p01s.rom')
+        self.assertEqual(bios['crc32'], 'e42b166e')
+
+    def test_unknown_set_returns_empty(self) -> None:
+        roms = parse_rom_info(NEOGEO_FIXTURE, 'nonexistent')
+        self.assertEqual(roms, [])
+
+    def test_commented_out_entry_skipped(self) -> None:
+        source = NEOGEO_FIXTURE.replace(
+            '{ "sp-s.sp1"', '// { "sp-s.sp1"',
+        )
+        names = [r['name'] for r in parse_rom_info(source, 'neogeo')]
+        self.assertEqual(len(names), 4)
+        self.assertNotIn('sp-s.sp1', names)
+
+
+class TestParseSourceTree(unittest.TestCase):
+
+    def test_walks_drv_directory(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' / 'neogeo'
+            drv_dir.mkdir(parents=True)
+            (drv_dir / 'd_neogeo.cpp').write_text(NEOGEO_FIXTURE)
+
+            result = parse_fbneo_source_tree(tmpdir)
+            self.assertIn('neogeo', result)
+            self.assertEqual(len(result['neogeo']['roms']), 5)
+
+    def test_skips_non_cpp(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv'
+            drv_dir.mkdir(parents=True)
+            (drv_dir / 'd_neogeo.h').write_text(NEOGEO_FIXTURE)
+
+            result = parse_fbneo_source_tree(tmpdir)
+            self.assertEqual(result, {})
+
+    def test_missing_directory_returns_empty(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            result = parse_fbneo_source_tree(tmpdir)
+            self.assertEqual(result, {})
+
+    def test_multiple_sets(self) -> None:
+        combined = NEOGEO_FIXTURE + '\n' + PGM_FIXTURE
+        with tempfile.TemporaryDirectory() as tmpdir:
+            drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv'
+            drv_dir.mkdir(parents=True)
+            (drv_dir / 'd_combined.cpp').write_text(combined)
+
+            result = parse_fbneo_source_tree(tmpdir)
+            self.assertIn('neogeo', result)
+            self.assertIn('pgm', result)
+
+    def test_source_file_is_posix_relative(self) -> None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            drv_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' / 'neogeo'
+            drv_dir.mkdir(parents=True)
+            (drv_dir / 'd_neogeo.cpp').write_text(NEOGEO_FIXTURE)
+
+            result = parse_fbneo_source_tree(tmpdir)
+            self.assertEqual(
+                result['neogeo']['source_file'],
+                'src/burn/drv/neogeo/d_neogeo.cpp',
+            )
+
+
+if __name__ == '__main__':
+    unittest.main()