feat: add FBNeo source parser for BIOS sets

This commit is contained in:
Abdessamad Derraz
2026-03-30 18:29:06 +02:00
parent caf6285a04
commit 00d7b57884
2 changed files with 317 additions and 0 deletions

View File

@@ -0,0 +1,127 @@
"""Parser for FBNeo source files to extract BIOS sets and ROM definitions.
Parses BurnRomInfo structs (static ROM arrays) and BurnDriver structs
(driver registration) from FBNeo C source files. BIOS sets are identified
by the BDF_BOARDROM flag in BurnDriver definitions.
"""
from __future__ import annotations
import os
import re
from pathlib import Path
# Matches one ROM entry initializer of the form
#   { "name", 0xSIZE, 0xCRC, FLAGS }
# Groups: 1 = file name (at least one character), 2 = size as a hex literal,
# 3 = CRC32 as a hex literal, 4 = everything after the third comma up to the
# closing brace. An entry with an empty name or a non-hex size/CRC (for
# example a `{ "", 0, 0, 0 }` terminator) does not match at all.
_ROM_ENTRY_RE = re.compile(
r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}',
)
# Matches a `struct BurnDriver BurnDrv<ident> = { ... };` definition.
# Groups: 1 = identifier suffix after "BurnDrv", 2 = struct body.
# DOTALL lets the non-greedy body span several lines; it ends at the first
# `};` that follows the opening brace.
_BURN_DRIVER_RE = re.compile(
r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
re.DOTALL,
)
# Matches a `static struct BurnRomInfo <ident>RomDesc[] = { ... };` array.
# Groups: 1 = identifier prefix before "RomDesc", 2 = array body.
# NOTE(review): no function visible in this module references this pattern;
# parse_rom_info() compiles a name-specific variant instead.
_ROM_DESC_RE = re.compile(
r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
re.DOTALL,
)
def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
    """Locate BurnDriver definitions that carry the BDF_BOARDROM flag.

    Args:
        source: Text of an FBNeo driver source file.
        filename: Stored verbatim as ``source_file`` for every hit.

    Returns:
        Mapping of set name to
        ``{"source_file": str, "source_line": int}``, where ``source_line``
        is the 1-based line on which the matched ``struct BurnDriver``
        definition starts. Drivers without the flag, or without any quoted
        string in their body, are left out.
    """
    found: dict[str, dict] = {}
    for driver in _BURN_DRIVER_RE.finditer(source):
        fields = driver.group(2)
        # The set name is the first quoted string in the struct body; only
        # look for it once the driver is known to be a board ROM.
        quoted = (
            re.search(r'"([^"]+)"', fields)
            if 'BDF_BOARDROM' in fields
            else None
        )
        if quoted is None:
            continue
        found[quoted.group(1)] = {
            'source_file': filename,
            # Newlines before the match start, plus one, give the line number.
            'source_line': source.count('\n', 0, driver.start()) + 1,
        }
    return found
# Matches a C string literal, a `//` line comment, or a `/* */` block comment.
# String literals are listed first so that comment markers inside a string
# are consumed as part of the string and never treated as a comment.
_C_STRING_OR_COMMENT_RE = re.compile(
    r'"(?:\\.|[^"\\])*"|//[^\n]*|/\*.*?\*/',
    re.DOTALL,
)


def _strip_c_comments(text: str) -> str:
    """Return *text* with C/C++ comments blanked out and strings untouched."""
    return _C_STRING_OR_COMMENT_RE.sub(
        # Keep string literals as-is; replace a comment with one space so the
        # tokens on either side of it do not get glued together.
        lambda m: m.group(0) if m.group(0).startswith('"') else ' ',
        text,
    )


def parse_rom_info(source: str, set_name: str) -> list[dict]:
    """Parse the ``<set_name>RomDesc`` BurnRomInfo array found in *source*.

    Comments inside the array body are removed before the entries are read,
    so ROM lines that have been commented out are not reported.

    Args:
        source: Text of an FBNeo driver source file.
        set_name: Set whose ``static struct BurnRomInfo <set_name>RomDesc[]``
            array should be read. The name is matched literally.

    Returns:
        One dict per ROM entry, in source order, with keys ``name`` (str),
        ``size`` (int) and ``crc32`` (8-digit lowercase hex string). Entries
        with an empty name are skipped. An empty list is returned when the
        array is not present.
    """
    pattern = re.compile(
        r'static\s+struct\s+BurnRomInfo\s+'
        + re.escape(set_name)
        + r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
        re.DOTALL,
    )
    match = pattern.search(source)
    if not match:
        return []

    body = _strip_c_comments(match.group(1))
    return [
        {
            'name': entry.group(1),
            'size': int(entry.group(2), 16),
            'crc32': format(int(entry.group(3), 16), '08x'),
        }
        for entry in _ROM_ENTRY_RE.finditer(body)
        if entry.group(1)
    ]
def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
    """Walk the FBNeo driver tree and collect every BIOS set with its ROMs.

    Every ``.cpp`` file under ``<base_path>/src/burn/drv`` is scanned for
    BDF_BOARDROM drivers; the BurnRomInfo array of each hit is then parsed
    from the same file. Directories and files are visited in sorted order so
    the result (including which file wins when a set name occurs more than
    once) does not depend on filesystem enumeration order.

    Args:
        base_path: Root of an FBNeo source checkout.

    Returns:
        Mapping of set name to
        ``{"source_file": str, "source_line": int, "roms": list[dict]}``.
        ``source_file`` is relative to *base_path* and always uses forward
        slashes. An empty dict is returned when the driver directory does
        not exist.
    """
    drv_path = Path(base_path) / 'src' / 'burn' / 'drv'
    if not drv_path.is_dir():
        return {}

    results: dict[str, dict] = {}
    for root, dirs, files in os.walk(drv_path):
        # os.walk descends into `dirs` in list order; sorting it in place
        # makes the traversal reproducible.
        dirs.sort()
        for fname in sorted(files):
            if not fname.endswith('.cpp'):
                continue
            filepath = Path(root) / fname
            # Undecodable bytes are replaced rather than aborting the scan.
            source = filepath.read_text(encoding='utf-8', errors='replace')
            rel_path = filepath.relative_to(base_path).as_posix()
            for set_name, meta in find_bios_sets(source, rel_path).items():
                results[set_name] = {
                    'source_file': meta['source_file'],
                    'source_line': meta['source_line'],
                    'roms': parse_rom_info(source, set_name),
                }
    return results

190
tests/test_fbneo_parser.py Normal file
View File

@@ -0,0 +1,190 @@
"""Tests for the FBNeo source parser."""
from __future__ import annotations
import os
import tempfile
import unittest
from pathlib import Path
from scripts.scraper.fbneo_parser import (
find_bios_sets,
parse_fbneo_source_tree,
parse_rom_info,
)
# FBNeo-style snippet for a BIOS set: a BurnRomInfo array with five hex
# entries followed by a `{ "", 0, 0, 0 }` terminator, and a BurnDriver whose
# flags field is BDF_BOARDROM.
NEOGEO_FIXTURE = """\
static struct BurnRomInfo neogeoRomDesc[] = {
{ "sp-s2.sp1", 0x020000, 0x9036d879, BRF_ESS | BRF_BIOS },
{ "sp-s.sp1", 0x020000, 0xc7f2fa45, BRF_ESS | BRF_BIOS },
{ "asia-s3.rom", 0x020000, 0x91b64be3, BRF_ESS | BRF_BIOS },
{ "vs-bios.rom", 0x020000, 0xf0e8f27d, BRF_ESS | BRF_BIOS },
{ "uni-bios.rom", 0x020000, 0x2d50996a, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};
STD_ROM_FN(neogeo)
struct BurnDriver BurnDrvneogeo = {
"neogeo", NULL, NULL, NULL, "1990",
"Neo Geo\\0", "BIOS only", "SNK", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_BIOS, 0,
NULL, neogeoRomInfo, neogeoRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""
# Second BIOS set: three ROM entries (graphics, sound, BIOS program) and a
# BDF_BOARDROM driver.
PGM_FIXTURE = """\
static struct BurnRomInfo pgmRomDesc[] = {
{ "pgm_t01s.rom", 0x200000, 0x1a7123a0, BRF_GRA },
{ "pgm_m01s.rom", 0x200000, 0x45ae7159, BRF_SND },
{ "pgm_p01s.rom", 0x020000, 0xe42b166e, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};
STD_ROM_FN(pgm)
struct BurnDriver BurnDrvpgm = {
"pgm", NULL, NULL, NULL, "1997",
"PGM (Polygame Master)\\0", "BIOS only", "IGS", "PGM",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_IGS_PGM,
GBF_BIOS, 0,
NULL, pgmRomInfo, pgmRomName, NULL, NULL, NULL, NULL,
pgmInputInfo, pgmDIPInfo,
NULL, NULL, NULL, NULL, 0x900,
448, 224, 4, 3
};
"""
# Regular game driver: its flags field is BDF_GAME_WORKING, with no
# BDF_BOARDROM anywhere, so BIOS detection must ignore it.
NON_BIOS_FIXTURE = """\
static struct BurnRomInfo mslugRomDesc[] = {
{ "201-p1.p1", 0x100000, 0x08d8daa5, BRF_ESS | BRF_PRG },
{ "", 0, 0, 0 }
};
STD_ROM_FN(mslug)
struct BurnDriver BurnDrvmslug = {
"mslug", NULL, "neogeo", NULL, "1996",
"Metal Slug\\0", NULL, "Nazca", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_GAME_WORKING, 2, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_PLATFORM | GBF_HORSHOOT, 0,
NULL, mslugRomInfo, mslugRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""
class TestFindBiosSets(unittest.TestCase):
    """find_bios_sets() must report only drivers flagged BDF_BOARDROM."""

    def test_detects_neogeo(self) -> None:
        # The Neo Geo driver is a board ROM and keeps the given file name.
        found = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
        self.assertIn('neogeo', found)
        self.assertEqual(found['neogeo']['source_file'], 'd_neogeo.cpp')

    def test_detects_pgm(self) -> None:
        # Same expectation for the PGM driver.
        found = find_bios_sets(PGM_FIXTURE, 'd_pgm.cpp')
        self.assertIn('pgm', found)
        self.assertEqual(found['pgm']['source_file'], 'd_pgm.cpp')

    def test_ignores_non_bios(self) -> None:
        # A BDF_GAME_WORKING driver must not show up at all.
        self.assertEqual(
            find_bios_sets(NON_BIOS_FIXTURE, 'd_neogeo.cpp'),
            {},
        )

    def test_source_line_positive(self) -> None:
        # Line numbers are 1-based, so any hit has a positive line.
        found = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
        line_number = found['neogeo']['source_line']
        self.assertGreater(line_number, 0)
class TestParseRomInfo(unittest.TestCase):
    """parse_rom_info() must return name/size/crc32 for each real entry."""

    def test_neogeo_rom_count(self) -> None:
        # Five named entries; the terminator row is not counted.
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        self.assertEqual(len(entries), 5)

    def test_sentinel_skipped(self) -> None:
        # No entry may carry an empty name.
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        self.assertNotIn('', [entry['name'] for entry in entries])

    def test_crc32_lowercase_hex(self) -> None:
        # CRCs are normalised to eight lowercase hex digits, no prefix.
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        crc = entries[0]['crc32']
        self.assertEqual(crc, '9036d879')
        self.assertRegex(crc, r'^[0-9a-f]{8}$')

    def test_no_sha1(self) -> None:
        # FBNeo sources carry no SHA-1, so the key must be absent.
        for entry in parse_rom_info(NEOGEO_FIXTURE, 'neogeo'):
            self.assertNotIn('sha1', entry)

    def test_neogeo_first_rom(self) -> None:
        # Entries keep source order and all three fields are populated.
        head = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')[0]
        self.assertEqual(head['name'], 'sp-s2.sp1')
        self.assertEqual(head['size'], 0x020000)
        self.assertEqual(head['crc32'], '9036d879')

    def test_pgm_rom_count(self) -> None:
        entries = parse_rom_info(PGM_FIXTURE, 'pgm')
        self.assertEqual(len(entries), 3)

    def test_pgm_bios_entry(self) -> None:
        # The BIOS program ROM is the third entry of the PGM array.
        bios_entry = parse_rom_info(PGM_FIXTURE, 'pgm')[2]
        self.assertEqual(bios_entry['name'], 'pgm_p01s.rom')
        self.assertEqual(bios_entry['crc32'], 'e42b166e')

    def test_unknown_set_returns_empty(self) -> None:
        # A set without a matching RomDesc array yields an empty list.
        self.assertEqual(parse_rom_info(NEOGEO_FIXTURE, 'nonexistent'), [])
class TestParseSourceTree(unittest.TestCase):
    """parse_fbneo_source_tree() must scan .cpp files under src/burn/drv."""

    def test_walks_drv_directory(self) -> None:
        # A driver in a nested sub-directory is discovered with its ROMs.
        with tempfile.TemporaryDirectory() as tmpdir:
            neogeo_dir = Path(tmpdir) / 'src' / 'burn' / 'drv' / 'neogeo'
            neogeo_dir.mkdir(parents=True)
            (neogeo_dir / 'd_neogeo.cpp').write_text(NEOGEO_FIXTURE)
            sets = parse_fbneo_source_tree(tmpdir)
        self.assertIn('neogeo', sets)
        self.assertEqual(len(sets['neogeo']['roms']), 5)

    def test_skips_non_cpp(self) -> None:
        # Header files are ignored even when they contain a BIOS driver.
        with tempfile.TemporaryDirectory() as tmpdir:
            drv_root = Path(tmpdir) / 'src' / 'burn' / 'drv'
            drv_root.mkdir(parents=True)
            (drv_root / 'd_neogeo.h').write_text(NEOGEO_FIXTURE)
            sets = parse_fbneo_source_tree(tmpdir)
        self.assertEqual(sets, {})

    def test_missing_directory_returns_empty(self) -> None:
        # Without src/burn/drv there is nothing to scan.
        with tempfile.TemporaryDirectory() as tmpdir:
            sets = parse_fbneo_source_tree(tmpdir)
        self.assertEqual(sets, {})

    def test_multiple_sets(self) -> None:
        # Two BIOS drivers in a single file are both reported.
        combined = NEOGEO_FIXTURE + '\n' + PGM_FIXTURE
        with tempfile.TemporaryDirectory() as tmpdir:
            drv_root = Path(tmpdir) / 'src' / 'burn' / 'drv'
            drv_root.mkdir(parents=True)
            (drv_root / 'd_combined.cpp').write_text(combined)
            sets = parse_fbneo_source_tree(tmpdir)
        self.assertIn('neogeo', sets)
        self.assertIn('pgm', sets)
# Run the suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()