mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
feat: add FBNeo source parser for BIOS sets
This commit is contained in:
127
scripts/scraper/fbneo_parser.py
Normal file
127
scripts/scraper/fbneo_parser.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Parser for FBNeo source files to extract BIOS sets and ROM definitions.
|
||||
|
||||
Parses BurnRomInfo structs (static ROM arrays) and BurnDriver structs
|
||||
(driver registration) from FBNeo C source files. BIOS sets are identified
|
||||
by the BDF_BOARDROM flag in BurnDriver definitions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# One row of a BurnRomInfo array: { "name", 0xSIZE, 0xCRC, FLAGS }
# Groups: 1 = file name (must be non-empty), 2 = size as a 0x-prefixed hex
# literal, 3 = CRC32 as a 0x-prefixed hex literal, 4 = everything up to the
# closing brace (the flag expression, surrounding whitespace included).
_ROM_ENTRY_RE = re.compile(
    r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}'
)

# A driver registration: struct BurnDriver BurnDrv<ident> = { ... };
# Groups: 1 = identifier suffix after "BurnDrv", 2 = the initialiser body.
# The body is matched lazily, so it ends at the first "};" that follows.
_BURN_DRIVER_RE = re.compile(
    r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
    flags=re.DOTALL,
)

# A ROM table: static struct BurnRomInfo <ident>RomDesc[] = { ... };
# Groups: 1 = identifier prefix before "RomDesc", 2 = the array body
# (lazy, ends at the first "};").
_ROM_DESC_RE = re.compile(
    r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
    flags=re.DOTALL,
)
|
||||
|
||||
|
||||
def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
    """Collect every BurnDriver in *source* that is flagged as a board ROM.

    A driver is treated as a BIOS set when the text ``BDF_BOARDROM`` occurs
    anywhere inside its struct initialiser. The set name is the first
    double-quoted string found in that initialiser; drivers without any
    quoted string are ignored.

    Args:
        source: Text of one FBNeo driver source file.
        filename: Label copied verbatim into each result as ``source_file``.

    Returns:
        ``{set_name: {"source_file": filename, "source_line": int}}`` where
        ``source_line`` is the 1-based line on which the matched
        ``struct BurnDriver`` declaration begins. When a set name occurs in
        several drivers, the last one in the file wins.
    """
    found: dict[str, dict] = {}

    for driver in _BURN_DRIVER_RE.finditer(source):
        initialiser = driver.group(2)

        if 'BDF_BOARDROM' in initialiser:
            # The first quoted token of the initialiser is the set name.
            quoted = re.search(r'"([^"]+)"', initialiser)

            if quoted is not None:
                # Newlines before the match start + 1 gives a 1-based line.
                newlines_before = source.count('\n', 0, driver.start())
                found[quoted.group(1)] = {
                    'source_file': filename,
                    'source_line': newlines_before + 1,
                }

    return found
|
||||
|
||||
|
||||
# Matches either a double-quoted C string literal or a C/C++ comment.
# Strings come first in the alternation so that comment markers inside a
# quoted file name are consumed as part of the string, not as a comment.
_C_COMMENT_OR_STRING_RE = re.compile(
    r'"(?:\\.|[^"\\\n])*"|//[^\n]*|/\*.*?\*/',
    re.DOTALL,
)


def _strip_c_comments(text: str) -> str:
    """Return *text* with ``//`` and ``/* */`` comments replaced by a space."""

    def _keep_strings(token):
        literal = token.group(0)
        # String literals are returned untouched; only comments are dropped.
        return literal if literal.startswith('"') else ' '

    return _C_COMMENT_OR_STRING_RE.sub(_keep_strings, text)


def parse_rom_info(source: str, set_name: str) -> list[dict]:
    """Parse the ``<set_name>RomDesc`` BurnRomInfo array found in *source*.

    Only the first array whose identifier is exactly ``<set_name>RomDesc``
    is used. Rows that are commented out in the C source (``//`` or
    ``/* */``) are ignored, since they are not part of the compiled table.

    Args:
        source: Text of one FBNeo driver source file.
        set_name: Set identifier; it is regex-escaped before being used.

    Returns:
        One dict per ROM row, in source order, with keys ``name`` (str),
        ``size`` (int, parsed from the hex literal) and ``crc32`` (lowercase
        hex string, zero-padded to at least 8 digits). Sentinel rows with an
        empty name are skipped. Returns ``[]`` when no matching array exists.
    """
    pattern = re.compile(
        r'static\s+struct\s+BurnRomInfo\s+'
        + re.escape(set_name)
        + r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
        re.DOTALL,
    )
    match = pattern.search(source)
    if match is None:
        return []

    # Drop comments first so disabled rows such as
    #   // { "old.rom", 0x1000, 0xdeadbeef, BRF_BIOS },
    # are not reported as real ROMs.
    body = _strip_c_comments(match.group(1))

    return [
        {
            'name': entry.group(1),
            'size': int(entry.group(2), 16),
            'crc32': format(int(entry.group(3), 16), '08x'),
        }
        for entry in _ROM_ENTRY_RE.finditer(body)
        # Safety net: the row pattern already requires a non-empty name.
        if entry.group(1)
    ]
|
||||
|
||||
|
||||
def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
    """Walk the FBNeo driver source tree and extract all BIOS sets.

    Every ``.cpp`` file under ``<base_path>/src/burn/drv`` is scanned for
    BDF_BOARDROM drivers with ``find_bios_sets``; for each one found, the
    matching BurnRomInfo array in the same file is parsed with
    ``parse_rom_info``.

    Directories and files are visited in sorted order so the result does not
    depend on filesystem enumeration order: when the same set name is defined
    in several files, the file visited last in that order wins.

    Args:
        base_path: Root of an FBNeo checkout.

    Returns:
        ``{set_name: {"source_file", "source_line", "roms"}}`` where
        ``source_file`` is relative to *base_path* and always uses forward
        slashes, and ``roms`` is the list returned by ``parse_rom_info``.
        Returns ``{}`` when the driver directory does not exist.
    """
    base = Path(base_path)
    drv_path = base / 'src' / 'burn' / 'drv'
    if not drv_path.is_dir():
        return {}

    results: dict[str, dict] = {}

    for root, dirs, files in os.walk(drv_path):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()

        for fname in sorted(files):
            if not fname.endswith('.cpp'):
                continue

            filepath = Path(root) / fname
            # Undecodable bytes are replaced rather than aborting the scan.
            source = filepath.read_text(encoding='utf-8', errors='replace')
            # Forward slashes keep the recorded path identical on every OS.
            rel_path = filepath.relative_to(base).as_posix()

            for set_name, meta in find_bios_sets(source, rel_path).items():
                results[set_name] = {
                    'source_file': meta['source_file'],
                    'source_line': meta['source_line'],
                    'roms': parse_rom_info(source, set_name),
                }

    return results
|
||||
190
tests/test_fbneo_parser.py
Normal file
190
tests/test_fbneo_parser.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""Tests for the FBNeo source parser."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from scripts.scraper.fbneo_parser import (
|
||||
find_bios_sets,
|
||||
parse_fbneo_source_tree,
|
||||
parse_rom_info,
|
||||
)
|
||||
|
||||
# BIOS driver fixture: five ROM rows plus the empty-name sentinel, followed
# by a BurnDriver carrying BDF_BOARDROM.
NEOGEO_FIXTURE = """\
static struct BurnRomInfo neogeoRomDesc[] = {
{ "sp-s2.sp1", 0x020000, 0x9036d879, BRF_ESS | BRF_BIOS },
{ "sp-s.sp1", 0x020000, 0xc7f2fa45, BRF_ESS | BRF_BIOS },
{ "asia-s3.rom", 0x020000, 0x91b64be3, BRF_ESS | BRF_BIOS },
{ "vs-bios.rom", 0x020000, 0xf0e8f27d, BRF_ESS | BRF_BIOS },
{ "uni-bios.rom", 0x020000, 0x2d50996a, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};

STD_ROM_FN(neogeo)

struct BurnDriver BurnDrvneogeo = {
"neogeo", NULL, NULL, NULL, "1990",
"Neo Geo\\0", "BIOS only", "SNK", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_BIOS, 0,
NULL, neogeoRomInfo, neogeoRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""

# BIOS driver fixture whose ROM table mixes graphics, sound and BIOS rows
# (three rows plus the sentinel).
PGM_FIXTURE = """\
static struct BurnRomInfo pgmRomDesc[] = {
{ "pgm_t01s.rom", 0x200000, 0x1a7123a0, BRF_GRA },
{ "pgm_m01s.rom", 0x200000, 0x45ae7159, BRF_SND },
{ "pgm_p01s.rom", 0x020000, 0xe42b166e, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};

STD_ROM_FN(pgm)

struct BurnDriver BurnDrvpgm = {
"pgm", NULL, NULL, NULL, "1997",
"PGM (Polygame Master)\\0", "BIOS only", "IGS", "PGM",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_IGS_PGM,
GBF_BIOS, 0,
NULL, pgmRomInfo, pgmRomName, NULL, NULL, NULL, NULL,
pgmInputInfo, pgmDIPInfo,
NULL, NULL, NULL, NULL, 0x900,
448, 224, 4, 3
};
"""

# Regular game driver (BDF_GAME_WORKING, no BDF_BOARDROM): must not be
# reported as a BIOS set.
NON_BIOS_FIXTURE = """\
static struct BurnRomInfo mslugRomDesc[] = {
{ "201-p1.p1", 0x100000, 0x08d8daa5, BRF_ESS | BRF_PRG },
{ "", 0, 0, 0 }
};

STD_ROM_FN(mslug)

struct BurnDriver BurnDrvmslug = {
"mslug", NULL, "neogeo", NULL, "1996",
"Metal Slug\\0", NULL, "Nazca", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_GAME_WORKING, 2, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_PLATFORM | GBF_HORSHOOT, 0,
NULL, mslugRomInfo, mslugRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""
|
||||
|
||||
|
||||
class TestFindBiosSets(unittest.TestCase):
    """Detection of BDF_BOARDROM drivers in single-file fixtures."""

    def test_detects_neogeo(self) -> None:
        # The Neo Geo BIOS driver is reported under its set name and keeps
        # the file label it was scanned with.
        found = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
        self.assertIn('neogeo', found)
        self.assertEqual(found['neogeo']['source_file'], 'd_neogeo.cpp')

    def test_detects_pgm(self) -> None:
        # Same expectation for the PGM BIOS driver.
        found = find_bios_sets(PGM_FIXTURE, 'd_pgm.cpp')
        self.assertIn('pgm', found)
        self.assertEqual(found['pgm']['source_file'], 'd_pgm.cpp')

    def test_ignores_non_bios(self) -> None:
        # A driver without BDF_BOARDROM yields no entries at all.
        self.assertEqual(find_bios_sets(NON_BIOS_FIXTURE, 'd_neogeo.cpp'), {})

    def test_source_line_positive(self) -> None:
        # Line numbers are 1-based, so they are always strictly positive.
        neogeo = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')['neogeo']
        self.assertGreater(neogeo['source_line'], 0)
|
||||
|
||||
|
||||
class TestParseRomInfo(unittest.TestCase):
    """Extraction of ROM rows from BurnRomInfo arrays."""

    def test_neogeo_rom_count(self) -> None:
        # Five real rows; the trailing sentinel is not counted.
        self.assertEqual(len(parse_rom_info(NEOGEO_FIXTURE, 'neogeo')), 5)

    def test_sentinel_skipped(self) -> None:
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        self.assertNotIn('', [entry['name'] for entry in entries])

    def test_crc32_lowercase_hex(self) -> None:
        # CRC is emitted as exactly eight lowercase hex digits, no prefix.
        head = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')[0]
        self.assertEqual(head['crc32'], '9036d879')
        self.assertRegex(head['crc32'], r'^[0-9a-f]{8}$')

    def test_no_sha1(self) -> None:
        # The parsed rows carry no SHA-1 key.
        for entry in parse_rom_info(NEOGEO_FIXTURE, 'neogeo'):
            self.assertNotIn('sha1', entry)

    def test_neogeo_first_rom(self) -> None:
        head = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')[0]
        self.assertEqual(head['name'], 'sp-s2.sp1')
        self.assertEqual(head['size'], 0x020000)
        self.assertEqual(head['crc32'], '9036d879')

    def test_pgm_rom_count(self) -> None:
        self.assertEqual(len(parse_rom_info(PGM_FIXTURE, 'pgm')), 3)

    def test_pgm_bios_entry(self) -> None:
        # Rows keep source order, so the BIOS program ROM is the third one.
        third = parse_rom_info(PGM_FIXTURE, 'pgm')[2]
        self.assertEqual(third['name'], 'pgm_p01s.rom')
        self.assertEqual(third['crc32'], 'e42b166e')

    def test_unknown_set_returns_empty(self) -> None:
        self.assertEqual(parse_rom_info(NEOGEO_FIXTURE, 'nonexistent'), [])
|
||||
|
||||
|
||||
class TestParseSourceTree(unittest.TestCase):
    """End-to-end scans over a temporary src/burn/drv directory tree."""

    def test_walks_drv_directory(self) -> None:
        # A .cpp file in a nested driver directory is discovered and parsed.
        with tempfile.TemporaryDirectory() as workdir:
            neogeo_dir = Path(workdir, 'src', 'burn', 'drv', 'neogeo')
            neogeo_dir.mkdir(parents=True)
            neogeo_dir.joinpath('d_neogeo.cpp').write_text(NEOGEO_FIXTURE)

            tree = parse_fbneo_source_tree(workdir)

        self.assertIn('neogeo', tree)
        self.assertEqual(len(tree['neogeo']['roms']), 5)

    def test_skips_non_cpp(self) -> None:
        # Identical content in a header file must be ignored.
        with tempfile.TemporaryDirectory() as workdir:
            drivers = Path(workdir, 'src', 'burn', 'drv')
            drivers.mkdir(parents=True)
            drivers.joinpath('d_neogeo.h').write_text(NEOGEO_FIXTURE)

            tree = parse_fbneo_source_tree(workdir)

        self.assertEqual(tree, {})

    def test_missing_directory_returns_empty(self) -> None:
        # No src/burn/drv below the base path gives an empty result.
        with tempfile.TemporaryDirectory() as workdir:
            tree = parse_fbneo_source_tree(workdir)

        self.assertEqual(tree, {})

    def test_multiple_sets(self) -> None:
        # Two BIOS drivers in one file are both reported.
        both_sets = NEOGEO_FIXTURE + '\n' + PGM_FIXTURE
        with tempfile.TemporaryDirectory() as workdir:
            drivers = Path(workdir, 'src', 'burn', 'drv')
            drivers.mkdir(parents=True)
            drivers.joinpath('d_combined.cpp').write_text(both_sets)

            tree = parse_fbneo_source_tree(workdir)

        self.assertIn('neogeo', tree)
        self.assertIn('pgm', tree)
|
||||
|
||||
|
||||
# Run the suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
Reference in New Issue
Block a user