mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-17 22:32:31 -05:00
feat: add FBNeo source parser for BIOS sets
This commit is contained in:
127
scripts/scraper/fbneo_parser.py
Normal file
127
scripts/scraper/fbneo_parser.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
"""Parser for FBNeo source files to extract BIOS sets and ROM definitions.
|
||||||
|
|
||||||
|
Parses BurnRomInfo structs (static ROM arrays) and BurnDriver structs
|
||||||
|
(driver registration) from FBNeo C source files. BIOS sets are identified
|
||||||
|
by the BDF_BOARDROM flag in BurnDriver definitions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# One BurnRomInfo initializer: { "name", 0xSIZE, 0xCRC, flags }.
# Groups: 1 = ROM name (at least one character), 2 = size, 3 = CRC32,
# 4 = flag expression. Size and CRC must be 0x-prefixed hex, so the
# { "", 0, 0, 0 } terminator does not match.
_ROM_ENTRY_RE = re.compile(
    r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}'
)

# A driver registration: struct BurnDriver BurnDrv<id> = { ... };
# Groups: 1 = identifier suffix after "BurnDrv", 2 = struct body up to the
# first "};" (non-greedy; DOTALL lets it span lines).
_BURN_DRIVER_RE = re.compile(
    r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
    flags=re.DOTALL,
)

# A ROM table: static struct BurnRomInfo <id>RomDesc[] = { ... };
# Groups: 1 = identifier prefix before "RomDesc", 2 = array body up to the
# first "};".
_ROM_DESC_RE = re.compile(
    r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
    flags=re.DOTALL,
)
|
||||||
|
|
||||||
|
|
||||||
|
def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
    """Collect the BurnDriver structs in *source* that carry BDF_BOARDROM.

    Every ``struct BurnDriver BurnDrv... = { ... };`` definition is scanned.
    A driver is kept when the text ``BDF_BOARDROM`` occurs in its body; its
    set name is the first double-quoted string found in that body.

    Args:
        source: Text of a driver source file.
        filename: Value stored verbatim as ``source_file`` for each hit.

    Returns:
        ``{set_name: {"source_file": str, "source_line": int}}`` where
        ``source_line`` is the 1-based line on which the struct match starts.
        A later driver with the same set name replaces the earlier one.
    """
    first_quoted = re.compile(r'"([^"]+)"')
    found: dict[str, dict] = {}

    for driver in _BURN_DRIVER_RE.finditer(source):
        fields = driver.group(2)
        if 'BDF_BOARDROM' in fields:
            quoted = first_quoted.search(fields)
            if quoted:
                found[quoted.group(1)] = {
                    'source_file': filename,
                    # Newlines before the struct, plus one for 1-based lines.
                    'source_line': source.count('\n', 0, driver.start()) + 1,
                }

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def parse_rom_info(source: str, set_name: str) -> list[dict]:
    """Parse the BurnRomInfo array that belongs to *set_name*.

    The array is located by its ``<set_name>RomDesc`` identifier. C and C++
    comments are blanked out before any matching takes place, so a
    commented-out array or a commented-out ROM entry is not reported.

    Args:
        source: Text of a driver source file.
        set_name: Set name whose ``RomDesc`` array should be read.

    Returns:
        One dict per ROM entry, in source order, with keys ``name`` (str),
        ``size`` (int) and ``crc32`` (8-digit lowercase hex str). An empty
        list when no matching array is found. Sentinel entries (empty name)
        are skipped.
    """
    def _blank(token):
        # String/char literals are returned untouched; comments are replaced
        # by spaces, keeping newlines so the text layout is unchanged.
        text = token.group(0)
        if text[0] == '/':
            return re.sub(r'[^\n]', ' ', text)
        return text

    # Literals are matched first so that "//" or "/*" inside a quoted string
    # is never mistaken for the start of a comment.
    code = re.sub(
        r'"(?:\\.|[^"\\\n])*"'   # string literal (kept)
        r"|'(?:\\.|[^'\\\n])'"   # character literal (kept)
        r'|//[^\n]*'             # line comment (blanked)
        r'|/\*.*?\*/',           # block comment (blanked)
        _blank,
        source,
        flags=re.DOTALL,
    )

    array = re.search(
        r'static\s+struct\s+BurnRomInfo\s+'
        + re.escape(set_name)
        + r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
        code,
        re.DOTALL,
    )
    if array is None:
        return []

    return [
        {
            'name': entry.group(1),
            'size': int(entry.group(2), 16),
            'crc32': format(int(entry.group(3), 16), '08x'),
        }
        for entry in _ROM_ENTRY_RE.finditer(array.group(1))
        if entry.group(1)  # guard against an empty-name sentinel
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
    """Walk the FBNeo driver source tree and extract all BIOS sets.

    Scans every ``.cpp`` file below ``<base_path>/src/burn/drv`` for
    BDF_BOARDROM drivers and parses the BurnRomInfo array of each one from
    the same file.

    Directories and files are visited in sorted order. ``os.walk`` itself
    yields them in arbitrary order, and a set name found in more than one
    file keeps the entry from the file visited last, so sorting makes the
    result reproducible.

    Args:
        base_path: Root of the FBNeo checkout.

    Returns:
        ``{set_name: {source_file, source_line, roms: [{name, size, crc32},
        ...]}}`` with ``source_file`` relative to *base_path*. An empty dict
        when the driver directory does not exist.
    """
    drv_path = Path(base_path) / 'src' / 'burn' / 'drv'
    if not drv_path.is_dir():
        return {}

    results: dict[str, dict] = {}

    for root, dirs, files in os.walk(drv_path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for fname in sorted(files):
            if not fname.endswith('.cpp'):
                continue

            filepath = Path(root) / fname
            # errors='replace' keeps non-UTF-8 bytes from aborting the scan.
            source = filepath.read_text(encoding='utf-8', errors='replace')
            rel_path = str(filepath.relative_to(base_path))

            for set_name, meta in find_bios_sets(source, rel_path).items():
                results[set_name] = {
                    'source_file': meta['source_file'],
                    'source_line': meta['source_line'],
                    'roms': parse_rom_info(source, set_name),
                }

    return results
|
||||||
190
tests/test_fbneo_parser.py
Normal file
190
tests/test_fbneo_parser.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
"""Tests for the FBNeo source parser."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from scripts.scraper.fbneo_parser import (
|
||||||
|
find_bios_sets,
|
||||||
|
parse_fbneo_source_tree,
|
||||||
|
parse_rom_info,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fixture: a driver whose BurnDriver struct carries BDF_BOARDROM. The ROM
# table lists five images followed by the empty-name terminator entry.
NEOGEO_FIXTURE = """\
static struct BurnRomInfo neogeoRomDesc[] = {
{ "sp-s2.sp1", 0x020000, 0x9036d879, BRF_ESS | BRF_BIOS },
{ "sp-s.sp1", 0x020000, 0xc7f2fa45, BRF_ESS | BRF_BIOS },
{ "asia-s3.rom", 0x020000, 0x91b64be3, BRF_ESS | BRF_BIOS },
{ "vs-bios.rom", 0x020000, 0xf0e8f27d, BRF_ESS | BRF_BIOS },
{ "uni-bios.rom", 0x020000, 0x2d50996a, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};

STD_ROM_FN(neogeo)

struct BurnDriver BurnDrvneogeo = {
"neogeo", NULL, NULL, NULL, "1990",
"Neo Geo\\0", "BIOS only", "SNK", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_BIOS, 0,
NULL, neogeoRomInfo, neogeoRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""

# Fixture: a second BDF_BOARDROM driver whose ROM table has three entries
# before the terminator.
PGM_FIXTURE = """\
static struct BurnRomInfo pgmRomDesc[] = {
{ "pgm_t01s.rom", 0x200000, 0x1a7123a0, BRF_GRA },
{ "pgm_m01s.rom", 0x200000, 0x45ae7159, BRF_SND },
{ "pgm_p01s.rom", 0x020000, 0xe42b166e, BRF_ESS | BRF_BIOS },
{ "", 0, 0, 0 }
};

STD_ROM_FN(pgm)

struct BurnDriver BurnDrvpgm = {
"pgm", NULL, NULL, NULL, "1997",
"PGM (Polygame Master)\\0", "BIOS only", "IGS", "PGM",
NULL, NULL, NULL, NULL,
BDF_BOARDROM, 0, HARDWARE_IGS_PGM,
GBF_BIOS, 0,
NULL, pgmRomInfo, pgmRomName, NULL, NULL, NULL, NULL,
pgmInputInfo, pgmDIPInfo,
NULL, NULL, NULL, NULL, 0x900,
448, 224, 4, 3
};
"""

# Fixture: a driver flagged BDF_GAME_WORKING instead of BDF_BOARDROM; it
# names "neogeo" as a quoted string but must not be reported as a BIOS set.
NON_BIOS_FIXTURE = """\
static struct BurnRomInfo mslugRomDesc[] = {
{ "201-p1.p1", 0x100000, 0x08d8daa5, BRF_ESS | BRF_PRG },
{ "", 0, 0, 0 }
};

STD_ROM_FN(mslug)

struct BurnDriver BurnDrvmslug = {
"mslug", NULL, "neogeo", NULL, "1996",
"Metal Slug\\0", NULL, "Nazca", "Neo Geo MVS",
NULL, NULL, NULL, NULL,
BDF_GAME_WORKING, 2, HARDWARE_PREFIX_CARTRIDGE | HARDWARE_SNK_NEOGEO,
GBF_PLATFORM | GBF_HORSHOOT, 0,
NULL, mslugRomInfo, mslugRomName, NULL, NULL, NULL, NULL,
neogeoInputInfo, neogeoDIPInfo,
NULL, NULL, NULL, NULL, 0x1000,
304, 224, 4, 3
};
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindBiosSets(unittest.TestCase):
    """Detection of BDF_BOARDROM drivers by find_bios_sets."""

    def test_detects_neogeo(self) -> None:
        """The neogeo driver is reported with the file name passed in."""
        found = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
        self.assertIn('neogeo', found)
        self.assertEqual(found['neogeo']['source_file'], 'd_neogeo.cpp')

    def test_detects_pgm(self) -> None:
        """The pgm driver is reported with the file name passed in."""
        found = find_bios_sets(PGM_FIXTURE, 'd_pgm.cpp')
        self.assertIn('pgm', found)
        self.assertEqual(found['pgm']['source_file'], 'd_pgm.cpp')

    def test_ignores_non_bios(self) -> None:
        """A driver without BDF_BOARDROM yields no result."""
        found = find_bios_sets(NON_BIOS_FIXTURE, 'd_neogeo.cpp')
        self.assertEqual(found, {})

    def test_source_line_positive(self) -> None:
        """The recorded source line is greater than zero."""
        found = find_bios_sets(NEOGEO_FIXTURE, 'd_neogeo.cpp')
        line_number = found['neogeo']['source_line']
        self.assertGreater(line_number, 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseRomInfo(unittest.TestCase):
    """ROM table extraction by parse_rom_info."""

    def test_neogeo_rom_count(self) -> None:
        """All five neogeo entries are returned."""
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        self.assertEqual(len(entries), 5)

    def test_sentinel_skipped(self) -> None:
        """The empty-name terminator is not part of the result."""
        entries = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')
        rom_names = [entry['name'] for entry in entries]
        self.assertNotIn('', rom_names)

    def test_crc32_lowercase_hex(self) -> None:
        """CRC32 is rendered as eight lowercase hex digits."""
        head = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')[0]
        self.assertEqual(head['crc32'], '9036d879')
        self.assertRegex(head['crc32'], r'^[0-9a-f]{8}$')

    def test_no_sha1(self) -> None:
        """No entry carries a sha1 key."""
        for entry in parse_rom_info(NEOGEO_FIXTURE, 'neogeo'):
            self.assertNotIn('sha1', entry)

    def test_neogeo_first_rom(self) -> None:
        """Name, size and CRC32 of the first neogeo entry."""
        head = parse_rom_info(NEOGEO_FIXTURE, 'neogeo')[0]
        self.assertEqual(head['name'], 'sp-s2.sp1')
        self.assertEqual(head['size'], 0x020000)
        self.assertEqual(head['crc32'], '9036d879')

    def test_pgm_rom_count(self) -> None:
        """All three pgm entries are returned."""
        entries = parse_rom_info(PGM_FIXTURE, 'pgm')
        self.assertEqual(len(entries), 3)

    def test_pgm_bios_entry(self) -> None:
        """Name and CRC32 of the third pgm entry."""
        third = parse_rom_info(PGM_FIXTURE, 'pgm')[2]
        self.assertEqual(third['name'], 'pgm_p01s.rom')
        self.assertEqual(third['crc32'], 'e42b166e')

    def test_unknown_set_returns_empty(self) -> None:
        """A set name with no RomDesc array gives an empty list."""
        entries = parse_rom_info(NEOGEO_FIXTURE, 'nonexistent')
        self.assertEqual(entries, [])
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseSourceTree(unittest.TestCase):
    """Directory traversal by parse_fbneo_source_tree."""

    def test_walks_drv_directory(self) -> None:
        """A .cpp file in a subdirectory of src/burn/drv is parsed."""
        with tempfile.TemporaryDirectory() as checkout:
            neogeo_dir = Path(checkout) / 'src' / 'burn' / 'drv' / 'neogeo'
            neogeo_dir.mkdir(parents=True)
            (neogeo_dir / 'd_neogeo.cpp').write_text(NEOGEO_FIXTURE)

            sets = parse_fbneo_source_tree(checkout)

            self.assertIn('neogeo', sets)
            self.assertEqual(len(sets['neogeo']['roms']), 5)

    def test_skips_non_cpp(self) -> None:
        """Files that do not end in .cpp are not scanned."""
        with tempfile.TemporaryDirectory() as checkout:
            drv_root = Path(checkout) / 'src' / 'burn' / 'drv'
            drv_root.mkdir(parents=True)
            (drv_root / 'd_neogeo.h').write_text(NEOGEO_FIXTURE)

            sets = parse_fbneo_source_tree(checkout)

            self.assertEqual(sets, {})

    def test_missing_directory_returns_empty(self) -> None:
        """A checkout without src/burn/drv gives an empty dict."""
        with tempfile.TemporaryDirectory() as checkout:
            sets = parse_fbneo_source_tree(checkout)
            self.assertEqual(sets, {})

    def test_multiple_sets(self) -> None:
        """Two BIOS drivers in one file are both reported."""
        both = '\n'.join((NEOGEO_FIXTURE, PGM_FIXTURE))
        with tempfile.TemporaryDirectory() as checkout:
            drv_root = Path(checkout) / 'src' / 'burn' / 'drv'
            drv_root.mkdir(parents=True)
            (drv_root / 'd_combined.cpp').write_text(both)

            sets = parse_fbneo_source_tree(checkout)

            self.assertIn('neogeo', sets)
            self.assertIn('pgm', sets)
||||||
|
|
||||||
|
# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
||||||
Reference in New Issue
Block a user