chore: lint and format entire codebase

Run ruff check --fix: remove unused imports (F401), fix f-strings
without placeholders (F541), remove unused variables (F841), fix
duplicate dict key (F601).

Run isort --profile black: normalize import ordering across all files.

Run ruff format: apply consistent formatting (black-compatible) to
all 58 Python files.

Three intentional E402 findings (module-level import not at top of file)
remain: those imports must run after require_yaml() has confirmed that yaml
is available.
This commit is contained in:
Abdessamad Derraz
2026-04-01 13:17:55 +02:00
parent a2d30557e4
commit 0a272dc4e9
56 changed files with 5115 additions and 2679 deletions

View File

@@ -34,40 +34,40 @@ def merge_mame_profile(
profile = _load_yaml(profile_path)
hashes = _load_json(hashes_path)
profile['core_version'] = hashes.get('version', profile.get('core_version'))
profile["core_version"] = hashes.get("version", profile.get("core_version"))
files = profile.get('files', [])
bios_zip, non_bios = _split_files(files, lambda f: f.get('category') == 'bios_zip')
files = profile.get("files", [])
bios_zip, non_bios = _split_files(files, lambda f: f.get("category") == "bios_zip")
existing_by_name: dict[str, dict] = {}
for entry in bios_zip:
key = _zip_name_to_set(entry['name'])
key = _zip_name_to_set(entry["name"])
existing_by_name[key] = entry
updated_bios: list[dict] = []
matched_names: set[str] = set()
for set_name, set_data in hashes.get('bios_sets', {}).items():
contents = _build_contents(set_data.get('roms', []))
for set_name, set_data in hashes.get("bios_sets", {}).items():
contents = _build_contents(set_data.get("roms", []))
source_ref = _build_source_ref(set_data)
if set_name in existing_by_name:
# Update existing entry: preserve manual fields, update contents
entry = existing_by_name[set_name].copy()
entry['contents'] = contents
entry["contents"] = contents
if source_ref:
entry['source_ref'] = source_ref
entry["source_ref"] = source_ref
updated_bios.append(entry)
matched_names.add(set_name)
elif add_new:
# New BIOS set — only added to the main profile
entry = {
'name': f'{set_name}.zip',
'required': True,
'category': 'bios_zip',
'system': None,
'source_ref': source_ref,
'contents': contents,
"name": f"{set_name}.zip",
"required": True,
"category": "bios_zip",
"system": None,
"source_ref": source_ref,
"contents": contents,
}
updated_bios.append(entry)
@@ -77,7 +77,7 @@ def merge_mame_profile(
if set_name not in matched_names:
updated_bios.append(entry)
profile['files'] = non_bios + updated_bios
profile["files"] = non_bios + updated_bios
if write:
_backup_and_write(profile_path, profile)
@@ -102,49 +102,49 @@ def merge_fbneo_profile(
profile = _load_yaml(profile_path)
hashes = _load_json(hashes_path)
profile['core_version'] = hashes.get('version', profile.get('core_version'))
profile["core_version"] = hashes.get("version", profile.get("core_version"))
files = profile.get('files', [])
archive_files, non_archive = _split_files(files, lambda f: 'archive' in f)
files = profile.get("files", [])
archive_files, non_archive = _split_files(files, lambda f: "archive" in f)
existing_by_key: dict[tuple[str, str], dict] = {}
for entry in archive_files:
key = (entry['archive'], entry['name'])
key = (entry["archive"], entry["name"])
existing_by_key[key] = entry
merged: list[dict] = []
matched_keys: set[tuple[str, str]] = set()
for set_name, set_data in hashes.get('bios_sets', {}).items():
archive_name = f'{set_name}.zip'
for set_name, set_data in hashes.get("bios_sets", {}).items():
archive_name = f"{set_name}.zip"
source_ref = _build_source_ref(set_data)
for rom in set_data.get('roms', []):
rom_name = rom['name']
for rom in set_data.get("roms", []):
rom_name = rom["name"]
key = (archive_name, rom_name)
if key in existing_by_key:
entry = existing_by_key[key].copy()
entry['size'] = rom['size']
entry['crc32'] = rom['crc32']
if rom.get('sha1'):
entry['sha1'] = rom['sha1']
entry["size"] = rom["size"]
entry["crc32"] = rom["crc32"]
if rom.get("sha1"):
entry["sha1"] = rom["sha1"]
if source_ref:
entry['source_ref'] = source_ref
entry["source_ref"] = source_ref
merged.append(entry)
matched_keys.add(key)
elif add_new:
entry = {
'name': rom_name,
'archive': archive_name,
'required': True,
'size': rom['size'],
'crc32': rom['crc32'],
"name": rom_name,
"archive": archive_name,
"required": True,
"size": rom["size"],
"crc32": rom["crc32"],
}
if rom.get('sha1'):
entry['sha1'] = rom['sha1']
if rom.get("sha1"):
entry["sha1"] = rom["sha1"]
if source_ref:
entry['source_ref'] = source_ref
entry["source_ref"] = source_ref
merged.append(entry)
# Entries not matched stay untouched
@@ -152,7 +152,7 @@ def merge_fbneo_profile(
if key not in matched_keys:
merged.append(entry)
profile['files'] = non_archive + merged
profile["files"] = non_archive + merged
if write:
_backup_and_write_fbneo(profile_path, profile, hashes)
@@ -163,7 +163,7 @@ def merge_fbneo_profile(
def compute_diff(
profile_path: str,
hashes_path: str,
mode: str = 'mame',
mode: str = "mame",
) -> dict[str, Any]:
"""Compute diff between profile and hashes without writing.
@@ -172,7 +172,7 @@ def compute_diff(
profile = _load_yaml(profile_path)
hashes = _load_json(hashes_path)
if mode == 'mame':
if mode == "mame":
return _diff_mame(profile, hashes)
return _diff_fbneo(profile, hashes)
@@ -181,26 +181,26 @@ def _diff_mame(
profile: dict[str, Any],
hashes: dict[str, Any],
) -> dict[str, Any]:
files = profile.get('files', [])
bios_zip, _ = _split_files(files, lambda f: f.get('category') == 'bios_zip')
files = profile.get("files", [])
bios_zip, _ = _split_files(files, lambda f: f.get("category") == "bios_zip")
existing_by_name: dict[str, dict] = {}
for entry in bios_zip:
existing_by_name[_zip_name_to_set(entry['name'])] = entry
existing_by_name[_zip_name_to_set(entry["name"])] = entry
added: list[str] = []
updated: list[str] = []
unchanged = 0
bios_sets = hashes.get('bios_sets', {})
bios_sets = hashes.get("bios_sets", {})
for set_name, set_data in bios_sets.items():
if set_name not in existing_by_name:
added.append(set_name)
continue
old_entry = existing_by_name[set_name]
new_contents = _build_contents(set_data.get('roms', []))
old_contents = old_entry.get('contents', [])
new_contents = _build_contents(set_data.get("roms", []))
old_contents = old_entry.get("contents", [])
if _contents_differ(old_contents, new_contents):
updated.append(set_name)
@@ -213,11 +213,11 @@ def _diff_mame(
)
return {
'added': added,
'updated': updated,
'removed': [],
'unchanged': unchanged,
'out_of_scope': out_of_scope,
"added": added,
"updated": updated,
"removed": [],
"unchanged": unchanged,
"out_of_scope": out_of_scope,
}
@@ -225,24 +225,24 @@ def _diff_fbneo(
profile: dict[str, Any],
hashes: dict[str, Any],
) -> dict[str, Any]:
files = profile.get('files', [])
archive_files, _ = _split_files(files, lambda f: 'archive' in f)
files = profile.get("files", [])
archive_files, _ = _split_files(files, lambda f: "archive" in f)
existing_by_key: dict[tuple[str, str], dict] = {}
for entry in archive_files:
existing_by_key[(entry['archive'], entry['name'])] = entry
existing_by_key[(entry["archive"], entry["name"])] = entry
added: list[str] = []
updated: list[str] = []
unchanged = 0
seen_keys: set[tuple[str, str]] = set()
bios_sets = hashes.get('bios_sets', {})
bios_sets = hashes.get("bios_sets", {})
for set_name, set_data in bios_sets.items():
archive_name = f'{set_name}.zip'
for rom in set_data.get('roms', []):
key = (archive_name, rom['name'])
archive_name = f"{set_name}.zip"
for rom in set_data.get("roms", []):
key = (archive_name, rom["name"])
seen_keys.add(key)
label = f"{archive_name}:{rom['name']}"
@@ -251,7 +251,9 @@ def _diff_fbneo(
continue
old = existing_by_key[key]
if old.get('crc32') != rom.get('crc32') or old.get('size') != rom.get('size'):
if old.get("crc32") != rom.get("crc32") or old.get("size") != rom.get(
"size"
):
updated.append(label)
else:
unchanged += 1
@@ -259,11 +261,11 @@ def _diff_fbneo(
out_of_scope = sum(1 for k in existing_by_key if k not in seen_keys)
return {
'added': added,
'updated': updated,
'removed': [],
'unchanged': unchanged,
'out_of_scope': out_of_scope,
"added": added,
"updated": updated,
"removed": [],
"unchanged": unchanged,
"out_of_scope": out_of_scope,
}
@@ -271,12 +273,12 @@ def _diff_fbneo(
# Parse the YAML file at `path` (read as UTF-8) with yaml.safe_load.
# The `or {}` fallback means an empty/null document (or any other falsy
# parse result) is returned as an empty dict. Errors from open() and
# safe_load() are not caught here and propagate to the caller.
def _load_yaml(path: str) -> dict[str, Any]:
with open(path, encoding='utf-8') as f:
with open(path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
# Parse the JSON file at `path` (read as UTF-8) and return json.load's result
# unchanged. Unlike _load_yaml there is no empty-document fallback, and no
# exception handling: open()/json.load() errors propagate to the caller.
def _load_json(path: str) -> dict[str, Any]:
with open(path, encoding='utf-8') as f:
with open(path, encoding="utf-8") as f:
return json.load(f)
@@ -295,7 +297,7 @@ def _split_files(
# Convert an archive file name to its set name by dropping a trailing ".zip"
# (exactly four characters). The suffix test is case-sensitive, so a name
# ending in ".ZIP" — or any name without the suffix — is returned unchanged.
def _zip_name_to_set(name: str) -> str:
if name.endswith('.zip'):
if name.endswith(".zip"):
return name[:-4]
return name
@@ -304,42 +306,42 @@ def _build_contents(roms: list[dict]) -> list[dict]:
contents: list[dict] = []
for rom in roms:
entry: dict[str, Any] = {
'name': rom['name'],
'size': rom['size'],
'crc32': rom['crc32'],
"name": rom["name"],
"size": rom["size"],
"crc32": rom["crc32"],
}
if rom.get('sha1'):
entry['sha1'] = rom['sha1']
desc = rom.get('bios_description') or rom.get('bios_label') or ''
if rom.get("sha1"):
entry["sha1"] = rom["sha1"]
desc = rom.get("bios_description") or rom.get("bios_label") or ""
if desc:
entry['description'] = desc
if rom.get('bad_dump'):
entry['bad_dump'] = True
entry["description"] = desc
if rom.get("bad_dump"):
entry["bad_dump"] = True
contents.append(entry)
return contents
# Build a "file:line" reference string from a set's metadata dict.
# Reads the optional "source_file" and "source_line" keys of `set_data`.
# Returns "<source_file>:<source_line>" when the file is truthy and a line is
# present; otherwise returns source_file on its own (the "" default when the
# key is absent).
def _build_source_ref(set_data: dict) -> str:
source_file = set_data.get('source_file', '')
source_line = set_data.get('source_line')
source_file = set_data.get("source_file", "")
source_line = set_data.get("source_line")
# `is not None` (rather than truthiness) keeps a source_line of 0 in the result.
if source_file and source_line is not None:
return f'{source_file}:{source_line}'
return f"{source_file}:{source_line}"
return source_file
def _contents_differ(old: list[dict], new: list[dict]) -> bool:
if len(old) != len(new):
return True
old_by_name = {c['name']: c for c in old}
old_by_name = {c["name"]: c for c in old}
for entry in new:
prev = old_by_name.get(entry['name'])
prev = old_by_name.get(entry["name"])
if prev is None:
return True
if prev.get('crc32') != entry.get('crc32'):
if prev.get("crc32") != entry.get("crc32"):
return True
if prev.get('size') != entry.get('size'):
if prev.get("size") != entry.get("size"):
return True
if prev.get('sha1') != entry.get('sha1'):
if prev.get("sha1") != entry.get("sha1"):
return True
return False
@@ -352,15 +354,15 @@ def _backup_and_write(path: str, data: dict) -> None:
(core_version, contents, source_ref), and appends new entries.
"""
p = Path(path)
backup = p.with_suffix('.old.yml')
backup = p.with_suffix(".old.yml")
shutil.copy2(p, backup)
original = p.read_text(encoding='utf-8')
patched = _patch_core_version(original, data.get('core_version', ''))
patched = _patch_bios_entries(patched, data.get('files', []))
patched = _append_new_entries(patched, data.get('files', []), original)
original = p.read_text(encoding="utf-8")
patched = _patch_core_version(original, data.get("core_version", ""))
patched = _patch_bios_entries(patched, data.get("files", []))
patched = _append_new_entries(patched, data.get("files", []), original)
p.write_text(patched, encoding='utf-8')
p.write_text(patched, encoding="utf-8")
def _patch_core_version(text: str, version: str) -> str:
@@ -368,8 +370,9 @@ def _patch_core_version(text: str, version: str) -> str:
if not version:
return text
import re
return re.sub(
r'^(core_version:\s*).*$',
r"^(core_version:\s*).*$",
rf'\g<1>"{version}"',
text,
count=1,
@@ -390,18 +393,18 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
# Build a lookup of what to patch
patches: dict[str, dict] = {}
for fe in files:
if fe.get('category') != 'bios_zip':
if fe.get("category") != "bios_zip":
continue
patches[fe['name']] = fe
patches[fe["name"]] = fe
if not patches:
return text
lines = text.split('\n')
lines = text.split("\n")
# Find all entry start positions (line indices)
entry_starts: list[tuple[int, str]] = []
for i, line in enumerate(lines):
m = re.match(r'^ - name:\s*(.+?)\s*$', line)
m = re.match(r"^ - name:\s*(.+?)\s*$", line)
if m:
entry_starts.append((i, m.group(1).strip('"').strip("'")))
@@ -412,8 +415,8 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
continue
fe = patches[entry_name]
contents = fe.get('contents', [])
source_ref = fe.get('source_ref', '')
contents = fe.get("contents", [])
source_ref = fe.get("source_ref", "")
# Find the last "owned" line of this entry
# Owned = indented with 4+ spaces (field lines of this entry)
@@ -422,11 +425,11 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
stripped = lines[j].strip()
if not stripped:
break # blank line = end of entry
if stripped.startswith('#'):
if stripped.startswith("#"):
break # comment = belongs to next entry
if re.match(r'^ - ', lines[j]):
if re.match(r"^ - ", lines[j]):
break # next list item
if re.match(r'^ ', lines[j]) or re.match(r'^ \w', lines[j]):
if re.match(r"^ ", lines[j]) or re.match(r"^ \w", lines[j]):
last_owned = j
else:
break
@@ -435,7 +438,7 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
if source_ref:
found_sr = False
for j in range(start_line + 1, last_owned + 1):
if re.match(r'^ source_ref:', lines[j]):
if re.match(r"^ source_ref:", lines[j]):
lines[j] = f' source_ref: "{source_ref}"'
found_sr = True
break
@@ -447,10 +450,10 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
contents_start = None
contents_end = None
for j in range(start_line + 1, last_owned + 1):
if re.match(r'^ contents:', lines[j]):
if re.match(r"^ contents:", lines[j]):
contents_start = j
elif contents_start is not None:
if re.match(r'^ ', lines[j]):
if re.match(r"^ ", lines[j]):
contents_end = j
else:
break
@@ -458,29 +461,29 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
contents_end = contents_start
if contents_start is not None:
del lines[contents_start:contents_end + 1]
last_owned -= (contents_end - contents_start + 1)
del lines[contents_start : contents_end + 1]
last_owned -= contents_end - contents_start + 1
# Insert new contents after last owned line
if contents:
new_lines = _format_contents(contents).split('\n')
new_lines = _format_contents(contents).split("\n")
for k, cl in enumerate(new_lines):
lines.insert(last_owned + 1 + k, cl)
return '\n'.join(lines)
return "\n".join(lines)
def _append_new_entries(text: str, files: list[dict], original: str) -> str:
"""Append new bios_zip entries (system=None) that aren't in the original."""
# Parse original to get existing entry names (more reliable than text search)
existing_data = yaml.safe_load(original) or {}
existing_names = {f['name'] for f in existing_data.get('files', [])}
existing_names = {f["name"] for f in existing_data.get("files", [])}
new_entries = []
for fe in files:
if fe.get('category') != 'bios_zip' or fe.get('system') is not None:
if fe.get("category") != "bios_zip" or fe.get("system") is not None:
continue
if fe['name'] in existing_names:
if fe["name"] in existing_names:
continue
new_entries.append(fe)
@@ -489,36 +492,36 @@ def _append_new_entries(text: str, files: list[dict], original: str) -> str:
lines = []
for fe in new_entries:
lines.append(f'\n - name: {fe["name"]}')
lines.append(f' required: {str(fe["required"]).lower()}')
lines.append(f' category: bios_zip')
if fe.get('source_ref'):
lines.append(f"\n - name: {fe['name']}")
lines.append(f" required: {str(fe['required']).lower()}")
lines.append(" category: bios_zip")
if fe.get("source_ref"):
lines.append(f' source_ref: "{fe["source_ref"]}"')
if fe.get('contents'):
lines.append(_format_contents(fe['contents']))
if fe.get("contents"):
lines.append(_format_contents(fe["contents"]))
if lines:
text = text.rstrip('\n') + '\n' + '\n'.join(lines) + '\n'
text = text.rstrip("\n") + "\n" + "\n".join(lines) + "\n"
return text
# Render a list of ROM dicts as a block of YAML text lines.
# Output starts with a "contents:" header, then one "- name:" item per ROM
# followed by whichever of description / size / crc32 / sha1 / bad_dump are
# truthy on that ROM. Lines are joined with "\n"; no trailing newline is added.
# rom["name"] is required (KeyError if absent); every other field is optional.
# NOTE(review): the leading indentation inside these string literals cannot be
# confirmed from this view — check it against the real file before editing.
def _format_contents(contents: list[dict]) -> str:
"""Format a contents list as YAML text."""
lines = [' contents:']
lines = [" contents:"]
for rom in contents:
lines.append(f' - name: {rom["name"]}')
if rom.get('description'):
lines.append(f' description: {rom["description"]}')
if rom.get('size'):
lines.append(f' size: {rom["size"]}')
if rom.get('crc32'):
# NOTE(review): name and description are interpolated unquoted, whereas
# crc32/sha1 below are wrapped in double quotes. A value containing
# YAML-significant text (e.g. ": " or " #") would presumably not parse back
# as the same string — confirm what the inputs can contain.
lines.append(f" - name: {rom['name']}")
if rom.get("description"):
lines.append(f" description: {rom['description']}")
# Truthiness test: a size of 0 is treated the same as a missing size.
if rom.get("size"):
lines.append(f" size: {rom['size']}")
if rom.get("crc32"):
lines.append(f' crc32: "{rom["crc32"]}"')
if rom.get('sha1'):
if rom.get("sha1"):
lines.append(f' sha1: "{rom["sha1"]}"')
if rom.get('bad_dump'):
lines.append(f' bad_dump: true')
return '\n'.join(lines)
# bad_dump is only ever written as the literal `true`; falsy values are omitted.
if rom.get("bad_dump"):
lines.append(" bad_dump: true")
return "\n".join(lines)
def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None:
@@ -529,37 +532,38 @@ def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None:
Existing entries are left untouched (CRC32 changes are rare).
"""
p = Path(path)
backup = p.with_suffix('.old.yml')
backup = p.with_suffix(".old.yml")
shutil.copy2(p, backup)
original = p.read_text(encoding='utf-8')
patched = _patch_core_version(original, data.get('core_version', ''))
original = p.read_text(encoding="utf-8")
patched = _patch_core_version(original, data.get("core_version", ""))
# Identify new ROM entries by comparing parsed data keys, not text search
existing_data = yaml.safe_load(original) or {}
existing_keys = {
(f['archive'], f['name'])
for f in existing_data.get('files', [])
if f.get('archive')
(f["archive"], f["name"])
for f in existing_data.get("files", [])
if f.get("archive")
}
new_roms = [
f for f in data.get('files', [])
if f.get('archive') and (f['archive'], f['name']) not in existing_keys
f
for f in data.get("files", [])
if f.get("archive") and (f["archive"], f["name"]) not in existing_keys
]
if new_roms:
lines = []
for fe in new_roms:
lines.append(f' - name: "{fe["name"]}"')
lines.append(f' archive: {fe["archive"]}')
lines.append(f' required: {str(fe.get("required", True)).lower()}')
if fe.get('size'):
lines.append(f' size: {fe["size"]}')
if fe.get('crc32'):
lines.append(f" archive: {fe['archive']}")
lines.append(f" required: {str(fe.get('required', True)).lower()}")
if fe.get("size"):
lines.append(f" size: {fe['size']}")
if fe.get("crc32"):
lines.append(f' crc32: "{fe["crc32"]}"')
if fe.get('source_ref'):
if fe.get("source_ref"):
lines.append(f' source_ref: "{fe["source_ref"]}"')
lines.append('')
patched = patched.rstrip('\n') + '\n\n' + '\n'.join(lines)
lines.append("")
patched = patched.rstrip("\n") + "\n\n" + "\n".join(lines)
p.write_text(patched, encoding='utf-8')
p.write_text(patched, encoding="utf-8")

View File

@@ -4,8 +4,8 @@ from __future__ import annotations
import json
import sys
import urllib.request
import urllib.error
import urllib.request
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
@@ -14,6 +14,7 @@ from pathlib import Path
@dataclass
class BiosRequirement:
"""A single BIOS file requirement from a platform source."""
name: str
system: str
sha1: str | None = None
@@ -29,9 +30,12 @@ class BiosRequirement:
@dataclass
class ChangeSet:
"""Differences between scraped requirements and current config."""
added: list[BiosRequirement] = field(default_factory=list)
removed: list[BiosRequirement] = field(default_factory=list)
modified: list[tuple[BiosRequirement, BiosRequirement]] = field(default_factory=list)
modified: list[tuple[BiosRequirement, BiosRequirement]] = field(
default_factory=list
)
@property
def has_changes(self) -> bool:
@@ -80,7 +84,9 @@ class BaseScraper(ABC):
if not self.url:
raise ValueError("No source URL configured")
try:
req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
req = urllib.request.Request(
self.url, headers={"User-Agent": "retrobios-scraper/1.0"}
)
with urllib.request.urlopen(req, timeout=30) as resp:
self._raw_data = _read_limited(resp).decode("utf-8")
return self._raw_data
@@ -113,35 +119,49 @@ class BaseScraper(ABC):
changes.added.append(req)
else:
existing_file = existing[key]
if req.sha1 and existing_file.get("sha1") and req.sha1 != existing_file["sha1"]:
changes.modified.append((
BiosRequirement(
name=existing_file["name"],
system=key[0],
sha1=existing_file.get("sha1"),
md5=existing_file.get("md5"),
),
req,
))
elif req.md5 and existing_file.get("md5") and req.md5 != existing_file["md5"]:
changes.modified.append((
BiosRequirement(
name=existing_file["name"],
system=key[0],
md5=existing_file.get("md5"),
),
req,
))
if (
req.sha1
and existing_file.get("sha1")
and req.sha1 != existing_file["sha1"]
):
changes.modified.append(
(
BiosRequirement(
name=existing_file["name"],
system=key[0],
sha1=existing_file.get("sha1"),
md5=existing_file.get("md5"),
),
req,
)
)
elif (
req.md5
and existing_file.get("md5")
and req.md5 != existing_file["md5"]
):
changes.modified.append(
(
BiosRequirement(
name=existing_file["name"],
system=key[0],
md5=existing_file.get("md5"),
),
req,
)
)
for key in existing:
if key not in scraped_map:
f = existing[key]
changes.removed.append(BiosRequirement(
name=f["name"],
system=key[0],
sha1=f.get("sha1"),
md5=f.get("md5"),
))
changes.removed.append(
BiosRequirement(
name=f["name"],
system=key[0],
sha1=f.get("sha1"),
md5=f.get("md5"),
)
)
return changes
@@ -163,10 +183,13 @@ def fetch_github_latest_version(repo: str) -> str | None:
"""Fetch the latest release version tag from a GitHub repo."""
url = f"https://api.github.com/repos/{repo}/releases/latest"
try:
req = urllib.request.Request(url, headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
})
req = urllib.request.Request(
url,
headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
},
)
with urllib.request.urlopen(req, timeout=15) as resp:
data = json.loads(resp.read())
return data.get("tag_name", "")
@@ -174,7 +197,9 @@ def fetch_github_latest_version(repo: str) -> str | None:
return None
def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirements") -> None:
def scraper_cli(
scraper_class: type, description: str = "Scrape BIOS requirements"
) -> None:
"""Shared CLI entry point for all scrapers. Eliminates main() boilerplate."""
import argparse
@@ -203,13 +228,23 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement
return
if args.json:
data = [{"name": r.name, "system": r.system, "sha1": r.sha1, "md5": r.md5,
"size": r.size, "required": r.required} for r in reqs]
data = [
{
"name": r.name,
"system": r.system,
"sha1": r.sha1,
"md5": r.md5,
"size": r.size,
"required": r.required,
}
for r in reqs
]
print(json.dumps(data, indent=2))
return
if args.output:
import yaml
# Use scraper's generate_platform_yaml() if available (includes
# platform metadata, cores list, standalone_cores, etc.)
if hasattr(scraper, "generate_platform_yaml"):
@@ -224,7 +259,11 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement
if req.native_id:
sys_entry["native_id"] = req.native_id
config["systems"][sys_id] = sys_entry
entry = {"name": req.name, "destination": req.destination or req.name, "required": req.required}
entry = {
"name": req.name,
"destination": req.destination or req.name,
"required": req.required,
}
if req.sha1:
entry["sha1"] = req.sha1
if req.md5:
@@ -265,10 +304,13 @@ def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None:
"""Fetch the most recent matching tag from a GitHub repo."""
url = f"https://api.github.com/repos/{repo}/tags?per_page=50"
try:
req = urllib.request.Request(url, headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
})
req = urllib.request.Request(
url,
headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
},
)
with urllib.request.urlopen(req, timeout=15) as resp:
tags = json.loads(resp.read())
for tag in tags:

View File

@@ -12,8 +12,8 @@ import ast
import json
import re
import sys
import urllib.request
import urllib.error
import urllib.request
from pathlib import Path
import yaml
@@ -102,7 +102,6 @@ SYSTEM_SLUG_MAP = {
"dragon64": "dragon64",
"mc10": "mc10",
"msx2+": "microsoft-msx",
"msxturbor": "microsoft-msx",
"spectravideo": "spectravideo",
"tvc": "videoton-tvc",
"enterprise": "enterprise-64-128",
@@ -116,7 +115,7 @@ SYSTEM_SLUG_MAP = {
}
_MD5_RE = re.compile(r'^[a-fA-F0-9]+$')
_MD5_RE = re.compile(r"^[a-fA-F0-9]+$")
def _load_md5_index() -> dict[str, str]:
@@ -183,11 +182,11 @@ class Scraper(BaseScraper):
def _extract_systems_dict(self, raw: str) -> dict:
"""Extract and parse the 'systems' dict from the Python source via ast.literal_eval."""
match = re.search(r'^systems\s*=\s*\{', raw, re.MULTILINE)
match = re.search(r"^systems\s*=\s*\{", raw, re.MULTILINE)
if not match:
raise ValueError("Could not find 'systems = {' in batocera-systems")
start = match.start() + raw[match.start():].index("{")
start = match.start() + raw[match.start() :].index("{")
depth = 0
i = start
in_str = False
@@ -195,7 +194,7 @@ class Scraper(BaseScraper):
while i < len(raw):
ch = raw[i]
if in_str:
if ch == '\\':
if ch == "\\":
i += 2
continue
if ch == str_ch:
@@ -214,7 +213,7 @@ class Scraper(BaseScraper):
i += 1
i += 1
dict_str = raw[start:i + 1]
dict_str = raw[start : i + 1]
lines = []
for line in dict_str.split("\n"):
@@ -224,7 +223,7 @@ class Scraper(BaseScraper):
j = 0
while j < len(line):
ch = line[j]
if ch == '\\' and j + 1 < len(line):
if ch == "\\" and j + 1 < len(line):
clean.append(ch)
clean.append(line[j + 1])
j += 2
@@ -246,8 +245,8 @@ class Scraper(BaseScraper):
clean_dict_str = "\n".join(lines)
# OrderedDict({...}) -> just the inner dict literal
clean_dict_str = re.sub(r'OrderedDict\(\s*\{', '{', clean_dict_str)
clean_dict_str = re.sub(r'\}\s*\)', '}', clean_dict_str)
clean_dict_str = re.sub(r"OrderedDict\(\s*\{", "{", clean_dict_str)
clean_dict_str = re.sub(r"\}\s*\)", "}", clean_dict_str)
try:
return ast.literal_eval(clean_dict_str)
@@ -279,22 +278,24 @@ class Scraper(BaseScraper):
name = file_path.split("/")[-1] if "/" in file_path else file_path
requirements.append(BiosRequirement(
name=name,
system=system_slug,
md5=md5 or None,
destination=file_path,
required=True,
zipped_file=zipped_file or None,
native_id=sys_key,
))
requirements.append(
BiosRequirement(
name=name,
system=system_slug,
md5=md5 or None,
destination=file_path,
required=True,
zipped_file=zipped_file or None,
native_id=sys_key,
)
)
return requirements
def validate_format(self, raw_data: str) -> bool:
"""Validate batocera-systems format."""
has_systems = "systems" in raw_data and "biosFiles" in raw_data
has_dict = re.search(r'^systems\s*=\s*\{', raw_data, re.MULTILINE) is not None
has_dict = re.search(r"^systems\s*=\s*\{", raw_data, re.MULTILINE) is not None
has_md5 = '"md5"' in raw_data
has_file = '"file"' in raw_data
return has_systems and has_dict and has_md5 and has_file
@@ -336,7 +337,9 @@ class Scraper(BaseScraper):
systems[req.system]["files"].append(entry)
tag = fetch_github_latest_tag("batocera-linux/batocera.linux", prefix="batocera-")
tag = fetch_github_latest_tag(
"batocera-linux/batocera.linux", prefix="batocera-"
)
batocera_version = ""
if tag:
num = tag.removeprefix("batocera-")
@@ -344,7 +347,9 @@ class Scraper(BaseScraper):
batocera_version = num
if not batocera_version:
# Preserve existing version when fetch fails (offline mode)
existing = Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml"
existing = (
Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml"
)
if existing.exists():
with open(existing) as f:
old = yaml.safe_load(f) or {}
@@ -369,6 +374,7 @@ class Scraper(BaseScraper):
# Command-line entry point: hands this module's Scraper class and a
# description string to the shared scraper_cli() helper.
def main():
# scraper_cli is imported at call time, inside the function body.
from scripts.scraper.base_scraper import scraper_cli
scraper_cli(Scraper, "Scrape batocera BIOS requirements")

View File

@@ -19,7 +19,6 @@ the Ideal non-bad option is selected as canonical.
from __future__ import annotations
import re
import sys
try:
from .base_scraper import (
@@ -108,12 +107,33 @@ SYSTEM_ID_MAP: dict[str, str] = {
# Cores that overlap with BizHawk's system coverage
BIZHAWK_CORES = [
"gambatte", "mgba", "sameboy", "melonds", "snes9x", "bsnes",
"beetle_psx", "beetle_saturn", "beetle_pce", "beetle_pcfx",
"beetle_wswan", "beetle_vb", "beetle_ngp", "opera", "stella",
"picodrive", "ppsspp", "handy", "quicknes", "genesis_plus_gx",
"ares", "mupen64plus_next", "puae", "prboom", "virtualjaguar",
"vice_x64", "mame",
"gambatte",
"mgba",
"sameboy",
"melonds",
"snes9x",
"bsnes",
"beetle_psx",
"beetle_saturn",
"beetle_pce",
"beetle_pcfx",
"beetle_wswan",
"beetle_vb",
"beetle_ngp",
"opera",
"stella",
"picodrive",
"ppsspp",
"handy",
"quicknes",
"genesis_plus_gx",
"ares",
"mupen64plus_next",
"puae",
"prboom",
"virtualjaguar",
"vice_x64",
"mame",
]
@@ -137,9 +157,7 @@ def _safe_arithmetic(expr: str) -> int:
# Return `source` with /* ... */ block comments and `#if false ... #endif`
# regions deleted. Both patterns use re.DOTALL so a match may span lines, and
# both are non-greedy, so each match ends at the first closing token found.
# `//` line comments are not touched by this function.
def _strip_comments(source: str) -> str:
"""Remove block comments and #if false blocks."""
source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL)
source = re.sub(
r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL
)
# Non-greedy match: a nested #if/#endif pair inside a `#if false` region ends
# the match at the inner #endif, leaving the rest of the region in place.
source = re.sub(r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL)
return source
@@ -158,14 +176,14 @@ def parse_firmware_database(
var_to_hash: dict[str, str] = {}
file_re = re.compile(
r'(?:var\s+(\w+)\s*=\s*)?'
r'File\(\s*'
r"(?:var\s+(\w+)\s*=\s*)?"
r"File\(\s*"
r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*'
r'([^,]+?)\s*,\s*'
r"([^,]+?)\s*,\s*"
r'"([^"]+)"\s*,\s*'
r'"([^"]*)"'
r'(?:\s*,\s*isBad:\s*(true|false))?'
r'\s*\)'
r"(?:\s*,\s*isBad:\s*(true|false))?"
r"\s*\)"
)
for m in file_re.finditer(source):
@@ -194,15 +212,15 @@ def parse_firmware_database(
# FirmwareAndOption one-liner
fao_re = re.compile(
r'FirmwareAndOption\(\s*'
r"FirmwareAndOption\(\s*"
r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*'
r'([^,]+?)\s*,\s*'
r"([^,]+?)\s*,\s*"
r'"([^"]+)"\s*,\s*'
r'"([^"]+)"\s*,\s*'
r'"([^"]+)"\s*,\s*'
r'"([^"]*)"'
r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?'
r'\s*\)'
r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?"
r"\s*\)"
)
# Firmware(system, id, desc)
@@ -213,10 +231,10 @@ def parse_firmware_database(
# Option(system, id, in varref|File(...), status?)
option_re = re.compile(
r'Option\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*'
r'(?:in\s+(\w+)'
r"(?:in\s+(\w+)"
r'|File\(\s*"([A-Fa-f0-9]+)"\s*,\s*([^,]+?)\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\))'
r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?'
r'\s*\)'
r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?"
r"\s*\)"
)
# Collect firmware slots
@@ -269,15 +287,17 @@ def parse_firmware_database(
desc = m.group(6)
status = m.group(7) or "Acceptable"
records.append({
"system": system,
"firmware_id": fw_id,
"sha1": sha1,
"name": name,
"size": _safe_arithmetic(size_expr),
"description": desc,
"status": status,
})
records.append(
{
"system": system,
"firmware_id": fw_id,
"sha1": sha1,
"name": name,
"size": _safe_arithmetic(size_expr),
"description": desc,
"status": status,
}
)
# Build records from Firmware+Option pairs, picking best option
for (system, fw_id), options in slot_options.items():
@@ -291,15 +311,17 @@ def parse_firmware_database(
viable.sort(key=lambda x: STATUS_RANK.get(x[1], 2), reverse=True)
best_file, best_status = viable[0]
records.append({
"system": system,
"firmware_id": fw_id,
"sha1": best_file["sha1"],
"name": best_file["name"],
"size": best_file["size"],
"description": best_file.get("description", desc),
"status": best_status,
})
records.append(
{
"system": system,
"firmware_id": fw_id,
"sha1": best_file["sha1"],
"name": best_file["name"],
"size": best_file["size"],
"description": best_file.get("description", desc),
"status": best_status,
}
)
return records, files_by_hash

View File

@@ -13,19 +13,24 @@ Complements libretro_scraper (System.dat) with:
from __future__ import annotations
import json
import re
import sys
import urllib.request
import urllib.error
import json
import urllib.request
try:
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
except ImportError:
# Allow running directly: python scripts/scraper/coreinfo_scraper.py
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from scraper.base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
from scraper.base_scraper import (
BaseScraper,
BiosRequirement,
fetch_github_latest_version,
)
PLATFORM_NAME = "libretro_coreinfo"
@@ -168,11 +173,13 @@ def _extract_firmware(info: dict) -> list[dict]:
if _is_native_lib(path):
continue
firmware.append({
"path": path,
"desc": desc,
"optional": opt.lower() == "true",
})
firmware.append(
{
"path": path,
"desc": desc,
"optional": opt.lower() == "true",
}
)
return firmware
@@ -182,7 +189,7 @@ def _extract_md5_from_notes(info: dict) -> dict[str, str]:
notes = info.get("notes", "")
md5_map = {}
for match in re.finditer(r'\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})', notes):
for match in re.finditer(r"\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})", notes):
filename = match.group(1).strip()
md5 = match.group(2)
md5_map[filename] = md5
@@ -202,15 +209,19 @@ class Scraper(BaseScraper):
# Use the tree API to get all files at once
url = f"{GITHUB_API}/git/trees/master?recursive=1"
try:
req = urllib.request.Request(url, headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
})
req = urllib.request.Request(
url,
headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
},
)
with urllib.request.urlopen(req, timeout=30) as resp:
data = json.loads(resp.read())
return [
item["path"] for item in data.get("tree", [])
item["path"]
for item in data.get("tree", [])
if item["path"].endswith("_libretro.info")
]
except (urllib.error.URLError, json.JSONDecodeError) as e:
@@ -220,7 +231,9 @@ class Scraper(BaseScraper):
"""Fetch and parse a single .info file."""
url = f"{RAW_BASE}/{filename}"
try:
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
req = urllib.request.Request(
url, headers={"User-Agent": "retrobios-scraper/1.0"}
)
with urllib.request.urlopen(req, timeout=15) as resp:
content = resp.read().decode("utf-8")
return _parse_info_file(content)
@@ -253,17 +266,25 @@ class Scraper(BaseScraper):
basename = path.split("/")[-1] if "/" in path else path
# Full path when basename is generic to avoid SGB1.sfc/program.rom vs SGB2.sfc/program.rom collisions
GENERIC_NAMES = {"program.rom", "data.rom", "boot.rom", "bios.bin", "firmware.bin"}
GENERIC_NAMES = {
"program.rom",
"data.rom",
"boot.rom",
"bios.bin",
"firmware.bin",
}
name = path if basename.lower() in GENERIC_NAMES else basename
md5 = md5_map.get(basename)
requirements.append(BiosRequirement(
name=name,
system=system,
md5=md5,
destination=path,
required=not fw["optional"],
))
requirements.append(
BiosRequirement(
name=name,
system=system,
md5=md5,
destination=path,
required=not fw["optional"],
)
)
return requirements
@@ -281,7 +302,9 @@ def main():
"""CLI entry point."""
import argparse
parser = argparse.ArgumentParser(description="Scrape libretro-core-info firmware requirements")
parser = argparse.ArgumentParser(
description="Scrape libretro-core-info firmware requirements"
)
parser.add_argument("--dry-run", action="store_true")
parser.add_argument("--compare-db", help="Compare against database.json")
args = parser.parse_args()
@@ -296,6 +319,7 @@ def main():
if args.compare_db:
import json as _json
with open(args.compare_db) as f:
db = _json.load(f)
@@ -320,6 +344,7 @@ def main():
return
from collections import defaultdict
by_system = defaultdict(list)
for r in reqs:
by_system[r.system].append(r)

View File

@@ -10,13 +10,13 @@ Parses files like libretro's System.dat which uses the format:
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class DatRom:
"""A ROM entry from a DAT file."""
name: str
size: int
crc32: str
@@ -28,6 +28,7 @@ class DatRom:
@dataclass
class DatMetadata:
"""Metadata from a DAT file header."""
name: str = ""
version: str = ""
description: str = ""
@@ -53,7 +54,10 @@ def parse_dat(content: str) -> list[DatRom]:
if stripped.startswith("comment "):
value = stripped[8:].strip().strip('"')
if value in ("System", "System, firmware, and BIOS files used by libretro cores."):
if value in (
"System",
"System, firmware, and BIOS files used by libretro cores.",
):
continue
current_system = value
@@ -78,9 +82,16 @@ def parse_dat_metadata(content: str) -> DatMetadata:
if in_header and stripped == ")":
break
if in_header:
for field in ("name", "version", "description", "author", "homepage", "url"):
for field in (
"name",
"version",
"description",
"author",
"homepage",
"url",
):
if stripped.startswith(f"{field} "):
value = stripped[len(field) + 1:].strip().strip('"')
value = stripped[len(field) + 1 :].strip().strip('"')
setattr(meta, field, value)
return meta
@@ -94,7 +105,7 @@ def _parse_rom_line(line: str, system: str) -> DatRom | None:
if start == -1 or end == -1 or end <= start:
return None
content = line[start + 1:end].strip()
content = line[start + 1 : end].strip()
fields = {}
i = 0

View File

@@ -14,9 +14,8 @@ from __future__ import annotations
import csv
import io
import re
import sys
import urllib.request
import urllib.error
import urllib.request
try:
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
@@ -31,8 +30,7 @@ CHECKBIOS_URL = (
)
CSV_BASE_URL = (
"https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/"
"main/docs/tables"
"https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables"
)
CSV_SHEETS = [
@@ -117,10 +115,22 @@ KNOWN_BIOS_FILES = {
{"name": "scph5502.bin", "destination": "scph5502.bin", "region": "EU"},
],
"sony-playstation-2": [
{"name": "SCPH-70004_BIOS_V12_EUR_200.BIN", "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN"},
{"name": "SCPH-70004_BIOS_V12_EUR_200.EROM", "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM"},
{"name": "SCPH-70004_BIOS_V12_EUR_200.ROM1", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1"},
{"name": "SCPH-70004_BIOS_V12_EUR_200.ROM2", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2"},
{
"name": "SCPH-70004_BIOS_V12_EUR_200.BIN",
"destination": "SCPH-70004_BIOS_V12_EUR_200.BIN",
},
{
"name": "SCPH-70004_BIOS_V12_EUR_200.EROM",
"destination": "SCPH-70004_BIOS_V12_EUR_200.EROM",
},
{
"name": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
"destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
},
{
"name": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
"destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
},
],
"sega-mega-cd": [
{"name": "bios_CD_E.bin", "destination": "bios_CD_E.bin", "region": "EU"},
@@ -157,17 +167,17 @@ KNOWN_BIOS_FILES = {
}
_RE_ARRAY = re.compile(
r'(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
r"(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)",
re.MULTILINE,
)
_RE_FUNC = re.compile(
r'function\s+(check\w+Bios)\s*\(\)',
r"function\s+(check\w+Bios)\s*\(\)",
re.MULTILINE,
)
_RE_LOCAL_HASHES = re.compile(
r'local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
r"local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)",
re.MULTILINE,
)
@@ -184,7 +194,9 @@ def _fetch_url(url: str) -> str:
class Scraper(BaseScraper):
"""Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets."""
def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL):
def __init__(
self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL
):
super().__init__(url=checkbios_url)
self.checkbios_url = checkbios_url
self.csv_base_url = csv_base_url
@@ -241,12 +253,12 @@ class Scraper(BaseScraper):
@staticmethod
def _clean_markdown(text: str) -> str:
"""Strip markdown/HTML artifacts from CSV fields."""
text = re.sub(r'\*\*', '', text) # bold
text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text) # mkdocs material icons
text = re.sub(r':material-[^:]+:', '', text)
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # [text](url) -> text
text = re.sub(r'<br\s*/?>', ' ', text) # <br/>
text = re.sub(r'<[^>]+>', '', text) # remaining HTML
text = re.sub(r"\*\*", "", text) # bold
text = re.sub(r":material-[^:]+:\{[^}]*\}", "", text) # mkdocs material icons
text = re.sub(r":material-[^:]+:", "", text)
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text) # [text](url) -> text
text = re.sub(r"<br\s*/?>", " ", text) # <br/>
text = re.sub(r"<[^>]+>", "", text) # remaining HTML
return text.strip()
def _parse_csv_bios(self, csv_text: str) -> list[dict]:
@@ -274,28 +286,32 @@ class Scraper(BaseScraper):
system_col = self._clean_markdown((row[key] or ""))
break
slug = None
for part in re.split(r'[`\s/]+', folder_col):
part = part.strip().strip('`').lower()
for part in re.split(r"[`\s/]+", folder_col):
part = part.strip().strip("`").lower()
if part and part in SYSTEM_SLUG_MAP:
slug = SYSTEM_SLUG_MAP[part]
break
if not slug:
clean = re.sub(r'[^a-z0-9\-]', '', folder_col.strip().strip('`').lower())
clean = re.sub(
r"[^a-z0-9\-]", "", folder_col.strip().strip("`").lower()
)
slug = clean if clean else "unknown"
entries.append({
"system": slug,
"system_name": system_col,
"bios_raw": bios_col,
})
entries.append(
{
"system": slug,
"system_name": system_col,
"bios_raw": bios_col,
}
)
return entries
def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]:
"""Extract individual BIOS filenames from a CSV BIOS field."""
results = []
bios_raw = re.sub(r'<br\s*/?>', ' ', bios_raw)
bios_raw = bios_raw.replace('`', '')
bios_raw = re.sub(r"<br\s*/?>", " ", bios_raw)
bios_raw = bios_raw.replace("`", "")
patterns = re.findall(
r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)',
r"[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)",
bios_raw,
)
for p in patterns:
@@ -324,21 +340,25 @@ class Scraper(BaseScraper):
if key in seen:
continue
seen.add(key)
requirements.append(BiosRequirement(
name=f["name"],
system=system,
destination=f.get("destination", f["name"]),
required=True,
))
requirements.append(
BiosRequirement(
name=f["name"],
system=system,
destination=f.get("destination", f["name"]),
required=True,
)
)
for md5 in system_hashes:
requirements.append(BiosRequirement(
name=f"{system}:{md5}",
system=system,
md5=md5,
destination="",
required=True,
))
requirements.append(
BiosRequirement(
name=f"{system}:{md5}",
system=system,
md5=md5,
destination="",
required=True,
)
)
for sheet in CSV_SHEETS:
csv_text = self._fetch_csv(sheet)
@@ -353,19 +373,21 @@ class Scraper(BaseScraper):
seen.add(key)
if system in KNOWN_BIOS_FILES:
continue
requirements.append(BiosRequirement(
name=f["name"],
system=system,
destination=f.get("destination", f["name"]),
required=True,
))
requirements.append(
BiosRequirement(
name=f["name"],
system=system,
destination=f.get("destination", f["name"]),
required=True,
)
)
return requirements
def validate_format(self, raw_data: str) -> bool:
has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data
has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data
has_md5 = re.search(r'[0-9a-f]{32}', raw_data) is not None
has_md5 = re.search(r"[0-9a-f]{32}", raw_data) is not None
return has_ps and has_func and has_md5
def generate_platform_yaml(self) -> dict:
@@ -419,14 +441,17 @@ class Scraper(BaseScraper):
"contents/functions/EmuScripts"
)
name_overrides = {
"pcsx2qt": "pcsx2", "rpcs3legacy": "rpcs3",
"cemuproton": "cemu", "rmg": "mupen64plus_next",
"pcsx2qt": "pcsx2",
"rpcs3legacy": "rpcs3",
"cemuproton": "cemu",
"rmg": "mupen64plus_next",
}
skip = {"retroarch_maincfg", "retroarch"}
try:
req = urllib.request.Request(
api_url, headers={"User-Agent": "retrobios-scraper/1.0"},
api_url,
headers={"User-Agent": "retrobios-scraper/1.0"},
)
data = json.loads(urllib.request.urlopen(req, timeout=30).read())
except (urllib.error.URLError, OSError):
@@ -454,6 +479,7 @@ class Scraper(BaseScraper):
def main():
from scripts.scraper.base_scraper import scraper_cli
scraper_cli(Scraper, "Scrape emudeck BIOS requirements")

View File

@@ -13,22 +13,22 @@ import logging
import shutil
import subprocess
import sys
from datetime import datetime, timezone, timedelta
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
import yaml
from scripts.scraper.fbneo_parser import parse_fbneo_source_tree
from scripts.scraper._hash_merge import compute_diff, merge_fbneo_profile
from scripts.scraper.fbneo_parser import parse_fbneo_source_tree
log = logging.getLogger(__name__)
REPO_URL = 'https://github.com/finalburnneo/FBNeo.git'
REPO_URL = "https://github.com/finalburnneo/FBNeo.git"
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
CLONE_DIR = REPO_ROOT / 'tmp' / 'fbneo'
CACHE_PATH = REPO_ROOT / 'data' / 'fbneo-hashes.json'
EMULATORS_DIR = REPO_ROOT / 'emulators'
CLONE_DIR = REPO_ROOT / "tmp" / "fbneo"
CACHE_PATH = REPO_ROOT / "data" / "fbneo-hashes.json"
EMULATORS_DIR = REPO_ROOT / "emulators"
STALE_HOURS = 24
@@ -37,8 +37,8 @@ def _is_cache_fresh() -> bool:
if not CACHE_PATH.exists():
return False
try:
data = json.loads(CACHE_PATH.read_text(encoding='utf-8'))
fetched_at = datetime.fromisoformat(data['fetched_at'])
data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
fetched_at = datetime.fromisoformat(data["fetched_at"])
return datetime.now(timezone.utc) - fetched_at < timedelta(hours=STALE_HOURS)
except (json.JSONDecodeError, KeyError, ValueError):
return False
@@ -53,8 +53,14 @@ def _sparse_clone() -> None:
subprocess.run(
[
'git', 'clone', '--depth', '1', '--filter=blob:none',
'--sparse', REPO_URL, str(CLONE_DIR),
"git",
"clone",
"--depth",
"1",
"--filter=blob:none",
"--sparse",
REPO_URL,
str(CLONE_DIR),
],
check=True,
capture_output=True,
@@ -62,7 +68,7 @@ def _sparse_clone() -> None:
)
subprocess.run(
['git', 'sparse-checkout', 'set', 'src/burn/drv', 'src/burner/resource.h'],
["git", "sparse-checkout", "set", "src/burn/drv", "src/burner/resource.h"],
cwd=CLONE_DIR,
check=True,
capture_output=True,
@@ -76,42 +82,44 @@ def _extract_version() -> tuple[str, str]:
Returns (version, commit_sha). Falls back to resource.h if no tag.
"""
result = subprocess.run(
['git', 'describe', '--tags', '--abbrev=0'],
["git", "describe", "--tags", "--abbrev=0"],
cwd=CLONE_DIR,
capture_output=True,
text=True,
)
# Prefer real version tags over pseudo-tags like "latest"
version = 'unknown'
version = "unknown"
if result.returncode == 0:
tag = result.stdout.strip()
if tag and tag != 'latest':
if tag and tag != "latest":
version = tag
# Fallback: resource.h
if version == 'unknown':
if version == "unknown":
version = _version_from_resource_h()
# Last resort: use GitHub API for latest real release tag
if version == 'unknown':
if version == "unknown":
try:
import urllib.request
import urllib.error
import urllib.request
req = urllib.request.Request(
'https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10',
headers={'User-Agent': 'retrobios-scraper/1.0'},
"https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10",
headers={"User-Agent": "retrobios-scraper/1.0"},
)
with urllib.request.urlopen(req, timeout=10) as resp:
import json as json_mod
tags = json_mod.loads(resp.read())
for t in tags:
if t['name'] != 'latest' and t['name'].startswith('v'):
version = t['name']
if t["name"] != "latest" and t["name"].startswith("v"):
version = t["name"]
break
except (urllib.error.URLError, OSError):
pass
sha_result = subprocess.run(
['git', 'rev-parse', 'HEAD'],
["git", "rev-parse", "HEAD"],
cwd=CLONE_DIR,
capture_output=True,
text=True,
@@ -124,17 +132,17 @@ def _extract_version() -> tuple[str, str]:
def _version_from_resource_h() -> str:
"""Fallback: parse VER_FULL_VERSION_STR from resource.h."""
resource_h = CLONE_DIR / 'src' / 'burner' / 'resource.h'
resource_h = CLONE_DIR / "src" / "burner" / "resource.h"
if not resource_h.exists():
return 'unknown'
return "unknown"
text = resource_h.read_text(encoding='utf-8', errors='replace')
text = resource_h.read_text(encoding="utf-8", errors="replace")
for line in text.splitlines():
if 'VER_FULL_VERSION_STR' in line:
if "VER_FULL_VERSION_STR" in line:
parts = line.split('"')
if len(parts) >= 2:
return parts[1]
return 'unknown'
return "unknown"
def _cleanup() -> None:
@@ -146,33 +154,33 @@ def _cleanup() -> None:
def fetch_and_cache(force: bool = False) -> dict[str, Any]:
"""Clone, parse, and write JSON cache. Returns the cache dict."""
if not force and _is_cache_fresh():
log.info('cache fresh, skipping clone (use --force to override)')
return json.loads(CACHE_PATH.read_text(encoding='utf-8'))
log.info("cache fresh, skipping clone (use --force to override)")
return json.loads(CACHE_PATH.read_text(encoding="utf-8"))
try:
log.info('sparse cloning %s', REPO_URL)
log.info("sparse cloning %s", REPO_URL)
_sparse_clone()
log.info('extracting version')
log.info("extracting version")
version, commit = _extract_version()
log.info('parsing source tree')
log.info("parsing source tree")
bios_sets = parse_fbneo_source_tree(str(CLONE_DIR))
cache: dict[str, Any] = {
'source': 'finalburnneo/FBNeo',
'version': version,
'commit': commit,
'fetched_at': datetime.now(timezone.utc).isoformat(),
'bios_sets': bios_sets,
"source": "finalburnneo/FBNeo",
"version": version,
"commit": commit,
"fetched_at": datetime.now(timezone.utc).isoformat(),
"bios_sets": bios_sets,
}
CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
CACHE_PATH.write_text(
json.dumps(cache, indent=2, ensure_ascii=False) + '\n',
encoding='utf-8',
json.dumps(cache, indent=2, ensure_ascii=False) + "\n",
encoding="utf-8",
)
log.info('wrote %d BIOS sets to %s', len(bios_sets), CACHE_PATH)
log.info("wrote %d BIOS sets to %s", len(bios_sets), CACHE_PATH)
return cache
finally:
@@ -182,48 +190,50 @@ def fetch_and_cache(force: bool = False) -> dict[str, Any]:
def _find_fbneo_profiles() -> list[Path]:
"""Find emulator profiles whose upstream references finalburnneo/FBNeo."""
profiles: list[Path] = []
for path in sorted(EMULATORS_DIR.glob('*.yml')):
if path.name.endswith('.old.yml'):
for path in sorted(EMULATORS_DIR.glob("*.yml")):
if path.name.endswith(".old.yml"):
continue
try:
data = yaml.safe_load(path.read_text(encoding='utf-8'))
data = yaml.safe_load(path.read_text(encoding="utf-8"))
except (yaml.YAMLError, OSError):
continue
if not data or not isinstance(data, dict):
continue
upstream = data.get('upstream', '')
if isinstance(upstream, str) and 'finalburnneo/fbneo' in upstream.lower():
upstream = data.get("upstream", "")
if isinstance(upstream, str) and "finalburnneo/fbneo" in upstream.lower():
profiles.append(path)
return profiles
def _format_diff(profile_name: str, diff: dict[str, Any], show_added: bool = True) -> str:
def _format_diff(
profile_name: str, diff: dict[str, Any], show_added: bool = True
) -> str:
"""Format diff for a single profile."""
lines: list[str] = []
lines.append(f' {profile_name}:')
lines.append(f" {profile_name}:")
added = diff.get('added', [])
updated = diff.get('updated', [])
oos = diff.get('out_of_scope', 0)
added = diff.get("added", [])
updated = diff.get("updated", [])
oos = diff.get("out_of_scope", 0)
if not added and not updated:
lines.append(' no changes')
lines.append(" no changes")
if oos:
lines.append(f' . {oos} out of scope')
return '\n'.join(lines)
lines.append(f" . {oos} out of scope")
return "\n".join(lines)
if show_added:
for label in added:
lines.append(f' + {label}')
lines.append(f" + {label}")
elif added:
lines.append(f' + {len(added)} new ROMs available (main profile only)')
lines.append(f" + {len(added)} new ROMs available (main profile only)")
for label in updated:
lines.append(f' ~ {label}')
lines.append(f' = {diff["unchanged"]} unchanged')
lines.append(f" ~ {label}")
lines.append(f" = {diff['unchanged']} unchanged")
if oos:
lines.append(f' . {oos} out of scope')
lines.append(f" . {oos} out of scope")
return '\n'.join(lines)
return "\n".join(lines)
def run(
@@ -234,82 +244,84 @@ def run(
"""Main entry point for the scraper."""
cache = fetch_and_cache(force=force)
version = cache.get('version', 'unknown')
commit = cache.get('commit', '?')[:12]
bios_sets = cache.get('bios_sets', {})
version = cache.get("version", "unknown")
commit = cache.get("commit", "?")[:12]
bios_sets = cache.get("bios_sets", {})
profiles = _find_fbneo_profiles()
if json_output:
result: dict[str, Any] = {
'source': cache.get('source'),
'version': version,
'commit': cache.get('commit'),
'bios_set_count': len(bios_sets),
'profiles': {},
"source": cache.get("source"),
"version": version,
"commit": cache.get("commit"),
"bios_set_count": len(bios_sets),
"profiles": {},
}
for path in profiles:
diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo')
result['profiles'][path.stem] = diff
diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo")
result["profiles"][path.stem] = diff
print(json.dumps(result, indent=2))
return 0
header = (
f'fbneo-hashes: {len(bios_sets)} BIOS sets '
f'from finalburnneo/FBNeo @ {version} ({commit})'
f"fbneo-hashes: {len(bios_sets)} BIOS sets "
f"from finalburnneo/FBNeo @ {version} ({commit})"
)
print(header)
print()
if not profiles:
print(' no matching emulator profiles found')
print(" no matching emulator profiles found")
return 0
for path in profiles:
is_main = path.name == 'fbneo.yml'
diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo')
is_main = path.name == "fbneo.yml"
diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo")
print(_format_diff(path.stem, diff, show_added=is_main))
effective_added = diff['added'] if is_main else []
if not dry_run and (effective_added or diff['updated']):
effective_added = diff["added"] if is_main else []
if not dry_run and (effective_added or diff["updated"]):
merge_fbneo_profile(str(path), str(CACHE_PATH), write=True, add_new=is_main)
log.info('merged changes into %s', path.name)
log.info("merged changes into %s", path.name)
return 0
def main() -> None:
parser = argparse.ArgumentParser(
description='Scrape FBNeo BIOS set hashes from upstream source',
description="Scrape FBNeo BIOS set hashes from upstream source",
)
parser.add_argument(
'--dry-run',
action='store_true',
help='show diff without writing changes',
"--dry-run",
action="store_true",
help="show diff without writing changes",
)
parser.add_argument(
'--force',
action='store_true',
help='force re-clone even if cache is fresh',
"--force",
action="store_true",
help="force re-clone even if cache is fresh",
)
parser.add_argument(
'--json',
action='store_true',
dest='json_output',
help='output diff as JSON',
"--json",
action="store_true",
dest="json_output",
help="output diff as JSON",
)
args = parser.parse_args()
logging.basicConfig(
level=logging.INFO,
format='%(name)s: %(message)s',
format="%(name)s: %(message)s",
)
sys.exit(run(
dry_run=args.dry_run,
force=args.force,
json_output=args.json_output,
))
sys.exit(
run(
dry_run=args.dry_run,
force=args.force,
json_output=args.json_output,
)
)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -11,18 +11,17 @@ import os
import re
from pathlib import Path
_ROM_ENTRY_RE = re.compile(
r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}',
)
_BURN_DRIVER_RE = re.compile(
r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
r"struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};",
re.DOTALL,
)
_ROM_DESC_RE = re.compile(
r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
r"static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};",
re.DOTALL,
)
@@ -37,7 +36,7 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
for match in _BURN_DRIVER_RE.finditer(source):
body = match.group(2)
if 'BDF_BOARDROM' not in body:
if "BDF_BOARDROM" not in body:
continue
# Set name is the first quoted string in the struct body
@@ -46,11 +45,11 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
continue
set_name = name_match.group(1)
line_num = source[:match.start()].count('\n') + 1
line_num = source[: match.start()].count("\n") + 1
results[set_name] = {
'source_file': filename,
'source_line': line_num,
"source_file": filename,
"source_line": line_num,
}
return results
@@ -63,9 +62,9 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]:
Sentinel entries (empty name) are skipped.
"""
pattern = re.compile(
r'static\s+struct\s+BurnRomInfo\s+'
r"static\s+struct\s+BurnRomInfo\s+"
+ re.escape(set_name)
+ r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
+ r"RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};",
re.DOTALL,
)
match = pattern.search(source)
@@ -80,13 +79,15 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]:
if not name:
continue
size = int(entry.group(2), 16)
crc32 = format(int(entry.group(3), 16), '08x')
crc32 = format(int(entry.group(3), 16), "08x")
roms.append({
'name': name,
'size': size,
'crc32': crc32,
})
roms.append(
{
"name": name,
"size": size,
"crc32": crc32,
}
)
return roms
@@ -100,7 +101,7 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
Returns a dict mapping set name to:
{source_file, source_line, roms: [{name, size, crc32}, ...]}
"""
drv_path = Path(base_path) / 'src' / 'burn' / 'drv'
drv_path = Path(base_path) / "src" / "burn" / "drv"
if not drv_path.is_dir():
return {}
@@ -108,20 +109,20 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
for root, _dirs, files in os.walk(drv_path):
for fname in files:
if not fname.endswith('.cpp'):
if not fname.endswith(".cpp"):
continue
filepath = Path(root) / fname
source = filepath.read_text(encoding='utf-8', errors='replace')
source = filepath.read_text(encoding="utf-8", errors="replace")
rel_path = str(filepath.relative_to(base_path))
bios_sets = find_bios_sets(source, rel_path)
for set_name, meta in bios_sets.items():
roms = parse_rom_info(source, set_name)
results[set_name] = {
'source_file': meta['source_file'],
'source_line': meta['source_line'],
'roms': roms,
"source_file": meta["source_file"],
"source_line": meta["source_line"],
"roms": roms,
}
return results

View File

@@ -8,9 +8,8 @@ Hash: SHA1 primary
from __future__ import annotations
import sys
import urllib.request
import urllib.error
import urllib.request
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
@@ -18,18 +17,17 @@ from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
PLATFORM_NAME = "libretro"
SOURCE_URL = (
"https://raw.githubusercontent.com/libretro/libretro-database/"
"master/dat/System.dat"
"https://raw.githubusercontent.com/libretro/libretro-database/master/dat/System.dat"
)
# Libretro cores that expect BIOS files in a subdirectory of system/.
# System.dat lists filenames flat; the scraper prepends the prefix.
# ref: each core's libretro.c or equivalent -see platforms/README.md
CORE_SUBDIR_MAP = {
"nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c
"sharp-x68000": "keropi", # px68k/libretro/libretro.c
"sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp
"sega-dreamcast-arcade": "dc", # flycast -same subfolder
"nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c
"sharp-x68000": "keropi", # px68k/libretro/libretro.c
"sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp
"sega-dreamcast-arcade": "dc", # flycast -same subfolder
}
SYSTEM_SLUG_MAP = {
@@ -100,7 +98,6 @@ class Scraper(BaseScraper):
def __init__(self, url: str = SOURCE_URL):
super().__init__(url=url)
def fetch_requirements(self) -> list[BiosRequirement]:
"""Parse System.dat and return BIOS requirements."""
raw = self._fetch_raw()
@@ -113,7 +110,9 @@ class Scraper(BaseScraper):
for rom in roms:
native_system = rom.system
system_slug = SYSTEM_SLUG_MAP.get(native_system, native_system.lower().replace(" ", "-"))
system_slug = SYSTEM_SLUG_MAP.get(
native_system, native_system.lower().replace(" ", "-")
)
destination = rom.name
name = rom.name.split("/")[-1] if "/" in rom.name else rom.name
@@ -122,17 +121,19 @@ class Scraper(BaseScraper):
if subdir and not destination.startswith(subdir + "/"):
destination = f"{subdir}/{destination}"
requirements.append(BiosRequirement(
name=name,
system=system_slug,
sha1=rom.sha1 or None,
md5=rom.md5 or None,
crc32=rom.crc32 or None,
size=rom.size or None,
destination=destination,
required=True,
native_id=native_system,
))
requirements.append(
BiosRequirement(
name=name,
system=system_slug,
sha1=rom.sha1 or None,
md5=rom.md5 or None,
crc32=rom.crc32 or None,
size=rom.size or None,
destination=destination,
required=True,
native_id=native_system,
)
)
return requirements
@@ -158,17 +159,22 @@ class Scraper(BaseScraper):
"""Fetch per-core metadata from libretro-core-info .info files."""
metadata = {}
try:
url = f"https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
req = urllib.request.Request(url, headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
})
url = "https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
req = urllib.request.Request(
url,
headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
},
)
with urllib.request.urlopen(req, timeout=30) as resp:
import json
tree = json.loads(resp.read())
info_files = [
item["path"] for item in tree.get("tree", [])
item["path"]
for item in tree.get("tree", [])
if item["path"].endswith("_libretro.info")
]
@@ -176,7 +182,9 @@ class Scraper(BaseScraper):
core_name = filename.replace("_libretro.info", "")
try:
info_url = f"https://raw.githubusercontent.com/libretro/libretro-core-info/master/{filename}"
req = urllib.request.Request(info_url, headers={"User-Agent": "retrobios-scraper/1.0"})
req = urllib.request.Request(
info_url, headers={"User-Agent": "retrobios-scraper/1.0"}
)
with urllib.request.urlopen(req, timeout=10) as resp:
content = resp.read().decode("utf-8")
@@ -194,10 +202,11 @@ class Scraper(BaseScraper):
system_name = info.get("systemname", "")
manufacturer = info.get("manufacturer", "")
display_name = info.get("display_name", "")
categories = info.get("categories", "")
info.get("categories", "")
# Map core to our system slug via firmware paths
from .coreinfo_scraper import CORE_SYSTEM_MAP
system_slug = CORE_SYSTEM_MAP.get(core_name)
if not system_slug:
continue
@@ -267,7 +276,11 @@ class Scraper(BaseScraper):
# ref: Vircon32/libretro.c -virtual console, single BIOS
"vircon32": {
"files": [
{"name": "Vircon32Bios.v32", "destination": "Vircon32Bios.v32", "required": True},
{
"name": "Vircon32Bios.v32",
"destination": "Vircon32Bios.v32",
"required": True,
},
],
"core": "vircon32",
"manufacturer": "Vircon",
@@ -276,7 +289,11 @@ class Scraper(BaseScraper):
# ref: xrick/src/sysvid.c, xrick/src/data.c -game data archive
"xrick": {
"files": [
{"name": "data.zip", "destination": "xrick/data.zip", "required": True},
{
"name": "data.zip",
"destination": "xrick/data.zip",
"required": True,
},
],
"core": "xrick",
"manufacturer": "Other",
@@ -318,27 +335,51 @@ class Scraper(BaseScraper):
# segasp.zip for Sega System SP (Flycast)
if "sega-dreamcast-arcade" in systems:
existing = {f["name"] for f in systems["sega-dreamcast-arcade"].get("files", [])}
existing = {
f["name"] for f in systems["sega-dreamcast-arcade"].get("files", [])
}
if "segasp.zip" not in existing:
systems["sega-dreamcast-arcade"]["files"].append({
"name": "segasp.zip",
"destination": "dc/segasp.zip",
"required": True,
})
systems["sega-dreamcast-arcade"]["files"].append(
{
"name": "segasp.zip",
"destination": "dc/segasp.zip",
"required": True,
}
)
# Extra files missing from System.dat for specific systems.
# Each traced to the core's source code.
EXTRA_SYSTEM_FILES = {
# melonDS DS DSi mode -ref: JesseTG/melonds-ds/src/libretro.cpp
"nintendo-ds": [
{"name": "dsi_bios7.bin", "destination": "dsi_bios7.bin", "required": True},
{"name": "dsi_bios9.bin", "destination": "dsi_bios9.bin", "required": True},
{"name": "dsi_firmware.bin", "destination": "dsi_firmware.bin", "required": True},
{"name": "dsi_nand.bin", "destination": "dsi_nand.bin", "required": True},
{
"name": "dsi_bios7.bin",
"destination": "dsi_bios7.bin",
"required": True,
},
{
"name": "dsi_bios9.bin",
"destination": "dsi_bios9.bin",
"required": True,
},
{
"name": "dsi_firmware.bin",
"destination": "dsi_firmware.bin",
"required": True,
},
{
"name": "dsi_nand.bin",
"destination": "dsi_nand.bin",
"required": True,
},
],
# bsnes SGB naming -ref: bsnes/target-libretro/libretro.cpp
"nintendo-sgb": [
{"name": "sgb.boot.rom", "destination": "sgb.boot.rom", "required": False},
{
"name": "sgb.boot.rom",
"destination": "sgb.boot.rom",
"required": False,
},
],
# JollyCV -ref: jollycv/libretro.c
"coleco-colecovision": [
@@ -348,12 +389,20 @@ class Scraper(BaseScraper):
],
# Kronos ST-V -ref: libretro-kronos/libretro/libretro.c
"sega-saturn": [
{"name": "stvbios.zip", "destination": "kronos/stvbios.zip", "required": True},
{
"name": "stvbios.zip",
"destination": "kronos/stvbios.zip",
"required": True,
},
],
# PCSX ReARMed / Beetle PSX alt BIOS -ref: pcsx_rearmed/libpcsxcore/misc.c
# docs say PSXONPSP660.bin (uppercase) but core accepts any case
"sony-playstation": [
{"name": "psxonpsp660.bin", "destination": "psxonpsp660.bin", "required": False},
{
"name": "psxonpsp660.bin",
"destination": "psxonpsp660.bin",
"required": False,
},
],
# Dolphin GC -ref: DolphinLibretro/Boot.cpp:72-73,
# BootManager.cpp:200-217, CommonPaths.h:139 GC_IPL="IPL.bin"
@@ -361,15 +410,43 @@ class Scraper(BaseScraper):
# System.dat gc-ntsc-*.bin names are NOT what Dolphin loads.
# We add the correct Dolphin paths for BIOS + essential firmware.
"nintendo-gamecube": [
{"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/USA/IPL.bin", "required": False},
{"name": "gc-pal-12.bin", "destination": "dolphin-emu/Sys/GC/EUR/IPL.bin", "required": False},
{"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/JAP/IPL.bin", "required": False},
{
"name": "gc-ntsc-12.bin",
"destination": "dolphin-emu/Sys/GC/USA/IPL.bin",
"required": False,
},
{
"name": "gc-pal-12.bin",
"destination": "dolphin-emu/Sys/GC/EUR/IPL.bin",
"required": False,
},
{
"name": "gc-ntsc-12.bin",
"destination": "dolphin-emu/Sys/GC/JAP/IPL.bin",
"required": False,
},
# DSP firmware -ref: Source/Core/Core/HW/DSPLLE/DSPHost.cpp
{"name": "dsp_coef.bin", "destination": "dolphin-emu/Sys/GC/dsp_coef.bin", "required": True},
{"name": "dsp_rom.bin", "destination": "dolphin-emu/Sys/GC/dsp_rom.bin", "required": True},
{
"name": "dsp_coef.bin",
"destination": "dolphin-emu/Sys/GC/dsp_coef.bin",
"required": True,
},
{
"name": "dsp_rom.bin",
"destination": "dolphin-emu/Sys/GC/dsp_rom.bin",
"required": True,
},
# Fonts -ref: Source/Core/Core/HW/EXI/EXI_DeviceIPL.cpp
{"name": "font_western.bin", "destination": "dolphin-emu/Sys/GC/font_western.bin", "required": False},
{"name": "font_japanese.bin", "destination": "dolphin-emu/Sys/GC/font_japanese.bin", "required": False},
{
"name": "font_western.bin",
"destination": "dolphin-emu/Sys/GC/font_western.bin",
"required": False,
},
{
"name": "font_japanese.bin",
"destination": "dolphin-emu/Sys/GC/font_japanese.bin",
"required": False,
},
],
# minivmac casing -ref: minivmac/src/MYOSGLUE.c
# doc says MacII.rom, repo has MacII.ROM -both work on case-insensitive FS
@@ -455,6 +532,7 @@ class Scraper(BaseScraper):
def main():
    """Command-line entry point for the libretro scraper.

    Hands the module's ``Scraper`` class and a description string to the
    shared ``scraper_cli`` helper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape libretro BIOS requirements")

View File

@@ -21,16 +21,16 @@ from typing import Any
import yaml
from .mame_parser import parse_mame_source_tree
from ._hash_merge import compute_diff, merge_mame_profile
from .mame_parser import parse_mame_source_tree
log = logging.getLogger(__name__)

# Anchor directory for every path below: two levels above this module's
# own directory.
_ROOT = Path(__file__).resolve().parents[2]
# JSON cache read by _load_cache() and written by _write_cache().
_CACHE_PATH = _ROOT / "data" / "mame-hashes.json"
# Working directory for the temporary sparse clone of the MAME repository.
_CLONE_DIR = _ROOT / "tmp" / "mame"
# Directory scanned for emulator profile ``*.yml`` files.
_EMULATORS_DIR = _ROOT / "emulators"
_REPO_URL = "https://github.com/mamedev/mame.git"

# NOTE(review): presumably the cache age (in hours) after which
# _is_stale() reports the cache as stale -- confirm against _is_stale().
_STALE_HOURS = 24
@@ -41,7 +41,7 @@ def _load_cache() -> dict[str, Any] | None:
if not _CACHE_PATH.exists():
return None
try:
with open(_CACHE_PATH, encoding='utf-8') as f:
with open(_CACHE_PATH, encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, OSError):
return None
@@ -50,7 +50,7 @@ def _load_cache() -> dict[str, Any] | None:
def _is_stale(cache: dict[str, Any] | None) -> bool:
if cache is None:
return True
fetched_at = cache.get('fetched_at')
fetched_at = cache.get("fetched_at")
if not fetched_at:
return True
try:
@@ -63,17 +63,19 @@ def _is_stale(cache: dict[str, Any] | None) -> bool:
def _write_cache(data: dict[str, Any]) -> None:
    """Serialize *data* as JSON to ``_CACHE_PATH``.

    The parent directory is created when missing. Output is indented by
    two spaces and keeps non-ASCII characters unescaped.
    """
    _CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(_CACHE_PATH, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    log.info("cache written to %s", _CACHE_PATH)
# ── Git operations ───────────────────────────────────────────────────
def _run_git(args: list[str], cwd: Path | None = None) -> subprocess.CompletedProcess[str]:
def _run_git(
args: list[str], cwd: Path | None = None
) -> subprocess.CompletedProcess[str]:
return subprocess.run(
['git', *args],
["git", *args],
cwd=cwd,
check=True,
capture_output=True,
@@ -86,17 +88,20 @@ def _sparse_clone() -> None:
shutil.rmtree(_CLONE_DIR)
_CLONE_DIR.parent.mkdir(parents=True, exist_ok=True)
log.info('sparse cloning mamedev/mame into %s', _CLONE_DIR)
_run_git([
'clone',
'--depth', '1',
'--filter=blob:none',
'--sparse',
_REPO_URL,
str(_CLONE_DIR),
])
log.info("sparse cloning mamedev/mame into %s", _CLONE_DIR)
_run_git(
['sparse-checkout', 'set', 'src/mame', 'src/devices'],
[
"clone",
"--depth",
"1",
"--filter=blob:none",
"--sparse",
_REPO_URL,
str(_CLONE_DIR),
]
)
_run_git(
["sparse-checkout", "set", "src/mame", "src/devices"],
cwd=_CLONE_DIR,
)
@@ -106,41 +111,41 @@ def _get_version() -> str:
# Use GitHub API to get the latest release tag.
try:
req = urllib.request.Request(
'https://api.github.com/repos/mamedev/mame/releases/latest',
headers={'User-Agent': 'retrobios-scraper/1.0',
'Accept': 'application/vnd.github.v3+json'},
"https://api.github.com/repos/mamedev/mame/releases/latest",
headers={
"User-Agent": "retrobios-scraper/1.0",
"Accept": "application/vnd.github.v3+json",
},
)
with urllib.request.urlopen(req, timeout=10) as resp:
data = json.loads(resp.read())
tag = data.get('tag_name', '')
tag = data.get("tag_name", "")
if tag:
return _parse_version_tag(tag)
except (urllib.error.URLError, json.JSONDecodeError, OSError):
pass
return 'unknown'
return "unknown"
def _parse_version_tag(tag: str) -> str:
    """Convert a release tag such as ``mame0264`` into ``0.264``.

    A leading ``mame`` prefix is dropped. If what remains consists only of
    digits and is at least four characters long, a dot is inserted after
    the first digit; otherwise the remainder is returned unchanged.
    """
    # str.removeprefix() already returns the string untouched when the
    # prefix is absent, so no startswith() guard is needed.
    raw = tag.removeprefix("mame")
    if raw.isdigit() and len(raw) >= 4:
        return f"{raw[0]}.{raw[1:]}"
    return raw
def _get_commit() -> str:
    """Return the HEAD commit hash of the clone in ``_CLONE_DIR``.

    Runs ``git rev-parse HEAD`` through ``_run_git``. An empty string is
    returned when git exits with a non-zero status.
    """
    try:
        result = _run_git(["rev-parse", "HEAD"], cwd=_CLONE_DIR)
    except subprocess.CalledProcessError:
        return ""
    return result.stdout.strip()
def _cleanup() -> None:
    """Remove the ``_CLONE_DIR`` tree if it exists, logging the removal."""
    if _CLONE_DIR.exists():
        log.info("cleaning up %s", _CLONE_DIR)
        shutil.rmtree(_CLONE_DIR)
@@ -149,18 +154,21 @@ def _cleanup() -> None:
def _find_mame_profiles() -> list[Path]:
profiles: list[Path] = []
for path in sorted(_EMULATORS_DIR.glob('*.yml')):
if path.name.endswith('.old.yml'):
for path in sorted(_EMULATORS_DIR.glob("*.yml")):
if path.name.endswith(".old.yml"):
continue
try:
with open(path, encoding='utf-8') as f:
with open(path, encoding="utf-8") as f:
data = yaml.safe_load(f)
if not isinstance(data, dict):
continue
upstream = data.get('upstream', '')
upstream = data.get("upstream", "")
# Only match profiles tracking current MAME (not frozen snapshots
# which have upstream like "mamedev/mame/tree/mame0139")
if isinstance(upstream, str) and upstream.rstrip('/') == 'https://github.com/mamedev/mame':
if (
isinstance(upstream, str)
and upstream.rstrip("/") == "https://github.com/mamedev/mame"
):
profiles.append(path)
except (yaml.YAMLError, OSError):
continue
@@ -179,36 +187,36 @@ def _format_diff(
lines: list[str] = []
name = profile_path.stem
added = diff.get('added', [])
updated = diff.get('updated', [])
removed = diff.get('removed', [])
unchanged = diff.get('unchanged', 0)
added = diff.get("added", [])
updated = diff.get("updated", [])
removed = diff.get("removed", [])
unchanged = diff.get("unchanged", 0)
if not added and not updated and not removed:
lines.append(f' {name}:')
lines.append(' no changes')
lines.append(f" {name}:")
lines.append(" no changes")
return lines
lines.append(f' {name}:')
lines.append(f" {name}:")
if show_added:
bios_sets = hashes.get('bios_sets', {})
bios_sets = hashes.get("bios_sets", {})
for set_name in added:
rom_count = len(bios_sets.get(set_name, {}).get('roms', []))
source_file = bios_sets.get(set_name, {}).get('source_file', '')
source_line = bios_sets.get(set_name, {}).get('source_line', '')
ref = f'{source_file}:{source_line}' if source_file else ''
lines.append(f' + {set_name}.zip ({ref}, {rom_count} ROMs)')
rom_count = len(bios_sets.get(set_name, {}).get("roms", []))
source_file = bios_sets.get(set_name, {}).get("source_file", "")
source_line = bios_sets.get(set_name, {}).get("source_line", "")
ref = f"{source_file}:{source_line}" if source_file else ""
lines.append(f" + {set_name}.zip ({ref}, {rom_count} ROMs)")
elif added:
lines.append(f' + {len(added)} new sets available (main profile only)')
lines.append(f" + {len(added)} new sets available (main profile only)")
for set_name in updated:
lines.append(f' ~ {set_name}.zip (contents changed)')
lines.append(f" ~ {set_name}.zip (contents changed)")
oos = diff.get('out_of_scope', 0)
lines.append(f' = {unchanged} unchanged')
oos = diff.get("out_of_scope", 0)
lines.append(f" = {unchanged} unchanged")
if oos:
lines.append(f' . {oos} out of scope (not BIOS root sets)')
lines.append(f" . {oos} out of scope (not BIOS root sets)")
return lines
@@ -218,7 +226,7 @@ def _format_diff(
def _fetch_hashes(force: bool) -> dict[str, Any]:
cache = _load_cache()
if not force and not _is_stale(cache):
log.info('using cached data from %s', cache.get('fetched_at', ''))
log.info("using cached data from %s", cache.get("fetched_at", ""))
return cache # type: ignore[return-value]
try:
@@ -228,11 +236,11 @@ def _fetch_hashes(force: bool) -> dict[str, Any]:
commit = _get_commit()
data: dict[str, Any] = {
'source': 'mamedev/mame',
'version': version,
'commit': commit,
'fetched_at': datetime.now(timezone.utc).isoformat(),
'bios_sets': bios_sets,
"source": "mamedev/mame",
"version": version,
"commit": commit,
"fetched_at": datetime.now(timezone.utc).isoformat(),
"bios_sets": bios_sets,
}
_write_cache(data)
return data
@@ -243,34 +251,36 @@ def _fetch_hashes(force: bool) -> dict[str, Any]:
def _run(args: argparse.Namespace) -> None:
hashes = _fetch_hashes(args.force)
total_sets = len(hashes.get('bios_sets', {}))
version = hashes.get('version', 'unknown')
commit = hashes.get('commit', '')[:12]
total_sets = len(hashes.get("bios_sets", {}))
version = hashes.get("version", "unknown")
commit = hashes.get("commit", "")[:12]
if args.json:
json.dump(hashes, sys.stdout, indent=2, ensure_ascii=False)
sys.stdout.write('\n')
sys.stdout.write("\n")
return
print(f'mame-hashes: {total_sets} BIOS root sets from mamedev/mame'
f' @ {version} ({commit})')
print(
f"mame-hashes: {total_sets} BIOS root sets from mamedev/mame"
f" @ {version} ({commit})"
)
print()
profiles = _find_mame_profiles()
if not profiles:
print(' no profiles with mamedev/mame upstream found')
print(" no profiles with mamedev/mame upstream found")
return
for profile_path in profiles:
is_main = profile_path.name == 'mame.yml'
diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode='mame')
is_main = profile_path.name == "mame.yml"
diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode="mame")
lines = _format_diff(profile_path, diff, hashes, show_added=is_main)
for line in lines:
print(line)
if not args.dry_run:
updated = diff.get('updated', [])
added = diff.get('added', []) if is_main else []
updated = diff.get("updated", [])
added = diff.get("added", []) if is_main else []
if added or updated:
merge_mame_profile(
str(profile_path),
@@ -278,32 +288,32 @@ def _run(args: argparse.Namespace) -> None:
write=True,
add_new=is_main,
)
log.info('merged into %s', profile_path.name)
log.info("merged into %s", profile_path.name)
print()
if args.dry_run:
print('(dry run, no files modified)')
print("(dry run, no files modified)")
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the MAME hash scraper.

    Returns:
        A parser exposing three boolean flags, all defaulting to ``False``:
        ``--dry-run``, ``--json`` and ``--force``.
    """
    parser = argparse.ArgumentParser(
        prog="mame_hash_scraper",
        description="Fetch MAME BIOS hashes from source and merge into profiles.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="show diff only, do not modify profiles",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="output raw JSON to stdout",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="re-fetch even if cache is fresh",
    )
    return parser
@@ -311,12 +321,12 @@ def build_parser() -> argparse.ArgumentParser:
def main() -> None:
    """Script entry point: set up logging, parse arguments, run the scraper.

    Logging is configured at INFO level with a ``LEVEL: message`` format.
    The parsed arguments are then passed to ``_run``.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s: %(message)s",
    )
    parser = build_parser()
    args = parser.parse_args()
    _run(args)


if __name__ == "__main__":
    main()

View File

@@ -14,27 +14,27 @@ from pathlib import Path
# Macros that declare a machine entry
_MACHINE_MACROS = re.compile(
    r"\b(GAME|SYST|COMP|CONS)\s*\(",
    re.MULTILINE,
)

# ROM block boundaries; group 1 of _ROM_START is the set name.
_ROM_START = re.compile(r"ROM_START\s*\(\s*(\w+)\s*\)")
_ROM_END = re.compile(r"ROM_END")

# ROM_REGION variants: ROM_REGION, ROM_REGION16_BE, ROM_REGION16_LE, ROM_REGION32_LE, etc.
# Group 1 is the size (hex or decimal literal), group 2 the region tag.
_ROM_REGION = re.compile(
    r"ROM_REGION\w*\s*\("
    r"\s*(0x[\da-fA-F]+|\d+)\s*,"  # size
    r'\s*"([^"]+)"\s*,',  # tag
)

# ROM_SYSTEM_BIOS( index, label, description )
_ROM_SYSTEM_BIOS = re.compile(
    r"ROM_SYSTEM_BIOS\s*\("
    r"\s*(\d+)\s*,"  # index
    r'\s*"([^"]+)"\s*,'  # label
    r'\s*"([^"]+)"\s*\)',  # description
)
# All ROM_LOAD variants including custom BIOS macros.
@@ -44,23 +44,23 @@ _ROM_SYSTEM_BIOS = re.compile(
# The key pattern: any macro containing "ROM_LOAD" or "ROMX_LOAD" in its name,
# with the first quoted string being the ROM filename.
# Groups: 1 = ROM filename, 2 = offset, 3 = size (hex or decimal literals).
_ROM_LOAD = re.compile(
    r"\b\w*ROMX?_LOAD\w*\s*\("
    r'[^"]*'  # skip any args before the filename (e.g., bios index)
    r'"([^"]+)"\s*,'  # name (first quoted string)
    r"\s*(0x[\da-fA-F]+|\d+)\s*,"  # offset
    r"\s*(0x[\da-fA-F]+|\d+)\s*,",  # size
)

# CRC32 and SHA1 within a ROM_LOAD line
_CRC_SHA = re.compile(
    r"CRC\s*\(\s*([0-9a-fA-F]+)\s*\)"
    r"\s+"
    r"SHA1\s*\(\s*([0-9a-fA-F]+)\s*\)",
)

# Dump-quality flags and the ROM_BIOS(n) index reference.
_NO_DUMP = re.compile(r"\bNO_DUMP\b")
_BAD_DUMP = re.compile(r"\bBAD_DUMP\b")
_ROM_BIOS = re.compile(r"ROM_BIOS\s*\(\s*(\d+)\s*\)")
def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
@@ -77,8 +77,8 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
if block_end == -1:
continue
block = source[start:block_end + 1]
if 'MACHINE_IS_BIOS_ROOT' not in block:
block = source[start : block_end + 1]
if "MACHINE_IS_BIOS_ROOT" not in block:
continue
# Extract set name: first arg after the opening paren
@@ -97,11 +97,11 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
continue
set_name = args[1].strip()
line_no = source[:match.start()].count('\n') + 1
line_no = source[: match.start()].count("\n") + 1
results[set_name] = {
'source_file': filename,
'source_line': line_no,
"source_file": filename,
"source_line": line_no,
}
return results
@@ -115,7 +115,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]:
extracts all ROM entries. Skips NO_DUMP, flags BAD_DUMP.
"""
pattern = re.compile(
r'ROM_START\s*\(\s*' + re.escape(set_name) + r'\s*\)',
r"ROM_START\s*\(\s*" + re.escape(set_name) + r"\s*\)",
)
start_match = pattern.search(source)
if not start_match:
@@ -125,7 +125,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]:
if not end_match:
return []
block = source[start_match.end():end_match.start()]
block = source[start_match.end() : end_match.start()]
# Pre-expand macros: find #define macros in the file that contain
# ROM_LOAD/ROM_REGION/ROM_SYSTEM_BIOS calls, then expand their
@@ -144,26 +144,26 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]:
results: dict[str, dict] = {}
root = Path(base_path)
search_dirs = [root / 'src' / 'mame', root / 'src' / 'devices']
search_dirs = [root / "src" / "mame", root / "src" / "devices"]
for search_dir in search_dirs:
if not search_dir.is_dir():
continue
for dirpath, _dirnames, filenames in os.walk(search_dir):
for fname in filenames:
if not fname.endswith(('.cpp', '.c', '.h', '.hxx')):
if not fname.endswith((".cpp", ".c", ".h", ".hxx")):
continue
filepath = Path(dirpath) / fname
rel_path = str(filepath.relative_to(root))
content = filepath.read_text(encoding='utf-8', errors='replace')
content = filepath.read_text(encoding="utf-8", errors="replace")
bios_sets = find_bios_root_sets(content, rel_path)
for set_name, info in bios_sets.items():
roms = parse_rom_block(content, set_name)
results[set_name] = {
'source_file': info['source_file'],
'source_line': info['source_line'],
'roms': roms,
"source_file": info["source_file"],
"source_line": info["source_line"],
"roms": roms,
}
return results
@@ -171,13 +171,20 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]:
# Regex for #define macros that span multiple lines (backslash continuation).
# Group 1 is the macro name, group 2 the raw body including continuations.
_DEFINE_RE = re.compile(
    r"^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s*((?:.*\\\n)*.*)",
    re.MULTILINE,
)

# ROM-related tokens that indicate a macro is relevant for expansion
_ROM_TOKENS = {
    "ROM_LOAD",
    "ROMX_LOAD",
    "ROM_REGION",
    "ROM_SYSTEM_BIOS",
    "ROM_FILL",
    "ROM_COPY",
    "ROM_RELOAD",
}
def _collect_rom_macros(source: str) -> dict[str, str]:
@@ -193,14 +200,14 @@ def _collect_rom_macros(source: str) -> dict[str, str]:
name = m.group(1)
body = m.group(2)
# Join backslash-continued lines
body = body.replace('\\\n', ' ')
body = body.replace("\\\n", " ")
# Only keep macros that contain ROM-related tokens
if not any(tok in body for tok in _ROM_TOKENS):
continue
# Skip wrapper macros: if the body contains ROMX_LOAD/ROM_LOAD
# with unquoted args (formal parameters), it's a wrapper.
# These are already recognized by the _ROM_LOAD regex directly.
if re.search(r'ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,', body):
if re.search(r"ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,", body):
continue
macros[name] = body
return macros
@@ -223,7 +230,7 @@ def _expand_macros(block: str, macros: dict[str, str], depth: int = 5) -> str:
iterations += 1
for name, body in macros.items():
# Match macro invocation: NAME or NAME(args)
pattern = re.compile(r'\b' + re.escape(name) + r'(?:\s*\([^)]*\))?')
pattern = re.compile(r"\b" + re.escape(name) + r"(?:\s*\([^)]*\))?")
if pattern.search(block):
block = pattern.sub(body, block)
changed = True
@@ -237,9 +244,9 @@ def _find_closing_paren(source: str, start: int) -> int:
i = start
while i < len(source):
ch = source[i]
if ch == '(':
if ch == "(":
depth += 1
elif ch == ')':
elif ch == ")":
depth -= 1
if depth == 0:
return i
@@ -268,24 +275,24 @@ def _split_macro_args(inner: str) -> list[str]:
i += 1
if i < len(inner):
current.append(inner[i])
elif ch == '(':
elif ch == "(":
depth += 1
current.append(ch)
elif ch == ')':
elif ch == ")":
if depth == 0:
args.append(''.join(current))
args.append("".join(current))
break
depth -= 1
current.append(ch)
elif ch == ',' and depth == 0:
args.append(''.join(current))
elif ch == "," and depth == 0:
args.append("".join(current))
current = []
else:
current.append(ch)
i += 1
if current:
remaining = ''.join(current).strip()
remaining = "".join(current).strip()
if remaining:
args.append(remaining)
@@ -300,15 +307,15 @@ def _parse_rom_entries(block: str) -> list[dict]:
Processes matches in order of appearance to track region and BIOS context.
"""
roms: list[dict] = []
current_region = ''
current_region = ""
bios_labels: dict[int, tuple[str, str]] = {}
# Build a combined pattern that matches all interesting tokens
# and process them in order of occurrence
token_patterns = [
('region', _ROM_REGION),
('bios_label', _ROM_SYSTEM_BIOS),
('rom_load', _ROM_LOAD),
("region", _ROM_REGION),
("bios_label", _ROM_SYSTEM_BIOS),
("rom_load", _ROM_LOAD),
]
# Collect all matches with their positions
@@ -321,22 +328,22 @@ def _parse_rom_entries(block: str) -> list[dict]:
events.sort(key=lambda e: e[0])
for _pos, tag, m in events:
if tag == 'region':
if tag == "region":
current_region = m.group(2)
elif tag == 'bios_label':
elif tag == "bios_label":
idx = int(m.group(1))
bios_labels[idx] = (m.group(2), m.group(3))
elif tag == 'rom_load':
elif tag == "rom_load":
# Get the full macro call as context (find closing paren)
context_start = m.start()
# Find the opening paren of the ROM_LOAD macro
paren_pos = block.find('(', context_start)
paren_pos = block.find("(", context_start)
if paren_pos != -1:
close_pos = _find_closing_paren(block, paren_pos)
context_end = close_pos + 1 if close_pos != -1 else m.end() + 200
else:
context_end = m.end() + 200
context = block[context_start:min(context_end, len(block))]
context = block[context_start : min(context_end, len(block))]
if _NO_DUMP.search(context):
continue
@@ -345,8 +352,8 @@ def _parse_rom_entries(block: str) -> list[dict]:
rom_size = _parse_int(m.group(3))
crc_sha_match = _CRC_SHA.search(context)
crc32 = ''
sha1 = ''
crc32 = ""
sha1 = ""
if crc_sha_match:
crc32 = crc_sha_match.group(1).lower()
sha1 = crc_sha_match.group(2).lower()
@@ -354,8 +361,8 @@ def _parse_rom_entries(block: str) -> list[dict]:
bad_dump = bool(_BAD_DUMP.search(context))
bios_index = None
bios_label = ''
bios_description = ''
bios_label = ""
bios_description = ""
bios_ref = _ROM_BIOS.search(context)
if bios_ref:
bios_index = int(bios_ref.group(1))
@@ -363,18 +370,18 @@ def _parse_rom_entries(block: str) -> list[dict]:
bios_label, bios_description = bios_labels[bios_index]
entry: dict = {
'name': rom_name,
'size': rom_size,
'crc32': crc32,
'sha1': sha1,
'region': current_region,
'bad_dump': bad_dump,
"name": rom_name,
"size": rom_size,
"crc32": crc32,
"sha1": sha1,
"region": current_region,
"bad_dump": bad_dump,
}
if bios_index is not None:
entry['bios_index'] = bios_index
entry['bios_label'] = bios_label
entry['bios_description'] = bios_description
entry["bios_index"] = bios_index
entry["bios_label"] = bios_label
entry["bios_description"] = bios_description
roms.append(entry)
@@ -384,6 +391,6 @@ def _parse_rom_entries(block: str) -> list[dict]:
def _parse_int(value: str) -> int:
    """Parse an integer that may be hex (0x...) or decimal.

    Surrounding whitespace is ignored. A ``0x``/``0X`` prefix selects
    base 16; anything else is parsed as base 10.

    Raises:
        ValueError: if the text is not a valid integer in the chosen base.
    """
    value = value.strip()
    # startswith() accepts a tuple, covering both prefix spellings at once.
    if value.startswith(("0x", "0X")):
        return int(value, 16)
    return int(value)

View File

@@ -16,8 +16,6 @@ Recalbox verification logic:
from __future__ import annotations
import sys
import urllib.request
import urllib.error
import xml.etree.ElementTree as ET
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag
@@ -121,17 +119,19 @@ class Scraper(BaseScraper):
for bios_elem in system_elem.findall("bios"):
paths_str = bios_elem.get("path", "")
md5_str = bios_elem.get("md5", "")
core = bios_elem.get("core", "")
bios_elem.get("core", "")
mandatory = bios_elem.get("mandatory", "true") != "false"
hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false"
note = bios_elem.get("note", "")
bios_elem.get("hashMatchMandatory", "true") != "false"
bios_elem.get("note", "")
paths = [p.strip() for p in paths_str.split("|") if p.strip()]
if not paths:
continue
primary_path = paths[0]
name = primary_path.split("/")[-1] if "/" in primary_path else primary_path
name = (
primary_path.split("/")[-1] if "/" in primary_path else primary_path
)
md5_list = [m.strip() for m in md5_str.split(",") if m.strip()]
all_md5 = ",".join(md5_list) if md5_list else None
@@ -141,14 +141,16 @@ class Scraper(BaseScraper):
continue
seen.add(dedup_key)
requirements.append(BiosRequirement(
name=name,
system=system_slug,
md5=all_md5,
destination=primary_path,
required=mandatory,
native_id=platform,
))
requirements.append(
BiosRequirement(
name=name,
system=system_slug,
md5=all_md5,
destination=primary_path,
required=mandatory,
native_id=platform,
)
)
return requirements
@@ -168,7 +170,9 @@ class Scraper(BaseScraper):
md5_str = bios_elem.get("md5", "")
core = bios_elem.get("core", "")
mandatory = bios_elem.get("mandatory", "true") != "false"
hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false"
hash_match_mandatory = (
bios_elem.get("hashMatchMandatory", "true") != "false"
)
note = bios_elem.get("note", "")
paths = [p.strip() for p in paths_str.split("|") if p.strip()]
@@ -179,17 +183,19 @@ class Scraper(BaseScraper):
name = paths[0].split("/")[-1] if "/" in paths[0] else paths[0]
requirements.append({
"name": name,
"system": system_slug,
"system_name": system_name,
"paths": paths,
"md5_list": md5_list,
"core": core,
"mandatory": mandatory,
"hash_match_mandatory": hash_match_mandatory,
"note": note,
})
requirements.append(
{
"name": name,
"system": system_slug,
"system_name": system_name,
"paths": paths,
"md5_list": md5_list,
"core": core,
"mandatory": mandatory,
"hash_match_mandatory": hash_match_mandatory,
"note": note,
}
)
return requirements
@@ -245,7 +251,9 @@ def main():
parser = argparse.ArgumentParser(description="Scrape Recalbox es_bios.xml")
parser.add_argument("--dry-run", action="store_true")
parser.add_argument("--json", action="store_true")
parser.add_argument("--full", action="store_true", help="Show full Recalbox-specific fields")
parser.add_argument(
"--full", action="store_true", help="Show full Recalbox-specific fields"
)
parser.add_argument("--output", "-o")
args = parser.parse_args()
@@ -264,6 +272,7 @@ def main():
if args.dry_run:
from collections import defaultdict
by_system = defaultdict(list)
for r in reqs:
by_system[r.system].append(r)
@@ -272,7 +281,7 @@ def main():
for f in files[:5]:
print(f" {f.name} (md5={f.md5[:12] if f.md5 else 'N/A'}...)")
if len(files) > 5:
print(f" ... +{len(files)-5} more")
print(f" ... +{len(files) - 5} more")
print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
return

View File

@@ -9,9 +9,6 @@ Hash: MD5 primary
from __future__ import annotations
import json
import sys
import urllib.request
import urllib.error
try:
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
@@ -43,7 +40,6 @@ class Scraper(BaseScraper):
super().__init__(url=url)
self._parsed: dict | None = None
def _parse_json(self) -> dict:
if self._parsed is not None:
return self._parsed
@@ -89,13 +85,15 @@ class Scraper(BaseScraper):
name = file_path.split("/")[-1] if "/" in file_path else file_path
requirements.append(BiosRequirement(
name=name,
system=SYSTEM_SLUG_MAP.get(sys_key, sys_key),
md5=md5 or None,
destination=file_path,
required=True,
))
requirements.append(
BiosRequirement(
name=name,
system=SYSTEM_SLUG_MAP.get(sys_key, sys_key),
md5=md5 or None,
destination=file_path,
required=True,
)
)
return requirements
@@ -170,6 +168,7 @@ class Scraper(BaseScraper):
def main():
    """Command-line entry point for the retrobat scraper.

    Hands the module's ``Scraper`` class and a description string to the
    shared ``scraper_cli`` helper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape retrobat BIOS requirements")

View File

@@ -29,8 +29,8 @@ import json
import os
import re
import sys
import urllib.request
import urllib.error
import urllib.request
from pathlib import Path
try:
@@ -43,16 +43,16 @@ PLATFORM_NAME = "retrodeck"
COMPONENTS_REPO = "RetroDECK/components"
COMPONENTS_BRANCH = "main"
# GitHub git-trees API endpoint for the components branch.
COMPONENTS_API_URL = (
    f"https://api.github.com/repos/{COMPONENTS_REPO}/git/trees/{COMPONENTS_BRANCH}"
)
# Base URL for raw file downloads from the same branch.
RAW_BASE = f"https://raw.githubusercontent.com/{COMPONENTS_REPO}/{COMPONENTS_BRANCH}"
# Component names excluded when the list of cores is built.
SKIP_DIRS = {"archive_later", "archive_old", "automation-tools", ".github"}
NON_EMULATOR_COMPONENTS = {
    "framework",
    "es-de",
    "steam-rom-manager",
    "flips",
    "portmaster",
}
# RetroDECK system ID -> retrobios slug.
@@ -358,13 +358,20 @@ class Scraper(BaseScraper):
required_raw = entry.get("required", "")
required = bool(required_raw) and str(required_raw).lower() not in (
"false", "no", "optional", "",
"false",
"no",
"optional",
"",
)
key = (system, filename.lower())
if key in seen:
existing = next(
(r for r in requirements if (r.system, r.name.lower()) == key),
(
r
for r in requirements
if (r.system, r.name.lower()) == key
),
None,
)
if existing and md5 and existing.md5 and md5 != existing.md5:
@@ -376,13 +383,15 @@ class Scraper(BaseScraper):
continue
seen.add(key)
requirements.append(BiosRequirement(
name=filename,
system=system,
destination=destination,
md5=md5,
required=required,
))
requirements.append(
BiosRequirement(
name=filename,
system=system,
destination=destination,
md5=md5,
required=required,
)
)
return requirements
@@ -390,11 +399,14 @@ class Scraper(BaseScraper):
reqs = self.fetch_requirements()
manifests = self._get_manifests()
cores = sorted({
comp_name for comp_name, _ in manifests
if comp_name not in SKIP_DIRS
and comp_name not in NON_EMULATOR_COMPONENTS
})
cores = sorted(
{
comp_name
for comp_name, _ in manifests
if comp_name not in SKIP_DIRS
and comp_name not in NON_EMULATOR_COMPONENTS
}
)
systems: dict[str, dict] = {}
for req in reqs:
@@ -423,6 +435,7 @@ class Scraper(BaseScraper):
def main() -> None:
    """Command-line entry point for the RetroDECK scraper.

    Hands the module's ``Scraper`` class and a description string to the
    shared ``scraper_cli`` helper.
    """
    from scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape RetroDECK BIOS requirements")

View File

@@ -138,16 +138,18 @@ class Scraper(BaseScraper):
crc32 = (entry.get("crc") or "").strip() or None
size = int(entry["size"]) if entry.get("size") else None
requirements.append(BiosRequirement(
name=filename,
system=system,
sha1=sha1,
md5=md5,
crc32=crc32,
size=size,
destination=f"{igdb_slug}/{filename}",
required=True,
))
requirements.append(
BiosRequirement(
name=filename,
system=system,
sha1=sha1,
md5=md5,
crc32=crc32,
size=size,
destination=f"{igdb_slug}/{filename}",
required=True,
)
)
return requirements
@@ -164,7 +166,7 @@ class Scraper(BaseScraper):
for key in list(data.keys())[:5]:
if ":" not in key:
return False
_, entry = key.split(":", 1), data[key]
_, _entry = key.split(":", 1), data[key]
if not isinstance(data[key], dict):
return False
if "md5" not in data[key] and "sha1" not in data[key]:
@@ -217,6 +219,7 @@ class Scraper(BaseScraper):
def main():
    """Command-line entry point for the RomM scraper.

    Hands the module's ``Scraper`` class and a description string to the
    shared ``scraper_cli`` helper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape RomM BIOS requirements")

View File

@@ -2,6 +2,7 @@
Auto-detects *_targets_scraper.py files and exposes their scrapers.
"""
from __future__ import annotations
import importlib

View File

@@ -6,6 +6,7 @@ Sources (batocera-linux/batocera.linux):
- package/batocera/emulationstation/batocera-es-system/es_systems.yml
-- emulator requireAnyOf flag mapping
"""
from __future__ import annotations
import argparse
@@ -35,23 +36,23 @@ _HEADERS = {
"Accept": "application/vnd.github.v3+json",
}
_TARGET_FLAG_RE = re.compile(r'^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y', re.MULTILINE)
_TARGET_FLAG_RE = re.compile(r"^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y", re.MULTILINE)
# Matches: select BR2_PACKAGE_FOO (optional: if CONDITION)
# Condition may span multiple lines (backslash continuation)
_SELECT_RE = re.compile(
r'^\s+select\s+(BR2_PACKAGE_\w+)' # package being selected
r'(?:\s+if\s+((?:[^\n]|\\\n)+?))?' # optional "if CONDITION" (may continue with \)
r'(?:\s*#[^\n]*)?$', # optional trailing comment
r"^\s+select\s+(BR2_PACKAGE_\w+)" # package being selected
r"(?:\s+if\s+((?:[^\n]|\\\n)+?))?" # optional "if CONDITION" (may continue with \)
r"(?:\s*#[^\n]*)?$", # optional trailing comment
re.MULTILINE,
)
# Meta-flag definition: "if COND\n\tconfig DERIVED_FLAG\n\t...\nendif"
_META_BLOCK_RE = re.compile(
r'^if\s+((?:[^\n]|\\\n)+?)\n' # condition (may span lines via \)
r'(?:.*?\n)*?' # optional lines before the config
r'\s+config\s+(BR2_PACKAGE_\w+)' # derived flag name
r'.*?^endif', # end of block
r"^if\s+((?:[^\n]|\\\n)+?)\n" # condition (may span lines via \)
r"(?:.*?\n)*?" # optional lines before the config
r"\s+config\s+(BR2_PACKAGE_\w+)" # derived flag name
r".*?^endif", # end of block
re.MULTILINE | re.DOTALL,
)
@@ -80,7 +81,7 @@ def _fetch_json(url: str) -> list | dict | None:
def _normalise_condition(raw: str) -> str:
"""Strip backslash-continuations and collapse whitespace."""
return re.sub(r'\\\n\s*', ' ', raw).strip()
return re.sub(r"\\\n\s*", " ", raw).strip()
def _tokenise(condition: str) -> list[str]:
@@ -89,14 +90,16 @@ def _tokenise(condition: str) -> list[str]:
return token_re.findall(condition)
def _check_condition(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
def _check_condition(
tokens: list[str], pos: int, active: frozenset[str]
) -> tuple[bool, int]:
"""Recursive descent check of a Kconfig boolean expression."""
return _check_or(tokens, pos, active)
def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
left, pos = _check_and(tokens, pos, active)
while pos < len(tokens) and tokens[pos] == '||':
while pos < len(tokens) and tokens[pos] == "||":
pos += 1
right, pos = _check_and(tokens, pos, active)
left = left or right
@@ -105,7 +108,7 @@ def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool
def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
left, pos = _check_not(tokens, pos, active)
while pos < len(tokens) and tokens[pos] == '&&':
while pos < len(tokens) and tokens[pos] == "&&":
pos += 1
right, pos = _check_not(tokens, pos, active)
left = left and right
@@ -113,24 +116,26 @@ def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[boo
def _check_not(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
if pos < len(tokens) and tokens[pos] == '!':
if pos < len(tokens) and tokens[pos] == "!":
pos += 1
val, pos = _check_atom(tokens, pos, active)
return not val, pos
return _check_atom(tokens, pos, active)
def _check_atom(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
def _check_atom(
tokens: list[str], pos: int, active: frozenset[str]
) -> tuple[bool, int]:
if pos >= len(tokens):
return True, pos
tok = tokens[pos]
if tok == '(':
if tok == "(":
pos += 1
val, pos = _check_or(tokens, pos, active)
if pos < len(tokens) and tokens[pos] == ')':
if pos < len(tokens) and tokens[pos] == ")":
pos += 1
return val, pos
if tok.startswith('BR2_'):
if tok.startswith("BR2_"):
pos += 1
return tok in active, pos
if tok.startswith('"'):
@@ -170,7 +175,9 @@ def _parse_meta_flags(text: str) -> list[tuple[str, str]]:
return results
def _expand_flags(primary_flag: str, meta_rules: list[tuple[str, str]]) -> frozenset[str]:
def _expand_flags(
primary_flag: str, meta_rules: list[tuple[str, str]]
) -> frozenset[str]:
"""Given a board's primary flag, expand to all active derived flags.
Iterates until stable (handles chained derivations like X86_64_ANY -> X86_ANY).
@@ -194,7 +201,7 @@ def _parse_selects(text: str) -> list[tuple[str, str]]:
results: list[tuple[str, str]] = []
for m in _SELECT_RE.finditer(text):
pkg = m.group(1)
cond = _normalise_condition(m.group(2) or '')
cond = _normalise_condition(m.group(2) or "")
results.append((pkg, cond))
return results
@@ -261,7 +268,8 @@ class Scraper(BaseTargetScraper):
if not data or not isinstance(data, list):
return []
return [
item["name"] for item in data
item["name"]
for item in data
if isinstance(item, dict)
and item.get("name", "").startswith("batocera-")
and item.get("name", "").endswith(".board")

View File

@@ -4,6 +4,7 @@ Sources:
SteamOS: dragoonDorise/EmuDeck -functions/EmuScripts/*.sh
Windows: EmuDeck/emudeck-we -functions/EmuScripts/*.ps1
"""
from __future__ import annotations
import argparse
@@ -20,8 +21,12 @@ from . import BaseTargetScraper
PLATFORM_NAME = "emudeck"
STEAMOS_API = "https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts"
WINDOWS_API = "https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts"
STEAMOS_API = (
"https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts"
)
WINDOWS_API = (
"https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts"
)
# Map EmuDeck script names to emulator profile keys
# Script naming: emuDeckDolphin.sh -> dolphin
@@ -70,8 +75,8 @@ def _list_emuscripts(api_url: str) -> list[str]:
def _script_to_core(filename: str) -> str | None:
"""Convert EmuScripts filename to core profile key."""
# Strip extension and emuDeck prefix
name = re.sub(r'\.(sh|ps1)$', '', filename, flags=re.IGNORECASE)
name = re.sub(r'^emuDeck', '', name, flags=re.IGNORECASE)
name = re.sub(r"\.(sh|ps1)$", "", filename, flags=re.IGNORECASE)
name = re.sub(r"^emuDeck", "", name, flags=re.IGNORECASE)
if not name:
return None
key = name.lower()
@@ -86,8 +91,9 @@ class Scraper(BaseTargetScraper):
def __init__(self, url: str = "https://github.com/dragoonDorise/EmuDeck"):
super().__init__(url=url)
def _fetch_cores_for_target(self, api_url: str, label: str,
arch: str = "x86_64") -> list[str]:
def _fetch_cores_for_target(
self, api_url: str, label: str, arch: str = "x86_64"
) -> list[str]:
print(f" fetching {label} EmuScripts...", file=sys.stderr)
scripts = _list_emuscripts(api_url)
cores: list[str] = []
@@ -99,7 +105,7 @@ class Scraper(BaseTargetScraper):
seen.add(core)
cores.append(core)
# Detect RetroArch presence (provides all libretro cores)
name = re.sub(r'\.(sh|ps1)$', '', script, flags=re.IGNORECASE)
name = re.sub(r"\.(sh|ps1)$", "", script, flags=re.IGNORECASE)
if name.lower() in ("emudeckretroarch", "retroarch_maincfg"):
has_retroarch = True
@@ -112,15 +118,18 @@ class Scraper(BaseTargetScraper):
seen.add(c)
cores.append(c)
print(f" {label}: {standalone_count} standalone + "
f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total",
file=sys.stderr)
print(
f" {label}: {standalone_count} standalone + "
f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total",
file=sys.stderr,
)
return sorted(cores)
@staticmethod
def _load_retroarch_cores(arch: str) -> list[str]:
"""Load RetroArch target cores for given architecture."""
import os
target_path = os.path.join("platforms", "targets", "retroarch.yml")
if not os.path.exists(target_path):
return []
@@ -157,9 +166,7 @@ class Scraper(BaseTargetScraper):
def main() -> None:
parser = argparse.ArgumentParser(
description="Scrape EmuDeck emulator targets"
)
parser = argparse.ArgumentParser(description="Scrape EmuDeck emulator targets")
parser.add_argument("--dry-run", action="store_true", help="Show target summary")
parser.add_argument("--output", "-o", help="Output YAML file")
args = parser.parse_args()

View File

@@ -16,6 +16,7 @@ Buildbot structure varies by platform:
- ps2: playstation/ps2/latest/ -> *_libretro_ps2.elf.zip
- vita: bundles only (VPK) - no individual cores
"""
from __future__ import annotations
import argparse
@@ -64,7 +65,9 @@ RECIPE_TARGETS: list[tuple[str, str, str]] = [
("playstation/vita", "playstation-vita", "armv7"),
]
RECIPE_BASE_URL = "https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/"
RECIPE_BASE_URL = (
"https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/"
)
# Match any href containing _libretro followed by a platform-specific extension
# Covers: .so.zip, .dll.zip, .dylib.zip, .nro.zip, .dol.zip, .rpx.zip,
@@ -75,7 +78,7 @@ _HREF_RE = re.compile(
)
# Extract core name: everything before _libretro
_CORE_NAME_RE = re.compile(r'^(.+?)_libretro')
_CORE_NAME_RE = re.compile(r"^(.+?)_libretro")
class Scraper(BaseTargetScraper):
@@ -180,12 +183,16 @@ def main() -> None:
data = scraper.fetch_targets()
total_cores = sum(len(t["cores"]) for t in data["targets"].values())
print(f"\n{len(data['targets'])} targets, {total_cores} total core entries",
file=sys.stderr)
print(
f"\n{len(data['targets'])} targets, {total_cores} total core entries",
file=sys.stderr,
)
if args.dry_run:
for name, info in sorted(data["targets"].items()):
print(f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores")
print(
f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores"
)
return
if args.output:

View File

@@ -4,6 +4,7 @@ Source: https://github.com/RetroPie/RetroPie-Setup/tree/master/scriptmodules/lib
Parses rp_module_id and rp_module_flags from each scriptmodule to determine
which platforms each core supports.
"""
from __future__ import annotations
import argparse