mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
chore: lint and format entire codebase
Run ruff check --fix: remove unused imports (F401), fix f-strings without placeholders (F541), remove unused variables (F841), and fix a duplicate dict key (F601).
Run isort --profile black: normalize import ordering across all files.
Run ruff format: apply consistent, black-compatible formatting to all 58 Python files.
Three intentional E402 violations remain: those imports come after a require_yaml() call because they must execute only once yaml is available.
This commit is contained in:
@@ -34,40 +34,40 @@ def merge_mame_profile(
|
||||
profile = _load_yaml(profile_path)
|
||||
hashes = _load_json(hashes_path)
|
||||
|
||||
profile['core_version'] = hashes.get('version', profile.get('core_version'))
|
||||
profile["core_version"] = hashes.get("version", profile.get("core_version"))
|
||||
|
||||
files = profile.get('files', [])
|
||||
bios_zip, non_bios = _split_files(files, lambda f: f.get('category') == 'bios_zip')
|
||||
files = profile.get("files", [])
|
||||
bios_zip, non_bios = _split_files(files, lambda f: f.get("category") == "bios_zip")
|
||||
|
||||
existing_by_name: dict[str, dict] = {}
|
||||
for entry in bios_zip:
|
||||
key = _zip_name_to_set(entry['name'])
|
||||
key = _zip_name_to_set(entry["name"])
|
||||
existing_by_name[key] = entry
|
||||
|
||||
updated_bios: list[dict] = []
|
||||
matched_names: set[str] = set()
|
||||
|
||||
for set_name, set_data in hashes.get('bios_sets', {}).items():
|
||||
contents = _build_contents(set_data.get('roms', []))
|
||||
for set_name, set_data in hashes.get("bios_sets", {}).items():
|
||||
contents = _build_contents(set_data.get("roms", []))
|
||||
source_ref = _build_source_ref(set_data)
|
||||
|
||||
if set_name in existing_by_name:
|
||||
# Update existing entry: preserve manual fields, update contents
|
||||
entry = existing_by_name[set_name].copy()
|
||||
entry['contents'] = contents
|
||||
entry["contents"] = contents
|
||||
if source_ref:
|
||||
entry['source_ref'] = source_ref
|
||||
entry["source_ref"] = source_ref
|
||||
updated_bios.append(entry)
|
||||
matched_names.add(set_name)
|
||||
elif add_new:
|
||||
# New BIOS set — only added to the main profile
|
||||
entry = {
|
||||
'name': f'{set_name}.zip',
|
||||
'required': True,
|
||||
'category': 'bios_zip',
|
||||
'system': None,
|
||||
'source_ref': source_ref,
|
||||
'contents': contents,
|
||||
"name": f"{set_name}.zip",
|
||||
"required": True,
|
||||
"category": "bios_zip",
|
||||
"system": None,
|
||||
"source_ref": source_ref,
|
||||
"contents": contents,
|
||||
}
|
||||
updated_bios.append(entry)
|
||||
|
||||
@@ -77,7 +77,7 @@ def merge_mame_profile(
|
||||
if set_name not in matched_names:
|
||||
updated_bios.append(entry)
|
||||
|
||||
profile['files'] = non_bios + updated_bios
|
||||
profile["files"] = non_bios + updated_bios
|
||||
|
||||
if write:
|
||||
_backup_and_write(profile_path, profile)
|
||||
@@ -102,49 +102,49 @@ def merge_fbneo_profile(
|
||||
profile = _load_yaml(profile_path)
|
||||
hashes = _load_json(hashes_path)
|
||||
|
||||
profile['core_version'] = hashes.get('version', profile.get('core_version'))
|
||||
profile["core_version"] = hashes.get("version", profile.get("core_version"))
|
||||
|
||||
files = profile.get('files', [])
|
||||
archive_files, non_archive = _split_files(files, lambda f: 'archive' in f)
|
||||
files = profile.get("files", [])
|
||||
archive_files, non_archive = _split_files(files, lambda f: "archive" in f)
|
||||
|
||||
existing_by_key: dict[tuple[str, str], dict] = {}
|
||||
for entry in archive_files:
|
||||
key = (entry['archive'], entry['name'])
|
||||
key = (entry["archive"], entry["name"])
|
||||
existing_by_key[key] = entry
|
||||
|
||||
merged: list[dict] = []
|
||||
matched_keys: set[tuple[str, str]] = set()
|
||||
|
||||
for set_name, set_data in hashes.get('bios_sets', {}).items():
|
||||
archive_name = f'{set_name}.zip'
|
||||
for set_name, set_data in hashes.get("bios_sets", {}).items():
|
||||
archive_name = f"{set_name}.zip"
|
||||
source_ref = _build_source_ref(set_data)
|
||||
|
||||
for rom in set_data.get('roms', []):
|
||||
rom_name = rom['name']
|
||||
for rom in set_data.get("roms", []):
|
||||
rom_name = rom["name"]
|
||||
key = (archive_name, rom_name)
|
||||
|
||||
if key in existing_by_key:
|
||||
entry = existing_by_key[key].copy()
|
||||
entry['size'] = rom['size']
|
||||
entry['crc32'] = rom['crc32']
|
||||
if rom.get('sha1'):
|
||||
entry['sha1'] = rom['sha1']
|
||||
entry["size"] = rom["size"]
|
||||
entry["crc32"] = rom["crc32"]
|
||||
if rom.get("sha1"):
|
||||
entry["sha1"] = rom["sha1"]
|
||||
if source_ref:
|
||||
entry['source_ref'] = source_ref
|
||||
entry["source_ref"] = source_ref
|
||||
merged.append(entry)
|
||||
matched_keys.add(key)
|
||||
elif add_new:
|
||||
entry = {
|
||||
'name': rom_name,
|
||||
'archive': archive_name,
|
||||
'required': True,
|
||||
'size': rom['size'],
|
||||
'crc32': rom['crc32'],
|
||||
"name": rom_name,
|
||||
"archive": archive_name,
|
||||
"required": True,
|
||||
"size": rom["size"],
|
||||
"crc32": rom["crc32"],
|
||||
}
|
||||
if rom.get('sha1'):
|
||||
entry['sha1'] = rom['sha1']
|
||||
if rom.get("sha1"):
|
||||
entry["sha1"] = rom["sha1"]
|
||||
if source_ref:
|
||||
entry['source_ref'] = source_ref
|
||||
entry["source_ref"] = source_ref
|
||||
merged.append(entry)
|
||||
|
||||
# Entries not matched stay untouched
|
||||
@@ -152,7 +152,7 @@ def merge_fbneo_profile(
|
||||
if key not in matched_keys:
|
||||
merged.append(entry)
|
||||
|
||||
profile['files'] = non_archive + merged
|
||||
profile["files"] = non_archive + merged
|
||||
|
||||
if write:
|
||||
_backup_and_write_fbneo(profile_path, profile, hashes)
|
||||
@@ -163,7 +163,7 @@ def merge_fbneo_profile(
|
||||
def compute_diff(
|
||||
profile_path: str,
|
||||
hashes_path: str,
|
||||
mode: str = 'mame',
|
||||
mode: str = "mame",
|
||||
) -> dict[str, Any]:
|
||||
"""Compute diff between profile and hashes without writing.
|
||||
|
||||
@@ -172,7 +172,7 @@ def compute_diff(
|
||||
profile = _load_yaml(profile_path)
|
||||
hashes = _load_json(hashes_path)
|
||||
|
||||
if mode == 'mame':
|
||||
if mode == "mame":
|
||||
return _diff_mame(profile, hashes)
|
||||
return _diff_fbneo(profile, hashes)
|
||||
|
||||
@@ -181,26 +181,26 @@ def _diff_mame(
|
||||
profile: dict[str, Any],
|
||||
hashes: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
files = profile.get('files', [])
|
||||
bios_zip, _ = _split_files(files, lambda f: f.get('category') == 'bios_zip')
|
||||
files = profile.get("files", [])
|
||||
bios_zip, _ = _split_files(files, lambda f: f.get("category") == "bios_zip")
|
||||
|
||||
existing_by_name: dict[str, dict] = {}
|
||||
for entry in bios_zip:
|
||||
existing_by_name[_zip_name_to_set(entry['name'])] = entry
|
||||
existing_by_name[_zip_name_to_set(entry["name"])] = entry
|
||||
|
||||
added: list[str] = []
|
||||
updated: list[str] = []
|
||||
unchanged = 0
|
||||
|
||||
bios_sets = hashes.get('bios_sets', {})
|
||||
bios_sets = hashes.get("bios_sets", {})
|
||||
for set_name, set_data in bios_sets.items():
|
||||
if set_name not in existing_by_name:
|
||||
added.append(set_name)
|
||||
continue
|
||||
|
||||
old_entry = existing_by_name[set_name]
|
||||
new_contents = _build_contents(set_data.get('roms', []))
|
||||
old_contents = old_entry.get('contents', [])
|
||||
new_contents = _build_contents(set_data.get("roms", []))
|
||||
old_contents = old_entry.get("contents", [])
|
||||
|
||||
if _contents_differ(old_contents, new_contents):
|
||||
updated.append(set_name)
|
||||
@@ -213,11 +213,11 @@ def _diff_mame(
|
||||
)
|
||||
|
||||
return {
|
||||
'added': added,
|
||||
'updated': updated,
|
||||
'removed': [],
|
||||
'unchanged': unchanged,
|
||||
'out_of_scope': out_of_scope,
|
||||
"added": added,
|
||||
"updated": updated,
|
||||
"removed": [],
|
||||
"unchanged": unchanged,
|
||||
"out_of_scope": out_of_scope,
|
||||
}
|
||||
|
||||
|
||||
@@ -225,24 +225,24 @@ def _diff_fbneo(
|
||||
profile: dict[str, Any],
|
||||
hashes: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
files = profile.get('files', [])
|
||||
archive_files, _ = _split_files(files, lambda f: 'archive' in f)
|
||||
files = profile.get("files", [])
|
||||
archive_files, _ = _split_files(files, lambda f: "archive" in f)
|
||||
|
||||
existing_by_key: dict[tuple[str, str], dict] = {}
|
||||
for entry in archive_files:
|
||||
existing_by_key[(entry['archive'], entry['name'])] = entry
|
||||
existing_by_key[(entry["archive"], entry["name"])] = entry
|
||||
|
||||
added: list[str] = []
|
||||
updated: list[str] = []
|
||||
unchanged = 0
|
||||
|
||||
seen_keys: set[tuple[str, str]] = set()
|
||||
bios_sets = hashes.get('bios_sets', {})
|
||||
bios_sets = hashes.get("bios_sets", {})
|
||||
|
||||
for set_name, set_data in bios_sets.items():
|
||||
archive_name = f'{set_name}.zip'
|
||||
for rom in set_data.get('roms', []):
|
||||
key = (archive_name, rom['name'])
|
||||
archive_name = f"{set_name}.zip"
|
||||
for rom in set_data.get("roms", []):
|
||||
key = (archive_name, rom["name"])
|
||||
seen_keys.add(key)
|
||||
label = f"{archive_name}:{rom['name']}"
|
||||
|
||||
@@ -251,7 +251,9 @@ def _diff_fbneo(
|
||||
continue
|
||||
|
||||
old = existing_by_key[key]
|
||||
if old.get('crc32') != rom.get('crc32') or old.get('size') != rom.get('size'):
|
||||
if old.get("crc32") != rom.get("crc32") or old.get("size") != rom.get(
|
||||
"size"
|
||||
):
|
||||
updated.append(label)
|
||||
else:
|
||||
unchanged += 1
|
||||
@@ -259,11 +261,11 @@ def _diff_fbneo(
|
||||
out_of_scope = sum(1 for k in existing_by_key if k not in seen_keys)
|
||||
|
||||
return {
|
||||
'added': added,
|
||||
'updated': updated,
|
||||
'removed': [],
|
||||
'unchanged': unchanged,
|
||||
'out_of_scope': out_of_scope,
|
||||
"added": added,
|
||||
"updated": updated,
|
||||
"removed": [],
|
||||
"unchanged": unchanged,
|
||||
"out_of_scope": out_of_scope,
|
||||
}
|
||||
|
||||
|
||||
@@ -271,12 +273,12 @@ def _diff_fbneo(
|
||||
|
||||
|
||||
def _load_yaml(path: str) -> dict[str, Any]:
|
||||
with open(path, encoding='utf-8') as f:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
|
||||
|
||||
def _load_json(path: str) -> dict[str, Any]:
|
||||
with open(path, encoding='utf-8') as f:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@@ -295,7 +297,7 @@ def _split_files(
|
||||
|
||||
|
||||
def _zip_name_to_set(name: str) -> str:
|
||||
if name.endswith('.zip'):
|
||||
if name.endswith(".zip"):
|
||||
return name[:-4]
|
||||
return name
|
||||
|
||||
@@ -304,42 +306,42 @@ def _build_contents(roms: list[dict]) -> list[dict]:
|
||||
contents: list[dict] = []
|
||||
for rom in roms:
|
||||
entry: dict[str, Any] = {
|
||||
'name': rom['name'],
|
||||
'size': rom['size'],
|
||||
'crc32': rom['crc32'],
|
||||
"name": rom["name"],
|
||||
"size": rom["size"],
|
||||
"crc32": rom["crc32"],
|
||||
}
|
||||
if rom.get('sha1'):
|
||||
entry['sha1'] = rom['sha1']
|
||||
desc = rom.get('bios_description') or rom.get('bios_label') or ''
|
||||
if rom.get("sha1"):
|
||||
entry["sha1"] = rom["sha1"]
|
||||
desc = rom.get("bios_description") or rom.get("bios_label") or ""
|
||||
if desc:
|
||||
entry['description'] = desc
|
||||
if rom.get('bad_dump'):
|
||||
entry['bad_dump'] = True
|
||||
entry["description"] = desc
|
||||
if rom.get("bad_dump"):
|
||||
entry["bad_dump"] = True
|
||||
contents.append(entry)
|
||||
return contents
|
||||
|
||||
|
||||
def _build_source_ref(set_data: dict) -> str:
|
||||
source_file = set_data.get('source_file', '')
|
||||
source_line = set_data.get('source_line')
|
||||
source_file = set_data.get("source_file", "")
|
||||
source_line = set_data.get("source_line")
|
||||
if source_file and source_line is not None:
|
||||
return f'{source_file}:{source_line}'
|
||||
return f"{source_file}:{source_line}"
|
||||
return source_file
|
||||
|
||||
|
||||
def _contents_differ(old: list[dict], new: list[dict]) -> bool:
|
||||
if len(old) != len(new):
|
||||
return True
|
||||
old_by_name = {c['name']: c for c in old}
|
||||
old_by_name = {c["name"]: c for c in old}
|
||||
for entry in new:
|
||||
prev = old_by_name.get(entry['name'])
|
||||
prev = old_by_name.get(entry["name"])
|
||||
if prev is None:
|
||||
return True
|
||||
if prev.get('crc32') != entry.get('crc32'):
|
||||
if prev.get("crc32") != entry.get("crc32"):
|
||||
return True
|
||||
if prev.get('size') != entry.get('size'):
|
||||
if prev.get("size") != entry.get("size"):
|
||||
return True
|
||||
if prev.get('sha1') != entry.get('sha1'):
|
||||
if prev.get("sha1") != entry.get("sha1"):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -352,15 +354,15 @@ def _backup_and_write(path: str, data: dict) -> None:
|
||||
(core_version, contents, source_ref), and appends new entries.
|
||||
"""
|
||||
p = Path(path)
|
||||
backup = p.with_suffix('.old.yml')
|
||||
backup = p.with_suffix(".old.yml")
|
||||
shutil.copy2(p, backup)
|
||||
|
||||
original = p.read_text(encoding='utf-8')
|
||||
patched = _patch_core_version(original, data.get('core_version', ''))
|
||||
patched = _patch_bios_entries(patched, data.get('files', []))
|
||||
patched = _append_new_entries(patched, data.get('files', []), original)
|
||||
original = p.read_text(encoding="utf-8")
|
||||
patched = _patch_core_version(original, data.get("core_version", ""))
|
||||
patched = _patch_bios_entries(patched, data.get("files", []))
|
||||
patched = _append_new_entries(patched, data.get("files", []), original)
|
||||
|
||||
p.write_text(patched, encoding='utf-8')
|
||||
p.write_text(patched, encoding="utf-8")
|
||||
|
||||
|
||||
def _patch_core_version(text: str, version: str) -> str:
|
||||
@@ -368,8 +370,9 @@ def _patch_core_version(text: str, version: str) -> str:
|
||||
if not version:
|
||||
return text
|
||||
import re
|
||||
|
||||
return re.sub(
|
||||
r'^(core_version:\s*).*$',
|
||||
r"^(core_version:\s*).*$",
|
||||
rf'\g<1>"{version}"',
|
||||
text,
|
||||
count=1,
|
||||
@@ -390,18 +393,18 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
# Build a lookup of what to patch
|
||||
patches: dict[str, dict] = {}
|
||||
for fe in files:
|
||||
if fe.get('category') != 'bios_zip':
|
||||
if fe.get("category") != "bios_zip":
|
||||
continue
|
||||
patches[fe['name']] = fe
|
||||
patches[fe["name"]] = fe
|
||||
|
||||
if not patches:
|
||||
return text
|
||||
|
||||
lines = text.split('\n')
|
||||
lines = text.split("\n")
|
||||
# Find all entry start positions (line indices)
|
||||
entry_starts: list[tuple[int, str]] = []
|
||||
for i, line in enumerate(lines):
|
||||
m = re.match(r'^ - name:\s*(.+?)\s*$', line)
|
||||
m = re.match(r"^ - name:\s*(.+?)\s*$", line)
|
||||
if m:
|
||||
entry_starts.append((i, m.group(1).strip('"').strip("'")))
|
||||
|
||||
@@ -412,8 +415,8 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
continue
|
||||
|
||||
fe = patches[entry_name]
|
||||
contents = fe.get('contents', [])
|
||||
source_ref = fe.get('source_ref', '')
|
||||
contents = fe.get("contents", [])
|
||||
source_ref = fe.get("source_ref", "")
|
||||
|
||||
# Find the last "owned" line of this entry
|
||||
# Owned = indented with 4+ spaces (field lines of this entry)
|
||||
@@ -422,11 +425,11 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
stripped = lines[j].strip()
|
||||
if not stripped:
|
||||
break # blank line = end of entry
|
||||
if stripped.startswith('#'):
|
||||
if stripped.startswith("#"):
|
||||
break # comment = belongs to next entry
|
||||
if re.match(r'^ - ', lines[j]):
|
||||
if re.match(r"^ - ", lines[j]):
|
||||
break # next list item
|
||||
if re.match(r'^ ', lines[j]) or re.match(r'^ \w', lines[j]):
|
||||
if re.match(r"^ ", lines[j]) or re.match(r"^ \w", lines[j]):
|
||||
last_owned = j
|
||||
else:
|
||||
break
|
||||
@@ -435,7 +438,7 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
if source_ref:
|
||||
found_sr = False
|
||||
for j in range(start_line + 1, last_owned + 1):
|
||||
if re.match(r'^ source_ref:', lines[j]):
|
||||
if re.match(r"^ source_ref:", lines[j]):
|
||||
lines[j] = f' source_ref: "{source_ref}"'
|
||||
found_sr = True
|
||||
break
|
||||
@@ -447,10 +450,10 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
contents_start = None
|
||||
contents_end = None
|
||||
for j in range(start_line + 1, last_owned + 1):
|
||||
if re.match(r'^ contents:', lines[j]):
|
||||
if re.match(r"^ contents:", lines[j]):
|
||||
contents_start = j
|
||||
elif contents_start is not None:
|
||||
if re.match(r'^ ', lines[j]):
|
||||
if re.match(r"^ ", lines[j]):
|
||||
contents_end = j
|
||||
else:
|
||||
break
|
||||
@@ -458,29 +461,29 @@ def _patch_bios_entries(text: str, files: list[dict]) -> str:
|
||||
contents_end = contents_start
|
||||
|
||||
if contents_start is not None:
|
||||
del lines[contents_start:contents_end + 1]
|
||||
last_owned -= (contents_end - contents_start + 1)
|
||||
del lines[contents_start : contents_end + 1]
|
||||
last_owned -= contents_end - contents_start + 1
|
||||
|
||||
# Insert new contents after last owned line
|
||||
if contents:
|
||||
new_lines = _format_contents(contents).split('\n')
|
||||
new_lines = _format_contents(contents).split("\n")
|
||||
for k, cl in enumerate(new_lines):
|
||||
lines.insert(last_owned + 1 + k, cl)
|
||||
|
||||
return '\n'.join(lines)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _append_new_entries(text: str, files: list[dict], original: str) -> str:
|
||||
"""Append new bios_zip entries (system=None) that aren't in the original."""
|
||||
# Parse original to get existing entry names (more reliable than text search)
|
||||
existing_data = yaml.safe_load(original) or {}
|
||||
existing_names = {f['name'] for f in existing_data.get('files', [])}
|
||||
existing_names = {f["name"] for f in existing_data.get("files", [])}
|
||||
|
||||
new_entries = []
|
||||
for fe in files:
|
||||
if fe.get('category') != 'bios_zip' or fe.get('system') is not None:
|
||||
if fe.get("category") != "bios_zip" or fe.get("system") is not None:
|
||||
continue
|
||||
if fe['name'] in existing_names:
|
||||
if fe["name"] in existing_names:
|
||||
continue
|
||||
new_entries.append(fe)
|
||||
|
||||
@@ -489,36 +492,36 @@ def _append_new_entries(text: str, files: list[dict], original: str) -> str:
|
||||
|
||||
lines = []
|
||||
for fe in new_entries:
|
||||
lines.append(f'\n - name: {fe["name"]}')
|
||||
lines.append(f' required: {str(fe["required"]).lower()}')
|
||||
lines.append(f' category: bios_zip')
|
||||
if fe.get('source_ref'):
|
||||
lines.append(f"\n - name: {fe['name']}")
|
||||
lines.append(f" required: {str(fe['required']).lower()}")
|
||||
lines.append(" category: bios_zip")
|
||||
if fe.get("source_ref"):
|
||||
lines.append(f' source_ref: "{fe["source_ref"]}"')
|
||||
if fe.get('contents'):
|
||||
lines.append(_format_contents(fe['contents']))
|
||||
if fe.get("contents"):
|
||||
lines.append(_format_contents(fe["contents"]))
|
||||
|
||||
if lines:
|
||||
text = text.rstrip('\n') + '\n' + '\n'.join(lines) + '\n'
|
||||
text = text.rstrip("\n") + "\n" + "\n".join(lines) + "\n"
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def _format_contents(contents: list[dict]) -> str:
|
||||
"""Format a contents list as YAML text."""
|
||||
lines = [' contents:']
|
||||
lines = [" contents:"]
|
||||
for rom in contents:
|
||||
lines.append(f' - name: {rom["name"]}')
|
||||
if rom.get('description'):
|
||||
lines.append(f' description: {rom["description"]}')
|
||||
if rom.get('size'):
|
||||
lines.append(f' size: {rom["size"]}')
|
||||
if rom.get('crc32'):
|
||||
lines.append(f" - name: {rom['name']}")
|
||||
if rom.get("description"):
|
||||
lines.append(f" description: {rom['description']}")
|
||||
if rom.get("size"):
|
||||
lines.append(f" size: {rom['size']}")
|
||||
if rom.get("crc32"):
|
||||
lines.append(f' crc32: "{rom["crc32"]}"')
|
||||
if rom.get('sha1'):
|
||||
if rom.get("sha1"):
|
||||
lines.append(f' sha1: "{rom["sha1"]}"')
|
||||
if rom.get('bad_dump'):
|
||||
lines.append(f' bad_dump: true')
|
||||
return '\n'.join(lines)
|
||||
if rom.get("bad_dump"):
|
||||
lines.append(" bad_dump: true")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None:
|
||||
@@ -529,37 +532,38 @@ def _backup_and_write_fbneo(path: str, data: dict, hashes: dict) -> None:
|
||||
Existing entries are left untouched (CRC32 changes are rare).
|
||||
"""
|
||||
p = Path(path)
|
||||
backup = p.with_suffix('.old.yml')
|
||||
backup = p.with_suffix(".old.yml")
|
||||
shutil.copy2(p, backup)
|
||||
|
||||
original = p.read_text(encoding='utf-8')
|
||||
patched = _patch_core_version(original, data.get('core_version', ''))
|
||||
original = p.read_text(encoding="utf-8")
|
||||
patched = _patch_core_version(original, data.get("core_version", ""))
|
||||
|
||||
# Identify new ROM entries by comparing parsed data keys, not text search
|
||||
existing_data = yaml.safe_load(original) or {}
|
||||
existing_keys = {
|
||||
(f['archive'], f['name'])
|
||||
for f in existing_data.get('files', [])
|
||||
if f.get('archive')
|
||||
(f["archive"], f["name"])
|
||||
for f in existing_data.get("files", [])
|
||||
if f.get("archive")
|
||||
}
|
||||
new_roms = [
|
||||
f for f in data.get('files', [])
|
||||
if f.get('archive') and (f['archive'], f['name']) not in existing_keys
|
||||
f
|
||||
for f in data.get("files", [])
|
||||
if f.get("archive") and (f["archive"], f["name"]) not in existing_keys
|
||||
]
|
||||
|
||||
if new_roms:
|
||||
lines = []
|
||||
for fe in new_roms:
|
||||
lines.append(f' - name: "{fe["name"]}"')
|
||||
lines.append(f' archive: {fe["archive"]}')
|
||||
lines.append(f' required: {str(fe.get("required", True)).lower()}')
|
||||
if fe.get('size'):
|
||||
lines.append(f' size: {fe["size"]}')
|
||||
if fe.get('crc32'):
|
||||
lines.append(f" archive: {fe['archive']}")
|
||||
lines.append(f" required: {str(fe.get('required', True)).lower()}")
|
||||
if fe.get("size"):
|
||||
lines.append(f" size: {fe['size']}")
|
||||
if fe.get("crc32"):
|
||||
lines.append(f' crc32: "{fe["crc32"]}"')
|
||||
if fe.get('source_ref'):
|
||||
if fe.get("source_ref"):
|
||||
lines.append(f' source_ref: "{fe["source_ref"]}"')
|
||||
lines.append('')
|
||||
patched = patched.rstrip('\n') + '\n\n' + '\n'.join(lines)
|
||||
lines.append("")
|
||||
patched = patched.rstrip("\n") + "\n\n" + "\n".join(lines)
|
||||
|
||||
p.write_text(patched, encoding='utf-8')
|
||||
p.write_text(patched, encoding="utf-8")
|
||||
|
||||
@@ -4,8 +4,8 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
@@ -14,6 +14,7 @@ from pathlib import Path
|
||||
@dataclass
|
||||
class BiosRequirement:
|
||||
"""A single BIOS file requirement from a platform source."""
|
||||
|
||||
name: str
|
||||
system: str
|
||||
sha1: str | None = None
|
||||
@@ -29,9 +30,12 @@ class BiosRequirement:
|
||||
@dataclass
|
||||
class ChangeSet:
|
||||
"""Differences between scraped requirements and current config."""
|
||||
|
||||
added: list[BiosRequirement] = field(default_factory=list)
|
||||
removed: list[BiosRequirement] = field(default_factory=list)
|
||||
modified: list[tuple[BiosRequirement, BiosRequirement]] = field(default_factory=list)
|
||||
modified: list[tuple[BiosRequirement, BiosRequirement]] = field(
|
||||
default_factory=list
|
||||
)
|
||||
|
||||
@property
|
||||
def has_changes(self) -> bool:
|
||||
@@ -80,7 +84,9 @@ class BaseScraper(ABC):
|
||||
if not self.url:
|
||||
raise ValueError("No source URL configured")
|
||||
try:
|
||||
req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
|
||||
req = urllib.request.Request(
|
||||
self.url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
self._raw_data = _read_limited(resp).decode("utf-8")
|
||||
return self._raw_data
|
||||
@@ -113,35 +119,49 @@ class BaseScraper(ABC):
|
||||
changes.added.append(req)
|
||||
else:
|
||||
existing_file = existing[key]
|
||||
if req.sha1 and existing_file.get("sha1") and req.sha1 != existing_file["sha1"]:
|
||||
changes.modified.append((
|
||||
BiosRequirement(
|
||||
name=existing_file["name"],
|
||||
system=key[0],
|
||||
sha1=existing_file.get("sha1"),
|
||||
md5=existing_file.get("md5"),
|
||||
),
|
||||
req,
|
||||
))
|
||||
elif req.md5 and existing_file.get("md5") and req.md5 != existing_file["md5"]:
|
||||
changes.modified.append((
|
||||
BiosRequirement(
|
||||
name=existing_file["name"],
|
||||
system=key[0],
|
||||
md5=existing_file.get("md5"),
|
||||
),
|
||||
req,
|
||||
))
|
||||
if (
|
||||
req.sha1
|
||||
and existing_file.get("sha1")
|
||||
and req.sha1 != existing_file["sha1"]
|
||||
):
|
||||
changes.modified.append(
|
||||
(
|
||||
BiosRequirement(
|
||||
name=existing_file["name"],
|
||||
system=key[0],
|
||||
sha1=existing_file.get("sha1"),
|
||||
md5=existing_file.get("md5"),
|
||||
),
|
||||
req,
|
||||
)
|
||||
)
|
||||
elif (
|
||||
req.md5
|
||||
and existing_file.get("md5")
|
||||
and req.md5 != existing_file["md5"]
|
||||
):
|
||||
changes.modified.append(
|
||||
(
|
||||
BiosRequirement(
|
||||
name=existing_file["name"],
|
||||
system=key[0],
|
||||
md5=existing_file.get("md5"),
|
||||
),
|
||||
req,
|
||||
)
|
||||
)
|
||||
|
||||
for key in existing:
|
||||
if key not in scraped_map:
|
||||
f = existing[key]
|
||||
changes.removed.append(BiosRequirement(
|
||||
name=f["name"],
|
||||
system=key[0],
|
||||
sha1=f.get("sha1"),
|
||||
md5=f.get("md5"),
|
||||
))
|
||||
changes.removed.append(
|
||||
BiosRequirement(
|
||||
name=f["name"],
|
||||
system=key[0],
|
||||
sha1=f.get("sha1"),
|
||||
md5=f.get("md5"),
|
||||
)
|
||||
)
|
||||
|
||||
return changes
|
||||
|
||||
@@ -163,10 +183,13 @@ def fetch_github_latest_version(repo: str) -> str | None:
|
||||
"""Fetch the latest release version tag from a GitHub repo."""
|
||||
url = f"https://api.github.com/repos/{repo}/releases/latest"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
})
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
data = json.loads(resp.read())
|
||||
return data.get("tag_name", "")
|
||||
@@ -174,7 +197,9 @@ def fetch_github_latest_version(repo: str) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirements") -> None:
|
||||
def scraper_cli(
|
||||
scraper_class: type, description: str = "Scrape BIOS requirements"
|
||||
) -> None:
|
||||
"""Shared CLI entry point for all scrapers. Eliminates main() boilerplate."""
|
||||
import argparse
|
||||
|
||||
@@ -203,13 +228,23 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement
|
||||
return
|
||||
|
||||
if args.json:
|
||||
data = [{"name": r.name, "system": r.system, "sha1": r.sha1, "md5": r.md5,
|
||||
"size": r.size, "required": r.required} for r in reqs]
|
||||
data = [
|
||||
{
|
||||
"name": r.name,
|
||||
"system": r.system,
|
||||
"sha1": r.sha1,
|
||||
"md5": r.md5,
|
||||
"size": r.size,
|
||||
"required": r.required,
|
||||
}
|
||||
for r in reqs
|
||||
]
|
||||
print(json.dumps(data, indent=2))
|
||||
return
|
||||
|
||||
if args.output:
|
||||
import yaml
|
||||
|
||||
# Use scraper's generate_platform_yaml() if available (includes
|
||||
# platform metadata, cores list, standalone_cores, etc.)
|
||||
if hasattr(scraper, "generate_platform_yaml"):
|
||||
@@ -224,7 +259,11 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement
|
||||
if req.native_id:
|
||||
sys_entry["native_id"] = req.native_id
|
||||
config["systems"][sys_id] = sys_entry
|
||||
entry = {"name": req.name, "destination": req.destination or req.name, "required": req.required}
|
||||
entry = {
|
||||
"name": req.name,
|
||||
"destination": req.destination or req.name,
|
||||
"required": req.required,
|
||||
}
|
||||
if req.sha1:
|
||||
entry["sha1"] = req.sha1
|
||||
if req.md5:
|
||||
@@ -265,10 +304,13 @@ def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None:
|
||||
"""Fetch the most recent matching tag from a GitHub repo."""
|
||||
url = f"https://api.github.com/repos/{repo}/tags?per_page=50"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
})
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
tags = json.loads(resp.read())
|
||||
for tag in tags:
|
||||
|
||||
@@ -12,8 +12,8 @@ import ast
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
@@ -102,7 +102,6 @@ SYSTEM_SLUG_MAP = {
|
||||
"dragon64": "dragon64",
|
||||
"mc10": "mc10",
|
||||
"msx2+": "microsoft-msx",
|
||||
"msxturbor": "microsoft-msx",
|
||||
"spectravideo": "spectravideo",
|
||||
"tvc": "videoton-tvc",
|
||||
"enterprise": "enterprise-64-128",
|
||||
@@ -116,7 +115,7 @@ SYSTEM_SLUG_MAP = {
|
||||
}
|
||||
|
||||
|
||||
_MD5_RE = re.compile(r'^[a-fA-F0-9]+$')
|
||||
_MD5_RE = re.compile(r"^[a-fA-F0-9]+$")
|
||||
|
||||
|
||||
def _load_md5_index() -> dict[str, str]:
|
||||
@@ -183,11 +182,11 @@ class Scraper(BaseScraper):
|
||||
|
||||
def _extract_systems_dict(self, raw: str) -> dict:
|
||||
"""Extract and parse the 'systems' dict from the Python source via ast.literal_eval."""
|
||||
match = re.search(r'^systems\s*=\s*\{', raw, re.MULTILINE)
|
||||
match = re.search(r"^systems\s*=\s*\{", raw, re.MULTILINE)
|
||||
if not match:
|
||||
raise ValueError("Could not find 'systems = {' in batocera-systems")
|
||||
|
||||
start = match.start() + raw[match.start():].index("{")
|
||||
start = match.start() + raw[match.start() :].index("{")
|
||||
depth = 0
|
||||
i = start
|
||||
in_str = False
|
||||
@@ -195,7 +194,7 @@ class Scraper(BaseScraper):
|
||||
while i < len(raw):
|
||||
ch = raw[i]
|
||||
if in_str:
|
||||
if ch == '\\':
|
||||
if ch == "\\":
|
||||
i += 2
|
||||
continue
|
||||
if ch == str_ch:
|
||||
@@ -214,7 +213,7 @@ class Scraper(BaseScraper):
|
||||
i += 1
|
||||
i += 1
|
||||
|
||||
dict_str = raw[start:i + 1]
|
||||
dict_str = raw[start : i + 1]
|
||||
|
||||
lines = []
|
||||
for line in dict_str.split("\n"):
|
||||
@@ -224,7 +223,7 @@ class Scraper(BaseScraper):
|
||||
j = 0
|
||||
while j < len(line):
|
||||
ch = line[j]
|
||||
if ch == '\\' and j + 1 < len(line):
|
||||
if ch == "\\" and j + 1 < len(line):
|
||||
clean.append(ch)
|
||||
clean.append(line[j + 1])
|
||||
j += 2
|
||||
@@ -246,8 +245,8 @@ class Scraper(BaseScraper):
|
||||
clean_dict_str = "\n".join(lines)
|
||||
|
||||
# OrderedDict({...}) -> just the inner dict literal
|
||||
clean_dict_str = re.sub(r'OrderedDict\(\s*\{', '{', clean_dict_str)
|
||||
clean_dict_str = re.sub(r'\}\s*\)', '}', clean_dict_str)
|
||||
clean_dict_str = re.sub(r"OrderedDict\(\s*\{", "{", clean_dict_str)
|
||||
clean_dict_str = re.sub(r"\}\s*\)", "}", clean_dict_str)
|
||||
|
||||
try:
|
||||
return ast.literal_eval(clean_dict_str)
|
||||
@@ -279,22 +278,24 @@ class Scraper(BaseScraper):
|
||||
|
||||
name = file_path.split("/")[-1] if "/" in file_path else file_path
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
md5=md5 or None,
|
||||
destination=file_path,
|
||||
required=True,
|
||||
zipped_file=zipped_file or None,
|
||||
native_id=sys_key,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
md5=md5 or None,
|
||||
destination=file_path,
|
||||
required=True,
|
||||
zipped_file=zipped_file or None,
|
||||
native_id=sys_key,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
def validate_format(self, raw_data: str) -> bool:
    """Validate batocera-systems format.

    Args:
        raw_data: Text of the fetched batocera-systems source.

    Returns:
        True only when the text mentions both ``systems`` and ``biosFiles``,
        has a line starting with ``systems = {``, and contains the quoted
        ``"md5"`` and ``"file"`` keys.
    """
    has_systems = "systems" in raw_data and "biosFiles" in raw_data
    # The assignment must start a line; a mention inside a comment is not enough.
    has_dict = re.search(r"^systems\s*=\s*\{", raw_data, re.MULTILINE) is not None
    has_md5 = '"md5"' in raw_data
    has_file = '"file"' in raw_data
    return has_systems and has_dict and has_md5 and has_file
|
||||
@@ -336,7 +337,9 @@ class Scraper(BaseScraper):
|
||||
|
||||
systems[req.system]["files"].append(entry)
|
||||
|
||||
tag = fetch_github_latest_tag("batocera-linux/batocera.linux", prefix="batocera-")
|
||||
tag = fetch_github_latest_tag(
|
||||
"batocera-linux/batocera.linux", prefix="batocera-"
|
||||
)
|
||||
batocera_version = ""
|
||||
if tag:
|
||||
num = tag.removeprefix("batocera-")
|
||||
@@ -344,7 +347,9 @@ class Scraper(BaseScraper):
|
||||
batocera_version = num
|
||||
if not batocera_version:
|
||||
# Preserve existing version when fetch fails (offline mode)
|
||||
existing = Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml"
|
||||
existing = (
|
||||
Path(__file__).resolve().parents[2] / "platforms" / "batocera.yml"
|
||||
)
|
||||
if existing.exists():
|
||||
with open(existing) as f:
|
||||
old = yaml.safe_load(f) or {}
|
||||
@@ -369,6 +374,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main():
    """Run the batocera scraper through the shared scraper command-line helper."""
    # Imported here rather than at module top, as in the original block.
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape batocera BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ the Ideal non-bad option is selected as canonical.
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
try:
|
||||
from .base_scraper import (
|
||||
@@ -108,12 +107,33 @@ SYSTEM_ID_MAP: dict[str, str] = {
|
||||
|
||||
# Cores that overlap with BizHawk's system coverage
|
||||
BIZHAWK_CORES = [
    "gambatte",
    "mgba",
    "sameboy",
    "melonds",
    "snes9x",
    "bsnes",
    "beetle_psx",
    "beetle_saturn",
    "beetle_pce",
    "beetle_pcfx",
    "beetle_wswan",
    "beetle_vb",
    "beetle_ngp",
    "opera",
    "stella",
    "picodrive",
    "ppsspp",
    "handy",
    "quicknes",
    "genesis_plus_gx",
    "ares",
    "mupen64plus_next",
    "puae",
    "prboom",
    "virtualjaguar",
    "vice_x64",
    "mame",
]
|
||||
|
||||
|
||||
@@ -137,9 +157,7 @@ def _safe_arithmetic(expr: str) -> int:
|
||||
def _strip_comments(source: str) -> str:
|
||||
"""Remove block comments and #if false blocks."""
|
||||
source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL)
|
||||
source = re.sub(
|
||||
r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL
|
||||
)
|
||||
source = re.sub(r"#if\s+false\b.*?#endif", "", source, flags=re.DOTALL)
|
||||
return source
|
||||
|
||||
|
||||
@@ -158,14 +176,14 @@ def parse_firmware_database(
|
||||
var_to_hash: dict[str, str] = {}
|
||||
|
||||
file_re = re.compile(
|
||||
r'(?:var\s+(\w+)\s*=\s*)?'
|
||||
r'File\(\s*'
|
||||
r"(?:var\s+(\w+)\s*=\s*)?"
|
||||
r"File\(\s*"
|
||||
r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*'
|
||||
r'([^,]+?)\s*,\s*'
|
||||
r"([^,]+?)\s*,\s*"
|
||||
r'"([^"]+)"\s*,\s*'
|
||||
r'"([^"]*)"'
|
||||
r'(?:\s*,\s*isBad:\s*(true|false))?'
|
||||
r'\s*\)'
|
||||
r"(?:\s*,\s*isBad:\s*(true|false))?"
|
||||
r"\s*\)"
|
||||
)
|
||||
|
||||
for m in file_re.finditer(source):
|
||||
@@ -194,15 +212,15 @@ def parse_firmware_database(
|
||||
|
||||
# FirmwareAndOption one-liner
|
||||
fao_re = re.compile(
|
||||
r'FirmwareAndOption\(\s*'
|
||||
r"FirmwareAndOption\(\s*"
|
||||
r'(?:"([A-Fa-f0-9]+)"|SHA1Checksum\.Dummy)\s*,\s*'
|
||||
r'([^,]+?)\s*,\s*'
|
||||
r"([^,]+?)\s*,\s*"
|
||||
r'"([^"]+)"\s*,\s*'
|
||||
r'"([^"]+)"\s*,\s*'
|
||||
r'"([^"]+)"\s*,\s*'
|
||||
r'"([^"]*)"'
|
||||
r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?'
|
||||
r'\s*\)'
|
||||
r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?"
|
||||
r"\s*\)"
|
||||
)
|
||||
|
||||
# Firmware(system, id, desc)
|
||||
@@ -213,10 +231,10 @@ def parse_firmware_database(
|
||||
# Option(system, id, in varref|File(...), status?)
|
||||
option_re = re.compile(
|
||||
r'Option\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*,\s*'
|
||||
r'(?:in\s+(\w+)'
|
||||
r"(?:in\s+(\w+)"
|
||||
r'|File\(\s*"([A-Fa-f0-9]+)"\s*,\s*([^,]+?)\s*,\s*"([^"]+)"\s*,\s*"([^"]*)"\s*\))'
|
||||
r'(?:\s*,\s*FirmwareOptionStatus\.(\w+))?'
|
||||
r'\s*\)'
|
||||
r"(?:\s*,\s*FirmwareOptionStatus\.(\w+))?"
|
||||
r"\s*\)"
|
||||
)
|
||||
|
||||
# Collect firmware slots
|
||||
@@ -269,15 +287,17 @@ def parse_firmware_database(
|
||||
desc = m.group(6)
|
||||
status = m.group(7) or "Acceptable"
|
||||
|
||||
records.append({
|
||||
"system": system,
|
||||
"firmware_id": fw_id,
|
||||
"sha1": sha1,
|
||||
"name": name,
|
||||
"size": _safe_arithmetic(size_expr),
|
||||
"description": desc,
|
||||
"status": status,
|
||||
})
|
||||
records.append(
|
||||
{
|
||||
"system": system,
|
||||
"firmware_id": fw_id,
|
||||
"sha1": sha1,
|
||||
"name": name,
|
||||
"size": _safe_arithmetic(size_expr),
|
||||
"description": desc,
|
||||
"status": status,
|
||||
}
|
||||
)
|
||||
|
||||
# Build records from Firmware+Option pairs, picking best option
|
||||
for (system, fw_id), options in slot_options.items():
|
||||
@@ -291,15 +311,17 @@ def parse_firmware_database(
|
||||
viable.sort(key=lambda x: STATUS_RANK.get(x[1], 2), reverse=True)
|
||||
best_file, best_status = viable[0]
|
||||
|
||||
records.append({
|
||||
"system": system,
|
||||
"firmware_id": fw_id,
|
||||
"sha1": best_file["sha1"],
|
||||
"name": best_file["name"],
|
||||
"size": best_file["size"],
|
||||
"description": best_file.get("description", desc),
|
||||
"status": best_status,
|
||||
})
|
||||
records.append(
|
||||
{
|
||||
"system": system,
|
||||
"firmware_id": fw_id,
|
||||
"sha1": best_file["sha1"],
|
||||
"name": best_file["name"],
|
||||
"size": best_file["size"],
|
||||
"description": best_file.get("description", desc),
|
||||
"status": best_status,
|
||||
}
|
||||
)
|
||||
|
||||
return records, files_by_hash
|
||||
|
||||
|
||||
@@ -13,19 +13,24 @@ Complements libretro_scraper (System.dat) with:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
import urllib.request
|
||||
|
||||
try:
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
except ImportError:
|
||||
# Allow running directly: python scripts/scraper/coreinfo_scraper.py
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
from scraper.base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
from scraper.base_scraper import (
|
||||
BaseScraper,
|
||||
BiosRequirement,
|
||||
fetch_github_latest_version,
|
||||
)
|
||||
|
||||
PLATFORM_NAME = "libretro_coreinfo"
|
||||
|
||||
@@ -168,11 +173,13 @@ def _extract_firmware(info: dict) -> list[dict]:
|
||||
if _is_native_lib(path):
|
||||
continue
|
||||
|
||||
firmware.append({
|
||||
"path": path,
|
||||
"desc": desc,
|
||||
"optional": opt.lower() == "true",
|
||||
})
|
||||
firmware.append(
|
||||
{
|
||||
"path": path,
|
||||
"desc": desc,
|
||||
"optional": opt.lower() == "true",
|
||||
}
|
||||
)
|
||||
|
||||
return firmware
|
||||
|
||||
@@ -182,7 +189,7 @@ def _extract_md5_from_notes(info: dict) -> dict[str, str]:
|
||||
notes = info.get("notes", "")
|
||||
md5_map = {}
|
||||
|
||||
for match in re.finditer(r'\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})', notes):
|
||||
for match in re.finditer(r"\(!\)\s+(.+?)\s+\(md5\):\s+([a-f0-9]{32})", notes):
|
||||
filename = match.group(1).strip()
|
||||
md5 = match.group(2)
|
||||
md5_map[filename] = md5
|
||||
@@ -202,15 +209,19 @@ class Scraper(BaseScraper):
|
||||
# Use the tree API to get all files at once
|
||||
url = f"{GITHUB_API}/git/trees/master?recursive=1"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
})
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
data = json.loads(resp.read())
|
||||
|
||||
return [
|
||||
item["path"] for item in data.get("tree", [])
|
||||
item["path"]
|
||||
for item in data.get("tree", [])
|
||||
if item["path"].endswith("_libretro.info")
|
||||
]
|
||||
except (urllib.error.URLError, json.JSONDecodeError) as e:
|
||||
@@ -220,7 +231,9 @@ class Scraper(BaseScraper):
|
||||
"""Fetch and parse a single .info file."""
|
||||
url = f"{RAW_BASE}/{filename}"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "retrobios-scraper/1.0"})
|
||||
req = urllib.request.Request(
|
||||
url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
content = resp.read().decode("utf-8")
|
||||
return _parse_info_file(content)
|
||||
@@ -253,17 +266,25 @@ class Scraper(BaseScraper):
|
||||
|
||||
basename = path.split("/")[-1] if "/" in path else path
|
||||
# Full path when basename is generic to avoid SGB1.sfc/program.rom vs SGB2.sfc/program.rom collisions
|
||||
GENERIC_NAMES = {"program.rom", "data.rom", "boot.rom", "bios.bin", "firmware.bin"}
|
||||
GENERIC_NAMES = {
|
||||
"program.rom",
|
||||
"data.rom",
|
||||
"boot.rom",
|
||||
"bios.bin",
|
||||
"firmware.bin",
|
||||
}
|
||||
name = path if basename.lower() in GENERIC_NAMES else basename
|
||||
md5 = md5_map.get(basename)
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=name,
|
||||
system=system,
|
||||
md5=md5,
|
||||
destination=path,
|
||||
required=not fw["optional"],
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=name,
|
||||
system=system,
|
||||
md5=md5,
|
||||
destination=path,
|
||||
required=not fw["optional"],
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -281,7 +302,9 @@ def main():
|
||||
"""CLI entry point."""
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Scrape libretro-core-info firmware requirements")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Scrape libretro-core-info firmware requirements"
|
||||
)
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--compare-db", help="Compare against database.json")
|
||||
args = parser.parse_args()
|
||||
@@ -296,6 +319,7 @@ def main():
|
||||
|
||||
if args.compare_db:
|
||||
import json as _json
|
||||
|
||||
with open(args.compare_db) as f:
|
||||
db = _json.load(f)
|
||||
|
||||
@@ -320,6 +344,7 @@ def main():
|
||||
return
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
by_system = defaultdict(list)
|
||||
for r in reqs:
|
||||
by_system[r.system].append(r)
|
||||
|
||||
@@ -10,13 +10,13 @@ Parses files like libretro's System.dat which uses the format:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class DatRom:
|
||||
"""A ROM entry from a DAT file."""
|
||||
|
||||
name: str
|
||||
size: int
|
||||
crc32: str
|
||||
@@ -28,6 +28,7 @@ class DatRom:
|
||||
@dataclass
|
||||
class DatMetadata:
|
||||
"""Metadata from a DAT file header."""
|
||||
|
||||
name: str = ""
|
||||
version: str = ""
|
||||
description: str = ""
|
||||
@@ -53,7 +54,10 @@ def parse_dat(content: str) -> list[DatRom]:
|
||||
|
||||
if stripped.startswith("comment "):
|
||||
value = stripped[8:].strip().strip('"')
|
||||
if value in ("System", "System, firmware, and BIOS files used by libretro cores."):
|
||||
if value in (
|
||||
"System",
|
||||
"System, firmware, and BIOS files used by libretro cores.",
|
||||
):
|
||||
continue
|
||||
current_system = value
|
||||
|
||||
@@ -78,9 +82,16 @@ def parse_dat_metadata(content: str) -> DatMetadata:
|
||||
if in_header and stripped == ")":
|
||||
break
|
||||
if in_header:
|
||||
for field in ("name", "version", "description", "author", "homepage", "url"):
|
||||
for field in (
|
||||
"name",
|
||||
"version",
|
||||
"description",
|
||||
"author",
|
||||
"homepage",
|
||||
"url",
|
||||
):
|
||||
if stripped.startswith(f"{field} "):
|
||||
value = stripped[len(field) + 1:].strip().strip('"')
|
||||
value = stripped[len(field) + 1 :].strip().strip('"')
|
||||
setattr(meta, field, value)
|
||||
|
||||
return meta
|
||||
@@ -94,7 +105,7 @@ def _parse_rom_line(line: str, system: str) -> DatRom | None:
|
||||
if start == -1 or end == -1 or end <= start:
|
||||
return None
|
||||
|
||||
content = line[start + 1:end].strip()
|
||||
content = line[start + 1 : end].strip()
|
||||
|
||||
fields = {}
|
||||
i = 0
|
||||
|
||||
@@ -14,9 +14,8 @@ from __future__ import annotations
|
||||
import csv
|
||||
import io
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
try:
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
@@ -31,8 +30,7 @@ CHECKBIOS_URL = (
|
||||
)
|
||||
|
||||
# Base URL of the raw CSV tables in the EmuDeck documentation repository.
CSV_BASE_URL = (
    "https://raw.githubusercontent.com/EmuDeck/emudeck.github.io/main/docs/tables"
)
|
||||
|
||||
CSV_SHEETS = [
|
||||
@@ -117,10 +115,22 @@ KNOWN_BIOS_FILES = {
|
||||
{"name": "scph5502.bin", "destination": "scph5502.bin", "region": "EU"},
|
||||
],
|
||||
"sony-playstation-2": [
|
||||
{"name": "SCPH-70004_BIOS_V12_EUR_200.BIN", "destination": "SCPH-70004_BIOS_V12_EUR_200.BIN"},
|
||||
{"name": "SCPH-70004_BIOS_V12_EUR_200.EROM", "destination": "SCPH-70004_BIOS_V12_EUR_200.EROM"},
|
||||
{"name": "SCPH-70004_BIOS_V12_EUR_200.ROM1", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1"},
|
||||
{"name": "SCPH-70004_BIOS_V12_EUR_200.ROM2", "destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2"},
|
||||
{
|
||||
"name": "SCPH-70004_BIOS_V12_EUR_200.BIN",
|
||||
"destination": "SCPH-70004_BIOS_V12_EUR_200.BIN",
|
||||
},
|
||||
{
|
||||
"name": "SCPH-70004_BIOS_V12_EUR_200.EROM",
|
||||
"destination": "SCPH-70004_BIOS_V12_EUR_200.EROM",
|
||||
},
|
||||
{
|
||||
"name": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
|
||||
"destination": "SCPH-70004_BIOS_V12_EUR_200.ROM1",
|
||||
},
|
||||
{
|
||||
"name": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
|
||||
"destination": "SCPH-70004_BIOS_V12_EUR_200.ROM2",
|
||||
},
|
||||
],
|
||||
"sega-mega-cd": [
|
||||
{"name": "bios_CD_E.bin", "destination": "bios_CD_E.bin", "region": "EU"},
|
||||
@@ -157,17 +167,17 @@ KNOWN_BIOS_FILES = {
|
||||
}
|
||||
|
||||
_RE_ARRAY = re.compile(
|
||||
r'(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
|
||||
r"(?:local\s+)?(\w+)=\(\s*((?:[0-9a-fA-F]+\s*)+)\)",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
_RE_FUNC = re.compile(
|
||||
r'function\s+(check\w+Bios)\s*\(\)',
|
||||
r"function\s+(check\w+Bios)\s*\(\)",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
_RE_LOCAL_HASHES = re.compile(
|
||||
r'local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)',
|
||||
r"local\s+hashes=\(\s*((?:[0-9a-fA-F]+\s*)+)\)",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
@@ -184,7 +194,9 @@ def _fetch_url(url: str) -> str:
|
||||
class Scraper(BaseScraper):
|
||||
"""Scraper for EmuDeck checkBIOS.sh and CSV cheat sheets."""
|
||||
|
||||
def __init__(self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL):
|
||||
def __init__(
|
||||
self, checkbios_url: str = CHECKBIOS_URL, csv_base_url: str = CSV_BASE_URL
|
||||
):
|
||||
super().__init__(url=checkbios_url)
|
||||
self.checkbios_url = checkbios_url
|
||||
self.csv_base_url = csv_base_url
|
||||
@@ -241,12 +253,12 @@ class Scraper(BaseScraper):
|
||||
@staticmethod
|
||||
def _clean_markdown(text: str) -> str:
|
||||
"""Strip markdown/HTML artifacts from CSV fields."""
|
||||
text = re.sub(r'\*\*', '', text) # bold
|
||||
text = re.sub(r':material-[^:]+:\{[^}]*\}', '', text) # mkdocs material icons
|
||||
text = re.sub(r':material-[^:]+:', '', text)
|
||||
text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # [text](url) -> text
|
||||
text = re.sub(r'<br\s*/?>', ' ', text) # <br/>
|
||||
text = re.sub(r'<[^>]+>', '', text) # remaining HTML
|
||||
text = re.sub(r"\*\*", "", text) # bold
|
||||
text = re.sub(r":material-[^:]+:\{[^}]*\}", "", text) # mkdocs material icons
|
||||
text = re.sub(r":material-[^:]+:", "", text)
|
||||
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text) # [text](url) -> text
|
||||
text = re.sub(r"<br\s*/?>", " ", text) # <br/>
|
||||
text = re.sub(r"<[^>]+>", "", text) # remaining HTML
|
||||
return text.strip()
|
||||
|
||||
def _parse_csv_bios(self, csv_text: str) -> list[dict]:
|
||||
@@ -274,28 +286,32 @@ class Scraper(BaseScraper):
|
||||
system_col = self._clean_markdown((row[key] or ""))
|
||||
break
|
||||
slug = None
|
||||
for part in re.split(r'[`\s/]+', folder_col):
|
||||
part = part.strip().strip('`').lower()
|
||||
for part in re.split(r"[`\s/]+", folder_col):
|
||||
part = part.strip().strip("`").lower()
|
||||
if part and part in SYSTEM_SLUG_MAP:
|
||||
slug = SYSTEM_SLUG_MAP[part]
|
||||
break
|
||||
if not slug:
|
||||
clean = re.sub(r'[^a-z0-9\-]', '', folder_col.strip().strip('`').lower())
|
||||
clean = re.sub(
|
||||
r"[^a-z0-9\-]", "", folder_col.strip().strip("`").lower()
|
||||
)
|
||||
slug = clean if clean else "unknown"
|
||||
entries.append({
|
||||
"system": slug,
|
||||
"system_name": system_col,
|
||||
"bios_raw": bios_col,
|
||||
})
|
||||
entries.append(
|
||||
{
|
||||
"system": slug,
|
||||
"system_name": system_col,
|
||||
"bios_raw": bios_col,
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
def _extract_filenames_from_bios_field(self, bios_raw: str) -> list[dict]:
|
||||
"""Extract individual BIOS filenames from a CSV BIOS field."""
|
||||
results = []
|
||||
bios_raw = re.sub(r'<br\s*/?>', ' ', bios_raw)
|
||||
bios_raw = bios_raw.replace('`', '')
|
||||
bios_raw = re.sub(r"<br\s*/?>", " ", bios_raw)
|
||||
bios_raw = bios_raw.replace("`", "")
|
||||
patterns = re.findall(
|
||||
r'[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)',
|
||||
r"[\w\-./]+\.(?:bin|rom|zip|BIN|ROM|ZIP|EROM|ROM1|ROM2|n64|txt|keys)",
|
||||
bios_raw,
|
||||
)
|
||||
for p in patterns:
|
||||
@@ -324,21 +340,25 @@ class Scraper(BaseScraper):
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
requirements.append(BiosRequirement(
|
||||
name=f["name"],
|
||||
system=system,
|
||||
destination=f.get("destination", f["name"]),
|
||||
required=True,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=f["name"],
|
||||
system=system,
|
||||
destination=f.get("destination", f["name"]),
|
||||
required=True,
|
||||
)
|
||||
)
|
||||
|
||||
for md5 in system_hashes:
|
||||
requirements.append(BiosRequirement(
|
||||
name=f"{system}:{md5}",
|
||||
system=system,
|
||||
md5=md5,
|
||||
destination="",
|
||||
required=True,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=f"{system}:{md5}",
|
||||
system=system,
|
||||
md5=md5,
|
||||
destination="",
|
||||
required=True,
|
||||
)
|
||||
)
|
||||
|
||||
for sheet in CSV_SHEETS:
|
||||
csv_text = self._fetch_csv(sheet)
|
||||
@@ -353,19 +373,21 @@ class Scraper(BaseScraper):
|
||||
seen.add(key)
|
||||
if system in KNOWN_BIOS_FILES:
|
||||
continue
|
||||
requirements.append(BiosRequirement(
|
||||
name=f["name"],
|
||||
system=system,
|
||||
destination=f.get("destination", f["name"]),
|
||||
required=True,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=f["name"],
|
||||
system=system,
|
||||
destination=f.get("destination", f["name"]),
|
||||
required=True,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
def validate_format(self, raw_data: str) -> bool:
    """Check that the fetched text looks like the EmuDeck BIOS check script.

    Args:
        raw_data: Text of the fetched script.

    Returns:
        True only when the text contains a ``PSBios`` assignment, one of the
        ``checkPS1BIOS`` / ``checkPS2BIOS`` names, and at least one run of 32
        lowercase hexadecimal characters.
    """
    has_ps = "PSBios=" in raw_data or "PSBios =" in raw_data
    has_func = "checkPS1BIOS" in raw_data or "checkPS2BIOS" in raw_data
    has_md5 = re.search(r"[0-9a-f]{32}", raw_data) is not None
    return has_ps and has_func and has_md5
|
||||
|
||||
def generate_platform_yaml(self) -> dict:
|
||||
@@ -419,14 +441,17 @@ class Scraper(BaseScraper):
|
||||
"contents/functions/EmuScripts"
|
||||
)
|
||||
name_overrides = {
|
||||
"pcsx2qt": "pcsx2", "rpcs3legacy": "rpcs3",
|
||||
"cemuproton": "cemu", "rmg": "mupen64plus_next",
|
||||
"pcsx2qt": "pcsx2",
|
||||
"rpcs3legacy": "rpcs3",
|
||||
"cemuproton": "cemu",
|
||||
"rmg": "mupen64plus_next",
|
||||
}
|
||||
skip = {"retroarch_maincfg", "retroarch"}
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
api_url, headers={"User-Agent": "retrobios-scraper/1.0"},
|
||||
api_url,
|
||||
headers={"User-Agent": "retrobios-scraper/1.0"},
|
||||
)
|
||||
data = json.loads(urllib.request.urlopen(req, timeout=30).read())
|
||||
except (urllib.error.URLError, OSError):
|
||||
@@ -454,6 +479,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main():
    """Run the EmuDeck scraper through the shared scraper command-line helper."""
    # Imported here rather than at module top, as in the original block.
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape emudeck BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -13,22 +13,22 @@ import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from scripts.scraper.fbneo_parser import parse_fbneo_source_tree
|
||||
from scripts.scraper._hash_merge import compute_diff, merge_fbneo_profile
|
||||
from scripts.scraper.fbneo_parser import parse_fbneo_source_tree
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Upstream FBNeo repository that gets sparse-cloned.
REPO_URL = "https://github.com/finalburnneo/FBNeo.git"
# Three levels up from this file.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
# Working directory for the temporary clone.
CLONE_DIR = REPO_ROOT / "tmp" / "fbneo"
# JSON cache of the parsed BIOS sets.
CACHE_PATH = REPO_ROOT / "data" / "fbneo-hashes.json"
# Directory holding the emulator profile YAML files.
EMULATORS_DIR = REPO_ROOT / "emulators"
# Cache age, in hours, after which it is no longer considered fresh.
STALE_HOURS = 24
|
||||
|
||||
|
||||
@@ -37,8 +37,8 @@ def _is_cache_fresh() -> bool:
|
||||
if not CACHE_PATH.exists():
|
||||
return False
|
||||
try:
|
||||
data = json.loads(CACHE_PATH.read_text(encoding='utf-8'))
|
||||
fetched_at = datetime.fromisoformat(data['fetched_at'])
|
||||
data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
|
||||
fetched_at = datetime.fromisoformat(data["fetched_at"])
|
||||
return datetime.now(timezone.utc) - fetched_at < timedelta(hours=STALE_HOURS)
|
||||
except (json.JSONDecodeError, KeyError, ValueError):
|
||||
return False
|
||||
@@ -53,8 +53,14 @@ def _sparse_clone() -> None:
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
'git', 'clone', '--depth', '1', '--filter=blob:none',
|
||||
'--sparse', REPO_URL, str(CLONE_DIR),
|
||||
"git",
|
||||
"clone",
|
||||
"--depth",
|
||||
"1",
|
||||
"--filter=blob:none",
|
||||
"--sparse",
|
||||
REPO_URL,
|
||||
str(CLONE_DIR),
|
||||
],
|
||||
check=True,
|
||||
capture_output=True,
|
||||
@@ -62,7 +68,7 @@ def _sparse_clone() -> None:
|
||||
)
|
||||
|
||||
subprocess.run(
|
||||
['git', 'sparse-checkout', 'set', 'src/burn/drv', 'src/burner/resource.h'],
|
||||
["git", "sparse-checkout", "set", "src/burn/drv", "src/burner/resource.h"],
|
||||
cwd=CLONE_DIR,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
@@ -76,42 +82,44 @@ def _extract_version() -> tuple[str, str]:
|
||||
Returns (version, commit_sha). Falls back to resource.h if no tag.
|
||||
"""
|
||||
result = subprocess.run(
|
||||
['git', 'describe', '--tags', '--abbrev=0'],
|
||||
["git", "describe", "--tags", "--abbrev=0"],
|
||||
cwd=CLONE_DIR,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Prefer real version tags over pseudo-tags like "latest"
|
||||
version = 'unknown'
|
||||
version = "unknown"
|
||||
if result.returncode == 0:
|
||||
tag = result.stdout.strip()
|
||||
if tag and tag != 'latest':
|
||||
if tag and tag != "latest":
|
||||
version = tag
|
||||
# Fallback: resource.h
|
||||
if version == 'unknown':
|
||||
if version == "unknown":
|
||||
version = _version_from_resource_h()
|
||||
# Last resort: use GitHub API for latest real release tag
|
||||
if version == 'unknown':
|
||||
if version == "unknown":
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
req = urllib.request.Request(
|
||||
'https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10',
|
||||
headers={'User-Agent': 'retrobios-scraper/1.0'},
|
||||
"https://api.github.com/repos/finalburnneo/FBNeo/tags?per_page=10",
|
||||
headers={"User-Agent": "retrobios-scraper/1.0"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
import json as json_mod
|
||||
|
||||
tags = json_mod.loads(resp.read())
|
||||
for t in tags:
|
||||
if t['name'] != 'latest' and t['name'].startswith('v'):
|
||||
version = t['name']
|
||||
if t["name"] != "latest" and t["name"].startswith("v"):
|
||||
version = t["name"]
|
||||
break
|
||||
except (urllib.error.URLError, OSError):
|
||||
pass
|
||||
|
||||
sha_result = subprocess.run(
|
||||
['git', 'rev-parse', 'HEAD'],
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=CLONE_DIR,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
@@ -124,17 +132,17 @@ def _extract_version() -> tuple[str, str]:
|
||||
|
||||
def _version_from_resource_h() -> str:
    """Fallback: parse VER_FULL_VERSION_STR from resource.h.

    Returns:
        The text between the first pair of double quotes on the first line of
        ``src/burner/resource.h`` (under ``CLONE_DIR``) that mentions
        ``VER_FULL_VERSION_STR`` and contains a double quote, or ``"unknown"``
        when the file is missing or no such line exists.
    """
    resource_h = CLONE_DIR / "src" / "burner" / "resource.h"
    if not resource_h.exists():
        return "unknown"

    # Undecodable bytes are replaced rather than raising, so an odd encoding
    # in the header cannot abort version detection.
    text = resource_h.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        if "VER_FULL_VERSION_STR" in line:
            parts = line.split('"')
            # A line without any double quote splits into one part; skip it.
            if len(parts) >= 2:
                return parts[1]
    return "unknown"
|
||||
|
||||
|
||||
def _cleanup() -> None:
|
||||
@@ -146,33 +154,33 @@ def _cleanup() -> None:
|
||||
def fetch_and_cache(force: bool = False) -> dict[str, Any]:
|
||||
"""Clone, parse, and write JSON cache. Returns the cache dict."""
|
||||
if not force and _is_cache_fresh():
|
||||
log.info('cache fresh, skipping clone (use --force to override)')
|
||||
return json.loads(CACHE_PATH.read_text(encoding='utf-8'))
|
||||
log.info("cache fresh, skipping clone (use --force to override)")
|
||||
return json.loads(CACHE_PATH.read_text(encoding="utf-8"))
|
||||
|
||||
try:
|
||||
log.info('sparse cloning %s', REPO_URL)
|
||||
log.info("sparse cloning %s", REPO_URL)
|
||||
_sparse_clone()
|
||||
|
||||
log.info('extracting version')
|
||||
log.info("extracting version")
|
||||
version, commit = _extract_version()
|
||||
|
||||
log.info('parsing source tree')
|
||||
log.info("parsing source tree")
|
||||
bios_sets = parse_fbneo_source_tree(str(CLONE_DIR))
|
||||
|
||||
cache: dict[str, Any] = {
|
||||
'source': 'finalburnneo/FBNeo',
|
||||
'version': version,
|
||||
'commit': commit,
|
||||
'fetched_at': datetime.now(timezone.utc).isoformat(),
|
||||
'bios_sets': bios_sets,
|
||||
"source": "finalburnneo/FBNeo",
|
||||
"version": version,
|
||||
"commit": commit,
|
||||
"fetched_at": datetime.now(timezone.utc).isoformat(),
|
||||
"bios_sets": bios_sets,
|
||||
}
|
||||
|
||||
CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
CACHE_PATH.write_text(
|
||||
json.dumps(cache, indent=2, ensure_ascii=False) + '\n',
|
||||
encoding='utf-8',
|
||||
json.dumps(cache, indent=2, ensure_ascii=False) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
log.info('wrote %d BIOS sets to %s', len(bios_sets), CACHE_PATH)
|
||||
log.info("wrote %d BIOS sets to %s", len(bios_sets), CACHE_PATH)
|
||||
|
||||
return cache
|
||||
finally:
|
||||
@@ -182,48 +190,50 @@ def fetch_and_cache(force: bool = False) -> dict[str, Any]:
|
||||
def _find_fbneo_profiles() -> list[Path]:
    """Find emulator profiles whose upstream references finalburnneo/FBNeo.

    Scans ``EMULATORS_DIR`` for ``*.yml`` files in sorted order. Files named
    ``*.old.yml`` are skipped, as are files that cannot be read, decoded, or
    parsed, or whose top-level YAML value is not a non-empty mapping.

    Returns:
        Paths of the profiles whose ``upstream`` value is a string containing
        ``finalburnneo/fbneo`` (compared case-insensitively).
    """
    profiles: list[Path] = []
    for path in sorted(EMULATORS_DIR.glob("*.yml")):
        if path.name.endswith(".old.yml"):
            continue
        try:
            data = yaml.safe_load(path.read_text(encoding="utf-8"))
        except (yaml.YAMLError, OSError, UnicodeDecodeError):
            # UnicodeDecodeError is a ValueError, not an OSError; one badly
            # encoded profile must not abort the scan of the others.
            continue
        if not data or not isinstance(data, dict):
            continue
        upstream = data.get("upstream", "")
        if isinstance(upstream, str) and "finalburnneo/fbneo" in upstream.lower():
            profiles.append(path)
    return profiles
|
||||
|
||||
|
||||
def _format_diff(profile_name: str, diff: dict[str, Any], show_added: bool = True) -> str:
|
||||
def _format_diff(
|
||||
profile_name: str, diff: dict[str, Any], show_added: bool = True
|
||||
) -> str:
|
||||
"""Format diff for a single profile."""
|
||||
lines: list[str] = []
|
||||
lines.append(f' {profile_name}:')
|
||||
lines.append(f" {profile_name}:")
|
||||
|
||||
added = diff.get('added', [])
|
||||
updated = diff.get('updated', [])
|
||||
oos = diff.get('out_of_scope', 0)
|
||||
added = diff.get("added", [])
|
||||
updated = diff.get("updated", [])
|
||||
oos = diff.get("out_of_scope", 0)
|
||||
|
||||
if not added and not updated:
|
||||
lines.append(' no changes')
|
||||
lines.append(" no changes")
|
||||
if oos:
|
||||
lines.append(f' . {oos} out of scope')
|
||||
return '\n'.join(lines)
|
||||
lines.append(f" . {oos} out of scope")
|
||||
return "\n".join(lines)
|
||||
|
||||
if show_added:
|
||||
for label in added:
|
||||
lines.append(f' + {label}')
|
||||
lines.append(f" + {label}")
|
||||
elif added:
|
||||
lines.append(f' + {len(added)} new ROMs available (main profile only)')
|
||||
lines.append(f" + {len(added)} new ROMs available (main profile only)")
|
||||
for label in updated:
|
||||
lines.append(f' ~ {label}')
|
||||
lines.append(f' = {diff["unchanged"]} unchanged')
|
||||
lines.append(f" ~ {label}")
|
||||
lines.append(f" = {diff['unchanged']} unchanged")
|
||||
if oos:
|
||||
lines.append(f' . {oos} out of scope')
|
||||
lines.append(f" . {oos} out of scope")
|
||||
|
||||
return '\n'.join(lines)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def run(
|
||||
@@ -234,82 +244,84 @@ def run(
|
||||
"""Main entry point for the scraper."""
|
||||
cache = fetch_and_cache(force=force)
|
||||
|
||||
version = cache.get('version', 'unknown')
|
||||
commit = cache.get('commit', '?')[:12]
|
||||
bios_sets = cache.get('bios_sets', {})
|
||||
version = cache.get("version", "unknown")
|
||||
commit = cache.get("commit", "?")[:12]
|
||||
bios_sets = cache.get("bios_sets", {})
|
||||
profiles = _find_fbneo_profiles()
|
||||
|
||||
if json_output:
|
||||
result: dict[str, Any] = {
|
||||
'source': cache.get('source'),
|
||||
'version': version,
|
||||
'commit': cache.get('commit'),
|
||||
'bios_set_count': len(bios_sets),
|
||||
'profiles': {},
|
||||
"source": cache.get("source"),
|
||||
"version": version,
|
||||
"commit": cache.get("commit"),
|
||||
"bios_set_count": len(bios_sets),
|
||||
"profiles": {},
|
||||
}
|
||||
for path in profiles:
|
||||
diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo')
|
||||
result['profiles'][path.stem] = diff
|
||||
diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo")
|
||||
result["profiles"][path.stem] = diff
|
||||
print(json.dumps(result, indent=2))
|
||||
return 0
|
||||
|
||||
header = (
|
||||
f'fbneo-hashes: {len(bios_sets)} BIOS sets '
|
||||
f'from finalburnneo/FBNeo @ {version} ({commit})'
|
||||
f"fbneo-hashes: {len(bios_sets)} BIOS sets "
|
||||
f"from finalburnneo/FBNeo @ {version} ({commit})"
|
||||
)
|
||||
print(header)
|
||||
print()
|
||||
|
||||
if not profiles:
|
||||
print(' no matching emulator profiles found')
|
||||
print(" no matching emulator profiles found")
|
||||
return 0
|
||||
|
||||
for path in profiles:
|
||||
is_main = path.name == 'fbneo.yml'
|
||||
diff = compute_diff(str(path), str(CACHE_PATH), mode='fbneo')
|
||||
is_main = path.name == "fbneo.yml"
|
||||
diff = compute_diff(str(path), str(CACHE_PATH), mode="fbneo")
|
||||
print(_format_diff(path.stem, diff, show_added=is_main))
|
||||
|
||||
effective_added = diff['added'] if is_main else []
|
||||
if not dry_run and (effective_added or diff['updated']):
|
||||
effective_added = diff["added"] if is_main else []
|
||||
if not dry_run and (effective_added or diff["updated"]):
|
||||
merge_fbneo_profile(str(path), str(CACHE_PATH), write=True, add_new=is_main)
|
||||
log.info('merged changes into %s', path.name)
|
||||
log.info("merged changes into %s", path.name)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Scrape FBNeo BIOS set hashes from upstream source',
|
||||
description="Scrape FBNeo BIOS set hashes from upstream source",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='show diff without writing changes',
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="show diff without writing changes",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--force',
|
||||
action='store_true',
|
||||
help='force re-clone even if cache is fresh',
|
||||
"--force",
|
||||
action="store_true",
|
||||
help="force re-clone even if cache is fresh",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--json',
|
||||
action='store_true',
|
||||
dest='json_output',
|
||||
help='output diff as JSON',
|
||||
"--json",
|
||||
action="store_true",
|
||||
dest="json_output",
|
||||
help="output diff as JSON",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(name)s: %(message)s',
|
||||
format="%(name)s: %(message)s",
|
||||
)
|
||||
|
||||
sys.exit(run(
|
||||
dry_run=args.dry_run,
|
||||
force=args.force,
|
||||
json_output=args.json_output,
|
||||
))
|
||||
sys.exit(
|
||||
run(
|
||||
dry_run=args.dry_run,
|
||||
force=args.force,
|
||||
json_output=args.json_output,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -11,18 +11,17 @@ import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
_ROM_ENTRY_RE = re.compile(
|
||||
r'\{\s*"([^"]+)"\s*,\s*(0x[\da-fA-F]+)\s*,\s*(0x[\da-fA-F]+)\s*,\s*([^}]+)\}',
|
||||
)
|
||||
|
||||
_BURN_DRIVER_RE = re.compile(
|
||||
r'struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};',
|
||||
r"struct\s+BurnDriver\s+BurnDrv(\w+)\s*=\s*\{(.*?)\};",
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
_ROM_DESC_RE = re.compile(
|
||||
r'static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
|
||||
r"static\s+struct\s+BurnRomInfo\s+(\w+)RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};",
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
@@ -37,7 +36,7 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
|
||||
|
||||
for match in _BURN_DRIVER_RE.finditer(source):
|
||||
body = match.group(2)
|
||||
if 'BDF_BOARDROM' not in body:
|
||||
if "BDF_BOARDROM" not in body:
|
||||
continue
|
||||
|
||||
# Set name is the first quoted string in the struct body
|
||||
@@ -46,11 +45,11 @@ def find_bios_sets(source: str, filename: str) -> dict[str, dict]:
|
||||
continue
|
||||
|
||||
set_name = name_match.group(1)
|
||||
line_num = source[:match.start()].count('\n') + 1
|
||||
line_num = source[: match.start()].count("\n") + 1
|
||||
|
||||
results[set_name] = {
|
||||
'source_file': filename,
|
||||
'source_line': line_num,
|
||||
"source_file": filename,
|
||||
"source_line": line_num,
|
||||
}
|
||||
|
||||
return results
|
||||
@@ -63,9 +62,9 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]:
|
||||
Sentinel entries (empty name) are skipped.
|
||||
"""
|
||||
pattern = re.compile(
|
||||
r'static\s+struct\s+BurnRomInfo\s+'
|
||||
r"static\s+struct\s+BurnRomInfo\s+"
|
||||
+ re.escape(set_name)
|
||||
+ r'RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};',
|
||||
+ r"RomDesc\s*\[\s*\]\s*=\s*\{(.*?)\};",
|
||||
re.DOTALL,
|
||||
)
|
||||
match = pattern.search(source)
|
||||
@@ -80,13 +79,15 @@ def parse_rom_info(source: str, set_name: str) -> list[dict]:
|
||||
if not name:
|
||||
continue
|
||||
size = int(entry.group(2), 16)
|
||||
crc32 = format(int(entry.group(3), 16), '08x')
|
||||
crc32 = format(int(entry.group(3), 16), "08x")
|
||||
|
||||
roms.append({
|
||||
'name': name,
|
||||
'size': size,
|
||||
'crc32': crc32,
|
||||
})
|
||||
roms.append(
|
||||
{
|
||||
"name": name,
|
||||
"size": size,
|
||||
"crc32": crc32,
|
||||
}
|
||||
)
|
||||
|
||||
return roms
|
||||
|
||||
@@ -100,7 +101,7 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
|
||||
Returns a dict mapping set name to:
|
||||
{source_file, source_line, roms: [{name, size, crc32}, ...]}
|
||||
"""
|
||||
drv_path = Path(base_path) / 'src' / 'burn' / 'drv'
|
||||
drv_path = Path(base_path) / "src" / "burn" / "drv"
|
||||
if not drv_path.is_dir():
|
||||
return {}
|
||||
|
||||
@@ -108,20 +109,20 @@ def parse_fbneo_source_tree(base_path: str) -> dict[str, dict]:
|
||||
|
||||
for root, _dirs, files in os.walk(drv_path):
|
||||
for fname in files:
|
||||
if not fname.endswith('.cpp'):
|
||||
if not fname.endswith(".cpp"):
|
||||
continue
|
||||
|
||||
filepath = Path(root) / fname
|
||||
source = filepath.read_text(encoding='utf-8', errors='replace')
|
||||
source = filepath.read_text(encoding="utf-8", errors="replace")
|
||||
rel_path = str(filepath.relative_to(base_path))
|
||||
|
||||
bios_sets = find_bios_sets(source, rel_path)
|
||||
for set_name, meta in bios_sets.items():
|
||||
roms = parse_rom_info(source, set_name)
|
||||
results[set_name] = {
|
||||
'source_file': meta['source_file'],
|
||||
'source_line': meta['source_line'],
|
||||
'roms': roms,
|
||||
"source_file": meta["source_file"],
|
||||
"source_line": meta["source_line"],
|
||||
"roms": roms,
|
||||
}
|
||||
|
||||
return results
|
||||
|
||||
@@ -8,9 +8,8 @@ Hash: SHA1 primary
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
|
||||
@@ -18,18 +17,17 @@ from .dat_parser import parse_dat, parse_dat_metadata, validate_dat_format
|
||||
PLATFORM_NAME = "libretro"
|
||||
|
||||
SOURCE_URL = (
|
||||
"https://raw.githubusercontent.com/libretro/libretro-database/"
|
||||
"master/dat/System.dat"
|
||||
"https://raw.githubusercontent.com/libretro/libretro-database/master/dat/System.dat"
|
||||
)
|
||||
|
||||
# Libretro cores that expect BIOS files in a subdirectory of system/.
|
||||
# System.dat lists filenames flat; the scraper prepends the prefix.
|
||||
# ref: each core's libretro.c or equivalent -see platforms/README.md
|
||||
CORE_SUBDIR_MAP = {
|
||||
"nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c
|
||||
"sharp-x68000": "keropi", # px68k/libretro/libretro.c
|
||||
"sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp
|
||||
"sega-dreamcast-arcade": "dc", # flycast -same subfolder
|
||||
"nec-pc-98": "np2kai", # libretro-np2kai/sdl/libretro.c
|
||||
"sharp-x68000": "keropi", # px68k/libretro/libretro.c
|
||||
"sega-dreamcast": "dc", # flycast/shell/libretro/libretro.cpp
|
||||
"sega-dreamcast-arcade": "dc", # flycast -same subfolder
|
||||
}
|
||||
|
||||
SYSTEM_SLUG_MAP = {
|
||||
@@ -100,7 +98,6 @@ class Scraper(BaseScraper):
|
||||
def __init__(self, url: str = SOURCE_URL):
|
||||
super().__init__(url=url)
|
||||
|
||||
|
||||
def fetch_requirements(self) -> list[BiosRequirement]:
|
||||
"""Parse System.dat and return BIOS requirements."""
|
||||
raw = self._fetch_raw()
|
||||
@@ -113,7 +110,9 @@ class Scraper(BaseScraper):
|
||||
|
||||
for rom in roms:
|
||||
native_system = rom.system
|
||||
system_slug = SYSTEM_SLUG_MAP.get(native_system, native_system.lower().replace(" ", "-"))
|
||||
system_slug = SYSTEM_SLUG_MAP.get(
|
||||
native_system, native_system.lower().replace(" ", "-")
|
||||
)
|
||||
|
||||
destination = rom.name
|
||||
name = rom.name.split("/")[-1] if "/" in rom.name else rom.name
|
||||
@@ -122,17 +121,19 @@ class Scraper(BaseScraper):
|
||||
if subdir and not destination.startswith(subdir + "/"):
|
||||
destination = f"{subdir}/{destination}"
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
sha1=rom.sha1 or None,
|
||||
md5=rom.md5 or None,
|
||||
crc32=rom.crc32 or None,
|
||||
size=rom.size or None,
|
||||
destination=destination,
|
||||
required=True,
|
||||
native_id=native_system,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
sha1=rom.sha1 or None,
|
||||
md5=rom.md5 or None,
|
||||
crc32=rom.crc32 or None,
|
||||
size=rom.size or None,
|
||||
destination=destination,
|
||||
required=True,
|
||||
native_id=native_system,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -158,17 +159,22 @@ class Scraper(BaseScraper):
|
||||
"""Fetch per-core metadata from libretro-core-info .info files."""
|
||||
metadata = {}
|
||||
try:
|
||||
url = f"https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
|
||||
req = urllib.request.Request(url, headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
})
|
||||
url = "https://api.github.com/repos/libretro/libretro-core-info/git/trees/master?recursive=1"
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
import json
|
||||
|
||||
tree = json.loads(resp.read())
|
||||
|
||||
info_files = [
|
||||
item["path"] for item in tree.get("tree", [])
|
||||
item["path"]
|
||||
for item in tree.get("tree", [])
|
||||
if item["path"].endswith("_libretro.info")
|
||||
]
|
||||
|
||||
@@ -176,7 +182,9 @@ class Scraper(BaseScraper):
|
||||
core_name = filename.replace("_libretro.info", "")
|
||||
try:
|
||||
info_url = f"https://raw.githubusercontent.com/libretro/libretro-core-info/master/{filename}"
|
||||
req = urllib.request.Request(info_url, headers={"User-Agent": "retrobios-scraper/1.0"})
|
||||
req = urllib.request.Request(
|
||||
info_url, headers={"User-Agent": "retrobios-scraper/1.0"}
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
content = resp.read().decode("utf-8")
|
||||
|
||||
@@ -194,10 +202,11 @@ class Scraper(BaseScraper):
|
||||
system_name = info.get("systemname", "")
|
||||
manufacturer = info.get("manufacturer", "")
|
||||
display_name = info.get("display_name", "")
|
||||
categories = info.get("categories", "")
|
||||
info.get("categories", "")
|
||||
|
||||
# Map core to our system slug via firmware paths
|
||||
from .coreinfo_scraper import CORE_SYSTEM_MAP
|
||||
|
||||
system_slug = CORE_SYSTEM_MAP.get(core_name)
|
||||
if not system_slug:
|
||||
continue
|
||||
@@ -267,7 +276,11 @@ class Scraper(BaseScraper):
|
||||
# ref: Vircon32/libretro.c -virtual console, single BIOS
|
||||
"vircon32": {
|
||||
"files": [
|
||||
{"name": "Vircon32Bios.v32", "destination": "Vircon32Bios.v32", "required": True},
|
||||
{
|
||||
"name": "Vircon32Bios.v32",
|
||||
"destination": "Vircon32Bios.v32",
|
||||
"required": True,
|
||||
},
|
||||
],
|
||||
"core": "vircon32",
|
||||
"manufacturer": "Vircon",
|
||||
@@ -276,7 +289,11 @@ class Scraper(BaseScraper):
|
||||
# ref: xrick/src/sysvid.c, xrick/src/data.c -game data archive
|
||||
"xrick": {
|
||||
"files": [
|
||||
{"name": "data.zip", "destination": "xrick/data.zip", "required": True},
|
||||
{
|
||||
"name": "data.zip",
|
||||
"destination": "xrick/data.zip",
|
||||
"required": True,
|
||||
},
|
||||
],
|
||||
"core": "xrick",
|
||||
"manufacturer": "Other",
|
||||
@@ -318,27 +335,51 @@ class Scraper(BaseScraper):
|
||||
|
||||
# segasp.zip for Sega System SP (Flycast)
|
||||
if "sega-dreamcast-arcade" in systems:
|
||||
existing = {f["name"] for f in systems["sega-dreamcast-arcade"].get("files", [])}
|
||||
existing = {
|
||||
f["name"] for f in systems["sega-dreamcast-arcade"].get("files", [])
|
||||
}
|
||||
if "segasp.zip" not in existing:
|
||||
systems["sega-dreamcast-arcade"]["files"].append({
|
||||
"name": "segasp.zip",
|
||||
"destination": "dc/segasp.zip",
|
||||
"required": True,
|
||||
})
|
||||
systems["sega-dreamcast-arcade"]["files"].append(
|
||||
{
|
||||
"name": "segasp.zip",
|
||||
"destination": "dc/segasp.zip",
|
||||
"required": True,
|
||||
}
|
||||
)
|
||||
|
||||
# Extra files missing from System.dat for specific systems.
|
||||
# Each traced to the core's source code.
|
||||
EXTRA_SYSTEM_FILES = {
|
||||
# melonDS DS DSi mode -ref: JesseTG/melonds-ds/src/libretro.cpp
|
||||
"nintendo-ds": [
|
||||
{"name": "dsi_bios7.bin", "destination": "dsi_bios7.bin", "required": True},
|
||||
{"name": "dsi_bios9.bin", "destination": "dsi_bios9.bin", "required": True},
|
||||
{"name": "dsi_firmware.bin", "destination": "dsi_firmware.bin", "required": True},
|
||||
{"name": "dsi_nand.bin", "destination": "dsi_nand.bin", "required": True},
|
||||
{
|
||||
"name": "dsi_bios7.bin",
|
||||
"destination": "dsi_bios7.bin",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"name": "dsi_bios9.bin",
|
||||
"destination": "dsi_bios9.bin",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"name": "dsi_firmware.bin",
|
||||
"destination": "dsi_firmware.bin",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"name": "dsi_nand.bin",
|
||||
"destination": "dsi_nand.bin",
|
||||
"required": True,
|
||||
},
|
||||
],
|
||||
# bsnes SGB naming -ref: bsnes/target-libretro/libretro.cpp
|
||||
"nintendo-sgb": [
|
||||
{"name": "sgb.boot.rom", "destination": "sgb.boot.rom", "required": False},
|
||||
{
|
||||
"name": "sgb.boot.rom",
|
||||
"destination": "sgb.boot.rom",
|
||||
"required": False,
|
||||
},
|
||||
],
|
||||
# JollyCV -ref: jollycv/libretro.c
|
||||
"coleco-colecovision": [
|
||||
@@ -348,12 +389,20 @@ class Scraper(BaseScraper):
|
||||
],
|
||||
# Kronos ST-V -ref: libretro-kronos/libretro/libretro.c
|
||||
"sega-saturn": [
|
||||
{"name": "stvbios.zip", "destination": "kronos/stvbios.zip", "required": True},
|
||||
{
|
||||
"name": "stvbios.zip",
|
||||
"destination": "kronos/stvbios.zip",
|
||||
"required": True,
|
||||
},
|
||||
],
|
||||
# PCSX ReARMed / Beetle PSX alt BIOS -ref: pcsx_rearmed/libpcsxcore/misc.c
|
||||
# docs say PSXONPSP660.bin (uppercase) but core accepts any case
|
||||
"sony-playstation": [
|
||||
{"name": "psxonpsp660.bin", "destination": "psxonpsp660.bin", "required": False},
|
||||
{
|
||||
"name": "psxonpsp660.bin",
|
||||
"destination": "psxonpsp660.bin",
|
||||
"required": False,
|
||||
},
|
||||
],
|
||||
# Dolphin GC -ref: DolphinLibretro/Boot.cpp:72-73,
|
||||
# BootManager.cpp:200-217, CommonPaths.h:139 GC_IPL="IPL.bin"
|
||||
@@ -361,15 +410,43 @@ class Scraper(BaseScraper):
|
||||
# System.dat gc-ntsc-*.bin names are NOT what Dolphin loads.
|
||||
# We add the correct Dolphin paths for BIOS + essential firmware.
|
||||
"nintendo-gamecube": [
|
||||
{"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/USA/IPL.bin", "required": False},
|
||||
{"name": "gc-pal-12.bin", "destination": "dolphin-emu/Sys/GC/EUR/IPL.bin", "required": False},
|
||||
{"name": "gc-ntsc-12.bin", "destination": "dolphin-emu/Sys/GC/JAP/IPL.bin", "required": False},
|
||||
{
|
||||
"name": "gc-ntsc-12.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/USA/IPL.bin",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "gc-pal-12.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/EUR/IPL.bin",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "gc-ntsc-12.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/JAP/IPL.bin",
|
||||
"required": False,
|
||||
},
|
||||
# DSP firmware -ref: Source/Core/Core/HW/DSPLLE/DSPHost.cpp
|
||||
{"name": "dsp_coef.bin", "destination": "dolphin-emu/Sys/GC/dsp_coef.bin", "required": True},
|
||||
{"name": "dsp_rom.bin", "destination": "dolphin-emu/Sys/GC/dsp_rom.bin", "required": True},
|
||||
{
|
||||
"name": "dsp_coef.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/dsp_coef.bin",
|
||||
"required": True,
|
||||
},
|
||||
{
|
||||
"name": "dsp_rom.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/dsp_rom.bin",
|
||||
"required": True,
|
||||
},
|
||||
# Fonts -ref: Source/Core/Core/HW/EXI/EXI_DeviceIPL.cpp
|
||||
{"name": "font_western.bin", "destination": "dolphin-emu/Sys/GC/font_western.bin", "required": False},
|
||||
{"name": "font_japanese.bin", "destination": "dolphin-emu/Sys/GC/font_japanese.bin", "required": False},
|
||||
{
|
||||
"name": "font_western.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/font_western.bin",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "font_japanese.bin",
|
||||
"destination": "dolphin-emu/Sys/GC/font_japanese.bin",
|
||||
"required": False,
|
||||
},
|
||||
],
|
||||
# minivmac casing -ref: minivmac/src/MYOSGLUE.c
|
||||
# doc says MacII.rom, repo has MacII.ROM -both work on case-insensitive FS
|
||||
@@ -455,6 +532,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main():
|
||||
from scripts.scraper.base_scraper import scraper_cli
|
||||
|
||||
scraper_cli(Scraper, "Scrape libretro BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -21,16 +21,16 @@ from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from .mame_parser import parse_mame_source_tree
|
||||
from ._hash_merge import compute_diff, merge_mame_profile
|
||||
from .mame_parser import parse_mame_source_tree
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_ROOT = Path(__file__).resolve().parents[2]
|
||||
_CACHE_PATH = _ROOT / 'data' / 'mame-hashes.json'
|
||||
_CLONE_DIR = _ROOT / 'tmp' / 'mame'
|
||||
_EMULATORS_DIR = _ROOT / 'emulators'
|
||||
_REPO_URL = 'https://github.com/mamedev/mame.git'
|
||||
_CACHE_PATH = _ROOT / "data" / "mame-hashes.json"
|
||||
_CLONE_DIR = _ROOT / "tmp" / "mame"
|
||||
_EMULATORS_DIR = _ROOT / "emulators"
|
||||
_REPO_URL = "https://github.com/mamedev/mame.git"
|
||||
_STALE_HOURS = 24
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ def _load_cache() -> dict[str, Any] | None:
|
||||
if not _CACHE_PATH.exists():
|
||||
return None
|
||||
try:
|
||||
with open(_CACHE_PATH, encoding='utf-8') as f:
|
||||
with open(_CACHE_PATH, encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
@@ -50,7 +50,7 @@ def _load_cache() -> dict[str, Any] | None:
|
||||
def _is_stale(cache: dict[str, Any] | None) -> bool:
|
||||
if cache is None:
|
||||
return True
|
||||
fetched_at = cache.get('fetched_at')
|
||||
fetched_at = cache.get("fetched_at")
|
||||
if not fetched_at:
|
||||
return True
|
||||
try:
|
||||
@@ -63,17 +63,19 @@ def _is_stale(cache: dict[str, Any] | None) -> bool:
|
||||
|
||||
def _write_cache(data: dict[str, Any]) -> None:
|
||||
_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(_CACHE_PATH, 'w', encoding='utf-8') as f:
|
||||
with open(_CACHE_PATH, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
log.info('cache written to %s', _CACHE_PATH)
|
||||
log.info("cache written to %s", _CACHE_PATH)
|
||||
|
||||
|
||||
# ── Git operations ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _run_git(args: list[str], cwd: Path | None = None) -> subprocess.CompletedProcess[str]:
|
||||
def _run_git(
|
||||
args: list[str], cwd: Path | None = None
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
return subprocess.run(
|
||||
['git', *args],
|
||||
["git", *args],
|
||||
cwd=cwd,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
@@ -86,17 +88,20 @@ def _sparse_clone() -> None:
|
||||
shutil.rmtree(_CLONE_DIR)
|
||||
_CLONE_DIR.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
log.info('sparse cloning mamedev/mame into %s', _CLONE_DIR)
|
||||
_run_git([
|
||||
'clone',
|
||||
'--depth', '1',
|
||||
'--filter=blob:none',
|
||||
'--sparse',
|
||||
_REPO_URL,
|
||||
str(_CLONE_DIR),
|
||||
])
|
||||
log.info("sparse cloning mamedev/mame into %s", _CLONE_DIR)
|
||||
_run_git(
|
||||
['sparse-checkout', 'set', 'src/mame', 'src/devices'],
|
||||
[
|
||||
"clone",
|
||||
"--depth",
|
||||
"1",
|
||||
"--filter=blob:none",
|
||||
"--sparse",
|
||||
_REPO_URL,
|
||||
str(_CLONE_DIR),
|
||||
]
|
||||
)
|
||||
_run_git(
|
||||
["sparse-checkout", "set", "src/mame", "src/devices"],
|
||||
cwd=_CLONE_DIR,
|
||||
)
|
||||
|
||||
@@ -106,41 +111,41 @@ def _get_version() -> str:
|
||||
# Use GitHub API to get the latest release tag.
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
'https://api.github.com/repos/mamedev/mame/releases/latest',
|
||||
headers={'User-Agent': 'retrobios-scraper/1.0',
|
||||
'Accept': 'application/vnd.github.v3+json'},
|
||||
"https://api.github.com/repos/mamedev/mame/releases/latest",
|
||||
headers={
|
||||
"User-Agent": "retrobios-scraper/1.0",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
data = json.loads(resp.read())
|
||||
tag = data.get('tag_name', '')
|
||||
tag = data.get("tag_name", "")
|
||||
if tag:
|
||||
return _parse_version_tag(tag)
|
||||
except (urllib.error.URLError, json.JSONDecodeError, OSError):
|
||||
pass
|
||||
return 'unknown'
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _parse_version_tag(tag: str) -> str:
|
||||
prefix = 'mame'
|
||||
prefix = "mame"
|
||||
raw = tag.removeprefix(prefix) if tag.startswith(prefix) else tag
|
||||
if raw.isdigit() and len(raw) >= 4:
|
||||
return f'{raw[0]}.{raw[1:]}'
|
||||
return f"{raw[0]}.{raw[1:]}"
|
||||
return raw
|
||||
|
||||
|
||||
|
||||
|
||||
def _get_commit() -> str:
|
||||
try:
|
||||
result = _run_git(['rev-parse', 'HEAD'], cwd=_CLONE_DIR)
|
||||
result = _run_git(["rev-parse", "HEAD"], cwd=_CLONE_DIR)
|
||||
return result.stdout.strip()
|
||||
except subprocess.CalledProcessError:
|
||||
return ''
|
||||
return ""
|
||||
|
||||
|
||||
def _cleanup() -> None:
|
||||
if _CLONE_DIR.exists():
|
||||
log.info('cleaning up %s', _CLONE_DIR)
|
||||
log.info("cleaning up %s", _CLONE_DIR)
|
||||
shutil.rmtree(_CLONE_DIR)
|
||||
|
||||
|
||||
@@ -149,18 +154,21 @@ def _cleanup() -> None:
|
||||
|
||||
def _find_mame_profiles() -> list[Path]:
|
||||
profiles: list[Path] = []
|
||||
for path in sorted(_EMULATORS_DIR.glob('*.yml')):
|
||||
if path.name.endswith('.old.yml'):
|
||||
for path in sorted(_EMULATORS_DIR.glob("*.yml")):
|
||||
if path.name.endswith(".old.yml"):
|
||||
continue
|
||||
try:
|
||||
with open(path, encoding='utf-8') as f:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f)
|
||||
if not isinstance(data, dict):
|
||||
continue
|
||||
upstream = data.get('upstream', '')
|
||||
upstream = data.get("upstream", "")
|
||||
# Only match profiles tracking current MAME (not frozen snapshots
|
||||
# which have upstream like "mamedev/mame/tree/mame0139")
|
||||
if isinstance(upstream, str) and upstream.rstrip('/') == 'https://github.com/mamedev/mame':
|
||||
if (
|
||||
isinstance(upstream, str)
|
||||
and upstream.rstrip("/") == "https://github.com/mamedev/mame"
|
||||
):
|
||||
profiles.append(path)
|
||||
except (yaml.YAMLError, OSError):
|
||||
continue
|
||||
@@ -179,36 +187,36 @@ def _format_diff(
|
||||
lines: list[str] = []
|
||||
name = profile_path.stem
|
||||
|
||||
added = diff.get('added', [])
|
||||
updated = diff.get('updated', [])
|
||||
removed = diff.get('removed', [])
|
||||
unchanged = diff.get('unchanged', 0)
|
||||
added = diff.get("added", [])
|
||||
updated = diff.get("updated", [])
|
||||
removed = diff.get("removed", [])
|
||||
unchanged = diff.get("unchanged", 0)
|
||||
|
||||
if not added and not updated and not removed:
|
||||
lines.append(f' {name}:')
|
||||
lines.append(' no changes')
|
||||
lines.append(f" {name}:")
|
||||
lines.append(" no changes")
|
||||
return lines
|
||||
|
||||
lines.append(f' {name}:')
|
||||
lines.append(f" {name}:")
|
||||
|
||||
if show_added:
|
||||
bios_sets = hashes.get('bios_sets', {})
|
||||
bios_sets = hashes.get("bios_sets", {})
|
||||
for set_name in added:
|
||||
rom_count = len(bios_sets.get(set_name, {}).get('roms', []))
|
||||
source_file = bios_sets.get(set_name, {}).get('source_file', '')
|
||||
source_line = bios_sets.get(set_name, {}).get('source_line', '')
|
||||
ref = f'{source_file}:{source_line}' if source_file else ''
|
||||
lines.append(f' + {set_name}.zip ({ref}, {rom_count} ROMs)')
|
||||
rom_count = len(bios_sets.get(set_name, {}).get("roms", []))
|
||||
source_file = bios_sets.get(set_name, {}).get("source_file", "")
|
||||
source_line = bios_sets.get(set_name, {}).get("source_line", "")
|
||||
ref = f"{source_file}:{source_line}" if source_file else ""
|
||||
lines.append(f" + {set_name}.zip ({ref}, {rom_count} ROMs)")
|
||||
elif added:
|
||||
lines.append(f' + {len(added)} new sets available (main profile only)')
|
||||
lines.append(f" + {len(added)} new sets available (main profile only)")
|
||||
|
||||
for set_name in updated:
|
||||
lines.append(f' ~ {set_name}.zip (contents changed)')
|
||||
lines.append(f" ~ {set_name}.zip (contents changed)")
|
||||
|
||||
oos = diff.get('out_of_scope', 0)
|
||||
lines.append(f' = {unchanged} unchanged')
|
||||
oos = diff.get("out_of_scope", 0)
|
||||
lines.append(f" = {unchanged} unchanged")
|
||||
if oos:
|
||||
lines.append(f' . {oos} out of scope (not BIOS root sets)')
|
||||
lines.append(f" . {oos} out of scope (not BIOS root sets)")
|
||||
return lines
|
||||
|
||||
|
||||
@@ -218,7 +226,7 @@ def _format_diff(
|
||||
def _fetch_hashes(force: bool) -> dict[str, Any]:
|
||||
cache = _load_cache()
|
||||
if not force and not _is_stale(cache):
|
||||
log.info('using cached data from %s', cache.get('fetched_at', ''))
|
||||
log.info("using cached data from %s", cache.get("fetched_at", ""))
|
||||
return cache # type: ignore[return-value]
|
||||
|
||||
try:
|
||||
@@ -228,11 +236,11 @@ def _fetch_hashes(force: bool) -> dict[str, Any]:
|
||||
commit = _get_commit()
|
||||
|
||||
data: dict[str, Any] = {
|
||||
'source': 'mamedev/mame',
|
||||
'version': version,
|
||||
'commit': commit,
|
||||
'fetched_at': datetime.now(timezone.utc).isoformat(),
|
||||
'bios_sets': bios_sets,
|
||||
"source": "mamedev/mame",
|
||||
"version": version,
|
||||
"commit": commit,
|
||||
"fetched_at": datetime.now(timezone.utc).isoformat(),
|
||||
"bios_sets": bios_sets,
|
||||
}
|
||||
_write_cache(data)
|
||||
return data
|
||||
@@ -243,34 +251,36 @@ def _fetch_hashes(force: bool) -> dict[str, Any]:
|
||||
def _run(args: argparse.Namespace) -> None:
|
||||
hashes = _fetch_hashes(args.force)
|
||||
|
||||
total_sets = len(hashes.get('bios_sets', {}))
|
||||
version = hashes.get('version', 'unknown')
|
||||
commit = hashes.get('commit', '')[:12]
|
||||
total_sets = len(hashes.get("bios_sets", {}))
|
||||
version = hashes.get("version", "unknown")
|
||||
commit = hashes.get("commit", "")[:12]
|
||||
|
||||
if args.json:
|
||||
json.dump(hashes, sys.stdout, indent=2, ensure_ascii=False)
|
||||
sys.stdout.write('\n')
|
||||
sys.stdout.write("\n")
|
||||
return
|
||||
|
||||
print(f'mame-hashes: {total_sets} BIOS root sets from mamedev/mame'
|
||||
f' @ {version} ({commit})')
|
||||
print(
|
||||
f"mame-hashes: {total_sets} BIOS root sets from mamedev/mame"
|
||||
f" @ {version} ({commit})"
|
||||
)
|
||||
print()
|
||||
|
||||
profiles = _find_mame_profiles()
|
||||
if not profiles:
|
||||
print(' no profiles with mamedev/mame upstream found')
|
||||
print(" no profiles with mamedev/mame upstream found")
|
||||
return
|
||||
|
||||
for profile_path in profiles:
|
||||
is_main = profile_path.name == 'mame.yml'
|
||||
diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode='mame')
|
||||
is_main = profile_path.name == "mame.yml"
|
||||
diff = compute_diff(str(profile_path), str(_CACHE_PATH), mode="mame")
|
||||
lines = _format_diff(profile_path, diff, hashes, show_added=is_main)
|
||||
for line in lines:
|
||||
print(line)
|
||||
|
||||
if not args.dry_run:
|
||||
updated = diff.get('updated', [])
|
||||
added = diff.get('added', []) if is_main else []
|
||||
updated = diff.get("updated", [])
|
||||
added = diff.get("added", []) if is_main else []
|
||||
if added or updated:
|
||||
merge_mame_profile(
|
||||
str(profile_path),
|
||||
@@ -278,32 +288,32 @@ def _run(args: argparse.Namespace) -> None:
|
||||
write=True,
|
||||
add_new=is_main,
|
||||
)
|
||||
log.info('merged into %s', profile_path.name)
|
||||
log.info("merged into %s", profile_path.name)
|
||||
|
||||
print()
|
||||
if args.dry_run:
|
||||
print('(dry run, no files modified)')
|
||||
print("(dry run, no files modified)")
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='mame_hash_scraper',
|
||||
description='Fetch MAME BIOS hashes from source and merge into profiles.',
|
||||
prog="mame_hash_scraper",
|
||||
description="Fetch MAME BIOS hashes from source and merge into profiles.",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dry-run',
|
||||
action='store_true',
|
||||
help='show diff only, do not modify profiles',
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="show diff only, do not modify profiles",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--json',
|
||||
action='store_true',
|
||||
help='output raw JSON to stdout',
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="output raw JSON to stdout",
|
||||
)
|
||||
parser.add_argument(
|
||||
'--force',
|
||||
action='store_true',
|
||||
help='re-fetch even if cache is fresh',
|
||||
"--force",
|
||||
action="store_true",
|
||||
help="re-fetch even if cache is fresh",
|
||||
)
|
||||
return parser
|
||||
|
||||
@@ -311,12 +321,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
def main() -> None:
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(levelname)s: %(message)s',
|
||||
format="%(levelname)s: %(message)s",
|
||||
)
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
_run(args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -14,27 +14,27 @@ from pathlib import Path
|
||||
|
||||
# Macros that declare a machine entry
|
||||
_MACHINE_MACROS = re.compile(
|
||||
r'\b(GAME|SYST|COMP|CONS)\s*\(',
|
||||
r"\b(GAME|SYST|COMP|CONS)\s*\(",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
# ROM block boundaries
|
||||
_ROM_START = re.compile(r'ROM_START\s*\(\s*(\w+)\s*\)')
|
||||
_ROM_END = re.compile(r'ROM_END')
|
||||
_ROM_START = re.compile(r"ROM_START\s*\(\s*(\w+)\s*\)")
|
||||
_ROM_END = re.compile(r"ROM_END")
|
||||
|
||||
# ROM_REGION variants: ROM_REGION, ROM_REGION16_BE, ROM_REGION16_LE, ROM_REGION32_LE, etc.
|
||||
_ROM_REGION = re.compile(
|
||||
r'ROM_REGION\w*\s*\('
|
||||
r'\s*(0x[\da-fA-F]+|\d+)\s*,' # size
|
||||
r'\s*"([^"]+)"\s*,', # tag
|
||||
r"ROM_REGION\w*\s*\("
|
||||
r"\s*(0x[\da-fA-F]+|\d+)\s*," # size
|
||||
r'\s*"([^"]+)"\s*,', # tag
|
||||
)
|
||||
|
||||
# ROM_SYSTEM_BIOS( index, label, description )
|
||||
_ROM_SYSTEM_BIOS = re.compile(
|
||||
r'ROM_SYSTEM_BIOS\s*\('
|
||||
r'\s*(\d+)\s*,' # index
|
||||
r'\s*"([^"]+)"\s*,' # label
|
||||
r'\s*"([^"]+)"\s*\)', # description
|
||||
r"ROM_SYSTEM_BIOS\s*\("
|
||||
r"\s*(\d+)\s*," # index
|
||||
r'\s*"([^"]+)"\s*,' # label
|
||||
r'\s*"([^"]+)"\s*\)', # description
|
||||
)
|
||||
|
||||
# All ROM_LOAD variants including custom BIOS macros.
|
||||
@@ -44,23 +44,23 @@ _ROM_SYSTEM_BIOS = re.compile(
|
||||
# The key pattern: any macro containing "ROM_LOAD" or "ROMX_LOAD" in its name,
|
||||
# with the first quoted string being the ROM filename.
|
||||
_ROM_LOAD = re.compile(
|
||||
r'\b\w*ROMX?_LOAD\w*\s*\('
|
||||
r'[^"]*' # skip any args before the filename (e.g., bios index)
|
||||
r'"([^"]+)"\s*,' # name (first quoted string)
|
||||
r'\s*(0x[\da-fA-F]+|\d+)\s*,' # offset
|
||||
r'\s*(0x[\da-fA-F]+|\d+)\s*,', # size
|
||||
r"\b\w*ROMX?_LOAD\w*\s*\("
|
||||
r'[^"]*' # skip any args before the filename (e.g., bios index)
|
||||
r'"([^"]+)"\s*,' # name (first quoted string)
|
||||
r"\s*(0x[\da-fA-F]+|\d+)\s*," # offset
|
||||
r"\s*(0x[\da-fA-F]+|\d+)\s*,", # size
|
||||
)
|
||||
|
||||
# CRC32 and SHA1 within a ROM_LOAD line
|
||||
_CRC_SHA = re.compile(
|
||||
r'CRC\s*\(\s*([0-9a-fA-F]+)\s*\)'
|
||||
r'\s+'
|
||||
r'SHA1\s*\(\s*([0-9a-fA-F]+)\s*\)',
|
||||
r"CRC\s*\(\s*([0-9a-fA-F]+)\s*\)"
|
||||
r"\s+"
|
||||
r"SHA1\s*\(\s*([0-9a-fA-F]+)\s*\)",
|
||||
)
|
||||
|
||||
_NO_DUMP = re.compile(r'\bNO_DUMP\b')
|
||||
_BAD_DUMP = re.compile(r'\bBAD_DUMP\b')
|
||||
_ROM_BIOS = re.compile(r'ROM_BIOS\s*\(\s*(\d+)\s*\)')
|
||||
_NO_DUMP = re.compile(r"\bNO_DUMP\b")
|
||||
_BAD_DUMP = re.compile(r"\bBAD_DUMP\b")
|
||||
_ROM_BIOS = re.compile(r"ROM_BIOS\s*\(\s*(\d+)\s*\)")
|
||||
|
||||
|
||||
def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
|
||||
@@ -77,8 +77,8 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
|
||||
if block_end == -1:
|
||||
continue
|
||||
|
||||
block = source[start:block_end + 1]
|
||||
if 'MACHINE_IS_BIOS_ROOT' not in block:
|
||||
block = source[start : block_end + 1]
|
||||
if "MACHINE_IS_BIOS_ROOT" not in block:
|
||||
continue
|
||||
|
||||
# Extract set name: first arg after the opening paren
|
||||
@@ -97,11 +97,11 @@ def find_bios_root_sets(source: str, filename: str) -> dict[str, dict]:
|
||||
continue
|
||||
|
||||
set_name = args[1].strip()
|
||||
line_no = source[:match.start()].count('\n') + 1
|
||||
line_no = source[: match.start()].count("\n") + 1
|
||||
|
||||
results[set_name] = {
|
||||
'source_file': filename,
|
||||
'source_line': line_no,
|
||||
"source_file": filename,
|
||||
"source_line": line_no,
|
||||
}
|
||||
|
||||
return results
|
||||
@@ -115,7 +115,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]:
|
||||
extracts all ROM entries. Skips NO_DUMP, flags BAD_DUMP.
|
||||
"""
|
||||
pattern = re.compile(
|
||||
r'ROM_START\s*\(\s*' + re.escape(set_name) + r'\s*\)',
|
||||
r"ROM_START\s*\(\s*" + re.escape(set_name) + r"\s*\)",
|
||||
)
|
||||
start_match = pattern.search(source)
|
||||
if not start_match:
|
||||
@@ -125,7 +125,7 @@ def parse_rom_block(source: str, set_name: str) -> list[dict]:
|
||||
if not end_match:
|
||||
return []
|
||||
|
||||
block = source[start_match.end():end_match.start()]
|
||||
block = source[start_match.end() : end_match.start()]
|
||||
|
||||
# Pre-expand macros: find #define macros in the file that contain
|
||||
# ROM_LOAD/ROM_REGION/ROM_SYSTEM_BIOS calls, then expand their
|
||||
@@ -144,26 +144,26 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]:
|
||||
results: dict[str, dict] = {}
|
||||
root = Path(base_path)
|
||||
|
||||
search_dirs = [root / 'src' / 'mame', root / 'src' / 'devices']
|
||||
search_dirs = [root / "src" / "mame", root / "src" / "devices"]
|
||||
|
||||
for search_dir in search_dirs:
|
||||
if not search_dir.is_dir():
|
||||
continue
|
||||
for dirpath, _dirnames, filenames in os.walk(search_dir):
|
||||
for fname in filenames:
|
||||
if not fname.endswith(('.cpp', '.c', '.h', '.hxx')):
|
||||
if not fname.endswith((".cpp", ".c", ".h", ".hxx")):
|
||||
continue
|
||||
filepath = Path(dirpath) / fname
|
||||
rel_path = str(filepath.relative_to(root))
|
||||
content = filepath.read_text(encoding='utf-8', errors='replace')
|
||||
content = filepath.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
bios_sets = find_bios_root_sets(content, rel_path)
|
||||
for set_name, info in bios_sets.items():
|
||||
roms = parse_rom_block(content, set_name)
|
||||
results[set_name] = {
|
||||
'source_file': info['source_file'],
|
||||
'source_line': info['source_line'],
|
||||
'roms': roms,
|
||||
"source_file": info["source_file"],
|
||||
"source_line": info["source_line"],
|
||||
"roms": roms,
|
||||
}
|
||||
|
||||
return results
|
||||
@@ -171,13 +171,20 @@ def parse_mame_source_tree(base_path: str) -> dict[str, dict]:
|
||||
|
||||
# Regex for #define macros that span multiple lines (backslash continuation)
|
||||
_DEFINE_RE = re.compile(
|
||||
r'^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s*((?:.*\\\n)*.*)',
|
||||
r"^\s*#\s*define\s+(\w+)(?:\([^)]*\))?\s*((?:.*\\\n)*.*)",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
# ROM-related tokens that indicate a macro is relevant for expansion
|
||||
_ROM_TOKENS = {'ROM_LOAD', 'ROMX_LOAD', 'ROM_REGION', 'ROM_SYSTEM_BIOS',
|
||||
'ROM_FILL', 'ROM_COPY', 'ROM_RELOAD'}
|
||||
_ROM_TOKENS = {
|
||||
"ROM_LOAD",
|
||||
"ROMX_LOAD",
|
||||
"ROM_REGION",
|
||||
"ROM_SYSTEM_BIOS",
|
||||
"ROM_FILL",
|
||||
"ROM_COPY",
|
||||
"ROM_RELOAD",
|
||||
}
|
||||
|
||||
|
||||
def _collect_rom_macros(source: str) -> dict[str, str]:
|
||||
@@ -193,14 +200,14 @@ def _collect_rom_macros(source: str) -> dict[str, str]:
|
||||
name = m.group(1)
|
||||
body = m.group(2)
|
||||
# Join backslash-continued lines
|
||||
body = body.replace('\\\n', ' ')
|
||||
body = body.replace("\\\n", " ")
|
||||
# Only keep macros that contain ROM-related tokens
|
||||
if not any(tok in body for tok in _ROM_TOKENS):
|
||||
continue
|
||||
# Skip wrapper macros: if the body contains ROMX_LOAD/ROM_LOAD
|
||||
# with unquoted args (formal parameters), it's a wrapper.
|
||||
# These are already recognized by the _ROM_LOAD regex directly.
|
||||
if re.search(r'ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,', body):
|
||||
if re.search(r"ROMX?_LOAD\s*\(\s*\w+\s*,\s*\w+\s*,", body):
|
||||
continue
|
||||
macros[name] = body
|
||||
return macros
|
||||
@@ -223,7 +230,7 @@ def _expand_macros(block: str, macros: dict[str, str], depth: int = 5) -> str:
|
||||
iterations += 1
|
||||
for name, body in macros.items():
|
||||
# Match macro invocation: NAME or NAME(args)
|
||||
pattern = re.compile(r'\b' + re.escape(name) + r'(?:\s*\([^)]*\))?')
|
||||
pattern = re.compile(r"\b" + re.escape(name) + r"(?:\s*\([^)]*\))?")
|
||||
if pattern.search(block):
|
||||
block = pattern.sub(body, block)
|
||||
changed = True
|
||||
@@ -237,9 +244,9 @@ def _find_closing_paren(source: str, start: int) -> int:
|
||||
i = start
|
||||
while i < len(source):
|
||||
ch = source[i]
|
||||
if ch == '(':
|
||||
if ch == "(":
|
||||
depth += 1
|
||||
elif ch == ')':
|
||||
elif ch == ")":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
return i
|
||||
@@ -268,24 +275,24 @@ def _split_macro_args(inner: str) -> list[str]:
|
||||
i += 1
|
||||
if i < len(inner):
|
||||
current.append(inner[i])
|
||||
elif ch == '(':
|
||||
elif ch == "(":
|
||||
depth += 1
|
||||
current.append(ch)
|
||||
elif ch == ')':
|
||||
elif ch == ")":
|
||||
if depth == 0:
|
||||
args.append(''.join(current))
|
||||
args.append("".join(current))
|
||||
break
|
||||
depth -= 1
|
||||
current.append(ch)
|
||||
elif ch == ',' and depth == 0:
|
||||
args.append(''.join(current))
|
||||
elif ch == "," and depth == 0:
|
||||
args.append("".join(current))
|
||||
current = []
|
||||
else:
|
||||
current.append(ch)
|
||||
i += 1
|
||||
|
||||
if current:
|
||||
remaining = ''.join(current).strip()
|
||||
remaining = "".join(current).strip()
|
||||
if remaining:
|
||||
args.append(remaining)
|
||||
|
||||
@@ -300,15 +307,15 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
Processes matches in order of appearance to track region and BIOS context.
|
||||
"""
|
||||
roms: list[dict] = []
|
||||
current_region = ''
|
||||
current_region = ""
|
||||
bios_labels: dict[int, tuple[str, str]] = {}
|
||||
|
||||
# Build a combined pattern that matches all interesting tokens
|
||||
# and process them in order of occurrence
|
||||
token_patterns = [
|
||||
('region', _ROM_REGION),
|
||||
('bios_label', _ROM_SYSTEM_BIOS),
|
||||
('rom_load', _ROM_LOAD),
|
||||
("region", _ROM_REGION),
|
||||
("bios_label", _ROM_SYSTEM_BIOS),
|
||||
("rom_load", _ROM_LOAD),
|
||||
]
|
||||
|
||||
# Collect all matches with their positions
|
||||
@@ -321,22 +328,22 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
events.sort(key=lambda e: e[0])
|
||||
|
||||
for _pos, tag, m in events:
|
||||
if tag == 'region':
|
||||
if tag == "region":
|
||||
current_region = m.group(2)
|
||||
elif tag == 'bios_label':
|
||||
elif tag == "bios_label":
|
||||
idx = int(m.group(1))
|
||||
bios_labels[idx] = (m.group(2), m.group(3))
|
||||
elif tag == 'rom_load':
|
||||
elif tag == "rom_load":
|
||||
# Get the full macro call as context (find closing paren)
|
||||
context_start = m.start()
|
||||
# Find the opening paren of the ROM_LOAD macro
|
||||
paren_pos = block.find('(', context_start)
|
||||
paren_pos = block.find("(", context_start)
|
||||
if paren_pos != -1:
|
||||
close_pos = _find_closing_paren(block, paren_pos)
|
||||
context_end = close_pos + 1 if close_pos != -1 else m.end() + 200
|
||||
else:
|
||||
context_end = m.end() + 200
|
||||
context = block[context_start:min(context_end, len(block))]
|
||||
context = block[context_start : min(context_end, len(block))]
|
||||
|
||||
if _NO_DUMP.search(context):
|
||||
continue
|
||||
@@ -345,8 +352,8 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
rom_size = _parse_int(m.group(3))
|
||||
|
||||
crc_sha_match = _CRC_SHA.search(context)
|
||||
crc32 = ''
|
||||
sha1 = ''
|
||||
crc32 = ""
|
||||
sha1 = ""
|
||||
if crc_sha_match:
|
||||
crc32 = crc_sha_match.group(1).lower()
|
||||
sha1 = crc_sha_match.group(2).lower()
|
||||
@@ -354,8 +361,8 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
bad_dump = bool(_BAD_DUMP.search(context))
|
||||
|
||||
bios_index = None
|
||||
bios_label = ''
|
||||
bios_description = ''
|
||||
bios_label = ""
|
||||
bios_description = ""
|
||||
bios_ref = _ROM_BIOS.search(context)
|
||||
if bios_ref:
|
||||
bios_index = int(bios_ref.group(1))
|
||||
@@ -363,18 +370,18 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
bios_label, bios_description = bios_labels[bios_index]
|
||||
|
||||
entry: dict = {
|
||||
'name': rom_name,
|
||||
'size': rom_size,
|
||||
'crc32': crc32,
|
||||
'sha1': sha1,
|
||||
'region': current_region,
|
||||
'bad_dump': bad_dump,
|
||||
"name": rom_name,
|
||||
"size": rom_size,
|
||||
"crc32": crc32,
|
||||
"sha1": sha1,
|
||||
"region": current_region,
|
||||
"bad_dump": bad_dump,
|
||||
}
|
||||
|
||||
if bios_index is not None:
|
||||
entry['bios_index'] = bios_index
|
||||
entry['bios_label'] = bios_label
|
||||
entry['bios_description'] = bios_description
|
||||
entry["bios_index"] = bios_index
|
||||
entry["bios_label"] = bios_label
|
||||
entry["bios_description"] = bios_description
|
||||
|
||||
roms.append(entry)
|
||||
|
||||
@@ -384,6 +391,6 @@ def _parse_rom_entries(block: str) -> list[dict]:
|
||||
def _parse_int(value: str) -> int:
|
||||
"""Parse an integer that may be hex (0x...) or decimal."""
|
||||
value = value.strip()
|
||||
if value.startswith('0x') or value.startswith('0X'):
|
||||
if value.startswith("0x") or value.startswith("0X"):
|
||||
return int(value, 16)
|
||||
return int(value)
|
||||
|
||||
@@ -16,8 +16,6 @@ Recalbox verification logic:
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag
|
||||
@@ -121,17 +119,19 @@ class Scraper(BaseScraper):
|
||||
for bios_elem in system_elem.findall("bios"):
|
||||
paths_str = bios_elem.get("path", "")
|
||||
md5_str = bios_elem.get("md5", "")
|
||||
core = bios_elem.get("core", "")
|
||||
bios_elem.get("core", "")
|
||||
mandatory = bios_elem.get("mandatory", "true") != "false"
|
||||
hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false"
|
||||
note = bios_elem.get("note", "")
|
||||
bios_elem.get("hashMatchMandatory", "true") != "false"
|
||||
bios_elem.get("note", "")
|
||||
|
||||
paths = [p.strip() for p in paths_str.split("|") if p.strip()]
|
||||
if not paths:
|
||||
continue
|
||||
|
||||
primary_path = paths[0]
|
||||
name = primary_path.split("/")[-1] if "/" in primary_path else primary_path
|
||||
name = (
|
||||
primary_path.split("/")[-1] if "/" in primary_path else primary_path
|
||||
)
|
||||
|
||||
md5_list = [m.strip() for m in md5_str.split(",") if m.strip()]
|
||||
all_md5 = ",".join(md5_list) if md5_list else None
|
||||
@@ -141,14 +141,16 @@ class Scraper(BaseScraper):
|
||||
continue
|
||||
seen.add(dedup_key)
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
md5=all_md5,
|
||||
destination=primary_path,
|
||||
required=mandatory,
|
||||
native_id=platform,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=name,
|
||||
system=system_slug,
|
||||
md5=all_md5,
|
||||
destination=primary_path,
|
||||
required=mandatory,
|
||||
native_id=platform,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -168,7 +170,9 @@ class Scraper(BaseScraper):
|
||||
md5_str = bios_elem.get("md5", "")
|
||||
core = bios_elem.get("core", "")
|
||||
mandatory = bios_elem.get("mandatory", "true") != "false"
|
||||
hash_match_mandatory = bios_elem.get("hashMatchMandatory", "true") != "false"
|
||||
hash_match_mandatory = (
|
||||
bios_elem.get("hashMatchMandatory", "true") != "false"
|
||||
)
|
||||
note = bios_elem.get("note", "")
|
||||
|
||||
paths = [p.strip() for p in paths_str.split("|") if p.strip()]
|
||||
@@ -179,17 +183,19 @@ class Scraper(BaseScraper):
|
||||
|
||||
name = paths[0].split("/")[-1] if "/" in paths[0] else paths[0]
|
||||
|
||||
requirements.append({
|
||||
"name": name,
|
||||
"system": system_slug,
|
||||
"system_name": system_name,
|
||||
"paths": paths,
|
||||
"md5_list": md5_list,
|
||||
"core": core,
|
||||
"mandatory": mandatory,
|
||||
"hash_match_mandatory": hash_match_mandatory,
|
||||
"note": note,
|
||||
})
|
||||
requirements.append(
|
||||
{
|
||||
"name": name,
|
||||
"system": system_slug,
|
||||
"system_name": system_name,
|
||||
"paths": paths,
|
||||
"md5_list": md5_list,
|
||||
"core": core,
|
||||
"mandatory": mandatory,
|
||||
"hash_match_mandatory": hash_match_mandatory,
|
||||
"note": note,
|
||||
}
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -245,7 +251,9 @@ def main():
|
||||
parser = argparse.ArgumentParser(description="Scrape Recalbox es_bios.xml")
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--json", action="store_true")
|
||||
parser.add_argument("--full", action="store_true", help="Show full Recalbox-specific fields")
|
||||
parser.add_argument(
|
||||
"--full", action="store_true", help="Show full Recalbox-specific fields"
|
||||
)
|
||||
parser.add_argument("--output", "-o")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -264,6 +272,7 @@ def main():
|
||||
|
||||
if args.dry_run:
|
||||
from collections import defaultdict
|
||||
|
||||
by_system = defaultdict(list)
|
||||
for r in reqs:
|
||||
by_system[r.system].append(r)
|
||||
@@ -272,7 +281,7 @@ def main():
|
||||
for f in files[:5]:
|
||||
print(f" {f.name} (md5={f.md5[:12] if f.md5 else 'N/A'}...)")
|
||||
if len(files) > 5:
|
||||
print(f" ... +{len(files)-5} more")
|
||||
print(f" ... +{len(files) - 5} more")
|
||||
print(f"\nTotal: {len(reqs)} BIOS files across {len(by_system)} systems")
|
||||
return
|
||||
|
||||
|
||||
@@ -9,9 +9,6 @@ Hash: MD5 primary
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
try:
|
||||
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_version
|
||||
@@ -43,7 +40,6 @@ class Scraper(BaseScraper):
|
||||
super().__init__(url=url)
|
||||
self._parsed: dict | None = None
|
||||
|
||||
|
||||
def _parse_json(self) -> dict:
|
||||
if self._parsed is not None:
|
||||
return self._parsed
|
||||
@@ -89,13 +85,15 @@ class Scraper(BaseScraper):
|
||||
|
||||
name = file_path.split("/")[-1] if "/" in file_path else file_path
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=name,
|
||||
system=SYSTEM_SLUG_MAP.get(sys_key, sys_key),
|
||||
md5=md5 or None,
|
||||
destination=file_path,
|
||||
required=True,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=name,
|
||||
system=SYSTEM_SLUG_MAP.get(sys_key, sys_key),
|
||||
md5=md5 or None,
|
||||
destination=file_path,
|
||||
required=True,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -170,6 +168,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main():
|
||||
from scripts.scraper.base_scraper import scraper_cli
|
||||
|
||||
scraper_cli(Scraper, "Scrape retrobat BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -29,8 +29,8 @@ import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
@@ -43,16 +43,16 @@ PLATFORM_NAME = "retrodeck"
|
||||
COMPONENTS_REPO = "RetroDECK/components"
|
||||
COMPONENTS_BRANCH = "main"
|
||||
COMPONENTS_API_URL = (
|
||||
f"https://api.github.com/repos/{COMPONENTS_REPO}"
|
||||
f"/git/trees/{COMPONENTS_BRANCH}"
|
||||
)
|
||||
RAW_BASE = (
|
||||
f"https://raw.githubusercontent.com/{COMPONENTS_REPO}"
|
||||
f"/{COMPONENTS_BRANCH}"
|
||||
f"https://api.github.com/repos/{COMPONENTS_REPO}/git/trees/{COMPONENTS_BRANCH}"
|
||||
)
|
||||
RAW_BASE = f"https://raw.githubusercontent.com/{COMPONENTS_REPO}/{COMPONENTS_BRANCH}"
|
||||
SKIP_DIRS = {"archive_later", "archive_old", "automation-tools", ".github"}
|
||||
NON_EMULATOR_COMPONENTS = {
|
||||
"framework", "es-de", "steam-rom-manager", "flips", "portmaster",
|
||||
"framework",
|
||||
"es-de",
|
||||
"steam-rom-manager",
|
||||
"flips",
|
||||
"portmaster",
|
||||
}
|
||||
|
||||
# RetroDECK system ID -> retrobios slug.
|
||||
@@ -358,13 +358,20 @@ class Scraper(BaseScraper):
|
||||
|
||||
required_raw = entry.get("required", "")
|
||||
required = bool(required_raw) and str(required_raw).lower() not in (
|
||||
"false", "no", "optional", "",
|
||||
"false",
|
||||
"no",
|
||||
"optional",
|
||||
"",
|
||||
)
|
||||
|
||||
key = (system, filename.lower())
|
||||
if key in seen:
|
||||
existing = next(
|
||||
(r for r in requirements if (r.system, r.name.lower()) == key),
|
||||
(
|
||||
r
|
||||
for r in requirements
|
||||
if (r.system, r.name.lower()) == key
|
||||
),
|
||||
None,
|
||||
)
|
||||
if existing and md5 and existing.md5 and md5 != existing.md5:
|
||||
@@ -376,13 +383,15 @@ class Scraper(BaseScraper):
|
||||
continue
|
||||
seen.add(key)
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=filename,
|
||||
system=system,
|
||||
destination=destination,
|
||||
md5=md5,
|
||||
required=required,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=filename,
|
||||
system=system,
|
||||
destination=destination,
|
||||
md5=md5,
|
||||
required=required,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -390,11 +399,14 @@ class Scraper(BaseScraper):
|
||||
reqs = self.fetch_requirements()
|
||||
manifests = self._get_manifests()
|
||||
|
||||
cores = sorted({
|
||||
comp_name for comp_name, _ in manifests
|
||||
if comp_name not in SKIP_DIRS
|
||||
and comp_name not in NON_EMULATOR_COMPONENTS
|
||||
})
|
||||
cores = sorted(
|
||||
{
|
||||
comp_name
|
||||
for comp_name, _ in manifests
|
||||
if comp_name not in SKIP_DIRS
|
||||
and comp_name not in NON_EMULATOR_COMPONENTS
|
||||
}
|
||||
)
|
||||
|
||||
systems: dict[str, dict] = {}
|
||||
for req in reqs:
|
||||
@@ -423,6 +435,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main() -> None:
|
||||
from scraper.base_scraper import scraper_cli
|
||||
|
||||
scraper_cli(Scraper, "Scrape RetroDECK BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -138,16 +138,18 @@ class Scraper(BaseScraper):
|
||||
crc32 = (entry.get("crc") or "").strip() or None
|
||||
size = int(entry["size"]) if entry.get("size") else None
|
||||
|
||||
requirements.append(BiosRequirement(
|
||||
name=filename,
|
||||
system=system,
|
||||
sha1=sha1,
|
||||
md5=md5,
|
||||
crc32=crc32,
|
||||
size=size,
|
||||
destination=f"{igdb_slug}/{filename}",
|
||||
required=True,
|
||||
))
|
||||
requirements.append(
|
||||
BiosRequirement(
|
||||
name=filename,
|
||||
system=system,
|
||||
sha1=sha1,
|
||||
md5=md5,
|
||||
crc32=crc32,
|
||||
size=size,
|
||||
destination=f"{igdb_slug}/{filename}",
|
||||
required=True,
|
||||
)
|
||||
)
|
||||
|
||||
return requirements
|
||||
|
||||
@@ -164,7 +166,7 @@ class Scraper(BaseScraper):
|
||||
for key in list(data.keys())[:5]:
|
||||
if ":" not in key:
|
||||
return False
|
||||
_, entry = key.split(":", 1), data[key]
|
||||
_, _entry = key.split(":", 1), data[key]
|
||||
if not isinstance(data[key], dict):
|
||||
return False
|
||||
if "md5" not in data[key] and "sha1" not in data[key]:
|
||||
@@ -217,6 +219,7 @@ class Scraper(BaseScraper):
|
||||
|
||||
def main():
|
||||
from scripts.scraper.base_scraper import scraper_cli
|
||||
|
||||
scraper_cli(Scraper, "Scrape RomM BIOS requirements")
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
Auto-detects *_targets_scraper.py files and exposes their scrapers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
|
||||
@@ -6,6 +6,7 @@ Sources (batocera-linux/batocera.linux):
|
||||
- package/batocera/emulationstation/batocera-es-system/es_systems.yml
|
||||
-- emulator requireAnyOf flag mapping
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -35,23 +36,23 @@ _HEADERS = {
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
|
||||
_TARGET_FLAG_RE = re.compile(r'^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y', re.MULTILINE)
|
||||
_TARGET_FLAG_RE = re.compile(r"^(BR2_PACKAGE_BATOCERA_TARGET_\w+)=y", re.MULTILINE)
|
||||
|
||||
# Matches: select BR2_PACKAGE_FOO (optional: if CONDITION)
|
||||
# Condition may span multiple lines (backslash continuation)
|
||||
_SELECT_RE = re.compile(
|
||||
r'^\s+select\s+(BR2_PACKAGE_\w+)' # package being selected
|
||||
r'(?:\s+if\s+((?:[^\n]|\\\n)+?))?' # optional "if CONDITION" (may continue with \)
|
||||
r'(?:\s*#[^\n]*)?$', # optional trailing comment
|
||||
r"^\s+select\s+(BR2_PACKAGE_\w+)" # package being selected
|
||||
r"(?:\s+if\s+((?:[^\n]|\\\n)+?))?" # optional "if CONDITION" (may continue with \)
|
||||
r"(?:\s*#[^\n]*)?$", # optional trailing comment
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
# Meta-flag definition: "if COND\n\tconfig DERIVED_FLAG\n\t...\nendif"
|
||||
_META_BLOCK_RE = re.compile(
|
||||
r'^if\s+((?:[^\n]|\\\n)+?)\n' # condition (may span lines via \)
|
||||
r'(?:.*?\n)*?' # optional lines before the config
|
||||
r'\s+config\s+(BR2_PACKAGE_\w+)' # derived flag name
|
||||
r'.*?^endif', # end of block
|
||||
r"^if\s+((?:[^\n]|\\\n)+?)\n" # condition (may span lines via \)
|
||||
r"(?:.*?\n)*?" # optional lines before the config
|
||||
r"\s+config\s+(BR2_PACKAGE_\w+)" # derived flag name
|
||||
r".*?^endif", # end of block
|
||||
re.MULTILINE | re.DOTALL,
|
||||
)
|
||||
|
||||
@@ -80,7 +81,7 @@ def _fetch_json(url: str) -> list | dict | None:
|
||||
|
||||
def _normalise_condition(raw: str) -> str:
|
||||
"""Strip backslash-continuations and collapse whitespace."""
|
||||
return re.sub(r'\\\n\s*', ' ', raw).strip()
|
||||
return re.sub(r"\\\n\s*", " ", raw).strip()
|
||||
|
||||
|
||||
def _tokenise(condition: str) -> list[str]:
|
||||
@@ -89,14 +90,16 @@ def _tokenise(condition: str) -> list[str]:
|
||||
return token_re.findall(condition)
|
||||
|
||||
|
||||
def _check_condition(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
|
||||
def _check_condition(
|
||||
tokens: list[str], pos: int, active: frozenset[str]
|
||||
) -> tuple[bool, int]:
|
||||
"""Recursive descent check of a Kconfig boolean expression."""
|
||||
return _check_or(tokens, pos, active)
|
||||
|
||||
|
||||
def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
|
||||
left, pos = _check_and(tokens, pos, active)
|
||||
while pos < len(tokens) and tokens[pos] == '||':
|
||||
while pos < len(tokens) and tokens[pos] == "||":
|
||||
pos += 1
|
||||
right, pos = _check_and(tokens, pos, active)
|
||||
left = left or right
|
||||
@@ -105,7 +108,7 @@ def _check_or(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool
|
||||
|
||||
def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
|
||||
left, pos = _check_not(tokens, pos, active)
|
||||
while pos < len(tokens) and tokens[pos] == '&&':
|
||||
while pos < len(tokens) and tokens[pos] == "&&":
|
||||
pos += 1
|
||||
right, pos = _check_not(tokens, pos, active)
|
||||
left = left and right
|
||||
@@ -113,24 +116,26 @@ def _check_and(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[boo
|
||||
|
||||
|
||||
def _check_not(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
|
||||
if pos < len(tokens) and tokens[pos] == '!':
|
||||
if pos < len(tokens) and tokens[pos] == "!":
|
||||
pos += 1
|
||||
val, pos = _check_atom(tokens, pos, active)
|
||||
return not val, pos
|
||||
return _check_atom(tokens, pos, active)
|
||||
|
||||
|
||||
def _check_atom(tokens: list[str], pos: int, active: frozenset[str]) -> tuple[bool, int]:
|
||||
def _check_atom(
|
||||
tokens: list[str], pos: int, active: frozenset[str]
|
||||
) -> tuple[bool, int]:
|
||||
if pos >= len(tokens):
|
||||
return True, pos
|
||||
tok = tokens[pos]
|
||||
if tok == '(':
|
||||
if tok == "(":
|
||||
pos += 1
|
||||
val, pos = _check_or(tokens, pos, active)
|
||||
if pos < len(tokens) and tokens[pos] == ')':
|
||||
if pos < len(tokens) and tokens[pos] == ")":
|
||||
pos += 1
|
||||
return val, pos
|
||||
if tok.startswith('BR2_'):
|
||||
if tok.startswith("BR2_"):
|
||||
pos += 1
|
||||
return tok in active, pos
|
||||
if tok.startswith('"'):
|
||||
@@ -170,7 +175,9 @@ def _parse_meta_flags(text: str) -> list[tuple[str, str]]:
|
||||
return results
|
||||
|
||||
|
||||
def _expand_flags(primary_flag: str, meta_rules: list[tuple[str, str]]) -> frozenset[str]:
|
||||
def _expand_flags(
|
||||
primary_flag: str, meta_rules: list[tuple[str, str]]
|
||||
) -> frozenset[str]:
|
||||
"""Given a board's primary flag, expand to all active derived flags.
|
||||
|
||||
Iterates until stable (handles chained derivations like X86_64_ANY -> X86_ANY).
|
||||
@@ -194,7 +201,7 @@ def _parse_selects(text: str) -> list[tuple[str, str]]:
|
||||
results: list[tuple[str, str]] = []
|
||||
for m in _SELECT_RE.finditer(text):
|
||||
pkg = m.group(1)
|
||||
cond = _normalise_condition(m.group(2) or '')
|
||||
cond = _normalise_condition(m.group(2) or "")
|
||||
results.append((pkg, cond))
|
||||
return results
|
||||
|
||||
@@ -261,7 +268,8 @@ class Scraper(BaseTargetScraper):
|
||||
if not data or not isinstance(data, list):
|
||||
return []
|
||||
return [
|
||||
item["name"] for item in data
|
||||
item["name"]
|
||||
for item in data
|
||||
if isinstance(item, dict)
|
||||
and item.get("name", "").startswith("batocera-")
|
||||
and item.get("name", "").endswith(".board")
|
||||
|
||||
@@ -4,6 +4,7 @@ Sources:
|
||||
SteamOS: dragoonDorise/EmuDeck -functions/EmuScripts/*.sh
|
||||
Windows: EmuDeck/emudeck-we -functions/EmuScripts/*.ps1
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -20,8 +21,12 @@ from . import BaseTargetScraper
|
||||
|
||||
PLATFORM_NAME = "emudeck"
|
||||
|
||||
STEAMOS_API = "https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts"
|
||||
WINDOWS_API = "https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts"
|
||||
STEAMOS_API = (
|
||||
"https://api.github.com/repos/dragoonDorise/EmuDeck/contents/functions/EmuScripts"
|
||||
)
|
||||
WINDOWS_API = (
|
||||
"https://api.github.com/repos/EmuDeck/emudeck-we/contents/functions/EmuScripts"
|
||||
)
|
||||
|
||||
# Map EmuDeck script names to emulator profile keys
|
||||
# Script naming: emuDeckDolphin.sh -> dolphin
|
||||
@@ -70,8 +75,8 @@ def _list_emuscripts(api_url: str) -> list[str]:
|
||||
def _script_to_core(filename: str) -> str | None:
|
||||
"""Convert EmuScripts filename to core profile key."""
|
||||
# Strip extension and emuDeck prefix
|
||||
name = re.sub(r'\.(sh|ps1)$', '', filename, flags=re.IGNORECASE)
|
||||
name = re.sub(r'^emuDeck', '', name, flags=re.IGNORECASE)
|
||||
name = re.sub(r"\.(sh|ps1)$", "", filename, flags=re.IGNORECASE)
|
||||
name = re.sub(r"^emuDeck", "", name, flags=re.IGNORECASE)
|
||||
if not name:
|
||||
return None
|
||||
key = name.lower()
|
||||
@@ -86,8 +91,9 @@ class Scraper(BaseTargetScraper):
|
||||
def __init__(self, url: str = "https://github.com/dragoonDorise/EmuDeck"):
|
||||
super().__init__(url=url)
|
||||
|
||||
def _fetch_cores_for_target(self, api_url: str, label: str,
|
||||
arch: str = "x86_64") -> list[str]:
|
||||
def _fetch_cores_for_target(
|
||||
self, api_url: str, label: str, arch: str = "x86_64"
|
||||
) -> list[str]:
|
||||
print(f" fetching {label} EmuScripts...", file=sys.stderr)
|
||||
scripts = _list_emuscripts(api_url)
|
||||
cores: list[str] = []
|
||||
@@ -99,7 +105,7 @@ class Scraper(BaseTargetScraper):
|
||||
seen.add(core)
|
||||
cores.append(core)
|
||||
# Detect RetroArch presence (provides all libretro cores)
|
||||
name = re.sub(r'\.(sh|ps1)$', '', script, flags=re.IGNORECASE)
|
||||
name = re.sub(r"\.(sh|ps1)$", "", script, flags=re.IGNORECASE)
|
||||
if name.lower() in ("emudeckretroarch", "retroarch_maincfg"):
|
||||
has_retroarch = True
|
||||
|
||||
@@ -112,15 +118,18 @@ class Scraper(BaseTargetScraper):
|
||||
seen.add(c)
|
||||
cores.append(c)
|
||||
|
||||
print(f" {label}: {standalone_count} standalone + "
|
||||
f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total",
|
||||
file=sys.stderr)
|
||||
print(
|
||||
f" {label}: {standalone_count} standalone + "
|
||||
f"{len(cores) - standalone_count} via RetroArch = {len(cores)} total",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return sorted(cores)
|
||||
|
||||
@staticmethod
|
||||
def _load_retroarch_cores(arch: str) -> list[str]:
|
||||
"""Load RetroArch target cores for given architecture."""
|
||||
import os
|
||||
|
||||
target_path = os.path.join("platforms", "targets", "retroarch.yml")
|
||||
if not os.path.exists(target_path):
|
||||
return []
|
||||
@@ -157,9 +166,7 @@ class Scraper(BaseTargetScraper):
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Scrape EmuDeck emulator targets"
|
||||
)
|
||||
parser = argparse.ArgumentParser(description="Scrape EmuDeck emulator targets")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show target summary")
|
||||
parser.add_argument("--output", "-o", help="Output YAML file")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -16,6 +16,7 @@ Buildbot structure varies by platform:
|
||||
- ps2: playstation/ps2/latest/ -> *_libretro_ps2.elf.zip
|
||||
- vita: bundles only (VPK) - no individual cores
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
@@ -64,7 +65,9 @@ RECIPE_TARGETS: list[tuple[str, str, str]] = [
|
||||
("playstation/vita", "playstation-vita", "armv7"),
|
||||
]
|
||||
|
||||
RECIPE_BASE_URL = "https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/"
|
||||
RECIPE_BASE_URL = (
|
||||
"https://raw.githubusercontent.com/libretro/libretro-super/master/recipes/"
|
||||
)
|
||||
|
||||
# Match any href containing _libretro followed by a platform-specific extension
|
||||
# Covers: .so.zip, .dll.zip, .dylib.zip, .nro.zip, .dol.zip, .rpx.zip,
|
||||
@@ -75,7 +78,7 @@ _HREF_RE = re.compile(
|
||||
)
|
||||
|
||||
# Extract core name: everything before _libretro
|
||||
_CORE_NAME_RE = re.compile(r'^(.+?)_libretro')
|
||||
_CORE_NAME_RE = re.compile(r"^(.+?)_libretro")
|
||||
|
||||
|
||||
class Scraper(BaseTargetScraper):
|
||||
@@ -180,12 +183,16 @@ def main() -> None:
|
||||
data = scraper.fetch_targets()
|
||||
|
||||
total_cores = sum(len(t["cores"]) for t in data["targets"].values())
|
||||
print(f"\n{len(data['targets'])} targets, {total_cores} total core entries",
|
||||
file=sys.stderr)
|
||||
print(
|
||||
f"\n{len(data['targets'])} targets, {total_cores} total core entries",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if args.dry_run:
|
||||
for name, info in sorted(data["targets"].items()):
|
||||
print(f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores")
|
||||
print(
|
||||
f" {name:30s} {info['architecture']:10s} {len(info['cores']):>4d} cores"
|
||||
)
|
||||
return
|
||||
|
||||
if args.output:
|
||||
|
||||
@@ -4,6 +4,7 @@ Source: https://github.com/RetroPie/RetroPie-Setup/tree/master/scriptmodules/lib
|
||||
Parses rp_module_id and rp_module_flags from each scriptmodule to determine
|
||||
which platforms each core supports.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
|
||||
Reference in New Issue
Block a user