fix: resolve truncated md5 in batocera scraper

Batocera upstream has a truncated 29-char MD5 for zx48.rom.
The scraper now resolves truncated hashes via prefix match
against database.json, preventing schema validation failures.
This commit is contained in:
Abdessamad Derraz
2026-03-19 23:49:58 +01:00
parent 16fd815099
commit 21bc225cac
3 changed files with 325 additions and 254 deletions

View File

@@ -9,10 +9,12 @@ Hash: MD5 primary
from __future__ import annotations
import ast
import json
import re
import sys
import urllib.request
import urllib.error
from pathlib import Path
import yaml
@@ -93,6 +95,32 @@ SYSTEM_SLUG_MAP = {
}
# Matches a non-empty string consisting only of hexadecimal digits (either
# case); length is not checked here. Anchored with \Z rather than $ because
# $ would also accept a value that ends in a trailing newline.
_MD5_RE = re.compile(r'^[a-fA-F0-9]+\Z')
def _load_md5_index() -> dict[str, str]:
    """Load the ``by_md5`` index from database.json for prefix resolution.

    database.json is looked up two directories above the directory that
    contains this module.

    Returns:
        The mapping stored under ``indexes`` -> ``by_md5``, or an empty
        dict when the file does not exist or either key is absent.

    Raises:
        json.JSONDecodeError: If database.json exists but is not valid JSON.
    """
    db_path = Path(__file__).resolve().parents[2] / "database.json"
    try:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which differs between platforms.
        with open(db_path, encoding="utf-8") as f:
            db = json.load(f)
    except FileNotFoundError:
        # A missing database simply means there is nothing to resolve against.
        return {}
    return db.get("indexes", {}).get("by_md5", {})
def _resolve_truncated_md5(md5: str, md5_index: dict[str, str]) -> str:
    """Resolve a truncated MD5 to its full 32-char version via prefix match.

    Args:
        md5: Hash string to resolve; may be empty, complete, or truncated.
        md5_index: Mapping whose keys are the candidate full hashes.

    Returns:
        The single key of ``md5_index`` that starts with ``md5`` (compared
        case-insensitively). ``md5`` is returned unchanged when it is empty,
        already 32 characters long, not purely hexadecimal, or when zero or
        more than one key matches.
    """
    if not md5 or len(md5) == 32:
        return md5
    if not _MD5_RE.match(md5):
        return md5
    # _MD5_RE accepts both upper- and lower-case digits, so fold case on both
    # sides; a case-sensitive comparison would never resolve a prefix whose
    # case differs from that of the index keys.
    prefix = md5.lower()
    matches = [k for k in md5_index if k.lower().startswith(prefix)]
    # Only an unambiguous prefix is safe to expand.
    if len(matches) == 1:
        print(f" fixed truncated md5: {md5} -> {matches[0]}", file=sys.stderr)
        return matches[0]
    return md5
class Scraper(BaseScraper):
"""Scraper for batocera-systems Python dict."""
@@ -204,6 +232,7 @@ class Scraper(BaseScraper):
systems = self._extract_systems_dict(raw)
requirements = []
md5_index = _load_md5_index()
for sys_key, sys_data in systems.items():
system_slug = SYSTEM_SLUG_MAP.get(sys_key, sys_key)
@@ -211,7 +240,7 @@ class Scraper(BaseScraper):
for bios in bios_files:
file_path = bios.get("file", "")
md5 = bios.get("md5", "")
md5 = _resolve_truncated_md5(bios.get("md5", ""), md5_index)
zipped_file = bios.get("zippedFile", "")
if file_path.startswith("bios/"):