From 74269bab84be76ebcdf8d33f980a34dec543419d Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:49:33 +0200 Subject: [PATCH] fix: rewrite exporters to match exact native formats --- scripts/exporter/base_exporter.py | 5 ++ scripts/exporter/batocera_exporter.py | 55 +++++++++++------- scripts/exporter/recalbox_exporter.py | 79 +++++++++++++++++++------- scripts/exporter/retrobat_exporter.py | 37 +++++++++--- scripts/exporter/systemdat_exporter.py | 70 +++++++++++++---------- tests/test_e2e.py | 10 +++- 6 files changed, 175 insertions(+), 81 deletions(-) diff --git a/scripts/exporter/base_exporter.py b/scripts/exporter/base_exporter.py index aecac1af..1839dc69 100644 --- a/scripts/exporter/base_exporter.py +++ b/scripts/exporter/base_exporter.py @@ -25,3 +25,8 @@ class BaseExporter(ABC): @abstractmethod def validate(self, truth_data: dict, output_path: str) -> list[str]: """Validate exported file against truth data, return list of issues.""" + + @staticmethod + def _is_pattern(name: str) -> bool: + """Check if a filename is a placeholder pattern (not a real file).""" + return "<" in name or ">" in name or "*" in name diff --git a/scripts/exporter/batocera_exporter.py b/scripts/exporter/batocera_exporter.py index 4208a20a..0edf6c34 100644 --- a/scripts/exporter/batocera_exporter.py +++ b/scripts/exporter/batocera_exporter.py @@ -1,4 +1,8 @@ -"""Exporter for Batocera batocera-systems format (Python dict).""" +"""Exporter for Batocera batocera-systems format. + +Produces a Python dict matching the exact format of +batocera-linux/batocera-scripts/scripts/batocera-systems. +""" from __future__ import annotations @@ -7,6 +11,11 @@ from pathlib import Path from .base_exporter import BaseExporter +def _slug_to_display(slug: str) -> str: + """Convert slug to display name: 'atari-5200' -> 'Atari 5200'.""" + return slug.replace("-", " ").title() + + class Exporter(BaseExporter): """Export truth data to Batocera batocera-systems format.""" @@ -20,20 +29,19 @@ class Exporter(BaseExporter): output_path: str, scraped_data: dict | None = None, ) -> None: + # Build native_id and display name maps from scraped data native_map: dict[str, str] = {} + display_map: dict[str, str] = {} if scraped_data: for sys_id, sys_data in scraped_data.get("systems", {}).items(): nid = sys_data.get("native_id") if nid: native_map[sys_id] = nid + dname = sys_data.get("name") + if dname: + display_map[sys_id] = dname - lines: list[str] = [ - "#!/usr/bin/env python3", - "# Generated batocera-systems BIOS declarations", - "from collections import OrderedDict", - "", - "systems = {", - ] + lines: list[str] = ["systems = {", ""] systems = truth_data.get("systems", {}) for sys_id in sorted(systems): @@ -43,26 +51,34 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - lines.append(f' "{native_id}": {{') - lines.append(' "biosFiles": [') + display_name = display_map.get(sys_id, _slug_to_display(sys_id)) + # Build biosFiles entries as compact single-line dicts + bios_parts: list[str] = [] for fe in files: name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue dest = fe.get("destination", name) md5 = fe.get("md5", "") if isinstance(md5, list): md5 = md5[0] if md5 else "" - lines.append(" {") - lines.append(f' "file": "bios/{dest}",') - lines.append(f' "md5": "{md5}",') - lines.append(" },") + entry_parts = [] + if md5: + entry_parts.append(f'"md5": "{md5}"') + entry_parts.append(f'"file": "bios/{dest}"') + bios_parts.append("{ " + ", ".join(entry_parts) + " }") - lines.append(" ],") - lines.append(" },") + bios_str = ", ".join(bios_parts) + line = ( + f' "{native_id}": ' + f'{{ "name": "{display_name}", ' + f'"biosFiles": [ {bios_str} ] }},' + ) + lines.append(line) + lines.append("") lines.append("}") lines.append("") Path(output_path).write_text("\n".join(lines), encoding="utf-8") @@ -73,8 +89,9 @@ class Exporter(BaseExporter): for sys_data in truth_data.get("systems", {}).values(): for fe in sys_data.get("files", []): name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue - if name not in content: + dest = fe.get("destination", name) + if dest not in content and name not in content: issues.append(f"missing: {name}") return issues diff --git a/scripts/exporter/recalbox_exporter.py b/scripts/exporter/recalbox_exporter.py index a7c06606..c014e6df 100644 --- a/scripts/exporter/recalbox_exporter.py +++ b/scripts/exporter/recalbox_exporter.py @@ -1,13 +1,25 @@ -"""Exporter for Recalbox es_bios.xml format.""" +"""Exporter for Recalbox es_bios.xml format. + +Produces XML matching the exact format of recalbox's es_bios.xml: +- XML namespace declaration +- +- with optional mandatory, hashMatchMandatory, note +- mandatory absent = true (only explicit when false) +- 2-space indentation +""" from __future__ import annotations from pathlib import Path -from xml.etree.ElementTree import Element, SubElement, ElementTree, indent from .base_exporter import BaseExporter +def _slug_to_display(slug: str) -> str: + """Convert slug to display name.""" + return slug.replace("-", " ").title() + + class Exporter(BaseExporter): """Export truth data to Recalbox es_bios.xml format.""" @@ -22,13 +34,21 @@ class Exporter(BaseExporter): scraped_data: dict | None = None, ) -> None: native_map: dict[str, str] = {} + display_map: dict[str, str] = {} if scraped_data: for sys_id, sys_data in scraped_data.get("systems", {}).items(): nid = sys_data.get("native_id") if nid: native_map[sys_id] = nid + dname = sys_data.get("name") + if dname: + display_map[sys_id] = dname - root = Element("biosList") + lines: list[str] = [ + '', + '', + ] systems = truth_data.get("systems", {}) for sys_id in sorted(systems): @@ -38,33 +58,46 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - system_el = SubElement(root, "system", platform=native_id) + display_name = display_map.get(sys_id, _slug_to_display(sys_id)) + + lines.append(f' ') for fe in files: name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue dest = fe.get("destination", name) + # Recalbox paths include system prefix + path = f"{native_id}/{dest}" if "/" not in dest else dest + md5 = fe.get("md5", "") if isinstance(md5, list): md5 = ",".join(md5) - required = fe.get("required", False) - attrs = { - "path": dest, - "md5": md5, - "mandatory": "true" if required else "false", - "hashMatchMandatory": "true" if required else "false", - } - SubElement(system_el, "bios", **attrs) + required = fe.get("required", True) - indent(root, space=" ") - tree = ElementTree(root) - tree.write(output_path, encoding="unicode", xml_declaration=True) - # Add trailing newline - with open(output_path, "a") as f: - f.write("\n") + # Build cores string from _cores + cores_list = fe.get("_cores", []) + core_str = ",".join(f"libretro/{c}" for c in cores_list) if cores_list else "" + + attrs = [f'path="{path}"'] + if md5: + attrs.append(f'md5="{md5}"') + if not required: + attrs.append('mandatory="false"') + if not required: + attrs.append('hashMatchMandatory="true"') + if core_str: + attrs.append(f'core="{core_str}"') + + lines.append(f' ') + + lines.append(" ") + + lines.append("") + lines.append("") + Path(output_path).write_text("\n".join(lines), encoding="utf-8") def validate(self, truth_data: dict, output_path: str) -> list[str]: from xml.etree.ElementTree import parse as xml_parse @@ -77,14 +110,16 @@ class Exporter(BaseExporter): path = bios_el.get("path", "") if path: exported_paths.add(path) + # Also index basename + exported_paths.add(path.split("/")[-1]) issues: list[str] = [] for sys_data in truth_data.get("systems", {}).values(): for fe in sys_data.get("files", []): name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue dest = fe.get("destination", name) - if dest not in exported_paths: - issues.append(f"missing: {dest}") + if name not in exported_paths and dest not in exported_paths: + issues.append(f"missing: {name}") return issues diff --git a/scripts/exporter/retrobat_exporter.py b/scripts/exporter/retrobat_exporter.py index e4e5b457..20c2f9cc 100644 --- a/scripts/exporter/retrobat_exporter.py +++ b/scripts/exporter/retrobat_exporter.py @@ -1,13 +1,25 @@ -"""Exporter for RetroBat batocera-systems.json format.""" +"""Exporter for RetroBat batocera-systems.json format. + +Produces JSON matching the exact format of +RetroBat-Official/emulatorlauncher/batocera-systems/Resources/batocera-systems.json: +- System keys with "name" and "biosFiles" fields +- Each biosFile has "md5" before "file" (matching original key order) +""" from __future__ import annotations import json +from collections import OrderedDict from pathlib import Path from .base_exporter import BaseExporter +def _slug_to_display(slug: str) -> str: + """Convert slug to display name.""" + return slug.replace("-", " ").title() + + class Exporter(BaseExporter): """Export truth data to RetroBat batocera-systems.json format.""" @@ -22,13 +34,17 @@ class Exporter(BaseExporter): scraped_data: dict | None = None, ) -> None: native_map: dict[str, str] = {} + display_map: dict[str, str] = {} if scraped_data: for sys_id, sys_data in scraped_data.get("systems", {}).items(): nid = sys_data.get("native_id") if nid: native_map[sys_id] = nid + dname = sys_data.get("name") + if dname: + display_map[sys_id] = dname - output: dict[str, dict] = {} + output: OrderedDict[str, dict] = OrderedDict() systems = truth_data.get("systems", {}) for sys_id in sorted(systems): @@ -38,24 +54,30 @@ class Exporter(BaseExporter): continue native_id = native_map.get(sys_id, sys_id) - bios_files: list[dict] = [] + display_name = display_map.get(sys_id, _slug_to_display(sys_id)) + bios_files: list[OrderedDict] = [] for fe in files: name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue dest = fe.get("destination", name) md5 = fe.get("md5", "") if isinstance(md5, list): md5 = md5[0] if md5 else "" - entry = {"file": f"bios/{dest}"} + # Original format has md5 before file + entry: OrderedDict[str, str] = OrderedDict() if md5: entry["md5"] = md5 + entry["file"] = f"bios/{dest}" bios_files.append(entry) if bios_files: - output[native_id] = {"biosFiles": bios_files} + sys_entry: OrderedDict[str, object] = OrderedDict() + sys_entry["name"] = display_name + sys_entry["biosFiles"] = bios_files + output[native_id] = sys_entry Path(output_path).write_text( json.dumps(output, indent=2, ensure_ascii=False) + "\n", @@ -69,7 +91,6 @@ class Exporter(BaseExporter): for sys_data in data.values(): for bf in sys_data.get("biosFiles", []): path = bf.get("file", "") - # Strip bios/ prefix, index both full path and basename stripped = path.removeprefix("bios/") exported_files.add(stripped) basename = path.split("/")[-1] if "/" in path else path @@ -79,7 +100,7 @@ class Exporter(BaseExporter): for sys_data in truth_data.get("systems", {}).values(): for fe in sys_data.get("files", []): name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue dest = fe.get("destination", name) if name not in exported_files and dest not in exported_files: diff --git a/scripts/exporter/systemdat_exporter.py b/scripts/exporter/systemdat_exporter.py index 8d041f3e..27b11abb 100644 --- a/scripts/exporter/systemdat_exporter.py +++ b/scripts/exporter/systemdat_exporter.py @@ -1,4 +1,8 @@ -"""Exporter for libretro System.dat (clrmamepro DAT format).""" +"""Exporter for libretro System.dat (clrmamepro DAT format). + +Produces a single 'game' block with all ROMs grouped by system, +matching the exact format of libretro-database/dat/System.dat. +""" from __future__ import annotations @@ -13,7 +17,7 @@ from .base_exporter import BaseExporter def _slug_to_native(slug: str) -> str: - """Convert a system slug to a native 'Manufacturer - Console' name.""" + """Convert a system slug to 'Manufacturer - Console' format.""" parts = slug.split("-", 1) if len(parts) == 1: return parts[0].title() @@ -42,45 +46,54 @@ class Exporter(BaseExporter): if nid: native_map[sys_id] = nid - lines: list[str] = [] - lines.append('clrmamepro (') - lines.append('\tname "System.dat"') - lines.append(')') + lines: list[str] = [ + "clrmamepro (", + '\tname "System"', + '\tdescription "System"', + '\tcomment "System, firmware, and BIOS files used by libretro cores."', + ")", + "", + "game (", + '\tname "System"', + ] systems = truth_data.get("systems", {}) for sys_id in sorted(systems): sys_data = systems[sys_id] - native_name = native_map.get(sys_id, _slug_to_native(sys_id)) + files = sys_data.get("files", []) + if not files: + continue - for fe in sys_data.get("files", []): + native_name = native_map.get(sys_id, _slug_to_native(sys_id)) + lines.append("") + lines.append(f'\tcomment "{native_name}"') + + for fe in files: name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue - dest = fe.get("path", name) - size = fe.get("size", 0) + rom_parts = [f"name {name}"] + size = fe.get("size") + if size: + rom_parts.append(f"size {size}") crc = fe.get("crc32", "") - md5 = fe.get("md5", "") - sha1 = fe.get("sha1", "") - - rom_parts = [f'name "{name}"'] - rom_parts.append(f"size {size}") if crc: - rom_parts.append(f"crc {crc}") + rom_parts.append(f"crc {crc.upper()}") + md5 = fe.get("md5", "") + if isinstance(md5, list): + md5 = md5[0] if md5 else "" if md5: rom_parts.append(f"md5 {md5}") + sha1 = fe.get("sha1", "") + if isinstance(sha1, list): + sha1 = sha1[0] if sha1 else "" if sha1: rom_parts.append(f"sha1 {sha1}") - rom_str = " ".join(rom_parts) - game_name = f"{native_name}/{dest}" - lines.append("") - lines.append("game (") - lines.append(f'\tname "{game_name}"') - lines.append(f'\tdescription "{name}"') - lines.append(f"\trom ( {rom_str} )") - lines.append(")") + lines.append(f"\trom ( {' '.join(rom_parts)} )") + lines.append(")") lines.append("") Path(output_path).write_text("\n".join(lines), encoding="utf-8") @@ -93,12 +106,11 @@ class Exporter(BaseExporter): exported_names.add(rom.name) issues: list[str] = [] - for sys_id, sys_data in truth_data.get("systems", {}).items(): + for sys_data in truth_data.get("systems", {}).values(): for fe in sys_data.get("files", []): name = fe.get("name", "") - if name.startswith("_"): + if name.startswith("_") or self._is_pattern(name): continue if name not in exported_names: - issues.append(f"missing: {name} (system {sys_id})") - + issues.append(f"missing: {name}") return issues diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 352b0be4..e64dffcf 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -3170,7 +3170,8 @@ class TestE2E(unittest.TestCase): self.assertIn("Sony - PlayStation", content) self.assertIn("scph5501.bin", content) self.assertIn("b056ee5a4d65937e1a3a17e1e78f3258ea49c38e", content) - self.assertIn('name "System.dat"', content) + self.assertIn('name "System"', content) + self.assertIn("71AF80B4", content) # CRC uppercase issues = exporter.validate(truth, out_path) self.assertEqual(issues, []) @@ -3557,10 +3558,13 @@ class TestE2E(unittest.TestCase): exp.export(truth, out, scraped_data=scraped) content = open(out).read() - self.assertIn("", content) + self.assertIn("