feat: platform-core registry for exact pack generation

resolve_platform_cores() links platforms to their cores via
three strategies: all_libretro, explicit list, system ID
fallback. Pack generation always includes core requirements
beyond platform baseline. Case-insensitive dedup prevents
conflicts on Windows/macOS. Data dir strip_components fixes
doubled paths for Dolphin and PPSSPP caches.
This commit is contained in:
Abdessamad Derraz
2026-03-19 16:10:43 +01:00
parent 257ec1a527
commit 6a21a99c22
15 changed files with 758 additions and 57 deletions

View File

@@ -125,9 +125,14 @@ def load_platform_config(platform_name: str, platforms_dir: str = "platforms") -
(f.get("name"), f.get("destination", f.get("name")))
for f in system.get("files", [])
}
existing_lower = {
f.get("destination", f.get("name", "")).lower()
for f in system.get("files", [])
}
for gf in shared_groups[group_name]:
key = (gf.get("name"), gf.get("destination", gf.get("name")))
if key not in existing:
dest_lower = gf.get("destination", gf.get("name", "")).lower()
if key not in existing and dest_lower not in existing_lower:
system.setdefault("files", []).append(gf)
existing.add(key)
@@ -348,6 +353,44 @@ def group_identical_platforms(
return [(group, representatives[fp]) for fp, group in fingerprints.items()]
def resolve_platform_cores(
    config: dict, profiles: dict[str, dict],
) -> set[str]:
    """Resolve which emulator profiles are relevant for a platform.

    Resolution strategies (by priority):
      1. cores: "all_libretro" -- every profile whose type contains "libretro"
      2. cores: [list]         -- profiles whose dict key matches a core name
      3. anything else         -- fallback to system ID intersection

    Alias profiles are always excluded (they point to another profile).

    A ``type`` or ``systems`` key that is present but set to None is
    treated the same as a missing key instead of raising.

    Args:
        config: Platform configuration; reads the "cores" and "systems" keys.
        profiles: Emulator profiles keyed by profile name; reads each
            profile's "type" and "systems" keys.

    Returns:
        The set of profile names (keys of ``profiles``) that apply.
    """
    cores_config = config.get("cores")

    if cores_config == "all_libretro":
        return {
            name for name, p in profiles.items()
            # `or ""` keeps the membership test valid when type is None.
            if "libretro" in (p.get("type") or "")
            and p.get("type") != "alias"
        }

    if isinstance(cores_config, list):
        core_set = set(cores_config)
        return {
            name for name, p in profiles.items()
            if name in core_set
            and p.get("type") != "alias"
        }

    # Fallback: system ID intersection. Iterating the mapping yields its
    # keys; `or {}` / `or []` cover keys that exist with a None value.
    platform_systems = set(config.get("systems") or {})
    return {
        name for name, p in profiles.items()
        if set(p.get("systems") or []) & platform_systems
        and p.get("type") != "alias"
    }
def safe_extract_zip(zip_path: str, dest_dir: str) -> None:
"""Extract a ZIP file safely, preventing zip-slip path traversal."""
dest = os.path.realpath(dest_dir)

View File

@@ -182,7 +182,7 @@ def _collect_emulator_extras(
base_dest: str,
emu_profiles: dict | None = None,
) -> list[dict]:
"""Collect extra files from emulator profiles not in the platform pack.
"""Collect core requirement files from emulator profiles not in the platform pack.
Uses the same system-overlap matching as verify.py cross-reference:
- Matches emulators by shared system IDs with the platform
@@ -237,15 +237,15 @@ def generate_pack(
platform_display = config.get("platform", platform_name)
base_dest = config.get("base_destination", "")
suffix = "Complete_Pack" if include_extras else "BIOS_Pack"
zip_name = f"{platform_display.replace(' ', '_')}_{suffix}.zip"
zip_name = f"{platform_display.replace(' ', '_')}_BIOS_Pack.zip"
zip_path = os.path.join(output_dir, zip_name)
os.makedirs(output_dir, exist_ok=True)
total_files = 0
missing_files = []
user_provided = []
seen_destinations = set()
seen_destinations: set[str] = set()
seen_lower: set[str] = set() # case-insensitive dedup for Windows/macOS
# Per-file status: worst status wins (missing > untested > ok)
file_status: dict[str, str] = {}
file_reasons: dict[str, str] = {}
@@ -277,6 +277,7 @@ def generate_pack(
if already_packed:
continue
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
file_status.setdefault(dedup_key, "ok")
instructions = file_entry.get("instructions", "Please provide this file manually.")
instr_name = f"INSTRUCTIONS_{file_entry['name']}.txt"
@@ -301,6 +302,7 @@ def generate_pack(
else:
zf.write(tmp_path, full_dest)
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
file_status.setdefault(dedup_key, "ok")
total_files += 1
else:
@@ -352,6 +354,7 @@ def generate_pack(
if already_packed:
continue
seen_destinations.add(dedup_key)
seen_lower.add(dedup_key.lower())
extract = file_entry.get("extract", False)
if extract and local_path.endswith(".zip"):
@@ -360,30 +363,33 @@ def generate_pack(
zf.write(local_path, full_dest)
total_files += 1
# Tier 2: emulator extras (files cores need but platform doesn't declare)
extra_count = 0
if include_extras:
emu_profiles = load_emulator_profiles(emulators_dir)
extras = _collect_emulator_extras(
config, emulators_dir, db,
seen_destinations, base_dest, emu_profiles,
)
for fe in extras:
dest = _sanitize_path(fe.get("destination", fe["name"]))
if not dest:
continue
full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen_destinations:
continue
# Core requirements: files platform's cores need but YAML doesn't declare
emu_profiles = load_emulator_profiles(emulators_dir)
core_files = _collect_emulator_extras(
config, emulators_dir, db,
seen_destinations, base_dest, emu_profiles,
)
core_count = 0
for fe in core_files:
dest = _sanitize_path(fe.get("destination", fe["name"]))
if not dest:
continue
full_dest = f"{base_dest}/{dest}" if base_dest else dest
if full_dest in seen_destinations:
continue
# Skip case-insensitive duplicates (Windows/macOS FS safety)
if full_dest.lower() in seen_lower:
continue
local_path, status = resolve_file(fe, db, bios_dir, zip_contents)
if status in ("not_found", "external", "user_provided"):
continue
local_path, status = resolve_file(fe, db, bios_dir, zip_contents)
if status in ("not_found", "external", "user_provided"):
continue
zf.write(local_path, full_dest)
seen_destinations.add(full_dest)
extra_count += 1
total_files += 1
zf.write(local_path, full_dest)
seen_destinations.add(full_dest)
seen_lower.add(full_dest.lower())
core_count += 1
total_files += 1
# Data directories from _data_dirs.yml
for sys_id, system in sorted(config.get("systems", {}).items()):
@@ -406,9 +412,10 @@ def generate_pack(
src = os.path.join(root, fname)
rel = os.path.relpath(src, local_path)
full = f"{dd_prefix}/{rel}"
if full in seen_destinations:
if full in seen_destinations or full.lower() in seen_lower:
continue
seen_destinations.add(full)
seen_lower.add(full.lower())
zf.write(src, full)
total_files += 1
@@ -422,8 +429,8 @@ def generate_pack(
parts.append(f"{files_untested} untested")
if files_miss:
parts.append(f"{files_miss} missing")
extras_msg = f", {extra_count} extras" if extra_count else ""
print(f" {zip_path}: {total_files} files packed{extras_msg}, {', '.join(parts)} [{verification_mode}]")
baseline = total_files - core_count
print(f" {zip_path}: {total_files} files packed ({baseline} baseline + {core_count} from cores), {', '.join(parts)} [{verification_mode}]")
for key, reason in sorted(file_reasons.items()):
status = file_status.get(key, "")
@@ -467,8 +474,9 @@ def main():
parser.add_argument("--db", default=DEFAULT_DB_FILE, help="Path to database.json")
parser.add_argument("--bios-dir", default=DEFAULT_BIOS_DIR)
parser.add_argument("--output-dir", "-o", default=DEFAULT_OUTPUT_DIR)
# --include-extras is now a no-op: core requirements are always included
parser.add_argument("--include-extras", action="store_true",
help="Include emulator-recommended files not declared by platform")
help="(no-op) Core requirements are always included")
parser.add_argument("--emulators-dir", default="emulators")
parser.add_argument("--offline", action="store_true",
help="Skip data directory freshness check, use cache only")

View File

@@ -73,9 +73,18 @@ def parse_pack_counts(output: str) -> dict[str, tuple[int, int]]:
if m:
current_label = m.group(1)
continue
frac_m = re.search(r"(\d+)/(\d+) files OK", line)
if frac_m and "files packed" in line:
ok, total = int(frac_m.group(1)), int(frac_m.group(2))
if "files packed" not in line:
continue
# New format: "622 files packed (359 baseline + 263 from cores), 358/359 files OK"
base_m = re.search(r"\((\d+) baseline", line)
ok_m = re.search(r"(\d+)/(\d+) files OK", line)
if base_m and ok_m:
baseline = int(base_m.group(1))
ok, total = int(ok_m.group(1)), int(ok_m.group(2))
counts[current_label] = (ok, total)
elif ok_m:
# Fallback: old format without baseline
ok, total = int(ok_m.group(1)), int(ok_m.group(2))
counts[current_label] = (ok, total)
return counts
@@ -123,8 +132,9 @@ def main():
help="Skip data directory refresh")
parser.add_argument("--output-dir", default="dist",
help="Pack output directory (default: dist/)")
# --include-extras is now a no-op: core requirements are always included
parser.add_argument("--include-extras", action="store_true",
help="Include Tier 2 emulator extras in packs")
help="(no-op) Core requirements are always included")
args = parser.parse_args()
results = {}

View File

@@ -14,6 +14,8 @@ import sys
import urllib.request
import urllib.error
import yaml
from .base_scraper import BaseScraper, BiosRequirement, fetch_github_latest_tag
PLATFORM_NAME = "batocera"
@@ -23,6 +25,12 @@ SOURCE_URL = (
"master/package/batocera/core/batocera-scripts/scripts/batocera-systems"
)
CONFIGGEN_DEFAULTS_URL = (
"https://raw.githubusercontent.com/batocera-linux/batocera.linux/"
"master/package/batocera/core/batocera-configgen/configs/"
"configgen-defaults.yml"
)
SYSTEM_SLUG_MAP = {
"atari800": "atari-400-800",
"atari5200": "atari-5200",
@@ -91,6 +99,28 @@ class Scraper(BaseScraper):
def __init__(self, url: str = SOURCE_URL):
super().__init__(url=url)
def _fetch_cores(self) -> list[str]:
    """Extract core names from Batocera configgen-defaults.yml.

    Downloads CONFIGGEN_DEFAULTS_URL, parses it as YAML and collects the
    ``core`` value of every top-level mapping entry except ``default``.

    Returns:
        Sorted list of unique core names, always as strings. Empty when
        the document is empty or is not a mapping.

    Raises:
        ConnectionError: If the download fails with a URLError.
    """
    try:
        req = urllib.request.Request(
            CONFIGGEN_DEFAULTS_URL,
            headers={"User-Agent": "retrobios-scraper/1.0"},
        )
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read().decode("utf-8")
    except urllib.error.URLError as e:
        raise ConnectionError(
            f"Failed to fetch {CONFIGGEN_DEFAULTS_URL}: {e}"
        ) from e

    data = yaml.safe_load(raw)
    if not isinstance(data, dict):
        # safe_load returns None for an empty document and may return a
        # scalar or list for unexpected content; there is nothing to scan.
        return []

    cores: set[str] = set()
    for system, cfg in data.items():
        if system == "default" or not isinstance(cfg, dict):
            continue
        core = cfg.get("core")
        # An unquoted numeric name (e.g. `core: 81`) is loaded as int.
        # Normalize to str so sorted() never compares int with str, and
        # skip non-scalar values that cannot be a core name.
        if core and isinstance(core, (str, int)):
            cores.add(str(core))
    return sorted(cores)
def _extract_systems_dict(self, raw: str) -> dict:
"""Extract and parse the 'systems' dict from the Python source via ast.literal_eval."""
@@ -244,6 +274,7 @@ class Scraper(BaseScraper):
"base_destination": "bios",
"hash_type": "md5",
"verification_mode": "md5",
"cores": self._fetch_cores(),
"systems": systems,
}

View File

@@ -88,6 +88,20 @@ class Scraper(BaseScraper):
def __init__(self, url: str = SOURCE_URL):
super().__init__(url=url)
def _fetch_cores(self) -> list[str]:
"""Extract unique core names from es_bios.xml bios elements."""
raw = self._fetch_raw()
root = ET.fromstring(raw)
cores: set[str] = set()
for bios_elem in root.findall(".//system/bios"):
raw_core = bios_elem.get("core", "").strip()
if not raw_core:
continue
for part in raw_core.split(","):
name = part.strip()
if name:
cores.add(name)
return sorted(cores)
def fetch_requirements(self) -> list[BiosRequirement]:
"""Parse es_bios.xml and return BIOS requirements."""
@@ -214,6 +228,7 @@ class Scraper(BaseScraper):
"base_destination": "bios",
"hash_type": "md5",
"verification_mode": "md5",
"cores": self._fetch_cores(),
"systems": systems,
}

View File

@@ -37,7 +37,7 @@ sys.path.insert(0, os.path.dirname(__file__))
from common import (
build_zip_contents_index, check_inside_zip, group_identical_platforms,
load_emulator_profiles, load_platform_config, md5sum, md5_composite,
resolve_local_file,
resolve_local_file, resolve_platform_cores,
)
DEFAULT_DB = "database.json"
@@ -198,9 +198,7 @@ def find_undeclared_files(
"""Find files needed by cores but not declared in platform config."""
# Collect all filenames declared by this platform
declared_names: set[str] = set()
platform_systems: set[str] = set()
for sys_id, system in config.get("systems", {}).items():
platform_systems.add(sys_id)
for fe in system.get("files", []):
name = fe.get("name", "")
if name:
@@ -217,15 +215,13 @@ def find_undeclared_files(
by_name = db.get("indexes", {}).get("by_name", {})
profiles = emu_profiles if emu_profiles is not None else load_emulator_profiles(emulators_dir)
relevant = resolve_platform_cores(config, profiles)
undeclared = []
seen = set()
for emu_name, profile in sorted(profiles.items()):
# Skip launchers — they don't use system_dir for BIOS
if profile.get("type") == "launcher":
if profile.get("type") in ("launcher", "alias"):
continue
emu_systems = set(profile.get("systems", []))
# Only check emulators whose systems overlap with this platform
if not emu_systems & platform_systems:
if emu_name not in relevant:
continue
for f in profile.get("files", []):
@@ -268,10 +264,12 @@ def find_exclusion_notes(
for sys_id in config.get("systems", {}):
platform_systems.add(sys_id)
relevant = resolve_platform_cores(config, profiles)
notes = []
for emu_name, profile in sorted(profiles.items()):
emu_systems = set(profile.get("systems", []))
if not emu_systems & platform_systems:
# Match by core resolution OR system intersection (documents all potential emulators)
if emu_name not in relevant and not (emu_systems & platform_systems):
continue
emu_display = profile.get("emulator", emu_name)