feat: make gap analysis coherent, add 7 BIOS files and unsourceable field

cross_reference.py: add bios_mode/agnostic/load_from filters, archive
grouping, unsourceable field support. verify.py: case-insensitive
by_name lookup, storage:release in_repo, unsourceable skip, trailing
slash fix. generate_site.py: enriched all_declared, platform-relevant
profile filtering, proper in_repo resolution on emulator pages,
acknowledged gaps section.

New BIOS: delta2.rom (XRoar), tilekey.dat + sprites.sif (NXEngine),
Gram Kracker.ctg + cf7+.ctg + ti-pcard.ctg (ti99sim), desc.dat
(SDLPAL). Profiles: hle_fallback on tilekey.dat/key.txt, unsourceable
on 7 files with source-verified reasons.
This commit is contained in:
Abdessamad Derraz
2026-04-02 15:35:24 +02:00
parent 5ee81b30c6
commit 73ccb216f5
18 changed files with 592 additions and 25 deletions

View File

@@ -140,6 +140,20 @@ def _resolve_source(
return None
def _resolve_archive_source(
    archive_name: str,
    by_name: dict[str, list],
    by_name_lower: dict[str, str],
    data_names: set[str] | None = None,
    by_path_suffix: dict | None = None,
) -> str:
    """Resolve source for an archive (ZIP) name, returning a source category string.

    Thin wrapper around ``_resolve_source`` that maps an unresolved
    (``None``) result to the literal category ``"missing"``, so callers
    always receive a usable category string.
    """
    category = _resolve_source(
        archive_name, by_name, by_name_lower, data_names, by_path_suffix,
    )
    if category is None:
        return "missing"
    return category
def cross_reference(
profiles: dict[str, dict],
declared: dict[str, set[str]],
@@ -175,6 +189,10 @@ def cross_reference(
emu_files = profile.get("files", [])
systems = profile.get("systems", [])
# Skip filename-agnostic profiles (BIOS detected without fixed names)
if profile.get("bios_mode") == "agnostic":
continue
if all_declared is not None:
platform_names = all_declared
else:
@@ -184,13 +202,28 @@ def cross_reference(
gaps = []
covered = []
unsourceable_list: list[dict] = []
archive_gaps: dict[str, dict] = {}
seen_files: set[str] = set()
for f in emu_files:
fname = f.get("name", "")
if not fname:
if not fname or fname in seen_files:
continue
# Collect unsourceable files separately (documented, not a gap)
unsourceable_reason = f.get("unsourceable", "")
if unsourceable_reason:
seen_files.add(fname)
unsourceable_list.append({
"name": fname,
"required": f.get("required", False),
"reason": unsourceable_reason,
"source_ref": f.get("source_ref", ""),
})
continue
# Skip pattern placeholders (e.g., <bios>.bin, <user-selected>.bin)
if "<" in fname or ">" in fname:
if "<" in fname or ">" in fname or "*" in fname:
continue
# Skip UI-imported files with explicit path: null (not resolvable by pack)
@@ -202,6 +235,61 @@ def cross_reference(
if file_mode == "standalone":
continue
# Skip files loaded from non-system directories (save_dir, content_dir)
load_from = f.get("load_from", "")
if load_from and load_from != "system_dir":
continue
# Skip filename-agnostic files (handled by agnostic scan)
if f.get("agnostic"):
continue
archive = f.get("archive")
# Check platform declaration (by name or archive)
in_platform = fname in platform_names
if not in_platform and archive:
in_platform = archive in platform_names
if in_platform:
seen_files.add(fname)
covered.append({
"name": fname,
"required": f.get("required", False),
"in_platform": True,
})
continue
seen_files.add(fname)
# Group archived files by archive name
if archive:
if archive not in archive_gaps:
source = _resolve_archive_source(
archive, by_name, by_name_lower, data_names,
by_path_suffix,
)
archive_gaps[archive] = {
"name": archive,
"required": False,
"note": "",
"source_ref": "",
"in_platform": False,
"in_repo": source != "missing",
"source": source,
"archive": archive,
"archive_file_count": 0,
"archive_required_count": 0,
}
entry = archive_gaps[archive]
entry["archive_file_count"] += 1
if f.get("required", False):
entry["archive_required_count"] += 1
entry["required"] = True
if not entry["source_ref"] and f.get("source_ref"):
entry["source_ref"] = f["source_ref"]
continue
# --- resolve source provenance ---
storage = f.get("storage", "")
if storage in ("release", "large_file"):
@@ -235,22 +323,21 @@ def cross_reference(
source = "missing"
in_repo = source != "missing"
in_platform = fname in platform_names
entry = {
"name": fname,
"required": f.get("required", False),
"note": f.get("note", ""),
"source_ref": f.get("source_ref", ""),
"in_platform": in_platform,
"in_platform": False,
"in_repo": in_repo,
"source": source,
}
gaps.append(entry)
if not in_platform:
gaps.append(entry)
else:
covered.append(entry)
# Append grouped archive gaps
for ag in sorted(archive_gaps.values(), key=lambda e: e["name"]):
gaps.append(ag)
report[emu_name] = {
"emulator": profile.get("emulator", emu_name),
@@ -264,6 +351,7 @@ def cross_reference(
"gap_data": sum(1 for g in gaps if g["source"] == "data"),
"gap_large_file": sum(1 for g in gaps if g["source"] == "large_file"),
"gap_details": gaps,
"unsourceable": unsourceable_list,
}
return report
@@ -301,7 +389,12 @@ def print_report(report: dict) -> None:
req = "*" if g["required"] else " "
src = g.get("source", "missing").upper()
note = f" -- {g['note']}" if g["note"] else ""
print(f" {req} {g['name']} [{src}]{note}")
archive_info = ""
if g.get("archive"):
fc = g.get("archive_file_count", 0)
rc = g.get("archive_required_count", 0)
archive_info = f" ({fc} files, {rc} required)"
print(f" {req} {g['name']} [{src}]{archive_info}{note}")
total_gaps += gaps
for key in totals:

View File

@@ -1709,6 +1709,35 @@ def generate_gap_analysis(
)
lines.append("")
# ---- Section 4: Acknowledged gaps (unsourceable files) ----
all_unsourceable: list[dict] = []
for emu_name, data in sorted(report.items()):
for u in data.get("unsourceable", []):
all_unsourceable.append({
"name": u["name"],
"emulator": data["emulator"],
"reason": u["reason"],
"source_ref": u.get("source_ref", ""),
})
if all_unsourceable:
lines.extend([
"## Acknowledged Gaps",
"",
f"{len(all_unsourceable)} files documented as unsourceable "
"(verified from source code).",
"",
"| File | Emulator | Reason | Source ref |",
"|------|----------|--------|-----------|",
])
for u in sorted(all_unsourceable, key=lambda x: x["name"]):
lines.append(
f"| `{u['name']}` | {u['emulator']} | {u['reason']} "
f"| {u['source_ref']} |"
)
lines.append("")
lines.extend(["", f'<div class="rb-timestamp">Generated on {_timestamp()}.</div>'])
return "\n".join(lines) + "\n"

View File

@@ -298,13 +298,23 @@ def _name_in_index(
by_name: dict,
by_path_suffix: dict | None = None,
data_names: set[str] | None = None,
by_name_lower: dict[str, str] | None = None,
) -> bool:
"""Check if a name is resolvable in the database indexes or data directories."""
# Strip trailing slash for directory-type entries (e.g. nestopia/samples/foo/)
name = name.rstrip("/")
if name in by_name:
return True
basename = name.rsplit("/", 1)[-1]
basename = name.rsplit("/", 1)[-1] if "/" in name else name
if basename != name and basename in by_name:
return True
# Case-insensitive by_name lookup
if by_name_lower:
key = name.lower()
if key in by_name_lower:
return True
if basename != name and basename.lower() in by_name_lower:
return True
if by_path_suffix and name in by_path_suffix:
return True
if data_names:
@@ -345,6 +355,7 @@ def find_undeclared_files(
declared_dd.add(ref)
by_name = db.get("indexes", {}).get("by_name", {})
by_name_lower = {k.lower(): k for k in by_name}
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
profiles = (
emu_profiles
@@ -378,6 +389,10 @@ def find_undeclared_files(
fname = f.get("name", "")
if not fname or fname in seen_files:
continue
# Skip unsourceable files (documented reason, not a gap)
if f.get("unsourceable"):
seen_files.add(fname)
continue
# Skip pattern placeholders (e.g., <user-selected>.bin)
if "<" in fname or ">" in fname or "*" in fname:
continue
@@ -416,7 +431,8 @@ def find_undeclared_files(
if archive:
if archive not in archive_entries:
in_repo = _name_in_index(
archive, by_name, by_path_suffix, data_names
archive, by_name, by_path_suffix, data_names,
by_name_lower,
)
archive_entries[archive] = {
"emulator": profile.get("emulator", emu_name),
@@ -447,11 +463,20 @@ def find_undeclared_files(
else:
dest = f.get("path") or fname
# Resolution: try name, then path basename, then path_suffix
in_repo = _name_in_index(fname, by_name, by_path_suffix, data_names)
if not in_repo and dest != fname:
path_base = dest.rsplit("/", 1)[-1]
in_repo = _name_in_index(path_base, by_name, by_path_suffix, data_names)
# Resolution: storage flag, then name, then path basename
storage = f.get("storage", "")
if storage in ("release", "large_file"):
in_repo = True
else:
in_repo = _name_in_index(
fname, by_name, by_path_suffix, data_names, by_name_lower,
)
if not in_repo and dest != fname:
path_base = dest.rsplit("/", 1)[-1]
in_repo = _name_in_index(
path_base, by_name, by_path_suffix, data_names,
by_name_lower,
)
checks = _parse_validation(f.get("validation"))
undeclared.append(