6 Commits

Author SHA1 Message Date
Abdessamad Derraz
0401d058a1 feat: add by_sha256 index, fix reporting attribution
generate_db: add by_sha256 index for O(1) variant lookup.
verify: _find_best_variant uses indexed sha256 instead of O(n) scan.
validation: check_file_validation returns (reason, emulators) tuple,
attributing mismatch only to emulators whose check actually failed.
beetle_psx: remove incorrect size field for ps1_rom.bin (code does
not validate size, swanstation is sole size authority).
2026-04-02 00:59:01 +02:00
Abdessamad Derraz
95b7a9813c feat: add custom site stylesheet with theme and components 2026-04-02 00:48:02 +02:00
Abdessamad Derraz
22829cfab9 feat: improve site ux with cards, badges and hash truncation 2026-04-02 00:46:21 +02:00
Abdessamad Derraz
2326306f2b fix: adler32 byteswap for dolphin dsp validation
Dolphin computes adler32 on byte-swapped (16-bit) data, not raw
file bytes. Add adler32_byteswap flag to dolphin/primehack/ishiiruka
profiles and support it in validation.py.

Reduces hash mismatch discrepancies from 18 to 2.
2026-04-01 22:51:39 +02:00
Abdessamad Derraz
28ecf19f2b fix: variant resolution suppresses false discrepancies
_find_best_variant now searches by hash (md5, sha1, crc32, sha256)
across the entire database instead of only by filename. Finds
variants stored under different names (e.g. eu_mcd2_9306.bin for
bios_CD_E.bin, scph1001_v20.bin for scph1001.bin).

verify_entry_existence now also calls _find_best_variant to
suppress discrepancies when a matching variant exists in the repo.

Reduces false discrepancies from 22 to 11 (4 unique files where
the variant genuinely does not exist in the repo).
2026-04-01 22:45:43 +02:00
Abdessamad Derraz
91925120c9 feat: unify gap analysis with verify results and source provenance
Single source of truth for gap page: verification status from
verify.py (verified/untested/missing/mismatch), file provenance
from cross_reference (bios/data/large_file/missing).

cross_reference.py: _find_in_repo -> _resolve_source returning
source category, stop skipping storage: release/large_file,
add by_path_suffix lookup, all_declared param for global check.

generate_site.py: gap page now shows verification by platform,
18 hash mismatches, and core complement with provenance breakdown.
2026-04-01 22:33:37 +02:00
14 changed files with 8825 additions and 369 deletions

File diff suppressed because it is too large Load Diff

578
docs_assets/extra.css Normal file
View File

@@ -0,0 +1,578 @@
/* RetroBIOS custom theme */
/* ── Color palette ── */
:root {
--rb-primary: #4a4e8a;
--rb-primary-light: #6366a0;
--rb-primary-dark: #363870;
--rb-accent: #e8594f;
--rb-success: #2e7d32;
--rb-warning: #f57c00;
--rb-danger: #c62828;
--rb-info: #1565c0;
--rb-muted: #78909c;
--rb-surface: #f5f6fa;
--rb-border: #e0e3eb;
--rb-text-secondary: #546e7a;
}
[data-md-color-scheme="slate"] {
--rb-surface: #1e1e2e;
--rb-border: #313244;
--rb-text-secondary: #a6adc8;
}
/* ── Material theme overrides ── */
[data-md-color-scheme="default"] {
--md-primary-fg-color: var(--rb-primary);
--md-primary-fg-color--light: var(--rb-primary-light);
--md-primary-fg-color--dark: var(--rb-primary-dark);
--md-accent-fg-color: var(--rb-accent);
}
[data-md-color-scheme="slate"] {
--md-primary-fg-color: var(--rb-primary-light);
--md-accent-fg-color: var(--rb-accent);
}
/* ── Hero section (home page) ── */
.rb-hero {
background: linear-gradient(135deg, var(--rb-primary) 0%, var(--rb-primary-dark) 100%);
color: white;
padding: 2.5rem 2rem;
border-radius: 12px;
margin-bottom: 2rem;
position: relative;
overflow: hidden;
}
.rb-hero::before {
content: "";
position: absolute;
top: 0;
right: 0;
width: 300px;
height: 100%;
background: linear-gradient(135deg, transparent 40%, rgba(255,255,255,0.04) 100%);
pointer-events: none;
}
.rb-hero h1 {
margin: 0 0 0.5rem;
font-size: 2rem;
font-weight: 700;
color: white !important;
border: none;
}
.rb-hero p {
margin: 0;
opacity: 0.9;
font-size: 1.1rem;
}
/* ── Stat cards ── */
.rb-stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
gap: 1rem;
margin: 1.5rem 0;
}
.rb-stat {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 1.2rem;
text-align: center;
transition: transform 0.15s ease, box-shadow 0.15s ease;
}
.rb-stat:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
}
.rb-stat .rb-stat-value {
font-size: 1.8rem;
font-weight: 700;
color: var(--rb-primary);
line-height: 1.2;
display: block;
}
[data-md-color-scheme="slate"] .rb-stat .rb-stat-value {
color: var(--rb-primary-light);
}
.rb-stat .rb-stat-label {
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--rb-text-secondary);
margin-top: 0.3rem;
display: block;
}
/* ── Progress bars (inline) ── */
.rb-progress {
display: inline-flex;
align-items: center;
gap: 0.5rem;
width: 100%;
}
.rb-progress-bar {
flex: 1;
height: 6px;
background: var(--rb-border);
border-radius: 3px;
overflow: hidden;
min-width: 60px;
}
.rb-progress-fill {
height: 100%;
border-radius: 3px;
transition: width 0.3s ease;
}
.rb-progress-fill[data-level="high"] { background: var(--rb-success); }
.rb-progress-fill[data-level="mid"] { background: var(--rb-warning); }
.rb-progress-fill[data-level="low"] { background: var(--rb-danger); }
.rb-progress-text {
font-size: 0.8rem;
font-weight: 600;
white-space: nowrap;
min-width: 3.5em;
text-align: right;
}
/* ── Status badges ── */
.rb-badge {
display: inline-block;
padding: 0.15em 0.6em;
border-radius: 10px;
font-size: 0.75rem;
font-weight: 600;
letter-spacing: 0.02em;
line-height: 1.5;
white-space: nowrap;
}
.rb-badge-success { background: #e8f5e9; color: #1b5e20; }
.rb-badge-warning { background: #fff3e0; color: #e65100; }
.rb-badge-danger { background: #ffebee; color: #b71c1c; }
.rb-badge-info { background: #e3f2fd; color: #0d47a1; }
.rb-badge-muted { background: #eceff1; color: #455a64; }
[data-md-color-scheme="slate"] .rb-badge-success { background: #1b5e20; color: #a5d6a7; }
[data-md-color-scheme="slate"] .rb-badge-warning { background: #e65100; color: #ffcc80; }
[data-md-color-scheme="slate"] .rb-badge-danger { background: #b71c1c; color: #ef9a9a; }
[data-md-color-scheme="slate"] .rb-badge-info { background: #0d47a1; color: #90caf9; }
[data-md-color-scheme="slate"] .rb-badge-muted { background: #37474f; color: #b0bec5; }
/* ── Tables ── */
.md-typeset table:not([class]) {
border-collapse: separate;
border-spacing: 0;
font-size: 0.85rem;
border: 1px solid var(--rb-border);
border-radius: 8px;
overflow: hidden;
}
.md-typeset table:not([class]) th {
background: var(--rb-surface);
font-weight: 600;
text-transform: uppercase;
font-size: 0.75rem;
letter-spacing: 0.04em;
color: var(--rb-text-secondary);
padding: 0.75rem 1rem;
position: sticky;
top: 0;
z-index: 1;
}
.md-typeset table:not([class]) td {
padding: 0.6rem 1rem;
border-top: 1px solid var(--rb-border);
}
.md-typeset table:not([class]) tbody tr:hover {
background: rgba(74, 78, 138, 0.04);
}
[data-md-color-scheme="slate"] .md-typeset table:not([class]) tbody tr:hover {
background: rgba(99, 102, 160, 0.08);
}
/* Zebra striping */
.md-typeset table:not([class]) tbody tr:nth-child(even) {
background: rgba(0, 0, 0, 0.015);
}
[data-md-color-scheme="slate"] .md-typeset table:not([class]) tbody tr:nth-child(even) {
background: rgba(255, 255, 255, 0.02);
}
/* ── Platform cards (home page) ── */
.rb-platform-row td:first-child img {
vertical-align: middle;
border-radius: 4px;
}
/* ── Quick start grid ── */
.rb-quickstart {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
gap: 0.75rem;
margin: 1rem 0;
}
.rb-quickstart-item {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 0.8rem 1rem;
display: flex;
align-items: center;
gap: 0.6rem;
}
/* Command shown inside a quick-start tile: no code-chip background, bold,
   tinted with the brand colour. */
.rb-quickstart-item code {
  background: transparent;
  font-weight: 600;
  color: var(--rb-primary);
}
/* On the dark (slate) scheme --rb-primary is too dim against --rb-surface,
   so switch to the lighter tint, as .rb-stat-value does. */
[data-md-color-scheme="slate"] .rb-quickstart-item code {
  color: var(--rb-primary-light);
}
/* ── Section separators ── */
.rb-section {
margin-top: 2rem;
padding-top: 1.5rem;
border-top: 1px solid var(--rb-border);
}
/* ── Methodology steps ── */
.rb-methodology ol {
counter-reset: method-step;
list-style: none;
padding-left: 0;
}
.rb-methodology ol li {
counter-increment: method-step;
padding: 0.6rem 0 0.6rem 2.5rem;
position: relative;
}
.rb-methodology ol li::before {
content: counter(method-step);
position: absolute;
left: 0;
top: 0.5rem;
width: 1.8rem;
height: 1.8rem;
background: var(--rb-primary);
color: white;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-weight: 700;
font-size: 0.8rem;
}
/* ── Classification badges (emulators) ── */
.rb-cls-official_port { border-left: 3px solid var(--rb-success); }
.rb-cls-community_fork { border-left: 3px solid var(--rb-info); }
.rb-cls-pure_libretro { border-left: 3px solid var(--rb-primary); }
.rb-cls-game_engine { border-left: 3px solid #7b1fa2; }
.rb-cls-enhanced_fork { border-left: 3px solid #00838f; }
.rb-cls-frozen_snapshot { border-left: 3px solid var(--rb-muted); }
.rb-cls-embedded_hle { border-left: 3px solid #4e342e; }
.rb-cls-launcher { border-left: 3px solid #37474f; }
/* ── Gap analysis priority markers ── */
.rb-gap-required {
color: var(--rb-danger);
font-weight: 600;
}
.rb-gap-optional {
color: var(--rb-muted);
}
/* ── Scrollable table container improvements ── */
.md-typeset__scrollwrap {
margin: 1rem 0;
}
/* ── Footer timestamp ── */
.rb-timestamp {
font-size: 0.8rem;
color: var(--rb-text-secondary);
margin-top: 2rem;
padding-top: 1rem;
border-top: 1px solid var(--rb-border);
}
/* ── Info card (platform/emulator metadata) ── */
.rb-info-card {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 1rem 1.2rem;
margin: 1rem 0;
display: grid;
grid-template-columns: auto 1fr;
gap: 0.3rem 1.2rem;
font-size: 0.9rem;
}
.rb-info-card dt {
font-weight: 600;
color: var(--rb-text-secondary);
white-space: nowrap;
}
.rb-info-card dd {
margin: 0;
}
/* ── File entry cards (emulator detail) ── */
.rb-file-entry {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 1rem 1.2rem;
margin: 0.75rem 0;
}
.rb-file-entry-required {
border-left: 3px solid var(--rb-danger);
}
.rb-file-entry-optional {
border-left: 3px solid var(--rb-muted);
}
.rb-file-header {
display: flex;
align-items: center;
gap: 0.5rem;
flex-wrap: wrap;
margin-bottom: 0.5rem;
}
.rb-file-header code {
font-size: 0.95rem;
font-weight: 700;
}
.rb-file-entry ul {
margin: 0.3rem 0 0;
padding-left: 1.2rem;
}
.rb-file-entry li {
font-size: 0.85rem;
margin: 0.15rem 0;
}
/* ── Hash display (truncated with tooltip) ── */
.rb-hash {
font-family: monospace;
font-size: 0.78rem;
color: var(--rb-text-secondary);
cursor: help;
letter-spacing: -0.02em;
}
/* System page file cards */
.rb-sys-file {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 0.8rem 1rem;
margin: 0.6rem 0;
}
.rb-sys-file > p:first-child {
margin-top: 0;
}
.rb-sys-file ul {
margin: 0.3rem 0 0;
padding-left: 1rem;
font-size: 0.85rem;
}
.rb-sys-file li {
margin: 0.1rem 0;
}
/* Emulator metadata card */
.rb-meta-card {
background: var(--rb-surface);
border: 1px solid var(--rb-border);
border-radius: 8px;
padding: 0.8rem 0;
margin: 1rem 0;
overflow: hidden;
}
.rb-meta-card table {
border: none !important;
margin: 0 !important;
border-radius: 0 !important;
}
.rb-meta-card th {
display: none;
}
.rb-meta-card td:first-child {
font-weight: 600;
color: var(--rb-text-secondary);
white-space: nowrap;
width: 140px;
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.03em;
}
.rb-meta-card td {
border-top: 1px solid var(--rb-border) !important;
}
.rb-meta-card tr:first-child td {
border-top: none !important;
}
/* ── Platform detail: coverage bar ── */
.rb-coverage-bar {
background: var(--rb-border);
border-radius: 4px;
height: 8px;
overflow: hidden;
margin: 0.5rem 0;
max-width: 300px;
}
.rb-coverage-fill {
height: 100%;
border-radius: 4px;
background: var(--rb-success);
}
/* ── Emulator index: section accent ── */
.md-typeset h2 .rb-cls-dot {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
margin-right: 0.4rem;
vertical-align: middle;
}
.rb-dot-official_port { background: var(--rb-success); }
.rb-dot-community_fork { background: var(--rb-info); }
.rb-dot-pure_libretro { background: var(--rb-primary); }
.rb-dot-game_engine { background: #7b1fa2; }
.rb-dot-enhanced_fork { background: #00838f; }
.rb-dot-frozen_snapshot { background: var(--rb-muted); }
.rb-dot-embedded_hle { background: #4e342e; }
.rb-dot-launcher { background: #37474f; }
.rb-dot-other { background: #9e9e9e; }
/* ── Cross-ref: classification in table ── */
.rb-cls-label {
font-size: 0.8rem;
padding: 0.1em 0.5em;
border-radius: 4px;
background: var(--rb-surface);
border: 1px solid var(--rb-border);
white-space: nowrap;
}
/* ── Buttons ── */
.md-typeset .md-button {
border-radius: 6px;
font-weight: 600;
text-transform: none;
letter-spacing: 0;
}
/* Pack button in tables: smaller */
.md-typeset table .md-button {
font-size: 0.75rem;
padding: 0.3em 0.8em;
}
/* ── Hide permalink anchors in hero ── */
.rb-hero .headerlink {
display: none;
}
/* ── Compact stat variant ── */
.rb-stats-compact {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin: 1rem 0;
}
.rb-stats-compact .rb-stat {
padding: 0.6rem 1rem;
flex: 1;
min-width: 100px;
}
.rb-stats-compact .rb-stat-value {
font-size: 1.3rem;
}
.rb-stats-compact .rb-stat-label {
font-size: 0.7rem;
}
/* ── Responsive ── */
@media (max-width: 768px) {
.rb-hero {
padding: 1.5rem;
}
.rb-hero h1 {
font-size: 1.5rem;
}
.rb-stats {
grid-template-columns: repeat(2, 1fr);
gap: 0.5rem;
}
.rb-stat {
padding: 0.8rem;
}
.rb-stat .rb-stat-value {
font-size: 1.4rem;
}
.rb-stat .rb-stat-label {
font-size: 0.7rem;
}
.rb-file-entry {
padding: 0.7rem 0.8rem;
}
.rb-sys-file {
padding: 0.6rem 0.8rem;
}
}
@media (max-width: 480px) {
.rb-stats {
grid-template-columns: repeat(2, 1fr);
}
.rb-hero h1 {
font-size: 1.3rem;
}
}

View File

@@ -92,10 +92,9 @@ files:
note: "override_bios=1. Falls back to region BIOS if not found."
- name: "ps1_rom.bin"
description: "PS3 embedded PS1 BIOS (region-free override)"
description: "PS3 embedded PS1 BIOS, first 512KB extracted (region-free override)"
region: "Auto"
required: false
size: 524288
sha1: "c40146361eb8cf670b19fdc9759190257803cab7"
md5: "81bbe60ba7a3d1cea1d48c14cbcc647b"
validation: [sha1]

View File

@@ -65,6 +65,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0x66f334fe"
adler32_byteswap: true
note: "DSP instruction ROM for LLE audio. Free replacement (v0.4) included"
source_ref: "Source/Core/Common/CommonPaths.h:136, Source/Core/Core/HW/DSPLLE/DSPLLE.cpp:84-117"
@@ -75,6 +76,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0xf3b93527"
adler32_byteswap: true
note: "DSP coefficient ROM for LLE audio and HLE polyphase resampling. Free replacement included"
source_ref: "Source/Core/Common/CommonPaths.h:137, Source/Core/Core/DSP/DSPCore.cpp:32-33, Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp:55-62"

View File

@@ -62,6 +62,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0x66f334fe"
adler32_byteswap: true
note: "DSP instruction ROM for LLE audio. Free replacement included"
source_ref: "Source/Core/Common/CommonPaths.h:112, Source/Core/Core/HW/DSPLLE/DSPLLE.cpp:142-150, Source/Core/Core/DSP/DSPCore.cpp:48,67"
@@ -72,6 +73,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0xf3b93527"
adler32_byteswap: true
note: "DSP coefficient ROM for LLE audio and HLE polyphase resampling. Free replacement included"
source_ref: "Source/Core/Common/CommonPaths.h:113, Source/Core/Core/HW/DSPLLE/DSPLLE.cpp:143-153, Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp:43-72, Source/Core/Core/DSP/DSPCore.cpp:48,68"

View File

@@ -68,6 +68,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0x66f334fe"
adler32_byteswap: true
note: "DSP instruction ROM for LLE audio. Free replacement (v0.4) included"
source_ref: "Source/Core/Common/CommonPaths.h:135, Source/Core/Core/HW/DSPLLE/DSPLLE.cpp:87-117"
@@ -79,6 +80,7 @@ files:
hle_fallback: true
validation: [size]
known_hash_adler32: "0xf3b93527"
adler32_byteswap: true
note: "DSP coefficient ROM for LLE audio. Free replacement included"
source_ref: "Source/Core/Common/CommonPaths.h:136, Source/Core/Core/DSP/DSPCore.cpp:32-38"

View File

@@ -20,6 +20,8 @@ theme:
icon: material/brightness-4
name: Switch to auto
font: false
icon:
logo: material/chip
features:
- navigation.tabs
- navigation.sections
@@ -29,6 +31,8 @@ theme:
- search.highlight
- content.tabs.link
- toc.follow
extra_css:
- stylesheets/extra.css
markdown_extensions:
- tables
- admonition

View File

@@ -103,32 +103,41 @@ def _build_supplemental_index(
return names
def _find_in_repo(
def _resolve_source(
fname: str,
by_name: dict[str, list],
by_name_lower: dict[str, str],
data_names: set[str] | None = None,
) -> bool:
by_path_suffix: dict | None = None,
) -> str | None:
"""Return the source category for a file, or None if not found.
Returns ``"bios"`` (in database.json / bios/), ``"data"`` (in data/),
or ``None`` (not available anywhere).
"""
# bios/ via database.json by_name
if fname in by_name:
return True
# For directory entries or paths, extract the meaningful basename
return "bios"
stripped = fname.rstrip("/")
basename = stripped.rsplit("/", 1)[-1] if "/" in stripped else None
if basename and basename in by_name:
return True
return "bios"
key = fname.lower()
if key in by_name_lower:
return True
return "bios"
if basename:
key = basename.lower()
if key in by_name_lower:
return True
if basename.lower() in by_name_lower:
return "bios"
# bios/ via by_path_suffix (regional variants)
if by_path_suffix and fname in by_path_suffix:
return "bios"
# data/ supplemental index
if data_names:
if fname in data_names or key in data_names:
return True
return "data"
if basename and (basename in data_names or basename.lower() in data_names):
return True
return False
return "data"
return None
def cross_reference(
@@ -137,30 +146,44 @@ def cross_reference(
db: dict,
platform_data_dirs: dict[str, set[str]] | None = None,
data_names: set[str] | None = None,
all_declared: set[str] | None = None,
) -> dict:
"""Compare emulator profiles against platform declarations.
Returns a report with gaps (files emulators need but platforms don't list)
and coverage stats. Files covered by matching data_directories between
emulator profile and platform config are not reported as gaps.
Checks both bios/ (via database) and data/ (via data_names index).
and coverage stats. Each gap entry carries a ``source`` field indicating
where the file is available: ``"bios"`` (bios/ via database.json),
``"data"`` (data/ directory), ``"large_file"`` (GitHub release asset),
or ``"missing"`` (not available anywhere).
The boolean ``in_repo`` is derived: ``source != "missing"``.
When *all_declared* is provided (flat set of every filename declared by
any platform for any system), it is used for the ``in_platform`` check
instead of the per-system lookup. This is appropriate for the global
gap analysis page where "undeclared" means "no platform declares it at all".
"""
platform_data_dirs = platform_data_dirs or {}
by_name = db.get("indexes", {}).get("by_name", {})
by_name_lower = {k.lower(): k for k in by_name}
by_md5 = db.get("indexes", {}).get("by_md5", {})
by_path_suffix = db.get("indexes", {}).get("by_path_suffix", {})
db_files = db.get("files", {})
report = {}
for emu_name, profile in profiles.items():
emu_files = profile.get("files", [])
systems = profile.get("systems", [])
platform_names = set()
for sys_id in systems:
platform_names.update(declared.get(sys_id, set()))
if all_declared is not None:
platform_names = all_declared
else:
platform_names = set()
for sys_id in systems:
platform_names.update(declared.get(sys_id, set()))
gaps = []
covered = []
by_md5 = db.get("indexes", {}).get("by_md5", {})
for f in emu_files:
fname = f.get("name", "")
if not fname:
@@ -174,37 +197,45 @@ def cross_reference(
if "path" in f and f["path"] is None:
continue
# Skip release asset files (stored in GitHub releases, not bios/)
if f.get("storage") == "release":
continue
# Skip standalone-only files
file_mode = f.get("mode", "both")
if file_mode == "standalone":
continue
# --- resolve source provenance ---
storage = f.get("storage", "")
if storage in ("release", "large_file"):
source = "large_file"
else:
source = _resolve_source(
fname, by_name, by_name_lower, data_names, by_path_suffix
)
if source is None:
path_field = f.get("path", "")
if path_field and path_field != fname:
source = _resolve_source(
path_field, by_name, by_name_lower,
data_names, by_path_suffix,
)
# Try MD5 hash match
if source is None:
md5_raw = f.get("md5", "")
if md5_raw:
for md5_val in md5_raw.split(","):
md5_val = md5_val.strip().lower()
if md5_val and by_md5.get(md5_val):
source = "bios"
break
# Try SHA1 hash match
if source is None:
sha1 = f.get("sha1", "")
if sha1 and sha1 in db_files:
source = "bios"
if source is None:
source = "missing"
in_repo = source != "missing"
in_platform = fname in platform_names
in_repo = _find_in_repo(fname, by_name, by_name_lower, data_names)
if not in_repo:
path_field = f.get("path", "")
if path_field and path_field != fname:
in_repo = _find_in_repo(
path_field, by_name, by_name_lower, data_names
)
# Try MD5 hash match (handles files that exist under different names)
if not in_repo:
md5_raw = f.get("md5", "")
if md5_raw:
for md5_val in md5_raw.split(","):
md5_val = md5_val.strip().lower()
if md5_val and by_md5.get(md5_val):
in_repo = True
break
# Try SHA1 hash match
if not in_repo:
sha1 = f.get("sha1", "")
if sha1 and sha1 in db.get("files", {}):
in_repo = True
entry = {
"name": fname,
@@ -213,6 +244,7 @@ def cross_reference(
"source_ref": f.get("source_ref", ""),
"in_platform": in_platform,
"in_repo": in_repo,
"source": source,
}
if not in_platform:
@@ -227,7 +259,10 @@ def cross_reference(
"platform_covered": len(covered),
"gaps": len(gaps),
"gap_in_repo": sum(1 for g in gaps if g["in_repo"]),
"gap_missing": sum(1 for g in gaps if not g["in_repo"]),
"gap_missing": sum(1 for g in gaps if g["source"] == "missing"),
"gap_bios": sum(1 for g in gaps if g["source"] == "bios"),
"gap_data": sum(1 for g in gaps if g["source"] == "data"),
"gap_large_file": sum(1 for g in gaps if g["source"] == "large_file"),
"gap_details": gaps,
}
@@ -240,15 +275,19 @@ def print_report(report: dict) -> None:
print("=" * 60)
total_gaps = 0
total_in_repo = 0
total_missing = 0
totals: dict[str, int] = {"bios": 0, "data": 0, "large_file": 0, "missing": 0}
for emu_name, data in sorted(report.items()):
gaps = data["gaps"]
if gaps == 0:
status = "OK"
else:
status = f"{data['gap_in_repo']} in repo, {data['gap_missing']} missing"
continue
parts = []
for key in ("bios", "data", "large_file", "missing"):
count = data.get(f"gap_{key}", 0)
if count:
parts.append(f"{count} {key}")
status = ", ".join(parts) if parts else "OK"
print(f"\n{data['emulator']} ({', '.join(data['systems'])})")
print(
@@ -256,23 +295,24 @@ def print_report(report: dict) -> None:
f"{data['platform_covered']} declared by platforms, "
f"{gaps} undeclared"
)
print(f" Gaps: {status}")
if gaps > 0:
print(f" Gaps: {status}")
for g in data["gap_details"]:
req = "*" if g["required"] else " "
loc = "repo" if g["in_repo"] else "MISSING"
note = f" -- {g['note']}" if g["note"] else ""
print(f" {req} {g['name']} [{loc}]{note}")
for g in data["gap_details"]:
req = "*" if g["required"] else " "
src = g.get("source", "missing").upper()
note = f" -- {g['note']}" if g["note"] else ""
print(f" {req} {g['name']} [{src}]{note}")
total_gaps += gaps
total_in_repo += data["gap_in_repo"]
total_missing += data["gap_missing"]
for key in totals:
totals[key] += data.get(f"gap_{key}", 0)
print(f"\n{'=' * 60}")
print(f"Total: {total_gaps} undeclared files across all emulators")
print(f" {total_in_repo} already in repo (can be added to packs)")
print(f" {total_missing} missing from repo (need to be sourced)")
available = totals["bios"] + totals["data"] + totals["large_file"]
print(f" {available} available (bios: {totals['bios']}, data: {totals['data']}, "
f"large_file: {totals['large_file']})")
print(f" {totals['missing']} missing (need to be sourced)")
def main():

View File

@@ -168,6 +168,7 @@ def build_indexes(files: dict, aliases: dict) -> dict:
by_md5 = {}
by_name = {}
by_crc32 = {}
by_sha256 = {}
by_path_suffix = {}
for sha1, entry in files.items():
@@ -179,6 +180,7 @@ def build_indexes(files: dict, aliases: dict) -> dict:
by_name[name].append(sha1)
by_crc32[entry["crc32"]] = sha1
by_sha256[entry["sha256"]] = sha1
# Path suffix index for regional variant resolution
suffix = _path_suffix(entry["path"])
@@ -208,6 +210,7 @@ def build_indexes(files: dict, aliases: dict) -> dict:
"by_md5": by_md5,
"by_name": by_name,
"by_crc32": by_crc32,
"by_sha256": by_sha256,
"by_path_suffix": by_path_suffix,
}

View File

@@ -1308,10 +1308,11 @@ def generate_pack(
and validation_index
):
fname = file_entry.get("name", "")
reason = check_file_validation(
check = check_file_validation(
local_path, fname, validation_index, bios_dir
)
if reason:
if check:
reason, emus_list = check
better = _find_candidate_satisfying_both(
file_entry,
db,
@@ -1322,8 +1323,7 @@ def generate_pack(
if better:
local_path = better
else:
ventry = validation_index.get(fname, {})
emus = ", ".join(ventry.get("emulators", []))
emus = ", ".join(emus_list)
file_reasons.setdefault(
dedup_key,
f"{platform_display} says OK but {emus} says {reason}",

File diff suppressed because it is too large Load Diff

View File

@@ -15,6 +15,25 @@ from common import compute_hashes
# verify.py cannot reproduce these - size checks still apply if combined.
_CRYPTO_CHECKS = frozenset({"signature", "crypto"})
def _adler32_byteswapped(path: str) -> str:
    """Compute adler32 over a file with every 16-bit word byte-swapped.

    Dolphin's DSP loader swaps every 16-bit word before hashing
    (Common::swap16 in DSPLLE.cpp:LoadDSPRom). This reproduces that
    transform so verify.py can match the expected adler32 values.

    Args:
        path: File to hash; it is read fully into memory.

    Returns:
        The checksum as 8 lowercase hex digits, without a ``0x`` prefix.
    """
    import zlib

    with open(path, "rb") as f:
        data = bytearray(f.read())
    # Pad to even length so the final byte still has a partner to swap with.
    if len(data) % 2:
        data.append(0)
    # Exchange the two bytes of every pair in place. The right-hand slices
    # are copied before either assignment happens, so this is a true swap.
    data[0::2], data[1::2] = data[1::2], data[0::2]
    return format(zlib.adler32(data) & 0xFFFFFFFF, "08x")
# All reproducible validation types.
_HASH_CHECKS = frozenset({"crc32", "md5", "sha1", "adler32"})
@@ -72,6 +91,7 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
"sha1": set(),
"sha256": set(),
"adler32": set(),
"adler32_byteswap": False,
"crypto_only": set(),
"emulators": set(),
"per_emulator": {},
@@ -120,6 +140,8 @@ def _build_validation_index(profiles: dict) -> dict[str, dict]:
if norm.startswith("0x"):
norm = norm[2:]
index[fname]["adler32"].add(norm)
if f.get("adler32_byteswap"):
index[fname]["adler32_byteswap"] = True
# Per-emulator ground truth detail
expected: dict = {}
if "size" in checks:
@@ -182,35 +204,56 @@ def build_ground_truth(filename: str, validation_index: dict[str, dict]) -> list
return result
def _emulators_for_check(
    check_type: str, per_emulator: dict[str, dict],
) -> list[str]:
    """Return emulator names that validate a specific check type.

    Args:
        check_type: Validation kind to look for (e.g. ``"size"``,
            ``"md5"``, ``"adler32"``).
        per_emulator: Mapping of emulator name to its detail dict; the
            ``"checks"`` and ``"expected"`` keys are read when present.

    Returns:
        Alphabetically sorted emulator names, each listed once.
    """

    def _validates(detail: dict) -> bool:
        if check_type in detail.get("checks", []):
            return True
        # adler32 is stored as known_hash, not always in validation list
        return check_type == "adler32" and bool(
            detail.get("expected", {}).get("adler32")
        )

    return sorted(
        emu for emu, detail in per_emulator.items() if _validates(detail)
    )
def check_file_validation(
local_path: str,
filename: str,
validation_index: dict[str, dict],
bios_dir: str = "bios",
) -> str | None:
) -> tuple[str, list[str]] | None:
"""Check emulator-level validation on a resolved file.
Supports: size (exact/min/max), crc32, md5, sha1, adler32,
signature (RSA-2048 PKCS1v15 SHA256), crypto (AES-128-CBC + SHA256).
Returns None if all checks pass or no validation applies.
Returns a reason string if a check fails.
Returns (reason, emulators) tuple on failure, where *emulators*
lists only those cores whose check actually failed.
"""
entry = validation_index.get(filename)
if not entry:
return None
checks = entry["checks"]
pe = entry.get("per_emulator", {})
# Size checks -sizes is a set of accepted values
if "size" in checks:
actual_size = os.path.getsize(local_path)
if entry["sizes"] and actual_size not in entry["sizes"]:
expected = ",".join(str(s) for s in sorted(entry["sizes"]))
return f"size mismatch: got {actual_size}, accepted [{expected}]"
emus = _emulators_for_check("size", pe)
return f"size mismatch: got {actual_size}, accepted [{expected}]", emus
if entry["min_size"] is not None and actual_size < entry["min_size"]:
return f"size too small: min {entry['min_size']}, got {actual_size}"
emus = _emulators_for_check("size", pe)
return f"size too small: min {entry['min_size']}, got {actual_size}", emus
if entry["max_size"] is not None and actual_size > entry["max_size"]:
return f"size too large: max {entry['max_size']}, got {actual_size}"
emus = _emulators_for_check("size", pe)
return f"size too large: max {entry['max_size']}, got {actual_size}", emus
# Hash checks -compute once, reuse for all hash types.
# Each hash field is a set of accepted values (multiple valid ROM versions).
@@ -223,11 +266,23 @@ def check_file_validation(
if hash_type in checks and entry[hash_type]:
if hashes[hash_type].lower() not in entry[hash_type]:
expected = ",".join(sorted(entry[hash_type]))
return f"{hash_type} mismatch: got {hashes[hash_type]}, accepted [{expected}]"
emus = _emulators_for_check(hash_type, pe)
return (
f"{hash_type} mismatch: got {hashes[hash_type]}, "
f"accepted [{expected}]",
emus,
)
if entry["adler32"]:
if hashes["adler32"].lower() not in entry["adler32"]:
actual_adler = hashes["adler32"].lower()
if entry.get("adler32_byteswap"):
actual_adler = _adler32_byteswapped(local_path)
if actual_adler not in entry["adler32"]:
expected = ",".join(sorted(entry["adler32"]))
return f"adler32 mismatch: got 0x{hashes['adler32']}, accepted [{expected}]"
emus = _emulators_for_check("adler32", pe)
return (
f"adler32 mismatch: got 0x{actual_adler}, accepted [{expected}]",
emus,
)
# Signature/crypto checks (3DS RSA, AES)
if entry["crypto_only"]:
@@ -235,7 +290,8 @@ def check_file_validation(
crypto_reason = check_crypto_validation(local_path, filename, bios_dir)
if crypto_reason:
return crypto_reason
emus = sorted(entry.get("emulators", []))
return crypto_reason, emus
return None

View File

@@ -94,6 +94,7 @@ def verify_entry_existence(
file_entry: dict,
local_path: str | None,
validation_index: dict[str, dict] | None = None,
db: dict | None = None,
) -> dict:
"""RetroArch verification: path_is_valid() -file exists = OK."""
name = file_entry.get("name", "")
@@ -102,11 +103,21 @@ def verify_entry_existence(
return {"name": name, "status": Status.MISSING, "required": required}
result = {"name": name, "status": Status.OK, "required": required}
if validation_index:
reason = check_file_validation(local_path, name, validation_index)
if reason:
ventry = validation_index.get(name, {})
emus = ", ".join(ventry.get("emulators", []))
result["discrepancy"] = f"file present (OK) but {emus} says {reason}"
check = check_file_validation(local_path, name, validation_index)
if check:
reason, emus_list = check
suppressed = False
if db:
better = _find_best_variant(
file_entry, db, local_path, validation_index,
)
if better:
suppressed = True
if not suppressed:
emus = ", ".join(emus_list)
result["discrepancy"] = (
f"file present (OK) but {emus} says {reason}"
)
return result
@@ -561,35 +572,82 @@ def _find_best_variant(
current_path: str,
validation_index: dict,
) -> str | None:
"""Search for a repo file that passes both platform MD5 and emulator validation."""
"""Search for a repo file that passes emulator validation.
Two-pass search:
1. Hash lookup — use the emulator's expected hashes (sha1, md5, sha256,
crc32) to find candidates directly in the DB indexes. This finds
variants stored under different filenames (e.g. megacd2_v200_eu.bin
for bios_CD_E.bin).
2. Name lookup — check all files sharing the same name (aliases,
.variants/ with name-based suffixes).
If any candidate on disk passes ``check_file_validation``, the
discrepancy is suppressed — the repo has what the emulator needs.
"""
fname = file_entry.get("name", "")
if not fname or fname not in validation_index:
return None
md5_expected = file_entry.get("md5", "")
md5_set = (
{m.strip().lower() for m in md5_expected.split(",") if m.strip()}
if md5_expected
else set()
)
by_name = db.get("indexes", {}).get("by_name", {})
files_db = db.get("files", {})
current_real = os.path.realpath(current_path)
seen_paths: set[str] = set()
for sha1 in by_name.get(fname, []):
def _try_candidate(sha1: str) -> str | None:
candidate = files_db.get(sha1, {})
path = candidate.get("path", "")
if (
not path
or not os.path.exists(path)
or os.path.realpath(path) == os.path.realpath(current_path)
):
continue
if md5_set and candidate.get("md5", "").lower() not in md5_set:
continue
reason = check_file_validation(path, fname, validation_index)
if reason is None:
if not path or not os.path.exists(path):
return None
rp = os.path.realpath(path)
if rp == current_real or rp in seen_paths:
return None
seen_paths.add(rp)
if check_file_validation(path, fname, validation_index) is None:
return path
return None
# Pass 1: hash-based lookup from emulator expected values
ventry = validation_index[fname]
indexes = db.get("indexes", {})
for hash_type, db_index_key in (
("sha1", None),
("md5", "by_md5"),
("crc32", "by_crc32"),
("sha256", "by_sha256"),
):
expected = ventry.get(hash_type)
if not expected:
continue
if db_index_key is None:
# SHA1 is the primary key of files_db
for h in expected:
if h in files_db:
result = _try_candidate(h)
if result:
return result
continue
db_index = indexes.get(db_index_key, {})
for h in expected:
entries = db_index.get(h)
if not entries:
continue
if isinstance(entries, list):
for sha1 in entries:
result = _try_candidate(sha1)
if result:
return result
elif isinstance(entries, str):
result = _try_candidate(entries)
if result:
return result
# Pass 2: name-based lookup (aliases, .variants/ with same filename)
by_name = db.get("indexes", {}).get("by_name", {})
for sha1 in by_name.get(fname, []):
result = _try_candidate(sha1)
if result:
return result
return None
@@ -655,6 +713,7 @@ def verify_platform(
file_entry,
local_path,
validation_index,
db,
)
elif mode == "sha1":
result = verify_entry_sha1(file_entry, local_path)
@@ -665,8 +724,11 @@ def verify_platform(
# mismatches are reported as discrepancies, not failures.
if result["status"] == Status.OK and local_path and validation_index:
fname = file_entry.get("name", "")
reason = check_file_validation(local_path, fname, validation_index)
if reason:
check = check_file_validation(
local_path, fname, validation_index,
)
if check:
reason, emus_list = check
better = _find_best_variant(
file_entry,
db,
@@ -674,8 +736,7 @@ def verify_platform(
validation_index,
)
if not better:
ventry = validation_index.get(fname, {})
emus = ", ".join(ventry.get("emulators", []))
emus = ", ".join(emus_list)
result["discrepancy"] = (
f"{platform} says OK but {emus} says {reason}"
)
@@ -1166,8 +1227,9 @@ def verify_emulator(
result = {"name": name, "status": Status.MISSING, "required": required}
else:
# Apply emulator validation
reason = check_file_validation(local_path, name, validation_index)
if reason:
check = check_file_validation(local_path, name, validation_index)
if check:
reason, _emus = check
result = {
"name": name,
"status": Status.UNTESTED,

View File

@@ -1137,9 +1137,11 @@ class TestE2E(unittest.TestCase):
profiles = load_emulator_profiles(self.emulators_dir)
index = _build_validation_index(profiles)
path = self.files["present_opt.bin"]["path"]
reason = check_file_validation(path, "present_opt.bin", index)
self.assertIsNotNone(reason)
result = check_file_validation(path, "present_opt.bin", index)
self.assertIsNotNone(result)
reason, emus = result
self.assertIn("size mismatch", reason)
self.assertIsInstance(emus, list)
def test_73_validation_crc32_pass(self):
"""File with correct CRC32 passes validation."""
@@ -1154,9 +1156,11 @@ class TestE2E(unittest.TestCase):
profiles = load_emulator_profiles(self.emulators_dir)
index = _build_validation_index(profiles)
path = self.files["no_md5.bin"]["path"]
reason = check_file_validation(path, "no_md5.bin", index)
self.assertIsNotNone(reason)
result = check_file_validation(path, "no_md5.bin", index)
self.assertIsNotNone(result)
reason, emus = result
self.assertIn("crc32 mismatch", reason)
self.assertIsInstance(emus, list)
def test_75_validation_applied_in_existence_mode(self):
"""Existence mode reports discrepancy when validation fails, keeps OK."""
@@ -1212,9 +1216,11 @@ class TestE2E(unittest.TestCase):
profiles = load_emulator_profiles(self.emulators_dir)
index = _build_validation_index(profiles)
path = self.files["alias_target.bin"]["path"]
reason = check_file_validation(path, "alias_target.bin", index)
self.assertIsNotNone(reason)
result = check_file_validation(path, "alias_target.bin", index)
self.assertIsNotNone(result)
reason, emus = result
self.assertIn("md5 mismatch", reason)
self.assertIsInstance(emus, list)
def test_81_validation_index_has_md5_sha1(self):
"""Validation index stores md5 and sha1 when declared."""