mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
289 lines
9.4 KiB
Python
289 lines
9.4 KiB
Python
#!/usr/bin/env python3
"""Validate BIOS file contributions in Pull Requests.

Usage:
    python scripts/validate_pr.py [files...]
    python scripts/validate_pr.py --changed  # Auto-detect changed files via git

Multi-layer validation:
    1. Hash verified against known databases (System.dat, batocera-systems)
    2. File size matches expected value
    3. File referenced in ≥1 platform config
    4. Duplicate detection against database.json
    5. Security checks (no executables, reasonable sizes)

Outputs a structured report suitable for PR comments.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import subprocess
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
sys.path.insert(0, os.path.dirname(__file__))
|
||
from common import compute_hashes, load_database as _load_database
|
||
|
||
try:
|
||
import yaml
|
||
except ImportError:
|
||
yaml = None
|
||
|
||
# Default values for the --db and --platforms-dir command-line options.
DEFAULT_DB = "database.json"
DEFAULT_PLATFORMS_DIR = "platforms"

# Lower-cased extensions that make a contributed file fail validation.
BLOCKED_EXTENSIONS = {
    ".exe",
    ".bat",
    ".cmd",
    ".sh",
    ".ps1",
    ".vbs",
    ".js",
    ".msi",
    ".dll",
    ".so",
    ".dylib",
    ".py",
    ".rb",
    ".pl",
}

# Largest accepted file size in bytes (100 MiB); bigger files fail validation.
MAX_FILE_SIZE = 100 * 1024 ** 2
|
||
|
||
|
||
class ValidationResult:
    """Outcome of validating a single file.

    Holds the file's path, base name, size and hash digests together with an
    ordered list of check outcomes. The result counts as passed as long as no
    recorded check has the status ``"FAIL"``.
    """

    def __init__(self, filepath: str):
        self.filepath = filepath
        self.filename = os.path.basename(filepath)
        # Ordered (status, message) tuples; status is one of
        # "PASS", "FAIL", "WARN" or "INFO".
        self.checks = []
        # Digests and size keep these placeholder values until assigned.
        self.sha1 = ""
        self.md5 = ""
        self.crc32 = ""
        self.size = 0

    def add_check(self, passed: bool, message: str):
        """Record a check as ``"PASS"`` when *passed* is true, else ``"FAIL"``."""
        status = "PASS" if passed else "FAIL"
        self.checks.append((status, message))

    def add_warning(self, message: str):
        """Record a ``"WARN"`` entry; warnings do not fail the result."""
        self.checks.append(("WARN", message))

    def add_info(self, message: str):
        """Record an ``"INFO"`` entry."""
        self.checks.append(("INFO", message))

    @property
    def passed(self) -> bool:
        """``True`` unless at least one recorded check is a ``"FAIL"``."""
        return not any(status == "FAIL" for status, _ in self.checks)

    def to_markdown(self) -> str:
        """Render the result as a markdown section.

        The section is a level-3 heading, a bullet list with the file's
        metadata, a blank line, and one bullet per recorded check.
        """
        headline_icon = "✅" if self.passed else "❌"
        rendered = [
            f"### {headline_icon} `{self.filename}`",
            "",
            f"- **Path**: `{self.filepath}`",
            f"- **Size**: {self.size:,} bytes",
            f"- **SHA1**: `{self.sha1}`",
            f"- **MD5**: `{self.md5}`",
            f"- **CRC32**: `{self.crc32}`",
            "",
        ]

        # Any status not listed here is rendered with the info icon.
        icon_by_status = {"PASS": "✅", "FAIL": "❌", "WARN": "⚠️"}
        for status, message in self.checks:
            icon = icon_by_status.get(status, "ℹ️")
            rendered.append(f"- {icon} {message}")

        return "\n".join(rendered)
|
||
|
||
|
||
def load_database(db_path: str) -> dict | None:
    """Load the database at *db_path*, tolerating a missing or corrupt file.

    Delegates to the ``load_database`` helper imported from ``common``.

    Returns:
        Whatever the helper returns, or ``None`` when it raises
        ``FileNotFoundError`` or ``json.JSONDecodeError``.
    """
    try:
        database = _load_database(db_path)
    except (FileNotFoundError, json.JSONDecodeError):
        return None
    return database
|
||
|
||
|
||
def load_platform_hashes(platforms_dir: str) -> dict:
    """Load all known hashes and filenames from the platform configs.

    Reads every ``*.yml`` file directly inside *platforms_dir* (files whose
    name starts with ``_`` are skipped) and collects the ``sha1``, ``md5`` and
    ``name`` values of each entry found under ``systems.<id>.files``.

    Files that fail to parse, and documents or entries that do not have the
    expected mapping/list shape (including keys present with an empty value),
    are skipped rather than aborting the whole run.

    Args:
        platforms_dir: Directory containing the platform YAML files.

    Returns:
        A dict with the keys ``"sha1"``, ``"md5"`` and ``"names"``, each
        holding a set of the collected values. All three sets are empty when
        the directory does not exist or PyYAML is not installed.
    """
    known = {"sha1": set(), "md5": set(), "names": set()}

    if not os.path.isdir(platforms_dir) or yaml is None:
        return known

    # (key in a file entry, key of the result set it feeds)
    field_to_bucket = (("sha1", "sha1"), ("md5", "md5"), ("name", "names"))

    for config_path in Path(platforms_dir).glob("*.yml"):
        if config_path.name.startswith("_"):
            continue

        # Explicit encoding so parsing does not depend on the locale default.
        with open(config_path, encoding="utf-8") as fh:
            try:
                config = yaml.safe_load(fh)
            except yaml.YAMLError:
                continue

        if not isinstance(config, dict):
            continue

        # ``systems:`` with no value loads as None, so ``.get(key, {})`` alone
        # is not enough to guarantee a mapping here.
        systems = config.get("systems") or {}
        if not isinstance(systems, dict):
            continue

        for system in systems.values():
            if not isinstance(system, dict):
                continue
            for file_entry in system.get("files") or []:
                if not isinstance(file_entry, dict):
                    continue
                for field, bucket in field_to_bucket:
                    if field in file_entry:
                        known[bucket].add(file_entry[field])

    return known
|
||
|
||
|
||
def validate_file(
    filepath: str,
    db: dict | None,
    platform_hashes: dict,
) -> ValidationResult:
    """Run all validation checks on a file.

    Checks are recorded on the returned result in this order:

    1. Existence: a missing path, or a path that is not a regular file,
       records a single failure and returns immediately.
    2. Extension: fails when the lower-cased extension is in
       ``BLOCKED_EXTENSIONS``.
    3. Size: fails when the file is empty or larger than ``MAX_FILE_SIZE``.
    4. Duplicate: warns when the SHA1 is a key of ``db["files"]``
       (skipped entirely when *db* is falsy).
    5. Platform reference: passes on a SHA1 or MD5 match, warns on a
       filename-only match or on no match at all.
    6. Placement: passes for ``bios/<a>/<b>/<file>`` or deeper, warns
       otherwise.
    7. Variant hint: an info entry when only the filename matched.

    Args:
        filepath: Path of the file to validate.
        db: Loaded database, or ``None`` when it is unavailable.
        platform_hashes: Dict with ``"sha1"``, ``"md5"`` and ``"names"``
            sets of known values.

    Returns:
        A ``ValidationResult`` carrying the size, digests and check outcomes.
    """
    result = ValidationResult(filepath)

    if not os.path.exists(filepath):
        result.add_check(False, f"File not found: {filepath}")
        return result
    if not os.path.isfile(filepath):
        # A directory or other non-regular path exists but cannot be hashed;
        # report it as a failed check instead of passing it on to hashing.
        result.add_check(False, f"Not a regular file: {filepath}")
        return result

    result.size = os.path.getsize(filepath)
    hashes = compute_hashes(filepath)
    result.sha1 = hashes["sha1"]
    result.md5 = hashes["md5"]
    result.crc32 = hashes["crc32"]

    ext = os.path.splitext(filepath)[1].lower()
    if ext in BLOCKED_EXTENSIONS:
        result.add_check(False, f"Blocked file extension: {ext}")

    if result.size > MAX_FILE_SIZE:
        result.add_check(
            False,
            f"File too large for embedded storage ({result.size:,} > {MAX_FILE_SIZE:,} bytes). "
            "Use storage: external in platform config.",
        )
    elif result.size == 0:
        result.add_check(False, "File is empty (0 bytes)")
    else:
        result.add_check(True, f"File size OK ({result.size:,} bytes)")

    if db:
        if result.sha1 in db.get("files", {}):
            existing = db["files"][result.sha1]
            result.add_warning(f"Duplicate: identical file already exists at `{existing['path']}`")
        else:
            result.add_check(True, "Not a duplicate in database")

    sha1_known = result.sha1 in platform_hashes.get("sha1", set())
    md5_known = result.md5 in platform_hashes.get("md5", set())
    name_known = result.filename in platform_hashes.get("names", set())

    if sha1_known:
        result.add_check(True, "SHA1 matches known platform requirement")
    elif md5_known:
        result.add_check(True, "MD5 matches known platform requirement")
    elif name_known:
        result.add_warning("Filename matches a known requirement but hash differs - may be a variant")
    else:
        result.add_warning("File not referenced in any platform config - needs manual review")

    # Normalise "./bios/...", doubled slashes and OS-specific separators so
    # that equivalent spellings of the same relative path are judged alike.
    normalized = os.path.normpath(filepath).replace(os.sep, "/")
    if normalized.startswith("bios/"):
        parts = normalized.split("/")
        # bios / <parts[1]> / <parts[2]> / <file> needs at least four parts.
        if len(parts) >= 4:
            result.add_check(True, f"Correct placement: bios/{parts[1]}/{parts[2]}/")
        else:
            result.add_warning("File should be in bios/Manufacturer/Console/ structure")
    else:
        result.add_warning("File is not under bios/ directory")

    if name_known and not sha1_known and not md5_known:
        result.add_info(
            "This may be a valid variant. If accepted, it will be placed in "
            f"`.variants/{result.filename}.{result.sha1[:8]}`"
        )

    return result
|
||
|
||
|
||
def _bios_paths_from_git_diff(diff_args: list[str], *, check: bool) -> list[str]:
    """Run ``git diff --name-only`` with *diff_args* and keep the ``bios/`` paths."""
    completed = subprocess.run(
        # -z: NUL-separated, unquoted names, so unusual filenames are not
        #     wrapped in quotes and dropped by the prefix filter below.
        # --diff-filter=d: leave out deleted files, which no longer exist on
        #     disk and therefore cannot be validated.
        ["git", "diff", "--name-only", "-z", "--diff-filter=d", *diff_args],
        capture_output=True, text=True, check=check,
    )
    return [path for path in completed.stdout.split("\0") if path.startswith("bios/")]


def get_changed_files() -> list[str]:
    """Get list of changed files in current PR/branch using git.

    Compares ``HEAD`` against ``origin/main``, ``origin/master`` and
    ``origin/v2`` in turn and returns the first non-empty list of changed
    paths under ``bios/``. If none of those comparisons yields anything, the
    staged changes (``git diff --cached``) are used instead.

    Returns:
        Paths starting with ``bios/``; an empty list when nothing changed or
        when git cannot be executed.
    """
    try:
        for base in ("main", "master", "v2"):
            try:
                files = _bios_paths_from_git_diff([f"origin/{base}...HEAD"], check=True)
            except subprocess.CalledProcessError:
                # This base ref is unusable here; try the next candidate.
                continue
            if files:
                return files

        # The fallback's exit status is ignored on purpose: a failure just
        # produces empty output and therefore an empty list.
        return _bios_paths_from_git_diff(["--cached"], check=False)
    except OSError:
        # git could not be started at all, so there is nothing to report.
        return []
|
||
|
||
|
||
def _render_json(results: list) -> str:
    """Serialise the validation results as an indented JSON array."""
    payload = [
        {
            "file": r.filepath,
            "passed": r.passed,
            "sha1": r.sha1,
            "md5": r.md5,
            "crc32": r.crc32,
            "size": r.size,
            "checks": [{"status": s, "message": m} for s, m in r.checks],
        }
        for r in results
    ]
    return json.dumps(payload, indent=2)


def _render_markdown(results: list, all_passed: bool) -> str:
    """Build the markdown report: overall status followed by one section per file."""
    status = "✅ All checks passed" if all_passed else "❌ Some checks failed"
    lines = ["## BIOS Validation Report", "", f"**Status**: {status}", ""]
    for r in results:
        lines.append(r.to_markdown())
        lines.append("")
    return "\n".join(lines)


def _render_text(results: list) -> str:
    """Build the plain-text report printed when no output format is selected."""
    # Statuses not listed here (i.e. INFO) use the "i" marker.
    markers = {"PASS": "✓", "FAIL": "✗", "WARN": "!"}
    lines = []
    for r in results:
        status = "PASS" if r.passed else "FAIL"
        lines.append(f"\n[{status}] {r.filepath}")
        lines.append(f" SHA1: {r.sha1}")
        lines.append(f" MD5: {r.md5}")
        lines.append(f" CRC32: {r.crc32}")
        lines.append(f" Size: {r.size:,}")
        for s, m in r.checks:
            lines.append(f" [{markers.get(s, 'i')}] {m}")
    return "\n".join(lines)


def main():
    """Command-line entry point.

    Parses the arguments, determines the files to validate (explicit paths,
    or the changed ``bios/`` files when ``--changed`` is given), validates
    each one and prints a report as JSON (``--json``), markdown
    (``--markdown``) or plain text.

    Exits with status 1 when any file has a failed check. With ``--changed``
    and no changed files it returns normally without validating anything.
    """
    parser = argparse.ArgumentParser(description="Validate BIOS file contributions")
    parser.add_argument("files", nargs="*", help="Files to validate")
    parser.add_argument("--changed", action="store_true", help="Auto-detect changed BIOS files")
    parser.add_argument("--db", default=DEFAULT_DB, help="Path to database.json")
    parser.add_argument("--platforms-dir", default=DEFAULT_PLATFORMS_DIR)
    parser.add_argument("--markdown", action="store_true", help="Output as markdown (for PR comments)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    files = args.files
    if args.changed:
        files = get_changed_files()
        if not files:
            if args.json:
                # Keep stdout parseable for consumers that asked for JSON.
                print("[]")
            else:
                print("No changed BIOS files detected")
            return

    if not files:
        parser.error("No files specified. Use --changed or provide file paths.")

    db = load_database(args.db)
    platform_hashes = load_platform_hashes(args.platforms_dir)

    results = [validate_file(f, db, platform_hashes) for f in files]
    all_passed = all(r.passed for r in results)

    # --json takes precedence over --markdown when both are given.
    if args.json:
        print(_render_json(results))
    elif args.markdown:
        print(_render_markdown(results, all_passed))
    else:
        print(_render_text(results))

    if not all_passed:
        sys.exit(1)
|
||
|
||
|
||
if __name__ == "__main__":
    # main() returns None, which sys.exit() treats as a successful exit.
    sys.exit(main())
|