fix: round 2 audit fixes, updated emulator profiles

Scripts:
- fix generate_site nav regex destroying mkdocs.yml content
- fix auto_fetch comma-separated MD5 in find_missing
- fix verify print_platform_result conflating untested/missing
- fix validate_pr path traversal and symlink check
- fix batocera_scraper brace counting and escaped quotes in strings
- fix emudeck_scraper hash search crossing function boundaries
- fix pipeline.py cwd to repo root via Path(__file__)
- normalize SHA1 comparison to lowercase in generate_pack

Emulator profiles:
- emux_gb/nes/sms: reclassify from alias to standalone profiles
- ep128emu: remove .info-only files not referenced in source
- fbalpha2012 variants: full source-verified profiles
- fbneo_cps12: add new profile
2026-06-12 18:15:30 -05:00 · 2026-03-19 15:00:18 +01:00
parent 38d605c7d5
commit 257ec1a527
19 changed files with 483 additions and 82 deletions
@@ -81,8 +81,10 @@ def find_missing(config: dict, db: dict) -> list[dict]:
            found = False
            if sha1 and sha1 in db.get("files", {}):
                found = True
-            elif md5 and md5 in db.get("indexes", {}).get("by_md5", {}):
-                found = True
+            elif md5:
+                by_md5 = db.get("indexes", {}).get("by_md5", {})
+                md5_list = [m.strip() for m in md5.split(",") if m.strip()]
+                found = any(m in by_md5 for m in md5_list)

            if not found:
                missing.append({
@@ -53,7 +53,7 @@ def _verify_file_hash(path: str, expected_sha1: str = "",
        return True
    hashes = compute_hashes(path)
    if expected_sha1:
-        return hashes["sha1"] == expected_sha1
+        return hashes["sha1"].lower() == expected_sha1.lower()
    md5_list = [m.strip().lower() for m in expected_md5.split(",") if m.strip()]
    return hashes["md5"].lower() in md5_list

@@ -797,7 +797,7 @@ def main():
    # Replace nav section (everything from \nnav: to the next top-level key or EOF)
    import re
    if "\nnav:" in content:
-        content = re.sub(r'\nnav:.*', '\n' + nav_yaml.rstrip(), content, count=1, flags=re.DOTALL)
+        content = re.sub(r'\nnav:\n(?:[ \t]+.*\n?)*', '\n' + nav_yaml, content, count=1)
    else:
        content += "\n" + nav_yaml
    with open("mkdocs.yml", "w") as f:
@@ -21,13 +21,15 @@ import json
 import subprocess
 import sys
 import time
+from pathlib import Path


 def run(cmd: list[str], label: str) -> tuple[bool, str]:
    """Run a command. Returns (success, captured_output)."""
    print(f"\n--- {label} ---", flush=True)
    start = time.monotonic()
-    result = subprocess.run(cmd, capture_output=True, text=True, cwd=".")
+    repo_root = str(Path(__file__).resolve().parent.parent)
+    result = subprocess.run(cmd, capture_output=True, text=True, cwd=repo_root)
    elapsed = time.monotonic() - start

    output = result.stdout
@@ -101,14 +101,26 @@ class Scraper(BaseScraper):
        start = match.start() + raw[match.start():].index("{")
        depth = 0
        i = start
+        in_str = False
+        str_ch = None
        while i < len(raw):
-            if raw[i] == "{":
+            ch = raw[i]
+            if in_str:
+                if ch == '\\':
+                    i += 2
+                    continue
+                if ch == str_ch:
+                    in_str = False
+            elif ch in ('"', "'"):
+                in_str = True
+                str_ch = ch
+            elif ch == "{":
                depth += 1
-            elif raw[i] == "}":
+            elif ch == "}":
                depth -= 1
                if depth == 0:
                    break
-            elif raw[i] == "#":
+            elif ch == "#":
                while i < len(raw) and raw[i] != "\n":
                    i += 1
            i += 1
@@ -120,10 +132,15 @@ class Scraper(BaseScraper):
            in_string = False
            string_char = None
            clean = []
-            for j, ch in enumerate(line):
-                if ch in ('"', "'") and j > 0 and line[j - 1] == '\\':
+            j = 0
+            while j < len(line):
+                ch = line[j]
+                if ch == '\\' and j + 1 < len(line):
                    clean.append(ch)
-                elif ch in ('"', "'") and not in_string:
+                    clean.append(line[j + 1])
+                    j += 2
+                    continue
+                if ch in ('"', "'") and not in_string:
                    in_string = True
                    string_char = ch
                    clean.append(ch)
@@ -134,6 +151,7 @@ class Scraper(BaseScraper):
                    break
                else:
                    clean.append(ch)
+                j += 1
            lines.append("".join(clean))

        clean_dict_str = "\n".join(lines)
@@ -227,8 +227,10 @@ class Scraper(BaseScraper):
                continue
            system = FUNCTION_HASH_MAP[func_name]
            func_start = func_match.start()
-            remaining = script[func_start:]
-            local_match = _RE_LOCAL_HASHES.search(remaining)
+            next_func = _RE_FUNC.search(script, func_match.end())
+            func_end = next_func.start() if next_func else len(script)
+            func_body = script[func_start:func_end]
+            local_match = _RE_LOCAL_HASHES.search(func_body)
            if local_match:
                hashes_raw = local_match.group(1)
                hashes = [h.strip() for h in hashes_raw.split() if h.strip()]
@@ -174,14 +174,17 @@ def validate_file(
    else:
        result.add_warning("File not referenced in any platform config - needs manual review")

-    if filepath.startswith("bios/"):
-        parts = filepath.split("/")
+    normalized = os.path.normpath(filepath)
+    if os.path.islink(filepath):
+        result.add_check(False, "Symlinks are not allowed")
+    elif normalized.startswith("bios" + os.sep):
+        parts = normalized.split(os.sep)
        if len(parts) >= 4:
            result.add_check(True, f"Correct placement: bios/{parts[1]}/{parts[2]}/")
        else:
            result.add_warning("File should be in bios/Manufacturer/Console/ structure")
    else:
-        result.add_warning(f"File is not under bios/ directory")
+        result.add_warning("File is not under bios/ directory")

    if name_known and not sha1_known and not md5_known:
        result.add_info(
@@ -421,8 +421,9 @@ def print_platform_result(result: dict, group: list[str]) -> None:
        else:
            parts = [f"{ok_count}/{total} present"]
    else:
-        untested = c.get(Severity.WARNING, 0)
-        missing = c.get(Severity.CRITICAL, 0)
+        sc = result.get("status_counts", {})
+        untested = sc.get(Status.UNTESTED, 0)
+        missing = sc.get(Status.MISSING, 0)
        parts = [f"{ok_count}/{total} OK"]
        if untested:
            parts.append(f"{untested} untested")