libretro/scripts/diff_truth.py

#!/usr/bin/env python3
"""Compare scraped platform YAMLs against ground-truth YAMLs.

Usage:
    python scripts/diff_truth.py --all
    python scripts/diff_truth.py --platform retroarch
    python scripts/diff_truth.py --platform retroarch --json
    python scripts/diff_truth.py --all --format markdown
"""

from __future__ import annotations

import argparse
import json
import os
import sys

# Put this script's own directory at the front of sys.path so the `common`
# and `truth` imports below are looked up there first.
sys.path.insert(0, os.path.dirname(__file__))
from common import list_registered_platforms, load_platform_config, require_yaml
from truth import diff_platform_truth

# NOTE(review): require_yaml() presumably returns the imported YAML module
# (it is used below as `yaml.safe_load`) — confirm against common.py.
yaml = require_yaml()


def _load_truth(truth_dir: str, platform: str) -> dict | None:
    path = os.path.join(truth_dir, f"{platform}.yml")
    if not os.path.exists(path):
        return None
    with open(path) as f:
        return yaml.safe_load(f) or {}


def _format_terminal(report: dict) -> str:
    lines: list[str] = []
    platform = report.get("platform", "unknown")
    s = report["summary"]

    lines.append(f"=== {platform} ===")
    lines.append(
        f"  {s['systems_compared']} systems compared: "
        f"{s['systems_fully_covered']} full, "
        f"{s['systems_partially_covered']} partial, "
        f"{s['systems_uncovered']} uncovered"
    )

    totals = []
    if s["total_missing"]:
        totals.append(f"{s['total_missing']} missing")
    if s["total_extra_phantom"]:
        totals.append(f"{s['total_extra_phantom']} phantom")
    if s["total_extra_unprofiled"]:
        totals.append(f"{s['total_extra_unprofiled']} unprofiled")
    if s["total_hash_mismatch"]:
        totals.append(f"{s['total_hash_mismatch']} hash")
    if s["total_required_mismatch"]:
        totals.append(f"{s['total_required_mismatch']} required")
    if totals:
        lines.append(f"  divergences: {', '.join(totals)}")
    else:
        lines.append("  no divergences")

    for sys_id, div in sorted(report.get("divergences", {}).items()):
        labels: list[str] = []
        if div.get("missing"):
            labels.append(f"MISSING:{len(div['missing'])}")
        if div.get("extra_phantom"):
            labels.append(f"PHANTOM:{len(div['extra_phantom'])}")
        if div.get("extra_unprofiled"):
            labels.append(f"UNPROF:{len(div['extra_unprofiled'])}")
        if div.get("hash_mismatch"):
            labels.append(f"HASH:{len(div['hash_mismatch'])}")
        if div.get("required_mismatch"):
            labels.append(f"REQ:{len(div['required_mismatch'])}")
        lines.append(f"  {sys_id}: {' '.join(labels)}")

        for m in div.get("missing", []):
            cores = ", ".join(m.get("cores", []))
            lines.append(f"    + {m['name']}  [{cores}]")
        for h in div.get("hash_mismatch", []):
            ht = h["hash_type"]
            lines.append(
                f"    ~ {h['name']}  {ht}: {h[f'truth_{ht}']} != {h[f'scraped_{ht}']}"
            )
        for p in div.get("extra_phantom", []):
            lines.append(f"    - {p['name']}  (phantom)")
        for u in div.get("extra_unprofiled", []):
            lines.append(f"    ? {u['name']}  (unprofiled)")
        for r in div.get("required_mismatch", []):
            lines.append(
                f"    ! {r['name']}  required: {r['truth_required']} != {r['scraped_required']}"
            )

    uncovered = report.get("uncovered_systems", [])
    if uncovered:
        lines.append(f"  uncovered ({len(uncovered)}): {', '.join(uncovered)}")

    return "\n".join(lines)


def _format_markdown(report: dict) -> str:
    lines: list[str] = []
    platform = report.get("platform", "unknown")
    s = report["summary"]

    lines.append(f"# {platform}")
    lines.append("")
    lines.append(
        f"**{s['systems_compared']}** systems compared | "
        f"**{s['systems_fully_covered']}** full | "
        f"**{s['systems_partially_covered']}** partial | "
        f"**{s['systems_uncovered']}** uncovered"
    )
    lines.append(
        f"**{s['total_missing']}** missing | "
        f"**{s['total_extra_phantom']}** phantom | "
        f"**{s['total_extra_unprofiled']}** unprofiled | "
        f"**{s['total_hash_mismatch']}** hash | "
        f"**{s['total_required_mismatch']}** required"
    )
    lines.append("")

    for sys_id, div in sorted(report.get("divergences", {}).items()):
        lines.append(f"## {sys_id}")
        lines.append("")
        for m in div.get("missing", []):
            refs = ""
            if m.get("source_refs"):
                refs = " " + " ".join(f"`{r}`" for r in m["source_refs"])
            lines.append(f"- **Add** `{m['name']}`{refs}")
        for h in div.get("hash_mismatch", []):
            ht = h["hash_type"]
            lines.append(
                f"- **Fix hash** `{h['name']}` {ht}: `{h[f'truth_{ht}']}` != `{h[f'scraped_{ht}']}`"
            )
        for p in div.get("extra_phantom", []):
            lines.append(f"- **Remove** `{p['name']}` (phantom)")
        for u in div.get("extra_unprofiled", []):
            lines.append(f"- **Check** `{u['name']}` (unprofiled cores)")
        for r in div.get("required_mismatch", []):
            lines.append(
                f"- **Fix required** `{r['name']}`: truth={r['truth_required']}, scraped={r['scraped_required']}"
            )
        lines.append("")

    uncovered = report.get("uncovered_systems", [])
    if uncovered:
        lines.append("## Uncovered systems")
        lines.append("")
        for u in uncovered:
            lines.append(f"- {u}")
        lines.append("")

    return "\n".join(lines)


def main() -> None:
    """Command-line entry point: diff scraped platform configs against truth.

    Exactly one of ``--all`` or ``--platform NAME`` must be given. For each
    selected platform the truth YAML and the scraped config are loaded and
    diffed. A platform without a truth file, or whose scraped config raises
    ``FileNotFoundError``, is skipped; the skip notice goes to stderr and is
    suppressed in ``--json`` mode.

    Without ``--json`` each report is printed as it is produced, using the
    formatter chosen by ``--format``. With ``--json`` all reports are
    collected and written to stdout as a single JSON array at the end
    (``--format`` is not consulted in that mode).
    """
    cli = argparse.ArgumentParser(description="Compare scraped vs truth YAMLs")
    target = cli.add_mutually_exclusive_group(required=True)
    target.add_argument(
        "--all", action="store_true", help="diff all registered platforms"
    )
    target.add_argument("--platform", help="diff a single platform")
    cli.add_argument(
        "--json", action="store_true", dest="json_output", help="JSON output"
    )
    cli.add_argument("--format", choices=["terminal", "markdown"], default="terminal")
    cli.add_argument("--truth-dir", default="dist/truth")
    cli.add_argument("--platforms-dir", default="platforms")
    cli.add_argument("--include-archived", action="store_true")
    opts = cli.parse_args()

    as_json = opts.json_output

    def note_skip(message: str) -> None:
        # Keep skip notices out of the way in JSON mode.
        if not as_json:
            print(message, file=sys.stderr)

    if not opts.all:
        names = [opts.platform]
    else:
        names = list_registered_platforms(
            opts.platforms_dir, include_archived=opts.include_archived
        )

    render = _format_terminal if opts.format != "markdown" else _format_markdown
    collected: list[dict] = []

    for name in names:
        truth = _load_truth(opts.truth_dir, name)
        if truth is None:
            note_skip(f"skip {name}: no truth YAML in {opts.truth_dir}/")
            continue

        try:
            scraped = load_platform_config(name, opts.platforms_dir)
        except FileNotFoundError:
            note_skip(f"skip {name}: no scraped config")
            continue

        report = diff_platform_truth(truth, scraped)
        report["platform"] = name

        if as_json:
            collected.append(report)
            continue

        # One blank line separates consecutive text reports.
        print(render(report))
        print()

    if as_json:
        json.dump(collected, sys.stdout, indent=2)
        print()


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()