From 8d81aee2352a4516107e880e871ee6440d7f34d3 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Tue, 17 Mar 2026 15:35:30 +0100 Subject: [PATCH] refactor: quality audit fixes, honest verification reporting - batocera_scraper: fix OrderedDict parsing for ast.literal_eval - auto_fetch: fix TypeError when sha1/md5 is None - verify: filter non-ZIP files for zipped_file entries (F2) - verify: distinguish ZIP read errors from hash mismatches (F5) - generate_pack: track seen_destinations with source hash (F7) Batocera ep64/ep128.zip now correctly reported as MISSING instead of false UNTESTED (resolved to .rom instead of .zip) --- scripts/auto_fetch.py | 4 ++-- scripts/generate_pack.py | 4 ++-- scripts/scraper/batocera_scraper.py | 4 +++- scripts/verify.py | 23 +++++++++++++++-------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/auto_fetch.py b/scripts/auto_fetch.py index 949d759f..065dabfe 100644 --- a/scripts/auto_fetch.py +++ b/scripts/auto_fetch.py @@ -345,8 +345,8 @@ def generate_issue_body(missing: list[dict], platform: str) -> str: ] for entry in missing: - sha1 = entry.get("sha1", "N/A") - md5 = entry.get("md5", "N/A") + sha1 = entry.get("sha1") or "N/A" + md5 = entry.get("md5") or "N/A" lines.append(f"| `{entry['name']}` | {entry['system']} | `{sha1[:12]}...` | `{md5[:12]}...` |") lines.extend([ diff --git a/scripts/generate_pack.py b/scripts/generate_pack.py index 714770c6..0de565f9 100644 --- a/scripts/generate_pack.py +++ b/scripts/generate_pack.py @@ -263,7 +263,7 @@ def generate_pack( missing_files = [] untested_files = [] user_provided = [] - seen_destinations = set() + seen_destinations = {} with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: for sys_id, system in sorted(config.get("systems", {}).items()): @@ -279,7 +279,7 @@ def generate_pack( dedup_key = full_dest if dedup_key in seen_destinations: continue - seen_destinations.add(dedup_key) + seen_destinations[dedup_key] = file_entry.get("sha1") or file_entry.get("md5") or "" storage = file_entry.get("storage", "embedded") diff --git a/scripts/scraper/batocera_scraper.py b/scripts/scraper/batocera_scraper.py index 2d56f000..aa67acc0 100644 --- a/scripts/scraper/batocera_scraper.py +++ b/scripts/scraper/batocera_scraper.py @@ -150,7 +150,9 @@ class Scraper(BaseScraper): clean_dict_str = "\n".join(lines) - clean_dict_str = clean_dict_str.replace("OrderedDict(", "dict(") + # OrderedDict({...}) -> just the inner dict literal + clean_dict_str = re.sub(r'OrderedDict\(\s*\{', '{', clean_dict_str) + clean_dict_str = re.sub(r'\}\s*\)', '}', clean_dict_str) try: return ast.literal_eval(clean_dict_str) diff --git a/scripts/verify.py b/scripts/verify.py index 02a029b1..cf60ddff 100644 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -120,13 +120,16 @@ def resolve_to_local_path(file_entry: dict, db: dict) -> str | None: if os.path.exists(path): candidates.append((path, entry.get("md5", ""))) if candidates: + if has_zipped_file: + candidates = [(p, m) for p, m in candidates if p.endswith(".zip")] if md5 and not has_zipped_file: md5_lower = md5.lower() for path, db_md5 in candidates: if db_md5.lower() == md5_lower: return path - primary = [p for p, _ in candidates if "/.variants/" not in p] - return primary[0] if primary else candidates[0][0] + if candidates: + primary = [p for p, _ in candidates if "/.variants/" not in p] + return primary[0] if primary else candidates[0][0] return None @@ -156,17 +159,21 @@ def verify_entry_md5(file_entry: dict, local_path: str | None) -> dict: if zipped_file: found_in_zip = False + had_error = False for md5_candidate in md5_list or [""]: result = check_inside_zip(local_path, zipped_file, md5_candidate) if result == Status.OK: return {"name": name, "status": Status.OK, "path": local_path} - if result != "not_in_zip": + if result == "error": + had_error = True + elif result != "not_in_zip": found_in_zip = True - reason = ( - f"{zipped_file} not found inside ZIP" - if not found_in_zip - else f"{zipped_file} MD5 mismatch inside ZIP" - ) + if had_error and not found_in_zip: + reason = f"{local_path} is not a valid ZIP or read error" + elif not found_in_zip: + reason = f"{zipped_file} not found inside ZIP" + else: + reason = f"{zipped_file} MD5 mismatch inside ZIP" return { "name": name, "status": Status.UNTESTED, "path": local_path, "reason": reason,