refactor: harden codebase and remove unicode artifacts

- fix urllib.parse.quote import (was urllib.request.quote)
- add operator precedence parens in generate_pack dedup check
- narrow bare except to specific types in batocera target scraper
- cache load_platform_config and build_zip_contents_index results
- add selective algorithm support to compute_hashes
- atomic write for fetch_large_file (tmp + rename)
- add response size limit to base scraper fetch
- extract build_target_cores_cache to common.py (dedup verify/pack)
- hoist _build_supplemental_index out of per-platform loop
- migrate function-attribute caches to module-level dicts
- add @abstractmethod to BaseTargetScraper.fetch_targets
- remove backward-compat re-exports from common.py
- replace em-dashes and unicode arrows with ASCII equivalents
- remove decorative section dividers and obvious comments
This commit is contained in:
Abdessamad Derraz
2026-03-29 23:15:20 +02:00
parent 0c5cde83e1
commit 2e21d64a08
17 changed files with 102 additions and 165 deletions

View File

@@ -1,4 +1,4 @@
"""Deduplicate bios/ directory — keep one canonical file per unique content.
"""Deduplicate bios/ directory - keep one canonical file per unique content.
Usage:
python scripts/dedup.py [--dry-run] [--bios-dir bios]
@@ -11,7 +11,7 @@ Two types of deduplication:
2. MAME DEVICE CLONES: Different filenames with identical content in the same
MAME directory (e.g., bbc_m87.zip and bbc_24bbc.zip are identical ZIPs).
These are NOT aliases — MAME loads each by its unique name. Instead of
These are NOT aliases - MAME loads each by its unique name. Instead of
deleting, we create a _mame_clones.json mapping so generate_pack.py can
pack all names from a single canonical file.
@@ -94,7 +94,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
if len(paths) <= 1:
continue
# Separate by filename — same name = true duplicate, different name = clone
# Separate by filename - same name = true duplicate, different name = clone
by_name: dict[str, list[str]] = defaultdict(list)
for p in paths:
by_name[os.path.basename(p)].append(p)
@@ -106,7 +106,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
name_paths.sort(key=path_priority)
true_dupes_to_remove.extend(name_paths[1:])
# Different filenames, same content — need special handling
# Different filenames, same content - need special handling
unique_names = sorted(by_name.keys())
if len(unique_names) > 1:
# Check if these are all in MAME/Arcade dirs AND all ZIPs
@@ -133,7 +133,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
true_dupes_to_remove.append(p)
else:
# Non-MAME different names (e.g., 64DD_IPL_US.n64 vs IPL_USA.n64)
# Keep ALL — each name may be needed by a different emulator
# Keep ALL - each name may be needed by a different emulator
# Only remove true duplicates (same name in multiple dirs)
pass
@@ -143,7 +143,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
# Find the best canonical across all paths
all_paths = [p for p in paths if p not in true_dupes_to_remove]
if not all_paths:
# All copies were marked for removal — keep the best one
# All copies were marked for removal - keep the best one
all_paths_sorted = sorted(paths, key=path_priority)
all_paths = [all_paths_sorted[0]]
true_dupes_to_remove = [p for p in paths if p != all_paths[0]]