refactor: harden codebase and remove unicode artifacts

- fix urllib.parse.quote import (was urllib.request.quote)
- add operator precedence parens in generate_pack dedup check
- narrow bare except to specific types in batocera target scraper
- cache load_platform_config and build_zip_contents_index results
- add selective algorithm support to compute_hashes
- atomic write for fetch_large_file (tmp + rename)
- add response size limit to base scraper fetch
- extract build_target_cores_cache to common.py (dedup verify/pack)
- hoist _build_supplemental_index out of per-platform loop
- migrate function-attribute caches to module-level dicts
- add @abstractmethod to BaseTargetScraper.fetch_targets
- remove backward-compat re-exports from common.py
- replace em-dashes and unicode arrows with ASCII equivalents
- remove decorative section dividers and obvious comments
This commit is contained in:
Abdessamad Derraz
2026-03-29 23:15:20 +02:00
parent 0c5cde83e1
commit 2e21d64a08
17 changed files with 102 additions and 165 deletions

View File

@@ -1,4 +1,4 @@
"""Deduplicate bios/ directory — keep one canonical file per unique content.
"""Deduplicate bios/ directory - keep one canonical file per unique content.
Usage:
python scripts/dedup.py [--dry-run] [--bios-dir bios]
@@ -11,7 +11,7 @@ Two types of deduplication:
2. MAME DEVICE CLONES: Different filenames with identical content in the same
MAME directory (e.g., bbc_m87.zip and bbc_24bbc.zip are identical ZIPs).
These are NOT aliases — MAME loads each by its unique name. Instead of
These are NOT aliases - MAME loads each by its unique name. Instead of
deleting, we create a _mame_clones.json mapping so generate_pack.py can
pack all names from a single canonical file.
@@ -94,7 +94,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
if len(paths) <= 1:
continue
# Separate by filename — same name = true duplicate, different name = clone
# Separate by filename - same name = true duplicate, different name = clone
by_name: dict[str, list[str]] = defaultdict(list)
for p in paths:
by_name[os.path.basename(p)].append(p)
@@ -106,7 +106,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
name_paths.sort(key=path_priority)
true_dupes_to_remove.extend(name_paths[1:])
# Different filenames, same content — need special handling
# Different filenames, same content - need special handling
unique_names = sorted(by_name.keys())
if len(unique_names) > 1:
# Check if these are all in MAME/Arcade dirs AND all ZIPs
@@ -133,7 +133,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
true_dupes_to_remove.append(p)
else:
# Non-MAME different names (e.g., 64DD_IPL_US.n64 vs IPL_USA.n64)
# Keep ALL — each name may be needed by a different emulator
# Keep ALL - each name may be needed by a different emulator
# Only remove true duplicates (same name in multiple dirs)
pass
@@ -143,7 +143,7 @@ def deduplicate(bios_dir: str, dry_run: bool = False) -> dict:
# Find the best canonical across all paths
all_paths = [p for p in paths if p not in true_dupes_to_remove]
if not all_paths:
# All copies were marked for removal — keep the best one
# All copies were marked for removal - keep the best one
all_paths_sorted = sorted(paths, key=path_priority)
all_paths = [all_paths_sorted[0]]
true_dupes_to_remove = [p for p in paths if p != all_paths[0]]