fix: audit fixes across verify, pack, security, and performance

- fix KeyError in compute_coverage (generate_readme, generate_site)
- fix comma-separated MD5 handling in generate_pack check_inside_zip
- fix _verify_file_hash to handle multi-MD5 for large files
- fix external downloads not tracked in seen_destinations/file_status
- fix tar path traversal in _is_safe_tar_member (refresh_data_dirs)
- fix predictable tmp path in download.py
- fix _sanitize_path to filter "." components
- remove blanket data_dir suppression in find_undeclared_files
- remove blanket data_dir suppression in cross_reference
- add status_counts to verify_platform return value
- add md5_composite cache for repeated ZIP hashing
This commit is contained in:
Abdessamad Derraz
2026-03-19 14:04:34 +01:00
parent e1410ef4a6
commit 38d605c7d5
9 changed files with 68 additions and 45 deletions

View File

@@ -58,19 +58,28 @@ def md5sum(source: str | Path | object) -> str:
return h.hexdigest()
# Maps str(path) -> ((mtime_ns, size), digest). The stat signature lets a
# cached digest be discarded when the ZIP at that path has been rewritten.
_md5_composite_cache: dict[str, tuple[tuple[int, int], str]] = {}


def md5_composite(filepath: str | Path) -> str:
    """Compute composite MD5 of a ZIP - matches Recalbox's Zip::Md5Composite().

    Sorts filenames alphabetically, reads each file's contents in order,
    feeds everything into a single MD5 hasher. The result is independent
    of ZIP compression level or metadata.

    Results are cached per path. A cached digest is reused only while the
    file's modification time and size are unchanged, so a ZIP that is
    rewritten at the same path is hashed again instead of returning a
    stale value.

    Args:
        filepath: Path to the ZIP archive on disk.

    Returns:
        Hex MD5 digest of the concatenated member contents. Entries whose
        name ends with "/" (directories) are skipped.

    Raises:
        OSError: If the file cannot be stat'ed or opened.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
    """
    key = str(filepath)
    stat = Path(filepath).stat()
    signature = (stat.st_mtime_ns, stat.st_size)

    cached = _md5_composite_cache.get(key)
    if cached is not None and cached[0] == signature:
        return cached[1]

    h = hashlib.md5()
    with zipfile.ZipFile(filepath) as zf:
        names = sorted(n for n in zf.namelist() if not n.endswith("/"))
        for name in names:
            # Stream each member in chunks so large entries are never held
            # fully in memory; the digest is identical to hashing zf.read().
            with zf.open(name) as member:
                while chunk := member.read(65536):
                    h.update(chunk)

    result = h.hexdigest()
    _md5_composite_cache[key] = (signature, result)
    return result
def load_platform_config(platform_name: str, platforms_dir: str = "platforms") -> dict: