fix: scrapers merge into existing YAML instead of overwriting

This commit is contained in:
Abdessamad Derraz
2026-03-30 16:31:40 +02:00
parent 6818a18a42
commit 425ea064ae
2 changed files with 64 additions and 42 deletions

View File

@@ -8,6 +8,7 @@ import urllib.request
import urllib.error
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
@@ -231,7 +232,28 @@ def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirement
if req.zipped_file:
entry["zipped_file"] = req.zipped_file
config["systems"][sys_id]["files"].append(entry)
with open(args.output, "w") as f:
# Merge into existing YAML: preserve fields the scraper doesn't generate
# (data_directories, case_insensitive_fs, manually added metadata).
# The scraper replaces systems + files; everything else is preserved.
output_path = Path(args.output)
if output_path.exists():
with open(output_path) as f:
existing = yaml.safe_load(f) or {}
# Preserve existing top-level keys the scraper did not generate:
# a key counts as scraper-generated only if it is present in the NEW config,
# so any key found only in the existing file is copied over unchanged.
for key, val in existing.items():
if key not in config:
config[key] = val
# Preserve per-system fields the scraper does not generate.
# Only data_directories is carried over for now; other manual per-system
# additions (e.g. native_id) are kept only if added to the tuple below.
existing_systems = existing.get("systems", {})
for sys_id, sys_data in config.get("systems", {}).items():
old_sys = existing_systems.get(sys_id, {})
for field in ("data_directories",):
if field in old_sys and field not in sys_data:
sys_data[field] = old_sys[field]
with open(output_path, "w") as f:
yaml.dump(config, f, default_flow_style=False, sort_keys=False)
print(f"Written {len(reqs)} entries to {args.output}")
return