refactor: extract _fetch_raw to BaseScraper (DRY)

Identical _fetch_raw() implementations (URL fetch + cache + error handling)
were duplicated across 4 scrapers. Moved to BaseScraper, whose __init__ now
accepts a url parameter and stores it for _fetch_raw() to use.

Each scraper now passes url to super().__init__() and inherits _fetch_raw().
Eliminates ~48 lines of duplicated code.

DRY audit now clean: resolve logic in common.py, scraper CLI in base_scraper,
_fetch_raw in BaseScraper. Remaining duplications are justified (different
list_platforms semantics, context-specific hash computation).
This commit is contained in:
Abdessamad Derraz
2026-03-18 08:22:21 +01:00
parent 2466fc4a97
commit 3de4bf8190
5 changed files with 22 additions and 53 deletions

View File

@@ -48,6 +48,24 @@ class ChangeSet:
class BaseScraper(ABC):
"""Abstract base class for platform BIOS requirement scrapers."""
def __init__(self, url: str = ""):
    """Initialize the scraper with its source *url*.

    Args:
        url: Location of the platform's requirement data. May be left
            empty here; ``_fetch_raw`` raises ``ValueError`` if it is
            still unset when a fetch is attempted.
    """
    self.url = url
    # Raw-content cache: None means "not fetched yet"; _fetch_raw
    # populates it on first use and returns it on subsequent calls.
    self._raw_data: str | None = None
def _fetch_raw(self) -> str:
    """Fetch the raw text content of ``self.url``, caching after the first call.

    Returns:
        The decoded response body (cached for the object's lifetime).

    Raises:
        ValueError: If no source URL is configured.
        ConnectionError: If the request fails or times out.
    """
    if self._raw_data is not None:
        return self._raw_data
    if not self.url:
        raise ValueError("No source URL configured")
    try:
        req = urllib.request.Request(
            self.url, headers={"User-Agent": "retrobios-scraper/1.0"}
        )
        with urllib.request.urlopen(req, timeout=30) as resp:
            # Honor the charset declared in the Content-Type header instead
            # of assuming UTF-8; fall back to UTF-8 when none is declared.
            charset = resp.headers.get_content_charset() or "utf-8"
            self._raw_data = resp.read().decode(charset)
        return self._raw_data
    except (urllib.error.URLError, TimeoutError) as e:
        # TimeoutError: a timeout during the body read is raised directly
        # (not wrapped in URLError), so it must be caught explicitly to
        # keep the ConnectionError contract.
        raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e
@abstractmethod
def fetch_requirements(self) -> list[BiosRequirement]:
"""Fetch current BIOS requirements from the platform source."""