refactor: extract _fetch_raw to BaseScraper (DRY)

Identical _fetch_raw() implementation (URL fetch + cache + error handling)
was duplicated in 4 scrapers. Moved the method to BaseScraper, with the url accepted as a BaseScraper.__init__ parameter.

Each scraper now passes url to super().__init__() and inherits _fetch_raw().
Eliminates ~48 lines of duplicated code.

DRY audit now clean: resolve logic in common.py, scraper CLI in base_scraper,
_fetch_raw in BaseScraper. Remaining duplications are justified (different
list_platforms semantics, context-specific hash computation).
This commit is contained in:
Abdessamad Derraz
2026-03-18 08:22:21 +01:00
parent 2466fc4a97
commit 3de4bf8190
5 changed files with 22 additions and 53 deletions

View File

@@ -88,21 +88,8 @@ class Scraper(BaseScraper):
"""Scraper for libretro System.dat."""
def __init__(self, url: str = SOURCE_URL):
    # NOTE(review): this span is a diff hunk with +/- markers stripped —
    # the attribute assignments below are the removed (pre-refactor) lines
    # and the super() call is the added line; they did not coexist.
    # url: source location of the System.dat file (defaults to SOURCE_URL).
    self.url = url
    # Lazy cache for the fetched DAT text; None until first successful fetch.
    self._raw_data: str | None = None
    # Post-refactor: BaseScraper owns url storage and _fetch_raw().
    super().__init__(url=url)
def _fetch_raw(self) -> str:
"""Fetch raw DAT content from source URL."""
if self._raw_data is not None:
return self._raw_data
try:
req = urllib.request.Request(self.url, headers={"User-Agent": "retrobios-scraper/1.0"})
with urllib.request.urlopen(req, timeout=30) as resp:
self._raw_data = resp.read().decode("utf-8")
return self._raw_data
except urllib.error.URLError as e:
raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e
def fetch_requirements(self) -> list[BiosRequirement]:
"""Parse System.dat and return BIOS requirements."""