mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-13 12:22:33 -05:00
refactor: extract _fetch_raw to BaseScraper (DRY)
Identical _fetch_raw() implementation (URL fetch + cache + error handling) was duplicated in 4 scrapers. Moved to BaseScraper.__init__ with url param. Each scraper now passes url to super().__init__() and inherits _fetch_raw(). Eliminates ~48 lines of duplicated code. DRY audit now clean: resolve logic in common.py, scraper CLI in base_scraper, _fetch_raw in BaseScraper. Remaining duplications are justified (different list_platforms semantics, context-specific hash computation).
This commit is contained in:
@@ -48,6 +48,24 @@ class ChangeSet:
|
||||
class BaseScraper(ABC):
    """Abstract base class for platform BIOS requirement scrapers.

    Subclasses provide a source ``url`` and implement
    ``fetch_requirements()``; the shared ``_fetch_raw()`` helper handles
    URL fetching, in-memory caching, and error translation so each
    scraper does not have to duplicate that logic.
    """

    def __init__(self, url: str = "", timeout: float = 30.0):
        """Initialize the scraper.

        Args:
            url: Source URL to scrape. May be empty for scrapers that
                never call ``_fetch_raw()``.
            timeout: Socket timeout in seconds for the HTTP request.
                Defaults to 30, the value previously hard-coded.
        """
        self.url = url
        self.timeout = timeout
        # Cache of the raw fetched payload; populated on first _fetch_raw().
        self._raw_data: str | None = None

    def _fetch_raw(self) -> str:
        """Fetch raw content from the source URL. Cached after first call.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            ValueError: If no source URL is configured.
            ConnectionError: If the HTTP request fails (wraps the
                underlying ``urllib.error.URLError``).
        """
        if self._raw_data is not None:
            return self._raw_data
        if not self.url:
            raise ValueError("No source URL configured")
        try:
            # Set an explicit User-Agent: some hosts reject urllib's default.
            req = urllib.request.Request(
                self.url, headers={"User-Agent": "retrobios-scraper/1.0"}
            )
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                self._raw_data = resp.read().decode("utf-8")
                return self._raw_data
        except urllib.error.URLError as e:
            # Normalize transport-level failures to one exception type
            # so callers only need to catch ConnectionError.
            raise ConnectionError(f"Failed to fetch {self.url}: {e}") from e

    @abstractmethod
    def fetch_requirements(self) -> list[BiosRequirement]:
        """Fetch current BIOS requirements from the platform source."""
||||
|
||||
Reference in New Issue
Block a user