mirror of
https://github.com/Abdess/retroarch_system.git
synced 2026-04-18 23:02:33 -05:00
refactor: extract scraper_cli() to base_scraper.py (DRY)
Shared CLI boilerplate for all scrapers: argparse, dry-run, JSON, and YAML output. Each of the 4 scrapers (libretro, batocera, retrobat, emudeck) had its ~58-line main() reduced to 3 lines calling scraper_cli(), eliminating ~220 lines of duplicated boilerplate. recalbox and coreinfo keep a custom main() (extra flags: --full, --compare-db).
This commit is contained in:
@@ -135,6 +135,64 @@ def fetch_github_latest_version(repo: str) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def scraper_cli(scraper_class: type, description: str = "Scrape BIOS requirements") -> None:
    """Shared CLI entry point for all scrapers. Eliminates main() boilerplate.

    Parses the common flags (--dry-run, --json, --output), instantiates
    ``scraper_class``, fetches its BIOS requirements, and renders them in the
    requested format. With no flags, prints a one-line summary.

    Args:
        scraper_class: A BaseScraper subclass exposing ``fetch_requirements()``.
        description: Text shown in ``--help`` output.

    Exits with status 1 if fetching fails (ConnectionError / ValueError) or
    if ``--output`` is requested without PyYAML installed.
    """
    # All three modules are needed before any branch runs; importing them up
    # front fixes two latent NameErrors in the original: `sys` was imported
    # *after* being used in the except block, and `json` was never imported.
    import argparse
    import json
    import sys

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--dry-run", action="store_true", help="Show scraped data")
    parser.add_argument("--output", "-o", help="Output YAML file")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    scraper = scraper_class()
    try:
        reqs = scraper.fetch_requirements()
    except (ConnectionError, ValueError) as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    if args.dry_run:
        # Group by system and show required/optional counts per system.
        by_system: dict[str, list] = {}
        for req in reqs:
            by_system.setdefault(req.system, []).append(req)
        for system, files in sorted(by_system.items()):
            req_count = sum(1 for f in files if f.required)
            opt_count = len(files) - req_count
            print(f" {system}: {req_count} required, {opt_count} optional")
        print(f"\nTotal: {len(reqs)} BIOS entries across {len(by_system)} systems")
        return

    if args.json:
        data = [{"name": r.name, "system": r.system, "sha1": r.sha1, "md5": r.md5,
                 "size": r.size, "required": r.required} for r in reqs]
        print(json.dumps(data, indent=2))
        return

    if args.output:
        # Generate platform YAML. PyYAML is an optional dependency, so fail
        # with a clear message instead of a traceback (matches the behavior
        # of the per-scraper main() functions this helper replaces).
        try:
            import yaml
        except ImportError:
            print("Error: PyYAML required (pip install pyyaml)", file=sys.stderr)
            sys.exit(1)
        config = {"systems": {}}
        for req in reqs:
            sys_id = req.system
            config["systems"].setdefault(sys_id, {"files": []})
            entry = {"name": req.name, "destination": req.destination or req.name, "required": req.required}
            if req.sha1:
                entry["sha1"] = req.sha1
            if req.md5:
                entry["md5"] = req.md5
            if req.zipped_file:
                entry["zipped_file"] = req.zipped_file
            config["systems"][sys_id]["files"].append(entry)
        with open(args.output, "w") as f:
            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
        print(f"Written {len(reqs)} entries to {args.output}")
        return

    print(f"Scraped {len(reqs)} requirements. Use --dry-run, --json, or --output.")
||||||
def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None:
|
def fetch_github_latest_tag(repo: str, prefix: str = "") -> str | None:
|
||||||
"""Fetch the most recent matching tag from a GitHub repo."""
|
"""Fetch the most recent matching tag from a GitHub repo."""
|
||||||
url = f"https://api.github.com/repos/{repo}/tags?per_page=50"
|
url = f"https://api.github.com/repos/{repo}/tags?per_page=50"
|
||||||
|
|||||||
@@ -259,60 +259,8 @@ class Scraper(BaseScraper):
|
|||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point for testing.

    Thin wrapper: all flag parsing and output formatting lives in the
    shared scraper_cli() helper in base_scraper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape batocera BIOS requirements")
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -403,58 +403,8 @@ class Scraper(BaseScraper):
|
|||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point for testing.

    Delegates argument parsing and output rendering to the shared
    scraper_cli() helper in base_scraper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape emudeck BIOS requirements")
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -263,60 +263,8 @@ class Scraper(BaseScraper):
|
|||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point for testing.

    All CLI boilerplate (argparse, --dry-run/--json/--output handling)
    is provided by the shared scraper_cli() helper.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape libretro BIOS requirements")
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -158,58 +158,8 @@ class Scraper(BaseScraper):
|
|||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point for testing.

    Defers to the shared scraper_cli() helper for flag parsing and
    output formatting.
    """
    from scripts.scraper.base_scraper import scraper_cli

    scraper_cli(Scraper, "Scrape retrobat BIOS requirements")
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user