from __future__ import annotations

import time
from abc import ABC, abstractmethod
from typing import Any

from scrapling import Fetcher, PlayWrightFetcher, StealthyFetcher

from ..models.request import ScrapeRequest, SelectorDef
from ..models.response import ScrapeResponse


def _extract_value(el: Any, attribute: str | None) -> str | None:
    """Pull one value from a single element.

    Returns the named attribute when *attribute* is truthy, otherwise the
    element's text. ``None`` elements (e.g. from ``css_first`` misses)
    yield ``None``.
    """
    if el is None:
        return None
    if attribute:
        return el.attrib.get(attribute)
    return el.text


def apply_selectors(page: Any, selectors: list[SelectorDef]) -> dict[str, Any]:
    """Extract data from a Scrapling page object using CSS/XPath selectors.

    For each ``SelectorDef``: queries the page (``css``/``xpath`` for
    multiple matches, ``css_first``/``xpath_first`` for a single one) and
    stores the extracted value(s) under ``sel.name``. A failing selector is
    recorded as ``None`` plus a ``"<name>_error"`` message instead of
    aborting the whole extraction.

    Args:
        page: Scrapling page/adaptor exposing ``css``/``css_first``/
            ``xpath``/``xpath_first``.
        selectors: Selector definitions to apply.

    Returns:
        Mapping of selector name to extracted string(s), with
        ``"<name>_error"`` keys for selectors that raised.
    """
    result: dict[str, Any] = {}
    for sel in selectors:
        try:
            # Anything other than "css" is treated as an XPath selector.
            if sel.selector_type == "css":
                elements = (
                    page.css(sel.selector)
                    if sel.multiple
                    else [page.css_first(sel.selector)]
                )
            else:
                elements = (
                    page.xpath(sel.selector)
                    if sel.multiple
                    else [page.xpath_first(sel.selector)]
                )

            if sel.multiple:
                # `elements or []` guards against a query returning None.
                result[sel.name] = [
                    _extract_value(el, sel.attribute) for el in (elements or [])
                ]
            else:
                result[sel.name] = _extract_value(
                    elements[0] if elements else None, sel.attribute
                )
        except Exception as exc:
            # Best-effort extraction: one bad selector must not sink the rest.
            result[sel.name] = None
            result[f"{sel.name}_error"] = str(exc)
    return result


class BaseScraper(ABC):
    """Common interface for scraper backends, plus shared response assembly."""

    @abstractmethod
    async def scrape(self, req: ScrapeRequest) -> ScrapeResponse:
        """Fetch ``req.url`` and return a populated ``ScrapeResponse``."""
        ...

    def _build_response(
        self,
        req: ScrapeRequest,
        page: Any,
        fetcher_name: str,
        start: float,
    ) -> ScrapeResponse:
        """Assemble a ``ScrapeResponse`` from a fetched page.

        Args:
            req: The originating request (controls HTML echo and selectors).
            page: Fetched Scrapling page object.
            fetcher_name: Identifier of the backend that produced *page*.
            start: ``time.perf_counter()`` timestamp taken before the fetch.

        Returns:
            A response carrying the page HTML (if requested), selector
            extraction results, and elapsed wall time in milliseconds.
        """
        elapsed = (time.perf_counter() - start) * 1000
        html = page.html if req.return_html else None
        data = apply_selectors(page, req.selectors) if req.selectors else {}
        return ScrapeResponse(
            url=req.url,
            # Some fetchers don't expose a status attribute; assume success.
            status_code=getattr(page, "status", 200),
            html=html,
            data=data,
            fetcher_used=fetcher_name,
            elapsed_ms=round(elapsed, 2),
        )