# scrapling/service/scrapers/fetcher.py

from __future__ import annotations

import asyncio
import time

from scrapling import Fetcher

from ..models.request import ScrapeRequest
from ..models.response import ScrapeResponse
from .base import BaseScraper


class HttpScraper(BaseScraper):
    """Scraper backed by Scrapling's ``Fetcher`` (plain HTTP, fastest option)."""

    async def scrape(self, req: ScrapeRequest) -> ScrapeResponse:
        """Fetch ``req.url`` with an HTTP GET and build the scrape response.

        The synchronous ``Fetcher.get`` call is handed to
        ``asyncio.to_thread`` so it executes in a worker thread instead of
        on the event loop.

        Args:
            req: The scrape request. ``url`` and ``timeout`` are always
                forwarded to the fetcher; ``headers`` and ``proxy`` are
                forwarded only when they are truthy.

        Returns:
            The result of ``self._build_response`` called with the request,
            the fetched page, the ``"http"`` label and the start timestamp.
        """
        started = time.perf_counter()
        client = Fetcher(auto_match=False)

        # NOTE(review): the division by 1000 presumably converts a
        # millisecond timeout into seconds -- confirm against ScrapeRequest.
        call_args: dict = {"url": req.url, "timeout": req.timeout / 1000}

        # Optional settings are only passed along when the request sets them,
        # so the fetcher's own defaults apply otherwise.
        optional = (("headers", req.headers), ("proxy", req.proxy))
        call_args.update((key, value) for key, value in optional if value)

        page = await asyncio.to_thread(client.get, **call_args)
        return self._build_response(req, page, "http", started)