Files
scrapling/service/scrapers/fetcher.py

31 lines
824 B
Python

from __future__ import annotations
import asyncio
import time
from scrapling import Fetcher
from ..models.request import ScrapeRequest
from ..models.response import ScrapeResponse
from .base import BaseScraper
class HttpScraper(BaseScraper):
    """Scraper backed by Scrapling's ``Fetcher``: plain HTTP, fastest option."""

    async def scrape(self, req: ScrapeRequest) -> ScrapeResponse:
        """Fetch ``req.url`` with a plain HTTP GET and wrap the result.

        Args:
            req: The scrape request. This method reads ``url``, ``timeout``,
                ``headers`` and ``proxy`` from it.

        Returns:
            Whatever ``self._build_response`` produces for the fetched page,
            tagged with the ``"http"`` scraper label and the start timestamp.
        """
        started_at = time.perf_counter()
        client = Fetcher(auto_match=False)

        # NOTE(review): the division by 1000 suggests ``req.timeout`` is in
        # milliseconds and the fetcher expects seconds; confirm against the
        # request model.
        call_args: dict = {"url": req.url, "timeout": req.timeout / 1000}

        # Headers and proxy are forwarded only when the request supplies a
        # truthy value, so the fetcher's own defaults apply otherwise.
        optional = {"headers": req.headers, "proxy": req.proxy}
        call_args.update(
            {name: value for name, value in optional.items() if value}
        )

        # ``client.get`` is invoked through ``asyncio.to_thread`` so the call
        # runs in a worker thread rather than on the event loop.
        fetched_page = await asyncio.to_thread(client.get, **call_args)
        return self._build_response(req, fetched_page, "http", started_at)