from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, field_validator


# Describes one selector entry carried in ``ScrapeRequest.selectors``.
# NOTE: documented with comments rather than a class docstring so the
# JSON schema pydantic generates for this model stays unchanged.
class SelectorDef(BaseModel):
    name: str
    selector: str
    # Which query language ``selector`` is written in.
    selector_type: Literal["css", "xpath"] = "css"
    attribute: str | None = None  # None = get text content
    multiple: bool = False


# Request payload describing a single scrape: the target URL, which fetcher
# to use, the selectors to apply and assorted fetch options.
# NOTE: documented with comments rather than a class docstring so the
# JSON schema pydantic generates for this model stays unchanged.
class ScrapeRequest(BaseModel):
    url: str
    fetcher_type: Literal["http", "stealth", "dynamic"] = "http"
    selectors: list[SelectorDef] = []
    return_html: bool = False
    timeout: int = 30000  # milliseconds; bounds enforced by timeout_range
    proxy: str | None = None
    headers: dict[str, str] = {}

    # dynamic-fetcher specific
    wait_selector: str | None = None
    network_idle: bool = False
    headless: bool = True

    @field_validator("url")
    @classmethod
    def url_must_have_scheme(cls, value: str) -> str:
        """Accept *value* only if it begins with an http(s) scheme prefix.

        The prefix comparison is case-sensitive.

        Raises:
            ValueError: if the URL starts with neither ``http://`` nor
                ``https://``.
        """
        if value.startswith("http://") or value.startswith("https://"):
            return value
        raise ValueError("URL must start with http:// or https://")

    @field_validator("timeout")
    @classmethod
    def timeout_range(cls, value: int) -> int:
        """Accept *value* only if it lies in the inclusive range 1000..120000.

        Raises:
            ValueError: if the timeout is below 1000 or above 120000.
        """
        if value < 1000 or value > 120_000:
            raise ValueError("timeout must be between 1000 and 120000 ms")
        return value