feat: initial commit
This commit is contained in:
4
service/models/__init__.py
Normal file
4
service/models/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from .request import ScrapeRequest, SelectorDef
|
||||
from .response import ScrapeResponse, HealthResponse
|
||||
|
||||
# Names re-exported as the public API of this package.
__all__ = [
    "ScrapeRequest",
    "SelectorDef",
    "ScrapeResponse",
    "HealthResponse",
]
|
||||
41
service/models/request.py
Normal file
41
service/models/request.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
|
||||
class SelectorDef(BaseModel):
    """One named extraction rule: a CSS or XPath selector and what to read from it."""

    # Label for this rule.
    # NOTE(review): presumably the key the extracted value is reported under;
    # confirm against the code that consumes these definitions.
    name: str

    # The selector expression, written in the language named by ``selector_type``.
    selector: str

    # Selector language; only "css" and "xpath" validate, "css" is the default.
    selector_type: Literal["css", "xpath"] = "css"

    # Attribute to read from the match; None means take the text content.
    attribute: str | None = None

    # NOTE(review): looks like this switches between a single match and all
    # matches; the consumer is not visible here, so confirm before relying on it.
    multiple: bool = False
|
||||
|
||||
|
||||
class ScrapeRequest(BaseModel):
    """Request payload describing one page to scrape and how to fetch it.

    Validation enforced by this model:
      * ``url`` must begin with ``http://`` or ``https://``; the scheme is
        matched case-insensitively.
      * ``timeout`` must lie in the inclusive range 1000..120000 (milliseconds).
    """

    # Target address; checked by ``url_must_have_scheme``.
    url: str

    # Fetcher selection; one of the three literals, "http" by default.
    fetcher_type: Literal["http", "stealth", "dynamic"] = "http"

    # Extraction rules, empty by default. A literal [] is acceptable here:
    # pydantic copies mutable field defaults for each instance.
    selectors: list[SelectorDef] = []

    # NOTE(review): presumably asks for the page HTML to be included in the
    # response; the handler is not visible here, so confirm.
    return_html: bool = False

    # Timeout in milliseconds; checked by ``timeout_range``.
    timeout: int = 30000

    # Optional proxy string; its expected format is not established in this file.
    proxy: str | None = None

    # Extra headers as a string-to-string mapping, empty by default.
    headers: dict[str, str] = {}

    # dynamic-fetcher specific
    wait_selector: str | None = None
    network_idle: bool = False
    headless: bool = True

    @field_validator("url")
    @classmethod
    def url_must_have_scheme(cls, v: str) -> str:
        """Accept only http(s) URLs and return the value unchanged.

        URI schemes are case-insensitive (RFC 3986 section 3.1), so the prefix
        is compared on a lowercased copy; ``HTTPS://host`` is therefore valid.
        The original spelling of ``v`` is what gets stored.

        Raises:
            ValueError: if ``v`` does not start with an http:// or https:// prefix.
        """
        if not v.lower().startswith(("http://", "https://")):
            raise ValueError("URL must start with http:// or https://")
        return v

    @field_validator("timeout")
    @classmethod
    def timeout_range(cls, v: int) -> int:
        """Accept only timeouts between 1000 and 120000 ms inclusive.

        Raises:
            ValueError: if ``v`` falls outside that range.
        """
        if not (1000 <= v <= 120_000):
            raise ValueError("timeout must be between 1000 and 120000 ms")
        return v
|
||||
21
service/models/response.py
Normal file
21
service/models/response.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ScrapeResponse(BaseModel):
    """Result payload for a scrape: status, optional HTML, data, timing and error."""

    # URL this result refers to.
    url: str

    # Integer status code.
    # NOTE(review): presumably the HTTP status of the fetch; confirm with the producer.
    status_code: int

    # HTML text when supplied; None by default.
    html: str | None = None

    # Extracted values keyed by string; empty by default.
    data: dict[str, Any] = {}

    # Name of the fetcher that produced this result.
    # NOTE(review): presumably one of the request's fetcher_type values; confirm.
    fetcher_used: str

    # Elapsed time; milliseconds going by the field name.
    elapsed_ms: float

    # Error text, or None (the default) when no error is reported.
    error: str | None = None
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Health-check payload: status text, version text and a readiness flag."""

    # Free-form status string; the set of values is not defined in this file.
    status: str

    # Version string reported by the service.
    version: str

    # NOTE(review): presumably True once the session behind the "dynamic"
    # fetcher is usable; the code that sets it is not visible here, so confirm.
    dynamic_session_ready: bool
|
||||
Reference in New Issue
Block a user