feat: initial commit

This commit is contained in:
2026-04-18 08:59:04 +02:00
commit 862c0d1703
32 changed files with 8492 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
# Re-export the request and response models at package level so callers can
# import them from the package instead of from the individual submodules.
from .request import ScrapeRequest, SelectorDef
from .response import ScrapeResponse, HealthResponse
# Public names exposed by a star-import of this package.
__all__ = ["ScrapeRequest", "SelectorDef", "ScrapeResponse", "HealthResponse"]

41
service/models/request.py Normal file
View File

@@ -0,0 +1,41 @@
from __future__ import annotations

from typing import Any, Literal
from urllib.parse import urlsplit

from pydantic import BaseModel, field_validator
class SelectorDef(BaseModel):
    # One named extraction rule: a selector expression, the syntax it is
    # written in, and options for what to read from the match.
    # (Plain comments are used instead of a docstring so the generated model
    # schema is left untouched.)

    # Identifier for this rule; required.
    name: str

    # The selector expression itself; required.
    selector: str

    # Syntax of `selector`; only "css" and "xpath" are accepted.
    selector_type: Literal["css", "xpath"] = "css"

    # None = get text content
    attribute: str | None = None

    # NOTE(review): presumably "return every match instead of only the first"
    # -- confirm against the extraction code, which is not visible here.
    multiple: bool = False
class ScrapeRequest(BaseModel):
    # Request payload for one scrape: the target URL, which fetcher to use,
    # the selectors to apply and the fetch options.
    # (Plain comments are used instead of a docstring so the generated model
    # schema is left untouched.)

    # Target address; must be an http(s) URL with a host, enforced by
    # url_must_have_scheme below.
    url: str
    fetcher_type: Literal["http", "stealth", "dynamic"] = "http"
    selectors: list[SelectorDef] = []
    return_html: bool = False
    # Milliseconds; bounded to 1000..120000 by timeout_range below.
    timeout: int = 30000
    proxy: str | None = None
    headers: dict[str, str] = {}
    # dynamic-fetcher specific
    wait_selector: str | None = None
    network_idle: bool = False
    headless: bool = True

    @field_validator("url")
    @classmethod
    def url_must_have_scheme(cls, v: str) -> str:
        """Validate that *v* is an http(s) URL that names a host.

        Args:
            v: The raw ``url`` value supplied by the caller.

        Returns:
            ``v`` unchanged when it is acceptable.

        Raises:
            ValueError: If the value does not start with ``http://`` or
                ``https://`` (compared case-insensitively), or if no host
                follows the scheme (e.g. ``"http://"`` or ``"http:///path"``).
        """
        # URI schemes are case-insensitive, so "HTTP://host" is as valid as
        # "http://host". Only the first 8 characters (len("https://")) can
        # matter, so lower-case just that prefix.
        if not v[:8].lower().startswith(("http://", "https://")):
            raise ValueError("URL must start with http:// or https://")
        # A correct prefix alone is not enough: "http://" and "http://:80"
        # carry no host and could never be fetched.
        if not urlsplit(v).hostname:
            raise ValueError("URL must include a host")
        return v

    @field_validator("timeout")
    @classmethod
    def timeout_range(cls, v: int) -> int:
        """Validate that the timeout lies within 1000..120000 ms inclusive.

        Args:
            v: The ``timeout`` value in milliseconds.

        Returns:
            ``v`` unchanged when it is within range.

        Raises:
            ValueError: If ``v`` is below 1000 or above 120000.
        """
        if v < 1000 or v > 120_000:
            raise ValueError("timeout must be between 1000 and 120000 ms")
        return v

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel
class ScrapeResponse(BaseModel):
    # Result payload for one scrape. `url`, `status_code`, `fetcher_used` and
    # `elapsed_ms` are required; the remaining fields have defaults.
    # (Plain comments are used instead of a docstring so the generated model
    # schema is left untouched.)

    url: str
    status_code: int

    # Optional; defaults to None when no markup is attached.
    html: str | None = None

    # Free-form mapping of string keys to arbitrary values.
    # NOTE(review): presumably keyed by selector name -- confirm with the
    # code that builds the response.
    data: dict[str, Any] = {}

    fetcher_used: str

    # NOTE(review): the name suggests milliseconds -- confirm where it is set.
    elapsed_ms: float

    # Optional error text; None by default.
    error: str | None = None
class HealthResponse(BaseModel):
    # Health-check payload; all three fields are required (no defaults).
    # (Plain comments are used instead of a docstring so the generated model
    # schema is left untouched.)

    status: str
    version: str

    # NOTE(review): presumably reports whether the dynamic fetcher's session
    # is initialised -- confirm against the health endpoint.
    dynamic_session_ready: bool