# Taskfile for the `task` runner (schema version 3).
# Taskfile schema version.
version: '3'

# Env file(s) Task reads environment variables from.
dotenv:
  - '.env.test'

# Global variables, referenced by the tasks as {{.NAME}}.
vars:
  # Page fetched by the scrape tasks.
  URL: 'https://example.com'
  # Value passed to --fetcher (task descriptions list http|stealth|dynamic).
  FETCHER: 'http'
  # Value passed to --format (task descriptions list pretty|json).
  FORMAT: 'pretty'
  # Base URL queried by service:health.
  SERVICE_URL: 'http://localhost:8765'
|
|
|
|
tasks:
|
|
|
|
# ── Build ──────────────────────────────────────────────────────────────────
|
|
|
|
# One-shot build: delegates to the package's `build` npm script.
build:
  desc: 'Compile TypeScript to dist/'
  cmds:
    - 'npm run build'
|
|
|
|
# Development loop: delegates to the package's `dev` npm script.
dev:
  desc: 'Watch mode — recompile on change'
  cmds:
    - 'npm run dev'
|
|
|
|
# ── Code quality ───────────────────────────────────────────────────────────
|
|
|
|
# Static analysis: delegates to the package's `lint` npm script.
lint:
  desc: 'Run ESLint'
  cmds:
    - 'npm run lint'
|
|
|
|
# Code formatting: delegates to the package's `format` npm script.
format:
  desc: 'Format source with Prettier'
  cmds:
    - 'npm run format'
|
|
|
|
# ── Tests ──────────────────────────────────────────────────────────────────
|
|
|
|
# Single test run via the package's `test` npm script.
test:
  desc: 'Run all unit tests'
  cmds:
    - 'npm test'
|
|
|
|
# Continuous test run via the package's `test:watch` npm script.
test:watch:
  desc: 'Run tests in watch mode'
  cmds:
    - 'npm run test:watch'
|
|
|
|
# Test run with coverage via the package's `test:coverage` npm script.
test:coverage:
  desc: 'Run tests with coverage report'
  cmds:
    - 'npm run test:coverage'
|
|
|
|
# ── Python service ─────────────────────────────────────────────────────────
|
|
|
|
# Bootstraps the Python side. Every command runs from the service/ directory.
service:install:
  desc: 'Install Python service dependencies (creates .venv in service/)'
  dir: 'service'
  cmds:
    # 1. Create the virtual environment.
    - 'python3 -m venv .venv'
    # 2. Editable install of the service, including its "dev" extra.
    - '.venv/bin/pip install -e ".[dev]"'
    # 3. Run Scrapling's own post-install step.
    - '.venv/bin/scrapling install'
|
|
|
|
# Runs the ASGI app `main:app` with the uvicorn binary from service/.venv.
# The port matches the default SERVICE_URL (http://localhost:8765).
service:start:
  desc: 'Start Scrapling microservice on port 8765'
  dir: 'service'
  cmds:
    - >-
      .venv/bin/uvicorn main:app
      --host 0.0.0.0
      --port 8765
      --reload
|
|
|
|
# Queries <SERVICE_URL>/health and pretty-prints the JSON response.
service:health:
  desc: 'Check Scrapling service health'
  cmds:
    # -s hides the progress meter; -S keeps curl's own error message, so an
    # unreachable service reports the connection failure instead of only a
    # JSON parse error from json.tool on empty input.
    # The URL is quoted so a SERVICE_URL override containing shell
    # metacharacters is passed to curl as a single argument.
    - 'curl -sS "{{.SERVICE_URL}}/health" | python3 -m json.tool'
|
|
|
|
# ── Scrapling CLI runner ────────────────────────────────────────────────────
|
|
|
|
# Generic scrape runner; URL, FETCHER and FORMAT come from the global vars.
scrape:
  desc: 'Scrape a URL [URL=https://example.com] [FETCHER=http|stealth|dynamic] [FORMAT=pretty|json]'
  cmds:
    # Template values are substituted into the command text before the shell
    # parses it, so each expansion is double-quoted: URLs containing
    # & ? ; # or spaces would otherwise be split, globbed or backgrounded.
    # NOTE: `$` and backticks inside a value are still expanded by the shell.
    - >-
      npx ts-node scripts/scrapling-run.ts "{{.URL}}"
      --fetcher "{{.FETCHER}}"
      --format "{{.FORMAT}}"
|
|
|
|
# Scrape variant that passes --html and forces JSON output.
scrape:html:
  desc: 'Scrape and return raw HTML [URL=https://example.com]'
  cmds:
    # The URL is double-quoted because it is substituted into the command
    # text before shell parsing; query strings with & ? ; # would otherwise
    # be split, globbed or backgrounded.
    - >-
      npx ts-node scripts/scrapling-run.ts "{{.URL}}"
      --html
      --format json
|
|
|
|
# Scrape variant pinned to the `stealth` fetcher; FORMAT is still honoured.
scrape:stealth:
  desc: 'Scrape with stealth fetcher [URL=https://example.com]'
  cmds:
    # Expansions are double-quoted because they are substituted into the
    # command text before shell parsing; URLs with & ? ; # would otherwise
    # be split, globbed or backgrounded.
    - >-
      npx ts-node scripts/scrapling-run.ts "{{.URL}}"
      --fetcher stealth
      --format "{{.FORMAT}}"
|
|
|
|
# Scrape variant pinned to the `dynamic` fetcher; FORMAT is still honoured.
scrape:dynamic:
  desc: 'Scrape with Playwright browser [URL=https://example.com]'
  cmds:
    # Expansions are double-quoted because they are substituted into the
    # command text before shell parsing; URLs with & ? ; # would otherwise
    # be split, globbed or backgrounded.
    - >-
      npx ts-node scripts/scrapling-run.ts "{{.URL}}"
      --fetcher dynamic
      --format "{{.FORMAT}}"
|
|
|
|
# ── Docker ─────────────────────────────────────────────────────────────────
|
|
|
|
# Builds ./service as the image tagged `scrapling-service`.
docker:build:
  desc: 'Build Docker image for Scrapling service'
  cmds:
    - 'docker build -t scrapling-service ./service'
|
|
|
|
# Brings the Compose stack up in detached mode (-d).
docker:up:
  desc: 'Start full stack (n8n + scrapling-service) via Docker Compose'
  cmds:
    - 'docker compose up -d'
|
|
|
|
# Tears the Compose stack down.
docker:down:
  desc: 'Stop Docker Compose stack'
  cmds:
    - 'docker compose down'
|
|
|
|
# Follows (-f) the Compose stack's log output until interrupted.
docker:logs:
  desc: 'Tail Docker Compose logs'
  cmds:
    - 'docker compose logs -f'
|
|
|
|
# ── Composite ─────────────────────────────────────────────────────────────
|
|
|
|
# Composite gate: invokes the lint task, then the test task, in list order.
check:
  desc: 'Lint + test (pre-push safety check)'
  cmds:
    - task: lint
    - task: test
|
|
|
|
# First-time project setup: installs npm packages and seeds .env.test from
# its example file. Python service dependencies are handled separately by
# the service:install task.
setup:
  desc: 'Install all dependencies and copy .env.test.example if missing'
  cmds:
    - 'npm install'
    # Only seed .env.test when it does not exist, so local edits are never
    # overwritten. `cp` and `echo` are chained with && so that a failed copy
    # (for example, a missing .env.test.example) fails the step instead of
    # printing a misleading "created" message and exiting 0.
    - |
      if [ ! -f .env.test ]; then
        cp .env.test.example .env.test &&
          echo ".env.test created — fill in your credentials"
      fi
|