# scrapling/Taskfile.yml
# (File-viewer header from the original paste: 139 lines, 4.2 KiB, YAML.)

# Taskfile schema version.
version: '3'

# Environment file(s) loaded for the tasks in this file.
dotenv:
  - '.env.test'

# Global defaults. The task descriptions advertise overriding these per
# invocation, e.g. `task scrape URL=https://example.org FETCHER=stealth`.
vars:
  # Page handed to scripts/scrapling-run.ts by the scrape:* tasks.
  URL: 'https://example.com'
  # Value passed to --fetcher by `scrape` (desc lists http|stealth|dynamic).
  FETCHER: 'http'
  # Value passed to --format (desc lists pretty|json).
  FORMAT: 'pretty'
  # Base URL queried by service:health.
  SERVICE_URL: 'http://localhost:8765'
# Task definitions. Run `task --list` to see every task that has a `desc`.
tasks:
  # ── Build ──────────────────────────────────────────────────────────────────
  build:
    desc: Compile TypeScript to dist/
    cmds:
      - npm run build

  dev:
    desc: Watch mode — recompile on change
    cmds:
      - npm run dev

  # ── Code quality ───────────────────────────────────────────────────────────
  lint:
    desc: Run ESLint
    cmds:
      - npm run lint

  format:
    desc: Format source with Prettier
    cmds:
      - npm run format

  # ── Tests ──────────────────────────────────────────────────────────────────
  test:
    desc: Run all unit tests
    cmds:
      - npm test

  test:watch:
    desc: Run tests in watch mode
    cmds:
      - npm run test:watch

  test:coverage:
    desc: Run tests with coverage report
    cmds:
      - npm run test:coverage

  # ── Python service ─────────────────────────────────────────────────────────
  service:install:
    desc: Install Python service dependencies (creates .venv in service/)
    # Commands run from service/, so every .venv path below is service/.venv.
    dir: service
    cmds:
      - python3 -m venv .venv
      - .venv/bin/pip install -e ".[dev]"
      - .venv/bin/scrapling install

  service:start:
    desc: Start Scrapling microservice on port 8765
    dir: service
    cmds:
      # NOTE(review): port 8765 is hard-coded here and again in the
      # SERVICE_URL var; keep the two in sync if either changes.
      # NOTE(review): --host 0.0.0.0 together with --reload looks like a
      # dev-only setup that listens on all interfaces — confirm this is wanted.
      - .venv/bin/uvicorn main:app --host 0.0.0.0 --port 8765 --reload

  service:health:
    desc: Check Scrapling service health
    cmds:
      # -f: treat HTTP error statuses as failures; -sS: no progress meter but
      # still print curl's own error message (plain -s hides it, leaving only
      # a confusing JSON parse error when the service is down).
      - curl -fsS "{{.SERVICE_URL}}/health" | python3 -m json.tool

  # ── Scrapling CLI runner ────────────────────────────────────────────────────
  # Template expansions in the scrape:* commands are double-quoted so that URLs
  # containing shell metacharacters (&, ;, ?, *, spaces) reach the script as a
  # single argument instead of being split or interpreted by the shell.
  scrape:
    desc: "Scrape a URL [URL=https://example.com] [FETCHER=http|stealth|dynamic] [FORMAT=pretty|json]"
    cmds:
      # Folded scalar: the three lines below are joined into one command line.
      - >-
        npx ts-node scripts/scrapling-run.ts "{{.URL}}"
        --fetcher "{{.FETCHER}}"
        --format "{{.FORMAT}}"

  scrape:html:
    desc: "Scrape and return raw HTML [URL=https://example.com]"
    cmds:
      - npx ts-node scripts/scrapling-run.ts "{{.URL}}" --html --format json

  scrape:stealth:
    desc: "Scrape with stealth fetcher [URL=https://example.com]"
    cmds:
      - npx ts-node scripts/scrapling-run.ts "{{.URL}}" --fetcher stealth --format "{{.FORMAT}}"

  scrape:dynamic:
    desc: "Scrape with Playwright browser [URL=https://example.com]"
    cmds:
      - npx ts-node scripts/scrapling-run.ts "{{.URL}}" --fetcher dynamic --format "{{.FORMAT}}"

  # ── Docker ─────────────────────────────────────────────────────────────────
  docker:build:
    desc: Build Docker image for Scrapling service
    cmds:
      - docker build -t scrapling-service ./service

  docker:up:
    desc: Start full stack (n8n + scrapling-service) via Docker Compose
    cmds:
      - docker compose up -d

  docker:down:
    desc: Stop Docker Compose stack
    cmds:
      - docker compose down

  docker:logs:
    desc: Tail Docker Compose logs
    cmds:
      - docker compose logs -f

  # ── Composite ─────────────────────────────────────────────────────────────
  check:
    desc: Lint + test (pre-push safety check)
    cmds:
      # Listed under cmds so lint runs first and test runs after it.
      - task: lint
      - task: test

  setup:
    desc: Install all dependencies and copy .env.test.example if missing
    cmds:
      # NOTE(review): only npm dependencies are installed here; the Python
      # service has its own task (service:install) — confirm that is intended.
      - npm install
      # Seed .env.test from the example only when it does not exist yet, so an
      # existing file with real credentials is never overwritten.
      - |
        if [ ! -f .env.test ]; then
          cp .env.test.example .env.test
          echo ".env.test created — fill in your credentials"
        fi