#!/usr/bin/env python3 """Convert a PDF to a self-contained HTML presentation. Each page is rendered as a PNG image (via pdftoppm) and base64-embedded into a single HTML file with slide navigation (arrows, swipe, click). Requirements: poppler-utils (pdftoppm) Usage: python3 convert-pdf.py input.pdf [output.html] """ import base64 import glob import os import subprocess import sys import tempfile from pathlib import Path def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150): pdf_path = str(Path(pdf_path).resolve()) if not Path(pdf_path).exists(): print(f"Error: {pdf_path} not found") sys.exit(1) # Check for pdftoppm if subprocess.run(["which", "pdftoppm"], capture_output=True).returncode != 0: print("Error: pdftoppm not found. Install poppler-utils:") print(" apt install poppler-utils # Debian/Ubuntu") print(" brew install poppler # macOS") sys.exit(1) with tempfile.TemporaryDirectory() as tmpdir: prefix = os.path.join(tmpdir, "page") result = subprocess.run( ["pdftoppm", "-png", "-r", str(dpi), pdf_path, prefix], capture_output=True, text=True ) if result.returncode != 0: print(f"Error converting PDF: {result.stderr}") sys.exit(1) pages = sorted(glob.glob(f"{prefix}-*.png")) if not pages: print("Error: No pages rendered from PDF") sys.exit(1) slides_html = [] for i, page_path in enumerate(pages, 1): with open(page_path, "rb") as f: b64 = base64.b64encode(f.read()).decode() slides_html.append( f'' ) # Try to extract title from filename title = Path(pdf_path).stem.replace("-", " ").replace("_", " ") html = f'''