mirror of
https://github.com/github/awesome-copilot.git
synced 2026-03-20 08:05:12 +00:00
fix: handle large presentations with external assets mode (#1090)
This commit is contained in:
committed by
GitHub
parent
e4fc57f204
commit
07e1e665d6
@@ -34,6 +34,17 @@ Ask the user for a **repo name** if not provided. Default: filename without exte
|
|||||||
|
|
||||||
## 3. Conversion
|
## 3. Conversion
|
||||||
|
|
||||||
|
### Large File Handling
|
||||||
|
|
||||||
|
Both conversion scripts automatically detect large files and switch to **external assets mode**:
|
||||||
|
- **PPTX:** Files >20MB or with >50 images → images saved as separate files in `assets/`
|
||||||
|
- **PDF:** Files >20MB or with >50 pages → page PNGs saved in `assets/`
|
||||||
|
- Files >150MB print a warning (the PPTX script additionally suggests converting via the PDF path instead)
|
||||||
|
|
||||||
|
This keeps individual files well under GitHub's 100MB limit. Small files still produce a single self-contained HTML.
|
||||||
|
|
||||||
|
You can override the auto-detection: pass `--external-assets` to always write images to `assets/`, or `--no-external-assets` to always inline them as base64.
|
||||||
|
|
||||||
### HTML
|
### HTML
|
||||||
No conversion needed. Use the file directly as `index.html`.
|
No conversion needed. Use the file directly as `index.html`.
|
||||||
|
|
||||||
@@ -41,6 +52,8 @@ No conversion needed. Use the file directly as `index.html`.
|
|||||||
Run the conversion script:
|
Run the conversion script:
|
||||||
```bash
|
```bash
|
||||||
python3 SKILL_DIR/scripts/convert-pptx.py INPUT_FILE /tmp/output.html
|
python3 SKILL_DIR/scripts/convert-pptx.py INPUT_FILE /tmp/output.html
|
||||||
|
# For large files, force external assets:
|
||||||
|
python3 SKILL_DIR/scripts/convert-pptx.py INPUT_FILE /tmp/output.html --external-assets
|
||||||
```
|
```
|
||||||
If `python-pptx` is missing, tell the user: `pip install python-pptx`
|
If `python-pptx` is missing, tell the user: `pip install python-pptx`
|
||||||
|
|
||||||
@@ -48,8 +61,10 @@ If `python-pptx` is missing, tell the user: `pip install python-pptx`
|
|||||||
Convert with the included script (requires `poppler-utils` for `pdftoppm`):
|
Convert with the included script (requires `poppler-utils` for `pdftoppm`):
|
||||||
```bash
|
```bash
|
||||||
python3 SKILL_DIR/scripts/convert-pdf.py INPUT_FILE /tmp/output.html
|
python3 SKILL_DIR/scripts/convert-pdf.py INPUT_FILE /tmp/output.html
|
||||||
|
# For large files, force external assets:
|
||||||
|
python3 SKILL_DIR/scripts/convert-pdf.py INPUT_FILE /tmp/output.html --external-assets
|
||||||
```
|
```
|
||||||
Each page is rendered as a PNG and base64-embedded into a self-contained HTML with slide navigation.
|
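Each page is rendered as a PNG and shown in an HTML page with slide navigation; the PNGs are base64-embedded by default, or saved under `assets/` and referenced from the HTML in external assets mode.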
|
||||||
If `pdftoppm` is missing, tell the user: `apt install poppler-utils` (or `brew install poppler` on macOS).
|
If `pdftoppm` is missing, tell the user: `apt install poppler-utils` (or `brew install poppler` on macOS).
|
||||||
|
|
||||||
### Google Slides
|
### Google Slides
|
||||||
@@ -72,7 +87,9 @@ bash SKILL_DIR/scripts/publish.sh /path/to/index.html REPO_NAME public "Descript
|
|||||||
|
|
||||||
Pass `private` instead of `public` if the user requests it.
|
Pass `private` instead of `public` if the user requests it.
|
||||||
|
|
||||||
The script creates the repo, pushes `index.html`, and enables GitHub Pages.
|
The script creates the repo, pushes `index.html` (plus `assets/` if present), and enables GitHub Pages.
|
||||||
|
|
||||||
|
**Note:** When external assets mode is used, the output HTML references files in `assets/`. The publish script automatically detects and copies the `assets/` directory alongside the HTML file. Make sure the HTML file and its `assets/` directory are in the same parent directory.
|
||||||
|
|
||||||
## 5. Output
|
## 5. Output
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Convert a PDF to a self-contained HTML presentation.
|
"""Convert a PDF to an HTML presentation.
|
||||||
|
|
||||||
Each page is rendered as a PNG image (via pdftoppm) and base64-embedded
|
Each page is rendered as a PNG image (via pdftoppm). Supports external assets
|
||||||
into a single HTML file with slide navigation (arrows, swipe, click).
|
mode for large files to avoid huge single-file HTML.
|
||||||
|
|
||||||
Requirements: poppler-utils (pdftoppm)
|
Requirements: poppler-utils (pdftoppm)
|
||||||
Usage: python3 convert-pdf.py input.pdf [output.html]
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
@@ -17,19 +17,50 @@ import tempfile
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150):
|
def get_page_count(pdf_path):
    """Return the page count of a PDF using ``pdfinfo``, or None if unknown.

    The lookup is best-effort: ``pdfinfo`` is run on ``pdf_path`` and the
    first ``Pages:`` line of its standard output is parsed.

    Args:
        pdf_path: Path to the PDF file, passed to ``pdfinfo`` as-is.

    Returns:
        The page count as an int, or None when ``pdfinfo`` cannot be run,
        prints no ``Pages:`` line, or prints a value that is not an integer.
    """
    try:
        result = subprocess.run(
            ["pdfinfo", pdf_path],
            capture_output=True,
            text=True,
            # Metadata (title, author, ...) may contain bytes that are not
            # valid in the locale encoding; do not lose the page count to that.
            errors="replace",
        )
    except (OSError, ValueError):
        # OSError: pdfinfo is missing or not executable.
        # ValueError: the argument cannot be passed to a process (e.g. NUL byte).
        return None

    for line in result.stdout.splitlines():
        if line.startswith("Pages:"):
            try:
                return int(line.partition(":")[2].strip())
            except ValueError:
                return None
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150, external_assets=None):
|
||||||
pdf_path = str(Path(pdf_path).resolve())
|
pdf_path = str(Path(pdf_path).resolve())
|
||||||
if not Path(pdf_path).exists():
|
if not Path(pdf_path).exists():
|
||||||
print(f"Error: {pdf_path} not found")
|
print(f"Error: {pdf_path} not found")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Check for pdftoppm
|
|
||||||
if subprocess.run(["which", "pdftoppm"], capture_output=True).returncode != 0:
|
if subprocess.run(["which", "pdftoppm"], capture_output=True).returncode != 0:
|
||||||
print("Error: pdftoppm not found. Install poppler-utils:")
|
print("Error: pdftoppm not found. Install poppler-utils:")
|
||||||
print(" apt install poppler-utils # Debian/Ubuntu")
|
print(" apt install poppler-utils # Debian/Ubuntu")
|
||||||
print(" brew install poppler # macOS")
|
print(" brew install poppler # macOS")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
file_size_mb = os.path.getsize(pdf_path) / (1024 * 1024)
|
||||||
|
|
||||||
|
if file_size_mb > 150:
|
||||||
|
print(f"WARNING: PDF is {file_size_mb:.0f}MB — conversion may be slow and memory-intensive.")
|
||||||
|
|
||||||
|
page_count = get_page_count(pdf_path)
|
||||||
|
|
||||||
|
# Auto-detect external assets mode
|
||||||
|
if external_assets is None:
|
||||||
|
external_assets = file_size_mb > 20 or (page_count is not None and page_count > 50)
|
||||||
|
if external_assets:
|
||||||
|
print(f"Auto-enabling external assets mode (file: {file_size_mb:.1f}MB, pages: {page_count or 'unknown'})")
|
||||||
|
|
||||||
|
output = output_path or str(Path(pdf_path).with_suffix('.html'))
|
||||||
|
output_dir = Path(output).parent
|
||||||
|
|
||||||
|
if external_assets:
|
||||||
|
assets_dir = output_dir / "assets"
|
||||||
|
assets_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
prefix = os.path.join(tmpdir, "page")
|
prefix = os.path.join(tmpdir, "page")
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
@@ -48,15 +79,23 @@ def convert(pdf_path: str, output_path: str | None = None, dpi: int = 150):
|
|||||||
slides_html = []
|
slides_html = []
|
||||||
for i, page_path in enumerate(pages, 1):
|
for i, page_path in enumerate(pages, 1):
|
||||||
with open(page_path, "rb") as f:
|
with open(page_path, "rb") as f:
|
||||||
b64 = base64.b64encode(f.read()).decode()
|
page_bytes = f.read()
|
||||||
|
|
||||||
|
if external_assets:
|
||||||
|
img_name = f"img-{i:03d}.png"
|
||||||
|
(assets_dir / img_name).write_bytes(page_bytes)
|
||||||
|
src = f"assets/{img_name}"
|
||||||
|
else:
|
||||||
|
b64 = base64.b64encode(page_bytes).decode()
|
||||||
|
src = f"data:image/png;base64,{b64}"
|
||||||
|
|
||||||
slides_html.append(
|
slides_html.append(
|
||||||
f'<section class="slide">'
|
f'<section class="slide">'
|
||||||
f'<div class="slide-inner">'
|
f'<div class="slide-inner">'
|
||||||
f'<img src="data:image/png;base64,{b64}" alt="Page {i}">'
|
f'<img src="{src}" alt="Page {i}">'
|
||||||
f'</div></section>'
|
f'</div></section>'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Try to extract title from filename
|
|
||||||
title = Path(pdf_path).stem.replace("-", " ").replace("_", " ")
|
title = Path(pdf_path).stem.replace("-", " ").replace("_", " ")
|
||||||
|
|
||||||
html = f'''<!DOCTYPE html>
|
html = f'''<!DOCTYPE html>
|
||||||
@@ -108,14 +147,30 @@ show(0);
|
|||||||
</script>
|
</script>
|
||||||
</body></html>'''
|
</body></html>'''
|
||||||
|
|
||||||
output = output_path or str(Path(pdf_path).with_suffix('.html'))
|
|
||||||
Path(output).write_text(html, encoding='utf-8')
|
Path(output).write_text(html, encoding='utf-8')
|
||||||
|
output_size = os.path.getsize(output)
|
||||||
|
|
||||||
print(f"Converted to: {output}")
|
print(f"Converted to: {output}")
|
||||||
print(f"Pages: {len(slides_html)}")
|
print(f"Pages: {len(slides_html)}")
|
||||||
|
print(f"Output size: {output_size / (1024*1024):.1f}MB")
|
||||||
|
print(f"External assets: {'yes' if external_assets else 'no'}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
parser = argparse.ArgumentParser(description="Convert PDF to HTML presentation")
|
||||||
print("Usage: convert-pdf.py <file.pdf> [output.html]")
|
parser.add_argument("input", help="Path to .pdf file")
|
||||||
sys.exit(1)
|
parser.add_argument("output", nargs="?", help="Output HTML path (default: same name with .html)")
|
||||||
convert(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
|
parser.add_argument("--external-assets", action="store_true", default=None,
|
||||||
|
help="Save page images as separate files in assets/ directory (auto-detected for large files)")
|
||||||
|
parser.add_argument("--no-external-assets", action="store_true",
|
||||||
|
help="Force inline base64 even for large files")
|
||||||
|
parser.add_argument("--dpi", type=int, default=150, help="Render DPI (default: 150)")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
ext_assets = None
|
||||||
|
if args.external_assets:
|
||||||
|
ext_assets = True
|
||||||
|
elif args.no_external_assets:
|
||||||
|
ext_assets = False
|
||||||
|
|
||||||
|
convert(args.input, args.output, dpi=args.dpi, external_assets=ext_assets)
|
||||||
|
|||||||
@@ -1,23 +1,27 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Convert a PPTX file to a self-contained HTML presentation with formatting preserved."""
|
"""Convert a PPTX file to an HTML presentation with formatting preserved.
|
||||||
import sys
|
|
||||||
|
Supports external assets mode for large files to avoid huge single-file HTML.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
try:
|
def _ensure_pptx():
|
||||||
from pptx import Presentation
|
try:
|
||||||
from pptx.util import Inches, Pt, Emu
|
from pptx import Presentation
|
||||||
from pptx.enum.text import PP_ALIGN
|
from pptx.enum.text import PP_ALIGN
|
||||||
from pptx.dml.color import RGBColor
|
return True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("ERROR: python-pptx not installed. Install with: pip install python-pptx")
|
print("ERROR: python-pptx not installed. Install with: pip install python-pptx")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def rgb_to_hex(rgb_color):
|
def rgb_to_hex(rgb_color):
|
||||||
"""Convert RGBColor to hex string."""
|
|
||||||
if rgb_color is None:
|
if rgb_color is None:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
@@ -27,7 +31,6 @@ def rgb_to_hex(rgb_color):
|
|||||||
|
|
||||||
|
|
||||||
def get_text_style(run):
|
def get_text_style(run):
|
||||||
"""Extract inline text styling from a run."""
|
|
||||||
styles = []
|
styles = []
|
||||||
try:
|
try:
|
||||||
if run.font.bold:
|
if run.font.bold:
|
||||||
@@ -48,7 +51,7 @@ def get_text_style(run):
|
|||||||
|
|
||||||
|
|
||||||
def get_alignment(paragraph):
|
def get_alignment(paragraph):
|
||||||
"""Get CSS text-align from paragraph alignment."""
|
from pptx.enum.text import PP_ALIGN
|
||||||
try:
|
try:
|
||||||
align = paragraph.alignment
|
align = paragraph.alignment
|
||||||
if align == PP_ALIGN.CENTER:
|
if align == PP_ALIGN.CENTER:
|
||||||
@@ -62,20 +65,7 @@ def get_alignment(paragraph):
|
|||||||
return "left"
|
return "left"
|
||||||
|
|
||||||
|
|
||||||
def extract_image(shape):
|
|
||||||
"""Extract image from shape as base64 data URI."""
|
|
||||||
try:
|
|
||||||
image = shape.image
|
|
||||||
content_type = image.content_type
|
|
||||||
image_bytes = image.blob
|
|
||||||
b64 = base64.b64encode(image_bytes).decode('utf-8')
|
|
||||||
return f"data:{content_type};base64,{b64}"
|
|
||||||
except:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_shape_position(shape, slide_width, slide_height):
|
def get_shape_position(shape, slide_width, slide_height):
|
||||||
"""Get shape position as percentages."""
|
|
||||||
try:
|
try:
|
||||||
left = (shape.left / slide_width) * 100 if shape.left else 0
|
left = (shape.left / slide_width) * 100 if shape.left else 0
|
||||||
top = (shape.top / slide_height) * 100 if shape.top else 0
|
top = (shape.top / slide_height) * 100 if shape.top else 0
|
||||||
@@ -87,12 +77,10 @@ def get_shape_position(shape, slide_width, slide_height):
|
|||||||
|
|
||||||
|
|
||||||
def get_slide_background(slide, prs):
|
def get_slide_background(slide, prs):
|
||||||
"""Extract slide background color from XML."""
|
|
||||||
from pptx.oxml.ns import qn
|
from pptx.oxml.ns import qn
|
||||||
for source in [slide, slide.slide_layout]:
|
for source in [slide, slide.slide_layout]:
|
||||||
try:
|
try:
|
||||||
bg_el = source.background._element
|
bg_el = source.background._element
|
||||||
# Look for solidFill > srgbClr inside bgPr
|
|
||||||
for sf in bg_el.iter(qn('a:solidFill')):
|
for sf in bg_el.iter(qn('a:solidFill')):
|
||||||
clr = sf.find(qn('a:srgbClr'))
|
clr = sf.find(qn('a:srgbClr'))
|
||||||
if clr is not None and clr.get('val'):
|
if clr is not None and clr.get('val'):
|
||||||
@@ -103,14 +91,12 @@ def get_slide_background(slide, prs):
|
|||||||
|
|
||||||
|
|
||||||
def get_shape_fill(shape):
|
def get_shape_fill(shape):
|
||||||
"""Extract shape fill color from XML."""
|
|
||||||
from pptx.oxml.ns import qn
|
from pptx.oxml.ns import qn
|
||||||
try:
|
try:
|
||||||
sp_pr = shape._element.find(qn('p:spPr'))
|
sp_pr = shape._element.find(qn('p:spPr'))
|
||||||
if sp_pr is None:
|
if sp_pr is None:
|
||||||
sp_pr = shape._element.find(qn('a:spPr'))
|
sp_pr = shape._element.find(qn('a:spPr'))
|
||||||
if sp_pr is None:
|
if sp_pr is None:
|
||||||
# Try direct child
|
|
||||||
for tag in ['{http://schemas.openxmlformats.org/drawingml/2006/main}spPr',
|
for tag in ['{http://schemas.openxmlformats.org/drawingml/2006/main}spPr',
|
||||||
'{http://schemas.openxmlformats.org/presentationml/2006/main}spPr']:
|
'{http://schemas.openxmlformats.org/presentationml/2006/main}spPr']:
|
||||||
sp_pr = shape._element.find(tag)
|
sp_pr = shape._element.find(tag)
|
||||||
@@ -128,7 +114,6 @@ def get_shape_fill(shape):
|
|||||||
|
|
||||||
|
|
||||||
def render_paragraph(paragraph):
|
def render_paragraph(paragraph):
|
||||||
"""Render a paragraph with inline formatting."""
|
|
||||||
align = get_alignment(paragraph)
|
align = get_alignment(paragraph)
|
||||||
parts = []
|
parts = []
|
||||||
for run in paragraph.runs:
|
for run in paragraph.runs:
|
||||||
@@ -147,12 +132,72 @@ def render_paragraph(paragraph):
|
|||||||
return f'<p style="text-align:{align};margin:0.3em 0;line-height:1.4">{content}</p>'
|
return f'<p style="text-align:{align};margin:0.3em 0;line-height:1.4">{content}</p>'
|
||||||
|
|
||||||
|
|
||||||
def convert(pptx_path, output_path=None):
|
def extract_image_data(shape):
    """Extract raw image bytes and content type from a shape.

    Args:
        shape: An object that may expose an ``image`` attribute with
            ``blob`` and ``content_type`` attributes.

    Returns:
        A ``(blob, content_type)`` tuple, or ``(None, None)`` when the image
        cannot be read from the shape.
    """
    try:
        image = shape.image
        return image.blob, image.content_type
    except Exception:
        # Best-effort: a shape without a readable image is simply skipped by
        # the caller. Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def count_images(prs):
    """Count the shapes across all slides whose image can be accessed.

    A shape is a candidate when its ``shape_type`` equals 13 or it has an
    ``image`` attribute; it is counted only if reading ``shape.image``
    succeeds.

    Args:
        prs: A presentation-like object exposing ``slides``, each of which
            exposes ``shapes``.

    Returns:
        The number of shapes with an accessible image, as an int.
    """
    # NOTE(review): 13 is presumably MSO_SHAPE_TYPE.PICTURE in python-pptx;
    # confirm against the library's enum.
    picture_shape_type = 13

    count = 0
    for slide in prs.slides:
        for shape in slide.shapes:
            if shape.shape_type == picture_shape_type or hasattr(shape, "image"):
                try:
                    shape.image
                except Exception:
                    # Image not readable from this shape: not counted. Unlike
                    # a bare ``except:``, Ctrl-C and SystemExit still propagate.
                    continue
                count += 1
    return count
|
||||||
|
|
||||||
|
|
||||||
|
# Maps an image MIME content type to the file extension used when the image
# is written to the assets/ directory in external assets mode.
CONTENT_TYPE_TO_EXT = {
    'image/png': '.png',
    'image/jpeg': '.jpg',
    'image/jpg': '.jpg',
    'image/gif': '.gif',
    'image/bmp': '.bmp',
    'image/tiff': '.tiff',
    'image/svg+xml': '.svg',
    'image/webp': '.webp',
    # Windows metafiles embedded in Office documents; listed so these blobs
    # are not saved under a misleading extension.
    'image/x-emf': '.emf',
    'image/x-wmf': '.wmf',
}
|
||||||
|
|
||||||
|
|
||||||
|
def convert(pptx_path, output_path=None, external_assets=None):
|
||||||
|
_ensure_pptx()
|
||||||
|
from pptx import Presentation
|
||||||
|
|
||||||
|
file_size_mb = os.path.getsize(pptx_path) / (1024 * 1024)
|
||||||
|
|
||||||
|
# Pre-flight warning for very large files
|
||||||
|
if file_size_mb > 150:
|
||||||
|
print(f"WARNING: File is {file_size_mb:.0f}MB — consider using PDF conversion (convert-pdf.py) for better performance.")
|
||||||
|
|
||||||
prs = Presentation(pptx_path)
|
prs = Presentation(pptx_path)
|
||||||
slide_width = prs.slide_width
|
slide_width = prs.slide_width
|
||||||
slide_height = prs.slide_height
|
slide_height = prs.slide_height
|
||||||
aspect_ratio = slide_width / slide_height if slide_height else 16/9
|
aspect_ratio = slide_width / slide_height if slide_height else 16/9
|
||||||
|
|
||||||
|
total_images = count_images(prs)
|
||||||
|
|
||||||
|
# Auto-detect external assets mode
|
||||||
|
if external_assets is None:
|
||||||
|
external_assets = file_size_mb > 20 or total_images > 50
|
||||||
|
if external_assets:
|
||||||
|
print(f"Auto-enabling external assets mode (file: {file_size_mb:.1f}MB, images: {total_images})")
|
||||||
|
|
||||||
|
output = output_path or str(Path(pptx_path).with_suffix('.html'))
|
||||||
|
output_dir = Path(output).parent
|
||||||
|
|
||||||
|
if external_assets:
|
||||||
|
assets_dir = output_dir / "assets"
|
||||||
|
assets_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
img_counter = 0
|
||||||
slides_html = []
|
slides_html = []
|
||||||
|
|
||||||
for i, slide in enumerate(prs.slides, 1):
|
for i, slide in enumerate(prs.slides, 1):
|
||||||
@@ -165,11 +210,20 @@ def convert(pptx_path, output_path=None):
|
|||||||
|
|
||||||
# Image
|
# Image
|
||||||
if shape.shape_type == 13 or hasattr(shape, "image"):
|
if shape.shape_type == 13 or hasattr(shape, "image"):
|
||||||
data_uri = extract_image(shape)
|
blob, content_type = extract_image_data(shape)
|
||||||
if data_uri:
|
if blob:
|
||||||
|
img_counter += 1
|
||||||
|
if external_assets:
|
||||||
|
ext = CONTENT_TYPE_TO_EXT.get(content_type, '.png')
|
||||||
|
img_name = f"img-{img_counter:03d}{ext}"
|
||||||
|
(assets_dir / img_name).write_bytes(blob)
|
||||||
|
src = f"assets/{img_name}"
|
||||||
|
else:
|
||||||
|
b64 = base64.b64encode(blob).decode('utf-8')
|
||||||
|
src = f"data:{content_type};base64,{b64}"
|
||||||
elements.append(
|
elements.append(
|
||||||
f'<div style="{pos_style};display:flex;align-items:center;justify-content:center">'
|
f'<div style="{pos_style};display:flex;align-items:center;justify-content:center">'
|
||||||
f'<img src="{data_uri}" style="max-width:100%;max-height:100%;object-fit:contain" alt="">'
|
f'<img src="{src}" style="max-width:100%;max-height:100%;object-fit:contain" alt="">'
|
||||||
f'</div>'
|
f'</div>'
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
@@ -205,7 +259,7 @@ def convert(pptx_path, output_path=None):
|
|||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Decorative shape with fill (colored rectangles, bars, etc.)
|
# Decorative shape with fill
|
||||||
fill = get_shape_fill(shape)
|
fill = get_shape_fill(shape)
|
||||||
if fill:
|
if fill:
|
||||||
elements.append(
|
elements.append(
|
||||||
@@ -218,7 +272,6 @@ def convert(pptx_path, output_path=None):
|
|||||||
)
|
)
|
||||||
|
|
||||||
title = "Presentation"
|
title = "Presentation"
|
||||||
# Try to get title from first slide
|
|
||||||
if prs.slides:
|
if prs.slides:
|
||||||
for shape in prs.slides[0].shapes:
|
for shape in prs.slides[0].shapes:
|
||||||
if hasattr(shape, "text") and shape.text.strip() and len(shape.text.strip()) < 150:
|
if hasattr(shape, "text") and shape.text.strip() and len(shape.text.strip()) < 150:
|
||||||
@@ -293,14 +346,31 @@ scaleSlides();
|
|||||||
</script>
|
</script>
|
||||||
</body></html>'''
|
</body></html>'''
|
||||||
|
|
||||||
output = output_path or str(Path(pptx_path).with_suffix('.html'))
|
|
||||||
Path(output).write_text(html, encoding='utf-8')
|
Path(output).write_text(html, encoding='utf-8')
|
||||||
|
output_size = os.path.getsize(output)
|
||||||
|
|
||||||
|
# Summary
|
||||||
print(f"Converted to: {output}")
|
print(f"Converted to: {output}")
|
||||||
print(f"Slides: {len(slides_html)}")
|
print(f"Slides: {len(slides_html)}")
|
||||||
|
print(f"Images: {img_counter}")
|
||||||
|
print(f"Output size: {output_size / (1024*1024):.1f}MB")
|
||||||
|
print(f"External assets: {'yes' if external_assets else 'no'}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
parser = argparse.ArgumentParser(description="Convert PPTX to HTML presentation")
|
||||||
print("Usage: convert-pptx.py <file.pptx> [output.html]")
|
parser.add_argument("input", help="Path to .pptx file")
|
||||||
sys.exit(1)
|
parser.add_argument("output", nargs="?", help="Output HTML path (default: same name with .html)")
|
||||||
convert(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
|
parser.add_argument("--external-assets", action="store_true", default=None,
|
||||||
|
help="Save images as separate files in assets/ directory (auto-detected for large files)")
|
||||||
|
parser.add_argument("--no-external-assets", action="store_true",
|
||||||
|
help="Force inline base64 even for large files")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
ext_assets = None # auto-detect
|
||||||
|
if args.external_assets:
|
||||||
|
ext_assets = True
|
||||||
|
elif args.no_external_assets:
|
||||||
|
ext_assets = False
|
||||||
|
|
||||||
|
convert(args.input, args.output, external_assets=ext_assets)
|
||||||
|
|||||||
@@ -22,9 +22,20 @@ gh repo create "$REPO_NAME" --"$VISIBILITY" --description "$DESCRIPTION"
|
|||||||
# Clone, push, enable pages
|
# Clone, push, enable pages
|
||||||
TMPDIR=$(mktemp -d)
|
TMPDIR=$(mktemp -d)
|
||||||
git clone "https://github.com/$USERNAME/$REPO_NAME.git" "$TMPDIR"
|
git clone "https://github.com/$USERNAME/$REPO_NAME.git" "$TMPDIR"
|
||||||
|
|
||||||
|
HTML_DIR=$(dirname "$HTML_FILE")
|
||||||
|
|
||||||
|
# Copy HTML file as index.html
|
||||||
cp "$HTML_FILE" "$TMPDIR/index.html"
|
cp "$HTML_FILE" "$TMPDIR/index.html"
|
||||||
|
|
||||||
|
# Copy assets directory if it exists alongside the HTML file
|
||||||
|
if [ -d "$HTML_DIR/assets" ]; then
|
||||||
|
cp -r "$HTML_DIR/assets" "$TMPDIR/assets"
|
||||||
|
echo "Copied assets/ directory ($(find "$HTML_DIR/assets" -type f | wc -l) files)"
|
||||||
|
fi
|
||||||
|
|
||||||
cd "$TMPDIR"
|
cd "$TMPDIR"
|
||||||
git add index.html
|
git add -A
|
||||||
git commit -m "Publish content"
|
git commit -m "Publish content"
|
||||||
git push origin main
|
git push origin main
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user