#!/usr/bin/env python3
"""Convert a PPTX file to a self-contained HTML presentation with formatting preserved.

NOTE(review): the copy of this script that was reviewed had lost its line
breaks and every HTML tag embedded in its string literals.  The Python logic
below follows the surviving text; the HTML tags, the page CSS and the entity
escapes are a minimal reconstruction and should be compared against the
original markup if a pristine copy exists.
"""

import base64
import io
import re
import sys
from html import escape
from pathlib import Path

try:
    from pptx import Presentation
    from pptx.util import Inches, Pt, Emu
    from pptx.enum.text import PP_ALIGN
    from pptx.dml.color import RGBColor
    from pptx.oxml.ns import qn
except ImportError:
    print("ERROR: python-pptx not installed. Install with: pip install python-pptx")
    sys.exit(1)


# Errors treated as "this property is not available" during best-effort reads.
_LOOKUP_ERRORS = (AttributeError, KeyError, TypeError, ValueError)

# MSO_SHAPE_TYPE.PICTURE, kept as the literal the script has always compared to.
_PICTURE_SHAPE_TYPE = 13


def _read(obj, name):
    """Return ``getattr(obj, name)``, or None when the lookup raises."""
    try:
        return getattr(obj, name)
    except _LOOKUP_ERRORS:
        return None


def _srgb_value(solid_fill):
    """Return the ``val`` of the ``a:srgbClr`` child of *solid_fill*, or None."""
    if solid_fill is None:
        return None
    srgb = solid_fill.find(qn("a:srgbClr"))
    if srgb is None:
        return None
    return srgb.get("val") or None


def rgb_to_hex(rgb_color):
    """Convert an RGBColor to a ``#RRGGBB`` string; return None for None."""
    if rgb_color is None:
        return None
    try:
        return f"#{rgb_color}"
    except (TypeError, ValueError):
        return None


def get_text_style(run):
    """Extract inline CSS declarations from a run's font.

    Each font property is read on its own, so a property that cannot be read
    no longer discards the ones after it.  Returns the declarations joined
    with ``;`` (an empty string when the run carries no explicit formatting).
    """
    font = _read(run, "font")
    if font is None:
        return ""

    styles = []
    if _read(font, "bold"):
        styles.append("font-weight:bold")
    if _read(font, "italic"):
        styles.append("font-style:italic")
    if _read(font, "underline"):
        styles.append("text-decoration:underline")

    size = _read(font, "size")
    if size:
        styles.append(f"font-size:{size.pt}pt")

    # NOTE(review): python-pptx appears to raise AttributeError from
    # ``color.rgb`` when the colour is a theme colour or is inherited, which
    # is why the colour is looked up through the guarded reader.
    color = _read(font, "color")
    rgb = _read(color, "rgb") if color is not None else None
    if rgb:
        styles.append(f"color:{rgb_to_hex(rgb)}")

    name = _read(font, "name")
    if name:
        styles.append(f"font-family:'{name}',sans-serif")

    return ";".join(styles)


def get_alignment(paragraph):
    """Map a paragraph's alignment to a CSS ``text-align`` value (default ``left``)."""
    align = _read(paragraph, "alignment")
    if align == PP_ALIGN.CENTER:
        return "center"
    if align == PP_ALIGN.RIGHT:
        return "right"
    if align == PP_ALIGN.JUSTIFY:
        return "justify"
    return "left"


def extract_image(shape):
    """Return the shape's embedded image as a base64 data URI, or None."""
    try:
        image = shape.image
        content_type = image.content_type
        encoded = base64.b64encode(image.blob).decode("utf-8")
    except _LOOKUP_ERRORS:
        # Shape has no image, or the image could not be read.
        return None
    return f"data:{content_type};base64,{encoded}"


def get_shape_position(shape, slide_width, slide_height):
    """Return ``(left, top, width, height)`` as percentages of the slide.

    A missing or zero offset maps to 0; a missing or zero size maps to the
    defaults 50 (width) and 30 (height).  If the geometry cannot be computed
    at all, the fallback box ``(5, 5, 90, 40)`` is returned.
    """

    def percent(value, total, default):
        return (value / total) * 100 if value else default

    try:
        return (
            percent(shape.left, slide_width, 0),
            percent(shape.top, slide_height, 0),
            percent(shape.width, slide_width, 50),
            percent(shape.height, slide_height, 30),
        )
    except (AttributeError, TypeError, ZeroDivisionError):
        return 5, 5, 90, 40


def get_slide_background(slide, prs):
    """Return a CSS ``background-color`` declaration for *slide*.

    Looks for a solid sRGB fill in the slide's own background, then in its
    layout's, and falls back to white.  *prs* is accepted for interface
    compatibility and is not used.
    """
    for source in (slide, _read(slide, "slide_layout")):
        if source is None:
            continue
        try:
            element = source.background._element
            # Restrict the search to the <p:bg> element.  Iterating the
            # element returned by ``background._element`` directly can also
            # visit fills that belong to shapes and text on the slide.
            if element.tag == qn("p:bg"):
                bg = element
            else:
                bg = element.find(qn("p:bg"))
            if bg is None:
                continue
            for solid_fill in bg.iter(qn("a:solidFill")):
                value = _srgb_value(solid_fill)
                if value:
                    return f"background-color:#{value}"
        except _LOOKUP_ERRORS:
            continue
    return "background-color:#ffffff"


def get_shape_fill(shape):
    """Return the shape's solid sRGB fill as ``#RRGGBB``, or None."""
    try:
        element = shape._element
        sp_pr = element.find(qn("p:spPr"))
        if sp_pr is None:
            sp_pr = element.find(qn("a:spPr"))
        if sp_pr is None:
            return None
        value = _srgb_value(sp_pr.find(qn("a:solidFill")))
    except _LOOKUP_ERRORS:
        return None
    return f"#{value}" if value else None


def render_paragraph(paragraph):
    """Render a paragraph as an HTML ``<p>`` with per-run inline styling.

    Run text is HTML-escaped.  Returns an empty string when the paragraph has
    no non-empty runs.
    """
    align = get_alignment(paragraph)
    parts = []
    for run in paragraph.runs:
        text = run.text
        if not text:
            continue
        text = escape(text, quote=False)
        style = get_text_style(run)
        if style:
            parts.append(f'<span style="{escape(style, quote=True)}">{text}</span>')
        else:
            parts.append(text)
    if not parts:
        return ""
    content = "".join(parts)
    return f'<p style="text-align:{align}">{content}</p>'


def _render_table(table):
    """Render a python-pptx table as an HTML ``<table>`` with escaped cell text."""
    rows = []
    for row in table.rows:
        cells = "".join(
            f"<td>{escape(cell.text, quote=False)}</td>" for cell in row.cells
        )
        rows.append(f"<tr>{cells}</tr>")
    return "<table>" + "".join(rows) + "</table>"


def _render_shape(shape, slide_width, slide_height):
    """Render one shape as an absolutely positioned HTML element, or None."""
    left, top, width, height = get_shape_position(shape, slide_width, slide_height)
    pos_style = (
        f"position:absolute;left:{left:.1f}%;top:{top:.1f}%;"
        f"width:{width:.1f}%;height:{height:.1f}%"
    )

    # Image.  extract_image returns None for shapes without a readable image,
    # so it doubles as the "is this a picture?" probe.
    data_uri = extract_image(shape)
    if data_uri:
        return (
            f'<div style="{pos_style}">'
            f'<img src="{escape(data_uri, quote=True)}" '
            f'style="width:100%;height:100%;object-fit:contain" alt="">'
            f"</div>"
        )
    if _read(shape, "shape_type") == _PICTURE_SHAPE_TYPE:
        # A picture whose image could not be extracted has nothing to show.
        return None

    # Table
    if _read(shape, "has_table"):
        return f'<div style="{pos_style}">{_render_table(shape.table)}</div>'

    # Text
    if _read(shape, "has_text_frame"):
        paragraphs = (render_paragraph(p) for p in shape.text_frame.paragraphs)
        text_parts = [html_part for html_part in paragraphs if html_part]
        if text_parts:
            content = "".join(text_parts)
            fill = get_shape_fill(shape)
            fill_style = (
                f"background-color:{fill};padding:1em;border-radius:8px;"
                if fill
                else ""
            )
            style = escape(f"{pos_style};{fill_style}", quote=True)
            return f'<div style="{style}">{content}</div>'

    # Decorative shape with fill (colored rectangles, bars, etc.)
    fill = get_shape_fill(shape)
    if fill:
        style = escape(f"{pos_style};background-color:{fill}", quote=True)
        return f'<div style="{style}"></div>'
    return None


def _presentation_title(prs):
    """Return the first short, non-empty shape text on slide 1, else "Presentation"."""
    if prs.slides:
        for shape in prs.slides[0].shapes:
            text = _read(shape, "text")
            if isinstance(text, str) and text.strip() and len(text.strip()) < 150:
                return text.strip()
    return "Presentation"


def _build_document(title, aspect_ratio, slides_html):
    """Wrap the rendered slides in a complete HTML page.

    NOTE(review): the original page template (CSS and any script) was not
    recoverable; this is a minimal stacked-slide layout.
    """
    slides = "\n".join(slides_html)
    return f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{escape(title, quote=False)}</title>
<style>
body {{ margin: 0; padding: 2vh 0; background: #222; font-family: sans-serif; }}
.slide-wrap {{ width: min(96vw, calc(92vh * {aspect_ratio:.4f})); margin: 0 auto 2vh; }}
.slide {{ position: relative; width: 100%; aspect-ratio: {aspect_ratio:.4f}; overflow: hidden; }}
.slide p {{ margin: 0.2em 0; }}
.slide table {{ width: 100%; height: 100%; border-collapse: collapse; }}
.slide td {{ border: 1px solid #999; padding: 0.3em; }}
</style>
</head>
<body>
{slides}
</body>
</html>
'''


def convert(pptx_path, output_path=None):
    """Convert the presentation at *pptx_path* to a single HTML file.

    Args:
        pptx_path: Path to the input ``.pptx`` file.
        output_path: Destination HTML path.  Defaults to *pptx_path* with its
            suffix replaced by ``.html``.

    Writes the HTML file as UTF-8 and prints the output path and slide count.
    """
    prs = Presentation(pptx_path)
    slide_width = prs.slide_width
    slide_height = prs.slide_height
    aspect_ratio = slide_width / slide_height if slide_height else 16 / 9

    slides_html = []
    for i, slide in enumerate(prs.slides, 1):
        bg_style = escape(get_slide_background(slide, prs), quote=True)
        # Shapes are emitted top-to-bottom, then left-to-right.
        ordered = sorted(slide.shapes, key=lambda s: (s.top or 0, s.left or 0))
        rendered = (_render_shape(s, slide_width, slide_height) for s in ordered)
        slide_content = "\n".join(element for element in rendered if element)
        slides_html.append(
            f'<div class="slide-wrap" id="slide-{i}">\n'
            f'<div class="slide" style="{bg_style}">\n'
            f"{slide_content}\n"
            f"</div>\n"
            f"</div>"
        )

    document = _build_document(_presentation_title(prs), aspect_ratio, slides_html)

    output = output_path or str(Path(pptx_path).with_suffix(".html"))
    Path(output).write_text(document, encoding="utf-8")
    print(f"Converted to: {output}")
    print(f"Slides: {len(slides_html)}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: convert-pptx.py <input.pptx> [output.html]")
        sys.exit(1)
    convert(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)