#!/usr/bin/env python3
"""Convert a PPTX file to a self-contained HTML presentation with formatting preserved."""

import sys
import base64
import io
import re
from html import escape as html_escape
from pathlib import Path

try:
    from pptx import Presentation
    from pptx.util import Inches, Pt, Emu
    from pptx.enum.text import PP_ALIGN
    from pptx.dml.color import RGBColor
except ImportError:
    print("ERROR: python-pptx not installed. Install with: pip install python-pptx")
    sys.exit(1)


def rgb_to_hex(rgb_color):
    """Return ``rgb_color`` formatted as ``#`` followed by its string form.

    Returns None when ``rgb_color`` is None or cannot be formatted.
    """
    if rgb_color is None:
        return None
    try:
        return f"#{rgb_color}"
    except Exception:
        return None


def get_text_style(run):
    """Build an inline CSS declaration string from a run's font settings.

    Each font property is read independently, so a property that raises
    (for example ``font.color.rgb`` on a color that is not an explicit RGB
    value) only drops its own declaration instead of every one after it.

    Returns the declarations joined with ``;`` (an empty string when the run
    carries no explicit formatting).
    """

    def font_value(read):
        """Return ``read(run.font)``, or None when the lookup raises."""
        try:
            return read(run.font)
        except Exception:
            return None

    styles = []
    if font_value(lambda f: f.bold):
        styles.append("font-weight:bold")
    if font_value(lambda f: f.italic):
        styles.append("font-style:italic")
    if font_value(lambda f: f.underline):
        styles.append("text-decoration:underline")

    size_pt = font_value(lambda f: f.size.pt if f.size else None)
    if size_pt:
        styles.append(f"font-size:{size_pt}pt")

    # rgb_to_hex(None) is None, so a missing or unreadable color is skipped
    # rather than rendered as the literal text "color:None".
    color = rgb_to_hex(font_value(lambda f: f.color.rgb))
    if color:
        styles.append(f"color:{color}")

    name = font_value(lambda f: f.name)
    if name:
        styles.append(f"font-family:'{name}',sans-serif")

    return ";".join(styles)


def get_alignment(paragraph):
    """Map a paragraph's alignment to a CSS ``text-align`` value.

    Returns ``"center"``, ``"right"`` or ``"justify"`` for the matching
    ``PP_ALIGN`` members and ``"left"`` for anything else, including an
    alignment that cannot be read.
    """
    try:
        align = paragraph.alignment
    except Exception:
        return "left"

    if align == PP_ALIGN.CENTER:
        return "center"
    if align == PP_ALIGN.RIGHT:
        return "right"
    if align == PP_ALIGN.JUSTIFY:
        return "justify"
    return "left"


def extract_image(shape):
    """Return the shape's image as a base64 ``data:`` URI.

    Returns None when the shape exposes no usable image.
    """
    try:
        image = shape.image
        encoded = base64.b64encode(image.blob).decode("utf-8")
        return f"data:{image.content_type};base64,{encoded}"
    except Exception:
        return None


def get_shape_position(shape, slide_width, slide_height):
    """Return ``(left, top, width, height)`` as percentages of the slide size.

    A falsy offset becomes 0; a falsy width or height falls back to 50 and 30
    respectively. If the geometry cannot be computed at all (missing
    attribute, zero slide dimension, ...) the fixed box ``(5, 5, 90, 40)`` is
    returned.
    """

    def percent(value, total, default):
        """Return ``value`` as a percentage of ``total``, or ``default`` if falsy."""
        return (value / total) * 100 if value else default

    try:
        left = percent(shape.left, slide_width, 0)
        top = percent(shape.top, slide_height, 0)
        width = percent(shape.width, slide_width, 50)
        height = percent(shape.height, slide_height, 30)
        return left, top, width, height
    except Exception:
        return 5, 5, 90, 40


def get_slide_background(slide, prs):
    """Return a CSS ``background-color`` declaration for a slide.

    The slide is inspected first, then its layout. Only solid fills with an
    explicit ``a:srgbClr`` are recognised; anything else yields white.

    ``prs`` is unused and kept for interface compatibility.
    """
    from pptx.oxml.ns import qn

    bg_tag = qn("p:bg")
    fill_tag = qn("a:solidFill")
    rgb_tag = qn("a:srgbClr")

    for use_layout in (False, True):
        try:
            source = slide.slide_layout if use_layout else slide
            root = source.background._element
            # Restrict the search to the <p:bg> element. Iterating the whole
            # root would also match solid fills belonging to shapes on the
            # slide when the root is the common slide data element.
            # NOTE(review): assumes background._element is <p:cSld> or <p:bg>
            # (iter() also yields the root itself) - confirm against the
            # installed python-pptx version.
            for bg in root.iter(bg_tag):
                for fill in bg.iter(fill_tag):
                    clr = fill.find(rgb_tag)
                    if clr is not None and clr.get("val"):
                        return f"background-color:#{clr.get('val')}"
        except Exception:
            # Best effort: an unreadable source just falls through to the next.
            continue
    return "background-color:#ffffff"


def get_shape_fill(shape):
    """Return the shape's solid fill colour as ``#RRGGBB``-style text, or None.

    Only a ``solidFill`` that is a direct child of the shape properties
    element and carries an ``a:srgbClr`` value is recognised.
    """
    from pptx.oxml.ns import qn

    try:
        element = shape._element
        sp_pr = None
        # qn() expands these prefixes to the full namespaced tag names, so no
        # separate hard-coded fallback lookup is needed.
        for tag in ("p:spPr", "a:spPr"):
            sp_pr = element.find(qn(tag))
            if sp_pr is not None:
                break
        if sp_pr is None:
            return None

        fill = sp_pr.find(qn("a:solidFill"))
        if fill is None:
            return None

        clr = fill.find(qn("a:srgbClr"))
        if clr is not None and clr.get("val"):
            return f"#{clr.get('val')}"
    except Exception:
        pass
    return None


def render_paragraph(paragraph):
    """Render a paragraph as an HTML ``<p>`` element with inline formatting.

    Run text is HTML-escaped. Runs that carry explicit formatting are wrapped
    in a ``<span>`` with an inline ``style`` attribute; the paragraph's
    alignment is applied through ``text-align``.

    Returns an empty string when the paragraph has no non-empty runs.
    """
    align = get_alignment(paragraph)
    parts = []
    for run in paragraph.runs:
        text = run.text
        if not text:
            continue
        # Escape &, < and > so slide text cannot be interpreted as markup.
        text = html_escape(text, quote=False)
        style = get_text_style(run)
        if style:
            # The style lands inside a double-quoted attribute, so quotes in
            # it (e.g. from a font name) must be escaped as well.
            safe_style = html_escape(style, quote=True)
            parts.append(f'<span style="{safe_style}">{text}</span>')
        else:
            parts.append(text)

    if not parts:
        return ""
    content = "".join(parts)
    return f'<p style="text-align:{align}">{content}</p>'


# NOTE(review): convert() is cut off at the end of this view, so its visible
# head is preserved verbatim below (commented out, still whitespace-collapsed)
# instead of being reconstructed by guesswork.
# def convert(pptx_path, output_path=None): prs = Presentation(pptx_path) slide_width = prs.slide_width slide_height = prs.slide_height aspect_ratio = slide_width / slide_height if slide_height else 16/9 slides_html = [] for i, slide in enumerate(prs.slides, 1): bg_style = get_slide_background(slide, prs) elements = [] for shape in sorted(slide.shapes, key=lambda s: (s.top or 0, s.left or 0)): left, top, width, height = get_shape_position(shape, slide_width, slide_height) pos_style = f"position:absolute;left:{left:.1f}%;top:{top:.1f}%;width:{width:.1f}%;height:{height:.1f}%" # Image if shape.shape_type == 13 or hasattr(shape, "image"): data_uri = extract_image(shape) if data_uri: elements.append( f'| {cell_text} | ' table_html += "