diff --git a/04_PPTX_Update_Images.py b/04_PPTX_Update_Images.py index 1d70a0c..0becaf6 100644 --- a/04_PPTX_Update_Images.py +++ b/04_PPTX_Update_Images.py @@ -33,19 +33,6 @@ def _(TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET): return -@app.cell -def _(): - utils._calculate_file_sha1('figures/OneDrive_2026-01-28/All_Respondents/most_prominent_personality_traits.png') - return - - -@app.cell -def _(): - utils._calculate_perceptual_hash('figures/Picture.png') - - return - - @app.cell(hide_code=True) def _(): mo.md(r""" @@ -56,26 +43,21 @@ def _(): @app.cell def _(): - REPLACE_SOURCE = Path('data/test_replace_source.pptx') - REPLACE_TARGET = Path('data/test_replace_target.pptx') - return REPLACE_SOURCE, REPLACE_TARGET + REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_tagged.pptx') + REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx') - -app._unparsable_cell( - r""" - IMAGE_FILE = Path('figures/OneDrive_2026-01-28/Cons-Early_Professional/cold_distant_approachable_familiar_warm.png' - """, - name="_" -) + NEW_IMAGES_DIR = Path('figures/2-2-26') + return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET @app.cell -def _(IMAGE_FILE, REPLACE_SOURCE, REPLACE_TARGET): - utils.pptx_replace_named_image( - presentation_path=REPLACE_SOURCE, - target_tag=utils.image_alt_text_generator(IMAGE_FILE), - new_image_path=IMAGE_FILE, - save_path=REPLACE_TARGET) +def _(NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET): + # get all files in the image source directory and subdirectories + results = utils.pptx_replace_images_from_directory( + REPLACE_SOURCE, # Source presentation path, + NEW_IMAGES_DIR, # Source directory with new images + REPLACE_TARGET # Output path (optional, defaults to overwrite) + ) return diff --git a/utils.py b/utils.py index 98e6b2d..68a668a 100644 --- a/utils.py +++ b/utils.py @@ -37,16 +37,232 @@ def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str: else: return Path('/'.join(fparts[2:])).as_posix() +def 
# File suffixes (compared lower-cased) that are indexed as replacement images.
_REPLACEMENT_IMAGE_SUFFIXES = frozenset(
    {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'}
)

# Suffixes tried, in this order, when an alt text carries no file extension.
_ALT_TEXT_FALLBACK_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif')

# Attribute names under which python-pptx elements store non-visual props:
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector).
_NON_VISUAL_PROPERTY_ATTRS = (
    'nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr'
)


def _find_c_nv_pr(shape):
    """Return the ``cNvPr`` element of *shape*, or None when it has none."""
    element = shape._element
    for attr in _NON_VISUAL_PROPERTY_ATTRS:
        if hasattr(element, attr):
            nv_props = getattr(element, attr)
            # Compare with None explicitly: the truth value of an XML element
            # depends on its child count, not on whether it exists.
            if nv_props is not None and hasattr(nv_props, 'cNvPr'):
                return nv_props.cNvPr
            return None
    return None


def _get_shape_alt_text(shape) -> str:
    """
    Extract alt text from a PowerPoint shape.

    Args:
        shape: A python-pptx shape object.

    Returns:
        str: The alt text (descr attribute) or empty string if not found.
    """
    try:
        c_nv_pr = _find_c_nv_pr(shape)
        if c_nv_pr is not None:
            return c_nv_pr.get("descr", "")
    except Exception:
        # Deliberate best-effort: an object that does not look like a shape
        # is treated the same as a shape without alt text.
        pass
    return ""


def _set_shape_alt_text(shape, alt_text: str) -> None:
    """Write *alt_text* to the ``descr`` attribute of *shape*, if it has one."""
    c_nv_pr = _find_c_nv_pr(shape)
    if c_nv_pr is not None:
        c_nv_pr.set("descr", alt_text)


def _match_replacement_image(alt_text: str, available_images: dict):
    """Return the indexed image path matching *alt_text*, or None."""
    if alt_text in available_images:
        return available_images[alt_text]

    # Only guess an extension when the alt text does not already carry one.
    if Path(alt_text).suffix:
        return None
    for ext in _ALT_TEXT_FALLBACK_SUFFIXES:
        candidate = available_images.get(f"{alt_text}{ext}")
        if candidate is not None:
            return candidate
    return None


def pptx_replace_images_from_directory(
    presentation_path: Union[str, Path],
    image_source_dir: Union[str, Path],
    save_path: Union[str, Path, None] = None
) -> dict:
    """
    Replace all images in a PowerPoint presentation using images from a directory
    where subdirectory/filename paths match the alt_text of each image.

    This function scans all images in the presentation, extracts their alt_text,
    and looks for a matching image file in the source directory. The alt_text
    should be a relative path (e.g., "All_Respondents/chart_name.png") that
    corresponds to the directory structure under image_source_dir. An alt_text
    without a file extension is also tried with '.png', '.jpg', '.jpeg' and
    '.gif' appended, in that order.

    Args:
        presentation_path (str/Path): Path to the source .pptx file.
        image_source_dir (str/Path): Root directory containing replacement images.
            The directory structure should mirror the alt_text paths.
            Example: if alt_text is "All_Respondents/voice_scale.png", the
            replacement image should be at image_source_dir/All_Respondents/voice_scale.png
        save_path (str/Path, optional): Path to save the modified presentation.
            If None, overwrites the input file.

    Returns:
        dict: Summary with keys:
            - 'replaced': List of dicts with slide number, shape name, and matched path
            - 'not_found': List of dicts with slide number, shape name, and alt_text
            - 'no_alt_text': List of dicts with slide number and shape name
            - 'failed': List of dicts with slide number, shape name, alt_text,
              and the error message of a replacement that raised
            - 'total_images': Total number of picture shapes processed

    Raises:
        FileNotFoundError: If presentation_path or image_source_dir does not exist.

    Example:
        >>> pptx_replace_images_from_directory(
        ...     "presentation.pptx",
        ...     "figures/2-2-26/",
        ...     "presentation_updated.pptx"
        ... )

    Notes:
        - Alt text should be set using update_ppt_alt_text() or image_alt_text_generator()
        - Images without alt_text are skipped
        - Original image position, size, and aspect ratio are preserved
        - The new picture takes the old picture's slot in the shape tree, so
          stacking order and group membership are kept
        - A picture whose replacement fails is left untouched in the output
        - Progress and a summary are printed to stdout
    """
    presentation_path = Path(presentation_path)
    image_source_dir = Path(image_source_dir)
    save_path = presentation_path if save_path is None else Path(save_path)

    if not presentation_path.exists():
        raise FileNotFoundError(f"Presentation not found: {presentation_path}")
    if not image_source_dir.exists():
        raise FileNotFoundError(f"Image source directory not found: {image_source_dir}")

    # Index every available image by its POSIX path relative to the source
    # directory; that relative path is what the alt text is expected to hold.
    available_images = {
        img_path.relative_to(image_source_dir).as_posix(): img_path
        for img_path in image_source_dir.rglob("*")
        if img_path.is_file()
        and img_path.suffix.lower() in _REPLACEMENT_IMAGE_SUFFIXES
    }
    print(f"Found {len(available_images)} images in source directory")

    prs = Presentation(presentation_path)

    results = {
        'replaced': [],
        'not_found': [],
        'no_alt_text': [],
        'failed': [],
        'total_images': 0
    }

    print(f"Processing {len(prs.slides)} slides...")

    for slide_num, slide in enumerate(prs.slides, start=1):
        # Snapshot the pictures first (the iterator also descends into
        # groups): the shape tree is modified while we walk the result.
        for shape in list(_iter_picture_shapes(slide.shapes)):
            results['total_images'] += 1
            shape_name = shape.name or f"Unnamed (ID: {getattr(shape, 'shape_id', 'unknown')})"

            alt_text = _get_shape_alt_text(shape)
            if not alt_text:
                results['no_alt_text'].append({
                    'slide': slide_num,
                    'shape_name': shape_name
                })
                continue

            matched_path = _match_replacement_image(alt_text, available_images)
            if matched_path is None:
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text
                })
                continue

            try:
                # Add the new picture BEFORE touching the old one, so a file
                # that cannot be loaded never costs us the original picture.
                new_shape = slide.shapes.add_picture(
                    str(matched_path),
                    shape.left, shape.top, shape.width, shape.height
                )

                # add_picture() appends to the slide's top-level shape tree.
                # Move the new element into the old element's slot so the
                # stacking order is kept and a picture inside a group stays
                # in that group (its offsets are relative to the group).
                old_element = shape._element
                old_element.addprevious(new_shape._element)
                old_element.getparent().remove(old_element)

                # Carry the tag over so the deck can be refreshed again later.
                _set_shape_alt_text(new_shape, alt_text)
            except Exception as e:
                # Per-picture boundary: record the failure and keep going
                # rather than aborting the whole batch.
                results['failed'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text,
                    'error': str(e)
                })
                continue

            results['replaced'].append({
                'slide': slide_num,
                'shape_name': shape_name,
                'matched_path': str(matched_path)
            })
            print(f"Slide {slide_num}: Replaced '{alt_text}'")

    prs.save(save_path)

    # Print summary
    print("\n" + "=" * 80)
    if results['replaced']:
        print(f"✓ Saved updated presentation to {save_path} with {len(results['replaced'])} replacements.")
    else:
        print("No images matched or required updates.")

    if results['not_found']:
        print(f"\n⚠ {len(results['not_found'])} image(s) not found in source directory:")
        for item in results['not_found']:
            print(f"  • Slide {item['slide']}: '{item.get('alt_text', 'N/A')}'")

    if results['failed']:
        print(f"\n⚠ {len(results['failed'])} image(s) could not be replaced:")
        for item in results['failed']:
            print(f"  • Slide {item['slide']}: '{item['alt_text']}' ({item['error']})")

    if results['no_alt_text']:
        print(f"\n⚠ {len(results['no_alt_text'])} image(s) without alt text (skipped):")
        for item in results['no_alt_text']:
            print(f"  • Slide {item['slide']}: '{item['shape_name']}'")

    if not (results['not_found'] or results['failed'] or results['no_alt_text']):
        print("\n✓ All images replaced successfully!")
    print("=" * 80)

    return results
@@ -73,24 +289,7 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save print(f"Checking shape: {shape.name} of type {shape.shape_type}...") shape_name = shape.name or "" - alt_text = "" - - # More robust strategy: Check for alt text in ANY valid element property - # This allows replacing Pictures, Placeholders, GraphicFrames, etc. - try: - # Check for common property names used by python-pptx elements to store non-visual props - # nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group), - # nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector) - nvPr = None - for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']: - if hasattr(shape._element, attr): - nvPr = getattr(shape._element, attr) - break - - if nvPr is not None and hasattr(nvPr, 'cNvPr'): - alt_text = nvPr.cNvPr.get("descr", "") - except Exception: - pass + alt_text = _get_shape_alt_text(shape) print(f"Alt Text for shape '{shape_name}': {alt_text}")