fix ppt update images

This commit is contained in:
2026-02-02 17:36:32 +01:00
parent 6ba30ff041
commit b7cf6adfb8
2 changed files with 230 additions and 49 deletions

239
utils.py
View File

@@ -37,16 +37,232 @@ def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str:
else:
return Path('/'.join(fparts[2:])).as_posix()
def _get_shape_alt_text(shape) -> str:
    """
    Return the alt text stored on a PowerPoint shape.

    The alt text is the ``descr`` attribute of the shape's ``cNvPr`` node,
    which sits inside a non-visual-properties container whose name depends
    on the shape kind.

    Args:
        shape: A python-pptx shape object.

    Returns:
        str: The value of ``descr``, or an empty string when the shape has no
        recognised container, the container has no ``cNvPr``, the attribute
        is absent, or anything goes wrong while looking it up.
    """
    # Containers that python-pptx elements use for non-visual properties:
    # nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
    # nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector).
    container_names = ('nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr')
    try:
        element = shape._element
        # Only the first container present on the element is consulted.
        non_visual = next(
            (getattr(element, name) for name in container_names if hasattr(element, name)),
            None,
        )
        if non_visual is None or not hasattr(non_visual, 'cNvPr'):
            return ""
        return non_visual.cNvPr.get("descr", "")
    except Exception:
        # Best-effort lookup: any failure is reported as "no alt text".
        return ""
def pptx_replace_images_from_directory(
    presentation_path: Union[str, Path],
    image_source_dir: Union[str, Path],
    save_path: Union[str, Path, None] = None
) -> dict:
    """
    Replace all images in a PowerPoint presentation using images from a directory
    where subdirectory/filename paths match the alt_text of each image.

    This function scans all images in the presentation, extracts their alt_text,
    and looks for a matching image file in the source directory. The alt_text
    should be a relative path (e.g., "All_Respondents/chart_name.png") that
    corresponds to the directory structure under image_source_dir. When the
    alt_text has no file extension, '.png', '.jpg', '.jpeg' and '.gif' are
    tried in that order.

    Args:
        presentation_path (str/Path): Path to the source .pptx file.
        image_source_dir (str/Path): Root directory containing replacement images.
            The directory structure should mirror the alt_text paths.
            Example: if alt_text is "All_Respondents/voice_scale.png", the
            replacement image should be at image_source_dir/All_Respondents/voice_scale.png
        save_path (str/Path, optional): Path to save the modified presentation.
            If None, overwrites the input file.

    Returns:
        dict: Summary with keys:
            - 'replaced': List of dicts with slide number, shape name, and matched path
            - 'not_found': List of dicts with slide number, shape name, and alt_text.
              Entries for images that were matched but could not be replaced
              additionally carry an 'error' key with the exception message.
            - 'no_alt_text': List of dicts with slide number and shape name
            - 'total_images': Total number of picture shapes processed

    Raises:
        FileNotFoundError: If presentation_path or image_source_dir does not exist.

    Example:
        >>> pptx_replace_images_from_directory(
        ...     "presentation.pptx",
        ...     "figures/2-2-26/",
        ...     "presentation_updated.pptx"
        ... )

    Notes:
        - Alt text should be set using update_ppt_alt_text() or image_alt_text_generator()
        - Images without alt_text are skipped
        - Original image position, size, stacking order, group membership,
          shape name and alt text are preserved
        - A picture whose replacement fails is left untouched in the deck
        - The presentation is always saved, even when nothing was replaced
    """
    presentation_path = Path(presentation_path)
    image_source_dir = Path(image_source_dir)
    save_path = presentation_path if save_path is None else Path(save_path)

    if not presentation_path.exists():
        raise FileNotFoundError(f"Presentation not found: {presentation_path}")
    if not image_source_dir.exists():
        raise FileNotFoundError(f"Image source directory not found: {image_source_dir}")

    # Build a lookup of all available images in the source directory
    available_images = _index_replacement_images(image_source_dir)
    print(f"Found {len(available_images)} images in source directory")

    # Open presentation
    prs = Presentation(presentation_path)

    # Track results
    results = {
        'replaced': [],
        'not_found': [],
        'no_alt_text': [],
        'total_images': 0
    }

    print(f"Processing {len(prs.slides)} slides...")

    for slide_num, slide in enumerate(prs.slides, start=1):
        # Materialise the list up front: the loop body edits the shape tree,
        # so iterating it lazily would be unsafe. The iterator is recursive
        # and therefore also yields pictures nested inside groups.
        for shape in list(_iter_picture_shapes(slide.shapes)):
            results['total_images'] += 1
            shape_name = shape.name or f"Unnamed (ID: {getattr(shape, 'shape_id', 'unknown')})"

            alt_text = _get_shape_alt_text(shape)
            if not alt_text:
                results['no_alt_text'].append({
                    'slide': slide_num,
                    'shape_name': shape_name
                })
                continue

            matched_path = _find_replacement_image(alt_text, available_images)
            if matched_path is None:
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text
                })
                continue

            try:
                _swap_picture_in_place(slide, shape, matched_path, alt_text)
            except Exception as e:
                # Kept under 'not_found' (with an extra 'error' key) so that
                # existing callers inspecting that list keep working.
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text,
                    'error': str(e)
                })
                continue

            results['replaced'].append({
                'slide': slide_num,
                'shape_name': shape_name,
                'matched_path': str(matched_path)
            })
            print(f"Slide {slide_num}: Replaced '{alt_text}'")

    # Save presentation
    prs.save(save_path)

    # Print summary
    print("\n" + "=" * 80)
    if results['replaced']:
        print(f"✓ Saved updated presentation to {save_path} with {len(results['replaced'])} replacements.")
    else:
        print("No images matched or required updates.")
    if results['not_found']:
        print(f"\n{len(results['not_found'])} image(s) not found in source directory:")
        for item in results['not_found']:
            # Surface the failure reason for images that were found on disk
            # but could not be embedded.
            detail = f" (error: {item['error']})" if 'error' in item else ""
            print(f" • Slide {item['slide']}: '{item.get('alt_text', 'N/A')}'{detail}")
    if results['no_alt_text']:
        print(f"\n{len(results['no_alt_text'])} image(s) without alt text (skipped):")
        for item in results['no_alt_text']:
            print(f" • Slide {item['slide']}: '{item['shape_name']}'")
    # Only claim full success when something was actually replaced.
    if results['replaced'] and not results['not_found'] and not results['no_alt_text']:
        print("\n✓ All images replaced successfully!")
    print("=" * 80)
    return results


# File suffixes (lower-case) accepted as candidate replacement images.
_REPLACEMENT_IMAGE_SUFFIXES = frozenset(
    {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'}
)

# Suffixes tried, in order, when an alt text has no extension of its own.
_FALLBACK_IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif')

# Element attributes under which python-pptx exposes non-visual properties:
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector).
_NON_VISUAL_PROPERTY_ATTRS = (
    'nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr'
)


def _index_replacement_images(image_source_dir: Path) -> dict:
    """Map each image's POSIX path relative to image_source_dir to its full Path."""
    return {
        img_path.relative_to(image_source_dir).as_posix(): img_path
        for img_path in image_source_dir.rglob("*")
        if img_path.is_file() and img_path.suffix.lower() in _REPLACEMENT_IMAGE_SUFFIXES
    }


def _find_replacement_image(alt_text: str, available_images: dict):
    """Return the image Path matching alt_text (exact, then by fallback suffix) or None."""
    exact = available_images.get(alt_text)
    if exact is not None:
        return exact
    if Path(alt_text).suffix:
        # The alt text already names an extension; do not guess another one.
        return None
    for ext in _FALLBACK_IMAGE_SUFFIXES:
        candidate = available_images.get(f"{alt_text}{ext}")
        if candidate is not None:
            return candidate
    return None


def _swap_picture_in_place(slide, shape, image_path, alt_text: str) -> None:
    """Put image_path exactly where shape sits, then remove shape; raise on failure."""
    old_element = shape._element

    # Create the new picture BEFORE touching the old one: add_picture can raise
    # (e.g. for an image file python-pptx cannot embed), and in that case the
    # original picture must survive.
    new_shape = slide.shapes.add_picture(
        str(image_path), shape.left, shape.top, shape.width, shape.height
    )
    new_element = new_shape._element

    try:
        # Carry the alt text over so the picture can be matched again later.
        for attr in _NON_VISUAL_PROPERTY_ATTRS:
            if hasattr(new_element, attr):
                non_visual = getattr(new_element, attr)
                # Explicit None test: truth-testing an lxml element depends on
                # its child count and is deprecated.
                if non_visual is not None and hasattr(non_visual, 'cNvPr'):
                    non_visual.cNvPr.set("descr", alt_text)
                break

        # Keep the Selection Pane name stable across replacements.
        if shape.name:
            new_shape.name = shape.name

        # add_picture appended the new element to the slide's top-level shape
        # tree. Move it directly in front of the old element so it inherits
        # the same z-order and stays in the same group, then drop the old one.
        old_element.addprevious(new_element)
        old_element.getparent().remove(old_element)
    except Exception:
        # Roll back the half-finished insert so no duplicate picture is left.
        parent = new_element.getparent()
        if parent is not None:
            parent.remove(new_element)
        raise
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
"""
Finds and replaces specific images in a PowerPoint presentation while
preserving their original position, size, and aspect ratio.
Finds and replaces a specific image in a PowerPoint presentation while
preserving its original position, size, and aspect ratio.
This function performs a 'surgical' replacement: it records the coordinates
of the existing image, removes it from the slide's XML, and inserts a
new image into the exact same bounding box. It identifies the target
image by searching for a specific string within the Shape Name
(Selection Pane) or Alt Text.
Note: For batch replacement of all images using a directory structure,
use pptx_replace_images_from_directory() instead.
Args:
presentation_path (str): The file path to the source .pptx file.
@@ -73,24 +289,7 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save
print(f"Checking shape: {shape.name} of type {shape.shape_type}...")
shape_name = shape.name or ""
alt_text = ""
# More robust strategy: Check for alt text in ANY valid element property
# This allows replacing Pictures, Placeholders, GraphicFrames, etc.
try:
# Check for common property names used by python-pptx elements to store non-visual props
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector)
nvPr = None
for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']:
if hasattr(shape._element, attr):
nvPr = getattr(shape._element, attr)
break
if nvPr is not None and hasattr(nvPr, 'cNvPr'):
alt_text = nvPr.cNvPr.get("descr", "")
except Exception:
pass
alt_text = _get_shape_alt_text(shape)
print(f"Alt Text for shape '{shape_name}': {alt_text}")