fix ppt update images
This commit is contained in:
@@ -33,19 +33,6 @@ def _(TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
utils._calculate_file_sha1('figures/OneDrive_2026-01-28/All_Respondents/most_prominent_personality_traits.png')
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
utils._calculate_perceptual_hash('figures/Picture.png')
|
||||
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
mo.md(r"""
|
||||
@@ -56,26 +43,21 @@ def _():
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
REPLACE_SOURCE = Path('data/test_replace_source.pptx')
|
||||
REPLACE_TARGET = Path('data/test_replace_target.pptx')
|
||||
return REPLACE_SOURCE, REPLACE_TARGET
|
||||
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_tagged.pptx')
|
||||
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx')
|
||||
|
||||
|
||||
app._unparsable_cell(
|
||||
r"""
|
||||
IMAGE_FILE = Path('figures/OneDrive_2026-01-28/Cons-Early_Professional/cold_distant_approachable_familiar_warm.png'
|
||||
""",
|
||||
name="_"
|
||||
)
|
||||
NEW_IMAGES_DIR = Path('figures/2-2-26')
|
||||
return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(IMAGE_FILE, REPLACE_SOURCE, REPLACE_TARGET):
|
||||
utils.pptx_replace_named_image(
|
||||
presentation_path=REPLACE_SOURCE,
|
||||
target_tag=utils.image_alt_text_generator(IMAGE_FILE),
|
||||
new_image_path=IMAGE_FILE,
|
||||
save_path=REPLACE_TARGET)
|
||||
def _(NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET):
|
||||
# get all files in the image source directory and subdirectories
|
||||
results = utils.pptx_replace_images_from_directory(
|
||||
REPLACE_SOURCE, # Source presentation path,
|
||||
NEW_IMAGES_DIR, # Source directory with new images
|
||||
REPLACE_TARGET # Output path (optional, defaults to overwrite)
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
|
||||
239
utils.py
239
utils.py
@@ -37,10 +37,223 @@ def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str:
|
||||
else:
|
||||
return Path('/'.join(fparts[2:])).as_posix()
|
||||
|
||||
def _get_shape_alt_text(shape) -> str:
|
||||
"""
|
||||
Extract alt text from a PowerPoint shape.
|
||||
|
||||
Args:
|
||||
shape: A python-pptx shape object.
|
||||
|
||||
Returns:
|
||||
str: The alt text (descr attribute) or empty string if not found.
|
||||
"""
|
||||
try:
|
||||
# Check for common property names used by python-pptx elements to store non-visual props
|
||||
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
|
||||
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector)
|
||||
nvPr = None
|
||||
for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']:
|
||||
if hasattr(shape._element, attr):
|
||||
nvPr = getattr(shape._element, attr)
|
||||
break
|
||||
|
||||
if nvPr is not None and hasattr(nvPr, 'cNvPr'):
|
||||
return nvPr.cNvPr.get("descr", "")
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def pptx_replace_images_from_directory(
    presentation_path: Union[str, Path],
    image_source_dir: Union[str, Path],
    save_path: Union[str, Path, None] = None
) -> dict:
    """
    Replace all images in a PowerPoint presentation using images from a directory
    where subdirectory/filename paths match the alt_text of each image.

    This function scans all images in the presentation, extracts their alt_text,
    and looks for a matching image file in the source directory. The alt_text
    should be a relative path (e.g., "All_Respondents/chart_name.png") that
    corresponds to the directory structure under image_source_dir.

    Args:
        presentation_path (str/Path): Path to the source .pptx file.
        image_source_dir (str/Path): Root directory containing replacement images.
            The directory structure should mirror the alt_text paths.
            Example: if alt_text is "All_Respondents/voice_scale.png", the
            replacement image should be at image_source_dir/All_Respondents/voice_scale.png
        save_path (str/Path, optional): Path to save the modified presentation.
            If None, overwrites the input file.

    Returns:
        dict: Summary with keys:
            - 'replaced': List of dicts with slide number, shape name, and matched path
            - 'not_found': List of dicts with slide number, shape name, and alt_text.
              Entries for images that matched a file but failed to be replaced
              additionally carry an 'error' key with the exception text.
            - 'no_alt_text': List of dicts with slide number and shape name
            - 'total_images': Total number of picture shapes processed

    Raises:
        FileNotFoundError: If presentation_path or image_source_dir does not exist.

    Example:
        >>> pptx_replace_images_from_directory(
        ...     "presentation.pptx",
        ...     "figures/2-2-26/",
        ...     "presentation_updated.pptx"
        ... )

    Notes:
        - Alt text should be set using update_ppt_alt_text() or image_alt_text_generator()
        - Images without alt_text are skipped
        - The new picture is given the old picture's left/top/width/height and
          takes the old picture's slot in its parent element, so stacking order
          and group membership are kept
        - If inserting the new picture fails, the original picture is left in place
    """
    presentation_path = Path(presentation_path)
    image_source_dir = Path(image_source_dir)
    save_path = presentation_path if save_path is None else Path(save_path)

    if not presentation_path.exists():
        raise FileNotFoundError(f"Presentation not found: {presentation_path}")
    if not image_source_dir.exists():
        raise FileNotFoundError(f"Image source directory not found: {image_source_dir}")

    # Build a lookup of all available images in the source directory, keyed by
    # their POSIX-style path relative to image_source_dir.
    image_suffixes = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'}
    available_images = {
        img_path.relative_to(image_source_dir).as_posix(): img_path
        for img_path in image_source_dir.rglob("*")
        if img_path.is_file() and img_path.suffix.lower() in image_suffixes
    }

    print(f"Found {len(available_images)} images in source directory")

    # Open presentation
    prs = Presentation(presentation_path)

    # Track results
    results = {
        'replaced': [],
        'not_found': [],
        'no_alt_text': [],
        'total_images': 0
    }

    total_slides = len(prs.slides)
    print(f"Processing {total_slides} slides...")

    # Extensions tried when the alt text carries no suffix of its own.
    fallback_extensions = ('.png', '.jpg', '.jpeg', '.gif')
    # Non-visual property containers that may hold the cNvPr (alt text) element.
    nv_container_names = ('nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr')

    for slide_num, slide in enumerate(prs.slides, start=1):
        # Use recursive iterator to find all pictures including those in groups.
        # Materialised up front because the shape tree is modified while looping.
        picture_shapes = list(_iter_picture_shapes(slide.shapes))

        for shape in picture_shapes:
            results['total_images'] += 1
            shape_name = shape.name or f"Unnamed (ID: {getattr(shape, 'shape_id', 'unknown')})"

            # Get alt text
            alt_text = _get_shape_alt_text(shape)

            if not alt_text:
                results['no_alt_text'].append({
                    'slide': slide_num,
                    'shape_name': shape_name
                })
                continue

            # Look for matching image in source directory: the alt_text as-is
            # first, then with common extensions if it has none.
            matched_path = available_images.get(alt_text)
            if matched_path is None and not Path(alt_text).suffix:
                for ext in fallback_extensions:
                    matched_path = available_images.get(f"{alt_text}{ext}")
                    if matched_path is not None:
                        break

            if matched_path is None:
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text
                })
                continue

            # Replace the image
            try:
                # Record coordinates
                left, top, width, height = shape.left, shape.top, shape.width, shape.height
                old_element = shape._element

                # Add the new picture BEFORE touching the old one, so a failure
                # here (unreadable/unsupported file) leaves the slide unchanged.
                new_shape = slide.shapes.add_picture(str(matched_path), left, top, width, height)
                new_element = new_shape._element

                # Swap the new element into the old element's slot. This keeps
                # the picture inside its original parent (e.g. a group) and at
                # the same stacking position, rather than on top of the slide.
                old_element.getparent().replace(old_element, new_element)

                # Preserve the alt text on the new shape. Compare against None
                # explicitly: truthiness of an XML element depends on whether
                # it has children.
                new_nvPr = None
                for attr in nv_container_names:
                    if hasattr(new_element, attr):
                        new_nvPr = getattr(new_element, attr)
                        break
                if new_nvPr is not None and hasattr(new_nvPr, 'cNvPr'):
                    new_nvPr.cNvPr.set("descr", alt_text)

                results['replaced'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'matched_path': str(matched_path)
                })
                print(f"Slide {slide_num}: Replaced '{alt_text}'")

            except Exception as e:
                # Best-effort batch: record the failure and carry on with the
                # remaining pictures instead of aborting the whole run.
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text,
                    'error': str(e)
                })

    # Save presentation
    prs.save(save_path)

    # Print summary
    print("\n" + "=" * 80)
    if results['replaced']:
        print(f"✓ Saved updated presentation to {save_path} with {len(results['replaced'])} replacements.")
    else:
        print("No images matched or required updates.")

    if results['not_found']:
        print(f"\n⚠ {len(results['not_found'])} image(s) not found in source directory:")
        for item in results['not_found']:
            # Entries that failed during replacement carry the exception text.
            error_note = f" (error: {item['error']})" if 'error' in item else ""
            print(f"   • Slide {item['slide']}: '{item.get('alt_text', 'N/A')}'{error_note}")

    if results['no_alt_text']:
        print(f"\n⚠ {len(results['no_alt_text'])} image(s) without alt text (skipped):")
        for item in results['no_alt_text']:
            print(f"   • Slide {item['slide']}: '{item['shape_name']}'")

    # Only claim success when something was actually replaced.
    if results['replaced'] and not results['not_found'] and not results['no_alt_text']:
        print("\n✓ All images replaced successfully!")
    print("=" * 80)

    return results
|
||||
|
||||
|
||||
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
|
||||
"""
|
||||
Finds and replaces specific images in a PowerPoint presentation while
|
||||
preserving their original position, size, and aspect ratio.
|
||||
Finds and replaces a specific image in a PowerPoint presentation while
|
||||
preserving its original position, size, and aspect ratio.
|
||||
|
||||
This function performs a 'surgical' replacement: it records the coordinates
|
||||
of the existing image, removes it from the slide's XML, and inserts a
|
||||
@@ -48,6 +261,9 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save
|
||||
image by searching for a specific string within the Shape Name
|
||||
(Selection Pane) or Alt Text.
|
||||
|
||||
Note: For batch replacement of all images using a directory structure,
|
||||
use pptx_replace_images_from_directory() instead.
|
||||
|
||||
Args:
|
||||
presentation_path (str): The file path to the source .pptx file.
|
||||
target_tag (str): The unique identifier to look for (e.g., 'HERO_IMAGE').
|
||||
@@ -73,24 +289,7 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save
|
||||
print(f"Checking shape: {shape.name} of type {shape.shape_type}...")
|
||||
|
||||
shape_name = shape.name or ""
|
||||
alt_text = ""
|
||||
|
||||
# More robust strategy: Check for alt text in ANY valid element property
|
||||
# This allows replacing Pictures, Placeholders, GraphicFrames, etc.
|
||||
try:
|
||||
# Check for common property names used by python-pptx elements to store non-visual props
|
||||
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
|
||||
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector)
|
||||
nvPr = None
|
||||
for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']:
|
||||
if hasattr(shape._element, attr):
|
||||
nvPr = getattr(shape._element, attr)
|
||||
break
|
||||
|
||||
if nvPr is not None and hasattr(nvPr, 'cNvPr'):
|
||||
alt_text = nvPr.cNvPr.get("descr", "")
|
||||
except Exception:
|
||||
pass
|
||||
alt_text = _get_shape_alt_text(shape)
|
||||
|
||||
print(f"Alt Text for shape '{shape_name}': {alt_text}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user