fix ppt update images
This commit is contained in:
@@ -33,19 +33,6 @@ def _(TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
|
||||||
def _():
|
|
||||||
utils._calculate_file_sha1('figures/OneDrive_2026-01-28/All_Respondents/most_prominent_personality_traits.png')
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
|
||||||
def _():
|
|
||||||
utils._calculate_perceptual_hash('figures/Picture.png')
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _():
|
def _():
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
@@ -56,26 +43,21 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
REPLACE_SOURCE = Path('data/test_replace_source.pptx')
|
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_tagged.pptx')
|
||||||
REPLACE_TARGET = Path('data/test_replace_target.pptx')
|
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx')
|
||||||
return REPLACE_SOURCE, REPLACE_TARGET
|
|
||||||
|
|
||||||
|
NEW_IMAGES_DIR = Path('figures/2-2-26')
|
||||||
app._unparsable_cell(
|
return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET
|
||||||
r"""
|
|
||||||
IMAGE_FILE = Path('figures/OneDrive_2026-01-28/Cons-Early_Professional/cold_distant_approachable_familiar_warm.png'
|
|
||||||
""",
|
|
||||||
name="_"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(IMAGE_FILE, REPLACE_SOURCE, REPLACE_TARGET):
|
def _(NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET):
|
||||||
utils.pptx_replace_named_image(
|
# get all files in the image source directory and subdirectories
|
||||||
presentation_path=REPLACE_SOURCE,
|
results = utils.pptx_replace_images_from_directory(
|
||||||
target_tag=utils.image_alt_text_generator(IMAGE_FILE),
|
REPLACE_SOURCE, # Source presentation path,
|
||||||
new_image_path=IMAGE_FILE,
|
NEW_IMAGES_DIR, # Source directory with new images
|
||||||
save_path=REPLACE_TARGET)
|
REPLACE_TARGET # Output path (optional, defaults to overwrite)
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
239
utils.py
239
utils.py
@@ -37,10 +37,223 @@ def image_alt_text_generator(fpath, include_dataset_dirname=False) -> str:
|
|||||||
else:
|
else:
|
||||||
return Path('/'.join(fparts[2:])).as_posix()
|
return Path('/'.join(fparts[2:])).as_posix()
|
||||||
|
|
||||||
|
def _get_shape_alt_text(shape) -> str:
|
||||||
|
"""
|
||||||
|
Extract alt text from a PowerPoint shape.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
shape: A python-pptx shape object.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The alt text (descr attribute) or empty string if not found.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Check for common property names used by python-pptx elements to store non-visual props
|
||||||
|
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
|
||||||
|
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector)
|
||||||
|
nvPr = None
|
||||||
|
for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']:
|
||||||
|
if hasattr(shape._element, attr):
|
||||||
|
nvPr = getattr(shape._element, attr)
|
||||||
|
break
|
||||||
|
|
||||||
|
if nvPr is not None and hasattr(nvPr, 'cNvPr'):
|
||||||
|
return nvPr.cNvPr.get("descr", "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def pptx_replace_images_from_directory(
    presentation_path: Union[str, Path],
    image_source_dir: Union[str, Path],
    save_path: Union[str, Path, None] = None
) -> dict:
    """
    Replace all images in a PowerPoint presentation using images from a directory
    where subdirectory/filename paths match the alt_text of each image.

    This function scans all pictures in the presentation, extracts their alt_text,
    and looks for a matching image file in the source directory. The alt_text
    should be a relative path (e.g., "All_Respondents/chart_name.png") that
    corresponds to the directory structure under image_source_dir.

    Args:
        presentation_path (str/Path): Path to the source .pptx file.
        image_source_dir (str/Path): Root directory containing replacement images.
            The directory structure should mirror the alt_text paths.
            Example: if alt_text is "All_Respondents/voice_scale.png", the
            replacement image should be at image_source_dir/All_Respondents/voice_scale.png
        save_path (str/Path, optional): Path to save the modified presentation.
            If None, overwrites the input file.

    Returns:
        dict: Summary with keys:
            - 'replaced': List of dicts with slide number, shape name, and matched path
            - 'not_found': List of dicts with slide number, shape name, and alt_text.
              Entries whose replacement was attempted but failed additionally
              carry an 'error' key with the exception message.
            - 'no_alt_text': List of dicts with slide number and shape name
            - 'total_images': Total number of picture shapes processed

    Raises:
        FileNotFoundError: If presentation_path or image_source_dir does not exist.

    Example:
        >>> pptx_replace_images_from_directory(
        ...     "presentation.pptx",
        ...     "figures/2-2-26/",
        ...     "presentation_updated.pptx"
        ... )

    Notes:
        - Alt text should be set using update_ppt_alt_text() or image_alt_text_generator()
        - Images without alt_text are skipped
        - The original position, size, stacking order, group membership, shape
          name and alt text are preserved. The new image is scaled to the old
          picture's bounding box, so its own aspect ratio is NOT preserved if
          it differs from the box.
        - If a replacement fails, the original picture is left untouched.
    """
    # Suffixes indexed from the source directory, and the subset tried when
    # the alt text does not already end in an image suffix.
    image_suffixes = {'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'}
    fallback_suffixes = ('.png', '.jpg', '.jpeg', '.gif')

    presentation_path = Path(presentation_path)
    image_source_dir = Path(image_source_dir)
    save_path = presentation_path if save_path is None else Path(save_path)

    if not presentation_path.exists():
        raise FileNotFoundError(f"Presentation not found: {presentation_path}")
    if not image_source_dir.exists():
        raise FileNotFoundError(f"Image source directory not found: {image_source_dir}")

    # Build a lookup of all available images, keyed by their POSIX-style path
    # relative to image_source_dir (the same form the alt text is expected in).
    available_images = {
        img_path.relative_to(image_source_dir).as_posix(): img_path
        for img_path in image_source_dir.rglob("*")
        if img_path.is_file() and img_path.suffix.lower() in image_suffixes
    }
    print(f"Found {len(available_images)} images in source directory")

    # python-pptx documents str paths / file-like objects, so pass a str.
    prs = Presentation(str(presentation_path))

    results = {
        'replaced': [],
        'not_found': [],
        'no_alt_text': [],
        'total_images': 0
    }

    print(f"Processing {len(prs.slides)} slides...")

    for slide_num, slide in enumerate(prs.slides, start=1):
        # Materialise the pictures first: the shape tree is mutated below, so
        # it must not be iterated lazily while elements are being swapped.
        # NOTE(review): _iter_picture_shapes is defined elsewhere in this
        # module; presumably it also descends into group shapes - confirm.
        for shape in list(_iter_picture_shapes(slide.shapes)):
            results['total_images'] += 1
            shape_name = shape.name or f"Unnamed (ID: {getattr(shape, 'shape_id', 'unknown')})"

            alt_text = _get_shape_alt_text(shape)
            if not alt_text:
                results['no_alt_text'].append({
                    'slide': slide_num,
                    'shape_name': shape_name
                })
                continue

            # Try the alt text verbatim first, then a normalised form
            # (trimmed, forward slashes) to tolerate hand-edited alt text.
            lookup_keys = [alt_text]
            normalized_key = alt_text.strip().replace("\\", "/")
            if normalized_key != alt_text:
                lookup_keys.append(normalized_key)

            matched_path = None
            for key in lookup_keys:
                matched_path = available_images.get(key)
                # Only guess an extension when the key does not already end in
                # a recognised image suffix (so "chart_v1.2" still gets tried
                # as "chart_v1.2.png").
                if matched_path is None and Path(key).suffix.lower() not in image_suffixes:
                    matched_path = next(
                        (
                            available_images[key + ext]
                            for ext in fallback_suffixes
                            if key + ext in available_images
                        ),
                        None,
                    )
                if matched_path is not None:
                    break

            if matched_path is None:
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text
                })
                continue

            old_element = shape._element
            new_element = None
            try:
                left, top, width, height = shape.left, shape.top, shape.width, shape.height

                # Create the new picture BEFORE touching the old one, so a
                # failure (unreadable/corrupt image, ...) cannot lose the
                # original picture.
                new_shape = slide.shapes.add_picture(str(matched_path), left, top, width, height)
                new_element = new_shape._element

                # Carry over alt text and shape name so later alt-text or
                # name-based lookups keep working on the replaced picture.
                new_element.nvPicPr.cNvPr.set("descr", alt_text)
                if shape.name:
                    new_shape.name = shape.name

                # add_picture() appends to the slide's top-level shape tree.
                # Move the new element to sit right before the old one so that
                # z-order and group membership are preserved (the recorded
                # coordinates are relative to that same parent), then drop the
                # old element as the very last step.
                old_element.addprevious(new_element)
                old_element.getparent().remove(old_element)
            except Exception as e:
                # Deliberate per-shape best-effort: record the failure and
                # keep processing the remaining pictures. Removing the old
                # element is the last step above, so on failure the original
                # is still in place; only a half-inserted new picture needs
                # to be rolled back.
                if new_element is not None and new_element.getparent() is not None:
                    new_element.getparent().remove(new_element)
                results['not_found'].append({
                    'slide': slide_num,
                    'shape_name': shape_name,
                    'alt_text': alt_text,
                    'error': str(e)
                })
                continue

            results['replaced'].append({
                'slide': slide_num,
                'shape_name': shape_name,
                'matched_path': str(matched_path)
            })
            print(f"Slide {slide_num}: Replaced '{alt_text}'")

    prs.save(str(save_path))

    # 'not_found' mixes two situations (kept in one list for backward
    # compatibility); report them separately so the summary is not misleading.
    missing = [item for item in results['not_found'] if 'error' not in item]
    failed = [item for item in results['not_found'] if 'error' in item]

    print("\n" + "=" * 80)
    if results['replaced']:
        print(f"✓ Saved updated presentation to {save_path} with {len(results['replaced'])} replacements.")
    else:
        print("No images matched or required updates.")

    if missing:
        print(f"\n⚠ {len(missing)} image(s) not found in source directory:")
        for item in missing:
            print(f"  • Slide {item['slide']}: '{item.get('alt_text', 'N/A')}'")

    if failed:
        print(f"\n⚠ {len(failed)} image(s) could not be replaced (original kept):")
        for item in failed:
            print(f"  • Slide {item['slide']}: '{item.get('alt_text', 'N/A')}' ({item['error']})")

    if results['no_alt_text']:
        print(f"\n⚠ {len(results['no_alt_text'])} image(s) without alt text (skipped):")
        for item in results['no_alt_text']:
            print(f"  • Slide {item['slide']}: '{item['shape_name']}'")

    # Only claim success when something was actually replaced and nothing was
    # skipped or failed.
    if results['replaced'] and not results['not_found'] and not results['no_alt_text']:
        print("\n✓ All images replaced successfully!")
    print("=" * 80)

    return results
|
||||||
|
|
||||||
|
|
||||||
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
|
def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save_path):
|
||||||
"""
|
"""
|
||||||
Finds and replaces specific images in a PowerPoint presentation while
|
Finds and replaces a specific image in a PowerPoint presentation while
|
||||||
preserving their original position, size, and aspect ratio.
|
preserving its original position, size, and aspect ratio.
|
||||||
|
|
||||||
This function performs a 'surgical' replacement: it records the coordinates
|
This function performs a 'surgical' replacement: it records the coordinates
|
||||||
of the existing image, removes it from the slide's XML, and inserts a
|
of the existing image, removes it from the slide's XML, and inserts a
|
||||||
@@ -48,6 +261,9 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save
|
|||||||
image by searching for a specific string within the Shape Name
|
image by searching for a specific string within the Shape Name
|
||||||
(Selection Pane) or Alt Text.
|
(Selection Pane) or Alt Text.
|
||||||
|
|
||||||
|
Note: For batch replacement of all images using a directory structure,
|
||||||
|
use pptx_replace_images_from_directory() instead.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
presentation_path (str): The file path to the source .pptx file.
|
presentation_path (str): The file path to the source .pptx file.
|
||||||
target_tag (str): The unique identifier to look for (e.g., 'HERO_IMAGE').
|
target_tag (str): The unique identifier to look for (e.g., 'HERO_IMAGE').
|
||||||
@@ -73,24 +289,7 @@ def pptx_replace_named_image(presentation_path, target_tag, new_image_path, save
|
|||||||
print(f"Checking shape: {shape.name} of type {shape.shape_type}...")
|
print(f"Checking shape: {shape.name} of type {shape.shape_type}...")
|
||||||
|
|
||||||
shape_name = shape.name or ""
|
shape_name = shape.name or ""
|
||||||
alt_text = ""
|
alt_text = _get_shape_alt_text(shape)
|
||||||
|
|
||||||
# More robust strategy: Check for alt text in ANY valid element property
|
|
||||||
# This allows replacing Pictures, Placeholders, GraphicFrames, etc.
|
|
||||||
try:
|
|
||||||
# Check for common property names used by python-pptx elements to store non-visual props
|
|
||||||
# nvPicPr (Picture), nvSpPr (Shape/Placeholder), nvGrpSpPr (Group),
|
|
||||||
# nvGraphicFramePr (GraphicFrame), nvCxnSpPr (Connector)
|
|
||||||
nvPr = None
|
|
||||||
for attr in ['nvPicPr', 'nvSpPr', 'nvGrpSpPr', 'nvGraphicFramePr', 'nvCxnSpPr']:
|
|
||||||
if hasattr(shape._element, attr):
|
|
||||||
nvPr = getattr(shape._element, attr)
|
|
||||||
break
|
|
||||||
|
|
||||||
if nvPr is not None and hasattr(nvPr, 'cNvPr'):
|
|
||||||
alt_text = nvPr.cNvPr.get("descr", "")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
print(f"Alt Text for shape '{shape_name}': {alt_text}")
|
print(f"Alt Text for shape '{shape_name}': {alt_text}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user