diff --git a/02-B_Thematic-Processing.py b/02-B_Thematic-Processing.py index f0dee91..d71cf19 100644 --- a/02-B_Thematic-Processing.py +++ b/02-B_Thematic-Processing.py @@ -194,13 +194,13 @@ def _(): # Start with loading all necessary libraries import numpy as np from os import path - from PIL import Image + from PIL import Image, ImageDraw from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator import matplotlib.pyplot as plt import warnings warnings.filterwarnings("ignore") - return Image, WordCloud, np, plt + return Image, ImageDraw, WordCloud, np, plt @app.cell @@ -222,11 +222,23 @@ def _(df): # create list of keywords sorted by their frequencies. only store the keyword sorted_keywords = sorted(keyword_freq_filtered.items(), key=lambda x: x[1], reverse=True) - sorted_keywords_list = [kw for kw, freq in sorted_keywords] + sorted_keywords_list = [f"{kw}:{freq}" for kw, freq in sorted_keywords] sorted_keywords_list return (keyword_freq_filtered,) +@app.cell +def _(): + IGNORE_WORDS = { + 'chase as a brand': [ + "brand" + ] + } + + + return (IGNORE_WORDS,) + + @app.cell def _(plt): import random @@ -248,37 +260,137 @@ def _(plt): @app.cell -def _(Image, np): - chase_mask = np.array(Image.open("./data/assets/Chase-National-Bank-Logo.png")) +def _(): + # chase_mask = np.array(Image.open("./data/assets/Chase-National-Bank-Logo.png")) - def transform_format(val): - if val == 0: - return 255 - else: - return 1 + # def transform_format(val): + # if val == 0: + # return 255 + # else: + # return 1 - transformed_chase_mask = np.ndarray((chase_mask.shape[0], chase_mask.shape[1]), np.int32) - for i in range(len(chase_mask)): - transformed_chase_mask[i] = list(map(transform_format, chase_mask[i])) + # transformed_chase_mask = np.ndarray((chase_mask.shape[0], chase_mask.shape[1]), np.int32) + # for i in range(len(chase_mask)): + # transformed_chase_mask[i] = list(map(transform_format, chase_mask[i])) return @app.cell -def _(WordCloud, blue_color_func, keyword_freq_filtered, mo, plt): - wordcloud = WordCloud( - background_color='white', - width=800, - max_font_size=60, - max_words=20, - # colormap='Blues', - # relative_scaling=0.5, # Use rank in sorted frequency list instead of pure frequency - color_func=blue_color_func, - # mask=chase_mask - # random_state=42 - ).generate_from_frequencies(keyword_freq_filtered) +def _(mo): + buffer = -100 # Adjust this to increase/decrease space between logo and words + canvas_size = (1200, 800) + + logo_switch = mo.ui.switch(label="Include Chase Logo", value=False) + logo_switch + + return buffer, canvas_size, logo_switch + + +@app.cell(hide_code=True) +def _( + IGNORE_WORDS, + Image, + ImageDraw, + WordCloud, + blue_color_func, + buffer, + canvas_size, + keyword_freq_filtered, + logo_switch, + mo, + np, + plt, + tag_select, +): + # remove specific keywords depending on selected tag + if IGNORE_WORDS.get(tag_select.value.lower()): + for word in IGNORE_WORDS[tag_select.value.lower()]: + if word in keyword_freq_filtered: + del keyword_freq_filtered[word] + + if logo_switch.value: + # 1. Load the logo + # Make sure this path points to your uploaded file + logo_path = "./data/assets/JP-Morgan-Chase-Symbol.png" + logo = Image.open(logo_path).convert("RGBA") + + # Optional: Resize logo if it's too large or small for the canvas + # target_width = 600 + # ratio = target_width / logo.width + # logo = logo.resize((target_width, int(logo.height * ratio)), Image.Resampling.LANCZOS) + target_width = 600 # Set a reasonable size for the logo + if logo.width > target_width: + ratio = target_width / logo.width + new_height = int(logo.height * ratio) + # Use Image.Resampling.LANCZOS for high-quality downsampling + # If you get an error, try Image.LANCZOS or Image.ANTIALIAS + logo = logo.resize((target_width, new_height), Image.Resampling.LANCZOS) + + # 3. Create the mask (0 = draw here, 255 = don't draw here) + # Initialize with 0 (black/draw everywhere) + mask_image = Image.new("L", canvas_size, 0) + draw = ImageDraw.Draw(mask_image) + + # 4. Draw a protected circular area in the center + center = (canvas_size[0] // 2, canvas_size[1] // 2) + + # Calculate radius: half of logo max dimension + buffer + radius = (max(logo.size) // 2) + buffer + + # Draw the white circle (255) which the WordCloud will avoid + draw.ellipse( + (center[0] - radius, center[1] - radius, center[0] + radius, center[1] + radius), + fill=255 + ) + + chase_mask = np.array(mask_image) + + # Generate the WordCloud + wordcloud = WordCloud( + background_color='white', + width=canvas_size[0], + height=canvas_size[1], + max_font_size=100, # Increased font size for larger canvas + max_words=20, # Increased word count to fill space + color_func=blue_color_func, + mask=chase_mask, # Apply the circular mask + contour_width=0, + contour_color='steelblue' + ).generate_from_frequencies(keyword_freq_filtered) + + else: + # Generate the WordCloud + wordcloud = WordCloud( + background_color='white', + width=canvas_size[0], + height=canvas_size[1], + max_font_size=100, # Increased font size for larger canvas + max_words=20, # Increased word count to fill space + color_func=blue_color_func, + # mask=chase_mask, # Apply the circular mask + # contour_width=0, + # contour_color='steelblue' + ).generate_from_frequencies(keyword_freq_filtered) + + # Convert WordCloud to Image to composite the logo + wc_image = wordcloud.to_image() + + if logo_switch.value: + + # Calculate position to center the logo + logo_pos = ( + (canvas_size[0] - logo.width) // 2, + (canvas_size[1] - logo.height) // 2 + ) + + # Paste logo (using alpha channel as mask to keep transparency) + wc_image.paste(logo, logo_pos, logo) + + # Display the generated image + fig = plt.figure(figsize=(7,7)) # Display the generated image: - plt.imshow(wordcloud, interpolation='bilinear') + plt.imshow(wc_image, interpolation='bilinear') plt.axis("off") plt.show() @@ -289,11 +401,11 @@ def _(WordCloud, blue_color_func, keyword_freq_filtered, mo, plt): on_click=lambda val: True ) save_wordcloud_btn - return save_wordcloud_btn, wordcloud + return save_wordcloud_btn, wc_image @app.cell -def _(WORKING_DIR, mo, save_wordcloud_btn, tag_select, wordcloud): +def _(WORKING_DIR, mo, save_wordcloud_btn, tag_select, wc_image): # Wait for start processing button mo.stop(not save_wordcloud_btn.value, "Click button above to save wordcloud image") @@ -315,7 +427,7 @@ def _(WORKING_DIR, mo, save_wordcloud_btn, tag_select, wordcloud): next_number = 1 fpath = WORKING_DIR / f'wordcloud_{tag_select.value.replace(" ", "-")}_{next_number}.png' - wordcloud.to_file(fpath) + wc_image.save(fpath) mo.md(f"Wordcloud saved to: {fpath}") return