From 45dd121d90edb70dfab215ad9859fee2b3f35663 Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Mon, 2 Feb 2026 11:12:53 +0100 Subject: [PATCH] wordcloud --- 02_quant_analysis.py | 5 -- 03_quant_report.py | 122 +++++++++++++++++++++++++++++++++++++++- docs/wordcloud-usage.md | 85 ++++++++++++++++++++++++++++ plots.py | 90 +++++++++++++++++++++++++++++ utils.py | 2 +- wordclouds.py | 18 ++++++ 6 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 docs/wordcloud-usage.md create mode 100644 wordclouds.py diff --git a/02_quant_analysis.py b/02_quant_analysis.py index bc8b8b7..f6b5bcb 100644 --- a/02_quant_analysis.py +++ b/02_quant_analysis.py @@ -61,15 +61,12 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo): @app.cell def _(Path, RESULTS_FILE, data_all, mo): mo.md(f""" - --- # Load Data **Dataset:** `{Path(RESULTS_FILE).name}` **Responses**: `{data_all.collect().shape[0]}` - - """) return @@ -165,8 +162,6 @@ def _(S, mo): {filter_form} ''') - - return diff --git a/03_quant_report.py b/03_quant_report.py index ae2dd86..5a4ebb8 100644 --- a/03_quant_report.py +++ b/03_quant_report.py @@ -1,7 +1,7 @@ import marimo __generated_with = "0.19.2" -app = marimo.App(width="medium") +app = marimo.App(width="full") with app.setup: import marimo as mo @@ -166,7 +166,7 @@ def _(data_all): return -@app.cell(hide_code=True) +@app.cell def _(): mo.md(r""" ## Demographic Distributions @@ -204,6 +204,124 @@ def _(S, demo_plot_cols, demographics): return +@app.cell +def _(): + mo.md(r""" + --- + + # Brand Character Results + """) + return + + +@app.cell +def _(): + mo.md(r""" + ## Best performing: Original vs Refined frankenstein + """) + return + + +@app.cell +def _(S, data): + char_refine_rank = S.get_character_refine(data)[0] + # print(char_rank.collect().head()) + # print(char_refine_rank.collect().head()) + return + + +@app.cell +def _(): + mo.md(r""" + ## Character ranking points + """) + return + + +@app.cell +def _(S, char_rank): + char_rank_weighted = calculate_weighted_ranking_scores(char_rank) + S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score
(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice') + return + + +@app.cell +def _(): + mo.md(r""" + ## Character ranking 1-2-3 + """) + return + + +@app.cell +def _(S, data): + char_rank = S.get_character_ranking(data)[0] + return (char_rank,) + + +@app.cell +def _(S, char_rank): + S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3') + return + + +@app.cell +def _(): + mo.md(r""" + ## Character Ranking: times 1st place + """) + return + + +@app.cell +def _(S, char_rank): + S.plot_most_ranked_1(char_rank, title="Most Popular Character
(Number of Times Ranked 1st)", x_label='Character Personality') + return + + +@app.cell +def _(): + mo.md(r""" + ## Prominent predefined personality traits wordcloud + """) + return + + +@app.cell +def _(S, data): + top8_traits = S.get_top_8_traits(data)[0] + S.plot_traits_wordcloud( + data=top8_traits, + column='Top_8_Traits', + title="Most Prominent Personality Traits", + ) + return + + +@app.cell +def _(): + mo.md(r""" + ## Trait frequency per brand character + """) + return + + +@app.cell +def _(): + # Join respondent + return + + +@app.cell +def _(): + mo.md(r""" + --- + + # Spoken Voice Results + """) + return + + @app.cell(hide_code=True) def _(): mo.md(r""" diff --git a/docs/wordcloud-usage.md b/docs/wordcloud-usage.md new file mode 100644 index 0000000..857f41b --- /dev/null +++ b/docs/wordcloud-usage.md @@ -0,0 +1,85 @@ +# Word Cloud for Personality Traits - Usage Example + +This example shows how to use the `create_traits_wordcloud` function to visualize the most prominent personality traits from survey data. + +## Basic Usage in Jupyter/Marimo Notebook + +```python +from utils import JPMCSurvey, create_traits_wordcloud +from pathlib import Path + +# Load your survey data +RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv" +QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf" + +S = JPMCSurvey(RESULTS_FILE, QSF_FILE) +data = S.load_data() + +# Get Top 3 Traits data +top3_traits = S.get_top_3_traits(data)[0] + +# Create and display word cloud +fig = create_traits_wordcloud( + data=top3_traits, + column='Top_3_Traits', + title="Most Prominent Personality Traits", + fig_save_dir='figures', # Will save to figures/All_Respondents/ + filter_slug='All_Respondents' +) + +# Display in notebook +fig # or plt.show() +``` + +## With Active Filters + +If you're using the survey filter methods, you can pass the filter slug: + +```python +# Apply filters +S.set_filter_consumer(['Early Professional', 'Established Professional']) +filtered_data = S.get_filtered_data() + +# Get traits from filtered data +top3_traits = S.get_top_3_traits(filtered_data)[0] + +# Get the filter slug for directory naming +filter_slug = S._get_filter_slug() + +# Create word cloud with filtered data +fig = create_traits_wordcloud( + data=top3_traits, + column='Top_3_Traits', + title="Most Prominent Personality Traits
(Early & Established Professionals)", + fig_save_dir='figures', + filter_slug=filter_slug # e.g., 'Cons-Early_Professional_Established_Professional' +) + +fig +``` + +## Function Parameters + +- **data**: Polars DataFrame or LazyFrame with trait data +- **column**: Column name containing comma-separated traits (default: 'Top_3_Traits') +- **title**: Title for the word cloud +- **width**: Width in pixels (default: 1600) +- **height**: Height in pixels (default: 800) +- **background_color**: Background color (default: 'white') +- **fig_save_dir**: Directory to save PNG (default: None - doesn't save) +- **filter_slug**: Subdirectory name for filtered results (default: 'All_Respondents') + +## Colors + +The word cloud uses colors from `theme.py`: +- PRIMARY: #0077B6 (Medium Blue) +- RANK_1: #004C6D (Dark Blue) +- RANK_2: #008493 (Teal) +- RANK_3: #5AAE95 (Sea Green) + +## Output + +- **Returns**: matplotlib Figure object for display in notebooks +- **Saves**: PNG file to `{fig_save_dir}/{filter_slug}/{sanitized_title}.png` at 300 DPI + +The saved files follow the same naming convention as plots in `plots.py`. diff --git a/plots.py b/plots.py index 8b5ad4a..a86a19b 100644 --- a/plots.py +++ b/plots.py @@ -943,3 +943,93 @@ class JPMCPlotsMixin: chart = self._save_plot(chart, title) return chart + + def plot_traits_wordcloud( + self, + data: pl.LazyFrame | pl.DataFrame | None = None, + column: str = 'Top_3_Traits', + title: str = "Most Prominent Personality Traits", + width: int = 1600, + height: int = 800, + background_color: str = 'white', + ): + """Create a word cloud visualization of personality traits from survey data. + + Args: + data: Polars DataFrame or LazyFrame containing trait data + column: Name of column containing comma-separated traits + title: Title for the word cloud + width: Width of the word cloud image in pixels + height: Height of the word cloud image in pixels + background_color: Background color for the word cloud + + Returns: + matplotlib.figure.Figure: The word cloud figure for display in notebooks + """ + import matplotlib.pyplot as plt + from wordcloud import WordCloud + from collections import Counter + import random + + df = self._ensure_dataframe(data) + + # Extract and split traits + traits_list = [] + for row in df[column].drop_nulls(): + # Split by comma and clean whitespace + traits = [trait.strip() for trait in row.split(',')] + traits_list.extend(traits) + + # Create frequency dictionary + trait_freq = Counter(traits_list) + + # Color function using JPMC colors + def color_func(word, font_size, position, orientation, random_state=None, **kwargs): + colors = [ + ColorPalette.PRIMARY, + ColorPalette.RANK_1, + ColorPalette.RANK_2, + ColorPalette.RANK_3, + ] + return random.choice(colors) + + # Generate word cloud + wordcloud = WordCloud( + width=width, + height=height, + background_color=background_color, + color_func=color_func, + relative_scaling=0.5, + min_font_size=10, + prefer_horizontal=0.7, + collocations=False # Treat each word independently + ).generate_from_frequencies(trait_freq) + + # Create matplotlib figure + fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100) + ax.imshow(wordcloud, interpolation='bilinear') + ax.axis('off') + ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT) + + plt.tight_layout(pad=0) + + # Save figure if directory specified (using same pattern as other plots) + if hasattr(self, 'fig_save_dir') and self.fig_save_dir: + save_path = Path(self.fig_save_dir) + + # Add filter slug subfolder + filter_slug = self._get_filter_slug() + save_path = save_path / filter_slug + + if not save_path.exists(): + save_path.mkdir(parents=True, exist_ok=True) + + # Use _sanitize_filename for consistency + filename = f"{self._sanitize_filename(title)}.png" + filepath = save_path / filename + + # Save as PNG at high resolution + fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white') + print(f"Word cloud saved to: {filepath}") + + return fig diff --git a/utils.py b/utils.py index bd0081b..5c35c25 100644 --- a/utils.py +++ b/utils.py @@ -612,7 +612,7 @@ class JPMCSurvey(JPMCPlotsMixin): Renames columns using qid_descr_map if provided. """ - QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer'] + QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID7', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer'] return self._get_subset(q, QIDs), None diff --git a/wordclouds.py b/wordclouds.py new file mode 100644 index 0000000..1a62b73 --- /dev/null +++ b/wordclouds.py @@ -0,0 +1,18 @@ +"""Word cloud utilities for Voice Branding analysis. + +The main wordcloud function is available as a method on JPMCSurvey: + S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...') + +This module provides standalone imports for backwards compatibility. +""" +import numpy as np +from os import path +from PIL import Image, ImageDraw +from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator +import matplotlib.pyplot as plt + +import warnings +warnings.filterwarnings("ignore") + + +