wordcloud

This commit is contained in:
2026-02-02 11:12:53 +01:00
parent d770645d8e
commit 45dd121d90
6 changed files with 314 additions and 8 deletions

View File

@@ -61,15 +61,12 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
@app.cell @app.cell
def _(Path, RESULTS_FILE, data_all, mo): def _(Path, RESULTS_FILE, data_all, mo):
mo.md(f""" mo.md(f"""
--- ---
# Load Data # Load Data
**Dataset:** `{Path(RESULTS_FILE).name}` **Dataset:** `{Path(RESULTS_FILE).name}`
**Responses**: `{data_all.collect().shape[0]}` **Responses**: `{data_all.collect().shape[0]}`
""") """)
return return
@@ -165,8 +162,6 @@ def _(S, mo):
{filter_form} {filter_form}
''') ''')
return return

View File

@@ -1,7 +1,7 @@
import marimo import marimo
__generated_with = "0.19.2" __generated_with = "0.19.2"
app = marimo.App(width="medium") app = marimo.App(width="full")
with app.setup: with app.setup:
import marimo as mo import marimo as mo
@@ -166,7 +166,7 @@ def _(data_all):
return return
@app.cell(hide_code=True) @app.cell
def _(): def _():
mo.md(r""" mo.md(r"""
## Demographic Distributions ## Demographic Distributions
@@ -204,6 +204,124 @@ def _(S, demo_plot_cols, demographics):
return return
@app.cell
def _():
# Markdown-only cell: a horizontal rule followed by the top-level "Brand Character Results" heading.
mo.md(r"""
---
# Brand Character Results
""")
return
@app.cell
def _():
# Markdown-only cell: second-level heading for the original vs. refined comparison.
mo.md(r"""
## Best performing: Original vs Refined frankenstein
""")
return
@app.cell
def _(S, data):
# Keeps element [0] of whatever S.get_character_refine(data) returns.
# NOTE(review): char_refine_rank is neither used in this cell nor returned,
# so no other cell can consume it -- confirm whether a plot is still to be added.
char_refine_rank = S.get_character_refine(data)[0]
# print(char_rank.collect().head())
# print(char_refine_rank.collect().head())
return
@app.cell
def _():
# Markdown-only cell: second-level heading for the weighted ranking points plot.
mo.md(r"""
## Character ranking points
""")
return
@app.cell
def _(S, char_rank):
    # Turn the per-respondent character rankings into weighted scores, then plot them.
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
    # The x axis lists characters, not voices: use the same axis label as the
    # other character-ranking plots in this notebook.
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality',
    )
    return
@app.cell
def _():
# Markdown-only cell: second-level heading for the top-3 ranking distribution.
mo.md(r"""
## Character ranking 1-2-3
""")
return
@app.cell
def _(S, data):
# Keeps element [0] of S.get_character_ranking(data) and returns it as char_rank,
# which the ranking plot cells take as a parameter.
char_rank = S.get_character_ranking(data)[0]
return (char_rank,)
@app.cell
def _(S, char_rank):
# Plots char_rank via S.plot_top3_ranking_distribution; as the cell's last
# expression, the result is presumably what marimo shows as the cell output.
S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')
return
@app.cell
def _():
# Markdown-only cell: second-level heading for the "ranked 1st" count plot.
mo.md(r"""
## Character Ranking: times 1st place
""")
return
@app.cell
def _(S, char_rank):
# Plots char_rank via S.plot_most_ranked_1 (title says: number of times ranked 1st).
S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')
return
@app.cell
def _():
# Markdown-only cell: second-level heading for the personality traits word cloud.
mo.md(r"""
## Prominent predefined personality traits wordcloud
""")
return
@app.cell
def _(S, data):
# Keeps element [0] of S.get_top_8_traits(data) and renders its 'Top_8_Traits'
# column as a word cloud via S.plot_traits_wordcloud.
top8_traits = S.get_top_8_traits(data)[0]
S.plot_traits_wordcloud(
data=top8_traits,
column='Top_8_Traits',
title="Most Prominent Personality Traits",
)
return
@app.cell
def _():
# Markdown-only cell: second-level heading for the per-character trait frequency section.
mo.md(r"""
## Trait frequency per brand character
""")
return
@app.cell
def _():
# TODO: placeholder cell -- the respondent join is not implemented yet
# (presumably intended for the "Trait frequency per brand character" section; confirm).
return
@app.cell
def _():
# Markdown-only cell: a horizontal rule followed by the top-level "Spoken Voice Results" heading.
mo.md(r"""
---
# Spoken Voice Results
""")
return
@app.cell(hide_code=True) @app.cell(hide_code=True)
def _(): def _():
mo.md(r""" mo.md(r"""

85
docs/wordcloud-usage.md Normal file
View File

@@ -0,0 +1,85 @@
# Word Cloud for Personality Traits - Usage Example
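This example shows how to use the `create_traits_wordcloud` function to visualize the most prominent personality traits from survey data. The same visualization is also available as a method on the survey object: `S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')`.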
## Basic Usage in Jupyter/Marimo Notebook
```python
from utils import JPMCSurvey, create_traits_wordcloud
from pathlib import Path
# Load your survey data
RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv"
QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf"
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data = S.load_data()
# Get Top 3 Traits data
top3_traits = S.get_top_3_traits(data)[0]
# Create and display word cloud
fig = create_traits_wordcloud(
data=top3_traits,
column='Top_3_Traits',
title="Most Prominent Personality Traits",
fig_save_dir='figures', # Will save to figures/All_Respondents/
filter_slug='All_Respondents'
)
# Display in notebook
fig # or plt.show()
```
## With Active Filters
If you're using the survey filter methods, you can pass the filter slug:
```python
# Apply filters
S.set_filter_consumer(['Early Professional', 'Established Professional'])
filtered_data = S.get_filtered_data()
# Get traits from filtered data
top3_traits = S.get_top_3_traits(filtered_data)[0]
# Get the filter slug for directory naming
filter_slug = S._get_filter_slug()
# Create word cloud with filtered data
fig = create_traits_wordcloud(
data=top3_traits,
column='Top_3_Traits',
title="Most Prominent Personality Traits<br>(Early & Established Professionals)",
fig_save_dir='figures',
filter_slug=filter_slug # e.g., 'Cons-Early_Professional_Established_Professional'
)
fig
```
## Function Parameters
- **data**: Polars DataFrame or LazyFrame with trait data
- **column**: Column name containing comma-separated traits (default: 'Top_3_Traits')
- **title**: Title for the word cloud
- **width**: Width in pixels (default: 1600)
- **height**: Height in pixels (default: 800)
- **background_color**: Background color (default: 'white')
- **fig_save_dir**: Directory to save PNG (default: None - doesn't save)
- **filter_slug**: Subdirectory name for filtered results (default: 'All_Respondents')
## Colors
The word cloud uses colors from `theme.py`:
- PRIMARY: #0077B6 (Medium Blue)
- RANK_1: #004C6D (Dark Blue)
- RANK_2: #008493 (Teal)
- RANK_3: #5AAE95 (Sea Green)
## Output
- **Returns**: matplotlib Figure object for display in notebooks
- **Saves**: PNG file to `{fig_save_dir}/{filter_slug}/{sanitized_title}.png` at 300 DPI
The saved files follow the same naming convention as plots in `plots.py`.

View File

@@ -943,3 +943,93 @@ class JPMCPlotsMixin:
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
def plot_traits_wordcloud(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    column: str = 'Top_3_Traits',
    title: str = "Most Prominent Personality Traits",
    width: int = 1600,
    height: int = 800,
    background_color: str = 'white',
):
    """Create a word cloud visualization of personality traits from survey data.

    Every non-null cell of ``column`` is split on commas, each piece is
    stripped of surrounding whitespace, and the resulting traits are counted.
    Word size in the cloud follows those counts.

    Args:
        data: Polars DataFrame or LazyFrame containing trait data
        column: Name of column containing comma-separated traits
        title: Title for the word cloud; also used (sanitized) as the PNG
            file name. ``<br>`` is shown as a line break in the figure title.
        width: Width of the word cloud image in pixels
        height: Height of the word cloud image in pixels
        background_color: Background color for the word cloud

    Returns:
        matplotlib.figure.Figure: The word cloud figure for display in notebooks

    Raises:
        ValueError: If ``column`` contains no non-empty trait, since a word
            cloud cannot be drawn from zero words.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    from collections import Counter
    import random

    df = self._ensure_dataframe(data)

    # Count traits, skipping empty fragments produced by inputs such as
    # "Warm,,Calm" or a trailing comma so they never become a "word".
    trait_freq = Counter(
        trait
        for cell in df[column].drop_nulls()
        for trait in (part.strip() for part in cell.split(','))
        if trait
    )
    if not trait_freq:
        raise ValueError(
            f"No traits found in column '{column}'; cannot build a word cloud."
        )

    # JPMC palette the words are colored from.
    palette = (
        ColorPalette.PRIMARY,
        ColorPalette.RANK_1,
        ColorPalette.RANK_2,
        ColorPalette.RANK_3,
    )

    def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        # Draw from the generator WordCloud hands in, so word colors follow
        # the same random stream as the layout; fall back to the global
        # module only when none is supplied.
        rng = random_state if random_state is not None else random
        return rng.choice(palette)

    # Generate word cloud
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        color_func=color_func,
        relative_scaling=0.5,
        min_font_size=10,
        prefer_horizontal=0.7,
        collocations=False,  # Treat each word independently
    ).generate_from_frequencies(trait_freq)

    # Size the figure so that, at 100 dpi, it matches the requested pixel size.
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    # Titles elsewhere in the project use "<br>" for line breaks; matplotlib
    # does not interpret HTML, so translate it to a real newline for display.
    ax.set_title(title.replace('<br>', '\n'), fontsize=16, pad=20, color=ColorPalette.TEXT)
    plt.tight_layout(pad=0)

    # Save figure if a directory is configured (same pattern as other plots):
    # <fig_save_dir>/<filter slug>/<sanitized title>.png
    fig_save_dir = getattr(self, 'fig_save_dir', None)
    if fig_save_dir:
        save_dir = Path(fig_save_dir) / self._get_filter_slug()
        save_dir.mkdir(parents=True, exist_ok=True)

        # Use _sanitize_filename for consistency with the other plots.
        filepath = save_dir / f"{self._sanitize_filename(title)}.png"

        # Save as PNG at high resolution
        fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        print(f"Word cloud saved to: {filepath}")

    return fig

View File

@@ -612,7 +612,7 @@ class JPMCSurvey(JPMCPlotsMixin):
Renames columns using qid_descr_map if provided. Renames columns using qid_descr_map if provided.
""" """
QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer'] QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID7', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
return self._get_subset(q, QIDs), None return self._get_subset(q, QIDs), None

18
wordclouds.py Normal file
View File

@@ -0,0 +1,18 @@
"""Word cloud utilities for Voice Branding analysis.
The main wordcloud function is available as a method on JPMCSurvey:
S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')
This module provides standalone imports for backwards compatibility.
"""
import numpy as np
from os import path
from PIL import Image, ImageDraw
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
import warnings
# NOTE(review): with no category or module argument this ignores every warning,
# and the filter is installed as a side effect of importing this module --
# confirm that silencing warnings for all importing code is intended.
warnings.filterwarnings("ignore")