wordcloud
This commit is contained in:
@@ -61,15 +61,12 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
|
||||
@app.cell
|
||||
def _(Path, RESULTS_FILE, data_all, mo):
|
||||
mo.md(f"""
|
||||
|
||||
---
|
||||
# Load Data
|
||||
|
||||
**Dataset:** `{Path(RESULTS_FILE).name}`
|
||||
|
||||
**Responses**: `{data_all.collect().shape[0]}`
|
||||
|
||||
|
||||
""")
|
||||
return
|
||||
|
||||
@@ -165,8 +162,6 @@ def _(S, mo):
|
||||
|
||||
{filter_form}
|
||||
''')
|
||||
|
||||
|
||||
return
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import marimo
|
||||
|
||||
__generated_with = "0.19.2"
|
||||
app = marimo.App(width="medium")
|
||||
app = marimo.App(width="full")
|
||||
|
||||
with app.setup:
|
||||
import marimo as mo
|
||||
@@ -166,7 +166,7 @@ def _(data_all):
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Demographic Distributions
|
||||
@@ -204,6 +204,124 @@ def _(S, demo_plot_cols, demographics):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
---
|
||||
|
||||
# Brand Character Results
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Best performing: Original vs Refined frankenstein
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, data):
    # Fetch the character-refinement data; only element 0 of the result of
    # S.get_character_refine(data) is kept.
    # NOTE(review): char_refine_rank is neither returned nor used in this
    # cell, so no other cell can consume it yet -- confirm whether a plot for
    # the "Original vs Refined" heading is still to be added.
    char_refine_rank = S.get_character_refine(data)[0]
    # print(char_rank.collect().head())
    # print(char_refine_rank.collect().head())
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Character ranking points
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, char_rank):
    # Convert the character rankings into weighted scores and plot them.
    # The weighting shown in the title (1st=3pts, 2nd=2pts, 3rd=1pt) is taken
    # from the title text itself -- confirm against the helper's implementation.
    # NOTE(review): calculate_weighted_ranking_scores is not a cell argument;
    # presumably it is imported in the `app.setup` block -- confirm.
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        # The x axis lists brand characters, not voices; use the same label as
        # the other character-ranking plots in this notebook.
        x_label='Character Personality',
    )
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Character ranking 1-2-3
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, data):
    # Keep element 0 of S.get_character_ranking(data) and export it as
    # `char_rank` so the character-ranking plot cells can take it as an argument.
    char_rank = S.get_character_ranking(data)[0]
    return (char_rank,)
|
||||
|
||||
|
||||
@app.cell
def _(S, char_rank):
    # Plot the top-3 ranking distribution for the character ranking data
    # (`char_rank` is produced by the cell that calls S.get_character_ranking).
    S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Character Ranking: times 1st place
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, char_rank):
    # Plot how often each character was ranked first, using the same
    # `char_rank` data as the other character-ranking cells.
    S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Prominent predefined personality traits wordcloud
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, data):
    # Keep element 0 of S.get_top_8_traits(data) and render it as a word cloud.
    top8_traits = S.get_top_8_traits(data)[0]
    # `column` is passed explicitly because plot_traits_wordcloud defaults to
    # 'Top_3_Traits', which is not the column used here.
    S.plot_traits_wordcloud(
        data=top8_traits,
        column='Top_8_Traits',
        title="Most Prominent Personality Traits",
    )
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Trait frequency per brand character
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _():
    # Join respondent
    # NOTE(review): placeholder cell -- it contains no code yet; the join it
    # refers to still has to be implemented.
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
---
|
||||
|
||||
# Spoken Voice Results
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
mo.md(r"""
|
||||
|
||||
85
docs/wordcloud-usage.md
Normal file
85
docs/wordcloud-usage.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# Word Cloud for Personality Traits - Usage Example
|
||||
|
||||
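This example shows how to use the `create_traits_wordcloud` function to visualize the most prominent personality traits from survey data. The same visualization is also available as a method on the survey object: `S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')`.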
|
||||
|
||||
## Basic Usage in Jupyter/Marimo Notebook
|
||||
|
||||
```python
|
||||
from utils import JPMCSurvey, create_traits_wordcloud
|
||||
from pathlib import Path
|
||||
|
||||
# Load your survey data
|
||||
RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv"
|
||||
QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf"
|
||||
|
||||
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
||||
data = S.load_data()
|
||||
|
||||
# Get Top 3 Traits data
|
||||
top3_traits = S.get_top_3_traits(data)[0]
|
||||
|
||||
# Create and display word cloud
|
||||
fig = create_traits_wordcloud(
|
||||
data=top3_traits,
|
||||
column='Top_3_Traits',
|
||||
title="Most Prominent Personality Traits",
|
||||
fig_save_dir='figures', # Will save to figures/All_Respondents/
|
||||
filter_slug='All_Respondents'
|
||||
)
|
||||
|
||||
# Display in notebook
|
||||
fig # or plt.show()
|
||||
```
|
||||
|
||||
## With Active Filters
|
||||
|
||||
If you're using the survey filter methods, you can pass the filter slug:
|
||||
|
||||
```python
|
||||
# Apply filters
|
||||
S.set_filter_consumer(['Early Professional', 'Established Professional'])
|
||||
filtered_data = S.get_filtered_data()
|
||||
|
||||
# Get traits from filtered data
|
||||
top3_traits = S.get_top_3_traits(filtered_data)[0]
|
||||
|
||||
# Get the filter slug for directory naming
|
||||
filter_slug = S._get_filter_slug()
|
||||
|
||||
# Create word cloud with filtered data
|
||||
fig = create_traits_wordcloud(
|
||||
data=top3_traits,
|
||||
column='Top_3_Traits',
|
||||
title="Most Prominent Personality Traits<br>(Early & Established Professionals)",
|
||||
fig_save_dir='figures',
|
||||
filter_slug=filter_slug # e.g., 'Cons-Early_Professional_Established_Professional'
|
||||
)
|
||||
|
||||
fig
|
||||
```
|
||||
|
||||
## Function Parameters
|
||||
|
||||
- **data**: Polars DataFrame or LazyFrame with trait data
|
||||
- **column**: Column name containing comma-separated traits (default: 'Top_3_Traits')
|
||||
- **title**: Title for the word cloud
|
||||
- **width**: Width in pixels (default: 1600)
|
||||
- **height**: Height in pixels (default: 800)
|
||||
- **background_color**: Background color (default: 'white')
|
||||
- **fig_save_dir**: Directory to save PNG (default: None - doesn't save)
|
||||
- **filter_slug**: Subdirectory name for filtered results (default: 'All_Respondents')
|
||||
|
||||
## Colors
|
||||
|
||||
The word cloud uses colors from `theme.py`:
|
||||
- PRIMARY: #0077B6 (Medium Blue)
|
||||
- RANK_1: #004C6D (Dark Blue)
|
||||
- RANK_2: #008493 (Teal)
|
||||
- RANK_3: #5AAE95 (Sea Green)
|
||||
|
||||
## Output
|
||||
|
||||
- **Returns**: matplotlib Figure object for display in notebooks
|
||||
- **Saves**: PNG file to `{fig_save_dir}/{filter_slug}/{sanitized_title}.png` at 300 DPI
|
||||
|
||||
The saved files follow the same naming convention as plots in `plots.py`.
|
||||
90
plots.py
90
plots.py
@@ -943,3 +943,93 @@ class JPMCPlotsMixin:
|
||||
|
||||
chart = self._save_plot(chart, title)
|
||||
return chart
|
||||
|
||||
def plot_traits_wordcloud(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    column: str = 'Top_3_Traits',
    title: str = "Most Prominent Personality Traits",
    width: int = 1600,
    height: int = 800,
    background_color: str = 'white',
    random_state: int | None = None,
):
    """Create a word cloud visualization of personality traits from survey data.

    Each non-null cell of ``column`` is split on commas; the stripped,
    non-empty pieces are counted and the counts drive the word sizes.

    Args:
        data: Polars DataFrame or LazyFrame containing trait data. It is
            passed through ``self._ensure_dataframe`` before use.
        column: Name of column containing comma-separated traits.
        title: Title for the word cloud. ``<br>`` markers (used by titles of
            the other plots) are shown as line breaks, because matplotlib
            does not interpret HTML. The unmodified title is still used to
            build the output file name.
        width: Width of the word cloud image in pixels.
        height: Height of the word cloud image in pixels.
        background_color: Background color for the word cloud.
        random_state: Optional seed forwarded to ``WordCloud`` so that word
            placement and colors are reproducible. ``None`` (the default)
            keeps the layout and colors random, as before.

    Returns:
        matplotlib.figure.Figure: The word cloud figure for display in notebooks.

    Raises:
        ValueError: If ``column`` yields no traits at all (every cell is
            null or contains only separators/whitespace).
    """
    import random
    from collections import Counter

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    df = self._ensure_dataframe(data)

    # Count every trait. Empty pieces (e.g. from a trailing comma or a blank
    # cell) are skipped so that "" never shows up as a word.
    trait_freq = Counter(
        trait
        for cell in df[column].drop_nulls()
        for trait in (piece.strip() for piece in cell.split(','))
        if trait
    )
    if not trait_freq:
        raise ValueError(
            f"No traits found in column {column!r}; cannot build a word cloud."
        )

    # JPMC palette, built once instead of on every color_func call.
    palette = (
        ColorPalette.PRIMARY,
        ColorPalette.RANK_1,
        ColorPalette.RANK_2,
        ColorPalette.RANK_3,
    )

    def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        # WordCloud hands its own RNG in as ``random_state``; using it keeps
        # the colors reproducible whenever the cloud itself is seeded.
        rng = random_state if random_state is not None else random
        return rng.choice(palette)

    # Generate word cloud
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        color_func=color_func,
        relative_scaling=0.5,
        min_font_size=10,
        prefer_horizontal=0.7,
        collocations=False,  # Treat each word independently
        random_state=random_state,
    ).generate_from_frequencies(trait_freq)

    # Create matplotlib figure sized so that, at dpi=100, it matches the
    # requested pixel dimensions.
    fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    # matplotlib would print "<br>" literally, so convert it to a newline.
    ax.set_title(
        title.replace('<br>', '\n'),
        fontsize=16,
        pad=20,
        color=ColorPalette.TEXT,
    )

    fig.tight_layout(pad=0)

    # Save figure if directory specified (using same pattern as other plots)
    fig_save_dir = getattr(self, 'fig_save_dir', None)
    if fig_save_dir:
        # Figures are grouped in a sub-folder named after the active filters.
        save_path = Path(fig_save_dir) / self._get_filter_slug()
        save_path.mkdir(parents=True, exist_ok=True)

        # Use _sanitize_filename for consistency
        filepath = save_path / f"{self._sanitize_filename(title)}.png"

        # Save as PNG at high resolution
        fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
        print(f"Word cloud saved to: {filepath}")

    return fig
|
||||
|
||||
2
utils.py
2
utils.py
@@ -612,7 +612,7 @@ class JPMCSurvey(JPMCPlotsMixin):
|
||||
|
||||
Renames columns using qid_descr_map if provided.
|
||||
"""
|
||||
QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
|
||||
QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID7', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
|
||||
return self._get_subset(q, QIDs), None
|
||||
|
||||
|
||||
|
||||
18
wordclouds.py
Normal file
18
wordclouds.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Word cloud utilities for Voice Branding analysis.
|
||||
|
||||
The main wordcloud function is available as a method on JPMCSurvey:
|
||||
S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')
|
||||
|
||||
This module provides standalone imports for backwards compatibility; it currently defines no functions of its own.
|
||||
"""
|
||||
import numpy as np
|
||||
from os import path
|
||||
from PIL import Image, ImageDraw
|
||||
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import warnings

# NOTE(review): this installs an "ignore" filter for every warning category
# as an import side effect, so merely importing this module hides warnings
# raised anywhere else in the process -- confirm this is intended.
warnings.filterwarnings("ignore")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user