wordcloud

2026-02-02 11:12:53 +01:00
parent d770645d8e
commit 45dd121d90
6 changed files with 314 additions and 8 deletions
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -61,15 +61,12 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
@app.cell
 def _(Path, RESULTS_FILE, data_all, mo):
    mo.md(f"""
-
    ---
    # Load Data

    **Dataset:** `{Path(RESULTS_FILE).name}`

    **Responses**: `{data_all.collect().shape[0]}`
-
-
    """)
    return

@@ -165,8 +162,6 @@ def _(S, mo):

    {filter_form}
    ''')
-
-
    return


--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -1,7 +1,7 @@
 import marimo

 __generated_with = "0.19.2"
-app = marimo.App(width="medium")
+app = marimo.App(width="full")

 with app.setup:
    import marimo as mo
@@ -166,7 +166,7 @@ def _(data_all):
    return


-@app.cell(hide_code=True)
+@app.cell
 def _():
    mo.md(r"""
    ## Demographic Distributions
@@ -204,6 +204,124 @@ def _(S, demo_plot_cols, demographics):
    return


+@app.cell
+def _():
+    mo.md(r"""
+    ---
+
+    # Brand Character Results
+    """)
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Best performing: Original vs Refined frankenstein
+    """)
+    return
+
+
+@app.cell
+def _(S, data):
+    # Keep the first element of what S.get_character_refine(data) returns.
+    # NOTE(review): char_refine_rank is not consumed by any cell visible in
+    # this diff — confirm it is still needed.
+    char_refine_rank = S.get_character_refine(data)[0]
+    # print(char_rank.collect().head())
+    # print(char_refine_rank.collect().head())
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Character ranking points
+    """)
+    return
+
+
+@app.cell
+def _(S, char_rank):
+    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
+    S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice')
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Character ranking 1-2-3
+    """)
+    return
+
+
+@app.cell
+def _(S, data):
+    # Keep the first element of what S.get_character_ranking(data) returns and
+    # expose it as char_rank for the ranking plot cells.
+    char_rank = S.get_character_ranking(data)[0]
+    return (char_rank,)
+
+
+@app.cell
+def _(S, char_rank):
+    # Render the top-3 ranking distribution for char_rank; as the cell's last
+    # expression, the plot is what the cell displays.
+    S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Character Ranking: times 1st place
+    """)
+    return
+
+
+@app.cell
+def _(S, char_rank):
+    # Render the "ranked 1st" chart for char_rank. The title uses "<br>" as its
+    # line break, same as the other plot titles in this notebook.
+    S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Prominent predefined personality traits wordcloud
+    """)
+    return
+
+
+@app.cell
+def _(S, data):
+    # Keep the first element of what S.get_top_8_traits(data) returns and draw a
+    # word cloud from its 'Top_8_Traits' column (the column is passed explicitly
+    # because plot_traits_wordcloud defaults to 'Top_3_Traits').
+    top8_traits = S.get_top_8_traits(data)[0]
+    S.plot_traits_wordcloud(
+        data=top8_traits,
+        column='Top_8_Traits',
+        title="Most Prominent Personality Traits",
+    )
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ## Trait frequency per brand character
+    """)
+    return
+
+
+@app.cell
+def _():
+    # TODO: join respondent data (original note was left unfinished).
+    # This cell is currently an empty placeholder and computes nothing.
+    return
+
+
+@app.cell
+def _():
+    mo.md(r"""
+    ---
+
+    # Spoken Voice Results
+    """)
+    return
+
+
@app.cell(hide_code=True)
 def _():
    mo.md(r"""
--- a/docs/wordcloud-usage.md
+++ b/docs/wordcloud-usage.md
@@ -0,0 +1,85 @@
+# Word Cloud for Personality Traits - Usage Example
+
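+This example shows how to use the `create_traits_wordcloud` function to visualize the most prominent personality traits from survey data. The same visualization is also available as a method on the survey object, `S.plot_traits_wordcloud(data, column=..., title=...)`, which takes no `fig_save_dir` or `filter_slug` arguments and instead saves using the survey's own `fig_save_dir` and active filter slug.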
+
+## Basic Usage in Jupyter/Marimo Notebook
+
+```python
+from utils import JPMCSurvey, create_traits_wordcloud
+from pathlib import Path
+
+# Load your survey data
+RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv"
+QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf"
+
+S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+data = S.load_data()
+
+# Get Top 3 Traits data
+top3_traits = S.get_top_3_traits(data)[0]
+
+# Create and display word cloud
+fig = create_traits_wordcloud(
+    data=top3_traits,
+    column='Top_3_Traits',
+    title="Most Prominent Personality Traits",
+    fig_save_dir='figures',  # Will save to figures/All_Respondents/
+    filter_slug='All_Respondents'
+)
+
+# Display in notebook
+fig  # or plt.show()
+```
+
+## With Active Filters
+
+If you're using the survey filter methods, you can pass the filter slug:
+
+```python
+# Apply filters
+S.set_filter_consumer(['Early Professional', 'Established Professional'])
+filtered_data = S.get_filtered_data()
+
+# Get traits from filtered data
+top3_traits = S.get_top_3_traits(filtered_data)[0]
+
+# Get the filter slug for directory naming
+filter_slug = S._get_filter_slug()
+
+# Create word cloud with filtered data
+fig = create_traits_wordcloud(
+    data=top3_traits,
+    column='Top_3_Traits',
+    title="Most Prominent Personality Traits<br>(Early & Established Professionals)",
+    fig_save_dir='figures',
+    filter_slug=filter_slug  # e.g., 'Cons-Early_Professional_Established_Professional'
+)
+
+fig
+```
+
+## Function Parameters
+
+- **data**: Polars DataFrame or LazyFrame with trait data
+- **column**: Column name containing comma-separated traits (default: 'Top_3_Traits')
+- **title**: Title for the word cloud
+- **width**: Width in pixels (default: 1600)
+- **height**: Height in pixels (default: 800)
+- **background_color**: Background color (default: 'white')
+- **fig_save_dir**: Directory to save PNG (default: None - doesn't save)
+- **filter_slug**: Subdirectory name for filtered results (default: 'All_Respondents')
+
+## Colors
+
+The word cloud uses colors from `theme.py`:
+- PRIMARY: #0077B6 (Medium Blue)
+- RANK_1: #004C6D (Dark Blue)
+- RANK_2: #008493 (Teal)
+- RANK_3: #5AAE95 (Sea Green)
+
+## Output
+
+- **Returns**: matplotlib Figure object for display in notebooks
+- **Saves**: PNG file to `{fig_save_dir}/{filter_slug}/{sanitized_title}.png` at 300 DPI
+
+The saved files follow the same naming convention as plots in `plots.py`.
--- a/plots.py
+++ b/plots.py
@@ -943,3 +943,93 @@ class JPMCPlotsMixin:

        chart = self._save_plot(chart, title)
        return chart
+
+    def plot_traits_wordcloud(
+        self,
+        data: pl.LazyFrame | pl.DataFrame | None = None,
+        column: str = 'Top_3_Traits',
+        title: str = "Most Prominent Personality Traits",
+        width: int = 1600,
+        height: int = 800,
+        background_color: str = 'white',
+    ):
+        """Create a word cloud visualization of personality traits from survey data.
+        
+        Args:
+            data: Polars DataFrame or LazyFrame containing trait data
+            column: Name of column containing comma-separated traits
+            title: Title for the word cloud
+            width: Width of the word cloud image in pixels
+            height: Height of the word cloud image in pixels
+            background_color: Background color for the word cloud
+            
+        Returns:
+            matplotlib.figure.Figure: The word cloud figure for display in notebooks
+        """
+        import matplotlib.pyplot as plt
+        from wordcloud import WordCloud
+        from collections import Counter
+        import random
+        
+        df = self._ensure_dataframe(data)
+        
+        # Extract and split traits
+        traits_list = []
+        for row in df[column].drop_nulls():
+            # Split by comma and clean whitespace
+            traits = [trait.strip() for trait in row.split(',')]
+            traits_list.extend(traits)
+        
+        # Create frequency dictionary
+        trait_freq = Counter(traits_list)
+        
+        # Color function using JPMC colors
+        def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
+            colors = [
+                ColorPalette.PRIMARY,
+                ColorPalette.RANK_1,
+                ColorPalette.RANK_2,
+                ColorPalette.RANK_3,
+            ]
+            return random.choice(colors)
+        
+        # Generate word cloud
+        wordcloud = WordCloud(
+            width=width,
+            height=height,
+            background_color=background_color,
+            color_func=color_func,
+            relative_scaling=0.5,
+            min_font_size=10,
+            prefer_horizontal=0.7,
+            collocations=False  # Treat each word independently
+        ).generate_from_frequencies(trait_freq)
+        
+        # Create matplotlib figure
+        fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
+        ax.imshow(wordcloud, interpolation='bilinear')
+        ax.axis('off')
+        ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
+        
+        plt.tight_layout(pad=0)
+        
+        # Save figure if directory specified (using same pattern as other plots)
+        if hasattr(self, 'fig_save_dir') and self.fig_save_dir:
+            save_path = Path(self.fig_save_dir)
+            
+            # Add filter slug subfolder
+            filter_slug = self._get_filter_slug()
+            save_path = save_path / filter_slug
+            
+            if not save_path.exists():
+                save_path.mkdir(parents=True, exist_ok=True)
+            
+            # Use _sanitize_filename for consistency
+            filename = f"{self._sanitize_filename(title)}.png"
+            filepath = save_path / filename
+            
+            # Save as PNG at high resolution
+            fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
+            print(f"Word cloud saved to: {filepath}")
+        
+        return fig
--- a/utils.py
+++ b/utils.py
@@ -612,7 +612,7 @@ class JPMCSurvey(JPMCPlotsMixin):
        
        Renames columns using qid_descr_map if provided.
        """
-        QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
+        QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID7', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
        return self._get_subset(q, QIDs), None


--- a/wordclouds.py
+++ b/wordclouds.py
@@ -0,0 +1,18 @@
+"""Word cloud utilities for Voice Branding analysis.
+
+The main wordcloud function is available as a method on JPMCSurvey:
+    S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')
+
+This module provides standalone imports for backwards compatibility.
+"""
+# Imports re-exported by this module; nothing else in this file uses them.
+import numpy as np
+from os import path
+from PIL import Image, ImageDraw
+from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
+import matplotlib.pyplot as plt
+
+import warnings
+# NOTE(review): with no category or module argument this installs an "ignore"
+# filter for every warning, process-wide, as a side effect of importing this
+# module — confirm that is intended.
+warnings.filterwarnings("ignore")
+
+
+