wordcloud

2026-02-02 11:12:53 +01:00
parent d770645d8e
commit 45dd121d90
6 changed files with 314 additions and 8 deletions
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -61,15 +61,12 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
@app.cell
 def _(Path, RESULTS_FILE, data_all, mo):
    # Render the "Load Data" section as markdown: the file name of
    # RESULTS_FILE and the row count read from the shape of the collected
    # `data_all` frame.
    mo.md(f"""
    ---
    # Load Data
    **Dataset:** `{Path(RESULTS_FILE).name}`
    **Responses**: `{data_all.collect().shape[0]}`
    """)
    return
@@ -165,8 +162,6 @@ def _(S, mo):
    {filter_form}
    ''')
    return
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -1,7 +1,7 @@
 import marimo
 __generated_with = "0.19.2"
-app = marimo.App(width="medium")
+app = marimo.App(width="full")
 with app.setup:
    import marimo as mo
@@ -166,7 +166,7 @@ def _(data_all):
    return
-@app.cell(hide_code=True)
+@app.cell
 def _():
    mo.md(r"""
    ## Demographic Distributions
@@ -204,6 +204,124 @@ def _(S, demo_plot_cols, demographics):
    return
@app.cell
 def _():
    # Horizontal rule plus top-level heading that opens the brand character
    # part of the report.
    mo.md(r"""
    ---
    # Brand Character Results
    """)
    return
@app.cell
 def _():
    # Subsection heading for the original-vs-refined character comparison.
    mo.md(r"""
    ## Best performing: Original vs Refined frankenstein
    """)
    return
@app.cell
 def _(S, data):
    # Keep the first element of what S.get_character_refine(data) returns.
    # NOTE(review): char_refine_rank is neither displayed nor returned, so
    # this cell currently produces no output and exposes nothing to other
    # cells — confirm whether a plot/table is still to be added.
    char_refine_rank = S.get_character_refine(data)[0]
    # print(char_rank.collect().head())
    # print(char_refine_rank.collect().head())
    return
@app.cell
 def _():
    # Subsection heading for the weighted (points-based) character ranking.
    mo.md(r"""
    ## Character ranking points
    """)
    return
@app.cell
def _(S, char_rank):
    # Convert the character rankings into weighted scores and plot them.
    # The x-axis is labelled 'Character Personality' to match the other
    # character ranking plots in this notebook (it previously read 'Voice',
    # which does not fit a chart titled "Most Popular Character").
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality',
    )
    return
@app.cell
 def _():
    # Subsection heading for the 1st/2nd/3rd place ranking distribution.
    mo.md(r"""
    ## Character ranking 1-2-3
    """)
    return
@app.cell
 def _(S, data):
    # Keep the first element of what S.get_character_ranking(data) returns
    # and return it so other cells can take `char_rank` as an input.
    char_rank = S.get_character_ranking(data)[0]
    return (char_rank,)
@app.cell
 def _(S, char_rank):
    # Plot the top-3 ranking distribution for the character rankings; the
    # call is the cell's last expression.
    S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')
    return
@app.cell
 def _():
    # Subsection heading for the "ranked 1st" count chart.
    mo.md(r"""
    ## Character Ranking: times 1st place
    """)
    return
@app.cell
 def _(S, char_rank):
    # Plot how often each character was ranked 1st; the call is the cell's
    # last expression.
    S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')
    return
@app.cell
 def _():
    # Subsection heading for the personality traits word cloud.
    mo.md(r"""
    ## Prominent predefined personality traits wordcloud
    """)
    return
@app.cell
 def _(S, data):
    # Keep the first element of what S.get_top_8_traits(data) returns and
    # draw a word cloud from its 'Top_8_Traits' column. `column` is passed
    # explicitly because plot_traits_wordcloud defaults to 'Top_3_Traits'.
    top8_traits = S.get_top_8_traits(data)[0]
    S.plot_traits_wordcloud(
        data=top8_traits,
        column='Top_8_Traits',
        title="Most Prominent Personality Traits",
    )
    return
@app.cell
 def _():
    # Subsection heading for the per-character trait frequency analysis.
    mo.md(r"""
    ## Trait frequency per brand character
    """)
    return
@app.cell
 def _():
    # TODO(review): placeholder cell — the respondent join noted here is not
    # implemented yet; the cell has no statements besides `return`.
    # Join respondent
    return
@app.cell
 def _():
    # Horizontal rule plus top-level heading that opens the spoken voice
    # part of the report.
    mo.md(r"""
    ---
    # Spoken Voice Results
    """)
    return
@app.cell(hide_code=True)
 def _():
    mo.md(r"""
--- a/docs/wordcloud-usage.md
+++ b/docs/wordcloud-usage.md
@@ -0,0 +1,85 @@
 # Word Cloud for Personality Traits - Usage Example
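 This example shows how to use the `create_traits_wordcloud` function to visualize the most prominent personality traits from survey data. The same visualization is also available as the `S.plot_traits_wordcloud(...)` method on the survey object; that method takes no `fig_save_dir` or `filter_slug` arguments and instead uses the survey object's own `fig_save_dir` and active filter slug when saving.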
 ## Basic Usage in Jupyter/Marimo Notebook
 ```python
 from utils import JPMCSurvey, create_traits_wordcloud
 from pathlib import Path
 # Load your survey data
 RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv"
 QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf"
 S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
 data = S.load_data()
 # Get Top 3 Traits data
 top3_traits = S.get_top_3_traits(data)[0]
 # Create and display word cloud
 fig = create_traits_wordcloud(
    data=top3_traits,
    column='Top_3_Traits',
    title="Most Prominent Personality Traits",
    fig_save_dir='figures',  # Will save to figures/All_Respondents/
    filter_slug='All_Respondents'
 )
 # Display in notebook
 fig  # or plt.show()
 ```
 ## With Active Filters
 If you're using the survey filter methods, you can pass the filter slug:
 ```python
 # Apply filters
 S.set_filter_consumer(['Early Professional', 'Established Professional'])
 filtered_data = S.get_filtered_data()
 # Get traits from filtered data
 top3_traits = S.get_top_3_traits(filtered_data)[0]
 # Get the filter slug for directory naming
 filter_slug = S._get_filter_slug()
 # Create word cloud with filtered data
 fig = create_traits_wordcloud(
    data=top3_traits,
    column='Top_3_Traits',
    title="Most Prominent Personality Traits<br>(Early & Established Professionals)",
    fig_save_dir='figures',
    filter_slug=filter_slug  # e.g., 'Cons-Early_Professional_Established_Professional'
 )
 fig
 ```
 ## Function Parameters
 - **data**: Polars DataFrame or LazyFrame with trait data
 - **column**: Column name containing comma-separated traits (default: 'Top_3_Traits')
 - **title**: Title for the word cloud
 - **width**: Width in pixels (default: 1600)
 - **height**: Height in pixels (default: 800)
 - **background_color**: Background color (default: 'white')
 - **fig_save_dir**: Directory to save PNG (default: None - doesn't save)
 - **filter_slug**: Subdirectory name for filtered results (default: 'All_Respondents')
 ## Colors
 The word cloud uses colors from `theme.py`:
 - PRIMARY: #0077B6 (Medium Blue)
 - RANK_1: #004C6D (Dark Blue)
 - RANK_2: #008493 (Teal)
 - RANK_3: #5AAE95 (Sea Green)
 ## Output
 - **Returns**: matplotlib Figure object for display in notebooks
 - **Saves**: PNG file to `{fig_save_dir}/{filter_slug}/{sanitized_title}.png` at 300 DPI
 The saved files follow the same naming convention as plots in `plots.py`.
--- a/plots.py
+++ b/plots.py
@@ -943,3 +943,93 @@ class JPMCPlotsMixin:
        chart = self._save_plot(chart, title)
        return chart
    def plot_traits_wordcloud(
        self,
        data: pl.LazyFrame | pl.DataFrame | None = None,
        column: str = 'Top_3_Traits',
        title: str = "Most Prominent Personality Traits",
        width: int = 1600,
        height: int = 800,
        background_color: str = 'white',
    ):
        """Create a word cloud visualization of personality traits from survey data.

        Each non-null value of ``column`` is split on commas; every non-empty,
        whitespace-stripped piece counts as one mention of that trait, and the
        mention counts drive the word sizes.

        If ``self.fig_save_dir`` is set, the figure is also written to
        ``<fig_save_dir>/<filter slug>/<sanitized title>.png`` at 300 DPI.

        Args:
            data: Polars DataFrame or LazyFrame containing trait data; it is
                passed through ``self._ensure_dataframe`` before use.
            column: Name of column containing comma-separated traits
            title: Title for the word cloud
            width: Width of the word cloud image in pixels
            height: Height of the word cloud image in pixels
            background_color: Background color for the word cloud

        Returns:
            matplotlib.figure.Figure: The word cloud figure for display in notebooks

        Raises:
            ValueError: If ``column`` contains no traits after null values and
                empty pieces are discarded.
        """
        import random
        from collections import Counter

        import matplotlib.pyplot as plt
        from wordcloud import WordCloud

        df = self._ensure_dataframe(data)

        # Count trait mentions. Empty pieces (e.g. from a trailing or doubled
        # comma) are skipped so that '' never shows up as a "word".
        trait_freq = Counter(
            trait
            for cell in df[column].drop_nulls()
            for trait in (piece.strip() for piece in cell.split(','))
            if trait
        )
        if not trait_freq:
            raise ValueError(
                f"No traits found in column '{column}'; cannot build a word cloud."
            )

        # JPMC palette; built once instead of on every color_func call.
        palette = (
            ColorPalette.PRIMARY,
            ColorPalette.RANK_1,
            ColorPalette.RANK_2,
            ColorPalette.RANK_3,
        )

        def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
            # Prefer the generator WordCloud hands in so that a seeded
            # WordCloud yields reproducible colors; fall back to the global
            # `random` module otherwise.
            rng = random_state if random_state is not None else random
            return rng.choice(palette)

        # Generate word cloud
        wordcloud = WordCloud(
            width=width,
            height=height,
            background_color=background_color,
            color_func=color_func,
            relative_scaling=0.5,
            min_font_size=10,
            prefer_horizontal=0.7,
            collocations=False,  # Treat each word independently
        ).generate_from_frequencies(trait_freq)

        # Size the figure so that at dpi=100 it matches the requested pixels.
        fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
        fig.tight_layout(pad=0)

        # Save figure if a directory is configured (same pattern as other plots)
        fig_save_dir = getattr(self, 'fig_save_dir', None)
        if fig_save_dir:
            # One subfolder per active filter combination.
            save_path = Path(fig_save_dir) / self._get_filter_slug()
            save_path.mkdir(parents=True, exist_ok=True)

            # Use _sanitize_filename for consistency
            filepath = save_path / f"{self._sanitize_filename(title)}.png"

            # Save as PNG at high resolution
            fig.savefig(filepath, dpi=300, bbox_inches='tight', facecolor='white')
            print(f"Word cloud saved to: {filepath}")

        return fig
--- a/utils.py
+++ b/utils.py
@@ -612,7 +612,7 @@ class JPMCSurvey(JPMCPlotsMixin):
        Renames columns using qid_descr_map if provided.
        """
-        QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
+        QIDs = ['QID1', 'QID2', 'QID3', 'QID4', 'QID7', 'QID13', 'QID14', 'QID15', 'QID16', 'QID17', 'Consumer']
        return self._get_subset(q, QIDs), None
--- a/wordclouds.py
+++ b/wordclouds.py
@@ -0,0 +1,18 @@
 """Word cloud utilities for Voice Branding analysis.
 The main wordcloud function is available as a method on JPMCSurvey:
    S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')
 This module provides standalone imports for backwards compatibility.
 """
 import numpy as np
 from os import path
 from PIL import Image, ImageDraw
 from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
 import matplotlib.pyplot as plt
 import warnings
# NOTE(review): this installs an "ignore" filter for every warning category,
# process-wide, as soon as the module is imported — confirm that such a
# blanket filter is intended rather than a narrower, category-specific one.
warnings.filterwarnings("ignore")