speaking style trait scores vertical

2026-01-23 12:26:47 +01:00
parent 424355f4a1
commit 84a0f8052e
5 changed files with 615 additions and 90 deletions
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -12,7 +12,10 @@ def _():

    from validation import check_progress, duration_validation
    from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
-    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
+    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
+
+    import plots as plts
+    import utils as utl
    return (
        JPMCSurvey,
        Path,
@@ -20,27 +23,23 @@ def _():
        check_progress,
        duration_validation,
        mo,
+        pl,
        plot_average_scores_with_counts,
-        plot_character_ranking_distribution,
-        plot_most_ranked_1_character,
+        plot_most_ranked_1,
+        plot_ranking_distribution,
        plot_top3_ranking_distribution,
+        plot_top3_selection_counts,
+        plot_voice_selection_counts,
        plot_weighted_ranking_score,
+        plts,
+        utl,
    )


-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    # Load Data
-    """)
-    return
-
-
@app.cell
-def _(Path, mo):
+def _():
    RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
-    mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
    return QSF_FILE, RESULTS_FILE


@@ -52,17 +51,30 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    return data_all, survey


-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    ## Data Validation
+@app.cell
+def _(Path, RESULTS_FILE, data_all, mo):
+    mo.md(f"""
+    # Load Data
+
+    **Dataset:** `{Path(RESULTS_FILE).name}`
+
+    {mo.ui.table(data_all.collect())}
    """)
    return


-@app.cell
-def _(check_progress, data_all):
-    check_progress(data_all)
+@app.cell(hide_code=True)
+def _(check_progress, data_all, duration_validation, mo):
+    mo.md(f"""
+    ## Data Validation
+
+    {check_progress(data_all)}
+
+
+
+    {duration_validation(data_all)}
+
+    """)
    return


@@ -112,8 +124,6 @@ def _(mo):
 def _(mo):
    mo.md(r"""
    ## Character personality ranking
-
-    ### 1. Which character personality is ranked best?
    """)
    return

@@ -126,15 +136,23 @@ def _(data, survey):


@app.cell
-def _(char_rank, plot_character_ranking_distribution):
-    plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
+def _(char_rank, mo, plot_top3_ranking_distribution):
+    mo.md(f"""
+    ### 1. Which character personality is ranked best?
+
+
+    {mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
+    """)
    return


@app.cell
-def _(mo):
-    mo.md(r"""
-    ### 2. Which character personality is ranked number 1 the most?
+def _(char_rank, mo, plot_most_ranked_1):
+    mo.md(f"""
+    ### 2. Which character personality is ranked 1st the most?
+
+
+    {mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
    """)
    return

@@ -143,16 +161,18 @@ def _(mo):
 def _(
    calculate_weighted_ranking_scores,
    char_rank,
+    mo,
    plot_weighted_ranking_score,
 ):
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
-    plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
-    return
+    # This cell scores character personalities, so the x-axis is labelled accordingly (not 'Voice').
+
+    mo.md(f"""
+    ### 3. Which character personality most popular based on weighted scores?


-@app.cell
-def _(char_rank, plot_most_ranked_1_character):
-    plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
+    {mo.ui.plotly(plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Character Personality', width=1000))}
+    """)
    return


@@ -167,51 +187,74 @@ def _(mo):
@app.cell
 def _(data, survey):
    v_18_8_3 = survey.get_18_8_3(data)[0].collect()
-    print(v_18_8_3.head())
-    return
+    # print(v_18_8_3.head())
+    return (v_18_8_3,)


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which 8 voices are chosen the most out of 18?
+def _(mo, plot_voice_selection_counts, v_18_8_3):
+    mo.md(f"""
+    ### Which 8 voices are chosen the most out of 18? 
+
+    {mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
+def _(mo, plot_top3_selection_counts, v_18_8_3):
+    mo.md(f"""
+    ### Which 3 voices are chosen the most out of 18? 
+
+    How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18. 
+
+    {mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
-    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best.
+def _(
+    calculate_weighted_ranking_scores,
+    data,
+    mo,
+    plot_ranking_distribution,
+    survey,
+):
+    top3_voices = survey.get_top_3_voices(data)[0].collect()
+    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
+
+    mo.md(f"""
+    ### Which voice is ranked best in the ranking question for top 3? 
+
+    (not best 3 out of 8 question)  
+
+    {mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
+
+    """)
+    return top3_voices, top3_voices_weighted
+
+
+@app.cell
+def _(mo, plot_weighted_ranking_score, top3_voices_weighted):  # x_label is explicit: the plot helper defaults to "Character Personality"
+    mo.md(f"""
+    ### Most popular **voice** based on weighted scores?
+    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best. 
+    Distribution of the rankings for each voice:
+
+    {mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", x_label='Voice', height=500, width=1000))}
    """)
    return


@app.cell
-def _(plot_top3_ranking_distribution, top3_voices):
-    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
-    return
+def _(mo, plot_most_ranked_1, top3_voices):
+    mo.md(f"""
+    ### Which voice is ranked number 1 the most? 

+    (not always the voice with most points)

-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which voice is ranked number 1 the most? (not always the voice with most points)
-
-    - Each of the 350 participants gives exactly one 1st-place vote.
-    - Total Rank-1 votes = 350.
-    - Voices are sorted from most to least 1st-place votes.
-    - The top 3 voices with the most Rank-1 votes are colored blue.
-    - This can differ from the points-based winners (3–2–1 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
+    {mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
    """)
    return

@@ -235,6 +278,56 @@ def _(mo):
    return


+@app.cell
+def _(data, survey):
+    ss_or, choice_map_or = survey.get_ss_orange_red(data)
+    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)
+
+    # Join both style groups per respondent. The frame is deliberately left
+    # un-collected: nothing in this cell needs the materialised rows.
+    ss_all = ss_or.join(ss_gb, on='_recordId')
+
+    # Merge the two trait maps (on a key collision the green/blue entry wins)
+    choice_map = {**choice_map_or, **choice_map_gb}
+    print(choice_map)
+    return choice_map, ss_all
+
+
+@app.cell
+def _(choice_map, ss_all, utl):
+    ss_long = utl.process_speaking_style_data(ss_all, choice_map)  # wide SS_* columns -> long format, one row per score
+    ss_long  # bare expression, presumably rendered as the cell's output — TODO confirm
+    return (ss_long,)
+
+
+@app.cell
+def _(pl, ss_long):
+    target_trait = "Indifferent | Unfocussed | Detached:Attentive | Helpful | Caring | Deliberate"  # one trait, written "left anchors:right anchors"
+    trait_data = ss_long.filter(pl.col("Description") == target_trait)  # rows of that single trait only
+    trait_data
+    return target_trait, trait_data
+
+
+@app.cell
+def _(plts, target_trait, trait_data):
+    plts.plot_speaking_style_trait_scores(
+        trait_data,
+        title=target_trait.replace(":", " ↔ "),  # show the two poles joined by an arrow instead of ':'
+        # trait_description="Attentive vs Indifferent", # simplified title
+    )
+    return
+
+
+app._unparsable_cell(
+    """
+    for trait in ss_long.select(\"Description\").unique().to_series().to_list():
+        trait_data = ss_long.filter(pl.col(\"Description\") == trait)
+        mo.md(f\"\"\"
+    """,
+    name="_"
+)
+
+
@app.cell(hide_code=True)
 def _(mo):
    mo.md(r"""
--- a/plots.py
+++ b/plots.py
@@ -216,22 +216,22 @@ def plot_top3_ranking_distribution(
    return fig


-def plot_character_ranking_distribution(
+def plot_ranking_distribution(
    df: pl.DataFrame,
-    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
-    x_label: str = "Character Personality",
+    title: str = "Rankings Distribution<br>(1st to 4th Place)",
+    x_label: str = "Item",
    y_label: str = "Number of Votes",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
-    Create a stacked bar chart showing the distribution of rankings (1st to 4th) for character personalities.
-    Sorted by the number of Rank 1 votes to highlight the 'Best' options.
+    Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
+    Sorted by the number of Rank 1 votes.

    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns (prefix 'Character_Ranking').
+        DataFrame containing ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
@@ -249,8 +249,8 @@ def plot_character_ranking_distribution(
        Plotly figure object.
    """
    stats = []
-    # Identify columns related to Character Ranking (excluding ID)
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    # Identify ranking columns (assume all columns except _recordId)
+    ranking_cols = [c for c in df.columns if c != '_recordId']

    for col in ranking_cols:
        # Count occurrences of each rank (1, 2, 3, 4)
@@ -280,7 +280,7 @@ def plot_character_ranking_distribution(
    # Clean up labels: Remove prefix and underscores
    # e.g. "Character_Ranking_The_Coach" -> "The Coach"
    labels = [
-        col.replace('Character_Ranking_', '').replace('_', ' ').strip() 
+        col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip() 
        for col in stats_df['column']
    ]

@@ -354,21 +354,22 @@ def plot_character_ranking_distribution(
    return fig


-def plot_most_ranked_1_character(
+def plot_most_ranked_1(
    df: pl.DataFrame,
-    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
-    x_label: str = "Character Personality",
+    title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
+    x_label: str = "Item",
    y_label: str = "Count of 1st Place Rankings",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
-    Create a bar chart showing which character personality was ranked #1 the most.
+    Create a bar chart showing which item (character/voice) was ranked #1 the most.
+    Top 3 items are highlighted.

    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns.
+        DataFrame containing ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
@@ -386,8 +387,8 @@ def plot_most_ranked_1_character(
        Plotly figure object.
    """
    stats = []
-    # Identify columns related to Character Ranking
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    # Identify ranking columns (assume all columns except _recordId)
+    ranking_cols = [c for c in df.columns if c != '_recordId']

    for col in ranking_cols:
        # Count occurrences of rank 1
@@ -403,19 +404,25 @@ def plot_most_ranked_1_character(

    # Clean up labels
    labels = [
-        col.replace('Character_Ranking_', '').replace('_', ' ').strip() 
+        col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip() 
        for col in stats_df['column']
    ]

-    fig = go.Figure()
+    # Assign colors: Top 3 get PRIMARY (Blue), others get NEUTRAL (Grey)
+    colors = [
+        ColorPalette.PRIMARY if i < 3 else ColorPalette.NEUTRAL
+        for i in range(len(stats_df))
+    ]

+    fig = go.Figure()
+    
    fig.add_trace(go.Bar(
        x=labels,
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='inside',
        textfont=dict(size=10, color='white'),
-        marker_color=ColorPalette.PRIMARY,
+        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
    ))

@@ -444,7 +451,7 @@ def plot_most_ranked_1_character(

 def plot_weighted_ranking_score(
    weighted_df: pl.DataFrame,
-    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
+    title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
    x_label: str = "Character Personality",
    y_label: str = "Total Weighted Score",
    color: str = ColorPalette.PRIMARY,
@@ -508,4 +515,339 @@ def plot_weighted_ranking_score(
        font=dict(size=11)
    )

-    return fig
+    return fig
+
+
+def plot_voice_selection_counts(
+    df: pl.DataFrame,
+    target_column: str = "8_Combined",
+    title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
+    x_label: str = "Voice",
+    y_label: str = "Number of Times Chosen",
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Create a bar plot showing the frequency of voice selections.
+    Takes a column containing comma-separated values (e.g. "Voice 1, Voice 2..."),
+    counts occurrences, and highlights the 8 most frequent voices (ties broken by name).
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame containing the selection column.
+    target_column : str, optional
+        Name of the column containing comma-separated voice selections.
+        Defaults to "8_Combined".
+    title : str, optional
+        Plot title.
+    x_label : str, optional
+        X-axis label.
+    y_label : str, optional
+        Y-axis label.
+    height : int, optional
+        Plot height in pixels.
+    width : int, optional
+        Plot width in pixels.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object (an empty figure when target_column is not in df).
+    """
+    if target_column not in df.columns:
+        return go.Figure()
+
+    # Process the data:
+    # 1. Select the relevant column and remove nulls
+    # 2. Split the comma-separated string into a list
+    # 3. Explode the list so each voice gets its own row
+    # 4. Strip whitespace ensuring "Voice 1" and " Voice 1" match
+    # 5. Count occurrences; sort by count, ties by name, so the bar order is reproducible
+    stats_df = (
+        df.select(pl.col(target_column))
+        .drop_nulls()
+        .with_columns(pl.col(target_column).str.split(","))
+        .explode(target_column)
+        .with_columns(pl.col(target_column).str.strip_chars())
+        .filter(pl.col(target_column) != "")
+        .group_by(target_column)
+        .agg(pl.len().alias("count"))
+        .sort(["count", target_column], descending=[True, False])
+    )
+
+    # Define colors: Top 8 get PRIMARY, rest get NEUTRAL (relies on the sort above)
+    colors = [
+        ColorPalette.PRIMARY if i < 8 else ColorPalette.NEUTRAL
+        for i in range(len(stats_df))
+    ]
+
+    fig = go.Figure()
+
+    fig.add_trace(go.Bar(
+        x=stats_df[target_column],
+        y=stats_df['count'],
+        text=stats_df['count'],
+        textposition='outside',
+        marker_color=colors,
+        hovertemplate='<b>%{x}</b><br>Selections: %{y}<extra></extra>'
+    ))
+
+    fig.update_layout(
+        title=title,
+        xaxis_title=x_label,
+        yaxis_title=y_label,
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        xaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            tickangle=-45
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID
+        ),
+        font=dict(size=11),
+    )
+
+    return fig
+
+
+def plot_top3_selection_counts(
+    df: pl.DataFrame,
+    target_column: str = "3_Ranked",
+    title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
+    x_label: str = "Voice",
+    y_label: str = "Count of Mentions in Top 3",
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Question: Which 3 voices are chosen the most out of 18?
+
+    How many times does each voice end up in the top 3?
+    (this is based on the survey question where participants need to choose 3 out
+    of the earlier selected 8 voices). So how often each of the 18 stimuli ended
+    up in participants' Top 3, after they first selected 8 out of 18.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame containing the ranking column (comma-separated strings).
+    target_column : str, optional
+        Name of the column containing comma-separated Top 3 voice selections.
+        Defaults to "3_Ranked".
+    title : str, optional
+        Plot title.
+    x_label : str, optional
+        X-axis label.
+    y_label : str, optional
+        Y-axis label.
+    height : int, optional
+        Plot height in pixels.
+    width : int, optional
+        Plot width in pixels.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object (an empty figure when target_column is not in df).
+    """
+    if target_column not in df.columns:
+        return go.Figure()
+
+    # Explode the comma-separated selections, count each voice, then sort by
+    # count with ties broken by name so the bar order is reproducible.
+    stats_df = (
+        df.select(pl.col(target_column))
+        .drop_nulls()
+        .with_columns(pl.col(target_column).str.split(","))
+        .explode(target_column)
+        .with_columns(pl.col(target_column).str.strip_chars())
+        .filter(pl.col(target_column) != "")
+        .group_by(target_column)
+        .agg(pl.len().alias("count"))
+        .sort(["count", target_column], descending=[True, False])
+    )
+
+    # Define colors: Top 3 get PRIMARY, rest get NEUTRAL (relies on the sort above)
+    colors = [
+        ColorPalette.PRIMARY if i < 3 else ColorPalette.NEUTRAL
+        for i in range(len(stats_df))
+    ]
+
+    fig = go.Figure()
+
+    fig.add_trace(go.Bar(
+        x=stats_df[target_column],
+        y=stats_df['count'],
+        text=stats_df['count'],
+        textposition='outside',
+        marker_color=colors,
+        hovertemplate='<b>%{x}</b><br>In Top 3: %{y} times<extra></extra>'
+    ))
+
+    fig.update_layout(
+        title=title,
+        xaxis_title=x_label,
+        yaxis_title=y_label,
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        xaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            tickangle=-45
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID
+        ),
+        font=dict(size=11),
+    )
+
+    return fig
+
+
+def plot_speaking_style_trait_scores(
+    df: pl.DataFrame,
+    trait_description: str = None,
+    left_anchor: str = None,
+    right_anchor: str = None,
+    title: str = "Speaking Style Trait Analysis",
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Plot scores for a single speaking style trait across multiple voices.
+
+    The plot shows the average score per Voice, sorted by score (ties by Voice name).
+    It expects the DataFrame to contain 'Voice' and 'score' columns,
+    typically filtered for a single trait/description.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame containing at least 'Voice' and 'score' columns.
+        Produced by utils.process_speaking_style_data and filtered.
+    trait_description : str, optional
+        Description of the trait being analyzed (e.g. "Indifferent : Attentive").
+        If not provided, it is built from the anchors, else from the 'Description' column.
+    left_anchor : str, optional
+        Label for the lower end of the scale (e.g. "Indifferent").
+        If not provided, attempts to read 'Left_Anchor' column from df.
+    right_anchor : str, optional
+        Label for the upper end of the scale (e.g. "Attentive").
+        If not provided, attempts to read 'Right_Anchor' column from df.
+    title : str, optional
+        Plot title.
+    height : int, optional
+        Plot height.
+    width : int, optional
+        Plot width.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object (empty if df is empty or lacks 'Voice'/'score').
+    """
+    if df.is_empty():
+        return go.Figure()
+
+    required_cols = ["Voice", "score"]
+    if not all(col in df.columns for col in required_cols):
+        return go.Figure()
+
+    # Calculate stats: Mean, Count
+    stats = (
+        df.filter(pl.col("score").is_not_null())
+        .group_by("Voice")
+        .agg([
+            pl.col("score").mean().alias("mean_score"),
+            pl.col("score").count().alias("count")
+        ])
+        .sort(["mean_score", "Voice"], descending=[True, False])  # highest first, ties by name
+    )
+
+    # Attempt to extract anchors from DF if not provided (each column is checked separately)
+    if (left_anchor is None or right_anchor is None) and "Left_Anchor" in df.columns:
+        head = df.filter(pl.col("Left_Anchor").is_not_null()).head(1)
+        if not head.is_empty():
+            if left_anchor is None:
+                left_anchor = head["Left_Anchor"][0]
+            if right_anchor is None and "Right_Anchor" in head.columns:
+                right_anchor = head["Right_Anchor"][0]
+
+    # Anchors look like "A | B | C": show only the first term, stripped of the
+    # padding around '|' so labels do not carry stray spaces.
+    left_label = left_anchor.split('|')[0].strip() if left_anchor else ""
+    right_label = right_anchor.split('|')[0].strip() if right_anchor else ""
+
+    if trait_description is None:
+        if left_label and right_label:
+            trait_description = f"{left_label} vs. {right_label}"
+        elif "Description" in df.columns:
+            described = df.filter(pl.col("Description").is_not_null()).head(1)
+            trait_description = "" if described.is_empty() else described["Description"][0]
+        else:
+            trait_description = ""
+
+    fig = go.Figure()
+
+    fig.add_trace(go.Bar(
+        x=stats["Voice"], # X is Voice
+        y=stats["mean_score"], # Y is Score
+        text=stats["count"],
+        textposition='inside',
+        texttemplate='%{text}', # Count on bar
+        marker_color=ColorPalette.PRIMARY,
+        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<br>Count: %{text}<extra></extra>'
+    ))
+
+    # Anchor labels sit to the right of the plot area, at y=1 (low end) and y=5 (high end)
+    annotations = []
+    if left_label:
+        annotations.append(dict(
+            xref='paper', yref='y',
+            x=1.01, y=1,
+            xanchor='left', yanchor='middle',
+            text=f"<b>1: {left_label}</b>",
+            showarrow=False,
+            font=dict(size=10, color='gray')
+        ))
+    if right_label:
+        annotations.append(dict(
+            xref='paper', yref='y',
+            x=1.01, y=5,
+            xanchor='left', yanchor='middle',
+            text=f"<b>5: {right_label}</b>",
+            showarrow=False,
+            font=dict(size=10, color='gray')
+        ))
+
+    fig.update_layout(
+        title=dict(
+            text=f"{title}<br><sub>{trait_description}</sub><br><sub>(Numbers on bars indicate respondent count)</sub>",
+            y=0.92
+        ),
+        xaxis_title="Voice",
+        yaxis_title="Average Score (1-5)",
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        yaxis=dict(
+            range=[1, 5],
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            zeroline=False
+        ),
+        xaxis=dict(
+            showgrid=False
+        ),
+        margin=dict(r=150),
+        annotations=annotations,
+        font=dict(size=11)
+    )
+    return fig
--- a/theme.py
+++ b/theme.py
@@ -16,6 +16,9 @@ class ColorPalette:
    RANK_3 = "#5AAE95"   # Sea Green (3rd Choice)
    RANK_4 = "#9E9E9E"   # Grey (4th Choice / Worst)

+    # Neutral color for unhighlighted comparison items
+    NEUTRAL = "#D3D3D3"  # Light Grey
+
    # General UI elements
    TEXT = "black"
    GRID = "lightgray"
--- a/utils.py
+++ b/utils.py
@@ -3,7 +3,6 @@ from pathlib import Path
 import pandas as pd
 from typing import Union
 import json
-
 import re

 def extract_voice_label(html_str: str) -> str:
@@ -57,13 +56,13 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe

 def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
    """
-    Calculate weighted scores for character rankings.
+    Calculate weighted scores for character or voice rankings.
    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.

    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns.
+        DataFrame containing character/ voice ranking columns.

    Returns
    -------
@@ -71,8 +70,8 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
        DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
    """
    scores = []
-    # Identify columns related to Character Ranking
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    # Identify ranking columns (assume all columns except _recordId)
+    ranking_cols = [c for c in df.columns if c != '_recordId']

    for col in ranking_cols:
        # Calculate score:
@@ -84,7 +83,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
        weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
        
        # Clean name
-        clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+        clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
        
        scores.append({
            'Character': clean_name,
@@ -413,6 +412,95 @@ class JPMCSurvey:
        QIDs = ['QID44', 'QID97', 'QID95', 'QID96']
        
        return self._get_subset(q, QIDs, rename_cols=True), None
+
+
+def process_speaking_style_data(
+    df: Union[pl.LazyFrame, pl.DataFrame],
+    trait_map: dict[str, str]
+) -> pl.DataFrame:
+    """
+    Process speaking style columns from wide to long format and map trait descriptions.
+
+    Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
+    Example: SS_Orange_Red__V14__Choice_1
+
+    Parameters
+    ----------
+    df : pl.LazyFrame or pl.DataFrame
+        Input dataframe containing SS_* columns.
+    trait_map : dict
+        Dictionary mapping column names to trait descriptions ("Left : Right").
+        Keys should be full column names like "SS_Orange_Red__V14__Choice_1".
+
+    Returns
+    -------
+    pl.DataFrame
+        Long-format dataframe with columns:
+        _recordId, Style_Group, Voice, Choice_ID, score, Description, Left_Anchor, Right_Anchor
+    """
+    # Normalize input to LazyFrame
+    lf = df.lazy() if isinstance(df, pl.DataFrame) else df
+
+    # 1. Melt SS_ columns (NOTE(review): polars >= 1.0 deprecates melt for unpivot — confirm pinned version)
+    melted = lf.melt(
+        id_vars=["_recordId"],
+        value_vars=pl.col("^SS_.*$"),
+        variable_name="full_col_name",
+        value_name="score"
+    )
+
+    # 2. Extract components from column name
+    # Regex captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14), Choice_ID (e.g. Choice_1)
+    pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"
+
+    processed = melted.with_columns(
+        pl.col("full_col_name").str.extract_groups(pattern)
+    ).unnest("full_col_name")
+
+    # 3. Build the (Style_Group, Choice_ID) -> Description lookup from trait_map.
+    # The first entry seen for a key wins; later duplicates (other voices) are ignored.
+    name_re = re.compile(pattern)
+    mapping_data = []
+    seen = set()
+
+    for col_name, desc in trait_map.items():
+        parsed = name_re.match(col_name)
+        if parsed is None:
+            continue
+        key = (parsed["Style_Group"], parsed["Choice_ID"])
+        if key in seen:
+            continue
+        seen.add(key)
+        # Split "Left : Right" into anchors; a missing ':' leaves Right_Anchor empty
+        parts = desc.split(':')
+        mapping_data.append({
+            "Style_Group": key[0],
+            "Choice_ID": key[1],
+            "Description": desc,
+            "Left_Anchor": parts[0].strip(),
+            "Right_Anchor": parts[1].strip() if len(parts) > 1 else "",
+        })
+
+    # An explicit schema keeps the output columns identical even when no key of
+    # trait_map matches the pattern: the lookup is then empty and every
+    # Description / anchor value is null, instead of the columns being absent.
+    mapping_lf = pl.LazyFrame(
+        mapping_data,
+        schema={
+            name: pl.Utf8
+            for name in ("Style_Group", "Choice_ID", "Description", "Left_Anchor", "Right_Anchor")
+        },
+    )
+
+    # 4. Join Data with Mapping
+    result = processed.join(mapping_lf, on=["Style_Group", "Choice_ID"], how="left")
+
+    # 5. Cast score to Int (strict=False: values that cannot be cast become null)
+    result = result.with_columns(
+        pl.col("score").cast(pl.Int64, strict=False)
+    )
+
+    return result.collect()
    


--- a/validation.py
+++ b/validation.py
@@ -5,9 +5,9 @@ import polars as pl
 def check_progress(data):
    """Check if all responses are complete based on 'progress' column."""
    if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
-        return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
+        return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
    
-    return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
+    return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"


 def duration_validation(data):
@@ -30,10 +30,9 @@ def duration_validation(data):
    outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()

    if outlier_data.shape[0] == 0:
-        return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
+        return "### Duration Outliers: \n\n✅ No duration outliers detected"

-    return mo.md(f"""
-    ### Duration Outliers:
+    return f"""### Duration Outliers:
    
    **⚠️ Potential outliers detected based on response duration ⚠️**
    
@@ -50,5 +49,5 @@ def duration_validation(data):
    
    **⚠️ NOTE: These have not been removed from the dataset ⚠️**
    
-    """)
+    """