speaking style trait scores vertical

2026-01-23 12:26:47 +01:00
parent 424355f4a1
commit 84a0f8052e
5 changed files with 615 additions and 90 deletions
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -12,7 +12,10 @@ def _():
    from validation import check_progress, duration_validation
    from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
-    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
+    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
    import plots as plts
    import utils as utl
    return (
        JPMCSurvey,
        Path,
@@ -20,27 +23,23 @@ def _():
        check_progress,
        duration_validation,
        mo,
        pl,
        plot_average_scores_with_counts,
-        plot_character_ranking_distribution,
+        plot_most_ranked_1,
-        plot_most_ranked_1_character,
+        plot_ranking_distribution,
        plot_top3_ranking_distribution,
        plot_top3_selection_counts,
        plot_voice_selection_counts,
        plot_weighted_ranking_score,
        plts,
        utl,
    )
@app.cell(hide_code=True)
 def _(mo):
    mo.md(r"""
    # Load Data
    """)
    return
@app.cell
-def _(Path, mo):
+def _():
    RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
    return QSF_FILE, RESULTS_FILE
@@ -52,17 +51,30 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    return data_all, survey
-@app.cell(hide_code=True)
+@app.cell
-def _(mo):
+def _(Path, RESULTS_FILE, data_all, mo):
-    mo.md(r"""
+    mo.md(f"""
-    ## Data Validation
+    # Load Data
    **Dataset:** `{Path(RESULTS_FILE).name}`
    {mo.ui.table(data_all.collect())}
    """)
    return
-@app.cell
+@app.cell(hide_code=True)
-def _(check_progress, data_all):
+def _(check_progress, data_all, duration_validation, mo):
-    check_progress(data_all)
+    mo.md(f"""
    ## Data Validation
    {check_progress(data_all)}
    {duration_validation(data_all)}
    """)
    return
@@ -112,8 +124,6 @@ def _(mo):
 def _(mo):
    mo.md(r"""
    ## Character personality ranking
    ### 1. Which character personality is ranked best?
    """)
    return
@@ -126,15 +136,23 @@ def _(data, survey):
@app.cell
-def _(char_rank, plot_character_ranking_distribution):
+def _(char_rank, mo, plot_top3_ranking_distribution):
-    plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
+    mo.md(f"""
    ### 1. Which character personality is ranked best?
    {mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
    """)
    return
@app.cell
-def _(mo):
+def _(char_rank, mo, plot_most_ranked_1):
-    mo.md(r"""
+    mo.md(f"""
-    ### 2. Which character personality is ranked number 1 the most?
+    ### 2. Which character personality is ranked 1st the most?
    {mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
    """)
    return
@@ -143,16 +161,18 @@ def _(mo):
 def _(
    calculate_weighted_ranking_scores,
    char_rank,
    mo,
    plot_weighted_ranking_score,
 ):
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
-    plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
+    # plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
-    return
+
    mo.md(f"""
    ### 3. Which character personality most popular based on weighted scores?
-@app.cell
+    {mo.ui.plotly(plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
-def _(char_rank, plot_most_ranked_1_character):
+    """)
    plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
    return
@@ -167,51 +187,74 @@ def _(mo):
@app.cell
 def _(data, survey):
    v_18_8_3 = survey.get_18_8_3(data)[0].collect()
-    print(v_18_8_3.head())
+    # print(v_18_8_3.head())
-    return
+    return (v_18_8_3,)
@app.cell(hide_code=True)
-def _(mo):
+def _(mo, plot_voice_selection_counts, v_18_8_3):
-    mo.md(r"""
+    mo.md(f"""
-    Which 8 voices are chosen the most out of 18?
+    ### Which 8 voices are chosen the most out of 18? 
    {mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return
@app.cell(hide_code=True)
-def _(mo):
+def _(mo, plot_top3_selection_counts, v_18_8_3):
-    mo.md(r"""
+    mo.md(f"""
-    Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
+    ### Which 3 voices are chosen the most out of 18? 
    How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18. 
    {mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return
@app.cell(hide_code=True)
-def _(mo):
+def _(
-    mo.md(r"""
+    calculate_weighted_ranking_scores,
-    Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
+    data,
    mo,
    plot_ranking_distribution,
    survey,
 ):
    top3_voices = survey.get_top_3_voices(data)[0].collect()
    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
    mo.md(f"""
    ### Which voice is ranked best in the ranking question for top 3? 
    (not best 3 out of 8 question)  
    {mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
    """)
    return top3_voices, top3_voices_weighted
@app.cell
 def _(mo, plot_weighted_ranking_score, top3_voices_weighted):
    # Render a markdown section that embeds the weighted-score bar chart for
    # voices. `top3_voices_weighted` is supplied by another cell; the chart is
    # built by `plot_weighted_ranking_score` and wrapped with `mo.ui.plotly` so
    # it can be interpolated into the f-string below.
    mo.md(f"""
    ### Most popular **voice** based on weighted scores?
    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best. 
    Distribution of the rankings for each voice:
    {mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
    """)
    # NOTE(review): the `mo.md(...)` value is not returned or assigned here;
    # confirm the notebook still displays it as intended.
    return
@app.cell
-def _(plot_top3_ranking_distribution, top3_voices):
+def _(mo, plot_most_ranked_1, top3_voices):
-    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
+    mo.md(f"""
-    return
+    ### Which voice is ranked number 1 the most? 
    (not always the voice with most points)
-@app.cell(hide_code=True)
+    {mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
 def _(mo):
    mo.md(r"""
    Which voice is ranked number 1 the most? (not always the voice with most points)
    - Each of the 350 participants gives exactly one 1st-place vote.
    - Total Rank-1 votes = 350.
    - Voices are sorted from most to least 1st-place votes.
    - The top 3 voices with the most Rank-1 votes are colored blue.
    - This can differ from the points-based winners (3–2–1 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
    """)
    return
@@ -235,6 +278,56 @@ def _(mo):
    return
@app.cell
 def _(data, survey):
    # Fetch the two speaking-style subsets. Each getter returns a pair:
    # the data frame and a mapping that is merged into `choice_map` below.
    ss_or, choice_map_or = survey.get_ss_orange_red(data)
    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)
    # Combine the data: join both subsets on the respondent id column.
    ss_all = ss_or.join(ss_gb, on='_recordId')
    # NOTE(review): `_d` is only referenced by the commented-out print below,
    # so this collect() currently has no consumer in this cell.
    _d = ss_all.collect()
    # Merge both mappings into one dict; on duplicate keys the green/blue
    # entry wins because it is unpacked last.
    choice_map = {**choice_map_or, **choice_map_gb}
    # print(_d.head())
    print(choice_map)
    return choice_map, ss_all
@app.cell
 def _(choice_map, ss_all, utl):
    # Reshape the combined speaking-style frame with
    # `utl.process_speaking_style_data`, passing `choice_map` as the trait map.
    ss_long = utl.process_speaking_style_data(ss_all, choice_map)
    # Bare expression — presumably kept so the notebook previews the frame;
    # TODO confirm it is still displayed given the explicit return below.
    ss_long
    return (ss_long,)
@app.cell
 def _(pl, ss_long):
    # Hard-coded trait to inspect. The value must equal an entry of the
    # `Description` column exactly, since the filter below uses `==`.
    target_trait = "Indifferent | Unfocussed | Detached:Attentive | Helpful | Caring | Deliberate"
    # Keep only the rows belonging to that single trait.
    trait_data = ss_long.filter(pl.col("Description") == target_trait)
    # Bare expression — presumably for previewing the filtered frame.
    trait_data
    return target_trait, trait_data
@app.cell
 def _(plts, target_trait, trait_data):
    # Plot the scores for the single trait selected in `trait_data`.
    # The title is the trait description with the ":" separator replaced by
    # a double-headed arrow.
    plts.plot_speaking_style_trait_scores(
        trait_data,
        title=target_trait.replace(":", " ↔ "),
        # trait_description="Attentive vs Indifferent", # simplified title
    )
    # NOTE(review): the figure returned by the call above is neither assigned
    # nor returned; confirm it is rendered as intended.
    return
 app._unparsable_cell(
    """
    for trait in ss_long.select(\"Description\").unique().to_series().to_list():
        trait_data = ss_long.filter(pl.col(\"Description\") == trait)
        mo.md(f\"\"\"
    """,
    name="_"
 )
@app.cell(hide_code=True)
 def _(mo):
    mo.md(r"""
--- a/plots.py
+++ b/plots.py
@@ -216,22 +216,22 @@ def plot_top3_ranking_distribution(
    return fig
-def plot_character_ranking_distribution(
+def plot_ranking_distribution(
    df: pl.DataFrame,
-    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
+    title: str = "Rankings Distribution<br>(1st to 4th Place)",
-    x_label: str = "Character Personality",
+    x_label: str = "Item",
    y_label: str = "Number of Votes",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
-    Create a stacked bar chart showing the distribution of rankings (1st to 4th) for character personalities.
+    Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
-    Sorted by the number of Rank 1 votes to highlight the 'Best' options.
+    Sorted by the number of Rank 1 votes.
    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns (prefix 'Character_Ranking').
+        DataFrame containing ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
@@ -249,8 +249,8 @@ def plot_character_ranking_distribution(
        Plotly figure object.
    """
    stats = []
-    # Identify columns related to Character Ranking (excluding ID)
+    # Identify ranking columns (assume all columns except _recordId)
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    ranking_cols = [c for c in df.columns if c != '_recordId']
    for col in ranking_cols:
        # Count occurrences of each rank (1, 2, 3, 4)
@@ -280,7 +280,7 @@ def plot_character_ranking_distribution(
    # Clean up labels: Remove prefix and underscores
    # e.g. "Character_Ranking_The_Coach" -> "The Coach"
    labels = [
-        col.replace('Character_Ranking_', '').replace('_', ' ').strip() 
+        col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip() 
        for col in stats_df['column']
    ]
@@ -354,21 +354,22 @@ def plot_character_ranking_distribution(
    return fig
-def plot_most_ranked_1_character(
+def plot_most_ranked_1(
    df: pl.DataFrame,
-    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
+    title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
-    x_label: str = "Character Personality",
+    x_label: str = "Item",
    y_label: str = "Count of 1st Place Rankings",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
-    Create a bar chart showing which character personality was ranked #1 the most.
+    Create a bar chart showing which item (character/voice) was ranked #1 the most.
    Top 3 items are highlighted.
    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns.
+        DataFrame containing ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
@@ -386,8 +387,8 @@ def plot_most_ranked_1_character(
        Plotly figure object.
    """
    stats = []
-    # Identify columns related to Character Ranking
+    # Identify ranking columns (assume all columns except _recordId)
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    ranking_cols = [c for c in df.columns if c != '_recordId']
    for col in ranking_cols:
        # Count occurrences of rank 1
@@ -403,10 +404,16 @@ def plot_most_ranked_1_character(
    # Clean up labels
    labels = [
-        col.replace('Character_Ranking_', '').replace('_', ' ').strip() 
+        col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip() 
        for col in stats_df['column']
    ]
    # Assign colors: Top 3 get PRIMARY (Blue), others get NEUTRAL (Grey)
    colors = [
        ColorPalette.PRIMARY if i < 3 else ColorPalette.NEUTRAL
        for i in range(len(stats_df))
    ]
    fig = go.Figure()
    fig.add_trace(go.Bar(
@@ -415,7 +422,7 @@ def plot_most_ranked_1_character(
        text=stats_df['count'],
        textposition='inside',
        textfont=dict(size=10, color='white'),
-        marker_color=ColorPalette.PRIMARY,
+        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
    ))
@@ -444,7 +451,7 @@ def plot_most_ranked_1_character(
 def plot_weighted_ranking_score(
    weighted_df: pl.DataFrame,
-    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
+    title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
    x_label: str = "Character Personality",
    y_label: str = "Total Weighted Score",
    color: str = ColorPalette.PRIMARY,
@@ -509,3 +516,338 @@ def plot_weighted_ranking_score(
    )
    return fig
 def plot_voice_selection_counts(
    df: pl.DataFrame,
    target_column: str = "8_Combined",
    title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Number of Times Chosen",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
    Create a bar plot showing how often each voice was selected.

    Takes a column containing comma-separated values (e.g. "Voice 1, Voice 2..."),
    counts the occurrences of each value, and highlights the 8 most frequent
    ones. Bars are ordered by count (descending); ties are broken by the voice
    label so the bar order and the highlighted set are reproducible.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the selection column.
    target_column : str, optional
        Name of the column containing comma-separated voice selections.
        Defaults to "8_Combined".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when
        `target_column` is not present in `df`.
    """
    if target_column not in df.columns:
        return go.Figure()

    # Process the data:
    # 1. Select the relevant column and remove nulls
    # 2. Split the comma-separated string into a list
    # 3. Explode the list so each voice gets its own row
    # 4. Strip whitespace so "Voice 1" and " Voice 1" match
    # 5. Drop empty entries and count occurrences
    # 6. Sort by count, then by label: group_by gives no ordering guarantee,
    #    so without the tie-break equal counts could swap places between runs
    #    and change which bars fall inside the highlighted top 8.
    stats_df = (
        df.select(pl.col(target_column))
        .drop_nulls()
        .with_columns(pl.col(target_column).str.split(","))
        .explode(target_column)
        .with_columns(pl.col(target_column).str.strip_chars())
        .filter(pl.col(target_column) != "")
        .group_by(target_column)
        .agg(pl.len().alias("count"))
        .sort(["count", target_column], descending=[True, False])
    )

    # Define colors: Top 8 get PRIMARY, rest get NEUTRAL
    colors = [
        ColorPalette.PRIMARY if position < 8 else ColorPalette.NEUTRAL
        for position in range(stats_df.height)
    ]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats_df[target_column],
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='outside',
        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>Selections: %{y}<extra></extra>'
    ))
    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11),
    )
    return fig
 def plot_top3_selection_counts(
    df: pl.DataFrame,
    target_column: str = "3_Ranked",
    title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Count of Mentions in Top 3",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
    Create a bar plot showing how often each voice ended up in a Top 3.

    Question: Which 3 voices are chosen the most out of 18?
    How many times does each voice end up in the top 3?
    (This is based on the survey question where participants need to choose 3
    out of the earlier selected 8 voices.) So: how often each of the 18 stimuli
    ended up in participants' Top 3, after they first selected 8 out of 18.

    Bars are ordered by count (descending); ties are broken by the voice label
    so the bar order and the highlighted set are reproducible.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the ranking column (comma-separated strings).
    target_column : str, optional
        Name of the column containing comma-separated Top 3 voice selections.
        Defaults to "3_Ranked".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when
        `target_column` is not present in `df`.
    """
    if target_column not in df.columns:
        return go.Figure()

    # Split the comma-separated selections into one row per voice, trim
    # whitespace, drop empty entries and count. group_by gives no ordering
    # guarantee, so the sort breaks ties on the label to keep the highlighted
    # top 3 stable between runs.
    stats_df = (
        df.select(pl.col(target_column))
        .drop_nulls()
        .with_columns(pl.col(target_column).str.split(","))
        .explode(target_column)
        .with_columns(pl.col(target_column).str.strip_chars())
        .filter(pl.col(target_column) != "")
        .group_by(target_column)
        .agg(pl.len().alias("count"))
        .sort(["count", target_column], descending=[True, False])
    )

    # Define colors: Top 3 get PRIMARY, rest get NEUTRAL
    colors = [
        ColorPalette.PRIMARY if position < 3 else ColorPalette.NEUTRAL
        for position in range(stats_df.height)
    ]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats_df[target_column],
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='outside',
        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>In Top 3: %{y} times<extra></extra>'
    ))
    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11),
    )
    return fig
 def plot_speaking_style_trait_scores(
    df: pl.DataFrame,
    trait_description: str = None,
    left_anchor: str = None,
    right_anchor: str = None,
    title: str = "Speaking Style Trait Analysis",
    height: int = 500,
    width: int = 1000,
 ) -> go.Figure:
    """
    Plot scores for a single speaking style trait across multiple voices.

    The plot shows the average score per Voice, sorted from highest to lowest
    average. Each bar is labelled with the number of non-null scores behind it.
    The DataFrame is expected to contain 'Voice' and 'score' columns and is
    typically already filtered to a single trait/description.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing at least 'Voice' and 'score' columns.
        Produced by utils.process_speaking_style_data and filtered.
    trait_description : str, optional
        Description of the trait being analyzed (e.g. "Indifferent : Attentive").
        If not provided, it is built from the two anchors ("<left> vs. <right>");
        if an anchor is missing, the first non-null 'Description' value in `df`
        is used, falling back to an empty string.
    left_anchor : str, optional
        Label for the lower end of the scale (e.g. "Indifferent").
        If not provided, attempts to read 'Left_Anchor' column from df.
    right_anchor : str, optional
        Label for the upper end of the scale (e.g. "Attentive").
        If not provided, attempts to read 'Right_Anchor' column from df.
    title : str, optional
        Plot title.
    height : int, optional
        Plot height.
    width : int, optional
        Plot width.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when `df` is empty
        or lacks the 'Voice' or 'score' column.
    """
    if df.is_empty():
        return go.Figure()
    if any(col not in df.columns for col in ("Voice", "score")):
        return go.Figure()

    # Calculate stats: Mean, Count
    stats = (
        df.filter(pl.col("score").is_not_null())
        .group_by("Voice")
        .agg([
            pl.col("score").mean().alias("mean_score"),
            pl.col("score").count().alias("count")
        ])
        .sort("mean_score", descending=True)  # Descending for Left-to-Right
    )

    # Attempt to extract anchors from DF if not provided. Both anchors come
    # from the first row that has a non-null Left_Anchor.
    if (left_anchor is None or right_anchor is None) and "Left_Anchor" in df.columns:
        first_row = df.filter(pl.col("Left_Anchor").is_not_null()).head(1)
        if not first_row.is_empty():
            if left_anchor is None:
                left_anchor = first_row["Left_Anchor"][0]
            # Right_Anchor may be absent even when Left_Anchor exists; only
            # read it when the column is really there.
            if right_anchor is None and "Right_Anchor" in first_row.columns:
                right_anchor = first_row["Right_Anchor"][0]

    # Anchors look like "Indifferent | Unfocussed | Detached": show only the
    # first term, trimmed so no stray spaces end up in the labels.
    left_label = left_anchor.split("|")[0].strip() if left_anchor else ""
    right_label = right_anchor.split("|")[0].strip() if right_anchor else ""

    if trait_description is None:
        if left_anchor and right_anchor:
            trait_description = f"{left_label} vs. {right_label}"
        else:
            # Fall back to the Description column, if there is one.
            trait_description = ""
            if "Description" in df.columns:
                described = df.filter(pl.col("Description").is_not_null()).head(1)
                if not described.is_empty():
                    trait_description = described["Description"][0]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats["Voice"],  # X is Voice
        y=stats["mean_score"],  # Y is Score
        text=stats["count"],
        textposition='inside',
        texttemplate='%{text}',  # Count on bar
        marker_color=ColorPalette.PRIMARY,
        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<br>Count: %{text}<extra></extra>'
    ))

    def _anchor_note(level: int, label: str) -> dict:
        # Annotation just right of the plot area, aligned with `level` on the y axis.
        return dict(
            xref='paper', yref='y',
            x=1.01, y=level,
            xanchor='left', yanchor='middle',
            text=f"<b>{level}: {label}</b>",
            showarrow=False,
            font=dict(size=10, color='gray')
        )

    # Add annotations for anchors (placed on the right side)
    annotations = []
    if left_anchor:
        annotations.append(_anchor_note(1, left_label))
    if right_anchor:
        annotations.append(_anchor_note(5, right_label))

    fig.update_layout(
        title=dict(
            text=f"{title}<br><sub>{trait_description}</sub><br><sub>(Numbers on bars indicate respondent count)</sub>",
            y=0.92
        ),
        xaxis_title="Voice",
        yaxis_title="Average Score (1-5)",
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        yaxis=dict(
            range=[1, 5],
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            zeroline=False
        ),
        xaxis=dict(
            showgrid=False
        ),
        margin=dict(r=150),  # leave room for the anchor annotations
        annotations=annotations,
        font=dict(size=11)
    )
    return fig
--- a/theme.py
+++ b/theme.py
@@ -16,6 +16,9 @@ class ColorPalette:
    RANK_3 = "#5AAE95"   # Sea Green (3rd Choice)
    RANK_4 = "#9E9E9E"   # Grey (4th Choice / Worst)
    # Neutral color for unhighlighted comparison items
    NEUTRAL = "#D3D3D3"  # Light Grey
    # General UI elements
    TEXT = "black"
    GRID = "lightgray"
--- a/utils.py
+++ b/utils.py
@@ -3,7 +3,6 @@ from pathlib import Path
 import pandas as pd
 from typing import Union
 import json
 import re
 def extract_voice_label(html_str: str) -> str:
@@ -57,13 +56,13 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
 def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
    """
-    Calculate weighted scores for character rankings.
+    Calculate weighted scores for character or voice rankings.
    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
    Parameters
    ----------
    df : pl.DataFrame
-        DataFrame containing character ranking columns.
+        DataFrame containing character/ voice ranking columns.
    Returns
    -------
@@ -71,8 +70,8 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
        DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
    """
    scores = []
-    # Identify columns related to Character Ranking
+    # Identify ranking columns (assume all columns except _recordId)
-    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+    ranking_cols = [c for c in df.columns if c != '_recordId']
    for col in ranking_cols:
        # Calculate score:
@@ -84,7 +83,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
        weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
        # Clean name
-        clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+        clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
        scores.append({
            'Character': clean_name,
@@ -415,6 +414,95 @@ class JPMCSurvey:
        return self._get_subset(q, QIDs, rename_cols=True), None
 def process_speaking_style_data(
    df: Union[pl.LazyFrame, pl.DataFrame],
    trait_map: dict[str, str]
 ) -> pl.DataFrame:
    """
    Process speaking style columns from wide to long format and map trait descriptions.

    Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
    Example: SS_Orange_Red__V14__Choice_1

    Parameters
    ----------
    df : pl.LazyFrame or pl.DataFrame
        Input dataframe containing a `_recordId` column and SS_* columns.
    trait_map : dict
        Dictionary mapping column names to trait descriptions.
        Keys should be full column names like "SS_Orange_Red__V14__Choice_1".
        Keys that do not match the column-name format are ignored. Descriptions
        of the form "Left : Right" are split at the first colon into anchors.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Style_Group, Voice, Choice_ID, score, Description,
        Left_Anchor, Right_Anchor.
        `score` is cast to Int64 (values that cannot be cast become null).
        The three description columns are always present; they are null where
        no mapping entry exists for a (Style_Group, Choice_ID) pair.
    """
    # Normalize input to LazyFrame
    lf = df.lazy() if isinstance(df, pl.DataFrame) else df

    # 1. Melt SS_ columns
    melted = lf.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^SS_.*$"),
        variable_name="full_col_name",
        value_name="score"
    )

    # 2. Extract components from column name
    # Regex captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14), Choice_ID (e.g. Choice_1)
    pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"
    processed = melted.with_columns(
        pl.col("full_col_name").str.extract_groups(pattern)
    ).unnest("full_col_name")

    # 3. Create Mapping Lookup from the provided dictionary
    # We map (Style_Group, Choice_ID) -> Description; the first entry seen for
    # a pair wins, later duplicates (other voices) are skipped.
    mapping_rows = {}
    for col_name, desc in trait_map.items():
        match = re.match(pattern, col_name)
        if match is None:
            continue
        key = (match["Style_Group"], match["Choice_ID"])
        if key in mapping_rows:
            continue
        # Split at the FIRST colon only, so a right-hand side that itself
        # contains a colon is kept whole instead of being truncated.
        left_part, _, right_part = desc.partition(":")
        mapping_rows[key] = {
            "Style_Group": key[0],
            "Choice_ID": key[1],
            "Description": desc,
            "Left_Anchor": left_part.strip(),
            "Right_Anchor": right_part.strip(),
        }

    # 4. Join Data with Mapping
    if mapping_rows:
        result = processed.join(
            pl.LazyFrame(list(mapping_rows.values())),
            on=["Style_Group", "Choice_ID"],
            how="left"
        )
    else:
        # No usable mapping: still expose the description columns (as nulls)
        # so callers can rely on one output schema.
        result = processed.with_columns([
            pl.lit(None, dtype=pl.Utf8).alias(name)
            for name in ("Description", "Left_Anchor", "Right_Anchor")
        ])

    # 5. Cast score to Int (applied on every path, including the empty mapping)
    result = result.with_columns(
        pl.col("score").cast(pl.Int64, strict=False)
    )
    return result.collect()
--- a/validation.py
+++ b/validation.py
@@ -5,9 +5,9 @@ import polars as pl
 def check_progress(data):
    """Check if all responses are complete based on 'progress' column."""
    if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
-        return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
+        return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
-    return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
+    return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
 def duration_validation(data):
@@ -30,10 +30,9 @@ def duration_validation(data):
    outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
    if outlier_data.shape[0] == 0:
-        return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
+        return "### Duration Outliers: \n\n✅ No duration outliers detected"
-    return mo.md(f"""
+    return f"""### Duration Outliers:
    ### Duration Outliers:
    **⚠️ Potential outliers detected based on response duration ⚠️**
@@ -50,5 +49,5 @@ def duration_validation(data):
    **⚠️ NOTE: These have not been removed from the dataset ⚠️**
-    """)
+    """