speaking style trait scores vertical

2026-01-23 12:26:47 +01:00
parent 424355f4a1
commit 84a0f8052e
5 changed files with 615 additions and 90 deletions
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -12,7 +12,10 @@ def _():

    from validation import check_progress, duration_validation
    from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
-    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
+    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
+
+    import plots as plts
+    import utils as utl
    return (
        JPMCSurvey,
        Path,
@@ -20,27 +23,23 @@ def _():
        check_progress,
        duration_validation,
        mo,
+        pl,
        plot_average_scores_with_counts,
-        plot_character_ranking_distribution,
-        plot_most_ranked_1_character,
+        plot_most_ranked_1,
+        plot_ranking_distribution,
        plot_top3_ranking_distribution,
+        plot_top3_selection_counts,
+        plot_voice_selection_counts,
        plot_weighted_ranking_score,
+        plts,
+        utl,
    )


-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    # Load Data
-    """)
-    return
-
-
@app.cell
-def _(Path, mo):
+def _():
    RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
-    mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
    return QSF_FILE, RESULTS_FILE


@@ -52,17 +51,30 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    return data_all, survey


-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    ## Data Validation
+@app.cell
+def _(Path, RESULTS_FILE, data_all, mo):
+    # "Load Data" section: shows the export's file name and the collected
+    # `data_all` frame wrapped in a `mo.ui.table`, all inside one markdown block.
+    mo.md(f"""
+    # Load Data
+
+    **Dataset:** `{Path(RESULTS_FILE).name}`
+
+    {mo.ui.table(data_all.collect())}
    """)
    return


-@app.cell
-def _(check_progress, data_all):
-    check_progress(data_all)
+@app.cell(hide_code=True)
+def _(check_progress, data_all, duration_validation, mo):
+    # "Data Validation" section: interpolates the return values of both
+    # validation helpers into a single markdown block.
+    # NOTE(review): assumes both helpers return something that renders
+    # sensibly inside markdown (not None) - confirm against validation.py.
+    mo.md(f"""
+    ## Data Validation
+
+    {check_progress(data_all)}
+
+
+
+    {duration_validation(data_all)}
+
+    """)
    return


@@ -112,8 +124,6 @@ def _(mo):
 def _(mo):
    mo.md(r"""
    ## Character personality ranking
-
-    ### 1. Which character personality is ranked best?
    """)
    return

@@ -126,15 +136,23 @@ def _(data, survey):


@app.cell
-def _(char_rank, plot_character_ranking_distribution):
-    plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
+def _(char_rank, mo, plot_top3_ranking_distribution):
+    mo.md(f"""
+    ### 1. Which character personality is ranked best?
+
+
+    {mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
+    """)
    return


@app.cell
-def _(mo):
-    mo.md(r"""
-    ### 2. Which character personality is ranked number 1 the most?
+def _(char_rank, mo, plot_most_ranked_1):
+    mo.md(f"""
+    ### 2. Which character personality is ranked 1st the most?
+
+
+    {mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
    """)
    return

@@ -143,16 +161,18 @@ def _(mo):
 def _(
    calculate_weighted_ranking_scores,
    char_rank,
+    mo,
    plot_weighted_ranking_score,
 ):
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
-    plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
-    return
+    # plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
+
+    mo.md(f"""
+    ### 3. Which character personality is most popular based on weighted scores?


-@app.cell
-def _(char_rank, plot_most_ranked_1_character):
-    plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
+    {mo.ui.plotly(plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
+    """)
    return


@@ -167,51 +187,74 @@ def _(mo):
@app.cell
 def _(data, survey):
    v_18_8_3 = survey.get_18_8_3(data)[0].collect()
-    print(v_18_8_3.head())
-    return
+    # print(v_18_8_3.head())
+    return (v_18_8_3,)


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which 8 voices are chosen the most out of 18?
+def _(mo, plot_voice_selection_counts, v_18_8_3):
+    mo.md(f"""
+    ### Which 8 voices are chosen the most out of 18? 
+
+    {mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
+def _(mo, plot_top3_selection_counts, v_18_8_3):
+    mo.md(f"""
+    ### Which 3 voices are chosen the most out of 18? 
+
+    How many times does each voice end up in the top 3? (This is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices, i.e. how often each of the 18 stimuli ended up in participants’ Top 3 after they first selected 8 out of 18.)
+
+    {mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
    """)
    return


@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
-    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best.
+def _(
+    calculate_weighted_ranking_scores,
+    data,
+    mo,
+    plot_ranking_distribution,
+    survey,
+):
+    top3_voices = survey.get_top_3_voices(data)[0].collect()
+    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
+
+    mo.md(f"""
+    ### Which voice is ranked best in the ranking question for top 3? 
+
+    (not best 3 out of 8 question)  
+
+    {mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
+
+    """)
+    return top3_voices, top3_voices_weighted
+
+
+@app.cell
+def _(mo, plot_weighted_ranking_score, top3_voices_weighted):
+    # Show the weighted ranking scores for the voices (3/2/1 points for
+    # 1st/2nd/3rd place, per the chart title) as a Plotly element in markdown.
+    # NOTE(review): the "Distribution of the rankings for each voice:" sentence
+    # below reads like a caption for the ranking-distribution chart of the
+    # preceding cell rather than for this weighted-score chart - confirm.
+    mo.md(f"""
+    ### Most popular **voice** based on weighted scores?
+    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best. 
+    Distribution of the rankings for each voice:
+
+    {mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
    """)
    return


@app.cell
-def _(plot_top3_ranking_distribution, top3_voices):
-    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
-    return
+def _(mo, plot_most_ranked_1, top3_voices):
+    mo.md(f"""
+    ### Which voice is ranked number 1 the most? 

+    (not always the voice with most points)

-@app.cell(hide_code=True)
-def _(mo):
-    mo.md(r"""
-    Which voice is ranked number 1 the most? (not always the voice with most points)
-
-    - Each of the 350 participants gives exactly one 1st-place vote.
-    - Total Rank-1 votes = 350.
-    - Voices are sorted from most to least 1st-place votes.
-    - The top 3 voices with the most Rank-1 votes are colored blue.
-    - This can differ from the points-based winners (3–2–1 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
+    {mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
    """)
    return

@@ -235,6 +278,56 @@ def _(mo):
    return


+@app.cell
+def _(data, survey):
+    # Speaking-style answers come as two frames, each with its own choice map.
+    ss_or, choice_map_or = survey.get_ss_orange_red(data)
+    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)
+
+    # Join on the shared `_recordId` column; the result is returned uncollected.
+    ss_all = ss_or.join(ss_gb, on='_recordId')
+
+    # On duplicate keys the green/blue entries win.
+    choice_map = {**choice_map_or, **choice_map_gb}
+    print(choice_map)
+    return choice_map, ss_all
+
+
+@app.cell
+def _(choice_map, ss_all, utl):
+    # Pass the joined speaking-style frame and the merged choice map through
+    # `utl.process_speaking_style_data`; later cells filter the result by trait.
+    # NOTE(review): presumably a long-format frame (going by the name) -
+    # confirm against utils.
+    ss_long = utl.process_speaking_style_data(ss_all, choice_map)
+    # Bare expression: marimo shows it as this cell's output.
+    ss_long
+    return (ss_long,)
+
+
+@app.cell
+def _(pl, ss_long):
+    # Narrow the speaking-style data to one trait via an exact match on the
+    # "Description" column. The label appears to hold the two ends of the
+    # scale separated by ":" (the plotting cell swaps that for " ↔ ").
+    target_trait = "Indifferent | Unfocussed | Detached:Attentive | Helpful | Caring | Deliberate"
+    trait_data = ss_long.filter(pl.col("Description") == target_trait)
+    # Bare expression: marimo shows it as this cell's output.
+    trait_data
+    return target_trait, trait_data
+
+
+@app.cell
+def _(plts, target_trait, trait_data):
+    # Plot the scores for the selected trait; the ":" in the trait label is
+    # replaced by " ↔ " to form the chart title.
+    plts.plot_speaking_style_trait_scores(
+        trait_data,
+        title=target_trait.replace(":", " ↔ "),
+        # trait_description="Attentive vs Indifferent", # simplified title
+    )
+    return
+
+
+# marimo `_unparsable_cell` placeholder - presumably emitted because the cell
+# source below does not parse: the loop over the distinct "Description" values
+# stops at an unterminated `mo.md(f"""`, so per-trait rendering is unfinished.
+app._unparsable_cell(
+    """
+    for trait in ss_long.select(\"Description\").unique().to_series().to_list():
+        trait_data = ss_long.filter(pl.col(\"Description\") == trait)
+        mo.md(f\"\"\"
+    """,
+    name="_"
+)
+
+
@app.cell(hide_code=True)
 def _(mo):
    mo.md(r"""