speaking style trait scores vertical

This commit is contained in:
2026-01-23 12:26:47 +01:00
parent 424355f4a1
commit 84a0f8052e
5 changed files with 615 additions and 90 deletions

View File

@@ -12,7 +12,10 @@ def _():
from validation import check_progress, duration_validation
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
import plots as plts
import utils as utl
return (
JPMCSurvey,
Path,
@@ -20,27 +23,23 @@ def _():
check_progress,
duration_validation,
mo,
pl,
plot_average_scores_with_counts,
plot_character_ranking_distribution,
plot_most_ranked_1_character,
plot_most_ranked_1,
plot_ranking_distribution,
plot_top3_ranking_distribution,
plot_top3_selection_counts,
plot_voice_selection_counts,
plot_weighted_ranking_score,
plts,
utl,
)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
# Load Data
""")
return
@app.cell
def _(Path, mo):
def _():
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
return QSF_FILE, RESULTS_FILE
@@ -52,17 +51,30 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
return data_all, survey
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## Data Validation
@app.cell
def _(Path, RESULTS_FILE, data_all, mo):
mo.md(f"""
# Load Data
**Dataset:** `{Path(RESULTS_FILE).name}`
{mo.ui.table(data_all.collect())}
""")
return
@app.cell
def _(check_progress, data_all):
check_progress(data_all)
@app.cell(hide_code=True)
def _(check_progress, data_all, duration_validation, mo):
mo.md(f"""
## Data Validation
{check_progress(data_all)}
{duration_validation(data_all)}
""")
return
@@ -112,8 +124,6 @@ def _(mo):
def _(mo):
mo.md(r"""
## Character personality ranking
### 1. Which character personality is ranked best?
""")
return
@@ -126,15 +136,23 @@ def _(data, survey):
@app.cell
def _(char_rank, plot_character_ranking_distribution):
plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
def _(char_rank, mo, plot_top3_ranking_distribution):
mo.md(f"""
### 1. Which character personality is ranked best?
{mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
""")
return
@app.cell
def _(mo):
mo.md(r"""
### 2. Which character personality is ranked number 1 the most?
def _(char_rank, mo, plot_most_ranked_1):
mo.md(f"""
### 2. Which character personality is ranked 1st the most?
{mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
""")
return
@@ -143,16 +161,18 @@ def _(mo):
def _(
calculate_weighted_ranking_scores,
char_rank,
mo,
plot_weighted_ranking_score,
):
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
return
# plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
mo.md(f"""
### 3. Which character personality most popular based on weighted scores?
@app.cell
def _(char_rank, plot_most_ranked_1_character):
plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
{mo.ui.plotly(plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
""")
return
@@ -167,51 +187,74 @@ def _(mo):
@app.cell
def _(data, survey):
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
print(v_18_8_3.head())
return
# print(v_18_8_3.head())
return (v_18_8_3,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 8 voices are chosen the most out of 18?
def _(mo, plot_voice_selection_counts, v_18_8_3):
mo.md(f"""
### Which 8 voices are chosen the most out of 18?
{mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
def _(mo, plot_top3_selection_counts, v_18_8_3):
mo.md(f"""
### Which 3 voices are chosen the most out of 18?
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
{mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
def _(
calculate_weighted_ranking_scores,
data,
mo,
plot_ranking_distribution,
survey,
):
top3_voices = survey.get_top_3_voices(data)[0].collect()
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
mo.md(f"""
### Which voice is ranked best in the ranking question for top 3?
(not best 3 out of 8 question)
{mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
""")
return top3_voices, top3_voices_weighted
@app.cell
def _(mo, plot_weighted_ranking_score, top3_voices_weighted):
mo.md(f"""
### Most popular **voice** based on weighted scores?
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
Distribution of the rankings for each voice:
{mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
""")
return
@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
return
def _(mo, plot_most_ranked_1, top3_voices):
mo.md(f"""
### Which voice is ranked number 1 the most?
(not always the voice with most points)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked number 1 the most? (not always the voice with most points)
- Each of the 350 participants gives exactly one 1st-place vote.
- Total Rank-1 votes = 350.
- Voices are sorted from most to least 1st-place votes.
- The top 3 voices with the most Rank-1 votes are colored blue.
- This can differ from the points-based winners (321 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
{mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
""")
return
@@ -235,6 +278,56 @@ def _(mo):
return
@app.cell
def _(data, survey):
    # Speaking-style answers are fetched through two separate survey getters
    # (orange/red and green/blue); each call yields a frame and a choice map.
    ss_or, choice_map_or = survey.get_ss_orange_red(data)
    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)

    # Combine the data on the shared '_recordId' column. The joined frame is
    # not collected here: nothing in this cell reads the materialized rows, so
    # evaluation is left to whichever cell actually needs them.
    ss_all = ss_or.join(ss_gb, on='_recordId')

    # Merge both choice maps; on a duplicate key the green/blue entry wins.
    choice_map = {**choice_map_or, **choice_map_gb}
    print(choice_map)
    return choice_map, ss_all
@app.cell
def _(choice_map, ss_all, utl):
    # Run the project helper on the speaking-style frame together with its
    # choice map; the result is exposed to other cells as `ss_long`.
    ss_long = utl.process_speaking_style_data(ss_all, choice_map)

    # Bare expression kept on purpose: presumably this is what the notebook
    # renders as the cell's output (confirm against marimo's cell semantics).
    ss_long
    return (ss_long,)
@app.cell
def _(pl, ss_long):
    # Full "Description" value of the single trait inspected by this cell.
    target_trait = "Indifferent | Unfocussed | Detached:Attentive | Helpful | Caring | Deliberate"

    # Row predicate held in an underscore-prefixed local so it is not exported
    # from the cell alongside the two returned names.
    _is_target_trait = pl.col("Description") == target_trait
    trait_data = ss_long.filter(_is_target_trait)

    # Bare expression kept so the filtered frame stays the cell's last value.
    trait_data
    return target_trait, trait_data
@app.cell
def _(plts, target_trait, trait_data):
    # Plot the scores for the selected trait, using the trait label as title.
    # The label passed in here separates its two sides with a bare ":" (no
    # surrounding spaces), so simply deleting the colon would fuse the word
    # before it with the word after it. Substitute a readable separator instead.
    plts.plot_speaking_style_trait_scores(
        trait_data,
        title=target_trait.replace(":", " vs "),
        # trait_description="Attentive vs Indifferent", # simplified title
    )
    return
# NOTE(review): this cell is kept as raw source text instead of a cell function.
# The text is an unfinished loop: it walks the unique "Description" values of
# ss_long, filters ss_long down to each trait, and then opens an mo.md f-string
# that is never closed. Presumably the intent was to render one section per
# trait -- finish the loop body or delete the cell; confirm with the author.
app._unparsable_cell(
"""
for trait in ss_long.select(\"Description\").unique().to_series().to_list():
trait_data = ss_long.filter(pl.col(\"Description\") == trait)
mo.md(f\"\"\"
""",
name="_"
)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""