# Source listing metadata: 580 lines, 16 KiB, Python
import marimo

# Version string recorded under marimo's `__generated_with` marker.
__generated_with = "0.19.2"
# Application object; each cell in this file registers itself through `@app.cell`.
app = marimo.App(width="full")
|
||
|
||
|
||
@app.cell
def _():
    # Import everything the notebook needs and hand the names used by other
    # cells back through the return tuple.
    from pathlib import Path

    import marimo as mo
    import polars as pl

    import utils
    from speaking_styles import SPEAKING_STYLES
    from utils import (
        JPMCSurvey,
        calculate_weighted_ranking_scores,
        combine_exclusive_columns,
    )
    from validation import (
        check_progress,
        check_straight_liners,
        duration_validation,
    )
    return (
        JPMCSurvey,
        Path,
        SPEAKING_STYLES,
        calculate_weighted_ranking_scores,
        check_progress,
        check_straight_liners,
        duration_validation,
        mo,
        pl,
        utils,
    )
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(mo):
    # File picker for the survey export: single selection, CSV files only,
    # starting in ./data/exports with navigation restricted.
    _browser_options = dict(
        initial_path="./data/exports",
        multiple=False,
        restrict_navigation=True,
        filetypes=[".csv"],
        label="Select 'Labels' File",
    )
    file_browser = mo.ui.file_browser(**_browser_options)
    # Kept as the final expression so the widget is the cell's output.
    file_browser
    return (file_browser,)
|
||
|
||
|
||
@app.cell
def _(Path, file_browser, mo):
    # Halt here until a file has been chosen in the browser, then expose the
    # two input paths used to build the survey object.
    _selected = file_browser.path(index=0)
    mo.stop(_selected is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))

    # Formerly hard-coded results file, kept for reference:
    # RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    RESULTS_FILE = Path(_selected)
    # The QSF survey definition is still a fixed path.
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE
|
||
|
||
|
||
@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
    # Build the survey object and load its data. A NotImplementedError raised
    # by the loader is shown as a warning and stops the cell.
    S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
    try:
        data_all = S.load_data()
    except NotImplementedError as _err:
        _warning = mo.md(f"**⚠️ {_err}**")
        mo.stop(True, _warning)
    return S, data_all
|
||
|
||
|
||
@app.cell
def _(Path, RESULTS_FILE, data_all, mo):
    # Summary banner: name of the selected file and number of rows loaded.
    _dataset_name = Path(RESULTS_FILE).name
    _n_responses = data_all.collect().shape[0]

    mo.md(f"""

    ---
    # Load Data

    **Dataset:** `{_dataset_name}`

    **Responses**: `{_n_responses}`


    """)
    return
|
||
|
||
|
||
@app.cell
def _():
    # `max_score` values handed to check_straight_liners in the validation
    # cell: one for the speaking-style items, one for the 1-10 voice scale.
    sl_ss_max_score, sl_v1_10_max_score = 5, 10
    return sl_ss_max_score, sl_v1_10_max_score
|
||
|
||
|
||
@app.cell
def _(
    S,
    check_progress,
    check_straight_liners,
    data_all,
    duration_validation,
    mo,
    sl_ss_max_score,
    sl_v1_10_max_score,
):
    # Run the validation helpers on the unfiltered data and render their
    # outputs in one markdown report.

    # Speaking-style items: both colour groups joined on the record id.
    _ss_green_blue = S.get_ss_green_blue(data_all)[0]
    _ss_orange_red = S.get_ss_orange_red(data_all)[0]
    _ss_all = _ss_green_blue.join(_ss_orange_red, on='_recordId')
    _sl_ss_c, sl_ss_df = check_straight_liners(_ss_all, max_score=sl_ss_max_score)

    # Voice scale 1-10 items.
    _voice_scale = S.get_voice_scale_1_10(data_all)[0]
    _sl_v1_10_c, sl_v1_10_df = check_straight_liners(_voice_scale, max_score=sl_v1_10_max_score)

    _progress_report = check_progress(data_all)
    _duration_report = duration_validation(data_all)

    mo.md(f"""
    # Data Validation

    {_progress_report}



    {_duration_report}


    ## Speaking Style - Straight Liners
    {_sl_ss_c}


    ## Voice Score Scale 1-10 - Straight Liners
    {_sl_v1_10_c}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(data_all):
    # Dropping straight-liners is currently switched off: the data is passed
    # through untouched. The disabled logic is kept below for reference.
    #
    #   # Drop any Voice Scale 1-10 responses with straight-lining, using sl_v1_10_df _responseId values
    #   records_to_drop = sl_v1_10_df.select('Record ID').to_series().to_list()
    #
    #   data_validated = data_all.filter(~pl.col('_recordId').is_in(records_to_drop))
    #
    #   mo.md(f"""
    #   Dropped `{len(records_to_drop)}` responses with straight-lining in Voice Scale 1-10 evaluation.
    #   """)
    data_validated = data_all
    return (data_validated,)
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, mo):
    # Demographic filter form: one multiselect per dimension, each starting
    # with every option selected.
    _selectors = {
        "age": mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
        "gender": mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
        "ethnicity": mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
        "income": mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
        "consumer": mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:"),
    }

    # The {placeholders} in the template are filled by .batch() with the
    # widgets above.
    _template = mo.md('''



    {age}

    {gender}

    {ethnicity}

    {income}

    {consumer}
    '''
    )
    filter_form = _template.batch(**_selectors).form()

    mo.md(f'''
    ---

    # Data Filter

    {filter_form}
    ''')
    return
|
||
|
||
|
||
@app.cell
def _(data_validated):
    # Filtering by the form above is currently disabled, so the validated
    # data is used as-is. Disabled pipeline, kept for reference:
    #
    #   mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
    #   _d = S.filter_data(data_validated, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])
    #
    #   # Stop execution and prevent other cells from running if no data is selected
    #   mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
    #   data = _d
    data = data_validated

    # Final expression of the cell: the collected frame.
    data.collect()
    return (data,)
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, data, mo):
    # Take the first element returned by get_character_ranking and open the
    # "Analysis" part of the report.
    _ranking_result = S.get_character_ranking(data)
    char_rank = _ranking_result[0]

    mo.md(r"""
    ---

    # Analysis

    ## Character personality ranking
    """)
    return (char_rank,)
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(mo):
    # Markdown cell with no content.
    _blank = r"""

    """
    mo.md(_blank)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _():
    # Placeholder cell; its only content is a disabled statement:
    #   char_rank = S.get_character_ranking(data)[0]
    return
|
||
|
||
|
||
@app.cell
def _(S, char_rank, mo):
    # Question 1: chart from plot_top3_ranking_distribution on the character
    # ranking data.
    _chart = mo.ui.altair_chart(
        S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality')
    )

    mo.md(f"""
    ### 1. Which character personality is ranked best?


    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, char_rank, mo):
    # Question 2: chart from plot_most_ranked_1 on the character ranking data.
    _figure = S.plot_most_ranked_1(
        char_rank,
        title="Most Popular Character<br>(Number of Times Ranked 1st)",
        x_label='Character Personality',
        width=1000,
    )
    _chart = mo.ui.altair_chart(_figure)

    mo.md(f"""
    ### 2. Which character personality is ranked 1st the most?


    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, calculate_weighted_ranking_scores, char_rank, mo):
    # Question 3: weighted popularity of the character personalities.
    # Scores come from calculate_weighted_ranking_scores; the chart title
    # states the weighting (1st=3pts, 2nd=2pts, 3rd=1pt).
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # The x axis shows character personalities, matching the two sibling
    # character charts (the label previously read 'Voice').
    _chart = mo.ui.altair_chart(
        S.plot_weighted_ranking_score(
            char_rank_weighted,
            title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
            x_label='Character Personality',
            width=1000,
        )
    )

    mo.md(f"""
    ### 3. Which character personality is most popular based on weighted scores?


    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, data, mo):
    # Collect the first element returned by get_18_8_3 and open the
    # "Voice Ranking" section.
    _selection_lazy = S.get_18_8_3(data)[0]
    v_18_8_3 = _selection_lazy.collect()

    mo.md(r"""
    ## Voice Ranking
    """)
    return (v_18_8_3,)
|
||
|
||
|
||
@app.cell(hide_code=True)
def _():
    # Placeholder cell; its only content is a disabled debug statement:
    #   print(v_18_8_3.head())
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, mo, v_18_8_3):
    # Chart from plot_voice_selection_counts on the 18/8/3 selection data.
    _chart = mo.ui.altair_chart(
        S.plot_voice_selection_counts(v_18_8_3, height=500, width=1000)
    )

    mo.md(f"""
    ### Which 8 voices are chosen the most out of 18?

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, mo, v_18_8_3):
    # Chart from plot_top3_selection_counts on the 18/8/3 selection data,
    # preceded by an explanation of the underlying survey question.
    _chart = mo.ui.altair_chart(
        S.plot_top3_selection_counts(v_18_8_3, height=500, width=1000)
    )

    mo.md(f"""
    ### Which 3 voices are chosen the most out of 18?

    How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, calculate_weighted_ranking_scores, data):
    # Top-3 voice ranking data (first element returned by get_top_3_voices)
    # and its weighted scores.
    _top3_result = S.get_top_3_voices(data)
    top3_voices = _top3_result[0]
    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
    return top3_voices, top3_voices_weighted
|
||
|
||
|
||
@app.cell
def _(S, mo, top3_voices):
    # Chart from plot_ranking_distribution on the top-3 voice ranking data.
    _chart = mo.ui.altair_chart(
        S.plot_ranking_distribution(top3_voices, x_label='Voice', width=1000)
    )

    mo.md(f"""
    ### Which voice is ranked best in the ranking question for top 3?

    (not best 3 out of 8 question)

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, mo, top3_voices_weighted):
    # Chart from plot_weighted_ranking_score on the weighted top-3 voice scores.
    _figure = S.plot_weighted_ranking_score(
        top3_voices_weighted,
        title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)",
        height=500,
        width=1000,
    )
    _chart = mo.ui.altair_chart(_figure)

    mo.md(f"""
    ### Most popular **voice** based on weighted scores?
    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
    Distribution of the rankings for each voice:

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, mo, top3_voices):
    # Chart from plot_most_ranked_1 on the top-3 voice ranking data.
    _figure = S.plot_most_ranked_1(
        top3_voices,
        title="Most Popular Voice<br>(Number of Times Ranked 1st)",
        x_label='Voice',
        width=1000,
    )
    _chart = mo.ui.altair_chart(_figure)

    mo.md(f"""
    ### Which voice is ranked number 1 the most?

    (not always the voice with most points)

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _():
    # Empty cell.
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, data, mo, utils):
    # Prepare the speaking-style data: join the orange/red and green/blue
    # frames on the record id, merge their choice maps, and run the combined
    # frame through utils.process_speaking_style_data.
    ss_or, choice_map_or = S.get_ss_orange_red(data)
    ss_gb, choice_map_gb = S.get_ss_green_blue(data)

    # Combine the data
    ss_all = ss_or.join(ss_gb, on='_recordId')

    # On duplicate keys the green/blue entries win.
    choice_map = {**choice_map_or, **choice_map_gb}

    # The former `_d = ss_all.collect()` only served the debug prints below
    # and was otherwise unused, so the frame is no longer materialised here.
    # print(ss_all.collect().head())
    # print(choice_map)
    ss_long = utils.process_speaking_style_data(ss_all, choice_map)

    mo.md(r"""
    ## Voice Speaking Style - Perception Traits

    Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
    """)
    return choice_map, ss_all, ss_long
|
||
|
||
|
||
@app.cell
def _(S, mo, pl, ss_long):
    # One numbered section per distinct value of the "Description" column,
    # each with the chart from plot_speaking_style_trait_scores for that
    # trait's rows.

    # maintain_order=True keeps traits in first-appearance order; a plain
    # unique() gives no ordering guarantee, so the numbering could change
    # between runs.
    _traits = (
        ss_long.select("Description")
        .unique(maintain_order=True)
        .to_series()
        .to_list()
    )

    # Sections are gathered in a list and joined once instead of growing a
    # string with `+=` inside the loop.
    _sections = ["""### How does each voice score for each “speaking style labeled trait”?"""]

    for i, trait in enumerate(_traits, start=1):
        trait_d = ss_long.filter(pl.col("Description") == trait)
        # The ":" separating the two poles of a trait is shown as an arrow.
        _label = trait.replace(":", " ↔ ")

        _sections.append(f"""
        ### {i}) {_label}

        {mo.ui.altair_chart(S.plot_speaking_style_trait_scores(trait_d, title=_label, height=550))}
        """)

    content = "".join(_sections)
    mo.md(content)
    return
|
||
|
||
|
||
@app.cell
def _():
    # Empty cell.
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, data, mo):
    # Voice scale 1-10 data (first element returned by get_voice_scale_1_10)
    # and the section heading.
    _scale_result = S.get_voice_scale_1_10(data)
    vscales = _scale_result[0]
    # plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)

    mo.md(r"""
    ## Voice Scale 1-10
    """)
    return (vscales,)
|
||
|
||
|
||
@app.cell
def _(pl, vscales):
    # Count the non-null values in each row of the voice-scale frame and
    # show summary statistics of that count.
    #
    # The id column is spelled `_recordId` (as in the joins and column
    # filters elsewhere in this notebook). The previous `"_recordID"` matched
    # no column, so the id was counted as an answer in every row.
    _answered = pl.all().exclude("_recordId").is_not_null()
    nn_vscale = vscales.with_columns(
        non_null_count=pl.sum_horizontal(_answered)
    )
    nn_vscale.collect()['non_null_count'].describe()
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, mo, vscales):
    # Chart from plot_average_scores_with_counts on the raw voice-scale data.
    _figure = S.plot_average_scores_with_counts(
        vscales,
        x_label='Voice',
        width=1000,
        domain=[1,10],
        title="Voice General Impression (Scale 1-10)",
    )
    _chart = mo.ui.altair_chart(_figure)

    mo.md(f"""
    ### How does each voice score on a scale from 1-10?

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, mo, utils, vscales):
    # Voice-scale chart after utils.normalize_row_values has been applied to
    # every column except the record id.

    # Collect once and reuse the frame; previously the query was collected
    # twice (once for the column names, once for the data).
    _vscales_df = vscales.collect()
    _target_cols = [c for c in _vscales_df.columns if c != '_recordId']
    vscales_row_norm = utils.normalize_row_values(_vscales_df, target_cols=_target_cols)

    _chart = mo.ui.altair_chart(
        S.plot_average_scores_with_counts(
            vscales_row_norm,
            x_label='Voice',
            width=1000,
            domain=[1,10],
            title="Voice General Impression (Scale 1-10) - Normalized per Respondent",
        )
    )

    mo.md(f"""
    ### Voice scale 1-10 normalized per respondent?

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(S, mo, utils, vscales):
    # Voice-scale chart after utils.normalize_global_values has been applied
    # to every column except the record id.

    # Collect once and reuse the frame; previously the query was collected
    # twice (once for the column names, once for the data).
    _vscales_df = vscales.collect()
    _target_cols = [c for c in _vscales_df.columns if c != '_recordId']
    vscales_global_norm = utils.normalize_global_values(_vscales_df, target_cols=_target_cols)

    _chart = mo.ui.altair_chart(
        S.plot_average_scores_with_counts(
            vscales_global_norm,
            x_label='Voice',
            width=1000,
            domain=[1,10],
            title="Voice General Impression (Scale 1-10) - Normalized Across All Respondents",
        )
    )

    # The heading now matches the chart title; it used to repeat the
    # "per respondent" wording of the row-normalised cell.
    mo.md(f"""
    ### Voice scale 1-10 normalized across all respondents?

    {_chart}
    """)
    return
|
||
|
||
|
||
@app.cell
def _(choice_map, mo, ss_all, utils, vscales):
    # Build the table used for the style/scale correlations: processed
    # speaking-style data inner-joined with processed voice-scale data on
    # record id and voice. Then render the explanatory text.
    df_style = utils.process_speaking_style_data(ss_all, choice_map)
    df_voice_long = utils.process_voice_scale_data(vscales)

    _join_keys = ["_recordId", "Voice"]
    joined_df = df_style.join(df_voice_long, on=_join_keys, how="inner")

    _intro = r"""
    ## Correlations Voice Speaking Styles <-> Voice Scale 1-10

    Let’s show how scoring better on these speaking styles correlates (or not) with better Voice Scale 1-10 evaluation. For each speaking style we show how the traits in these speaking styles correlate with Voice Scale 1-10 evaluation. This gives us a total of 4 correlation diagrams.

    Example for speaking style green:
    - Trait 1: Friendly | Conversational | Down-to-earth
    - Trait 2: Approachable | Familiar | Warm
    - Trait 3: Optimistic | Benevolent | Positive | Appreciative

    ### How to Interpret These Correlation Results
    Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).

    **Reading the Chart**

    | Correlation Value | Interpretation |
    |-----------|----------|
    | r > 0 (Green bars)| Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
    | r < 0 (Red bars)| Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
    | r ≈ 0| No relationship — this trait doesn't predict Voice Scale ratings|
    """
    mo.md(_intro)
    return df_style, joined_df
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, SPEAKING_STYLES, joined_df, mo):
    # One correlation chart per entry in SPEAKING_STYLES, assembled into a
    # single markdown document.
    _parts = ["""### Total Results

    """]

    for style, traits in SPEAKING_STYLES.items():
        fig = S.plot_speaking_style_correlation(
            data=joined_df,
            style_color=style,
            style_traits=traits,
            title=f"Correlation: Speaking Style {style} and Voice Scale 1-10",
        )
        _parts.append(f"""
        #### Speaking Style **{style}**:

        {mo.ui.altair_chart(fig)}

        """)

    _content = "".join(_parts)
    mo.md(_content)
    return
|
||
|
||
|
||
@app.cell
def _(mo):
    # Static markdown: open checklist items for the female/male split and the
    # intro to the ranking-points correlations. Spelling fixed in the rendered
    # text ("separately", "Voice Ranking").
    mo.md(r"""
    ### Female / Male Voices considered separately

    - [ ] 4 correlation diagrams considering each speaking style (4) and all female voice results.
    - [ ] 4 correlation diagrams considering each speaking style (4) and all male voice results.



    ## Correlations Voice Speaking Styles <-> Voice Ranking Points

    Let’s show how scoring better on these speaking styles correlates (or not) with better Voice Ranking results. For each speaking style we show how the traits in these speaking styles correlate with voice ranking points. This gives us a total of 4 correlation diagrams.

    Example for speaking style green:
    - Trait 1: Friendly | Conversational | Down-to-earth
    - Trait 2: Approachable | Familiar | Warm
    - Trait 3: Optimistic | Benevolent | Positive | Appreciative

    ### Total Results

    - [ ] 4 correlation diagrams
    """)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(mo):
    # Markdown cell with no content.
    _blank = r"""

    """
    mo.md(_blank)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(mo):
    # Markdown cell with no content.
    _blank = r"""

    """
    mo.md(_blank)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(mo):
    # Markdown cell with no content.
    _blank = r"""

    """
    mo.md(_blank)
    return
|
||
|
||
|
||
@app.cell(hide_code=True)
def _(S, SPEAKING_STYLES, df_style, mo, top3_voices, utils):
    # Join the processed speaking-style data with the processed voice-ranking
    # data on record id and voice, then render one ranking-correlation chart
    # per entry in SPEAKING_STYLES.
    df_ranking = utils.process_voice_ranking_data(top3_voices)
    joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')

    _parts = ["""## Correlations Voice Speaking Styles <-> Voice Ranking Points

    """]

    for _style, _traits in SPEAKING_STYLES.items():
        _fig = S.plot_speaking_style_ranking_correlation(
            data=joined,
            style_color=_style,
            style_traits=_traits,
        )
        _parts.append(f"""

        #### Speaking Style **{_style}**:

        {mo.ui.altair_chart(_fig)}

        """)

    _content = "".join(_parts)
    mo.md(_content)
    return
|
||
|
||
|
||
@app.cell
def _(mo):
    # Static markdown: open checklist items for the female/male split of the
    # ranking correlations. Spelling fixed in the rendered text ("separately").
    mo.md(r"""
    ### Female / Male Voices considered separately

    - [ ] 4 correlation diagrams considering each speaking style (4) and all female voice results.
    - [ ] 4 correlation diagrams considering each speaking style (4) and all male voice results.
    """)
    return
|
||
|
||
|
||
# Run the notebook application when this file is executed as a script.
if __name__ == "__main__":
    app.run()
|