# File listing metadata: 347 lines, 6.9 KiB, Python
import marimo
|
|
|
|
__generated_with = "0.19.2"
|
|
app = marimo.App(width="full")
|
|
|
|
with app.setup:
|
|
import marimo as mo
|
|
import polars as pl
|
|
from pathlib import Path
|
|
|
|
from validation import check_progress, duration_validation, check_straight_liners
|
|
from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
|
import utils
|
|
|
|
from speaking_styles import SPEAKING_STYLES
|
|
|
|
|
|
@app.cell
def _():
    # Picker for a single .csv export, opened at ./data/exports.
    file_browser = mo.ui.file_browser(
        initial_path="./data/exports",
        multiple=False,
        restrict_navigation=True,
        filetypes=[".csv"],
        label="Select 'Labels' File",
    )
    # Bare expression: marimo renders it as the cell output.
    file_browser
    return (file_browser,)
|
|
|
|
|
|
@app.cell
def _(file_browser):
    # First (and only) selected entry, or None when nothing is picked yet.
    _selected = file_browser.path(index=0)

    # Stop here with a hint until the user has chosen a results file.
    mo.stop(
        _selected is None,
        mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"),
    )

    RESULTS_FILE = Path(_selected)
    # Survey definition is hard-coded to the soft-launch export.
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE
|
|
|
|
|
|
@app.cell
def _(QSF_FILE, RESULTS_FILE):
    # Survey wrapper built from the selected results file and the QSF definition.
    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)

    try:
        data_all = S.load_data()
    except NotImplementedError as e:
        # Surface the loader's message in the notebook instead of a traceback.
        _warning = mo.md(f"**⚠️ {e!s}**")
        mo.stop(True, _warning)
    return S, data_all
|
|
|
|
|
|
@app.cell(hide_code=True)
def _(RESULTS_FILE, data_all):
    # The template must be an f-string (and the cell must depend on
    # RESULTS_FILE / data_all) for the dataset name and response count to be
    # interpolated; a plain raw string renders the `{...}` placeholders verbatim.
    mo.md(rf"""
    ---
    # Load Data

    **Dataset:** `{Path(RESULTS_FILE).name}`

    **Responses**: `{data_all.collect().shape[0]}`
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, data_all):
    # Values handed to check_straight_liners as max_score for each question set.
    sl_ss_max_score = 5
    sl_v1_10_max_score = 10

    # Speaking-style answers: the two colour groups are joined on the record id
    # so they are checked together.
    _ss_green_blue = S.get_ss_green_blue(data_all)[0]
    _ss_orange_red = S.get_ss_orange_red(data_all)[0]
    _ss_all = _ss_green_blue.join(_ss_orange_red, on='_recordId')
    _sl_ss_c, sl_ss_df = check_straight_liners(_ss_all, max_score=sl_ss_max_score)

    # Voice scale 1-10 answers.
    _voice_1_10 = S.get_voice_scale_1_10(data_all)[0]
    _sl_v1_10_c, sl_v1_10_df = check_straight_liners(_voice_1_10, max_score=sl_v1_10_max_score)

    # Single markdown report combining all validation outputs.
    mo.md(f"""
    # Data Validation

    {check_progress(data_all)}



    {duration_validation(data_all)}


    ## Speaking Style - Straight Liners
    {_sl_ss_c}


    ## Voice Score Scale 1-10 - Straight Liners
    {_sl_v1_10_c}
    """)
    return
|
|
|
|
|
|
@app.cell
def _(data_all):
    # Straight-liner exclusion is currently switched off: the commented-out
    # code below is kept for reference, and every response is passed through.

    # # Drop any Voice Scale 1-10 responses with straight-lining, using sl_v1_10_df _responseId values
    # records_to_drop = sl_v1_10_df.select('Record ID').to_series().to_list()

    # data_validated = data_all.filter(~pl.col('_recordId').is_in(records_to_drop))

    # mo.md(f"""
    # Dropped `{len(records_to_drop)}` responses with straight-lining in Voice Scale 1-10 evaluation.
    # """)

    data_validated = data_all
    return (data_validated,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    return
|
|
|
|
|
|
@app.cell
def _(data_validated):
    # Working dataset used by the analysis cells that take `data`.
    data = data_validated

    # Bare expression: the collected frame is shown as the cell output.
    data.collect()
    return (data,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ---

    # Introduction (Respondent Demographics)
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, data):
    # First element of get_demographics' result, materialised eagerly.
    _demographics_lazy = S.get_demographics(data)[0]
    demographics = _demographics_lazy.collect()

    # Bare expression: shown as the cell output.
    demographics
    return (demographics,)
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ## Lucia confirmation missing 'Consumer' data
    """)
    return
|
|
|
|
|
|
@app.cell
def _(demographics):
    # Record ids of the rows whose 'Consumer' value is null.
    _missing_consumer_rows = demographics.filter(pl.col('Consumer').is_null())
    demographics_no_consumer = _missing_consumer_rows['_recordId'].to_list()
    # demographics_no_consumer
    return (demographics_no_consumer,)
|
|
|
|
|
|
@app.cell
def _(data_all, demographics_no_consumer):
    # Sanity check (per Lucia): every response lacking a 'Consumer' value
    # must have answered QID4 with 'Yes'.
    _flagged = data_all.filter(pl.col('_recordId').is_in(demographics_no_consumer)).collect()
    _answered_yes = _flagged['QID4'] == 'Yes'
    assert all(_answered_yes), "Not all respondents with missing 'Consumer' are business owners."
    return
|
|
|
|
|
|
@app.cell
def _(data_all):
    # Converse sanity check: among responses with QID4 == 'Yes', the only
    # distinct 'Consumer' value allowed is null.
    _owners = data_all.filter(pl.col('QID4') == 'Yes').collect()
    _consumer_values = _owners['Consumer'].unique()
    assert all(_value is None for _value in _consumer_values), "Not all business owners are missing 'Consumer type' in demographics."
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Demographic Distributions
    """)
    return
|
|
|
|
|
|
@app.cell
def _():
    # Demographic columns to chart, in display order.
    # NOTE: 'Bussiness_Owner' is spelled this way on purpose here; it is used
    # as a column name by the plotting cell.
    demo_plot_cols = [
        'Age',
        'Gender',
        # 'Race/Ethnicity',
        'Bussiness_Owner',
        'Consumer',
    ]
    return (demo_plot_cols,)
|
|
|
|
|
|
@app.cell
def _(S, demo_plot_cols, demographics):
    # Markdown fragments: the section heading first, then one chart per column.
    _sections = ["""
    ## Demographic Distributions

    """]

    for c in demo_plot_cols:
        # Human-readable column name for the chart title.
        _pretty_name = c.replace('Bussiness', 'Business').replace('_', ' ')
        _fig = S.plot_demographic_distribution(
            data=demographics,
            column=c,
            title=f"{_pretty_name} Distribution of Survey Respondents",
        )
        _sections.append(f"""{mo.ui.altair_chart(_fig)}\n\n""")

    # Bare expression: the assembled markdown is the cell output.
    mo.md("".join(_sections))
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ---

    # Brand Character Results
    """)
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Best performing: Original vs Refined frankenstein
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, data):
    # First element of get_character_refine's result.
    char_refine_rank = S.get_character_refine(data)[0]
    # print(char_rank.collect().head())

    # Print the first rows of the collected frame for a quick look.
    _preview = char_refine_rank.collect().head()
    print(_preview)
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Character ranking points
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, char_rank):
    # Weighted scores computed from the character ranking frame.
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # The x axis lists characters, so it is labelled 'Character Personality'
    # (matching the other character-ranking charts) rather than 'Voice'.
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality',
    )
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Character ranking 1-2-3
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, data):
    # Only the first element of get_character_ranking's result is used.
    _ranking_result = S.get_character_ranking(data)
    char_rank = _ranking_result[0]
    return (char_rank,)
|
|
|
|
|
|
@app.cell
def _(S, char_rank):
    # Bare expression: the returned chart is the cell output.
    S.plot_top3_ranking_distribution(
        char_rank,
        x_label='Character Personality',
        title='Character Personality: Rankings Top 3',
    )
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Character Ranking: times 1st place
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, char_rank):
    # Bare expression: the returned chart is the cell output.
    S.plot_most_ranked_1(
        char_rank,
        title="Most Popular Character<br>(Number of Times Ranked 1st)",
        x_label='Character Personality',
    )
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Prominent predefined personality traits wordcloud
    """)
    return
|
|
|
|
|
|
@app.cell
def _(S, data):
    # First element of get_top_8_traits' result feeds the word cloud.
    top8_traits = S.get_top_8_traits(data)[0]

    # Bare expression: the returned figure is the cell output.
    S.plot_traits_wordcloud(data=top8_traits, column='Top_8_Traits', title="Most Prominent Personality Traits")
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ## Trait frequency per brand character
    """)
    return
|
|
|
|
|
|
@app.cell
def _():
    # Join respondent
    return
|
|
|
|
|
|
@app.cell
def _():
    mo.md(r"""
    ---

    # Spoken Voice Results
    """)
    return
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ---

    # Brand Character Results
    """)
    return
|
|
|
|
|
|
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ---

    # Spoken Voice Results
    """)
    return
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed directly as a script: run the notebook app.
    app.run()
|