import marimo

__generated_with = "0.19.2"
app = marimo.App(width="full")

# Names imported in the setup block are used by the cells below without
# being passed in as cell parameters.
with app.setup:
    import marimo as mo
    import polars as pl
    from pathlib import Path

    from validation import (
        check_progress,
        duration_validation,
        check_straight_liners,
    )
    from utils import (
        QualtricsSurvey,
        combine_exclusive_columns,
        calculate_weighted_ranking_scores,
    )
    import utils
    from speaking_styles import SPEAKING_STYLES
@app.cell
def _():
    # Single-selection picker for the results export: starts in
    # ./data/exports, cannot navigate above it, and only lists .csv files.
    file_browser = mo.ui.file_browser(
        initial_path="./data/exports",
        multiple=False,
        restrict_navigation=True,
        filetypes=[".csv"],
        label="Select 'Labels' File",
    )
    file_browser
    return (file_browser,)
@app.cell
def _(file_browser):
    # Path of the first (only) selected file, or None when nothing is picked.
    _selected = file_browser.path(index=0)

    # Halt this cell and show a prompt until a file has been chosen.
    mo.stop(_selected is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))

    RESULTS_FILE = Path(_selected)
    # The survey definition (.qsf) is a fixed path, independent of the selection.
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE
@app.cell
def _(QSF_FILE, RESULTS_FILE):
    # Survey wrapper built from the selected results file and the QSF file.
    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)

    try:
        data_all = S.load_data()
    except NotImplementedError as e:
        # Surface the loader's message in the notebook and stop the cell
        # instead of letting the exception propagate.
        mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))

    return S, data_all
@app.cell(hide_code=True)
def _(RESULTS_FILE, data_all):
    # Summary header for the loaded dataset: file name and number of responses.
    # The template must be an f-string (not a raw string) so the two
    # placeholders are evaluated, and the cell must take RESULTS_FILE and
    # data_all as inputs so those names are available here.
    mo.md(f"""
---
# Load Data
**Dataset:** `{Path(RESULTS_FILE).name}`
**Responses**: `{data_all.collect().shape[0]}`
""")
    return
@app.cell
def _(S, data_all):
    # Upper bounds of the two rating scales, passed to the straight-liner check.
    sl_ss_max_score = 5
    sl_v1_10_max_score = 10

    # Speaking-style answers: join the green/blue and orange/red frames on
    # the record id so each respondent is checked across both sets at once.
    _ss_green_blue = S.get_ss_green_blue(data_all)[0]
    _ss_orange_red = S.get_ss_orange_red(data_all)[0]
    _ss_joined = _ss_green_blue.join(_ss_orange_red, on='_recordId')
    _sl_ss_c, sl_ss_df = check_straight_liners(_ss_joined, max_score=sl_ss_max_score)

    # Voice scale 1-10 answers get the same check with their own maximum.
    _voice_1_10 = S.get_voice_scale_1_10(data_all)[0]
    _sl_v1_10_c, sl_v1_10_df = check_straight_liners(_voice_1_10, max_score=sl_v1_10_max_score)

    # Validation report: progress, duration, and both straight-liner summaries.
    mo.md(f"""
# Data Validation
{check_progress(data_all)}
{duration_validation(data_all)}
## Speaking Style - Straight Liners
{_sl_ss_c}
## Voice Score Scale 1-10 - Straight Liners
{_sl_v1_10_c}
""")
    return
@app.cell
def _(data_all):
    # Straight-liner removal is currently switched off. The disabled code
    # below would drop Voice Scale 1-10 straight-liners (record ids taken
    # from sl_v1_10_df); for now every response is passed through unchanged.
    # records_to_drop = sl_v1_10_df.select('Record ID').to_series().to_list()
    # data_validated = data_all.filter(~pl.col('_recordId').is_in(records_to_drop))
    # mo.md(f"""
    # Dropped `{len(records_to_drop)}` responses with straight-lining in Voice Scale 1-10 evaluation.
    # """)
    data_validated = data_all
    return (data_validated,)
# Empty placeholder cell: defines nothing and renders nothing.
@app.cell(hide_code=True)
def _():
    return
@app.cell
def _(data_validated):
    # `data` is the dataset every analysis cell below works from.
    data = data_validated

    # Materialise the frame so the cell shows it as its output.
    data.collect()
    return (data,)
# Section heading: respondent demographics.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
---
# Introduction (Respondent Demographics)
""")
    return
@app.cell
def _(S, data):
    # First element of the get_demographics result, collected into an
    # in-memory frame and shown as the cell output.
    _demographics_lazy = S.get_demographics(data)[0]
    demographics = _demographics_lazy.collect()
    demographics
    return (demographics,)
# Sub-heading for the checks on respondents with no 'Consumer' value.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
## Lucia confirmation missing 'Consumer' data
""")
    return
@app.cell
def _(demographics):
    # Record ids of every respondent whose 'Consumer' value is null.
    _missing_consumer = demographics.filter(pl.col('Consumer').is_null())
    demographics_no_consumer = _missing_consumer['_recordId'].to_list()
    # demographics_no_consumer
    return (demographics_no_consumer,)
@app.cell
def _(data_all, demographics_no_consumer):
    # Sanity check (per Lucia): every response lacking a 'Consumer' type
    # should have answered 'Yes' to QID4, i.e. be a business owner.
    _no_consumer_rows = data_all.filter(pl.col('_recordId').is_in(demographics_no_consumer)).collect()
    _answered_yes = _no_consumer_rows['QID4'] == 'Yes'
    assert all(_answered_yes), "Not all respondents with missing 'Consumer' are business owners."
    return
@app.cell
def _(data_all):
    # Converse check: among responses with QID4 == 'Yes' (business owners),
    # every distinct 'Consumer' value must be null.
    _owner_consumer_values = data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()
    assert all(_value is None for _value in _owner_consumer_values), "Not all business owners are missing 'Consumer type' in demographics."
    return
# Sub-heading for the demographic distribution charts.
@app.cell
def _():
    mo.md(r"""
## Demographic Distributions
""")
    return
@app.cell
def _():
    # Demographic columns to chart, in display order. 'Race/Ethnicity' is
    # deliberately left out for now.
    demo_plot_cols = [
        'Age',
        'Gender',
        # 'Race/Ethnicity',
        'Bussiness_Owner',
        'Consumer',
    ]
    return (demo_plot_cols,)
@app.cell
def _(S, demo_plot_cols, demographics):
    # Render one distribution chart per configured demographic column and
    # stack them under a single markdown heading.
    _content = """
## Demographic Distributions
"""
    # The loop variable is underscore-prefixed so it stays local to this cell
    # rather than becoming a notebook-wide definition named `c`.
    for _c in demo_plot_cols:
        _fig = S.plot_demographic_distribution(
            data=demographics,
            column=_c,
            # Turn the column name into a readable title: fix the
            # 'Bussiness' spelling and replace underscores with spaces.
            title=f"{_c.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
        )
        _content += f"""{mo.ui.altair_chart(_fig)}\n\n"""
    mo.md(_content)
    return
# Section heading: brand character results.
@app.cell
def _():
    mo.md(r"""
---
# Brand Character Results
""")
    return
# Sub-heading: original vs refined character comparison.
@app.cell
def _():
    mo.md(r"""
## Best performing: Original vs Refined frankenstein
""")
    return
@app.cell
def _(S, data):
    # First element of the get_character_refine result; nothing is displayed
    # or returned from this cell yet.
    _refine_result = S.get_character_refine(data)
    char_refine_rank = _refine_result[0]
    # print(char_rank.collect().head())
    # print(char_refine_rank.collect().head())
    return
# Sub-heading: weighted character ranking points.
@app.cell
def _():
    mo.md(r"""
## Character ranking points
""")
    return
@app.cell
def _(S, char_rank):
    # Convert the character rankings into weighted scores and plot them.
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # The title is a two-line string. The line break is written as an escape
    # because a plain quoted literal cannot span source lines.
    # NOTE(review): x_label is 'Voice' although this chart shows characters
    # and the sibling ranking charts use 'Character Personality' — confirm.
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Voice',
    )
    return
# Sub-heading: top-3 character ranking distribution.
@app.cell
def _():
    mo.md(r"""
## Character ranking 1-2-3
""")
    return
@app.cell
def _(S, data):
    # First element of the get_character_ranking result, shared with the
    # ranking plot cells.
    _ranking_result = S.get_character_ranking(data)
    char_rank = _ranking_result[0]
    return (char_rank,)
@app.cell
def _(S, char_rank):
    # Distribution of top-3 placements per character personality.
    S.plot_top3_ranking_distribution(
        char_rank,
        x_label='Character Personality',
        title='Character Personality: Rankings Top 3',
    )
    return
# Sub-heading: number of first-place rankings per character.
@app.cell
def _():
    mo.md(r"""
## Character Ranking: times 1st place
""")
    return
@app.cell
def _(S, char_rank):
    # Plot how often each character personality was ranked first.
    # The title is a two-line string. The line break is written as an escape
    # because a plain quoted literal cannot span source lines.
    S.plot_most_ranked_1(
        char_rank,
        title="Most Popular Character\n(Number of Times Ranked 1st)",
        x_label='Character Personality',
    )
    return
# Sub-heading: personality traits wordcloud.
@app.cell
def _():
    mo.md(r"""
## Prominent predefined personality traits wordcloud
""")
    return
@app.cell
def _(S, data):
    # First element of the get_top_8_traits result feeds the wordcloud,
    # which is built from its 'Top_8_Traits' column.
    _traits_result = S.get_top_8_traits(data)
    top8_traits = _traits_result[0]

    S.plot_traits_wordcloud(
        data=top8_traits,
        column='Top_8_Traits',
        title="Most Prominent Personality Traits",
    )
    return
# Sub-heading: trait frequency per brand character.
@app.cell
def _():
    mo.md(r"""
## Trait frequency per brand character
""")
    return
@app.cell
def _():
    # Placeholder for the trait-frequency analysis; no code yet.
    # Join respondent
    return
# Section heading: spoken voice results.
@app.cell
def _():
    mo.md(r"""
---
# Spoken Voice Results
""")
    return
# Section heading: brand character results.
# NOTE(review): the same heading is also rendered by an earlier cell in this
# notebook — confirm whether this second copy is intended.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
---
# Brand Character Results
""")
    return
# Section heading: spoken voice results.
# NOTE(review): the same heading is also rendered by an earlier cell in this
# notebook — confirm whether this second copy is intended.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
---
# Spoken Voice Results
""")
    return
# Run the notebook as an app when this file is executed as a script.
if __name__ == "__main__":
    app.run()