import marimo

__generated_with = "0.19.7"
app = marimo.App(width="full")

with app.setup:
    # Shared imports for all cells of the notebook.
    import marimo as mo
    import polars as pl
    from pathlib import Path

    from validation import check_progress, duration_validation, check_straight_liners
    from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
    import utils

    from speaking_styles import SPEAKING_STYLES


@app.cell
def _():
    # File picker restricted to CSV files below ./data/exports.
    file_browser = mo.ui.file_browser(
        initial_path="./data/exports",
        multiple=False,
        restrict_navigation=True,
        filetypes=[".csv"],
        label="Select 'Labels' File"
    )
    file_browser
    return (file_browser,)


@app.cell
def _(file_browser):
    # Halt here (showing the hint) until a results file has been selected.
    mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))

    RESULTS_FILE = Path(file_browser.path(index=0))
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE


@app.cell
def _(QSF_FILE, RESULTS_FILE):
    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    try:
        data_all = S.load_data()
    except NotImplementedError as e:
        # Surface the loader's message in the notebook instead of a traceback.
        mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))

    return S, data_all


@app.cell(hide_code=True)
def _(RESULTS_FILE, data_all):
    # RESULTS_FILE is already a Path, so its name can be read directly.
    # Markdown with interpolated values is kept flush-left so that multi-line
    # interpolated content cannot interfere with indentation handling.
    mo.md(rf"""
---
# Load Data

**Dataset:** {RESULTS_FILE.name}

**Responses**: {data_all.collect().shape[0]}
""")
    return


@app.cell
def _(S, data_all):
    # Maximum score of each scale, passed to the straight-lining check.
    sl_ss_max_score = 5
    sl_v1_10_max_score = 10

    # Join both speaking-style frames on the record id before checking them together.
    _ss_all = S.get_ss_green_blue(data_all)[0].join(S.get_ss_orange_red(data_all)[0], on='_recordId')
    _sl_ss_c, sl_ss_df = check_straight_liners(_ss_all, max_score=sl_ss_max_score)

    _sl_v1_10_c, sl_v1_10_df = check_straight_liners(
        S.get_voice_scale_1_10(data_all)[0],
        max_score=sl_v1_10_max_score
    )

    mo.md(f"""
{check_progress(data_all)}

{duration_validation(data_all)}

## Speaking Style - Straight Liners
{_sl_ss_c}

## Voice Score Scale 1-10 - Straight Liners
{_sl_v1_10_c}
""")
    return


@app.cell
def _(data_all):
    # # Drop any Voice Scale 1-10 responses with straight-lining, using sl_v1_10_df _responseId values
    # records_to_drop = sl_v1_10_df.select('Record ID').to_series().to_list()

    # data_validated = data_all.filter(~pl.col('_recordId').is_in(records_to_drop))

    # mo.md(f"""
    # Dropped `{len(records_to_drop)}` responses with straight-lining in Voice Scale 1-10 evaluation.
    # """)

    # The drop step above is disabled, so the validated data is the full data set.
    data_validated = data_all
    return (data_validated,)


@app.cell
def _():
    return


@app.cell(hide_code=True)
def _():
    #
    return


@app.cell
def _():
    mo.md(r"""
    ## Lucia confirmation missing 'Consumer' data
    """)
    return


@app.cell
def _(S, data_validated):
    demographics = S.get_demographics(data_validated)[0].collect()
    demographics
    return (demographics,)


@app.cell(hide_code=True)
def _(demographics):
    # Demographics where 'Consumer' is null
    demographics_no_consumer = demographics.filter(pl.col('Consumer').is_null())['_recordId'].to_list()
    demographics_no_consumer
    return (demographics_no_consumer,)


@app.cell
def _(data_all, demographics_no_consumer):
    # check if the responses with missing 'Consumer type' in demographics are all business owners as Lucia mentioned
    _qid4_is_yes = data_all.filter(pl.col('_recordId').is_in(demographics_no_consumer)).collect()['QID4'] == 'Yes'
    assert all(_qid4_is_yes), "Not all respondents with missing 'Consumer' are business owners."
    return


@app.cell
def _():
    mo.md(r"""
    # Filter Data (Global corrections)
    """)
    return


@app.cell
def _(S):
    # One multiselect per demographic dimension; every option is selected by default.
    filter_form = mo.md('''
    {age}

    {gender}

    {ethnicity}

    {income}

    {consumer}
    '''
    ).batch(
        age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
        gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
        ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
        income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
        consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
    ).form()
    mo.md(f'''
---
# Data Filter

{filter_form}
''')
    return


@app.cell
def _(data_validated):
    # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
    # _d = S.filter_data(data_validated, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])

    # # Stop execution and prevent other cells from running if no data is selected
    # mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
    # data = _d

    # Filtering is currently disabled: all validated responses are analysed.
    data = data_validated

    data.collect()
    return (data,)


@app.cell
def _():
    return


@app.cell
def _():
    # Check if all business owners are missing a 'Consumer type' in demographics
    # assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."
    return


@app.cell
def _():
    mo.md(r"""
    # Demographic Distributions
    """)
    return


@app.cell
def _():
    # Demographic columns to plot, in display order.
    demo_plot_cols = [
        'Age',
        'Gender',
        # 'Race/Ethnicity',
        'Bussiness_Owner',
        'Consumer'
    ]
    return (demo_plot_cols,)


@app.cell
def _(S, data, demo_plot_cols):
    # The demographics frame does not depend on the plotted column, so fetch it once.
    _demographics = S.get_demographics(data)[0]

    _charts = []
    for _col in demo_plot_cols:
        _fig = S.plot_demographic_distribution(
            data=_demographics,
            column=_col,
            # The column name is spelled 'Bussiness'; only the displayed title is corrected.
            title=f"{_col.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
        )
        _charts.append(f"""{mo.ui.altair_chart(_fig)}\n\n""")

    mo.md("".join(_charts))
    return


@app.cell
def _():
    mo.md(r"""
    ---
    # Brand Character Results
    """)
    return


@app.cell(disabled=True)
def _():
    mo.md(r"""
    ## Best performing: Original vs Refined frankenstein
    """)
    return


@app.cell(disabled=True)
def _(S, data):
    char_refine_rank = S.get_character_refine(data)[0]
    # print(char_rank.collect().head())
    print(char_refine_rank.collect().head())
    return


@app.cell(disabled=True)
def _():
    mo.md(r"""
    ## Character ranking points
    """)
    return


@app.cell
def _(S, char_rank):
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
    # NOTE(review): the title contains a line break, written here as "\n".
    # Confirm the delimiter the plotting helper expects (it may be "<br>").
    # x_label matches the other character-ranking charts ('Character Personality').
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality'
    )
    return


@app.cell
def _():
    mo.md(r"""
    ## Character ranking 1-2-3
    """)
    return


@app.cell
def _(S, data):
    char_rank = S.get_character_ranking(data)[0]
    return (char_rank,)


@app.cell
def _(S, char_rank):
    S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')
    return


@app.cell
def _():
    mo.md(r"""
    ### Statistical Significance Character Ranking
    """)
    return


@app.cell(disabled=True)
def _(S, char_rank):
    _pairwise_df, _meta = S.compute_ranking_significance(char_rank)

    # print(_pairwise_df.columns)

    mo.md(f"""
{mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}

{mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
""")
    return


@app.cell(disabled=True)
def _():
    mo.md(r"""
    ## Character Ranking: times 1st place
    """)
    return


@app.cell
def _(S, char_rank):
    # NOTE(review): line break in the title written as "\n"; confirm the expected delimiter.
    S.plot_most_ranked_1(
        char_rank,
        title="Most Popular Character\n(Number of Times Ranked 1st)",
        x_label='Character Personality'
    )
    return


@app.cell
def _():
    mo.md(r"""
    ## Prominent predefined personality traits wordcloud
    """)
    return


@app.cell
def _(S, data):
    top8_traits = S.get_top_8_traits(data)[0]
    S.plot_traits_wordcloud(
        data=top8_traits,
        column='Top_8_Traits',
        title="Most Prominent Personality Traits",
    )
    return


@app.cell
def _():
    mo.md(r"""
    ## Trait frequency per brand character
    """)
    return


@app.cell
def _(S, data):
    char_df = S.get_character_refine(data)[0]
    return (char_df,)


@app.cell
def _(S, char_df):
    from theme import ColorPalette

    # Assuming you already have char_df (your data from get_character_refine or similar)
    characters = ['Bank Teller', 'Familiar Friend', 'The Coach', 'Personal Assistant']
    # Per character: (bar color, highlight color).
    character_colors = {
        'Bank Teller': (ColorPalette.CHARACTER_BANK_TELLER, ColorPalette.CHARACTER_BANK_TELLER_HIGHLIGHT),
        'Familiar Friend': (ColorPalette.CHARACTER_FAMILIAR_FRIEND, ColorPalette.CHARACTER_FAMILIAR_FRIEND_HIGHLIGHT),
        'The Coach': (ColorPalette.CHARACTER_COACH, ColorPalette.CHARACTER_COACH_HIGHLIGHT),
        'Personal Assistant': (ColorPalette.CHARACTER_PERSONAL_ASSISTANT, ColorPalette.CHARACTER_PERSONAL_ASSISTANT_HIGHLIGHT),
    }

    # Compute every character's trait-frequency table once; it is needed both
    # for the shared sort order and for the per-character plot.
    _freq_by_character = {}
    for _name in characters:
        _freq_df, _ = S.transform_character_trait_frequency(char_df, _name)
        _freq_by_character[_name] = _freq_df

    # Build consistent sort order (by total frequency across all characters)
    all_trait_counts = {}
    for _freq_df in _freq_by_character.values():
        for _row in _freq_df.iter_rows(named=True):
            all_trait_counts[_row['trait']] = all_trait_counts.get(_row['trait'], 0) + _row['count']

    consistent_sort_order = sorted(all_trait_counts.keys(), key=lambda x: -all_trait_counts[x])

    # Generate 4 plots (one per character)
    _charts = []
    for _name in characters:
        _main_color, _highlight_color = character_colors[_name]
        _chart = S.plot_single_character_trait_frequency(
            data=_freq_by_character[_name],
            character_name=_name,
            bar_color=_main_color,
            highlight_color=_highlight_color,
            trait_sort_order=consistent_sort_order,
        )
        _charts.append(f"""
{mo.ui.altair_chart(_chart)}

""")

    mo.md("".join(_charts))
    return


@app.cell(disabled=True)
def _():
    # The Dutch note below (kept verbatim, it is rendered text) says roughly:
    # "see chat - example: if no. 1 and no. 2 do not differ significantly but
    # both differ from no. 3, that is also great. Please think along about how
    # I can present it."
    mo.md(r"""
    ## Statistical significance best characters

    zie chat
    > voorbeeld: als de nr 1 en 2 niet significant verschillen maar wel van de nr 3 bijvoorbeeld is dat ook top. Beetje meedenkend over hoe ik het kan presenteren weetje wat ik bedoel?:)
    >
    """)
    return


@app.cell(disabled=True)
def _():
    return


@app.cell
def _():
    return


@app.cell
def _():
    mo.md(r"""
    ---

    # Spoken Voice Results
    """)
    return


@app.cell
def _():
    # Passed as color_gender= to the voice plots below.
    COLOR_GENDER = True
    return (COLOR_GENDER,)


@app.cell
def _():
    mo.md(r"""
    ## Top 8 Most Chosen out of 18
    """)
    return


@app.cell
def _(S, data):
    v_18_8_3 = S.get_18_8_3(data)[0]
    return (v_18_8_3,)


@app.cell
def _(COLOR_GENDER, S, v_18_8_3):
    S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER)
    return


@app.cell
def _():
    mo.md(r"""
    ## Top 3 most chosen out of 8
    """)
    return


@app.cell
def _(COLOR_GENDER, S, v_18_8_3):
    S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER)
    return


@app.cell
def _():
    mo.md(r"""
    ## Voice Ranking Weighted Score
    """)
    return


@app.cell
def _(S, data):
    top3_voices = S.get_top_3_voices(data)[0]
    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
    return top3_voices, top3_voices_weighted


@app.cell
def _(COLOR_GENDER, S, top3_voices_weighted):
    # NOTE(review): line break in the title written as "\n"; confirm the expected delimiter.
    S.plot_weighted_ranking_score(
        top3_voices_weighted,
        title="Most Popular Voice - Weighted Popularity Score\n(1st = 3pts, 2nd = 2pts, 3rd = 1pt)",
        color_gender=COLOR_GENDER
    )
    return


@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ## Which voice is ranked best in the ranking question for top 3?

    (not best 3 out of 8 question)
    """)
    return


@app.cell
def _(COLOR_GENDER, S, top3_voices):
    S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER)
    return


@app.cell
def _():
    mo.md(r"""
    ### Statistical significance for voice ranking
    """)
    return


@app.cell
def _():
    # print(top3_voices.collect().head())
    return


@app.cell
def _():
    # _pairwise_df, _metadata = S.compute_ranking_significance(
    #     top3_voices,alpha=0.05,correction="none")

    # # View significant pairs
    # # print(pairwise_df.filter(pl.col('significant') == True))

    # # Create heatmap visualization
    # _heatmap = S.plot_significance_heatmap(
    #     _pairwise_df,
    #     metadata=_metadata,
    #     title="Weighted Voice Ranking Significance\n(Pairwise Comparisons)"
    # )

    # # Create summary bar chart
    # _summary = S.plot_significance_summary(
    #     _pairwise_df,
    #     metadata=_metadata
    # )

    # mo.md(f"""
    # {mo.ui.altair_chart(_heatmap)}

    # {mo.ui.altair_chart(_summary)}
    # """)
    return


@app.cell
def _():
    ## Voice Ranked 1st the most
    return


@app.cell
def _(COLOR_GENDER, S, top3_voices):
    # NOTE(review): line break in the title written as "\n"; confirm the expected delimiter.
    S.plot_most_ranked_1(
        top3_voices,
        title="Most Popular Voice\n(Number of Times Ranked 1st)",
        x_label='Voice',
        color_gender=COLOR_GENDER
    )
    return


@app.cell
def _():
    mo.md(r"""
    ## Voice Scale 1-10
    """)
    return


@app.cell
def _(COLOR_GENDER, S, data):
    # Get your voice scale data (from notebook)
    voice_1_10, _ = S.get_voice_scale_1_10(data)
    S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER)
    return (voice_1_10,)


@app.cell(disabled=True)
def _():
    mo.md(r"""
    ### Statistical Significance (Scale 1-10)
    """)
    return


@app.cell(disabled=True)
def _(S, voice_1_10):
    # Compute pairwise significance tests
    pairwise_df, metadata = S.compute_pairwise_significance(
        voice_1_10,
        test_type="mannwhitney",  # or "ttest", "chi2", "auto"
        alpha=0.05,
        correction="bonferroni"  # or "holm", "none"
    )

    # View significant pairs
    # print(pairwise_df.filter(pl.col('significant') == True))

    # Create heatmap visualization
    # NOTE(review): line break in the title written as "\n"; confirm the expected delimiter.
    _heatmap = S.plot_significance_heatmap(
        pairwise_df,
        metadata=metadata,
        title="Voice Rating Significance\n(Pairwise Comparisons)"
    )

    # Create summary bar chart
    _summary = S.plot_significance_summary(
        pairwise_df,
        metadata=metadata
    )

    mo.md(f"""
{mo.ui.altair_chart(_heatmap)}

{mo.ui.altair_chart(_summary)}
""")
    return


@app.cell
def _():
    return


if __name__ == "__main__":
    app.run()