import marimo

__generated_with = "0.19.2"
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared imports for the notebook. Only the names listed in the return
    # tuple are consumed by other cells.
    import marimo as mo
    import polars as pl
    from pathlib import Path

    from utils import JPMCSurvey
    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution
    return (
        JPMCSurvey,
        mo,
        plot_average_scores_with_counts,
        plot_top3_ranking_distribution,
    )


@app.cell
def _():
    # Input files handed to JPMCSurvey: the labelled results export (CSV)
    # and the matching survey definition (QSF).
    RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    # Alternative input (test-data export); uncomment to switch datasets.
    # RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
    return QSF_FILE, RESULTS_FILE


@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    # Build the survey helper from the two input files.
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
    # Trailing expression is the cell output.
    # NOTE(review): presumably a QID -> description mapping; confirm in utils.
    survey.qid_descr_map
    return (survey,)


@app.cell
def _(survey):
    # NOTE(review): `data` is collected below, so it is presumably a lazy
    # Polars query; confirm against JPMCSurvey.load_data.
    data = survey.load_data()
    df = data.collect()

    # Cell output: only the columns whose name contains 'QID98'.
    df.select([q for q in df.columns if 'QID98' in q])
    return (data,)


@app.cell
def _(mo):
    mo.md(r"""
    # Data Cleanup

    - Remove incomplete responses (progress < 100)
    - Flag outliers based on duration (add column)
    - Flag responses that give the same rating for everything (indicates lack of engagement)
    """)
    return


@app.cell
def _(mo):
    mo.md(r"""
    # Answers Decoding

    Pipeline to decode the ranking of voices. Currently saved as QID's, they need to be remapped back to their actual values so that the analysis can be performed. ie: `GQIK26_G0_x8_RANK` -> Refers to question `Top 3 Traits_0_8_RANK - What are the important traits for the Chase AI virtual assistant?` and thus the #8 option
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## TODO:

    Create a python function for each of the questions. ie `def QID63()`. Each function should return a Polars query, that can be added to an existing query.

    Ideas:
    - Map column name to include the Voice number (VID) (ie the questions that only have 1 voice). The VID is in this case often included in the question description
    - `QID_x_GROUP` Contains the rankings of the values, stored in order. The following columns (ie `QID26_G0_x1_RANK`) are redundant and not necessary for us. The function should drop the unnecessary columns to clean up
    """)
    return


@app.cell
def _(survey):
    # Inspect the 'Payload' entry of the QSF definition for question QID36.
    cfg = survey._get_qsf_question_by_QID('QID36')['Payload']
    cfg
    return


# The getter cells below all follow one pattern: call a survey getter on
# `data`, take element [0] of its result, and collect it for display.
# NOTE(review): the getters appear to return a (lazy query, metadata) pair,
# as the unpacking in the "SS" cells suggests; confirm in utils.


@app.cell
def _(data, survey):
    survey.get_demographics(data)[0].collect()
    return


@app.cell
def _(data, survey):
    survey.get_top_8_traits(data)[0].collect()
    return


@app.cell
def _(data, survey):
    survey.get_top_3_traits(data)[0].collect()
    return


@app.cell
def _(data, survey):
    survey.get_character_ranking(data)[0].collect()
    return


@app.cell
def _(data, survey):
    survey.get_18_8_3(data)[0].collect()
    return


@app.cell
def _(mo):
    mo.md(r"""
    # Voice Scales 1-10
    """)
    return


@app.cell
def _(data, survey):
    # Collected once here and reused by the plotting cell below.
    vscales = survey.get_voice_scale_1_10(data)[0].collect()
    vscales
    return (vscales,)


@app.cell
def _(plot_average_scores_with_counts, vscales):
    plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
    return


@app.cell
def _(mo):
    mo.md(r"""
    # SS Green Blue
    """)
    return


@app.cell
def _(data, survey):
    # Underscore-prefixed names stay private to this cell in marimo.
    _lf, _choice_map = survey.get_ss_green_blue(data)
    print(_lf.collect().head())
    return


@app.cell
def _(mo):
    mo.md(r"""
    # Top 3 Voices
    """)
    return


@app.cell
def _(data, survey):
    # Collected once here and reused by the print and plot cells below.
    top3_voices = survey.get_top_3_voices(data)[0].collect()
    top3_voices
    return (top3_voices,)


@app.cell
def _(top3_voices):
    print(top3_voices.head())
    return


@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
    return


@app.cell
def _(mo):
    mo.md(r"""
    # SS Orange / Red
    """)
    return


@app.cell
def _(data, survey):
    # `_choice_map` is underscore-prefixed (matching the "SS Green Blue" cell)
    # so that it does not become a notebook-global definition; no other cell
    # consumes it.
    _lf, _choice_map = survey.get_ss_orange_red(data)
    _d = _lf.collect()
    _d
    return


if __name__ == "__main__":
    app.run()