# JPMC-quant/03_quant_report.py

import marimo

__generated_with = "0.19.2"
app = marimo.App(width="full")

with app.setup:
    # Setup block: the names imported here (mo, pl, Path, the validation and
    # utils helpers) are used by the cells below without appearing in their
    # parameter lists.
    import marimo as mo
    import polars as pl
    from pathlib import Path

    # Project-local helpers for response validation and survey loading/plotting.
    from validation import check_progress, duration_validation, check_straight_liners
    from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
    import utils

    from speaking_styles import SPEAKING_STYLES


@app.cell
def _():
    # File picker rooted at ./data/exports: single selection, CSV files only.
    _browser_options = {
        "initial_path": "./data/exports",
        "multiple": False,
        "restrict_navigation": True,
        "filetypes": [".csv"],
        "label": "Select 'Labels' File",
    }
    file_browser = mo.ui.file_browser(**_browser_options)
    file_browser
    return (file_browser,)


@app.cell
def _(file_browser):
    # Stop here with a hint until a file has been chosen in the browser.
    _selected_path = file_browser.path(index=0)
    mo.stop(
        _selected_path is None,
        mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"),
    )

    RESULTS_FILE = Path(_selected_path)
    # The survey definition (.qsf) path is fixed; only the results file is selectable.
    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE


@app.cell
def _(QSF_FILE, RESULTS_FILE):
    # Build the survey wrapper and load its responses; if the loader raises
    # NotImplementedError, show its message and stop the cell.
    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    try:
        data_all = S.load_data()
    except NotImplementedError as _load_error:
        _warning = mo.md(f"**⚠️ {_load_error}**")
        mo.stop(True, _warning)
    return S, data_all


@app.cell(hide_code=True)
def _(RESULTS_FILE, data_all):
    # Summary header for the loaded dataset.
    # This must be an f-string: as a plain raw string the `{...}` placeholders
    # were rendered literally instead of showing the file name and the number
    # of responses. The cell therefore also declares RESULTS_FILE and data_all
    # as inputs.
    mo.md(f"""
    ---
    # Load Data

    **Dataset:** `{Path(RESULTS_FILE).name}`

    **Responses**: `{data_all.collect().shape[0]}`
    """)
    return


@app.cell
def _(S, data_all):
    # Maximum scores passed to the straight-liner checks below.
    sl_ss_max_score = 5
    sl_v1_10_max_score = 10

    # Speaking-style ratings: join the green/blue and orange/red frames on `_recordId`.
    _ss_green_blue = S.get_ss_green_blue(data_all)[0]
    _ss_orange_red = S.get_ss_orange_red(data_all)[0]
    _ss_combined = _ss_green_blue.join(_ss_orange_red, on='_recordId')
    _ss_summary, sl_ss_df = check_straight_liners(_ss_combined, max_score=sl_ss_max_score)

    # Voice scale 1-10 ratings.
    _voice_scores = S.get_voice_scale_1_10(data_all)[0]
    _voice_summary, sl_v1_10_df = check_straight_liners(_voice_scores, max_score=sl_v1_10_max_score)

    # Remaining validation summaries, computed after the straight-liner checks.
    _progress_summary = check_progress(data_all)
    _duration_summary = duration_validation(data_all)

    mo.md(f"""
    # Data Validation

    {_progress_summary}


    {_duration_summary}


    ## Speaking Style - Straight Liners
    {_ss_summary}


    ## Voice Score Scale 1-10 - Straight Liners
    {_voice_summary}
    """)
    return


@app.cell
def _(data_all):
    # NOTE: straight-liner filtering is currently DISABLED. The commented-out
    # code below would drop every response whose 'Record ID' appears in
    # sl_v1_10_df (the Voice Scale 1-10 straight-liner frame). To re-enable it,
    # sl_v1_10_df must also be made available to this cell as an input.
    #
    # # Drop any Voice Scale 1-10 responses with straight-lining, using sl_v1_10_df 'Record ID' values
    # records_to_drop = sl_v1_10_df.select('Record ID').to_series().to_list()

    # data_validated = data_all.filter(~pl.col('_recordId').is_in(records_to_drop))

    # mo.md(f"""
    # Dropped `{len(records_to_drop)}` responses with straight-lining in Voice Scale 1-10 evaluation.
    # """)

    # With filtering disabled, every loaded response passes through unchanged.
    data_validated = data_all
    return (data_validated,)


@app.cell(hide_code=True)
def _():
    # Empty cell: nothing is executed or defined here.


    return


@app.cell
def _(data_validated):
    # `data` is the frame the analysis cells below take as input.
    data = data_validated

    # Collect the frame and leave it as the trailing expression so it is
    # shown as this cell's output.
    _collected = data.collect()
    _collected
    return (data,)


@app.cell(hide_code=True)
def _():
    # Section heading: respondent demographics.
    _section_md = r"""
    ---

    # Introduction (Respondent Demographics)
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, data):
    # First element of the demographics result, collected into a concrete frame.
    _demographics_query = S.get_demographics(data)[0]
    demographics = _demographics_query.collect()
    demographics
    return (demographics,)


@app.cell(hide_code=True)
def _():
    # Sub-heading for the missing-'Consumer' sanity checks.
    _section_md = r"""
    ## Lucia confirmation missing 'Consumer' data
    """
    mo.md(_section_md)
    return


@app.cell
def _(demographics):
    # Record IDs of respondents whose 'Consumer' value is null.
    _missing_consumer = demographics.filter(pl.col('Consumer').is_null())
    demographics_no_consumer = _missing_consumer['_recordId'].to_list()
    return (demographics_no_consumer,)


@app.cell
def _(data_all, demographics_no_consumer):
    # Sanity check (per Lucia): every response lacking a 'Consumer' type in the
    # demographics should have answered 'Yes' to QID4, i.e. be a business owner.
    _flagged_rows = data_all.filter(pl.col('_recordId').is_in(demographics_no_consumer)).collect()
    _answered_yes = _flagged_rows['QID4'] == 'Yes'
    assert all(_answered_yes), "Not all respondents with missing 'Consumer' are business owners."
    return


@app.cell
def _(data_all):
    # Converse check: every business owner (QID4 == 'Yes') should have no
    # 'Consumer' value at all.
    _owner_rows = data_all.filter(pl.col('QID4') == 'Yes').collect()
    _consumer_values = _owner_rows['Consumer'].unique()
    assert all(_value is None for _value in _consumer_values), "Not all business owners are missing 'Consumer type' in demographics."
    return


@app.cell
def _():
    # Sub-heading for the demographic distribution charts.
    _section_md = r"""
    ## Demographic Distributions
    """
    mo.md(_section_md)
    return


@app.cell
def _():
    # Demographic columns to chart; 'Race/Ethnicity' is currently left out.
    # NOTE(review): 'Bussiness_Owner' is presumably spelled as the column is
    # named upstream -- confirm before "fixing" the spelling here.
    demo_plot_cols = ['Age', 'Gender', 'Bussiness_Owner', 'Consumer']
    return (demo_plot_cols,)


@app.cell
def _(S, demo_plot_cols, demographics):
    # Render one distribution chart per demographic column under a shared heading.
    #
    # Every name in this cell is underscore-prefixed so it stays cell-local;
    # the previous bare loop variable `c` was registered as a notebook-wide
    # definition and could clash with any other cell defining `c`.
    _header = """
    ## Demographic Distributions

    """
    _chart_blocks = []
    for _column in demo_plot_cols:
        # Correct the misspelled column name and drop underscores for display only.
        _label = _column.replace('Bussiness', 'Business').replace('_', ' ')
        _fig = S.plot_demographic_distribution(
            data=demographics,
            column=_column,
            title=f"{_label} Distribution of Survey Respondents"
        )
        _chart_blocks.append(f"{mo.ui.altair_chart(_fig)}\n\n")

    # Join once instead of growing the string inside the loop.
    mo.md(_header + "".join(_chart_blocks))
    return


@app.cell
def _():
    # Section heading: brand character results.
    _section_md = r"""
    ---

    # Brand Character Results
    """
    mo.md(_section_md)
    return


@app.cell
def _():
    # Sub-heading: original vs refined character comparison.
    _section_md = r"""
    ## Best performing: Original vs Refined frankenstein
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, data):
    # Print the first rows of the character-refine frame for a quick look.
    char_refine_rank = S.get_character_refine(data)[0]
    _preview = char_refine_rank.collect().head()
    print(_preview)
    return


@app.cell
def _():
    # Sub-heading: weighted character ranking points.
    _section_md = r"""
    ## Character ranking points
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, char_rank):
    # Weighted popularity of each character (1st=3pts, 2nd=2pts, 3rd=1pt per the title).
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # The x-axis label was 'Voice', which does not match a character chart;
    # it now uses the same label as the other character-ranking plots.
    S.plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality',
    )
    return


@app.cell
def _():
    # Sub-heading: top-3 character ranking distribution.
    _section_md = r"""
    ## Character ranking 1-2-3
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, data):
    # Character ranking frame (first element of the getter's result), shared
    # by the ranking plots.
    _ranking_result = S.get_character_ranking(data)
    char_rank = _ranking_result[0]
    return (char_rank,)


@app.cell
def _(S, char_rank):
    # Plot the top-3 ranking distribution for the character rankings.
    _axis_label = 'Character Personality'
    _chart_title = 'Character Personality: Rankings Top 3'
    S.plot_top3_ranking_distribution(char_rank, x_label=_axis_label, title=_chart_title)
    return


@app.cell
def _():
    # Sub-heading: number of first-place rankings per character.
    _section_md = r"""
    ## Character Ranking: times 1st place
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, char_rank):
    # Plot the "ranked 1st" chart for the character rankings.
    _chart_title = "Most Popular Character<br>(Number of Times Ranked 1st)"
    _axis_label = 'Character Personality'
    S.plot_most_ranked_1(char_rank, title=_chart_title, x_label=_axis_label)
    return


@app.cell
def _():
    # Sub-heading: personality traits word cloud.
    _section_md = r"""
    ## Prominent predefined personality traits wordcloud
    """
    mo.md(_section_md)
    return


@app.cell
def _(S, data):
    # Word cloud built from the 'Top_8_Traits' column of the top-8-traits frame.
    top8_traits = S.get_top_8_traits(data)[0]
    _wordcloud_options = {
        "data": top8_traits,
        "column": 'Top_8_Traits',
        "title": "Most Prominent Personality Traits",
    }
    S.plot_traits_wordcloud(**_wordcloud_options)
    return


@app.cell
def _():
    # Sub-heading: trait frequency per brand character.
    _section_md = r"""
    ## Trait frequency per brand character
    """
    mo.md(_section_md)
    return


@app.cell
def _():
    # TODO: unfinished placeholder -- this cell contains no code yet.
    # Original note: "Join respondent" (presumably a join of respondent-level
    # data for the trait-frequency analysis -- confirm the intent).
    return


@app.cell
def _():
    # Section heading: spoken voice results.
    _section_md = r"""
    ---

    # Spoken Voice Results
    """
    mo.md(_section_md)
    return


@app.cell(hide_code=True)
def _():
    # Section heading: brand character results.
    # NOTE(review): the same heading is rendered by an earlier cell in this
    # notebook -- this one looks like a leftover duplicate; confirm.
    _section_md = r"""
    ---

    # Brand Character Results
    """
    mo.md(_section_md)
    return


@app.cell(hide_code=True)
def _():
    # Section heading: spoken voice results.
    # NOTE(review): the same heading is rendered by an earlier cell in this
    # notebook -- this one looks like a leftover duplicate; confirm.
    _section_md = r"""
    ---

    # Spoken Voice Results
    """
    mo.md(_section_md)
    return


# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()