import marimo

# Version string recorded by marimo's code generator (presumably the release that wrote this file).
__generated_with = "0.19.2"
# Notebook application object; every cell below registers itself on it through a decorator.
app = marimo.App(width="medium")


@app.cell
def _():
    # Third-party and standard-library dependencies shared by the notebook.
    import marimo as mo
    import polars as pl
    from pathlib import Path

    # Project helpers: data validation, survey loading/processing and plotting.
    from validation import check_progress, duration_validation
    from utils import (
        JPMCSurvey,
        combine_exclusive_columns,
        calculate_weighted_ranking_scores,
    )
    from plots import (
        plot_average_scores_with_counts,
        plot_top3_ranking_distribution,
        plot_ranking_distribution,
        plot_most_ranked_1,
        plot_weighted_ranking_score,
        plot_voice_selection_counts,
        plot_top3_selection_counts,
    )

    # The modules themselves are exposed too, so cells can call helpers by
    # qualified name (plots.<function>, utils.<function>).
    import plots
    import utils

    from speaking_styles import SPEAKING_STYLES

    return (
        JPMCSurvey,
        Path,
        SPEAKING_STYLES,
        calculate_weighted_ranking_scores,
        check_progress,
        duration_validation,
        mo,
        pl,
        plot_most_ranked_1,
        plot_ranking_distribution,
        plot_top3_ranking_distribution,
        plot_top3_selection_counts,
        plot_voice_selection_counts,
        plot_weighted_ranking_score,
        plots,
        utils,
    )


@app.cell
def _():
    # Both input files sit in the same export folder, so the folder is spelled once.
    _export_dir = 'data/exports/OneDrive_2026-01-21/Soft Launch Data'

    # Survey responses (CSV export) and the survey definition (.qsf) file.
    RESULTS_FILE = f'{_export_dir}/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    QSF_FILE = f'{_export_dir}/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    return QSF_FILE, RESULTS_FILE


@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    # Build the survey wrapper from the results export and the survey definition file.
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
    data_all = survey.load_data()
    # Final expression of the cell, so the collected frame is what the cell displays.
    # `data_all` itself is handed on uncollected; downstream cells call .collect() again
    # (presumably it is a lazy frame -- confirm against JPMCSurvey.load_data).
    data_all.collect()
    return data_all, survey


@app.cell
def _(Path, RESULTS_FILE, data_all, mo):
    # Show only the file name of the export, not its full path.
    _dataset_name = Path(RESULTS_FILE).name
    # Interactive table over the complete, unfiltered data set.
    _preview = mo.ui.table(data_all.collect())

    mo.md(f"""
    # Load Data

    **Dataset:** `{_dataset_name}`

    {_preview}
    """)
    return


@app.cell(hide_code=True)
def _(check_progress, data_all, duration_validation, mo):
    # Run both validation helpers on the unfiltered data, in display order.
    _progress_report = check_progress(data_all)
    _duration_report = duration_validation(data_all)

    mo.md(f"""
    ## Data Validation

    {_progress_report}


    {_duration_report}
    """)
    return


@app.cell
def _(data_all, duration_validation):
    # Same call as the one embedded in the "Data Validation" markdown cell; here its
    # return value is the cell's last expression and therefore shown on its own.
    duration_validation(data_all)
    return


@app.cell(hide_code=True)
def _(mo):
    # Placeholder heading: no straight-liner detection code exists in this notebook yet.
    mo.md(r"""
    ### ToDo: "straight-liner" detection and removal
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Section divider introducing the filter widgets created in the next cell.
    mo.md(r"""
    ---

    # Data Filter

    Use to select a subset of the data for the following analysis
    """)
    return


@app.cell(hide_code=True)
def _(data_all, mo):
    # Materialise the frame once; all five filter widgets read their choices from it.
    data_all_collected = data_all.collect()

    # Distinct values per demographic column, in order of first appearance.
    # The raw columns hold one entry per row, so handing them to the widgets
    # directly would pass every value many times over. The distinct list offers
    # the same choices and treats all five filters the same way.
    _choices = {
        _column: data_all_collected[_column].unique(maintain_order=True).to_list()
        for _column in ("QID1", "QID15", "QID2", "QID3", "Consumer")
    }

    # Every filter starts with all of its options selected, i.e. nothing is filtered out.
    ages = mo.ui.multiselect(options=_choices["QID1"], value=_choices["QID1"], label="Select Age Group(s):")
    income = mo.ui.multiselect(_choices["QID15"], value=_choices["QID15"], label="Select Income Group(s):")
    gender = mo.ui.multiselect(_choices["QID2"], value=_choices["QID2"], label="Select Gender(s)")
    ethnicity = mo.ui.multiselect(_choices["QID3"], value=_choices["QID3"], label="Select Ethnicities:")
    consumer = mo.ui.multiselect(_choices["Consumer"], value=_choices["Consumer"], label="Select Consumer Groups:")

    # Last expression of the cell: renders the widgets stacked under one heading.
    mo.md(f"""
    # Data Filters


    {ages}

    {gender}

    {ethnicity}

    {income}

    {consumer}

    """)
    return ages, consumer, ethnicity, gender, income


@app.cell
def _(ages, consumer, data_all, ethnicity, gender, income, survey):
    # Current widget selections, keyed by the filter_data keyword each one feeds.
    _selection = {
        "age": ages.value,
        "gender": gender.value,
        "income": income.value,
        "ethnicity": ethnicity.value,
        "consumer": consumer.value,
    }
    data = survey.filter_data(data_all, **_selection)

    # Last expression: display the filtered rows as the cell output.
    data.collect()
    return (data,)


@app.cell(hide_code=True)
def _(mo):
    # Section divider: everything below works on the filtered `data`.
    mo.md(r"""
    ---

    # Analysis
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the character personality ranking charts.
    mo.md(r"""
    ## Character personality ranking
    """)
    return


@app.cell
def _(data, survey):
    # The getter returns a sequence; its first element is the ranking frame.
    _ranking_lf = survey.get_character_ranking(data)[0]
    # Materialise it once so the three character charts share the same data.
    char_rank = _ranking_lf.collect()
    return (char_rank,)


@app.cell
def _(char_rank, mo, plot_top3_ranking_distribution, survey):
    # Build the figure first, then embed it under the question heading.
    _fig = plot_top3_ranking_distribution(
        char_rank,
        x_label='Character Personality',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### 1. Which character personality is ranked best?


    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell
def _(char_rank, mo, plot_most_ranked_1, survey):
    # Count of first-place rankings per character personality.
    _fig = plot_most_ranked_1(
        char_rank,
        title="Most Popular Character<br>(Number of Times Ranked 1st)",
        x_label='Character Personality',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### 2. Which character personality is ranked 1st the most?


    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell
def _(
    calculate_weighted_ranking_scores,
    char_rank,
    mo,
    plot_weighted_ranking_score,
    survey,
):
    # Turn the character rankings into weighted scores
    # (point scheme as stated in the chart title below).
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # The bars are character personalities, so the axis is labelled accordingly,
    # matching the other two character charts, rather than as 'Voice'.
    _fig = plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        x_label='Character Personality',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### 3. Which character personality is most popular based on weighted scores?


    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the voice selection and ranking charts.
    mo.md(r"""
    ## Voice Ranking
    """)
    return


@app.cell
def _(data, survey):
    # First element of the getter's result is the selection frame.
    _selection_lf = survey.get_18_8_3(data)[0]
    # Materialise it once; both selection-count charts read from it.
    v_18_8_3 = _selection_lf.collect()
    return (v_18_8_3,)


@app.cell(hide_code=True)
def _(mo, plot_voice_selection_counts, survey, v_18_8_3):
    # Selection counts per voice for the "choose 8 of 18" step.
    _fig = plot_voice_selection_counts(
        v_18_8_3,
        height=500,
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### Which 8 voices are chosen the most out of 18? 

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell(hide_code=True)
def _(mo, plot_top3_selection_counts, survey, v_18_8_3):
    # Selection counts per voice for the follow-up "choose 3 of the 8" step.
    _fig = plot_top3_selection_counts(
        v_18_8_3,
        height=500,
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### Which 3 voices are chosen the most out of 18? 

    How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18. 

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell(hide_code=True)
def _(calculate_weighted_ranking_scores, data, survey):
    # First element of the getter's result is the top-3 ranking frame.
    _top3_lf = survey.get_top_3_voices(data)[0]
    top3_voices = _top3_lf.collect()

    # Weighted scores derived from the same rankings, used by the weighted chart.
    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
    return top3_voices, top3_voices_weighted


@app.cell
def _(mo, plot_ranking_distribution, survey, top3_voices):
    # Distribution of ranks per voice from the top-3 ranking question.
    _fig = plot_ranking_distribution(
        top3_voices,
        x_label='Voice',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### Which voice is ranked best in the ranking question for top 3? 

    (not best 3 out of 8 question)  

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell
def _(mo, plot_weighted_ranking_score, survey, top3_voices_weighted):
    # Weighted popularity score per voice (point scheme as stated in the title).
    _fig = plot_weighted_ranking_score(
        top3_voices_weighted,
        title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)",
        height=500,
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### Most popular **voice** based on weighted scores?
    - E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1.  The voice with most points is ranked best. 
    Distribution of the rankings for each voice:

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell
def _(mo, plot_most_ranked_1, survey, top3_voices):
    # Count of first-place rankings per voice.
    _fig = plot_most_ranked_1(
        top3_voices,
        title="Most Popular Voice<br>(Number of Times Ranked 1st)",
        x_label='Voice',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### Which voice is ranked number 1 the most? 

    (not always the voice with most points)

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading with a link to the external document listing the styles and traits.
    mo.md(r"""
    ## Voice Speaking Style - Perception Traits

    Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
    """)
    return


@app.cell
def _(data, survey, utils):
    # Each getter returns the style ratings together with a choice map.
    ss_or, choice_map_or = survey.get_ss_orange_red(data)
    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)

    # Combine the data: both style groups side by side, joined on `_recordId`.
    ss_all = ss_or.join(ss_gb, on='_recordId')

    # Single lookup covering both groups; on duplicate keys the green/blue entry wins.
    choice_map = {**choice_map_or, **choice_map_gb}

    # The joined frame is not collected here just for inspection: that would run
    # the whole query a second time without using the result.
    ss_long = utils.process_speaking_style_data(ss_all, choice_map)
    return choice_map, ss_all, ss_long


@app.cell
def _(mo, pl, plots, ss_long, survey):
    # Distinct trait descriptions in order of first appearance. Without
    # maintain_order the order of unique() is not guaranteed, so the section
    # numbering could change from one run to the next.
    _trait_names = ss_long.select("Description").unique(maintain_order=True).to_series().to_list()

    # Loop temporaries are underscore-prefixed so they stay local to this cell.
    _sections = ["""### How does each voice score for each “speaking style labeled trait”?"""]
    for _idx, _trait in enumerate(_trait_names, start=1):
        _trait_rows = ss_long.filter(pl.col("Description") == _trait)
        # Trait descriptions separate their two poles with ':'; show them as "A ↔ B".
        _label = _trait.replace(":", " ↔ ")
        _fig = plots.plot_speaking_style_trait_scores(
            _trait_rows,
            title=_label,
            height=550,
            results_dir=survey.fig_save_dir,
        )

        _sections.append(f"""
    ### {_idx}) {_label}

    {mo.ui.plotly(_fig)}
    """)

    # Last expression of the cell: one markdown block with a chart per trait.
    mo.md("".join(_sections))
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the 1-10 voice rating chart.
    mo.md(r"""
    ## Voice Scale 1-10
    """)
    return


@app.cell
def _(data, survey):
    # First element of the getter's result is the voice-scale frame.
    _scale_lf = survey.get_voice_scale_1_10(data)[0]
    # Materialise it once; the chart and the correlation cells both use it.
    vscales = _scale_lf.collect()
    return (vscales,)


@app.cell
def _(mo, plots, survey, vscales):
    # Average 1-10 score per voice, drawn by the plots helper of the same name.
    _fig = plots.plot_average_scores_with_counts(
        vscales,
        x_label='Voice',
        width=1000,
        results_dir=survey.fig_save_dir,
    )

    mo.md(f"""
    ### How does each voice score on a scale from 1-10?

    {mo.ui.plotly(_fig)}
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the speaking-style vs. voice-scale correlations.
    mo.md(r"""
    ## Correlations Voice Speaking Styles <-> Voice Scale 1-10
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Explanatory text: what the four correlation diagrams show, with the green style as example.
    mo.md(r"""
    Let’s show how scoring better on these speaking styles correlates (or not) with better Voice Scale 1-10 evaluation. For each speaking style we show how the traits in these speaking styles correlate with Voice Scale 1-10 evaluation. This gives us a total of 4 correlation diagrams.

    Example for speaking style green:
    - Trait 1: Friendly | Conversational | Down-to-earth
    - Trait 2: Approachable | Familiar | Warm
    - Trait 3: Optimistic | Benevolent | Positive | Appreciative
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Reader guidance: how to interpret the sign and size of the correlation bars.
    mo.md(r"""
    ### How to Interpret These Correlation Results
    Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).

    **Reading the Chart**

    | Correlation Value |	Interpretation |
    |-----------|----------|
    | r > 0 (Green bars)| 	Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
    | r < 0 (Red bars)| 	Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
    | r ≈ 0| 	No relationship — this trait doesn't predict Voice Scale ratings|
    """)
    return


@app.cell
def _(choice_map, ss_all, utils, vscales):
    # Speaking-style ratings, processed from the materialised combined frame.
    _ss_collected = ss_all.collect()
    df_style = utils.process_speaking_style_data(_ss_collected, choice_map)

    # Voice-scale ratings, processed so they can be joined on the keys below.
    df_voice_long = utils.process_voice_scale_data(vscales)

    # Inner join: keep only (record, voice) pairs present in both frames.
    _join_keys = ["_recordId", "Voice"]
    joined_df = df_style.join(df_voice_long, on=_join_keys, how="inner")
    return df_style, joined_df


@app.cell
def _(SPEAKING_STYLES, joined_df, mo, plots, survey):
    # One correlation chart per speaking style, appended under a common heading.
    _content = """### Total Results

    """

    # Loop temporaries are underscore-prefixed so they stay local to this cell
    # instead of becoming notebook-wide definitions.
    for _style, _traits in SPEAKING_STYLES.items():
        _fig = plots.plot_speaking_style_correlation(
            df=joined_df,
            style_color=_style,
            style_traits=_traits,
            title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
            results_dir=survey.fig_save_dir,
        )
        _content += f"""
    #### Speaking Style **{_style}**:

    {mo.ui.plotly(_fig)}

    """

    # Last expression of the cell: the assembled markdown with all charts.
    mo.md(_content)
    return


@app.cell
def _(mo):
    # Open checklist: the per-gender correlation diagrams are not produced in this notebook yet.
    mo.md(r"""
    ### Female / Male Voices considered separately

    - [ ] 4 correlation diagrams considering each speaking style (4) and all female voice results.
    - [ ] 4 correlation diagrams considering each speaking style (4) and all male voice results.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the speaking-style vs. ranking-points correlations.
    mo.md(r"""
    ## Correlations Voice Speaking Styles <-> Voice Ranking Points
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Explanatory text: what the ranking-point correlation diagrams show, with the green style as example.
    mo.md(r"""
    Let’s show how scoring better on these speaking styles correlates (or not) with better Voice Ranking results. For each speaking style we show how the traits in these speaking styles correlate with voice ranking points. This gives us a total of 4 correlation diagrams.

    Example for speaking style green:
    - Trait 1: Friendly | Conversational | Down-to-earth
    - Trait 2: Approachable | Familiar | Warm
    - Trait 3: Optimistic | Benevolent | Positive | Appreciative
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Checklist heading for the overall (all voices) ranking correlation diagrams.
    mo.md(r"""
    ### Total Results

    - [ ] 4 correlation diagrams
    """)
    return


@app.cell
def _(SPEAKING_STYLES, df_style, mo, plots, survey, top3_voices, utils):
    # Ranking data processed so it can be joined with the style ratings
    # on (record, voice); the inner join keeps pairs present in both.
    df_ranking = utils.process_voice_ranking_data(top3_voices)
    joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')

    # Collect the heading and one chart section per speaking style, then
    # concatenate them into a single markdown document.
    _parts = ["""## Correlations Voice Speaking Styles <-> Voice Ranking Points

    """]

    for _style, _traits in SPEAKING_STYLES.items():
        _fig = plots.plot_speaking_style_ranking_correlation(
            joined,
            _style,
            _traits,
            results_dir=survey.fig_save_dir,
        )
        _parts.append(f"""

        #### Speaking Style **{_style}**:

        {mo.ui.plotly(_fig)}

        """)

    mo.md("".join(_parts))
    return


@app.cell(hide_code=True)
def _(mo):
    # Open checklist: the per-gender ranking correlation diagrams are not produced in this notebook yet.
    mo.md(r"""
    ### Female / Male Voices considered separately

    - [ ] 4 correlation diagrams considering each speaking style (4) and all female voice results.
    - [ ] 4 correlation diagrams considering each speaking style (4) and all male voice results.
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Open checklist: the heatmaps against acoustic data have no code in this notebook yet.
    mo.md(r"""
    ## Correlation Heatmap all evaluations <-> voice acoustic data

    - [ ] Heatmap for male voices
    - [ ] Heatmap for female voices
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Sub-section heading for the character personality trait diagrams.
    mo.md(r"""
    ## Most Prominent Character Personality Traits
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    # Describes the planned trait diagrams; the checklist shows one open item per character.
    mo.md(r"""
    The last question of the survey is about traits for the described character's personality. For each Character personality, we want to display the 8 most chosen character personality traits. This will give us a total of 4 diagrams, one for each character personality included in the test.

    - [ ] Bank Teller
    - [ ] Familiar Friend
    - [ ] The Coach
    - [ ] Personal Assistant
    """)
    return


@app.cell
def _(mo):
    # Closing note: subgroup results come from changing the filter widgets and letting the cells re-run.
    mo.md(r"""
    ---

    # Results per subgroup

    Use the dropdown selector at the top to filter the data and generate all the plots again
    """)
    return


# Run the notebook application when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()