Compare commits

...

10 Commits

15 changed files with 7693 additions and 629 deletions

1
.gitignore vendored
View File

@@ -15,3 +15,4 @@ data/
docker-volumes/
logs/
figures/

View File

@@ -12,15 +12,24 @@ def _():
import plotly as plt
from pathlib import Path
from utils import extract_qid_descr_map
return Path, extract_qid_descr_map, mo, pd
import utils
return Path, mo, pd, utils
@app.cell
def _(Path):
# results_file = Path('data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv')
results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
return (results_file,)
# results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
results_file = Path('data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv')
qsf_file = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
return qsf_file, results_file
@app.cell
def _(qsf_file, results_file, utils):
survey = utils.JPMCSurvey(results_file, qsf_file)
data_all = survey.load_data()
return (survey,)
@app.cell
@@ -33,8 +42,8 @@ def _(mo):
@app.cell
def _(extract_qid_descr_map, results_file):
qid_descr_map = extract_qid_descr_map(results_file)
def _(survey):
qid_descr_map = survey.qid_descr_map
qid_descr_map
return (qid_descr_map,)

View File

@@ -12,63 +12,59 @@ def _():
from validation import check_progress, duration_validation
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
import utils
from speaking_styles import SPEAKING_STYLES
return (
JPMCSurvey,
Path,
SPEAKING_STYLES,
calculate_weighted_ranking_scores,
check_progress,
duration_validation,
mo,
plot_average_scores_with_counts,
plot_character_ranking_distribution,
plot_most_ranked_1_character,
plot_top3_ranking_distribution,
plot_weighted_ranking_score,
pl,
utils,
)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
# Load Data
""")
return
@app.cell
def _(Path, mo):
def _():
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
return QSF_FILE, RESULTS_FILE
@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data_all = survey.load_data()
data_all.collect()
return data_all, survey
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data_all = S.load_data()
return S, data_all
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## Data Validation
def _(Path, RESULTS_FILE, data_all, mo):
mo.md(f"""
# Load Data
**Dataset:** `{Path(RESULTS_FILE).name}`
{mo.ui.table(data_all.collect())}
""")
return
@app.cell
def _(check_progress, data_all):
check_progress(data_all)
return
@app.cell(hide_code=True)
def _(check_progress, data_all, duration_validation, mo):
mo.md(f"""
## Data Validation
{check_progress(data_all)}
@app.cell
def _(data_all, duration_validation):
duration_validation(data_all)
{duration_validation(data_all)}
""")
return
@@ -92,9 +88,42 @@ def _(mo):
return
@app.cell
def _(data_all, survey):
data = survey.filter_data(data_all, age=None, gender=None, income=None, ethnicity=None, consumer=None)
@app.cell(hide_code=True)
def _(S, mo):
filter_form = mo.md('''
# Data Filter
{age}
{gender}
{ethnicity}
{income}
{consumer}
'''
).batch(
age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
).form()
filter_form
return (filter_form,)
@app.cell(hide_code=True)
def _(S, data_all, filter_form, mo):
mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
_d = S.filter_data(data_all, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])
# Stop execution and prevent other cells from running if no data is selected
mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
data = _d
data.collect()
return (data,)
@@ -112,47 +141,48 @@ def _(mo):
def _(mo):
mo.md(r"""
## Character personality ranking
### 1. Which character personality is ranked best?
""")
return
@app.cell
def _(data, survey):
char_rank = survey.get_character_ranking(data)[0].collect()
def _(S, data):
char_rank = S.get_character_ranking(data)[0]
return (char_rank,)
@app.cell
def _(char_rank, plot_character_ranking_distribution):
plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
return
def _(S, char_rank, mo):
mo.md(f"""
### 1. Which character personality is ranked best?
@app.cell
def _(mo):
mo.md(r"""
### 2. Which character personality is ranked number 1 the most?
{mo.ui.altair_chart(S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality'))}
""")
return
@app.cell
def _(
calculate_weighted_ranking_scores,
char_rank,
plot_weighted_ranking_score,
):
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
def _(S, char_rank, mo):
mo.md(f"""
### 2. Which character personality is ranked 1st the most?
{mo.ui.altair_chart(S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
""")
return
@app.cell
def _(char_rank, plot_most_ranked_1_character):
plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
def _(S, calculate_weighted_ranking_scores, char_rank, mo):
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
mo.md(f"""
### 3. Which character personality most popular based on weighted scores?
{mo.ui.altair_chart(S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
""")
return
@@ -165,53 +195,73 @@ def _(mo):
@app.cell
def _(data, survey):
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
print(v_18_8_3.head())
return
def _(S, data):
v_18_8_3 = S.get_18_8_3(data)[0].collect()
# print(v_18_8_3.head())
return (v_18_8_3,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 8 voices are chosen the most out of 18?
def _(S, mo, v_18_8_3):
mo.md(f"""
### Which 8 voices are chosen the most out of 18?
{mo.ui.altair_chart(S.plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
def _(S, mo, v_18_8_3):
mo.md(f"""
### Which 3 voices are chosen the most out of 18?
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
{mo.ui.altair_chart(S.plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
def _(S, calculate_weighted_ranking_scores, data):
top3_voices = S.get_top_3_voices(data)[0]
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
return top3_voices, top3_voices_weighted
@app.cell
def _(S, mo, top3_voices):
mo.md(f"""
### Which voice is ranked best in the ranking question for top 3?
(not best 3 out of 8 question)
{mo.ui.altair_chart(S.plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
""")
return
@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
def _(S, mo, top3_voices_weighted):
mo.md(f"""
### Most popular **voice** based on weighted scores?
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
Distribution of the rankings for each voice:
{mo.ui.altair_chart(S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked number 1 the most? (not always the voice with most points)
@app.cell
def _(S, mo, top3_voices):
mo.md(f"""
### Which voice is ranked number 1 the most?
- Each of the 350 participants gives exactly one 1st-place vote.
- Total Rank-1 votes = 350.
- Voices are sorted from most to least 1st-place votes.
- The top 3 voices with the most Rank-1 votes are colored blue.
- This can differ from the points-based winners (321 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
(not always the voice with most points)
{mo.ui.altair_chart(S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
""")
return
@@ -220,18 +270,42 @@ def _(mo):
def _(mo):
mo.md(r"""
## Voice Speaking Style - Perception Traits
Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
How does each voice score for each “speaking style labeled trait”? Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
@app.cell
def _(S, data, utils):
ss_or, choice_map_or = S.get_ss_orange_red(data)
ss_gb, choice_map_gb = S.get_ss_green_blue(data)
- There are 4 speaking styles: Green, Blue, Orange, Red.
- There are 16 traits distributed across the 4 speaking styles.
""")
# Combine the data
ss_all = ss_or.join(ss_gb, on='_recordId')
_d = ss_all.collect()
choice_map = {**choice_map_or, **choice_map_gb}
# print(_d.head())
# print(choice_map)
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
return choice_map, ss_all, ss_long
@app.cell
def _(S, mo, pl, ss_long):
content = """### How does each voice score for each “speaking style labeled trait”?"""
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
trait_d = ss_long.filter(pl.col("Description") == trait)
content += f"""
### {i+1}) {trait.replace(":", "")}
{mo.ui.altair_chart(S.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", ""), height=550))}
"""
mo.md(content)
return
@@ -244,23 +318,18 @@ def _(mo):
@app.cell
def _(data, mo, plot_average_scores_with_counts, survey):
vscales = survey.get_voice_scale_1_10(data)[0].collect()
plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
mo.md(f"""
How does each voice score on a scale from 1-10?
{mo.ui.plotly(plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
""")
return
def _(S, data):
vscales = S.get_voice_scale_1_10(data)[0]
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
return (vscales,)
@app.cell
def _(mo):
mo.md(r"""
def _(S, mo, vscales):
mo.md(f"""
### How does each voice score on a scale from 1-10?
{mo.ui.altair_chart(S.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
""")
return
@@ -286,16 +355,57 @@ def _(mo):
return
@app.cell
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
### Total Results
### How to Interpret These Correlation Results
Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).
- [ ] 4 correlation diagrams
**Reading the Chart**
| Correlation Value | Interpretation |
|-----------|----------|
| r > 0 (Green bars)| Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
| r < 0 (Red bars)| Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
| r ≈ 0| No relationship — this trait doesn't predict Voice Scale ratings|
""")
return
@app.cell
def _(choice_map, ss_all, utils, vscales):
df_style = utils.process_speaking_style_data(ss_all, choice_map)
df_voice_long = utils.process_voice_scale_data(vscales)
joined_df = df_style.join(df_voice_long, on=["_recordId", "Voice"], how="inner")
# df_voice_long
return df_style, joined_df
@app.cell
def _(S, SPEAKING_STYLES, joined_df, mo):
_content = """### Total Results
"""
for style, traits in SPEAKING_STYLES.items():
# print(f"Correlation plot for {style}...")
fig = S.plot_speaking_style_correlation(
data=joined_df,
style_color=style,
style_traits=traits,
title=f"Correlation: Speaking Style {style} and Voice Scale 1-10"
)
_content += f"""
#### Speaking Style **{style}**:
{mo.ui.altair_chart(fig)}
"""
mo.md(_content)
return
@app.cell
def _(mo):
mo.md(r"""
@@ -338,6 +448,30 @@ def _(mo):
return
@app.cell
def _(S, SPEAKING_STYLES, df_style, mo, top3_voices, utils):
df_ranking = utils.process_voice_ranking_data(top3_voices)
joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
_content = """## Correlations Voice Speaking Styles <-> Voice Ranking Points
"""
for _style, _traits in SPEAKING_STYLES.items():
_fig = S.plot_speaking_style_ranking_correlation(data=joined, style_color=_style, style_traits=_traits)
_content += f"""
#### Speaking Style **{_style}**:
{mo.ui.altair_chart(_fig)}
"""
mo.md(_content)
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,60 @@
import polars as pl
from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
from plots import plot_speaking_style_correlation
from speaking_styles import SPEAKING_STYLES

# Standalone driver: load the soft-launch survey export, reshape the
# speaking-style and voice-scale responses to long format, join them per
# respondent/voice, and show one correlation figure per speaking style.

# 1. Initialize Survey and Load Data
# We need to point to the actual data files if possible, or use standard paths
# Assuming the file structure observed in workspace:
# Data: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv
# QSF: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf
RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"

try:
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
except TypeError:
    # Fallback if signature is different or file not found (just in case)
    print("Error initializing survey with paths. Checking signature...")
    # This part is just for debugging if it fails again
    raise

data = survey.load_data()

# 2. Extract Data
# Speaking Styles
ss_gb, map_gb = survey.get_ss_green_blue(data)
ss_or, map_or = survey.get_ss_orange_red(data)
# Voice Scale 1-10
voice_scale, _ = survey.get_voice_scale_1_10(data)

# 3. Process Dataframes (Wide to Long)
# Note: process_speaking_style_data handles the melt and parsing
# We collect them because the plotting functions expect eager DataFrames usually,
# but polars functions here return eager DFs currently based on `utils.py` implementation (return result.collect())
df_style_gb = process_speaking_style_data(ss_gb, map_gb)
df_style_or = process_speaking_style_data(ss_or, map_or)
# Combine both style dataframes
df_style_all = pl.concat([df_style_gb, df_style_or])

# Process Voice Scale
df_voice_long = process_voice_scale_data(voice_scale)

# 4. Join Style + Voice Data
joined_df = join_voice_and_style_data(df_style_all, df_voice_long)

# 5. Generate Plots for each Style Color
# NOTE(review): this calls the module-level plot function with `df=`, while the
# notebook calls the mixin method with `data=` — confirm the keyword matches the
# actual signature of plots.plot_speaking_style_correlation.
for style, traits in SPEAKING_STYLES.items():
    print(f"Generating plot for {style}...")
    fig = plot_speaking_style_correlation(
        df=joined_df,
        style_color=style,
        style_traits=traits
    )
    fig.show()
    # If in Marimo/Jupyter, just 'fig' or 'mo.ui.plotly(fig)'

1186
plots.py

File diff suppressed because it is too large Load Diff

View File

@@ -14,12 +14,13 @@ dependencies = [
"openai>=2.9.0",
"openpyxl>=3.1.5",
"pandas>=2.3.3",
"plotly>=6.5.1",
"polars>=1.37.1",
"pyarrow>=23.0.0",
"pysqlite3>=0.6.0",
"pyzmq>=27.1.0",
"requests>=2.32.5",
"taguette>=1.5.1",
"vl-convert-python>=1.9.0.post1",
"wordcloud>=1.9.5",
]

33
speaking_styles.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Mapping of Speaking Styles (Colors) to their constituent Traits (Positive side).
Derived from "Speaking Style Traits Quantitative test design.pdf".
"""
SPEAKING_STYLES = {
"Green": [
"Friendly | Conversational | Down-to-earth",
"Approachable | Familiar | Warm",
"Optimistic | Benevolent | Positive | Appreciative"
],
"Blue": [
"Proactive | Cooperative",
"Knowledgable | Resourceful | Savvy",
"Clear | Straightforward | Direct",
"Confident | Competent",
"Respectable | Respectful"
],
"Orange": [
"Attentive | Helpful | Caring | Deliberate",
"Reassuring | Empowering",
"Progressive | Guiding | Intentional",
"Patient | Open-minded"
],
"Red": [
"Trustworthy | Reliable | Dependable",
"Calm | Steady/Stable | Controlled",
"Transparent | Upright | Altruistic",
"Adaptive | Flexible"
]
}

View File

@@ -16,7 +16,65 @@ class ColorPalette:
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
# Neutral color for unhighlighted comparison items
NEUTRAL = "#D3D3D3" # Light Grey
# General UI elements
TEXT = "black"
GRID = "lightgray"
BACKGROUND = "white"
def jpmc_altair_theme():
    """JPMC brand theme for Altair charts.

    Returns a Vega-Lite ``config`` dictionary suitable for registration via
    Altair's theme mechanism. All colors come from :class:`ColorPalette`.
    """
    # Shared axis styling; both x and y axes inherit these settings.
    axis_defaults = {
        'grid': True,
        'gridColor': ColorPalette.GRID,
        'labelFontSize': 11,
        'titleFontSize': 12,
        'labelColor': ColorPalette.TEXT,
        'titleColor': ColorPalette.TEXT,
        'labelLimit': 200  # Allow longer labels before truncation
    }
    chart_config = {
        'view': {
            'continuousWidth': 1000,
            'continuousHeight': 500,
            'strokeWidth': 0
        },
        'background': ColorPalette.BACKGROUND,
        'axis': axis_defaults,
        # Slanted x labels so long voice/trait names stay readable.
        'axisX': {
            'labelAngle': -45,
            'labelLimit': 200  # Allow longer x-axis labels
        },
        'axisY': {
            'labelAngle': 0
        },
        'legend': {
            'orient': 'top',
            'direction': 'horizontal',
            'titleFontSize': 11,
            'labelFontSize': 11
        },
        'title': {
            'fontSize': 14,
            'color': ColorPalette.TEXT,
            'anchor': 'start',
            'subtitleFontSize': 10,
            'subtitleColor': 'gray'
        },
        'bar': {
            'color': ColorPalette.PRIMARY
        }
    }
    return {'config': chart_config}
# Register Altair theme
# Module import side effect: registering and enabling the 'jpmc' theme means
# every Altair chart built after importing this module picks up the branding.
# NOTE(review): `alt.themes` is deprecated in Altair 5.5+ in favor of
# `alt.theme.register` — confirm against the pinned Altair version.
try:
    import altair as alt
    alt.themes.register('jpmc', jpmc_altair_theme)
    alt.themes.enable('jpmc')
except ImportError:
    pass  # Altair not installed

260
utils.py
View File

@@ -3,8 +3,10 @@ from pathlib import Path
import pandas as pd
from typing import Union
import json
import re
from plots import JPMCPlotsMixin
import marimo as mo
def extract_voice_label(html_str: str) -> str:
"""
@@ -55,24 +57,27 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
def calculate_weighted_ranking_scores(df: pl.LazyFrame) -> pl.DataFrame:
"""
Calculate weighted scores for character rankings.
Calculate weighted scores for character or voice rankings.
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
Parameters
----------
df : pl.DataFrame
DataFrame containing character ranking columns.
DataFrame containing character/ voice ranking columns.
Returns
-------
pl.DataFrame
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
"""
if isinstance(df, pl.LazyFrame):
df = df.collect()
scores = []
# Identify columns related to Character Ranking
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
# Identify ranking columns (assume all columns except _recordId)
ranking_cols = [c for c in df.columns if c != '_recordId']
for col in ranking_cols:
# Calculate score:
@@ -84,7 +89,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
# Clean name
clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
scores.append({
'Character': clean_name,
@@ -94,7 +99,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
return pl.DataFrame(scores).sort('Weighted Score', descending=True)
class JPMCSurvey:
class JPMCSurvey(JPMCPlotsMixin):
"""Class to handle JPMorgan Chase survey data."""
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
@@ -109,6 +114,23 @@ class JPMCSurvey:
self.qid_descr_map = self._extract_qid_descr_map()
self.qsf:dict = self._load_qsf()
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
if not self.fig_save_dir.exists():
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
self.data_filtered = None
self.plot_height = 500
self.plot_width = 1000
# Filter values
self.filter_age:list = None
self.filter_gender:list = None
self.filter_consumer:list = None
self.filter_ethnicity:list = None
self.filter_income:list = None
def _extract_qid_descr_map(self) -> dict:
"""Extract mapping of Qualtrics ImportID to Question Description from results file."""
@@ -188,6 +210,13 @@ class JPMCSurvey:
# Rename columns with the extracted ImportIds
df.columns = new_columns
# Store unique values for filters (ignoring nulls) to detect "all selected" state
self.options_age = sorted(df['QID1'].drop_nulls().unique().to_list()) if 'QID1' in df.columns else []
self.options_gender = sorted(df['QID2'].drop_nulls().unique().to_list()) if 'QID2' in df.columns else []
self.options_consumer = sorted(df['Consumer'].drop_nulls().unique().to_list()) if 'Consumer' in df.columns else []
self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
return df.lazy()
def _get_subset(self, q: pl.LazyFrame, QIDs, rename_cols=True, include_record_id=True) -> pl.LazyFrame:
@@ -213,25 +242,32 @@ class JPMCSurvey:
- ethnicity: list
- income: list
Returns filtered polars LazyFrame.
Also saves the result to self.data_filtered.
"""
# Apply filters
self.filter_age = age
if age is not None:
q = q.filter(pl.col('QID1').is_in(age))
self.filter_gender = gender
if gender is not None:
q = q.filter(pl.col('QID2').is_in(gender))
self.filter_consumer = consumer
if consumer is not None:
q = q.filter(pl.col('Consumer').is_in(consumer))
self.filter_ethnicity = ethnicity
if ethnicity is not None:
q = q.filter(pl.col('QID3').is_in(ethnicity))
self.filter_income = income
if income is not None:
q = q.filter(pl.col('QID15').is_in(income))
return q
self.data_filtered = q
return self.data_filtered
def get_demographics(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
"""Extract columns containing the demographics.
@@ -415,6 +451,210 @@ class JPMCSurvey:
return self._get_subset(q, QIDs, rename_cols=True), None
def process_speaking_style_data(
    df: Union[pl.LazyFrame, pl.DataFrame],
    trait_map: dict[str, str]
) -> pl.DataFrame:
    """
    Process speaking style columns from wide to long format and map trait descriptions.

    Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
    Example: SS_Orange_Red__V14__Choice_1

    Parameters
    ----------
    df : pl.LazyFrame or pl.DataFrame
        Input dataframe containing SS_* columns.
    trait_map : dict
        Dictionary mapping column names to trait descriptions.
        Keys should be full column names like "SS_Orange_Red__V14__Choice_1".

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Style_Group, Choice_ID, Description, Score, Left_Anchor, Right_Anchor
    """
    # Normalize input to LazyFrame
    lf = df.lazy() if isinstance(df, pl.DataFrame) else df

    # 1. Melt SS_ columns
    # NOTE(review): `melt` is deprecated in recent polars in favor of
    # `unpivot` — confirm behavior against the pinned polars version.
    melted = lf.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^SS_.*$"),
        variable_name="full_col_name",
        value_name="score"
    )

    # 2. Extract components from column name
    # Regex captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14), Choice_ID (e.g. Choice_1)
    pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"
    # extract_groups yields a struct column; unnest splits it into
    # Style_Group / Voice / Choice_ID columns (replacing full_col_name).
    processed = melted.with_columns(
        pl.col("full_col_name").str.extract_groups(pattern)
    ).unnest("full_col_name")

    # 3. Create Mapping Lookup from the provided dictionary
    # We map (Style_Group, Choice_ID) -> Description
    mapping_data = []
    seen = set()  # first column name wins per (Style_Group, Choice_ID) pair
    for col_name, desc in trait_map.items():
        match = re.match(pattern, col_name)
        if match:
            groups = match.groupdict()
            key = (groups["Style_Group"], groups["Choice_ID"])
            if key not in seen:
                # Parse description into anchors if possible (Left : Right)
                parts = desc.split(':')
                left_anchor = parts[0].strip() if len(parts) > 0 else ""
                right_anchor = parts[1].strip() if len(parts) > 1 else ""
                mapping_data.append({
                    "Style_Group": groups["Style_Group"],
                    "Choice_ID": groups["Choice_ID"],
                    "Description": desc,
                    "Left_Anchor": left_anchor,
                    "Right_Anchor": right_anchor
                })
                seen.add(key)

    # No usable mapping entries: return the melted/parsed data without
    # descriptions (and without the Int cast applied below).
    if not mapping_data:
        return processed.collect()

    mapping_lf = pl.LazyFrame(mapping_data)

    # 4. Join Data with Mapping
    result = processed.join(
        mapping_lf,
        on=["Style_Group", "Choice_ID"],
        how="left"
    )

    # 5. Cast score to Int (strict=False: non-castable values become null)
    result = result.with_columns(
        pl.col("score").cast(pl.Int64, strict=False)
    )

    return result.collect()
def process_voice_scale_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Voice Scale columns from wide to long format.

    Columns named ``Voice_Scale_1_10__V{n}`` (e.g. ``Voice_Scale_1_10__V14``)
    are unpivoted into one row per respondent/voice.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Voice_Scale_Score
    """
    # Work lazily regardless of what the caller handed us.
    lazy = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long: one row per (respondent, voice-scale column).
    long_form = lazy.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Voice_Scale_1_10__V.*$"),
        variable_name="full_col_name",
        value_name="Voice_Scale_Score"
    )

    # Recover the voice label ("V14") from the source column name.
    with_voice = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"V(\d+)", 1).alias("Voice_Num")
    ).with_columns(
        ("V" + pl.col("Voice_Num")).alias("Voice")
    )

    # Scores stay floating point (source data is f64); strict=False turns
    # non-castable values into nulls instead of raising.
    return with_voice.select([
        "_recordId",
        "Voice",
        pl.col("Voice_Scale_Score").cast(pl.Float64, strict=False)
    ]).collect()
def join_voice_and_style_data(
    processed_style_data: pl.DataFrame,
    processed_voice_data: pl.DataFrame
) -> pl.DataFrame:
    """
    Inner-join processed Speaking Style data with Voice Scale 1-10 data.

    Parameters
    ----------
    processed_style_data : pl.DataFrame
        Result of process_speaking_style_data
    processed_voice_data : pl.DataFrame
        Result of process_voice_scale_data

    Returns
    -------
    pl.DataFrame
        Merged dataframe with columns from both, joined on _recordId and Voice.
    """
    join_keys = ["_recordId", "Voice"]
    # Inner join: keep only respondent/voice pairs present in BOTH inputs.
    return processed_style_data.join(
        processed_voice_data, on=join_keys, how="inner"
    )
def process_voice_ranking_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Top-3 voice ranking columns to long format and score them.

    Columns named ``Top_3_Voices_ranking__V{n}`` hold a rank (1-3) or null
    when the voice was not ranked. Ranks become points:
    1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Ranking_Points
    """
    # Work lazily regardless of what the caller handed us.
    lazy = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long over the ranking columns.
    long_form = lazy.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Top_3_Voices_ranking__V.*$"),
        variable_name="full_col_name",
        value_name="rank"
    )

    # Recover the voice label ("V14") from the source column name.
    with_voice = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"V(\d+)", 1).alias("Voice_Num")
    ).with_columns(
        ("V" + pl.col("Voice_Num")).alias("Voice")
    )

    # Map rank -> points (1st=3, 2nd=2, 3rd=1); anything else, including
    # null (voice not ranked), scores 0.
    points_expr = (
        pl.when(pl.col("rank") == 1).then(3)
        .when(pl.col("rank") == 2).then(2)
        .when(pl.col("rank") == 3).then(1)
        .otherwise(0)
        .alias("Ranking_Points")
    )
    return with_voice.with_columns(points_expr).select([
        "_recordId",
        "Voice",
        "Ranking_Points"
    ]).collect()

75
uv.lock generated
View File

@@ -1332,12 +1332,13 @@ dependencies = [
{ name = "openai" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "plotly" },
{ name = "polars" },
{ name = "pyarrow" },
{ name = "pysqlite3" },
{ name = "pyzmq" },
{ name = "requests" },
{ name = "taguette" },
{ name = "vl-convert-python" },
{ name = "wordcloud" },
]
@@ -1352,12 +1353,13 @@ requires-dist = [
{ name = "openai", specifier = ">=2.9.0" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pandas", specifier = ">=2.3.3" },
{ name = "plotly", specifier = ">=6.5.1" },
{ name = "polars", specifier = ">=1.37.1" },
{ name = "pyarrow", specifier = ">=23.0.0" },
{ name = "pysqlite3", specifier = ">=0.6.0" },
{ name = "pyzmq", specifier = ">=27.1.0" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "taguette", specifier = ">=1.5.1" },
{ name = "vl-convert-python", specifier = ">=1.9.0.post1" },
{ name = "wordcloud", specifier = ">=1.9.5" },
]
@@ -1430,19 +1432,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" },
]
[[package]]
name = "plotly"
version = "6.5.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "narwhals" },
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d6/ff/a4938b75e95114451efdb34db6b41930253e67efc8dc737bd592ef2e419d/plotly-6.5.1.tar.gz", hash = "sha256:b0478c8d5ada0c8756bce15315bcbfec7d3ab8d24614e34af9aff7bfcfea9281", size = 7014606, upload-time = "2026-01-07T20:11:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/8e/24e0bb90b2d75af84820693260c5534e9ed351afdda67ed6f393a141a0e2/plotly-6.5.1-py3-none-any.whl", hash = "sha256:5adad4f58c360612b6c5ce11a308cdbc4fd38ceb1d40594a614f0062e227abe1", size = 9894981, upload-time = "2026-01-07T20:11:38.124Z" },
]
[[package]]
name = "polars"
version = "1.37.1"
@@ -1521,6 +1510,49 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" },
]
[[package]]
name = "pyarrow"
version = "23.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" },
{ url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" },
{ url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" },
{ url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" },
{ url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" },
{ url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" },
{ url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" },
{ url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
{ url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
{ url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
{ url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
{ url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
{ url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
{ url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" },
{ url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
{ url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
{ url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
{ url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
{ url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
{ url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
{ url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" },
{ url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
{ url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
{ url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
{ url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
{ url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
{ url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
{ url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" },
{ url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
{ url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
{ url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
{ url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
{ url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
{ url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
{ url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
]
[[package]]
name = "pycparser"
version = "2.23"
@@ -2135,6 +2167,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
]
[[package]]
name = "vl-convert-python"
version = "1.9.0.post1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/93/89/36722344d1758ec2106f4e8eca980f173cfe8f8d0358c1b77cc5d2e035a4/vl_convert_python-1.9.0.post1.tar.gz", hash = "sha256:a5b06b3128037519001166f5341ec7831e19fbd7f3a5f78f73d557ac2d5859ef", size = 4663469, upload-time = "2026-01-21T00:09:55.61Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9f/59/e5862245972ff467d38b0eb5ad28154685e23ecabb47e14f2b6962da7b56/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:43e9515f65bbcd317d1ef328787fd7bf0344c2fde9292eb7a0e64d5d3d29fccb", size = 30512930, upload-time = "2026-01-21T00:09:43.198Z" },
{ url = "https://files.pythonhosted.org/packages/62/e6/e7d0b538c2f0daaf120901dc113bd5d5d1fa51a9532fa5ffd90234e8c69e/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b0e7a3245f32addec7e7abeb1badf72b1513ed71ba1dba7aca853901217b3f4e", size = 29738742, upload-time = "2026-01-21T00:09:46.016Z" },
{ url = "https://files.pythonhosted.org/packages/b8/e2/5645a1bc174c53ff8cd305ed76a4a76ba36e155302db20b42b7e78daeef8/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6ecfe4b7e2ea9e8c30fd6d6eaea3ef85475be1ad249407d9796dce4ecdb5b32", size = 33366278, upload-time = "2026-01-21T00:09:48.42Z" },
{ url = "https://files.pythonhosted.org/packages/a0/18/88e02899b72fa8273ffb32bde12b0e5776ee0fd9fb29559a49c48ec4c5fa/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c1558fa0055e88c465bd3d71760cde9fa2c94a95f776a0ef9178252fd820b1f", size = 33520215, upload-time = "2026-01-21T00:09:50.992Z" },
{ url = "https://files.pythonhosted.org/packages/2f/db/6e8616587035bf0745d0f10b1791c7e945180ac5d6b28677d2f2b3ca693c/vl_convert_python-1.9.0.post1-cp37-abi3-win_amd64.whl", hash = "sha256:7e263269ac0d304640ca842b44dfe430ed863accd9edecff42e279bfc48ce940", size = 32051516, upload-time = "2026-01-21T00:09:53.47Z" },
]
[[package]]
name = "webencodings"
version = "0.5.1"

View File

@@ -5,9 +5,9 @@ import polars as pl
def check_progress(data):
"""Check if all responses are complete based on 'progress' column."""
if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
def duration_validation(data):
@@ -30,10 +30,9 @@ def duration_validation(data):
outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
if outlier_data.shape[0] == 0:
return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
return "### Duration Outliers: \n\n✅ No duration outliers detected"
return mo.md(f"""
### Duration Outliers:
return f"""### Duration Outliers:
**⚠️ Potential outliers detected based on response duration ⚠️**
@@ -50,5 +49,5 @@ def duration_validation(data):
**⚠️ NOTE: These have not been removed from the dataset ⚠️**
""")
"""

File diff suppressed because it is too large Load Diff

20
voices.py Normal file
View File

@@ -0,0 +1,20 @@
Voice Reference Gender
Voice 14 Female
Voice 04 Female
Voice 08 Female
Voice 77 Female
Voice 48 Female
Voice 82 Female
Voice 89 Female
Voice 91 Emily (Current IVR Voice) Female
Voice 34 Male
Voice 69 Male
Voice 45 Male
Voice 46 Male
Voice 54 Male
Voice 74 Male
Voice 81 Male
Voice 86 Male
Voice 88 Male
Voice 16 Male