initial plots

2026-01-22 20:48:59 +01:00
parent b8642e9de8
commit dbcade215b
3 changed files with 642 additions and 59 deletions
--- a/01_ingest_qualtrics_export.py
+++ b/01_ingest_qualtrics_export.py
@@ -10,29 +10,40 @@ def _():
    import polars as pl
    from pathlib import Path

-    from utils import extract_qid_descr_map, load_csv_with_qid_headers
-    return extract_qid_descr_map, load_csv_with_qid_headers, mo
+    from utils import JPMCSurvey
+    from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution
+    return (
+        JPMCSurvey,
+        mo,
+        plot_average_scores_with_counts,
+        plot_top3_ranking_distribution,
+    )


@app.cell
 def _():
    RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
+    QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
    # RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
-    return (RESULTS_FILE,)
+    return QSF_FILE, RESULTS_FILE


@app.cell
-def _(RESULTS_FILE, extract_qid_descr_map):
-    qid_descr_map = extract_qid_descr_map(RESULTS_FILE)
-    qid_descr_map
-    return
+def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
+    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+    survey.qid_descr_map
+    return (survey,)


@app.cell
-def _(RESULTS_FILE, load_csv_with_qid_headers):
-    df = load_csv_with_qid_headers(RESULTS_FILE)
-    df
-    return
+def _(survey):
+    data = survey.load_data()
+    df = data.collect()
+
+
+    df.select([q for q in df.columns if 'QID98' in q])
+
+    return (data,)


@app.cell
@@ -77,5 +88,122 @@ def _(mo):
    return


+@app.cell
+def _(survey):
+    cfg = survey._get_qsf_question_by_QID('QID36')['Payload']
+    cfg
+    return
+
+
+@app.cell
+def _(data, survey):
+    survey.get_demographics(data)[0].collect()
+    return
+
+
+@app.cell
+def _(data, survey):
+    survey.get_top_8_traits(data)[0].collect()
+    return
+
+
+@app.cell
+def _(data, survey):
+    survey.get_top_3_traits(data)[0].collect()
+    return
+
+
+@app.cell
+def _(data, survey):
+    survey.get_character_ranking(data)[0].collect()
+    return
+
+
+@app.cell
+def _(data, survey):
+    survey.get_18_8_3(data)[0].collect()
+    return
+
+
+@app.cell
+def _(mo):
+    mo.md(r"""
+    # Voice Scales 1-10
+    """)
+    return
+
+
+@app.cell
+def _(data, survey):
+    vscales = survey.get_voice_scale_1_10(data)[0].collect()
+    vscales
+    return (vscales,)
+
+
+@app.cell
+def _(plot_average_scores_with_counts, vscales):
+    plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
+    return
+
+
+@app.cell
+def _(mo):
+    mo.md(r"""
+    # SS Green Blue
+    """)
+    return
+
+
+@app.cell
+def _(data, survey):
+    _lf, _choice_map = survey.get_ss_green_blue(data)
+    print(_lf.collect().head())
+    return
+
+
+@app.cell
+def _(mo):
+    mo.md(r"""
+    # Top 3 Voices
+    """)
+    return
+
+
+@app.cell
+def _(data, survey):
+    top3_voices = survey.get_top_3_voices(data)[0].collect()
+    top3_voices
+    return (top3_voices,)
+
+
+@app.cell
+def _(top3_voices):
+
+    print(top3_voices.head())
+    return
+
+
+@app.cell
+def _(plot_top3_ranking_distribution, top3_voices):
+    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
+    return
+
+
+@app.cell
+def _(mo):
+    mo.md(r"""
+    # SS Orange / Red
+    """)
+    return
+
+
+@app.cell
+def _(data, survey):
+    _lf, _choice_map = survey.get_ss_orange_red(data)  # underscore prefix keeps the unused choice map cell-local, as in the green/blue cell
+    _d = _lf.collect()
+    _d
+    return
+
+
 if __name__ == "__main__":
    app.run()