210 lines
5.0 KiB
Python
210 lines
5.0 KiB
Python
import marimo
|
|
|
|
# Version of marimo that generated this notebook file.
__generated_with = "0.19.2"
# Notebook application object; each cell below is registered on it through
# the `@app.cell` decorator.
app = marimo.App(width="medium")
|
|
|
|
|
|
@app.cell
def _():
    # Notebook-wide imports, grouped as standard library, third-party, and
    # project-local modules.
    from pathlib import Path

    import marimo as mo
    import polars as pl

    from utils import JPMCSurvey
    from plots import (
        plot_average_scores_with_counts,
        plot_top3_ranking_distribution,
    )

    # Only the names consumed by other cells are exported; `pl` and `Path`
    # are imported here but are not used by any other cell.
    return (
        JPMCSurvey,
        mo,
        plot_average_scores_with_counts,
        plot_top3_ranking_distribution,
    )
|
|
|
|
|
|
@app.cell
def _():
    # Input files for the soft-launch export (OneDrive_2026-01-21): the survey
    # definition (.qsf) and the labelled responses (.csv).
    QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"
    RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv"
    # Alternative responses file (test-data export), kept for quick switching:
    # RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
    return QSF_FILE, RESULTS_FILE
|
|
|
|
|
|
@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
    # Build the survey object from the responses file and the QSF definition;
    # it is exported for use by the analysis cells below.
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)

    # Cell output: the survey's `qid_descr_map` attribute.
    survey.qid_descr_map
    return (survey,)
|
|
|
|
|
|
@app.cell
def _(survey):
    # `data` is the un-collected result of `load_data()` and is what the other
    # cells consume; `df` is its collected form, used only for the preview.
    data = survey.load_data()
    df = data.collect()

    # Cell output: every column whose name contains "QID98".
    _qid98_columns = [name for name in df.columns if "QID98" in name]
    df.select(_qid98_columns)
    return (data,)
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section: heading plus the list of data-cleanup steps.
    mo.md(r"""
    # Data Cleanup

    - Remove incomplete responses (progress < 100)
    - Flag outliers based on duration (add column)
    - Flag responses that give the same rating for everything (indicates lack of engagement)
    """)
    return
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section: explains why QID-style column names must be decoded
    # back to their question/choice meaning before analysis.
    mo.md(r"""
    # Answers Decoding

    Pipeline to decode the ranking of voices. Currently saved as QID's, they need to be remapped back to their actual values so that the analysis can be performed. ie:

    `GQIK26_G0_x8_RANK` -> Refers to question `Top 3 Traits_0_8_RANK - What are the important traits for the Chase AI virtual assistant?` and thus the #8 option
    """)
    return
|
|
|
|
|
|
@app.cell(hide_code=True)
def _(mo):
    # Markdown section: open TODO notes for the per-question decoding
    # functions. The part wrapped in an HTML comment is not rendered.
    mo.md(r"""
    ## TODO:

    Create a python function for each of the questions. ie `def QID63()`. Each function should return a Polars query, that can be added to an existing query.

    Ideas:
    - Map column name to include the Voice number (VID) (ie the questions that only have 1 voice). The VID is in this case often included in the question description
    - `QID_x_GROUP` Contains the rankings of the values, stored in order. The following columns (ie `QID26_G0_x1_RANK`) are redundant and not necessary for us. The function should drop the unnecessary columns to clean up
    <!-- - Translate the RANK values back to the actual VID, and create an aggregate column that contains a list of the VIDs in order. ie: [V34, V56, V81].
    - Use the first line of the question description (see `qid_descr_map`) to get the `"DataExportTag"`, which is a property that can be found in the `.qsf` file to inspect the choice number and it's corresponding VID
    - "`VOICE SEL. 8-3_0_5_RANK`" refers to `"DataExportTag": "VOICE SEL. 8-3"`, `Group 0` (not important for this), `Choice 5`, and the value in the cell refers to the Rank assigned to that voice
    - QSF file example to retrieve the VID: `"SurveyElements" -> (Find item where "Payload"["DataExportTag"] == "VOICE SEL. 8-3") -> "Payload" -> "Choices" -> "5" -> "Display" -> (Extract 'Voice <xx>' from the HTML)` -->
    """)
    return
|
|
|
|
|
|
@app.cell
def _(survey):
    # Look up question QID36 in the QSF definition and show its "Payload"
    # entry as the cell output.
    _question = survey._get_qsf_question_by_QID("QID36")
    cfg = _question["Payload"]
    cfg
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Cell output: the collected first item returned by `get_demographics`.
    _demographics = survey.get_demographics(data)[0]
    _demographics.collect()
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Cell output: the collected first item returned by `get_top_8_traits`.
    _top8_traits = survey.get_top_8_traits(data)[0]
    _top8_traits.collect()
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Cell output: the collected first item returned by `get_top_3_traits`.
    _top3_traits = survey.get_top_3_traits(data)[0]
    _top3_traits.collect()
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Cell output: the collected first item returned by
    # `get_character_ranking`.
    _character_ranking = survey.get_character_ranking(data)[0]
    _character_ranking.collect()
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Cell output: the collected first item returned by `get_18_8_3`.
    _selection_18_8_3 = survey.get_18_8_3(data)[0]
    _selection_18_8_3.collect()
    return
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section heading for the 1-10 voice scale analysis.
    mo.md(r"""
    # Voice Scales 1-10
    """)
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Collect the first item returned by `get_voice_scale_1_10`; the result is
    # shown as the cell output and exported for the plot cell.
    _scales_query = survey.get_voice_scale_1_10(data)[0]
    vscales = _scales_query.collect()
    vscales
    return (vscales,)
|
|
|
|
|
|
@app.cell
def _(plot_average_scores_with_counts, vscales):
    # Cell output: the plot built from the collected voice-scale frame.
    plot_average_scores_with_counts(
        vscales,
        x_label="Voice",
        width=1000,
    )
    return
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section heading for the "SS Green Blue" question block.
    mo.md(r"""
    # SS Green Blue
    """)
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # `get_ss_green_blue` returns two items; only the first is previewed here.
    # Underscore-prefixed names keep both values private to this cell.
    _query, _choices = survey.get_ss_green_blue(data)
    _preview = _query.collect().head()
    print(_preview)
    return
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section heading for the top-3 voices analysis.
    mo.md(r"""
    # Top 3 Voices
    """)
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # Collect the first item returned by `get_top_3_voices`; the result is
    # shown as the cell output and exported for the cells below.
    _top3_query = survey.get_top_3_voices(data)[0]
    top3_voices = _top3_query.collect()
    top3_voices
    return (top3_voices,)
|
|
|
|
|
|
@app.cell
def _(top3_voices):
    # Print the head of the collected top-3 voices frame as plain text.
    _preview = top3_voices.head()
    print(_preview)
    return
|
|
|
|
|
|
@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
    # Cell output: the plot built from the collected top-3 voices frame.
    plot_top3_ranking_distribution(
        top3_voices,
        x_label="Voice",
        width=1000,
    )
    return
|
|
|
|
|
|
@app.cell
def _(mo):
    # Markdown section heading for the "SS Orange / Red" question block.
    mo.md(r"""
    # SS Orange / Red
    """)
    return
|
|
|
|
|
|
@app.cell
def _(data, survey):
    # `get_ss_orange_red` returns two items; only the first is displayed here.
    # The second is bound to an underscore-prefixed name so it stays private
    # to this cell, matching the green/blue cell's `_choice_map`. Without the
    # prefix it would become an unused notebook-wide definition that clashes
    # with any other cell defining `choice_map`.
    _lf, _choice_map = survey.get_ss_orange_red(data)

    # Cell output: the collected frame.
    _d = _lf.collect()
    _d
    return
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the notebook's cells when the file is executed as a script.
    app.run()
|