82 lines
3.0 KiB
Python
82 lines
3.0 KiB
Python
import marimo
|
|
|
|
# Version stamp stored in the notebook file (presumably the marimo release
# that last wrote it — confirm before editing by hand).
__generated_with = "0.19.2"
# Notebook application object; each cell in this file is registered on it
# through the @app.cell decorator.
app = marimo.App(width="medium")
|
|
|
|
|
|
@app.cell
def _():
    # Standard library
    from pathlib import Path

    # Third-party
    import marimo as mo
    import polars as pl

    # Project-local helpers applied to the results CSV in other cells
    from utils import extract_qid_descr_map, load_csv_with_qid_headers

    # Only the names that other cells take as parameters are handed back;
    # `Path` and `pl` are not referenced by any cell visible in this file.
    return (
        extract_qid_descr_map,
        load_csv_with_qid_headers,
        mo,
    )
|
|
|
|
|
|
@app.cell
def _():
    # Path of the CSV export that the other cells read. The literal is split
    # across two adjacent strings purely for line length; Python joins them
    # into the same single path.
    RESULTS_FILE = (
        'data/exports/OneDrive_2026-01-21/Soft Launch Data/'
        'JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
    )
    # Alternative input (the "TestData" export), kept commented out so the
    # notebook can be pointed back at it:
    # RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
    return (RESULTS_FILE,)
|
|
|
|
|
|
@app.cell
def _(RESULTS_FILE, extract_qid_descr_map):
    # Result of the project helper for the export file; presumably a mapping
    # from QID to question description, going by the helper's name — confirm
    # against `utils`.
    qid_descr_map = extract_qid_descr_map(RESULTS_FILE)

    # Bare trailing expression so the value is rendered as the cell output.
    qid_descr_map
    return
|
|
|
|
|
|
@app.cell
def _(RESULTS_FILE, load_csv_with_qid_headers):
    # Load the export through the project helper; presumably yields a
    # DataFrame whose columns are keyed by QID — confirm against `utils`.
    df = load_csv_with_qid_headers(RESULTS_FILE)

    # Bare trailing expression so the loaded data is rendered as the cell output.
    df
    return
|
|
|
|
|
|
# Markdown cell: checklist of planned data-cleanup steps (no code is executed
# beyond rendering the text).
@app.cell
def _(mo):
    mo.md(r"""
    # Data Cleanup

    - Remove incomplete responses (progress < 100)
    - Flag outliers based on duration (add column)
    - Flag responses that give the same rating for everything (indicates lack of engagement)
    """)
    return
|
|
|
|
|
|
# Markdown cell: explains why the QID-based ranking columns need decoding.
# NOTE(review): the example column "GQIK26_G0_x8_RANK" in the text below may
# be a typo for "QID26_G0_x8_RANK" — confirm against the export headers
# before changing the rendered text.
@app.cell
def _(mo):
    mo.md(r"""
    # Answers Decoding

    Pipeline to decode the ranking of voices. Currently saved as QID's, they need to be remapped back to their actual values so that the analysis can be performed. ie:

    `GQIK26_G0_x8_RANK` -> Refers to question `Top 3 Traits_0_8_RANK - What are the important traits for the Chase AI virtual assistant?` and thus the #8 option
    """)
    return
|
|
|
|
|
|
# Markdown cell (code hidden in the editor): open TODO list for the per-question
# decoding functions. The last idea is wrapped in an HTML comment in the text.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## TODO:

    Create a python function for each of the questions. ie `def QID63()`. Each function should return a Polars query, that can be added to an existing query.

    Ideas:
    - Map column name to include the Voice number (VID) (ie the questions that only have 1 voice). The VID is in this case often included in the question description
    - `QID_x_GROUP` Contains the rankings of the values, stored in order. The following columns (ie `QID26_G0_x1_RANK`) are redundant and not necessary for us. The function should drop the unnecessary columns to clean up
    <!-- - Translate the RANK values back to the actual VID, and create an aggregate column that contains a list of the VIDs in order. ie: [V34, V56, V81].
    - Use the first line of the question description (see `qid_descr_map`) to get the `"DataExportTag"`, which is a property that can be found in the `.qsf` file to inspect the choice number and it's corresponding VID
    - "`VOICE SEL. 8-3_0_5_RANK`" refers to `"DataExportTag": "VOICE SEL. 8-3"`, `Group 0` (not important for this), `Choice 5`, and the value in the cell refers to the Rank assigned to that voice
    - QSF file example to retrieve the VID: `"SurveyElements" -> (Find item where "Payload"["DataExportTag"] == "VOICE SEL. 8-3") -> "Payload" -> "Choices" -> "5" -> "Display" -> (Extract 'Voice <xx>' from the HTML)` -->
    """)
    return
|
|
|
|
|
|
# Script entry point: run the notebook app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    app.run()
|