Compare commits
10 Commits
424355f4a1
...
36d8bc4d88
| Author | SHA1 | Date | |
|---|---|---|---|
| 36d8bc4d88 | |||
| 0485f991d2 | |||
| 3f929d93fd | |||
| 62e75fe899 | |||
| 365e70b834 | |||
| 23136b5c2e | |||
| fd4cb4b596 | |||
| 393c527656 | |||
| 0f5ecf5ac7 | |||
| 84a0f8052e |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -15,3 +15,4 @@ data/
|
||||
docker-volumes/
|
||||
logs/
|
||||
|
||||
figures/
|
||||
@@ -12,15 +12,24 @@ def _():
|
||||
import plotly as plt
|
||||
from pathlib import Path
|
||||
|
||||
from utils import extract_qid_descr_map
|
||||
return Path, extract_qid_descr_map, mo, pd
|
||||
import utils
|
||||
return Path, mo, pd, utils
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(Path):
|
||||
# results_file = Path('data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv')
|
||||
results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
|
||||
return (results_file,)
|
||||
# results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
|
||||
results_file = Path('data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv')
|
||||
qsf_file = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||
return qsf_file, results_file
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(qsf_file, results_file, utils):
|
||||
survey = utils.JPMCSurvey(results_file, qsf_file)
|
||||
data_all = survey.load_data()
|
||||
return (survey,)
|
||||
|
||||
|
||||
@app.cell
|
||||
@@ -33,8 +42,8 @@ def _(mo):
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(extract_qid_descr_map, results_file):
|
||||
qid_descr_map = extract_qid_descr_map(results_file)
|
||||
def _(survey):
|
||||
qid_descr_map = survey.qid_descr_map
|
||||
qid_descr_map
|
||||
return (qid_descr_map,)
|
||||
|
||||
|
||||
@@ -12,63 +12,59 @@ def _():
|
||||
|
||||
from validation import check_progress, duration_validation
|
||||
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
|
||||
import utils
|
||||
|
||||
from speaking_styles import SPEAKING_STYLES
|
||||
return (
|
||||
JPMCSurvey,
|
||||
Path,
|
||||
SPEAKING_STYLES,
|
||||
calculate_weighted_ranking_scores,
|
||||
check_progress,
|
||||
duration_validation,
|
||||
mo,
|
||||
plot_average_scores_with_counts,
|
||||
plot_character_ranking_distribution,
|
||||
plot_most_ranked_1_character,
|
||||
plot_top3_ranking_distribution,
|
||||
plot_weighted_ranking_score,
|
||||
pl,
|
||||
utils,
|
||||
)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
# Load Data
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(Path, mo):
|
||||
def _():
|
||||
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
|
||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||
mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
|
||||
return QSF_FILE, RESULTS_FILE
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
|
||||
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
||||
data_all = survey.load_data()
|
||||
data_all.collect()
|
||||
return data_all, survey
|
||||
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
||||
data_all = S.load_data()
|
||||
return S, data_all
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
## Data Validation
|
||||
def _(Path, RESULTS_FILE, data_all, mo):
|
||||
mo.md(f"""
|
||||
# Load Data
|
||||
|
||||
**Dataset:** `{Path(RESULTS_FILE).name}`
|
||||
|
||||
{mo.ui.table(data_all.collect())}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(check_progress, data_all):
|
||||
check_progress(data_all)
|
||||
return
|
||||
@app.cell(hide_code=True)
|
||||
def _(check_progress, data_all, duration_validation, mo):
|
||||
mo.md(f"""
|
||||
## Data Validation
|
||||
|
||||
{check_progress(data_all)}
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data_all, duration_validation):
|
||||
duration_validation(data_all)
|
||||
|
||||
{duration_validation(data_all)}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@@ -92,9 +88,42 @@ def _(mo):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data_all, survey):
|
||||
data = survey.filter_data(data_all, age=None, gender=None, income=None, ethnicity=None, consumer=None)
|
||||
@app.cell(hide_code=True)
|
||||
def _(S, mo):
|
||||
filter_form = mo.md('''
|
||||
# Data Filter
|
||||
|
||||
{age}
|
||||
|
||||
{gender}
|
||||
|
||||
{ethnicity}
|
||||
|
||||
{income}
|
||||
|
||||
{consumer}
|
||||
'''
|
||||
).batch(
|
||||
age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
|
||||
gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
|
||||
ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
|
||||
income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
|
||||
consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
|
||||
).form()
|
||||
filter_form
|
||||
return (filter_form,)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(S, data_all, filter_form, mo):
|
||||
mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
|
||||
_d = S.filter_data(data_all, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])
|
||||
|
||||
# Stop execution and prevent other cells from running if no data is selected
|
||||
mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
|
||||
data = _d
|
||||
|
||||
data.collect()
|
||||
return (data,)
|
||||
|
||||
|
||||
@@ -112,47 +141,48 @@ def _(mo):
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
## Character personality ranking
|
||||
|
||||
### 1. Which character personality is ranked best?
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data, survey):
|
||||
char_rank = survey.get_character_ranking(data)[0].collect()
|
||||
|
||||
def _(S, data):
|
||||
char_rank = S.get_character_ranking(data)[0]
|
||||
return (char_rank,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(char_rank, plot_character_ranking_distribution):
|
||||
plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
|
||||
return
|
||||
def _(S, char_rank, mo):
|
||||
mo.md(f"""
|
||||
### 1. Which character personality is ranked best?
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
### 2. Which character personality is ranked number 1 the most?
|
||||
{mo.ui.altair_chart(S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality'))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(
|
||||
calculate_weighted_ranking_scores,
|
||||
char_rank,
|
||||
plot_weighted_ranking_score,
|
||||
):
|
||||
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
|
||||
plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
|
||||
def _(S, char_rank, mo):
|
||||
mo.md(f"""
|
||||
### 2. Which character personality is ranked 1st the most?
|
||||
|
||||
|
||||
{mo.ui.altair_chart(S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(char_rank, plot_most_ranked_1_character):
|
||||
plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
|
||||
def _(S, calculate_weighted_ranking_scores, char_rank, mo):
|
||||
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
|
||||
|
||||
mo.md(f"""
|
||||
### 3. Which character personality most popular based on weighted scores?
|
||||
|
||||
|
||||
{mo.ui.altair_chart(S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@@ -165,53 +195,73 @@ def _(mo):
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data, survey):
|
||||
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
|
||||
print(v_18_8_3.head())
|
||||
return
|
||||
def _(S, data):
|
||||
v_18_8_3 = S.get_18_8_3(data)[0].collect()
|
||||
# print(v_18_8_3.head())
|
||||
return (v_18_8_3,)
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
Which 8 voices are chosen the most out of 18?
|
||||
def _(S, mo, v_18_8_3):
|
||||
mo.md(f"""
|
||||
### Which 8 voices are chosen the most out of 18?
|
||||
|
||||
{mo.ui.altair_chart(S.plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
||||
def _(S, mo, v_18_8_3):
|
||||
mo.md(f"""
|
||||
### Which 3 voices are chosen the most out of 18?
|
||||
|
||||
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
||||
|
||||
{mo.ui.altair_chart(S.plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
|
||||
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
||||
def _(S, calculate_weighted_ranking_scores, data):
|
||||
top3_voices = S.get_top_3_voices(data)[0]
|
||||
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
|
||||
return top3_voices, top3_voices_weighted
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, mo, top3_voices):
|
||||
mo.md(f"""
|
||||
### Which voice is ranked best in the ranking question for top 3?
|
||||
|
||||
(not best 3 out of 8 question)
|
||||
|
||||
{mo.ui.altair_chart(S.plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(plot_top3_ranking_distribution, top3_voices):
|
||||
plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
|
||||
def _(S, mo, top3_voices_weighted):
|
||||
mo.md(f"""
|
||||
### Most popular **voice** based on weighted scores?
|
||||
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
||||
Distribution of the rankings for each voice:
|
||||
|
||||
{mo.ui.altair_chart(S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
Which voice is ranked number 1 the most? (not always the voice with most points)
|
||||
@app.cell
|
||||
def _(S, mo, top3_voices):
|
||||
mo.md(f"""
|
||||
### Which voice is ranked number 1 the most?
|
||||
|
||||
- Each of the 350 participants gives exactly one 1st-place vote.
|
||||
- Total Rank-1 votes = 350.
|
||||
- Voices are sorted from most to least 1st-place votes.
|
||||
- The top 3 voices with the most Rank-1 votes are colored blue.
|
||||
- This can differ from the points-based winners (3–2–1 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
|
||||
(not always the voice with most points)
|
||||
|
||||
{mo.ui.altair_chart(S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
@@ -220,18 +270,42 @@ def _(mo):
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
## Voice Speaking Style - Perception Traits
|
||||
|
||||
Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
How does each voice score for each “speaking style labeled trait”? Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
|
||||
@app.cell
|
||||
def _(S, data, utils):
|
||||
ss_or, choice_map_or = S.get_ss_orange_red(data)
|
||||
ss_gb, choice_map_gb = S.get_ss_green_blue(data)
|
||||
|
||||
- There are 4 speaking styles: Green, Blue, Orange, Red.
|
||||
- There are 16 traits distributed across the 4 speaking styles.
|
||||
""")
|
||||
# Combine the data
|
||||
ss_all = ss_or.join(ss_gb, on='_recordId')
|
||||
_d = ss_all.collect()
|
||||
|
||||
choice_map = {**choice_map_or, **choice_map_gb}
|
||||
# print(_d.head())
|
||||
# print(choice_map)
|
||||
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
|
||||
return choice_map, ss_all, ss_long
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, mo, pl, ss_long):
|
||||
content = """### How does each voice score for each “speaking style labeled trait”?"""
|
||||
|
||||
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
|
||||
trait_d = ss_long.filter(pl.col("Description") == trait)
|
||||
|
||||
content += f"""
|
||||
### {i+1}) {trait.replace(":", " ↔ ")}
|
||||
|
||||
{mo.ui.altair_chart(S.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", " ↔ "), height=550))}
|
||||
"""
|
||||
|
||||
mo.md(content)
|
||||
return
|
||||
|
||||
|
||||
@@ -244,23 +318,18 @@ def _(mo):
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data, mo, plot_average_scores_with_counts, survey):
|
||||
vscales = survey.get_voice_scale_1_10(data)[0].collect()
|
||||
plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
|
||||
|
||||
mo.md(f"""
|
||||
|
||||
How does each voice score on a scale from 1-10?
|
||||
|
||||
{mo.ui.plotly(plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
|
||||
""")
|
||||
return
|
||||
def _(S, data):
|
||||
vscales = S.get_voice_scale_1_10(data)[0]
|
||||
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
|
||||
return (vscales,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
def _(S, mo, vscales):
|
||||
mo.md(f"""
|
||||
### How does each voice score on a scale from 1-10?
|
||||
|
||||
{mo.ui.altair_chart(S.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
|
||||
""")
|
||||
return
|
||||
|
||||
@@ -286,16 +355,57 @@ def _(mo):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
### Total Results
|
||||
### How to Interpret These Correlation Results
|
||||
Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).
|
||||
|
||||
- [ ] 4 correlation diagrams
|
||||
**Reading the Chart**
|
||||
|
||||
| Correlation Value | Interpretation |
|
||||
|-----------|----------|
|
||||
| r > 0 (Green bars)| Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
|
||||
| r < 0 (Red bars)| Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
|
||||
| r ≈ 0| No relationship — this trait doesn't predict Voice Scale ratings|
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(choice_map, ss_all, utils, vscales):
|
||||
df_style = utils.process_speaking_style_data(ss_all, choice_map)
|
||||
df_voice_long = utils.process_voice_scale_data(vscales)
|
||||
|
||||
joined_df = df_style.join(df_voice_long, on=["_recordId", "Voice"], how="inner")
|
||||
# df_voice_long
|
||||
return df_style, joined_df
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, SPEAKING_STYLES, joined_df, mo):
|
||||
_content = """### Total Results
|
||||
|
||||
"""
|
||||
|
||||
for style, traits in SPEAKING_STYLES.items():
|
||||
# print(f"Correlation plot for {style}...")
|
||||
fig = S.plot_speaking_style_correlation(
|
||||
data=joined_df,
|
||||
style_color=style,
|
||||
style_traits=traits,
|
||||
title=f"Correlation: Speaking Style {style} and Voice Scale 1-10"
|
||||
)
|
||||
_content += f"""
|
||||
#### Speaking Style **{style}**:
|
||||
|
||||
{mo.ui.altair_chart(fig)}
|
||||
|
||||
"""
|
||||
mo.md(_content)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
@@ -338,6 +448,30 @@ def _(mo):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, SPEAKING_STYLES, df_style, mo, top3_voices, utils):
|
||||
df_ranking = utils.process_voice_ranking_data(top3_voices)
|
||||
joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
|
||||
|
||||
|
||||
_content = """## Correlations Voice Speaking Styles <-> Voice Ranking Points
|
||||
|
||||
"""
|
||||
|
||||
for _style, _traits in SPEAKING_STYLES.items():
|
||||
_fig = S.plot_speaking_style_ranking_correlation(data=joined, style_color=_style, style_traits=_traits)
|
||||
_content += f"""
|
||||
|
||||
#### Speaking Style **{_style}**:
|
||||
|
||||
{mo.ui.altair_chart(_fig)}
|
||||
|
||||
"""
|
||||
|
||||
mo.md(_content)
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _(mo):
|
||||
mo.md(r"""
|
||||
|
||||
BIN
docs/Speaking Style Traits Quantitative test design.pdf
Normal file
BIN
docs/Speaking Style Traits Quantitative test design.pdf
Normal file
Binary file not shown.
1307
docs/altair-migration-plan.md
Normal file
1307
docs/altair-migration-plan.md
Normal file
File diff suppressed because it is too large
Load Diff
60
example_correlation_plots.py
Normal file
60
example_correlation_plots.py
Normal file
@@ -0,0 +1,60 @@
|
||||
|
||||
import polars as pl
|
||||
from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
|
||||
from plots import plot_speaking_style_correlation
|
||||
from speaking_styles import SPEAKING_STYLES
|
||||
|
||||
# 1. Initialize Survey and Load Data
|
||||
# We need to point to the actual data files if possible, or use standard paths
|
||||
# Assuming the file structure observed in workspace:
|
||||
# Data: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv
|
||||
# QSF: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf
|
||||
|
||||
RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
|
||||
QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"
|
||||
|
||||
try:
|
||||
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
||||
except TypeError:
|
||||
# Fallback if signature is different or file not found (just in case)
|
||||
print("Error initializing survey with paths. Checking signature...")
|
||||
# This part is just for debugging if it fails again
|
||||
raise
|
||||
|
||||
data = survey.load_data()
|
||||
|
||||
# 2. Extract Data
|
||||
# Speaking Styles
|
||||
ss_gb, map_gb = survey.get_ss_green_blue(data)
|
||||
ss_or, map_or = survey.get_ss_orange_red(data)
|
||||
|
||||
# Voice Scale 1-10
|
||||
voice_scale, _ = survey.get_voice_scale_1_10(data)
|
||||
|
||||
# 3. Process Dataframes (Wide to Long)
|
||||
# Note: process_speaking_style_data handles the melt and parsing
|
||||
# We collect them because the plotting functions expect eager DataFrames usually,
|
||||
# but polars functions here return eager DFs currently based on `utils.py` implementation (return result.collect())
|
||||
|
||||
df_style_gb = process_speaking_style_data(ss_gb, map_gb)
|
||||
df_style_or = process_speaking_style_data(ss_or, map_or)
|
||||
|
||||
# Combine both style dataframes
|
||||
df_style_all = pl.concat([df_style_gb, df_style_or])
|
||||
|
||||
# Process Voice Scale
|
||||
df_voice_long = process_voice_scale_data(voice_scale)
|
||||
|
||||
# 4. Join Style + Voice Data
|
||||
joined_df = join_voice_and_style_data(df_style_all, df_voice_long)
|
||||
|
||||
# 5. Generate Plots for each Style Color
|
||||
for style, traits in SPEAKING_STYLES.items():
|
||||
print(f"Generating plot for {style}...")
|
||||
fig = plot_speaking_style_correlation(
|
||||
df=joined_df,
|
||||
style_color=style,
|
||||
style_traits=traits
|
||||
)
|
||||
fig.show()
|
||||
# If in Marimo/Jupyter, just 'fig' or 'mo.ui.plotly(fig)'
|
||||
@@ -14,12 +14,13 @@ dependencies = [
|
||||
"openai>=2.9.0",
|
||||
"openpyxl>=3.1.5",
|
||||
"pandas>=2.3.3",
|
||||
"plotly>=6.5.1",
|
||||
"polars>=1.37.1",
|
||||
"pyarrow>=23.0.0",
|
||||
"pysqlite3>=0.6.0",
|
||||
"pyzmq>=27.1.0",
|
||||
"requests>=2.32.5",
|
||||
"taguette>=1.5.1",
|
||||
"vl-convert-python>=1.9.0.post1",
|
||||
"wordcloud>=1.9.5",
|
||||
]
|
||||
|
||||
|
||||
33
speaking_styles.py
Normal file
33
speaking_styles.py
Normal file
@@ -0,0 +1,33 @@
|
||||
|
||||
"""
|
||||
Mapping of Speaking Styles (Colors) to their constituent Traits (Positive side).
|
||||
Derived from "Speaking Style Traits Quantitative test design.pdf".
|
||||
"""
|
||||
|
||||
SPEAKING_STYLES = {
|
||||
"Green": [
|
||||
"Friendly | Conversational | Down-to-earth",
|
||||
"Approachable | Familiar | Warm",
|
||||
"Optimistic | Benevolent | Positive | Appreciative"
|
||||
],
|
||||
"Blue": [
|
||||
"Proactive | Cooperative",
|
||||
"Knowledgable | Resourceful | Savvy",
|
||||
"Clear | Straightforward | Direct",
|
||||
"Confident | Competent",
|
||||
"Respectable | Respectful"
|
||||
],
|
||||
"Orange": [
|
||||
"Attentive | Helpful | Caring | Deliberate",
|
||||
"Reassuring | Empowering",
|
||||
"Progressive | Guiding | Intentional",
|
||||
"Patient | Open-minded"
|
||||
],
|
||||
"Red": [
|
||||
"Trustworthy | Reliable | Dependable",
|
||||
"Calm | Steady/Stable | Controlled",
|
||||
"Transparent | Upright | Altruistic",
|
||||
"Adaptive | Flexible"
|
||||
]
|
||||
}
|
||||
|
||||
58
theme.py
58
theme.py
@@ -16,7 +16,65 @@ class ColorPalette:
|
||||
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
|
||||
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
|
||||
|
||||
# Neutral color for unhighlighted comparison items
|
||||
NEUTRAL = "#D3D3D3" # Light Grey
|
||||
|
||||
# General UI elements
|
||||
TEXT = "black"
|
||||
GRID = "lightgray"
|
||||
BACKGROUND = "white"
|
||||
|
||||
|
||||
def jpmc_altair_theme():
|
||||
"""JPMC brand theme for Altair charts."""
|
||||
return {
|
||||
'config': {
|
||||
'view': {
|
||||
'continuousWidth': 1000,
|
||||
'continuousHeight': 500,
|
||||
'strokeWidth': 0
|
||||
},
|
||||
'background': ColorPalette.BACKGROUND,
|
||||
'axis': {
|
||||
'grid': True,
|
||||
'gridColor': ColorPalette.GRID,
|
||||
'labelFontSize': 11,
|
||||
'titleFontSize': 12,
|
||||
'labelColor': ColorPalette.TEXT,
|
||||
'titleColor': ColorPalette.TEXT,
|
||||
'labelLimit': 200 # Allow longer labels before truncation
|
||||
},
|
||||
'axisX': {
|
||||
'labelAngle': -45,
|
||||
'labelLimit': 200 # Allow longer x-axis labels
|
||||
},
|
||||
'axisY': {
|
||||
'labelAngle': 0
|
||||
},
|
||||
'legend': {
|
||||
'orient': 'top',
|
||||
'direction': 'horizontal',
|
||||
'titleFontSize': 11,
|
||||
'labelFontSize': 11
|
||||
},
|
||||
'title': {
|
||||
'fontSize': 14,
|
||||
'color': ColorPalette.TEXT,
|
||||
'anchor': 'start',
|
||||
'subtitleFontSize': 10,
|
||||
'subtitleColor': 'gray'
|
||||
},
|
||||
'bar': {
|
||||
'color': ColorPalette.PRIMARY
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Register Altair theme
|
||||
try:
|
||||
import altair as alt
|
||||
alt.themes.register('jpmc', jpmc_altair_theme)
|
||||
alt.themes.enable('jpmc')
|
||||
except ImportError:
|
||||
pass # Altair not installed
|
||||
|
||||
260
utils.py
260
utils.py
@@ -3,8 +3,10 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
from typing import Union
|
||||
import json
|
||||
|
||||
import re
|
||||
from plots import JPMCPlotsMixin
|
||||
|
||||
import marimo as mo
|
||||
|
||||
def extract_voice_label(html_str: str) -> str:
|
||||
"""
|
||||
@@ -55,24 +57,27 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
|
||||
|
||||
|
||||
|
||||
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
||||
def calculate_weighted_ranking_scores(df: pl.LazyFrame) -> pl.DataFrame:
|
||||
"""
|
||||
Calculate weighted scores for character rankings.
|
||||
Calculate weighted scores for character or voice rankings.
|
||||
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : pl.DataFrame
|
||||
DataFrame containing character ranking columns.
|
||||
DataFrame containing character/ voice ranking columns.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pl.DataFrame
|
||||
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
|
||||
"""
|
||||
if isinstance(df, pl.LazyFrame):
|
||||
df = df.collect()
|
||||
|
||||
scores = []
|
||||
# Identify columns related to Character Ranking
|
||||
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
|
||||
# Identify ranking columns (assume all columns except _recordId)
|
||||
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||
|
||||
for col in ranking_cols:
|
||||
# Calculate score:
|
||||
@@ -84,7 +89,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
||||
weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
|
||||
|
||||
# Clean name
|
||||
clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
|
||||
clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
|
||||
|
||||
scores.append({
|
||||
'Character': clean_name,
|
||||
@@ -94,7 +99,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
||||
return pl.DataFrame(scores).sort('Weighted Score', descending=True)
|
||||
|
||||
|
||||
class JPMCSurvey:
|
||||
class JPMCSurvey(JPMCPlotsMixin):
|
||||
"""Class to handle JPMorgan Chase survey data."""
|
||||
|
||||
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
|
||||
@@ -109,6 +114,23 @@ class JPMCSurvey:
|
||||
self.qid_descr_map = self._extract_qid_descr_map()
|
||||
self.qsf:dict = self._load_qsf()
|
||||
|
||||
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
|
||||
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
|
||||
if not self.fig_save_dir.exists():
|
||||
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.data_filtered = None
|
||||
self.plot_height = 500
|
||||
self.plot_width = 1000
|
||||
|
||||
# Filter values
|
||||
self.filter_age:list = None
|
||||
self.filter_gender:list = None
|
||||
self.filter_consumer:list = None
|
||||
self.filter_ethnicity:list = None
|
||||
self.filter_income:list = None
|
||||
|
||||
|
||||
|
||||
def _extract_qid_descr_map(self) -> dict:
|
||||
"""Extract mapping of Qualtrics ImportID to Question Description from results file."""
|
||||
@@ -188,6 +210,13 @@ class JPMCSurvey:
|
||||
# Rename columns with the extracted ImportIds
|
||||
df.columns = new_columns
|
||||
|
||||
# Store unique values for filters (ignoring nulls) to detect "all selected" state
|
||||
self.options_age = sorted(df['QID1'].drop_nulls().unique().to_list()) if 'QID1' in df.columns else []
|
||||
self.options_gender = sorted(df['QID2'].drop_nulls().unique().to_list()) if 'QID2' in df.columns else []
|
||||
self.options_consumer = sorted(df['Consumer'].drop_nulls().unique().to_list()) if 'Consumer' in df.columns else []
|
||||
self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
|
||||
self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
|
||||
|
||||
return df.lazy()
|
||||
|
||||
def _get_subset(self, q: pl.LazyFrame, QIDs, rename_cols=True, include_record_id=True) -> pl.LazyFrame:
|
||||
@@ -213,25 +242,32 @@ class JPMCSurvey:
|
||||
- ethnicity: list
|
||||
- income: list
|
||||
|
||||
Returns filtered polars LazyFrame.
|
||||
Also saves the result to self.data_filtered.
|
||||
"""
|
||||
|
||||
# Apply filters
|
||||
self.filter_age = age
|
||||
if age is not None:
|
||||
q = q.filter(pl.col('QID1').is_in(age))
|
||||
|
||||
self.filter_gender = gender
|
||||
if gender is not None:
|
||||
q = q.filter(pl.col('QID2').is_in(gender))
|
||||
|
||||
self.filter_consumer = consumer
|
||||
if consumer is not None:
|
||||
q = q.filter(pl.col('Consumer').is_in(consumer))
|
||||
|
||||
self.filter_ethnicity = ethnicity
|
||||
if ethnicity is not None:
|
||||
q = q.filter(pl.col('QID3').is_in(ethnicity))
|
||||
|
||||
self.filter_income = income
|
||||
if income is not None:
|
||||
q = q.filter(pl.col('QID15').is_in(income))
|
||||
|
||||
return q
|
||||
self.data_filtered = q
|
||||
return self.data_filtered
|
||||
|
||||
def get_demographics(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
|
||||
"""Extract columns containing the demographics.
|
||||
@@ -415,6 +451,210 @@ class JPMCSurvey:
|
||||
return self._get_subset(q, QIDs, rename_cols=True), None
|
||||
|
||||
|
||||
def process_speaking_style_data(
    df: Union[pl.LazyFrame, pl.DataFrame],
    trait_map: dict[str, str]
) -> pl.DataFrame:
    """
    Process speaking style columns from wide to long format and map trait descriptions.

    Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
    Example: SS_Orange_Red__V14__Choice_1

    Parameters
    ----------
    df : pl.LazyFrame or pl.DataFrame
        Input dataframe containing SS_* columns and a _recordId column.
    trait_map : dict
        Dictionary mapping column names to trait descriptions.
        Keys should be full column names like "SS_Orange_Red__V14__Choice_1".
        Descriptions of the form "Left : Right" are split into
        Left_Anchor / Right_Anchor at the FIRST colon.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Style_Group, Choice_ID, score, Description,
        Left_Anchor, Right_Anchor
    """
    # Normalize input so both eager and lazy frames are accepted.
    lf = df.lazy() if isinstance(df, pl.DataFrame) else df

    # 1. Melt SS_ columns from wide to long.
    # NOTE(review): `melt` is deprecated in polars >= 1.0 in favour of
    # `unpivot(on=..., index=...)`; kept as-is here to avoid any behavior drift.
    melted = lf.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^SS_.*$"),
        variable_name="full_col_name",
        value_name="score"
    )

    # 2. Split each column name into its three components.
    # Captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14),
    # Choice_ID (e.g. Choice_1)
    pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"

    processed = melted.with_columns(
        pl.col("full_col_name").str.extract_groups(pattern)
    ).unnest("full_col_name")

    # 3. Build a (Style_Group, Choice_ID) -> Description lookup from trait_map.
    # The first column name seen for each key wins; duplicates are skipped.
    mapping_data = []
    seen = set()

    for col_name, desc in trait_map.items():
        match = re.match(pattern, col_name)
        if not match:
            continue

        groups = match.groupdict()
        key = (groups["Style_Group"], groups["Choice_ID"])
        if key in seen:
            continue
        seen.add(key)

        # Split "Left : Right" anchors at the FIRST colon only, so a
        # description that itself contains a colon keeps its full
        # right-hand text (a plain str.split(':') would keep only the
        # middle segment as the right anchor).
        left, _, right = desc.partition(':')

        mapping_data.append({
            "Style_Group": groups["Style_Group"],
            "Choice_ID": groups["Choice_ID"],
            "Description": desc,
            "Left_Anchor": left.strip(),
            "Right_Anchor": right.strip(),
        })

    # No parseable trait_map entries: return the melted data unenriched.
    if not mapping_data:
        return processed.collect()

    mapping_lf = pl.LazyFrame(mapping_data)

    # 4. Attach descriptions/anchors to every melted row.
    result = processed.join(
        mapping_lf,
        on=["Style_Group", "Choice_ID"],
        how="left"
    )

    # 5. Cast score to integer; strict=False turns unparseable values
    # into nulls instead of raising.
    result = result.with_columns(
        pl.col("score").cast(pl.Int64, strict=False)
    )

    return result.collect()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def process_voice_scale_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Voice Scale columns from wide to long format.

    Parses columns with format: Voice_Scale_1_10__V{Voice}
    Example: Voice_Scale_1_10__V14

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Voice_Scale_Score
    """
    # Accept either an eager or a lazy frame.
    query = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long: one row per (respondent, voice).
    long_form = query.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Voice_Scale_1_10__V.*$"),
        variable_name="full_col_name",
        value_name="Voice_Scale_Score"
    )

    # Recover the canonical "V<num>" voice label straight from the
    # melted column name.
    labelled = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"(V\d+)", 1).alias("Voice")
    )

    # Keep the score as Float (source data is f64); strict=False turns
    # unparseable values into nulls rather than raising.
    narrowed = labelled.select([
        "_recordId",
        "Voice",
        pl.col("Voice_Scale_Score").cast(pl.Float64, strict=False)
    ])

    return narrowed.collect()
|
||||
|
||||
def join_voice_and_style_data(
    processed_style_data: pl.DataFrame,
    processed_voice_data: pl.DataFrame
) -> pl.DataFrame:
    """
    Merge Speaking Style rows with their Voice Scale 1-10 scores.

    Parameters
    ----------
    processed_style_data : pl.DataFrame
        Output of process_speaking_style_data.
    processed_voice_data : pl.DataFrame
        Output of process_voice_scale_data.

    Returns
    -------
    pl.DataFrame
        Inner join of both inputs on (_recordId, Voice); rows lacking a
        match on either side are dropped.
    """
    join_keys = ["_recordId", "Voice"]
    merged = processed_style_data.join(
        processed_voice_data,
        on=join_keys,
        how="inner"
    )
    return merged
|
||||
|
||||
def process_voice_ranking_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Voice Ranking columns from wide to long format and score them.

    Parses columns with format: Top_3_Voices_ranking__V{Voice}
    Ranks become points: 1st place = 3 pts, 2nd place = 2 pts,
    3rd place = 1 pt.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Ranking_Points
    """
    # Accept either an eager or a lazy frame.
    query = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long: one row per (respondent, voice).
    long_form = query.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Top_3_Voices_ranking__V.*$"),
        variable_name="full_col_name",
        value_name="rank"
    )

    # Recover the "V<num>" voice label from the melted column name.
    labelled = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"(V\d+)", 1).alias("Voice")
    )

    # Map rank -> points (1 -> 3, 2 -> 2, 3 -> 1); anything else --
    # including null, i.e. a voice the respondent did not rank -- scores 0.
    points = (
        pl.when(pl.col("rank") == 1).then(3)
        .when(pl.col("rank") == 2).then(2)
        .when(pl.col("rank") == 3).then(1)
        .otherwise(0)
        .alias("Ranking_Points")
    )

    narrowed = labelled.with_columns(points).select([
        "_recordId",
        "Voice",
        "Ranking_Points"
    ])

    return narrowed.collect()
|
||||
|
||||
75
uv.lock
generated
75
uv.lock
generated
@@ -1332,12 +1332,13 @@ dependencies = [
|
||||
{ name = "openai" },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pandas" },
|
||||
{ name = "plotly" },
|
||||
{ name = "polars" },
|
||||
{ name = "pyarrow" },
|
||||
{ name = "pysqlite3" },
|
||||
{ name = "pyzmq" },
|
||||
{ name = "requests" },
|
||||
{ name = "taguette" },
|
||||
{ name = "vl-convert-python" },
|
||||
{ name = "wordcloud" },
|
||||
]
|
||||
|
||||
@@ -1352,12 +1353,13 @@ requires-dist = [
|
||||
{ name = "openai", specifier = ">=2.9.0" },
|
||||
{ name = "openpyxl", specifier = ">=3.1.5" },
|
||||
{ name = "pandas", specifier = ">=2.3.3" },
|
||||
{ name = "plotly", specifier = ">=6.5.1" },
|
||||
{ name = "polars", specifier = ">=1.37.1" },
|
||||
{ name = "pyarrow", specifier = ">=23.0.0" },
|
||||
{ name = "pysqlite3", specifier = ">=0.6.0" },
|
||||
{ name = "pyzmq", specifier = ">=27.1.0" },
|
||||
{ name = "requests", specifier = ">=2.32.5" },
|
||||
{ name = "taguette", specifier = ">=1.5.1" },
|
||||
{ name = "vl-convert-python", specifier = ">=1.9.0.post1" },
|
||||
{ name = "wordcloud", specifier = ">=1.9.5" },
|
||||
]
|
||||
|
||||
@@ -1430,19 +1432,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotly"
|
||||
version = "6.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "narwhals" },
|
||||
{ name = "packaging" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d6/ff/a4938b75e95114451efdb34db6b41930253e67efc8dc737bd592ef2e419d/plotly-6.5.1.tar.gz", hash = "sha256:b0478c8d5ada0c8756bce15315bcbfec7d3ab8d24614e34af9aff7bfcfea9281", size = 7014606, upload-time = "2026-01-07T20:11:41.644Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/8e/24e0bb90b2d75af84820693260c5534e9ed351afdda67ed6f393a141a0e2/plotly-6.5.1-py3-none-any.whl", hash = "sha256:5adad4f58c360612b6c5ce11a308cdbc4fd38ceb1d40594a614f0062e227abe1", size = 9894981, upload-time = "2026-01-07T20:11:38.124Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "1.37.1"
|
||||
@@ -1521,6 +1510,49 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyarrow"
|
||||
version = "23.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycparser"
|
||||
version = "2.23"
|
||||
@@ -2135,6 +2167,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vl-convert-python"
|
||||
version = "1.9.0.post1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/93/89/36722344d1758ec2106f4e8eca980f173cfe8f8d0358c1b77cc5d2e035a4/vl_convert_python-1.9.0.post1.tar.gz", hash = "sha256:a5b06b3128037519001166f5341ec7831e19fbd7f3a5f78f73d557ac2d5859ef", size = 4663469, upload-time = "2026-01-21T00:09:55.61Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/59/e5862245972ff467d38b0eb5ad28154685e23ecabb47e14f2b6962da7b56/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:43e9515f65bbcd317d1ef328787fd7bf0344c2fde9292eb7a0e64d5d3d29fccb", size = 30512930, upload-time = "2026-01-21T00:09:43.198Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/e6/e7d0b538c2f0daaf120901dc113bd5d5d1fa51a9532fa5ffd90234e8c69e/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b0e7a3245f32addec7e7abeb1badf72b1513ed71ba1dba7aca853901217b3f4e", size = 29738742, upload-time = "2026-01-21T00:09:46.016Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/e2/5645a1bc174c53ff8cd305ed76a4a76ba36e155302db20b42b7e78daeef8/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6ecfe4b7e2ea9e8c30fd6d6eaea3ef85475be1ad249407d9796dce4ecdb5b32", size = 33366278, upload-time = "2026-01-21T00:09:48.42Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/18/88e02899b72fa8273ffb32bde12b0e5776ee0fd9fb29559a49c48ec4c5fa/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c1558fa0055e88c465bd3d71760cde9fa2c94a95f776a0ef9178252fd820b1f", size = 33520215, upload-time = "2026-01-21T00:09:50.992Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/db/6e8616587035bf0745d0f10b1791c7e945180ac5d6b28677d2f2b3ca693c/vl_convert_python-1.9.0.post1-cp37-abi3-win_amd64.whl", hash = "sha256:7e263269ac0d304640ca842b44dfe430ed863accd9edecff42e279bfc48ce940", size = 32051516, upload-time = "2026-01-21T00:09:53.47Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webencodings"
|
||||
version = "0.5.1"
|
||||
|
||||
@@ -5,9 +5,9 @@ import polars as pl
|
||||
def check_progress(data):
|
||||
"""Check if all responses are complete based on 'progress' column."""
|
||||
if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
|
||||
return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
|
||||
return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
|
||||
|
||||
return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
|
||||
return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
|
||||
|
||||
|
||||
def duration_validation(data):
|
||||
@@ -30,10 +30,9 @@ def duration_validation(data):
|
||||
outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
|
||||
|
||||
if outlier_data.shape[0] == 0:
|
||||
return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
|
||||
return "### Duration Outliers: \n\n✅ No duration outliers detected"
|
||||
|
||||
return mo.md(f"""
|
||||
### Duration Outliers:
|
||||
return f"""### Duration Outliers:
|
||||
|
||||
**⚠️ Potential outliers detected based on response duration ⚠️**
|
||||
|
||||
@@ -50,5 +49,5 @@ def duration_validation(data):
|
||||
|
||||
**⚠️ NOTE: These have not been removed from the dataset ⚠️**
|
||||
|
||||
""")
|
||||
"""
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
20
voices.py
Normal file
20
voices.py
Normal file
@@ -0,0 +1,20 @@
|
||||
Voice Reference Gender
|
||||
Voice 14 Female
|
||||
Voice 04 Female
|
||||
Voice 08 Female
|
||||
Voice 77 Female
|
||||
|
||||
Voice 48 Female
|
||||
Voice 82 Female
|
||||
Voice 89 Female
|
||||
Voice 91 Emily (Current IVR Voice) Female
|
||||
Voice 34 Male
|
||||
Voice 69 Male
|
||||
Voice 45 Male
|
||||
Voice 46 Male
|
||||
Voice 54 Male
|
||||
Voice 74 Male
|
||||
Voice 81 Male
|
||||
Voice 86 Male
|
||||
Voice 88 Male
|
||||
Voice 16 Male
|
||||
Reference in New Issue
Block a user