Compare commits

...

10 Commits

15 changed files with 7693 additions and 629 deletions

1
.gitignore vendored
View File

@@ -15,3 +15,4 @@ data/
docker-volumes/
logs/
figures/

View File

@@ -12,15 +12,24 @@ def _():
import plotly as plt
from pathlib import Path
from utils import extract_qid_descr_map
return Path, extract_qid_descr_map, mo, pd
import utils
return Path, mo, pd, utils
@app.cell
def _(Path):
# results_file = Path('data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv')
results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
return (results_file,)
# results_file = Path('data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv')
results_file = Path('data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv')
qsf_file = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
return qsf_file, results_file
@app.cell
def _(qsf_file, results_file, utils):
survey = utils.JPMCSurvey(results_file, qsf_file)
data_all = survey.load_data()
return (survey,)
@app.cell
@@ -33,8 +42,8 @@ def _(mo):
@app.cell
def _(extract_qid_descr_map, results_file):
qid_descr_map = extract_qid_descr_map(results_file)
def _(survey):
qid_descr_map = survey.qid_descr_map
qid_descr_map
return (qid_descr_map,)

View File

@@ -12,63 +12,59 @@ def _():
from validation import check_progress, duration_validation
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
import utils
from speaking_styles import SPEAKING_STYLES
return (
JPMCSurvey,
Path,
SPEAKING_STYLES,
calculate_weighted_ranking_scores,
check_progress,
duration_validation,
mo,
plot_average_scores_with_counts,
plot_character_ranking_distribution,
plot_most_ranked_1_character,
plot_top3_ranking_distribution,
plot_weighted_ranking_score,
pl,
utils,
)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
# Load Data
""")
return
@app.cell
def _(Path, mo):
def _():
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
return QSF_FILE, RESULTS_FILE
@app.cell
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data_all = survey.load_data()
data_all.collect()
return data_all, survey
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data_all = S.load_data()
return S, data_all
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
## Data Validation
def _(Path, RESULTS_FILE, data_all, mo):
mo.md(f"""
# Load Data
**Dataset:** `{Path(RESULTS_FILE).name}`
{mo.ui.table(data_all.collect())}
""")
return
@app.cell
def _(check_progress, data_all):
check_progress(data_all)
return
@app.cell(hide_code=True)
def _(check_progress, data_all, duration_validation, mo):
mo.md(f"""
## Data Validation
{check_progress(data_all)}
@app.cell
def _(data_all, duration_validation):
duration_validation(data_all)
{duration_validation(data_all)}
""")
return
@@ -92,9 +88,42 @@ def _(mo):
return
@app.cell
def _(data_all, survey):
data = survey.filter_data(data_all, age=None, gender=None, income=None, ethnicity=None, consumer=None)
@app.cell(hide_code=True)
def _(S, mo):
filter_form = mo.md('''
# Data Filter
{age}
{gender}
{ethnicity}
{income}
{consumer}
'''
).batch(
age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
).form()
filter_form
return (filter_form,)
@app.cell(hide_code=True)
def _(S, data_all, filter_form, mo):
mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
_d = S.filter_data(data_all, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])
# Stop execution and prevent other cells from running if no data is selected
mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
data = _d
data.collect()
return (data,)
@@ -112,47 +141,48 @@ def _(mo):
def _(mo):
mo.md(r"""
## Character personality ranking
### 1. Which character personality is ranked best?
""")
return
@app.cell
def _(data, survey):
char_rank = survey.get_character_ranking(data)[0].collect()
def _(S, data):
char_rank = S.get_character_ranking(data)[0]
return (char_rank,)
@app.cell
def _(char_rank, plot_character_ranking_distribution):
plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
return
def _(S, char_rank, mo):
mo.md(f"""
### 1. Which character personality is ranked best?
@app.cell
def _(mo):
mo.md(r"""
### 2. Which character personality is ranked number 1 the most?
{mo.ui.altair_chart(S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality'))}
""")
return
@app.cell
def _(
calculate_weighted_ranking_scores,
char_rank,
plot_weighted_ranking_score,
):
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
def _(S, char_rank, mo):
mo.md(f"""
### 2. Which character personality is ranked 1st the most?
{mo.ui.altair_chart(S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
""")
return
@app.cell
def _(char_rank, plot_most_ranked_1_character):
plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
def _(S, calculate_weighted_ranking_scores, char_rank, mo):
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
mo.md(f"""
### 3. Which character personality most popular based on weighted scores?
{mo.ui.altair_chart(S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
""")
return
@@ -165,53 +195,73 @@ def _(mo):
@app.cell
def _(data, survey):
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
print(v_18_8_3.head())
return
def _(S, data):
v_18_8_3 = S.get_18_8_3(data)[0].collect()
# print(v_18_8_3.head())
return (v_18_8_3,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 8 voices are chosen the most out of 18?
def _(S, mo, v_18_8_3):
mo.md(f"""
### Which 8 voices are chosen the most out of 18?
{mo.ui.altair_chart(S.plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
def _(S, mo, v_18_8_3):
mo.md(f"""
### Which 3 voices are chosen the most out of 18?
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants Top 3, after they first selected 8 out of 18.
{mo.ui.altair_chart(S.plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
def _(S, calculate_weighted_ranking_scores, data):
top3_voices = S.get_top_3_voices(data)[0]
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
return top3_voices, top3_voices_weighted
@app.cell
def _(S, mo, top3_voices):
mo.md(f"""
### Which voice is ranked best in the ranking question for top 3?
(not best 3 out of 8 question)
{mo.ui.altair_chart(S.plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
""")
return
@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
def _(S, mo, top3_voices_weighted):
mo.md(f"""
### Most popular **voice** based on weighted scores?
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
Distribution of the rankings for each voice:
{mo.ui.altair_chart(S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
Which voice is ranked number 1 the most? (not always the voice with most points)
@app.cell
def _(S, mo, top3_voices):
mo.md(f"""
### Which voice is ranked number 1 the most?
- Each of the 350 participants gives exactly one 1st-place vote.
- Total Rank-1 votes = 350.
- Voices are sorted from most to least 1st-place votes.
- The top 3 voices with the most Rank-1 votes are colored blue.
- This can differ from the points-based winners (321 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
(not always the voice with most points)
{mo.ui.altair_chart(S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
""")
return
@@ -220,18 +270,42 @@ def _(mo):
def _(mo):
mo.md(r"""
## Voice Speaking Style - Perception Traits
Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
""")
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
How does each voice score for each “speaking style labeled trait”? Here you can find the speaking styles and traits: [Speaking Style Traits Quantitative test design.docx](https://voicebranding-my.sharepoint.com/:w:/g/personal/phoebe_voicebranding_ai/IQBfM_Z8PF98Qalz4lzIbJ3RAUCdc7waB32HZXCj7k3xfo0?e=rtFd27)
@app.cell
def _(S, data, utils):
ss_or, choice_map_or = S.get_ss_orange_red(data)
ss_gb, choice_map_gb = S.get_ss_green_blue(data)
- There are 4 speaking styles: Green, Blue, Orange, Red.
- There are 16 traits distributed across the 4 speaking styles.
""")
# Combine the data
ss_all = ss_or.join(ss_gb, on='_recordId')
_d = ss_all.collect()
choice_map = {**choice_map_or, **choice_map_gb}
# print(_d.head())
# print(choice_map)
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
return choice_map, ss_all, ss_long
@app.cell
def _(S, mo, pl, ss_long):
content = """### How does each voice score for each “speaking style labeled trait”?"""
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
trait_d = ss_long.filter(pl.col("Description") == trait)
content += f"""
### {i+1}) {trait.replace(":", "")}
{mo.ui.altair_chart(S.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", ""), height=550))}
"""
mo.md(content)
return
@@ -244,23 +318,18 @@ def _(mo):
@app.cell
def _(data, mo, plot_average_scores_with_counts, survey):
vscales = survey.get_voice_scale_1_10(data)[0].collect()
plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
mo.md(f"""
How does each voice score on a scale from 1-10?
{mo.ui.plotly(plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
""")
return
def _(S, data):
vscales = S.get_voice_scale_1_10(data)[0]
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
return (vscales,)
@app.cell
def _(mo):
mo.md(r"""
def _(S, mo, vscales):
mo.md(f"""
### How does each voice score on a scale from 1-10?
{mo.ui.altair_chart(S.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
""")
return
@@ -286,16 +355,57 @@ def _(mo):
return
@app.cell
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
### Total Results
### How to Interpret These Correlation Results
Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).
- [ ] 4 correlation diagrams
**Reading the Chart**
| Correlation Value | Interpretation |
|-----------|----------|
| r > 0 (Green bars)| Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
| r < 0 (Red bars)| Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
| r ≈ 0| No relationship — this trait doesn't predict Voice Scale ratings|
""")
return
@app.cell
def _(choice_map, ss_all, utils, vscales):
df_style = utils.process_speaking_style_data(ss_all, choice_map)
df_voice_long = utils.process_voice_scale_data(vscales)
joined_df = df_style.join(df_voice_long, on=["_recordId", "Voice"], how="inner")
# df_voice_long
return df_style, joined_df
@app.cell
def _(S, SPEAKING_STYLES, joined_df, mo):
_content = """### Total Results
"""
for style, traits in SPEAKING_STYLES.items():
# print(f"Correlation plot for {style}...")
fig = S.plot_speaking_style_correlation(
data=joined_df,
style_color=style,
style_traits=traits,
title=f"Correlation: Speaking Style {style} and Voice Scale 1-10"
)
_content += f"""
#### Speaking Style **{style}**:
{mo.ui.altair_chart(fig)}
"""
mo.md(_content)
return
@app.cell
def _(mo):
mo.md(r"""
@@ -338,6 +448,30 @@ def _(mo):
return
@app.cell
def _(S, SPEAKING_STYLES, df_style, mo, top3_voices, utils):
df_ranking = utils.process_voice_ranking_data(top3_voices)
joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
_content = """## Correlations Voice Speaking Styles <-> Voice Ranking Points
"""
for _style, _traits in SPEAKING_STYLES.items():
_fig = S.plot_speaking_style_ranking_correlation(data=joined, style_color=_style, style_traits=_traits)
_content += f"""
#### Speaking Style **{_style}**:
{mo.ui.altair_chart(_fig)}
"""
mo.md(_content)
return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,60 @@
import polars as pl
from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
from plots import plot_speaking_style_correlation
from speaking_styles import SPEAKING_STYLES

# Standalone driver: load the soft-launch survey export, reshape the
# speaking-style and voice-scale responses to long format, join them per
# respondent/voice, and show one correlation figure per speaking style.

# 1. Initialize Survey and Load Data
# We need to point to the actual data files if possible, or use standard paths
# Assuming the file structure observed in workspace:
# Data: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv
# QSF: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf
RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"

try:
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
except TypeError:
    # Fallback if signature is different or file not found (just in case)
    print("Error initializing survey with paths. Checking signature...")
    # This part is just for debugging if it fails again
    raise

data = survey.load_data()

# 2. Extract Data
# Speaking Styles
ss_gb, map_gb = survey.get_ss_green_blue(data)
ss_or, map_or = survey.get_ss_orange_red(data)
# Voice Scale 1-10
voice_scale, _ = survey.get_voice_scale_1_10(data)

# 3. Process Dataframes (Wide to Long)
# Note: process_speaking_style_data handles the melt and parsing
# We collect them because the plotting functions expect eager DataFrames usually,
# but polars functions here return eager DFs currently based on `utils.py` implementation (return result.collect())
df_style_gb = process_speaking_style_data(ss_gb, map_gb)
df_style_or = process_speaking_style_data(ss_or, map_or)
# Combine both style dataframes
df_style_all = pl.concat([df_style_gb, df_style_or])

# Process Voice Scale
df_voice_long = process_voice_scale_data(voice_scale)

# 4. Join Style + Voice Data
joined_df = join_voice_and_style_data(df_style_all, df_voice_long)

# 5. Generate Plots for each Style Color
# NOTE(review): this calls the module-level plot function with `df=`, while the
# notebook calls the mixin method with `data=` — confirm the keyword matches the
# actual signature of plots.plot_speaking_style_correlation.
for style, traits in SPEAKING_STYLES.items():
    print(f"Generating plot for {style}...")
    fig = plot_speaking_style_correlation(
        df=joined_df,
        style_color=style,
        style_traits=traits
    )
    fig.show()
    # If in Marimo/Jupyter, just 'fig' or 'mo.ui.plotly(fig)'

1186
plots.py

File diff suppressed because it is too large Load Diff

View File

@@ -14,12 +14,13 @@ dependencies = [
"openai>=2.9.0",
"openpyxl>=3.1.5",
"pandas>=2.3.3",
"plotly>=6.5.1",
"polars>=1.37.1",
"pyarrow>=23.0.0",
"pysqlite3>=0.6.0",
"pyzmq>=27.1.0",
"requests>=2.32.5",
"taguette>=1.5.1",
"vl-convert-python>=1.9.0.post1",
"wordcloud>=1.9.5",
]

33
speaking_styles.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Mapping of Speaking Styles (Colors) to their constituent Traits (Positive side).
Derived from "Speaking Style Traits Quantitative test design.pdf".
"""
SPEAKING_STYLES = {
"Green": [
"Friendly | Conversational | Down-to-earth",
"Approachable | Familiar | Warm",
"Optimistic | Benevolent | Positive | Appreciative"
],
"Blue": [
"Proactive | Cooperative",
"Knowledgable | Resourceful | Savvy",
"Clear | Straightforward | Direct",
"Confident | Competent",
"Respectable | Respectful"
],
"Orange": [
"Attentive | Helpful | Caring | Deliberate",
"Reassuring | Empowering",
"Progressive | Guiding | Intentional",
"Patient | Open-minded"
],
"Red": [
"Trustworthy | Reliable | Dependable",
"Calm | Steady/Stable | Controlled",
"Transparent | Upright | Altruistic",
"Adaptive | Flexible"
]
}

View File

@@ -16,7 +16,65 @@ class ColorPalette:
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
# Neutral color for unhighlighted comparison items
NEUTRAL = "#D3D3D3" # Light Grey
# General UI elements
TEXT = "black"
GRID = "lightgray"
BACKGROUND = "white"
def jpmc_altair_theme():
    """JPMC brand theme for Altair charts.

    Returns a Vega-Lite ``config`` dictionary suitable for registration via
    Altair's theme mechanism. All colors come from :class:`ColorPalette`.
    """
    # Shared axis styling; both x and y axes inherit these settings.
    axis_defaults = {
        'grid': True,
        'gridColor': ColorPalette.GRID,
        'labelFontSize': 11,
        'titleFontSize': 12,
        'labelColor': ColorPalette.TEXT,
        'titleColor': ColorPalette.TEXT,
        'labelLimit': 200  # Allow longer labels before truncation
    }
    chart_config = {
        'view': {
            'continuousWidth': 1000,
            'continuousHeight': 500,
            'strokeWidth': 0
        },
        'background': ColorPalette.BACKGROUND,
        'axis': axis_defaults,
        # Slanted x labels so long voice/trait names stay readable.
        'axisX': {
            'labelAngle': -45,
            'labelLimit': 200  # Allow longer x-axis labels
        },
        'axisY': {
            'labelAngle': 0
        },
        'legend': {
            'orient': 'top',
            'direction': 'horizontal',
            'titleFontSize': 11,
            'labelFontSize': 11
        },
        'title': {
            'fontSize': 14,
            'color': ColorPalette.TEXT,
            'anchor': 'start',
            'subtitleFontSize': 10,
            'subtitleColor': 'gray'
        },
        'bar': {
            'color': ColorPalette.PRIMARY
        }
    }
    return {'config': chart_config}
# Register Altair theme
# Module import side effect: registering and enabling the 'jpmc' theme means
# every Altair chart built after importing this module picks up the branding.
# NOTE(review): `alt.themes` is deprecated in Altair 5.5+ in favor of
# `alt.theme.register` — confirm against the pinned Altair version.
try:
    import altair as alt
    alt.themes.register('jpmc', jpmc_altair_theme)
    alt.themes.enable('jpmc')
except ImportError:
    pass  # Altair not installed

260
utils.py
View File

@@ -3,8 +3,10 @@ from pathlib import Path
import pandas as pd
from typing import Union
import json
import re
from plots import JPMCPlotsMixin
import marimo as mo
def extract_voice_label(html_str: str) -> str:
"""
@@ -55,24 +57,27 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
def calculate_weighted_ranking_scores(df: pl.LazyFrame) -> pl.DataFrame:
"""
Calculate weighted scores for character rankings.
Calculate weighted scores for character or voice rankings.
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
Parameters
----------
df : pl.DataFrame
DataFrame containing character ranking columns.
DataFrame containing character/ voice ranking columns.
Returns
-------
pl.DataFrame
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
"""
if isinstance(df, pl.LazyFrame):
df = df.collect()
scores = []
# Identify columns related to Character Ranking
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
# Identify ranking columns (assume all columns except _recordId)
ranking_cols = [c for c in df.columns if c != '_recordId']
for col in ranking_cols:
# Calculate score:
@@ -84,7 +89,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
# Clean name
clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
scores.append({
'Character': clean_name,
@@ -94,7 +99,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
return pl.DataFrame(scores).sort('Weighted Score', descending=True)
class JPMCSurvey:
class JPMCSurvey(JPMCPlotsMixin):
"""Class to handle JPMorgan Chase survey data."""
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
@@ -109,6 +114,23 @@ class JPMCSurvey:
self.qid_descr_map = self._extract_qid_descr_map()
self.qsf:dict = self._load_qsf()
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
if not self.fig_save_dir.exists():
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
self.data_filtered = None
self.plot_height = 500
self.plot_width = 1000
# Filter values
self.filter_age:list = None
self.filter_gender:list = None
self.filter_consumer:list = None
self.filter_ethnicity:list = None
self.filter_income:list = None
def _extract_qid_descr_map(self) -> dict:
"""Extract mapping of Qualtrics ImportID to Question Description from results file."""
@@ -188,6 +210,13 @@ class JPMCSurvey:
# Rename columns with the extracted ImportIds
df.columns = new_columns
# Store unique values for filters (ignoring nulls) to detect "all selected" state
self.options_age = sorted(df['QID1'].drop_nulls().unique().to_list()) if 'QID1' in df.columns else []
self.options_gender = sorted(df['QID2'].drop_nulls().unique().to_list()) if 'QID2' in df.columns else []
self.options_consumer = sorted(df['Consumer'].drop_nulls().unique().to_list()) if 'Consumer' in df.columns else []
self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
return df.lazy()
def _get_subset(self, q: pl.LazyFrame, QIDs, rename_cols=True, include_record_id=True) -> pl.LazyFrame:
@@ -213,25 +242,32 @@ class JPMCSurvey:
- ethnicity: list
- income: list
Returns filtered polars LazyFrame.
Also saves the result to self.data_filtered.
"""
# Apply filters
self.filter_age = age
if age is not None:
q = q.filter(pl.col('QID1').is_in(age))
self.filter_gender = gender
if gender is not None:
q = q.filter(pl.col('QID2').is_in(gender))
self.filter_consumer = consumer
if consumer is not None:
q = q.filter(pl.col('Consumer').is_in(consumer))
self.filter_ethnicity = ethnicity
if ethnicity is not None:
q = q.filter(pl.col('QID3').is_in(ethnicity))
self.filter_income = income
if income is not None:
q = q.filter(pl.col('QID15').is_in(income))
return q
self.data_filtered = q
return self.data_filtered
def get_demographics(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
"""Extract columns containing the demographics.
@@ -415,6 +451,210 @@ class JPMCSurvey:
return self._get_subset(q, QIDs, rename_cols=True), None
def process_speaking_style_data(
    df: Union[pl.LazyFrame, pl.DataFrame],
    trait_map: dict[str, str]
) -> pl.DataFrame:
    """
    Process speaking style columns from wide to long format and map trait descriptions.

    Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
    Example: SS_Orange_Red__V14__Choice_1

    Parameters
    ----------
    df : pl.LazyFrame or pl.DataFrame
        Input dataframe containing SS_* columns.
    trait_map : dict
        Dictionary mapping column names to trait descriptions.
        Keys should be full column names like "SS_Orange_Red__V14__Choice_1".

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Style_Group, Choice_ID, Description, Score, Left_Anchor, Right_Anchor
    """
    # Normalize input to LazyFrame
    lf = df.lazy() if isinstance(df, pl.DataFrame) else df

    # 1. Melt SS_ columns
    # NOTE(review): `melt` is deprecated in recent polars in favor of
    # `unpivot` — confirm behavior against the pinned polars version.
    melted = lf.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^SS_.*$"),
        variable_name="full_col_name",
        value_name="score"
    )

    # 2. Extract components from column name
    # Regex captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14), Choice_ID (e.g. Choice_1)
    pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"
    # extract_groups yields a struct column; unnest splits it into
    # Style_Group / Voice / Choice_ID columns (replacing full_col_name).
    processed = melted.with_columns(
        pl.col("full_col_name").str.extract_groups(pattern)
    ).unnest("full_col_name")

    # 3. Create Mapping Lookup from the provided dictionary
    # We map (Style_Group, Choice_ID) -> Description
    mapping_data = []
    seen = set()  # first column name wins per (Style_Group, Choice_ID) pair
    for col_name, desc in trait_map.items():
        match = re.match(pattern, col_name)
        if match:
            groups = match.groupdict()
            key = (groups["Style_Group"], groups["Choice_ID"])
            if key not in seen:
                # Parse description into anchors if possible (Left : Right)
                parts = desc.split(':')
                left_anchor = parts[0].strip() if len(parts) > 0 else ""
                right_anchor = parts[1].strip() if len(parts) > 1 else ""
                mapping_data.append({
                    "Style_Group": groups["Style_Group"],
                    "Choice_ID": groups["Choice_ID"],
                    "Description": desc,
                    "Left_Anchor": left_anchor,
                    "Right_Anchor": right_anchor
                })
                seen.add(key)

    # No usable mapping entries: return the melted/parsed data without
    # descriptions (and without the Int cast applied below).
    if not mapping_data:
        return processed.collect()

    mapping_lf = pl.LazyFrame(mapping_data)

    # 4. Join Data with Mapping
    result = processed.join(
        mapping_lf,
        on=["Style_Group", "Choice_ID"],
        how="left"
    )

    # 5. Cast score to Int (strict=False: non-castable values become null)
    result = result.with_columns(
        pl.col("score").cast(pl.Int64, strict=False)
    )

    return result.collect()
def process_voice_scale_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Voice Scale columns from wide to long format.

    Columns named ``Voice_Scale_1_10__V{n}`` (e.g. ``Voice_Scale_1_10__V14``)
    are unpivoted into one row per respondent/voice.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Voice_Scale_Score
    """
    # Work lazily regardless of what the caller handed us.
    lazy = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long: one row per (respondent, voice-scale column).
    long_form = lazy.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Voice_Scale_1_10__V.*$"),
        variable_name="full_col_name",
        value_name="Voice_Scale_Score"
    )

    # Recover the voice label ("V14") from the source column name.
    with_voice = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"V(\d+)", 1).alias("Voice_Num")
    ).with_columns(
        ("V" + pl.col("Voice_Num")).alias("Voice")
    )

    # Scores stay floating point (source data is f64); strict=False turns
    # non-castable values into nulls instead of raising.
    return with_voice.select([
        "_recordId",
        "Voice",
        pl.col("Voice_Scale_Score").cast(pl.Float64, strict=False)
    ]).collect()
def join_voice_and_style_data(
    processed_style_data: pl.DataFrame,
    processed_voice_data: pl.DataFrame
) -> pl.DataFrame:
    """
    Inner-join processed Speaking Style data with Voice Scale 1-10 data.

    Parameters
    ----------
    processed_style_data : pl.DataFrame
        Result of process_speaking_style_data
    processed_voice_data : pl.DataFrame
        Result of process_voice_scale_data

    Returns
    -------
    pl.DataFrame
        Merged dataframe with columns from both, joined on _recordId and Voice.
    """
    join_keys = ["_recordId", "Voice"]
    # Inner join: keep only respondent/voice pairs present in BOTH inputs.
    return processed_style_data.join(
        processed_voice_data, on=join_keys, how="inner"
    )
def process_voice_ranking_data(
    df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame:
    """
    Reshape Top-3 voice ranking columns to long format and score them.

    Columns named ``Top_3_Voices_ranking__V{n}`` hold a rank (1-3) or null
    when the voice was not ranked. Ranks become points:
    1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.

    Returns
    -------
    pl.DataFrame
        Long-format dataframe with columns:
        _recordId, Voice, Ranking_Points
    """
    # Work lazily regardless of what the caller handed us.
    lazy = df.lazy() if isinstance(df, pl.DataFrame) else df

    # Wide -> long over the ranking columns.
    long_form = lazy.melt(
        id_vars=["_recordId"],
        value_vars=pl.col("^Top_3_Voices_ranking__V.*$"),
        variable_name="full_col_name",
        value_name="rank"
    )

    # Recover the voice label ("V14") from the source column name.
    with_voice = long_form.with_columns(
        pl.col("full_col_name").str.extract(r"V(\d+)", 1).alias("Voice_Num")
    ).with_columns(
        ("V" + pl.col("Voice_Num")).alias("Voice")
    )

    # Map rank -> points (1st=3, 2nd=2, 3rd=1); anything else, including
    # null (voice not ranked), scores 0.
    points_expr = (
        pl.when(pl.col("rank") == 1).then(3)
        .when(pl.col("rank") == 2).then(2)
        .when(pl.col("rank") == 3).then(1)
        .otherwise(0)
        .alias("Ranking_Points")
    )
    return with_voice.with_columns(points_expr).select([
        "_recordId",
        "Voice",
        "Ranking_Points"
    ]).collect()

75
uv.lock generated
View File

@@ -1332,12 +1332,13 @@ dependencies = [
{ name = "openai" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "plotly" },
{ name = "polars" },
{ name = "pyarrow" },
{ name = "pysqlite3" },
{ name = "pyzmq" },
{ name = "requests" },
{ name = "taguette" },
{ name = "vl-convert-python" },
{ name = "wordcloud" },
]
@@ -1352,12 +1353,13 @@ requires-dist = [
{ name = "openai", specifier = ">=2.9.0" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pandas", specifier = ">=2.3.3" },
{ name = "plotly", specifier = ">=6.5.1" },
{ name = "polars", specifier = ">=1.37.1" },
{ name = "pyarrow", specifier = ">=23.0.0" },
{ name = "pysqlite3", specifier = ">=0.6.0" },
{ name = "pyzmq", specifier = ">=27.1.0" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "taguette", specifier = ">=1.5.1" },
{ name = "vl-convert-python", specifier = ">=1.9.0.post1" },
{ name = "wordcloud", specifier = ">=1.9.5" },
]
@@ -1430,19 +1432,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" },
]
[[package]]
name = "plotly"
version = "6.5.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "narwhals" },
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d6/ff/a4938b75e95114451efdb34db6b41930253e67efc8dc737bd592ef2e419d/plotly-6.5.1.tar.gz", hash = "sha256:b0478c8d5ada0c8756bce15315bcbfec7d3ab8d24614e34af9aff7bfcfea9281", size = 7014606, upload-time = "2026-01-07T20:11:41.644Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/8e/24e0bb90b2d75af84820693260c5534e9ed351afdda67ed6f393a141a0e2/plotly-6.5.1-py3-none-any.whl", hash = "sha256:5adad4f58c360612b6c5ce11a308cdbc4fd38ceb1d40594a614f0062e227abe1", size = 9894981, upload-time = "2026-01-07T20:11:38.124Z" },
]
[[package]]
name = "polars"
version = "1.37.1"
@@ -1521,6 +1510,49 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" },
]
[[package]]
name = "pyarrow"
version = "23.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" },
{ url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" },
{ url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" },
{ url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" },
{ url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" },
{ url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" },
{ url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" },
{ url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
{ url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
{ url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
{ url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
{ url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
{ url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
{ url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" },
{ url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
{ url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
{ url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
{ url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
{ url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
{ url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
{ url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" },
{ url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
{ url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
{ url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
{ url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
{ url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
{ url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
{ url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" },
{ url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
{ url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
{ url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
{ url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
{ url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
{ url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
{ url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
]
[[package]]
name = "pycparser"
version = "2.23"
@@ -2135,6 +2167,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
]
[[package]]
name = "vl-convert-python"
version = "1.9.0.post1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/93/89/36722344d1758ec2106f4e8eca980f173cfe8f8d0358c1b77cc5d2e035a4/vl_convert_python-1.9.0.post1.tar.gz", hash = "sha256:a5b06b3128037519001166f5341ec7831e19fbd7f3a5f78f73d557ac2d5859ef", size = 4663469, upload-time = "2026-01-21T00:09:55.61Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9f/59/e5862245972ff467d38b0eb5ad28154685e23ecabb47e14f2b6962da7b56/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:43e9515f65bbcd317d1ef328787fd7bf0344c2fde9292eb7a0e64d5d3d29fccb", size = 30512930, upload-time = "2026-01-21T00:09:43.198Z" },
{ url = "https://files.pythonhosted.org/packages/62/e6/e7d0b538c2f0daaf120901dc113bd5d5d1fa51a9532fa5ffd90234e8c69e/vl_convert_python-1.9.0.post1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b0e7a3245f32addec7e7abeb1badf72b1513ed71ba1dba7aca853901217b3f4e", size = 29738742, upload-time = "2026-01-21T00:09:46.016Z" },
{ url = "https://files.pythonhosted.org/packages/b8/e2/5645a1bc174c53ff8cd305ed76a4a76ba36e155302db20b42b7e78daeef8/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6ecfe4b7e2ea9e8c30fd6d6eaea3ef85475be1ad249407d9796dce4ecdb5b32", size = 33366278, upload-time = "2026-01-21T00:09:48.42Z" },
{ url = "https://files.pythonhosted.org/packages/a0/18/88e02899b72fa8273ffb32bde12b0e5776ee0fd9fb29559a49c48ec4c5fa/vl_convert_python-1.9.0.post1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c1558fa0055e88c465bd3d71760cde9fa2c94a95f776a0ef9178252fd820b1f", size = 33520215, upload-time = "2026-01-21T00:09:50.992Z" },
{ url = "https://files.pythonhosted.org/packages/2f/db/6e8616587035bf0745d0f10b1791c7e945180ac5d6b28677d2f2b3ca693c/vl_convert_python-1.9.0.post1-cp37-abi3-win_amd64.whl", hash = "sha256:7e263269ac0d304640ca842b44dfe430ed863accd9edecff42e279bfc48ce940", size = 32051516, upload-time = "2026-01-21T00:09:53.47Z" },
]
[[package]]
name = "webencodings"
version = "0.5.1"

View File

@@ -5,9 +5,9 @@ import polars as pl
def check_progress(data):
"""Check if all responses are complete based on 'progress' column."""
if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
def duration_validation(data):
@@ -30,10 +30,9 @@ def duration_validation(data):
outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
if outlier_data.shape[0] == 0:
return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
return "### Duration Outliers: \n\n✅ No duration outliers detected"
return mo.md(f"""
### Duration Outliers:
return f"""### Duration Outliers:
**⚠️ Potential outliers detected based on response duration ⚠️**
@@ -50,5 +49,5 @@ def duration_validation(data):
**⚠️ NOTE: These have not been removed from the dataset ⚠️**
""")
"""

File diff suppressed because it is too large Load Diff

20
voices.py Normal file
View File

@@ -0,0 +1,20 @@
Voice Reference Gender
Voice 14 Female
Voice 04 Female
Voice 08 Female
Voice 77 Female
Voice 48 Female
Voice 82 Female
Voice 89 Female
Voice 91 Emily (Current IVR Voice) Female
Voice 34 Male
Voice 69 Male
Voice 45 Male
Voice 46 Male
Voice 54 Male
Voice 74 Male
Voice 81 Male
Voice 86 Male
Voice 88 Male
Voice 16 Male