Compare commits
2 Commits
a162701e94
...
8dd41dfc96
| Author | SHA1 | Date | |
|---|---|---|---|
| 8dd41dfc96 | |||
| 840cb4e6dc |
632
03_quant_report.script.py
Normal file
632
03_quant_report.script.py
Normal file
@@ -0,0 +1,632 @@
|
||||
|
||||
__generated_with = "0.19.7"
|
||||
|
||||
# %%
|
||||
import marimo as mo
|
||||
import polars as pl
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from validation import check_progress, duration_validation, check_straight_liners
|
||||
from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||
import utils
|
||||
|
||||
from speaking_styles import SPEAKING_STYLES
|
||||
|
||||
# %%
|
||||
# CLI argument parsing for batch automation
|
||||
# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
|
||||
# When run in Jupyter: args will use defaults (all filters = None = all options selected)
|
||||
def parse_cli_args():
    """Parse optional demographic filter arguments for batch automation.

    Each flag takes a JSON-encoded list of labels, e.g.
    ``--age '["18 to 21 years"]'``.  When running interactively
    (Jupyter/IPython detected via ``get_ipython``), ``sys.argv`` belongs to
    the kernel, so CLI parsing is skipped and every filter defaults to
    ``None`` — downstream code treats ``None`` as "all options selected".

    Returns:
        argparse.Namespace with attributes ``age``, ``gender``,
        ``ethnicity``, ``income`` and ``consumer``; each is a list of
        label strings or ``None``.
    """
    filter_names = ('age', 'gender', 'ethnicity', 'income', 'consumer')

    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
    for name in filter_names:
        parser.add_argument(f'--{name}', type=str, default=None,
                            help=f'JSON list of {name} values')

    try:
        # Check if running in Jupyter by looking for ipykernel
        get_ipython()  # noqa: F821
        return argparse.Namespace(**{name: None for name in filter_names})
    except NameError:
        args = parser.parse_args()
        # Decode each JSON string into a list; absent flags stay None.
        for name in filter_names:
            raw = getattr(args, name)
            setattr(args, name, json.loads(raw) if raw else None)
        return args
|
||||
|
||||
# Resolve CLI filters once so the rest of the script treats them as constants.
cli_args = parse_cli_args()


# %%

# Interactive file picker (notebook-only); disabled for batch automation.
# file_browser = mo.ui.file_browser(
# initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
# )
# file_browser

# # %%
# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
# RESULTS_FILE = Path(file_browser.path(index=0))

# Fixed export paths used for batch runs (see README "Batch Report Generation").
RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'

# %%
# Load the Qualtrics export; S also exposes options_* lists, data
# transforms, and the plotting helpers used throughout this report.
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
try:
    data_all = S.load_data()
except NotImplementedError as e:
    # mo.stop(True, ...) halts the marimo notebook with a visible message
    # instead of letting the exception propagate.
    mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))

# %%
# Character whose trait set drives the "best character" correlation
# sections near the end of the report.
BEST_CHOSEN_CHARACTER = "the_coach"

# Interactive filter form (notebook-only); superseded by CLI args for batch runs.
# # %%
# filter_form = mo.md('''
# {age}
# {gender}
# {ethnicity}
# {income}
# {consumer}
# '''
# ).batch(
# age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
# gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
# ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
# income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
# consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
# ).form()
# mo.md(f'''
# ---

# # Data Filter

# {filter_form}
# ''')

# %%
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
# CLI args: None means "all options selected" (use S.options_* defaults)
_filter_age = cli_args.age if cli_args.age is not None else S.options_age
_filter_gender = cli_args.gender if cli_args.gender is not None else S.options_gender
_filter_ethnicity = cli_args.ethnicity if cli_args.ethnicity is not None else S.options_ethnicity
_filter_income = cli_args.income if cli_args.income is not None else S.options_income
_filter_consumer = cli_args.consumer if cli_args.consumer is not None else S.options_consumer

# Apply the demographic filters to the full lazy frame.
_d = S.filter_data(data_all, age=_filter_age, gender=_filter_gender, income=_filter_income, ethnicity=_filter_ethnicity, consumer=_filter_consumer)

# Stop execution and prevent other cells from running if no data is selected
# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
data = _d

# data = data_validated
# Materialize once so the lazy frame is evaluated (and shown in the notebook).
data.collect()
|
||||
|
||||
# %%


# %%
# Sanity check (disabled): all business owners should lack a 'Consumer type'.
# assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."

# %%
mo.md(r"""
# Demographic Distributions
""")

# %%
# Demographic columns to plot. 'Bussiness_Owner' is the (misspelled)
# column name as it appears in the export; display titles correct the
# spelling via the .replace() call below.
demo_plot_cols = [
    'Age',
    'Gender',
    # 'Race/Ethnicity',
    'Bussiness_Owner',
    'Consumer'
]

# %%
# Render one distribution chart per demographic column and collect them
# into a single markdown cell.
_content = """

"""
for c in demo_plot_cols:
    _fig = S.plot_demographic_distribution(
        data=S.get_demographics(data)[0],
        column=c,
        title=f"{c.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
    )
    _content += f"""{mo.ui.altair_chart(_fig)}\n\n"""

mo.md(_content)
|
||||
|
||||
# %%
mo.md(r"""
---

# Brand Character Results
""")

# %%
mo.md(r"""
## Best performing: Original vs Refined frankenstein
""")

# %%
# Ranking of original vs refined ("frankenstein") character variants.
char_refine_rank = S.get_character_refine(data)[0]
# print(char_rank.collect().head())
print(char_refine_rank.collect().head())

# %%
mo.md(r"""
## Character ranking points
""")

# %%
mo.md(r"""
## Character ranking 1-2-3
""")

# %%
char_rank = S.get_character_ranking(data)[0]

# %%
# Weighted score: 1st place = 3 pts, 2nd = 2 pts, 3rd = 1 pt.
# NOTE: x_label previously said 'Voice' — a copy-paste from the voice
# section; this chart plots character personalities.
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Character Personality')

# %%
S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')

# %%
mo.md(r"""
### Statistical Significance Character Ranking
""")

# %%
# Pairwise significance tests on character rankings, shown as a heatmap
# plus a per-character summary chart.
_pairwise_df, _meta = S.compute_ranking_significance(char_rank)

# print(_pairwise_df.columns)

mo.md(f"""

{mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}

{mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
""")

# %%
mo.md(r"""
## Character Ranking: times 1st place
""")

# %%
S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')

# %%
mo.md(r"""
## Prominent predefined personality traits wordcloud
""")

# %%
# Word cloud of the 8 traits each respondent picked as most prominent.
top8_traits = S.get_top_8_traits(data)[0]
S.plot_traits_wordcloud(
    data=top8_traits,
    column='Top_8_Traits',
    title="Most Prominent Personality Traits",
)
|
||||
|
||||
# %%
mo.md(r"""
## Trait frequency per brand character
""")

# %%
char_df = S.get_character_refine(data)[0]

# %%
from theme import ColorPalette

# Assuming you already have char_df (your data from get_character_refine or similar)
characters = ['Bank Teller', 'Familiar Friend', 'The Coach', 'Personal Assistant']
# Per-character (base, highlight) color pairs from the shared theme.
character_colors = {
    'Bank Teller': (ColorPalette.CHARACTER_BANK_TELLER, ColorPalette.CHARACTER_BANK_TELLER_HIGHLIGHT),
    'Familiar Friend': (ColorPalette.CHARACTER_FAMILIAR_FRIEND, ColorPalette.CHARACTER_FAMILIAR_FRIEND_HIGHLIGHT),
    'The Coach': (ColorPalette.CHARACTER_COACH, ColorPalette.CHARACTER_COACH_HIGHLIGHT),
    'Personal Assistant': (ColorPalette.CHARACTER_PERSONAL_ASSISTANT, ColorPalette.CHARACTER_PERSONAL_ASSISTANT_HIGHLIGHT),
}

# Build consistent sort order (by total frequency across all characters)
# so the four charts below share the same trait axis ordering.
all_trait_counts = {}
for char in characters:
    freq_df, _ = S.transform_character_trait_frequency(char_df, char)
    for row in freq_df.iter_rows(named=True):
        all_trait_counts[row['trait']] = all_trait_counts.get(row['trait'], 0) + row['count']

consistent_sort_order = sorted(all_trait_counts.keys(), key=lambda x: -all_trait_counts[x])

_content = """"""
# Generate 4 plots (one per character)
for char in characters:
    freq_df, _ = S.transform_character_trait_frequency(char_df, char)
    main_color, highlight_color = character_colors[char]
    chart = S.plot_single_character_trait_frequency(
        data=freq_df,
        character_name=char,
        bar_color=main_color,
        highlight_color=highlight_color,
        trait_sort_order=consistent_sort_order,
    )
    _content += f"""
{mo.ui.altair_chart(chart)}


"""

mo.md(_content)

# %%
mo.md(r"""
## Statistical significance best characters

zie chat
> voorbeeld: als de nr 1 en 2 niet significant verschillen maar wel van de nr 3 bijvoorbeeld is dat ook top. Beetje meedenkend over hoe ik het kan presenteren weetje wat ik bedoel?:)
>
""")
|
||||
|
||||
# %%


# %%


# %%
mo.md(r"""
---

# Spoken Voice Results
""")

# %%
# Toggle: color the voice charts by speaker gender.
COLOR_GENDER = True

# %%
mo.md(r"""
## Top 8 Most Chosen out of 18
""")

# %%
# Funnel data: 18 voices -> top 8 -> top 3 per respondent.
v_18_8_3 = S.get_18_8_3(data)[0]

# %%
S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Top 3 most chosen out of 8
""")

# %%
S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Voice Ranking Weighted Score
""")

# %%
# Weighted score over the top-3 ranking question: 1st=3, 2nd=2, 3rd=1.
top3_voices = S.get_top_3_voices(data)[0]
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)

# %%
S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Which voice is ranked best in the ranking question for top 3?

(not best 3 out of 8 question)
""")

# %%
S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
### Statistical significance for voice ranking
""")

# %%
# print(top3_voices.collect().head())

# %%
# Disabled: pairwise significance on voice rankings (kept for reference).
# _pairwise_df, _metadata = S.compute_ranking_significance(
# top3_voices,alpha=0.05,correction="none")

# # View significant pairs
# # print(pairwise_df.filter(pl.col('significant') == True))

# # Create heatmap visualization
# _heatmap = S.plot_significance_heatmap(
# _pairwise_df,
# metadata=_metadata,
# title="Weighted Voice Ranking Significance<br>(Pairwise Comparisons)"
# )

# # Create summary bar chart
# _summary = S.plot_significance_summary(
# _pairwise_df,
# metadata=_metadata
# )

# mo.md(f"""
# {mo.ui.altair_chart(_heatmap)}

# {mo.ui.altair_chart(_summary)}
# """)
|
||||
|
||||
# %%
## Voice Ranked 1st the most

# %%
S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Voice Scale 1-10
""")

# %%
# Get your voice scale data (from notebook)
voice_1_10, _ = S.get_voice_scale_1_10(data)
S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
### Statistical Significance (Scale 1-10)
""")

# %%
# Compute pairwise significance tests on the 1-10 ratings
# (Mann-Whitney U with Bonferroni correction).
pairwise_df, metadata = S.compute_pairwise_significance(
    voice_1_10,
    test_type="mannwhitney",  # or "ttest", "chi2", "auto"
    alpha=0.05,
    correction="bonferroni"  # or "holm", "none"
)

# View significant pairs
# print(pairwise_df.filter(pl.col('significant') == True))

# Create heatmap visualization
_heatmap = S.plot_significance_heatmap(
    pairwise_df,
    metadata=metadata,
    title="Voice Rating Significance<br>(Pairwise Comparisons)"
)

# Create summary bar chart
_summary = S.plot_significance_summary(
    pairwise_df,
    metadata=metadata
)

mo.md(f"""
{mo.ui.altair_chart(_heatmap)}

{mo.ui.altair_chart(_summary)}
""")
|
||||
|
||||
# %%


# %%
mo.md(r"""
## Ranking points for Voice per Chosen Brand Character

**missing mapping**
""")

# %%
mo.md(r"""
## Correlation Speaking Styles
""")

# %%
# Speaking-style answers are split across two question blocks
# (orange/red and green/blue); join them back together per respondent.
ss_or, choice_map_or = S.get_ss_orange_red(data)
ss_gb, choice_map_gb = S.get_ss_green_blue(data)

# Combine the data
ss_all = ss_or.join(ss_gb, on='_recordId')
_d = ss_all.collect()

choice_map = {**choice_map_or, **choice_map_gb}
# print(_d.head())
# print(choice_map)

# Long-format style data. Computed once and aliased — previously this
# transform ran twice with identical arguments for ss_long and df_style.
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
df_style = ss_long

# Join style traits with the 1-10 voice scale per respondent/voice.
vscales = S.get_voice_scale_1_10(data)[0]
df_scale_long = utils.process_voice_scale_data(vscales)

joined_scale = df_style.join(df_scale_long, on=["_recordId", "Voice"], how="inner")

# Join style traits with ranking points per respondent/voice.
df_ranking = utils.process_voice_ranking_data(top3_voices)
joined_ranking = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')

# %%
joined_ranking.head()
|
||||
|
||||
# %%
mo.md(r"""
### Colors vs Scale 1-10
""")

# %%
# Transform to get one row per color with average correlation
color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, SPEAKING_STYLES)
S.plot_speaking_style_color_correlation(
    data=color_corr_scale,
    title="Correlation: Speaking Style Colors and Voice Scale 1-10"
)

# %%
mo.md(r"""
### Colors vs Ranking Points
""")

# %%
# Same transform, but correlated against ranking points instead of the scale.
color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
    joined_ranking,
    SPEAKING_STYLES,
    target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
    data=color_corr_ranking,
    title="Correlation: Speaking Style Colors and Voice Ranking Points"
)

# %%
mo.md(r"""
### Individual Traits vs Scale 1-10
""")

# %%
# One correlation chart per speaking-style color, collected into markdown.
_content = """"""

for _style, _traits in SPEAKING_STYLES.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_correlation(
        data=joined_scale,
        style_color=_style,
        style_traits=_traits,
        title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
    )
    _content += f"""
#### Speaking Style **{_style}**:

{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Individual Traits vs Ranking Points
""")

# %%
# As above, but correlated against ranking points.
_content = """"""

for _style, _traits in SPEAKING_STYLES.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_ranking_correlation(
        data=joined_ranking,
        style_color=_style,
        style_traits=_traits,
        title=f"Correlation: Speaking Style {_style} and Voice Ranking Points",
    )
    _content += f"""
#### Speaking Style **{_style}**:

{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)
|
||||
|
||||
# %%
mo.md(r"""
## Correlations when "Best Brand Character" is chosen

Select only the traits that fit with that character
""")

# %%
# Trait list the winning character was originally defined with.
from reference import ORIGINAL_CHARACTER_TRAITS
chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER]

# %%
# Restrict the speaking styles to traits belonging to the chosen character.
STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits)

# %%
mo.md(r"""
### Individual Traits vs Ranking Points
""")

# %%
# One chart per remaining style color, against ranking points.
_content = ""
for _style, _traits in STYLES_SUBSET.items():
    _fig = S.plot_speaking_style_ranking_correlation(
        data=joined_ranking,
        style_color=_style,
        style_traits=_traits,
        title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Ranking Points"""
    )
    _content += f"""
{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Individual Traits vs Scale 1-10
""")

# %%
# One chart per remaining style color, against the 1-10 scale.
_content = """"""

for _style, _traits in STYLES_SUBSET.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_correlation(
        data=joined_scale,
        style_color=_style,
        style_traits=_traits,
        title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Scale 1-10""",
    )
    _content += f"""
{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Colors vs Scale 1-10 (Best Character)
""")

# %%
# Transform to get one row per color with average correlation
_color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET)
S.plot_speaking_style_color_correlation(
    data=_color_corr_scale,
    title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Scale 1-10"""
)

# %%
mo.md(r"""
### Colors vs Ranking Points (Best Character)
""")

# %%
_color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
    joined_ranking,
    STYLES_SUBSET,
    target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
    data=_color_corr_ranking,
    title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Ranking Points"""
)
|
||||
@@ -22,7 +22,7 @@ def _():
|
||||
@app.cell
def _():
    # Paths for tagging figures into the PowerPoint report template.
    TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
    TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
    # TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
    TAG_IMAGE_DIR = Path('figures/2-2-26')
    # NOTE(review): TAG_TARGET is assigned but not returned from this cell —
    # presumably unused by downstream cells; confirm against the full notebook.
    return TAG_IMAGE_DIR, TAG_SOURCE
|
||||
|
||||
|
||||
144
README.md
144
README.md
@@ -1,5 +1,147 @@
|
||||
# Voice Branding Quantitative Analysis
|
||||
|
||||
## Running Marimo Notebooks
|
||||
|
||||
Running on Ct-105 for shared access:
|
||||
|
||||
```bash
uv run marimo run 02_quant_analysis.py --headless --port 8080
```
|
||||
|
||||
---
|
||||
|
||||
## Batch Report Generation
|
||||
|
||||
The quant report can be run with different filter combinations via CLI or automated batch processing.
|
||||
|
||||
### Single Filter Run (CLI)
|
||||
|
||||
Run the report script directly with JSON-encoded filter arguments:
|
||||
|
||||
```bash
|
||||
# Single consumer segment
|
||||
uv run python 03_quant_report.script.py --consumer '["Starter"]'
|
||||
|
||||
# Single age group
|
||||
uv run python 03_quant_report.script.py --age '["18 to 21 years"]'
|
||||
|
||||
# Multiple filters combined
|
||||
uv run python 03_quant_report.script.py --age '["18 to 21 years", "22 to 24 years"]' --gender '["Male"]'
|
||||
|
||||
# All respondents (no filters = defaults to all options selected)
|
||||
uv run python 03_quant_report.script.py
|
||||
```
|
||||
|
||||
Available filter arguments:
|
||||
- `--age` — JSON list of age groups
|
||||
- `--gender` — JSON list of genders
|
||||
- `--ethnicity` — JSON list of ethnicities
|
||||
- `--income` — JSON list of income groups
|
||||
- `--consumer` — JSON list of consumer segments
|
||||
|
||||
### Batch Runner (All Combinations)
|
||||
|
||||
Run all single-filter combinations automatically with progress tracking:
|
||||
|
||||
```bash
|
||||
# Preview all combinations without running
|
||||
uv run python run_filter_combinations.py --dry-run
|
||||
|
||||
# Run all combinations (shows progress bar)
|
||||
uv run python run_filter_combinations.py
|
||||
|
||||
# Or use the registered CLI entry point
|
||||
uv run quant-report-batch
|
||||
uv run quant-report-batch --dry-run
|
||||
```
|
||||
|
||||
This generates reports for:
|
||||
- All Respondents (no filters)
|
||||
- Each age group individually
|
||||
- Each gender individually
|
||||
- Each ethnicity individually
|
||||
- Each income group individually
|
||||
- Each consumer segment individually
|
||||
|
||||
Output figures are saved to `figures/<export_date>/<filter_slug>/`.
|
||||
|
||||
### Jupyter Notebook Debugging
|
||||
|
||||
The script auto-detects Jupyter/IPython environments. When running in VS Code's Jupyter extension, CLI args default to `None` (all options selected), so you can debug cell-by-cell normally.
|
||||
|
||||
---
|
||||
|
||||
## Adding Custom Filter Combinations
|
||||
|
||||
To add new filter combinations to the batch runner, edit `run_filter_combinations.py`:
|
||||
|
||||
### Checklist
|
||||
|
||||
1. **Open** `run_filter_combinations.py`
|
||||
|
||||
2. **Find** the `get_filter_combinations()` function
|
||||
|
||||
3. **Add** your combination to the list before the `return` statement:
|
||||
|
||||
```python
|
||||
# Example: Add a specific age + consumer cross-filter
|
||||
combinations.append({
|
||||
'name': 'Age-18to24_Consumer-Starter', # Used for output folder naming
|
||||
'filters': {
|
||||
'age': ['18 to 21 years', '22 to 24 years'],
|
||||
'consumer': ['Starter']
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
4. **Filter keys** must match CLI argument names:
|
||||
- `age` — values from `survey.options_age`
|
||||
- `gender` — values from `survey.options_gender`
|
||||
- `ethnicity` — values from `survey.options_ethnicity`
|
||||
- `income` — values from `survey.options_income`
|
||||
- `consumer` — values from `survey.options_consumer`
|
||||
|
||||
5. **Check available values** by running:
|
||||
```python
|
||||
from utils import QualtricsSurvey
|
||||
S = QualtricsSurvey('data/exports/2-2-26/...Labels.csv', 'data/exports/.../....qsf')
|
||||
S.load_data()
|
||||
print(S.options_age)
|
||||
print(S.options_consumer)
|
||||
# etc.
|
||||
```
|
||||
|
||||
6. **Test** with dry-run first:
|
||||
```bash
|
||||
uv run python run_filter_combinations.py --dry-run
|
||||
```
|
||||
|
||||
### Example: Adding Multiple Cross-Filters
|
||||
|
||||
```python
|
||||
# In get_filter_combinations(), before return:
|
||||
|
||||
# Young professionals
|
||||
combinations.append({
|
||||
'name': 'Young_Professionals',
|
||||
'filters': {
|
||||
'age': ['22 to 24 years', '25 to 34 years'],
|
||||
'consumer': ['Early Professional']
|
||||
}
|
||||
})
|
||||
|
||||
# High income males
|
||||
combinations.append({
|
||||
'name': 'High_Income_Male',
|
||||
'filters': {
|
||||
'income': ['$150,000 - $199,999', '$200,000 or more'],
|
||||
'gender': ['Male']
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- **Empty filters dict** = all respondents (no filtering)
|
||||
- **Omitted filter keys** = all options for that dimension selected
|
||||
- **Output folder names** are auto-generated from active filters by `QualtricsSurvey.filter_data()`
|
||||
@@ -7,6 +7,7 @@ requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"altair>=6.0.0",
|
||||
"imagehash>=4.3.1",
|
||||
"jupyter>=1.1.1",
|
||||
"marimo>=0.18.0",
|
||||
"matplotlib>=3.10.8",
|
||||
"modin[dask]>=0.37.1",
|
||||
@@ -24,8 +25,12 @@ dependencies = [
|
||||
"requests>=2.32.5",
|
||||
"scipy>=1.14.0",
|
||||
"taguette>=1.5.1",
|
||||
"tqdm>=4.66.0",
|
||||
"vl-convert-python>=1.9.0.post1",
|
||||
"wordcloud>=1.9.5",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
quant-report-batch = "run_filter_combinations:main"
|
||||
|
||||
|
||||
|
||||
165
run_filter_combinations.py
Normal file
165
run_filter_combinations.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Batch runner for quant report with different filter combinations.
|
||||
|
||||
Runs 03_quant_report.script.py for each single-filter combination:
|
||||
- Each age group (with all others active)
|
||||
- Each gender (with all others active)
|
||||
- Each ethnicity (with all others active)
|
||||
- Each income group (with all others active)
|
||||
- Each consumer segment (with all others active)
|
||||
|
||||
Usage:
|
||||
uv run python run_filter_combinations.py
|
||||
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils import QualtricsSurvey
|
||||
|
||||
|
||||
# Default data paths (same as in 03_quant_report.script.py)
|
||||
RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
|
||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||
|
||||
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
|
||||
|
||||
|
||||
def get_filter_combinations(survey: "QualtricsSurvey") -> list[dict]:
    """
    Generate all single-filter combinations.

    Each combination isolates ONE filter value while keeping all others at
    "all selected" (an omitted filter key means "all options" downstream).

    Args:
        survey: Loaded survey exposing ``options_<dimension>`` lists.

    Returns:
        List of dicts, each with 'name' (output-folder label) and
        'filters' (CLI kwarg name -> single-value list), starting with
        an unfiltered 'All_Respondents' entry.
    """
    # (CLI filter key, display prefix, option-list attribute) per dimension.
    dimensions = [
        ('age', 'Age', 'options_age'),
        ('gender', 'Gender', 'options_gender'),
        ('ethnicity', 'Ethnicity', 'options_ethnicity'),
        ('income', 'Income', 'options_income'),
        ('consumer', 'Consumer', 'options_consumer'),
    ]

    # "All Respondents" run first (no filters = all options selected).
    combinations = [{'name': 'All_Respondents', 'filters': {}}]

    # One combination per individual option value, per dimension.
    for key, prefix, attr in dimensions:
        for value in getattr(survey, attr):
            combinations.append({
                'name': f'{prefix}-{value}',
                'filters': {key: [value]},
            })

    return combinations
|
||||
|
||||
|
||||
def run_report(filters: dict, dry_run: bool = False) -> bool:
    """
    Run the report script with given filters.

    Args:
        filters: Dict of filter_name -> list of values
        dry_run: If True, just print command without running

    Returns:
        True if successful, False otherwise
    """
    # Build the subprocess command: one --<filter> flag per non-empty
    # value list, each JSON-encoded for the report script's CLI parser.
    cmd = [sys.executable, str(REPORT_SCRIPT)]
    for name, values in filters.items():
        if not values:
            continue
        cmd.append(f'--{name}')
        cmd.append(json.dumps(values))

    if dry_run:
        print(f" Would run: {' '.join(cmd)}")
        return True

    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent
        )
    except Exception as e:
        print(f"\n ERROR: {e}")
        return False

    if completed.returncode == 0:
        return True
    # Show a truncated stderr excerpt so batch output stays readable.
    print(f"\n ERROR: {completed.stderr[:500]}")
    return False
|
||||
|
||||
|
||||
def main():
    """Entry point: run the report once per generated filter combination."""
    import argparse
    parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
    cli = parser.parse_args()

    # Loading the survey populates the options_* attribute lists that
    # get_filter_combinations() iterates over.
    print("Loading survey to get filter options...")
    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    survey.load_data()

    combos = get_filter_combinations(survey)
    print(f"Generated {len(combos)} filter combinations")

    if cli.dry_run:
        # Preview mode: show each command without executing anything.
        print("\nDRY RUN - Commands that would be executed:")
        for combo in combos:
            print(f"\n{combo['name']}:")
            run_report(combo['filters'], dry_run=True)
        return

    # Execute every combination, tracking successes and failures.
    successful, failed = 0, []
    for combo in tqdm(combos, desc="Running reports", unit="filter"):
        tqdm.write(f"Running: {combo['name']}")
        if run_report(combo['filters']):
            successful += 1
        else:
            failed.append(combo['name'])

    # Final summary.
    print(f"\n{'='*50}")
    print(f"Completed: {successful}/{len(combos)} successful")
    if failed:
        print(f"Failed: {', '.join(failed)}")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user