other top bc's

This commit is contained in:
2026-02-05 11:50:00 +01:00
parent af9a15ccb0
commit 840bd2940d
4 changed files with 93 additions and 13 deletions

View File

@@ -20,8 +20,6 @@ from speaking_styles import SPEAKING_STYLES
RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv' RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf' QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
BEST_CHOSEN_CHARACTER = "the_coach"
# %% # %%
# CLI argument parsing for batch automation # CLI argument parsing for batch automation
@@ -50,6 +48,8 @@ def parse_cli_args():
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values') parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)') parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
parser.add_argument('--figures-dir', type=str, default=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
parser.add_argument('--best-character', type=str, default="the_coach", help='Slug of the best chosen character (default: "the_coach")')
# Only parse if running as script (not in Jupyter/interactive) # Only parse if running as script (not in Jupyter/interactive)
try: try:
@@ -57,7 +57,7 @@ def parse_cli_args():
get_ipython() # noqa: F821 # type: ignore get_ipython() # noqa: F821 # type: ignore
# Return namespace with all filters set to None # Return namespace with all filters set to None
no_filters = {f: None for f in FILTER_CONFIG} no_filters = {f: None for f in FILTER_CONFIG}
return argparse.Namespace(**no_filters, filter_name=None) return argparse.Namespace(**no_filters, filter_name=None, figures_dir=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', best_character="the_coach")
except NameError: except NameError:
args = parser.parse_args() args = parser.parse_args()
# Parse JSON strings to lists # Parse JSON strings to lists
@@ -67,11 +67,12 @@ def parse_cli_args():
return args return args
cli_args = parse_cli_args() cli_args = parse_cli_args()
BEST_CHOSEN_CHARACTER = cli_args.best_character
# %% # %%
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) S = QualtricsSurvey(RESULTS_FILE, QSF_FILE, figures_dir=cli_args.figures_dir)
try: try:
data_all = S.load_data() data_all = S.load_data()
except NotImplementedError as e: except NotImplementedError as e:

View File

@@ -1,12 +1,12 @@
"""Extra statistical significance analyses for quant report.""" """Extra statistical significance analyses for quant report."""
# %% Imports # %% Imports
from utils import QualtricsSurvey import utils
import polars as pl import polars as pl
import argparse import argparse
import json import json
import re import re
from pathlib import Path
@@ -39,6 +39,7 @@ def parse_cli_args():
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values') parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)') parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
parser.add_argument('--figures-dir', type=str, default=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
# Only parse if running as script (not in Jupyter/interactive) # Only parse if running as script (not in Jupyter/interactive)
try: try:
@@ -46,7 +47,9 @@ def parse_cli_args():
get_ipython() # noqa: F821 # type: ignore get_ipython() # noqa: F821 # type: ignore
# Return namespace with all filters set to None # Return namespace with all filters set to None
no_filters = {f: None for f in FILTER_CONFIG} no_filters = {f: None for f in FILTER_CONFIG}
return argparse.Namespace(**no_filters, filter_name=None) # Use the same default as argparse
default_fig_dir = f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}'
return argparse.Namespace(**no_filters, filter_name=None, figures_dir=default_fig_dir)
except NameError: except NameError:
args = parser.parse_args() args = parser.parse_args()
# Parse JSON strings to lists # Parse JSON strings to lists
@@ -59,7 +62,7 @@ cli_args = parse_cli_args()
# %% # %%
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) S = utils.QualtricsSurvey(RESULTS_FILE, QSF_FILE, figures_dir=cli_args.figures_dir)
data_all = S.load_data() data_all = S.load_data()
@@ -125,4 +128,76 @@ if cli_args.filter_name and S.fig_save_dir:
data = _d data = _d
data.collect() data.collect()
# %% Character coach significantly higher than others
# Only the first element of what get_character_ranking returns is used below.
char_rank = S.get_character_ranking(data)[0]
# Pairwise significance on the character rankings at alpha=0.05, with
# correction="none" (no multiple-comparison adjustment requested).
_pairwise_df, _meta = S.compute_ranking_significance(char_rank, alpha=0.05, correction="none")
# %% [markdown]
"""
### Methodology Analysis
**Input Data (`char_rank`)**:
* Generated by `S.get_character_ranking(data)`.
* Contains the ranking values (1st, 2nd, 3rd, 4th) assigned by each respondent to the four options ("The Coach", etc.).
* Columns represent the characters; rows represent individual respondents; values are the numerical rank (1 = Top Choice).
**Processing**:
* The function `compute_ranking_significance` aggregates these rankings to find the **"Rank 1 Share"** (the percentage of respondents who picked that character as their #1 favorite).
* It builds a contingency table of how many times each character was ranked 1st vs. not 1st (or 1st vs. 2nd vs. 3rd).
**Statistical Test**:
* **Test Used**: Pairwise Z-test for two proportions (uncorrected).
* **Comparison**: It compares the **Rank 1 Share** of every pair of characters.
* *Example*: "Is the 42% of people who chose 'Coach' significantly different from the 29% who chose 'Familiar Friend'?"
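* **Significance**: A result of `p < 0.05` means the difference in popularity (top-choice preference) is statistically significant at the 5% level, i.e. a gap this large would be unlikely to arise from random sampling variation alone if the two characters were truly equally popular.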
"""
# %% Plot heatmap of pairwise significance
# Draw the pairwise results computed for the character rankings, passing the
# accompanying metadata returned by compute_ranking_significance.
S.plot_significance_heatmap(
    _pairwise_df,
    metadata=_meta,
    title="Statistical Significance: Character Top Choice Preference",
)
# %% Plot summary of significant differences (e.g., which characters are significantly higher than others)
# S.plot_significance_summary(_pairwise_df, metadata=_meta)
# %% [markdown]
"""
# Analysis: Significance of "The Coach"
**Parameters**: `alpha=0.05`, `correction='none'`
* **Rationale**: No correction was applied to allow for detection of all potential pairwise differences (uncorrected p < 0.05). If strict control for family-wise error rate were required (e.g., Bonferroni), the significance threshold would be lower: 0.05 divided by the 6 pairwise comparisons among the four characters, i.e. p < 0.0083.
**Results**:
"The Coach" is the top-ranked option (42.0% Rank 1 share) and shows strong separation from the field.
* **Vs. Bottom Two**: "The Coach" is significantly higher than both "The Bank Teller" (26.9%, p < 0.001) and "Familiar Friend" (29.4%, p < 0.001).
* **Vs. Runner-Up**: "The Coach" is widely preferred over "The Personal Assistant" (33.4%). The difference of **8.6 percentage points** is statistically significant (p = 0.017) at the standard 0.05 level.
* *Note*: While p=0.017 is significant in isolation, it would not meet the stricter Bonferroni threshold (0.0083). However, the effect size (+8.6 percentage points) is commercially meaningful.
**Conclusion**:
Yes, "The Coach" is preferred by a statistically significant margin over each of the other options at the uncorrected 0.05 level. It is clearly superior to the bottom two options and holds a statistically significant lead over the runner-up ("Personal Assistant") in direct comparison.
"""
# %% voices analysis
# Only the first element of what get_top_3_voices returns is used below.
top3_voices = S.get_top_3_voices(data)[0]
# Same significance settings as the character analysis: alpha=0.05 and
# correction="none".
_pairwise_df_voice, _metadata = S.compute_ranking_significance(
    top3_voices,
    alpha=0.05,
    correction="none",
)
# Render the voice pairwise results as a heatmap.
S.plot_significance_heatmap(
    _pairwise_df_voice,
    metadata=_metadata,
    title="Statistical Significance: Voice Top Choice Preference",
)
# %% # %%

View File

@@ -2308,9 +2308,9 @@ class QualtricsPlotsMixin:
# Base heatmap # Base heatmap
heatmap = alt.Chart(heatmap_df).mark_rect(stroke='white', strokeWidth=1).encode( heatmap = alt.Chart(heatmap_df).mark_rect(stroke='white', strokeWidth=1).encode(
x=alt.X('col:N', title=None, sort=all_groups, x=alt.X('col:N', title=None, sort=all_groups,
axis=alt.Axis(labelAngle=-45, labelLimit=150)), axis=alt.Axis(labelAngle=-45, labelLimit=150, grid=False)),
y=alt.Y('row:N', title=None, sort=all_groups, y=alt.Y('row:N', title=None, sort=all_groups,
axis=alt.Axis(labelLimit=150)), axis=alt.Axis(labelLimit=150, grid=False)),
color=alt.Color('sig_category:N', color=alt.Color('sig_category:N',
scale=alt.Scale(domain=sig_domain, range=sig_range), scale=alt.Scale(domain=sig_domain, range=sig_range),
legend=alt.Legend( legend=alt.Legend(

View File

@@ -762,7 +762,7 @@ def normalize_global_values(df: pl.DataFrame, target_cols: list[str]) -> pl.Data
class QualtricsSurvey(QualtricsPlotsMixin): class QualtricsSurvey(QualtricsPlotsMixin):
"""Class to handle Qualtrics survey data.""" """Class to handle Qualtrics survey data."""
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]): def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path], figures_dir: Union[str, Path] = None):
if isinstance(data_path, str): if isinstance(data_path, str):
data_path = Path(data_path) data_path = Path(data_path)
@@ -774,8 +774,12 @@ class QualtricsSurvey(QualtricsPlotsMixin):
self.qid_descr_map = self._extract_qid_descr_map() self.qid_descr_map = self._extract_qid_descr_map()
self.qsf:dict = self._load_qsf() self.qsf:dict = self._load_qsf()
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21' if figures_dir:
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2] self.fig_save_dir = Path(figures_dir)
else:
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
if not self.fig_save_dir.exists(): if not self.fig_save_dir.exists():
self.fig_save_dir.mkdir(parents=True, exist_ok=True) self.fig_save_dir.mkdir(parents=True, exist_ok=True)