other top bc's
This commit is contained in:
@@ -20,8 +20,6 @@ from speaking_styles import SPEAKING_STYLES
|
|||||||
RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
|
RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
|
||||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||||
|
|
||||||
BEST_CHOSEN_CHARACTER = "the_coach"
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# CLI argument parsing for batch automation
|
# CLI argument parsing for batch automation
|
||||||
@@ -50,6 +48,8 @@ def parse_cli_args():
|
|||||||
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
|
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
|
||||||
|
|
||||||
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
|
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
|
||||||
|
parser.add_argument('--figures-dir', type=str, default=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
|
||||||
|
parser.add_argument('--best-character', type=str, default="the_coach", help='Slug of the best chosen character (default: "the_coach")')
|
||||||
|
|
||||||
# Only parse if running as script (not in Jupyter/interactive)
|
# Only parse if running as script (not in Jupyter/interactive)
|
||||||
try:
|
try:
|
||||||
@@ -57,7 +57,7 @@ def parse_cli_args():
|
|||||||
get_ipython() # noqa: F821 # type: ignore
|
get_ipython() # noqa: F821 # type: ignore
|
||||||
# Return namespace with all filters set to None
|
# Return namespace with all filters set to None
|
||||||
no_filters = {f: None for f in FILTER_CONFIG}
|
no_filters = {f: None for f in FILTER_CONFIG}
|
||||||
return argparse.Namespace(**no_filters, filter_name=None)
|
return argparse.Namespace(**no_filters, filter_name=None, figures_dir=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', best_character="the_coach")
|
||||||
except NameError:
|
except NameError:
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
# Parse JSON strings to lists
|
# Parse JSON strings to lists
|
||||||
@@ -67,11 +67,12 @@ def parse_cli_args():
|
|||||||
return args
|
return args
|
||||||
|
|
||||||
cli_args = parse_cli_args()
|
cli_args = parse_cli_args()
|
||||||
|
BEST_CHOSEN_CHARACTER = cli_args.best_character
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE, figures_dir=cli_args.figures_dir)
|
||||||
try:
|
try:
|
||||||
data_all = S.load_data()
|
data_all = S.load_data()
|
||||||
except NotImplementedError as e:
|
except NotImplementedError as e:
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
"""Extra statistical significance analyses for quant report."""
|
"""Extra statistical significance analyses for quant report."""
|
||||||
# %% Imports
|
# %% Imports
|
||||||
|
|
||||||
from utils import QualtricsSurvey
|
import utils
|
||||||
import polars as pl
|
import polars as pl
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -39,6 +39,7 @@ def parse_cli_args():
|
|||||||
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
|
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
|
||||||
|
|
||||||
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
|
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
|
||||||
|
parser.add_argument('--figures-dir', type=str, default=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
|
||||||
|
|
||||||
# Only parse if running as script (not in Jupyter/interactive)
|
# Only parse if running as script (not in Jupyter/interactive)
|
||||||
try:
|
try:
|
||||||
@@ -46,7 +47,9 @@ def parse_cli_args():
|
|||||||
get_ipython() # noqa: F821 # type: ignore
|
get_ipython() # noqa: F821 # type: ignore
|
||||||
# Return namespace with all filters set to None
|
# Return namespace with all filters set to None
|
||||||
no_filters = {f: None for f in FILTER_CONFIG}
|
no_filters = {f: None for f in FILTER_CONFIG}
|
||||||
return argparse.Namespace(**no_filters, filter_name=None)
|
# Use the same default as argparse
|
||||||
|
default_fig_dir = f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}'
|
||||||
|
return argparse.Namespace(**no_filters, filter_name=None, figures_dir=default_fig_dir)
|
||||||
except NameError:
|
except NameError:
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
# Parse JSON strings to lists
|
# Parse JSON strings to lists
|
||||||
@@ -59,7 +62,7 @@ cli_args = parse_cli_args()
|
|||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
S = utils.QualtricsSurvey(RESULTS_FILE, QSF_FILE, figures_dir=cli_args.figures_dir)
|
||||||
data_all = S.load_data()
|
data_all = S.load_data()
|
||||||
|
|
||||||
|
|
||||||
@@ -125,4 +128,76 @@ if cli_args.filter_name and S.fig_save_dir:
|
|||||||
data = _d
|
data = _d
|
||||||
data.collect()
|
data.collect()
|
||||||
|
|
||||||
|
# %% Character coach significantly higher than others
|
||||||
|
|
||||||
|
|
||||||
|
char_rank = S.get_character_ranking(data)[0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
_pairwise_df, _meta = S.compute_ranking_significance(
|
||||||
|
char_rank,
|
||||||
|
alpha=0.05,
|
||||||
|
correction="none",
|
||||||
|
)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
"""
|
||||||
|
### Methodology Analysis
|
||||||
|
|
||||||
|
**Input Data (`char_rank`)**:
|
||||||
|
* Generated by `S.get_character_ranking(data)`.
|
||||||
|
* Contains the ranking values (1st, 2nd, 3rd, 4th) assigned by each respondent to the four options ("The Coach", etc.).
|
||||||
|
* Columns represent the characters; rows represent individual respondents; values are the numerical rank (1 = Top Choice).
|
||||||
|
|
||||||
|
**Processing**:
|
||||||
|
* The function `compute_ranking_significance` aggregates these rankings to find the **"Rank 1 Share"** (the percentage of respondents who picked that character as their #1 favorite).
|
||||||
|
* It builds a contingency table of how many times each character was ranked 1st vs. not 1st (or 1st vs. 2nd vs. 3rd).
|
||||||
|
|
||||||
|
**Statistical Test**:
|
||||||
|
* **Test Used**: Pairwise Z-test for two proportions (uncorrected).
|
||||||
|
* **Comparison**: It compares the **Rank 1 Share** of every pair of characters.
|
||||||
|
* *Example*: "Is the 42% of people who chose 'Coach' significantly different from the 29% who chose 'Familiar Friend'?"
|
||||||
|
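* **Significance**: A result of `p < 0.05` means the difference in popularity (top-choice preference) is statistically significant at the 5% level, i.e. a difference this large would be unlikely to arise from random chance alone if the two characters were equally popular.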
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# %% Plot heatmap of pairwise significance
|
||||||
|
S.plot_significance_heatmap(_pairwise_df, metadata=_meta, title="Statistical Significance: Character Top Choice Preference")
|
||||||
|
|
||||||
|
# %% Plot summary of significant differences (e.g., which characters are significantly higher than others)
|
||||||
|
# S.plot_significance_summary(_pairwise_df, metadata=_meta)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
"""
|
||||||
|
# Analysis: Significance of "The Coach"
|
||||||
|
|
||||||
|
**Parameters**: `alpha=0.05`, `correction='none'`
|
||||||
|
* **Rationale**: No correction was applied to allow for detection of all potential pairwise differences (uncorrected p < 0.05). If strict control for family-wise error rate were required (e.g., Bonferroni), the significance threshold would be lower (p < 0.0083).
|
||||||
|
|
||||||
|
**Results**:
|
||||||
|
"The Coach" is the top-ranked option (42.0% Rank 1 share) and shows strong separation from the field.
|
||||||
|
|
||||||
|
* **Vs. Bottom Two**: "The Coach" is significantly higher than both "The Bank Teller" (26.9%, p < 0.001) and "Familiar Friend" (29.4%, p < 0.001).
|
||||||
|
* **Vs. Runner-Up**: "The Coach" is widely preferred over "The Personal Assistant" (33.4%). The difference of **8.6 percentage points** is statistically significant (p = 0.017) at the standard 0.05 level.
|
||||||
|
* *Note*: While p=0.017 is significant in isolation, it would not meet the stricter Bonferroni threshold (0.0083). However, the effect size (+8.6 percentage points) is commercially meaningful.
|
||||||
|
|
||||||
|
**Conclusion**:
|
||||||
|
Yes, "The Coach" is chosen as the top option significantly more often than each of the other options at the uncorrected 0.05 level. It is clearly ahead of the bottom two options and holds a statistically significant lead over the runner-up ("Personal Assistant") in direct comparison, although that lead would not survive a Bonferroni correction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# %% voices analysis
|
||||||
|
top3_voices = S.get_top_3_voices(data)[0]
|
||||||
|
|
||||||
|
|
||||||
|
_pairwise_df_voice, _metadata = S.compute_ranking_significance(
|
||||||
|
top3_voices,alpha=0.05,correction="none")
|
||||||
|
|
||||||
|
|
||||||
|
S.plot_significance_heatmap(
|
||||||
|
_pairwise_df_voice,
|
||||||
|
metadata=_metadata,
|
||||||
|
title="Statistical Significance: Voice Top Choice Preference"
|
||||||
|
)
|
||||||
# %%
|
# %%
|
||||||
|
|||||||
4
plots.py
4
plots.py
@@ -2308,9 +2308,9 @@ class QualtricsPlotsMixin:
|
|||||||
# Base heatmap
|
# Base heatmap
|
||||||
heatmap = alt.Chart(heatmap_df).mark_rect(stroke='white', strokeWidth=1).encode(
|
heatmap = alt.Chart(heatmap_df).mark_rect(stroke='white', strokeWidth=1).encode(
|
||||||
x=alt.X('col:N', title=None, sort=all_groups,
|
x=alt.X('col:N', title=None, sort=all_groups,
|
||||||
axis=alt.Axis(labelAngle=-45, labelLimit=150)),
|
axis=alt.Axis(labelAngle=-45, labelLimit=150, grid=False)),
|
||||||
y=alt.Y('row:N', title=None, sort=all_groups,
|
y=alt.Y('row:N', title=None, sort=all_groups,
|
||||||
axis=alt.Axis(labelLimit=150)),
|
axis=alt.Axis(labelLimit=150, grid=False)),
|
||||||
color=alt.Color('sig_category:N',
|
color=alt.Color('sig_category:N',
|
||||||
scale=alt.Scale(domain=sig_domain, range=sig_range),
|
scale=alt.Scale(domain=sig_domain, range=sig_range),
|
||||||
legend=alt.Legend(
|
legend=alt.Legend(
|
||||||
|
|||||||
6
utils.py
6
utils.py
@@ -762,7 +762,7 @@ def normalize_global_values(df: pl.DataFrame, target_cols: list[str]) -> pl.Data
|
|||||||
class QualtricsSurvey(QualtricsPlotsMixin):
|
class QualtricsSurvey(QualtricsPlotsMixin):
|
||||||
"""Class to handle Qualtrics survey data."""
|
"""Class to handle Qualtrics survey data."""
|
||||||
|
|
||||||
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
|
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path], figures_dir: Union[str, Path] = None):
|
||||||
if isinstance(data_path, str):
|
if isinstance(data_path, str):
|
||||||
data_path = Path(data_path)
|
data_path = Path(data_path)
|
||||||
|
|
||||||
@@ -774,8 +774,12 @@ class QualtricsSurvey(QualtricsPlotsMixin):
|
|||||||
self.qid_descr_map = self._extract_qid_descr_map()
|
self.qid_descr_map = self._extract_qid_descr_map()
|
||||||
self.qsf:dict = self._load_qsf()
|
self.qsf:dict = self._load_qsf()
|
||||||
|
|
||||||
|
if figures_dir:
|
||||||
|
self.fig_save_dir = Path(figures_dir)
|
||||||
|
else:
|
||||||
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
|
# get export directory name for saving figures ie if data_path='data/exports/OneDrive_2026-01-21/...' should be 'figures/OneDrive_2026-01-21'
|
||||||
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
|
self.fig_save_dir = Path('figures') / self.data_filepath.parts[2]
|
||||||
|
|
||||||
if not self.fig_save_dir.exists():
|
if not self.fig_save_dir.exists():
|
||||||
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
|
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user