# ---------------------------------------------------------------------------
# XX_statistical_significance.script.py -- cells added by this change
# ---------------------------------------------------------------------------

# %% [markdown]
"""
# Rank 1 Selection Significance (Voice Level)

Similar to the Total Mentions significance analysis above, but counting
only how many times each voice was ranked **1st** (out of all respondents).
This isolates first-choice preference rather than overall top-3 visibility.
"""

# %% Rank 1 Significance: All Voices

# NOTE(review): correction="none" leaves every pairwise p-value unadjusted
# for multiple comparisons -- confirm this is intended for the heatmap.
_pairwise_df_rank1, _meta_rank1 = S.compute_rank1_significance(
    top3_voices,
    alpha=0.05,
    correction="none",
)

S.plot_significance_heatmap(
    _pairwise_df_rank1,
    metadata=_meta_rank1,
    title="Statistical Significance: Voice Rank 1 Selection"
)

# %% Rank 1 Significance: Male Voices Only

_pairwise_df_rank1_male, _meta_rank1_male = S.compute_rank1_significance(
    df_male_voices,
    alpha=0.05,
    correction="none",
)

S.plot_significance_heatmap(
    _pairwise_df_rank1_male,
    metadata=_meta_rank1_male,
    title="Male Voices Only: Rank 1 Selection Significance"
)

# %%


# ---------------------------------------------------------------------------
# utils.py -- method added to class QualtricsSurvey(QualtricsPlotsMixin),
# inserted just before process_speaking_style_data.
# Shown at column 0 here; indent one level when placing it in the class body.
# ---------------------------------------------------------------------------

def compute_rank1_significance(
    self,
    data: pl.LazyFrame | pl.DataFrame,
    alpha: float = 0.05,
    correction: str = "bonferroni",
) -> tuple[pl.DataFrame, dict]:
    """Compute pairwise statistical significance for Rank 1 selections only.

    Like compute_mentions_significance but counts only how many times each
    voice/character was ranked **1st**, using the total number of rows
    (respondents) as the denominator for every voice. Each pair of voices is
    compared with a pooled two-proportion z-test (two-sided).

    Args:
        data: Ranking data (rows=respondents, cols=voices, values=rank).
            A LazyFrame is collected first. The ``_recordId`` column, if
            present, is ignored; every other column is treated as a voice.
        alpha: Significance level applied to the adjusted p-values.
        correction: Multiple comparison correction method: ``"bonferroni"``,
            ``"holm"`` or ``"none"``. Any other value leaves ``p_adjusted``
            as NaN (so nothing is flagged significant) and emits a warning.

    Returns:
        tuple: (pairwise_df, metadata). ``pairwise_df`` has one row per pair
        of voices, sorted by ``p_value``, with the columns ``group1``,
        ``group2``, ``p_value``, ``rank1_count1``, ``rank1_count2``,
        ``rank1_pct1``, ``rank1_pct2`` (percent, 1 decimal), ``total1``,
        ``total2``, ``effect_size`` (difference of the two proportions),
        ``p_adjusted`` and ``significant``. ``metadata`` holds
        ``test_type``, ``alpha``, ``correction`` and ``n_comparisons``.

    Raises:
        ValueError: If there are fewer than 2 ranking columns, fewer than 2
            distinct cleaned labels, or no respondent rows at all.
    """
    import warnings
    from itertools import combinations

    import numpy as np
    from scipy import stats as scipy_stats

    df = data.collect() if isinstance(data, pl.LazyFrame) else data

    ranking_cols = [c for c in df.columns if c != '_recordId']
    if len(ranking_cols) < 2:
        raise ValueError("Need at least 2 ranking columns")

    total_respondents = df.height
    if total_respondents == 0:
        # Every proportion below divides by the respondent count.
        raise ValueError("Need at least 1 respondent")

    # Count rank-1 selections for each voice. Rows where the rank is null
    # do not match `== 1` and are therefore not counted.
    rank1_data: dict[str, int] = {
        self._clean_voice_label(col): df.filter(pl.col(col) == 1).height
        for col in ranking_cols
    }

    labels = sorted(rank1_data)
    if len(labels) < 2:
        # Only reachable if several columns clean to the same label (the
        # dict keeps one entry per label); without this guard the empty
        # results frame fails later with an opaque missing-column error.
        raise ValueError("Need at least 2 distinct voice labels")
    n_comparisons = len(labels) * (len(labels) - 1) // 2

    # NOTE(review): both proportions come from the same respondents, while
    # the pooled z-test treats them as two independent samples -- confirm
    # this approximation is acceptable for the analysis.
    n1 = n2 = total_respondents

    results = []
    for label1, label2 in combinations(labels, 2):
        count1 = rank1_data[label1]
        count2 = rank1_data[label2]

        pct1 = count1 / n1
        pct2 = count2 / n2

        # Z-test for two proportions (same denominator for both)
        p_pooled = (count1 + count2) / (n1 + n2)
        se = np.sqrt(p_pooled * (1 - p_pooled) * (1 / n1 + 1 / n2))

        if se > 0:
            z_stat = (pct1 - pct2) / se
            # sf() keeps precision in the far tail, where 1 - cdf() rounds
            # to exactly 0.0.
            p_value = 2 * scipy_stats.norm.sf(abs(z_stat))
        else:
            # se == 0 means both counts are 0 or both equal the total:
            # the proportions are identical, so there is no evidence of a
            # difference.
            p_value = 1.0

        results.append({
            'group1': label1,
            'group2': label2,
            'p_value': float(p_value),
            'rank1_count1': count1,
            'rank1_count2': count2,
            'rank1_pct1': round(pct1 * 100, 1),
            'rank1_pct2': round(pct2 * 100, 1),
            'total1': n1,
            'total2': n2,
            'effect_size': pct1 - pct2,
        })

    results_df = pl.DataFrame(results)
    p_values = results_df['p_value'].to_numpy()

    if correction == "bonferroni":
        p_adjusted = np.minimum(p_values * n_comparisons, 1.0)
    elif correction == "holm":
        # Step-down Holm: the j-th smallest p-value is scaled by (m - j),
        # then made monotone non-decreasing and capped at 1.
        order = np.argsort(p_values)
        m = len(p_values)
        stepped = p_values[order] * (m - np.arange(m))
        stepped = np.minimum(np.maximum.accumulate(stepped), 1.0)
        p_adjusted = np.empty(m, dtype=float)
        p_adjusted[order] = stepped  # scatter back to the original row order
    elif correction == "none":
        p_adjusted = p_values.astype(float)  # pyright: ignore
    else:
        # Kept backward-compatible: unknown methods yield NaN adjusted
        # p-values (nothing significant), but no longer do so silently.
        warnings.warn(
            f"Unknown correction {correction!r}; p_adjusted is left as NaN "
            "and no comparison is marked significant",
            stacklevel=2,
        )
        p_adjusted = np.full_like(p_values, np.nan, dtype=float)

    results_df = results_df.with_columns([
        pl.Series('p_adjusted', p_adjusted),
        pl.Series('significant', p_adjusted < alpha),
    ]).sort('p_value')

    metadata = {
        'test_type': 'proportion_z_test_rank1',
        'alpha': alpha,
        'correction': correction,
        'n_comparisons': n_comparisons,
    }

    return results_df, metadata