diff --git a/03_quant_report.py b/03_quant_report.py index e74c297..8ed06cc 100644 --- a/03_quant_report.py +++ b/03_quant_report.py @@ -255,7 +255,7 @@ def _(): mo.md(r""" --- - # Brand Character Results (overall) + # Brand Character Results """) return @@ -276,7 +276,7 @@ def _(S, data): return -@app.cell +@app.cell(disabled=True) def _(): mo.md(r""" ## Character ranking points @@ -311,7 +311,7 @@ def _(S, char_rank): return -@app.cell(disabled=True) +@app.cell def _(): mo.md(r""" ### Statistical Significance Character Ranking @@ -335,7 +335,7 @@ def _(S, char_rank): return -@app.cell +@app.cell(disabled=True) def _(): mo.md(r""" ## Character Ranking: times 1st place @@ -438,7 +438,7 @@ def _(): return -@app.cell +@app.cell(disabled=True) def _(): return @@ -461,20 +461,34 @@ def _(): @app.cell def _(): mo.md(r""" - ## 8 Most Chosen + ## Top 8 Most Chosen out of 18 """) return @app.cell def _(S, data): - v_18_8_3 = S.get_18_8_3(data)[0].collect() + v_18_8_3 = S.get_18_8_3(data)[0] return (v_18_8_3,) @app.cell def _(S, v_18_8_3): - S.plot_voice_selection_counts(v_18_8_3) + S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice') + return + + +@app.cell +def _(): + mo.md(r""" + ## Top 3 most chosen out of 8 + """) + return + + +@app.cell +def _(S, v_18_8_3): + S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice') return @@ -497,10 +511,7 @@ def _(S, data): @app.cell def _(S, top3_voices): - _plot = S.plot_ranking_distribution(top3_voices, x_label='Voice') - mo.md(f""" - {mo.ui.altair_chart(_plot)} - """) + S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Voice Rankings (1st, 2nd, 3rd)") return @@ -548,7 +559,7 @@ def _(): return -@app.cell +@app.cell(hide_code=True) def _(): mo.md(r""" ## Weighted Popularity Scores @@ -568,6 +579,7 @@ def _(S, top3_voices_weighted): @app.cell def _(): + ## Voice Ranked 1st the most return @@ -578,7 +590,8 @@ def _(top3_voices_weighted): @app.cell -def _(): +def _(S, top3_voices): + S.plot_most_ranked_1(top3_voices, title="Most Popular Voice
(Number of Times Ranked 1st)", x_label='Voice') return @@ -597,13 +610,13 @@ def _(S, data): return (voice_1_10,) -@app.cell(hide_code=True) +@app.cell(disabled=True) def _(S, voice_1_10): - S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', width=1000, domain=[1,10], title="Voice General Impression (Scale 1-10)") + S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)") return -@app.cell +@app.cell(disabled=True) def _(): mo.md(r""" ### Statistical Significance (Scale 1-10) @@ -687,5 +700,60 @@ def _(S, voice_1_10): return +@app.cell +def _(S, data): + # Get your voice scale data (from notebook) + voice_1_10, _ = S.get_voice_scale_1_10(data) + return (voice_1_10,) + + +@app.cell +def _(S, voice_1_10): + S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)") + return + + +@app.cell(disabled=True) +def _(): + mo.md(r""" + ### Statistical Significance (Scale 1-10) + """) + return + + +@app.cell(disabled=True) +def _(S, voice_1_10): + # Compute pairwise significance tests + pairwise_df, metadata = S.compute_pairwise_significance( + voice_1_10, + test_type="mannwhitney", # or "ttest", "chi2", "auto" + alpha=0.05, + correction="bonferroni" # or "holm", "none" + ) + + # View significant pairs + # print(pairwise_df.filter(pl.col('significant') == True)) + + # Create heatmap visualization + _heatmap = S.plot_significance_heatmap( + pairwise_df, + metadata=metadata, + title="Voice Rating Significance
(Pairwise Comparisons)" + ) + + # Create summary bar chart + _summary = S.plot_significance_summary( + pairwise_df, + metadata=metadata + ) + + mo.md(f""" + {mo.ui.altair_chart(_heatmap)} + + {mo.ui.altair_chart(_summary)} + """) + return + + if __name__ == "__main__": app.run() diff --git a/plots.py b/plots.py index 6735555..1a46f51 100644 --- a/plots.py +++ b/plots.py @@ -385,13 +385,13 @@ class QualtricsPlotsMixin: def plot_ranking_distribution( self, data: pl.LazyFrame | pl.DataFrame | None = None, - title: str = "Rankings Distribution\n(1st to 4th Place)", + title: str = "Rankings Distribution\n(1st to 3rd Place)", x_label: str = "Item", y_label: str = "Number of Votes", height: int | None = None, width: int | str | None = None, ) -> alt.Chart: - """Create a stacked bar chart showing the distribution of rankings (1st to 4th).""" + """Create a stacked bar chart showing the distribution of rankings (1st to 3rd).""" df = self._ensure_dataframe(data) stats = [] @@ -401,15 +401,15 @@ class QualtricsPlotsMixin: r1 = df.filter(pl.col(col) == 1).height r2 = df.filter(pl.col(col) == 2).height r3 = df.filter(pl.col(col) == 3).height - r4 = df.filter(pl.col(col) == 4).height - total = r1 + r2 + r3 + r4 + # r4 = df.filter(pl.col(col) == 4).height + total = r1 + r2 + r3 if total > 0: label = self._clean_voice_label(col) stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'rank1': r1}) stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'rank1': r1}) stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'rank1': r1}) - stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'rank1': r1}) + # stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'rank1': r1}) if not stats: return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N') @@ -423,8 +423,8 @@ class QualtricsPlotsMixin: x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='rank1', order='descending')), y=alt.Y('count:Q', title=y_label, stack='zero'), color=alt.Color('rank:N', - scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3', 'Rank 4 (Worst)'], - range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3, ColorPalette.RANK_4]), + scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], + range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), legend=alt.Legend(orient='top', direction='horizontal', title=None)), order=alt.Order('rank:N', sort='ascending'), opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), diff --git a/reference.py b/reference.py index a6ba358..c7afee4 100644 --- a/reference.py +++ b/reference.py @@ -35,4 +35,25 @@ ORIGINAL_CHARACTER_TRAITS = { "Balanced", "Efficient", ] +} + +VOICE_GENDER_MAPPING = { + "V14": "Female", + "V04": "Female", + "V08": "Female", + "V77": "Female", + "V48": "Female", + "V82": "Female", + "V89": "Female", + "V91": "Female", + "V34": "Male", + "V69": "Male", + "V45": "Male", + "V46": "Male", + "V54": "Male", + "V74": "Male", + "V81": "Male", + "V86": "Male", + "V88": "Male", + "V16": "Male", } \ No newline at end of file