statistical tests
This commit is contained in:
@@ -44,14 +44,14 @@ def _(QSF_FILE, RESULTS_FILE):
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
mo.md(r"""
|
||||
def _(RESULTS_FILE, data_all):
|
||||
mo.md(rf"""
|
||||
---
|
||||
# Load Data
|
||||
|
||||
**Dataset:** `{Path(RESULTS_FILE).name}`
|
||||
**Dataset:** {Path(RESULTS_FILE).name}
|
||||
|
||||
**Responses**: `{data_all.collect().shape[0]}`
|
||||
**Responses**: {data_all.collect().shape[0]}
|
||||
""")
|
||||
return
|
||||
|
||||
@@ -112,11 +112,9 @@ def _():
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data_validated):
|
||||
data = data_validated
|
||||
|
||||
data.collect()
|
||||
return (data,)
|
||||
def _():
|
||||
#
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
@@ -130,8 +128,8 @@ def _():
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, data):
|
||||
demographics = S.get_demographics(data)[0].collect()
|
||||
def _(S, data_validated):
|
||||
demographics = S.get_demographics(data_validated)[0].collect()
|
||||
demographics
|
||||
return (demographics,)
|
||||
|
||||
@@ -148,7 +146,7 @@ def _():
|
||||
def _(demographics):
|
||||
# Demographics where 'Consumer' is null
|
||||
demographics_no_consumer = demographics.filter(pl.col('Consumer').is_null())['_recordId'].to_list()
|
||||
# demographics_no_consumer
|
||||
demographics_no_consumer
|
||||
return (demographics_no_consumer,)
|
||||
|
||||
|
||||
@@ -160,9 +158,26 @@ def _(data_all, demographics_no_consumer):
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data_all):
|
||||
def _():
|
||||
mo.md(r"""
|
||||
# Filter Data (Global corrections)
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(data_validated):
|
||||
# drop rows where 'consumer' is null
|
||||
# data = data_validated.filter(pl.col('Consumer').is_not_null())
|
||||
data = data_validated
|
||||
data.collect()
|
||||
return (data,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
# Check if all business owners are missing a 'Consumer type' in demographics
|
||||
assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."
|
||||
# assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."
|
||||
return
|
||||
|
||||
|
||||
@@ -187,14 +202,14 @@ def _():
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(S, demo_plot_cols, demographics):
|
||||
def _(S, data, demo_plot_cols):
|
||||
_content = """
|
||||
## Demographic Distributions
|
||||
|
||||
"""
|
||||
for c in demo_plot_cols:
|
||||
_fig = S.plot_demographic_distribution(
|
||||
data=demographics,
|
||||
data=S.get_demographics(data)[0],
|
||||
column=c,
|
||||
title=f"{c.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
|
||||
)
|
||||
@@ -265,6 +280,22 @@ def _(S, char_rank):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, char_rank):
    # Run the ranking significance computation on the character ranking and
    # show the resulting heatmap and summary charts under one heading.
    _ranking_result = S.compute_ranking_significance(char_rank)
    _pairwise_df, _meta = _ranking_result

    # Debug aid: list the columns of the pairwise result.
    print(_pairwise_df.columns)

    _heatmap_view = mo.ui.altair_chart(
        S.plot_significance_heatmap(_pairwise_df, metadata=_meta)
    )
    _summary_view = mo.ui.altair_chart(
        S.plot_significance_summary(_pairwise_df, metadata=_meta)
    )

    mo.md(f"""
    ### Statistical Significance Character Ranking

    {_heatmap_view}

    {_summary_view}
    """)
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
@@ -307,28 +338,69 @@ def _():
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
# Join respondent
|
||||
def _(S, data):
|
||||
char_df = S.get_character_refine(data)[0]
|
||||
return (char_df,)
|
||||
|
||||
|
||||
@app.cell
def _(S, char_df):
    # Render one trait-frequency chart per brand character, with every chart
    # sharing the same trait ordering (most frequent trait overall first).
    from theme import ColorPalette

    characters = ['Bank Teller', 'Familiar Friend', 'The Coach', 'Personal Assistant']
    # Per character: (bar colour, highlight colour).
    character_colors = {
        'Bank Teller': (ColorPalette.CHARACTER_BANK_TELLER, ColorPalette.CHARACTER_BANK_TELLER_HIGHLIGHT),
        'Familiar Friend': (ColorPalette.CHARACTER_FAMILIAR_FRIEND, ColorPalette.CHARACTER_FAMILIAR_FRIEND_HIGHLIGHT),
        'The Coach': (ColorPalette.CHARACTER_COACH, ColorPalette.CHARACTER_COACH_HIGHLIGHT),
        'Personal Assistant': (ColorPalette.CHARACTER_PERSONAL_ASSISTANT, ColorPalette.CHARACTER_PERSONAL_ASSISTANT_HIGHLIGHT),
    }

    # Compute each character's frequency table exactly once; it is needed both
    # for the shared sort order and for the plot itself.
    # NOTE(review): assumes transform_character_trait_frequency is
    # deterministic for a given (char_df, character) pair - confirm.
    _freq_by_character = {}
    all_trait_counts = {}
    for char in characters:
        freq_df, _ = S.transform_character_trait_frequency(char_df, char)
        _freq_by_character[char] = freq_df
        for row in freq_df.iter_rows(named=True):
            all_trait_counts[row['trait']] = all_trait_counts.get(row['trait'], 0) + row['count']

    # Highest total count first; ties keep first-seen order (stable sort).
    consistent_sort_order = sorted(all_trait_counts, key=all_trait_counts.get, reverse=True)

    _content = ""
    for char in characters:
        freq_df = _freq_by_character[char]
        main_color, highlight_color = character_colors[char]
        chart = S.plot_single_character_trait_frequency(
            data=freq_df,
            character_name=char,
            bar_color=main_color,
            highlight_color=highlight_color,
            trait_sort_order=consistent_sort_order,
        )
        _content += f"\n{mo.ui.altair_chart(chart)}\n\n\n"

    mo.md(_content)
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
---
|
||||
## Statistical significance best characters
|
||||
|
||||
# Spoken Voice Results
|
||||
zie chat
|
||||
> voorbeeld: als de nr 1 en 2 niet significant verschillen maar wel van de nr 3 bijvoorbeeld is dat ook top. Beetje meedenkend over hoe ik het kan presenteren weetje wat ik bedoel?:)
|
||||
>
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
---
|
||||
|
||||
# Brand Character Results
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@@ -342,5 +414,174 @@ def _():
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, data):
    # Fetch the top-3 voice ranking data and derive its weighted scores.
    # Only the first element of get_top_3_voices' result is used here.
    _top3_result = S.get_top_3_voices(data)
    top3_voices = _top3_result[0]
    top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
    return (top3_voices, top3_voices_weighted)
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Which voice is ranked best in the ranking question for top 3?
|
||||
|
||||
(not best 3 out of 8 question)
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, top3_voices):
    # Show the ranking distribution chart for the top-3 voice ranking data.
    _ranking_chart = S.plot_ranking_distribution(top3_voices, x_label='Voice')
    _ranking_view = mo.ui.altair_chart(_ranking_chart)
    mo.md(f"""
    {_ranking_view}
    """)
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
### Statistical significance for voice ranking
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
# print(top3_voices.collect().head())
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
|
||||
# _pairwise_df, _metadata = S.compute_ranking_significance(
|
||||
# top3_voices,alpha=0.05,correction="none")
|
||||
|
||||
# # View significant pairs
|
||||
# # print(pairwise_df.filter(pl.col('significant') == True))
|
||||
|
||||
# # Create heatmap visualization
|
||||
# _heatmap = S.plot_significance_heatmap(
|
||||
# _pairwise_df,
|
||||
# metadata=_metadata,
|
||||
# title="Weighted Voice Ranking Significance<br>(Pairwise Comparisons)"
|
||||
# )
|
||||
|
||||
# # Create summary bar chart
|
||||
# _summary = S.plot_significance_summary(
|
||||
# _pairwise_df,
|
||||
# metadata=_metadata
|
||||
# )
|
||||
|
||||
# mo.md(f"""
|
||||
# {mo.ui.altair_chart(_heatmap)}
|
||||
|
||||
# {mo.ui.altair_chart(_summary)}
|
||||
# """)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Weighted Popularity Scores
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, top3_voices_weighted):
    # Show the weighted ranking score chart for the voices.
    _chart_title = "Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)"
    _weighted_chart = S.plot_weighted_ranking_score(
        top3_voices_weighted,
        title=_chart_title,
    )
    _weighted_view = mo.ui.altair_chart(_weighted_chart)

    mo.md(f"""
    {_weighted_view}
    """)
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(top3_voices_weighted):
    # Debug aid: print the first rows of the weighted voice scores.
    _preview = top3_voices_weighted.head()
    print(_preview)
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
def _():
|
||||
mo.md(r"""
|
||||
## Voice Scale 1-10
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, data):
    # Load the voice 1-10 scale data; the second element of the returned pair
    # is not needed in this notebook.
    voice_1_10, _unused = S.get_voice_scale_1_10(data)
    return (voice_1_10,)
|
||||
|
||||
|
||||
@app.cell
def _(S, voice_1_10):
    # Chart of average scores with counts for the voice 1-10 scale data,
    # with the score axis fixed to the 1-10 domain.
    S.plot_average_scores_with_counts(
        voice_1_10,
        x_label='Voice',
        width=1000,
        domain=[1, 10],
        title="Voice General Impression (Scale 1-10)",
    )
    return
|
||||
|
||||
|
||||
@app.cell
|
||||
def _():
|
||||
mo.md(r"""
|
||||
### Statistical Significance (Scale 1-10)
|
||||
""")
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(S, voice_1_10):
    # Pairwise significance tests on the voice 1-10 scale data, shown as a
    # heatmap followed by a summary chart.
    #
    # Intermediate results use underscore-prefixed (cell-local) names, like the
    # character-ranking significance cell, so this cell does not claim the
    # notebook-wide names `pairwise_df` / `metadata` that it never returns.
    _pairwise_df, _metadata = S.compute_pairwise_significance(
        voice_1_10,
        test_type="mannwhitney",  # or "ttest", "chi2", "auto"
        alpha=0.05,
        correction="bonferroni",  # or "holm", "none"
    )

    # To inspect only the significant pairs:
    # print(_pairwise_df.filter(pl.col('significant') == True))

    _heatmap = S.plot_significance_heatmap(
        _pairwise_df,
        metadata=_metadata,
        title="Voice Rating Significance<br>(Pairwise Comparisons)",
    )

    _summary = S.plot_significance_summary(
        _pairwise_df,
        metadata=_metadata,
    )

    mo.md(f"""
    {mo.ui.altair_chart(_heatmap)}

    {mo.ui.altair_chart(_summary)}
    """)
    return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run()
|
||||
|
||||
Reference in New Issue
Block a user