Compare commits

...

2 Commits

Author SHA1 Message Date
01b7d50637 fixed empty plots, updated filters 2026-02-03 16:51:24 +01:00
dca9ac11ba supposed wordcloud fix, but everything broke 2026-02-03 15:36:35 +01:00
4 changed files with 71 additions and 74 deletions

View File

@@ -28,8 +28,6 @@ FILTER_CONFIG = {
'income': 'options_income',
'consumer': 'options_consumer',
'business_owner': 'options_business_owner',
'employment_status': 'options_employment_status',
'personal_products': 'options_personal_products',
'ai_user': 'options_ai_user',
'investable_assets': 'options_investable_assets',
'industry': 'options_industry',
@@ -257,17 +255,17 @@ mo.md(r"""
""")
# %%
_pairwise_df, _meta = S.compute_ranking_significance(char_rank)
# _pairwise_df, _meta = S.compute_ranking_significance(char_rank)
# print(_pairwise_df.columns)
# # print(_pairwise_df.columns)
mo.md(f"""
# mo.md(f"""
{mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}
# {mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}
{mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
""")
# {mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
# """)
# %%
mo.md(r"""
@@ -463,34 +461,34 @@ mo.md(r"""
# %%
# Compute pairwise significance tests
pairwise_df, metadata = S.compute_pairwise_significance(
voice_1_10,
test_type="mannwhitney", # or "ttest", "chi2", "auto"
alpha=0.05,
correction="bonferroni" # or "holm", "none"
)
# pairwise_df, metadata = S.compute_pairwise_significance(
# voice_1_10,
# test_type="mannwhitney", # or "ttest", "chi2", "auto"
# alpha=0.05,
# correction="bonferroni" # or "holm", "none"
# )
# View significant pairs
# print(pairwise_df.filter(pl.col('significant') == True))
# # View significant pairs
# # print(pairwise_df.filter(pl.col('significant') == True))
# Create heatmap visualization
_heatmap = S.plot_significance_heatmap(
pairwise_df,
metadata=metadata,
title="Voice Rating Significance<br>(Pairwise Comparisons)"
)
# # Create heatmap visualization
# _heatmap = S.plot_significance_heatmap(
# pairwise_df,
# metadata=metadata,
# title="Voice Rating Significance<br>(Pairwise Comparisons)"
# )
# Create summary bar chart
_summary = S.plot_significance_summary(
pairwise_df,
metadata=metadata
)
# # Create summary bar chart
# _summary = S.plot_significance_summary(
# pairwise_df,
# metadata=metadata
# )
mo.md(f"""
{mo.ui.altair_chart(_heatmap)}
# mo.md(f"""
# {mo.ui.altair_chart(_heatmap)}
{mo.ui.altair_chart(_summary)}
""")
# {mo.ui.altair_chart(_summary)}
# """)
# %%

View File

@@ -1305,6 +1305,16 @@ class QualtricsPlotsMixin:
# Create frequency dictionary
trait_freq = Counter(traits_list)
# Handle empty data gracefully - return empty figure with message
if not trait_freq:
fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
ax.text(0.5, 0.5, "No trait data available for current filter",
ha='center', va='center', fontsize=14, color='gray',
transform=ax.transAxes)
ax.axis('off')
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
return fig
# Set random seed for color selection
random.seed(random_state)
@@ -1335,6 +1345,14 @@ class QualtricsPlotsMixin:
fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
# Add title with filter subtitle (similar to _add_filter_footnote for Altair charts)
filter_text = self._get_filter_description()
if filter_text:
# Title on top, filter subtitle below in light grey
fig.suptitle(title, fontsize=16, y=0.98, color=ColorPalette.TEXT)
ax.set_title(filter_text, fontsize=10, pad=10, color='lightgrey', loc='left')
else:
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
plt.tight_layout(pad=0)

View File

@@ -87,7 +87,7 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
'filters': {'income': [income]}
})
# Consumer segments - combine _A and _B options
# Consumer segments - combine _A and _B options, and also include each option standalone
# Group options by base name (removing _A/_B suffix)
consumer_groups = {}
for consumer in survey.options_consumer:
@@ -101,12 +101,21 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
# Not an _A/_B option, keep as-is
consumer_groups[consumer] = [consumer]
# Add combined _A+_B options
for base_name, options in consumer_groups.items():
if len(options) > 1: # Only combine if there are multiple (_A and _B)
combinations.append({
'name': f'Consumer-{base_name}',
'filters': {'consumer': options}
})
# Add standalone options (including individual _A and _B)
for consumer in survey.options_consumer:
combinations.append({
'name': f'Consumer-{consumer}',
'filters': {'consumer': [consumer]}
})
# Business Owner - one at a time
for business_owner in survey.options_business_owner:
combinations.append({
@@ -114,20 +123,6 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
'filters': {'business_owner': [business_owner]}
})
# Employment Status - one at a time
for employment_status in survey.options_employment_status:
combinations.append({
'name': f'Employment-{employment_status}',
'filters': {'employment_status': [employment_status]}
})
# Personal Products - one at a time
for personal_products in survey.options_personal_products:
combinations.append({
'name': f'Products-{personal_products}',
'filters': {'personal_products': [personal_products]}
})
# AI User - one at a time
for ai_user in survey.options_ai_user:
combinations.append({

View File

@@ -751,8 +751,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
self.filter_ethnicity:list = None
self.filter_income:list = None
self.filter_business_owner:list = None # QID4
self.filter_employment_status:list = None # QID13
self.filter_personal_products:list = None # QID14
self.filter_ai_user:list = None # QID22
self.filter_investable_assets:list = None # QID16
self.filter_industry:list = None # QID17
@@ -844,8 +842,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else []
self.options_employment_status = sorted(df['QID13'].drop_nulls().unique().to_list()) if 'QID13' in df.columns else []
self.options_personal_products = sorted(df['QID14'].drop_nulls().unique().to_list()) if 'QID14' in df.columns else []
self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else []
self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else []
self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else []
@@ -865,7 +861,7 @@ class QualtricsSurvey(QualtricsPlotsMixin):
return q.select(QIDs).rename(rename_dict)
def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, employment_status:list=None, personal_products:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame:
def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame:
"""Filter data based on provided parameters
Possible parameters:
@@ -875,8 +871,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
- ethnicity: list (QID3)
- income: list (QID15)
- business_owner: list (QID4)
- employment_status: list (QID13)
- personal_products: list (QID14)
- ai_user: list (QID22)
- investable_assets: list (QID16)
- industry: list (QID17)
@@ -884,49 +878,41 @@ class QualtricsSurvey(QualtricsPlotsMixin):
Also saves the result to self.data_filtered.
"""
# Apply filters
# Apply filters - skip any filter whose list is empty (columns with all NULLs produce empty option lists)
self.filter_age = age
if age is not None:
if age is not None and len(age) > 0:
q = q.filter(pl.col('QID1').is_in(age))
self.filter_gender = gender
if gender is not None:
if gender is not None and len(gender) > 0:
q = q.filter(pl.col('QID2').is_in(gender))
self.filter_consumer = consumer
if consumer is not None:
if consumer is not None and len(consumer) > 0:
q = q.filter(pl.col('Consumer').is_in(consumer))
self.filter_ethnicity = ethnicity
if ethnicity is not None:
if ethnicity is not None and len(ethnicity) > 0:
q = q.filter(pl.col('QID3').is_in(ethnicity))
self.filter_income = income
if income is not None:
if income is not None and len(income) > 0:
q = q.filter(pl.col('QID15').is_in(income))
self.filter_business_owner = business_owner
if business_owner is not None:
if business_owner is not None and len(business_owner) > 0:
q = q.filter(pl.col('QID4').is_in(business_owner))
self.filter_employment_status = employment_status
if employment_status is not None:
q = q.filter(pl.col('QID13').is_in(employment_status))
self.filter_personal_products = personal_products
if personal_products is not None:
q = q.filter(pl.col('QID14').is_in(personal_products))
self.filter_ai_user = ai_user
if ai_user is not None:
if ai_user is not None and len(ai_user) > 0:
q = q.filter(pl.col('QID22').is_in(ai_user))
self.filter_investable_assets = investable_assets
if investable_assets is not None:
if investable_assets is not None and len(investable_assets) > 0:
q = q.filter(pl.col('QID16').is_in(investable_assets))
self.filter_industry = industry
if industry is not None:
if industry is not None and len(industry) > 0:
q = q.filter(pl.col('QID17').is_in(industry))
self.data_filtered = q