Compare commits

...

2 Commits

Author SHA1 Message Date
01b7d50637 fixed empty plots, updated filters 2026-02-03 16:51:24 +01:00
dca9ac11ba supposed wordcloud fix, but everything broke 2026-02-03 15:36:35 +01:00
4 changed files with 71 additions and 74 deletions

View File

@@ -28,8 +28,6 @@ FILTER_CONFIG = {
'income': 'options_income', 'income': 'options_income',
'consumer': 'options_consumer', 'consumer': 'options_consumer',
'business_owner': 'options_business_owner', 'business_owner': 'options_business_owner',
'employment_status': 'options_employment_status',
'personal_products': 'options_personal_products',
'ai_user': 'options_ai_user', 'ai_user': 'options_ai_user',
'investable_assets': 'options_investable_assets', 'investable_assets': 'options_investable_assets',
'industry': 'options_industry', 'industry': 'options_industry',
@@ -257,17 +255,17 @@ mo.md(r"""
""") """)
# %% # %%
_pairwise_df, _meta = S.compute_ranking_significance(char_rank) # _pairwise_df, _meta = S.compute_ranking_significance(char_rank)
# print(_pairwise_df.columns) # # print(_pairwise_df.columns)
mo.md(f""" # mo.md(f"""
{mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))} # {mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}
{mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))} # {mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
""") # """)
# %% # %%
mo.md(r""" mo.md(r"""
@@ -463,34 +461,34 @@ mo.md(r"""
# %% # %%
# Compute pairwise significance tests # Compute pairwise significance tests
pairwise_df, metadata = S.compute_pairwise_significance( # pairwise_df, metadata = S.compute_pairwise_significance(
voice_1_10, # voice_1_10,
test_type="mannwhitney", # or "ttest", "chi2", "auto" # test_type="mannwhitney", # or "ttest", "chi2", "auto"
alpha=0.05, # alpha=0.05,
correction="bonferroni" # or "holm", "none" # correction="bonferroni" # or "holm", "none"
) # )
# View significant pairs # # View significant pairs
# print(pairwise_df.filter(pl.col('significant') == True)) # # print(pairwise_df.filter(pl.col('significant') == True))
# Create heatmap visualization # # Create heatmap visualization
_heatmap = S.plot_significance_heatmap( # _heatmap = S.plot_significance_heatmap(
pairwise_df, # pairwise_df,
metadata=metadata, # metadata=metadata,
title="Voice Rating Significance<br>(Pairwise Comparisons)" # title="Voice Rating Significance<br>(Pairwise Comparisons)"
) # )
# Create summary bar chart # # Create summary bar chart
_summary = S.plot_significance_summary( # _summary = S.plot_significance_summary(
pairwise_df, # pairwise_df,
metadata=metadata # metadata=metadata
) # )
mo.md(f""" # mo.md(f"""
{mo.ui.altair_chart(_heatmap)} # {mo.ui.altair_chart(_heatmap)}
{mo.ui.altair_chart(_summary)} # {mo.ui.altair_chart(_summary)}
""") # """)
# %% # %%

View File

@@ -1305,6 +1305,16 @@ class QualtricsPlotsMixin:
# Create frequency dictionary # Create frequency dictionary
trait_freq = Counter(traits_list) trait_freq = Counter(traits_list)
# Handle empty data gracefully - return empty figure with message
if not trait_freq:
fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
ax.text(0.5, 0.5, "No trait data available for current filter",
ha='center', va='center', fontsize=14, color='gray',
transform=ax.transAxes)
ax.axis('off')
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
return fig
# Set random seed for color selection # Set random seed for color selection
random.seed(random_state) random.seed(random_state)
@@ -1335,7 +1345,15 @@ class QualtricsPlotsMixin:
fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100) fig, ax = plt.subplots(figsize=(width/100, height/100), dpi=100)
ax.imshow(wordcloud, interpolation='bilinear') ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off') ax.axis('off')
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
# Add title with filter subtitle (similar to _add_filter_footnote for Altair charts)
filter_text = self._get_filter_description()
if filter_text:
# Title on top, filter subtitle below in light grey
fig.suptitle(title, fontsize=16, y=0.98, color=ColorPalette.TEXT)
ax.set_title(filter_text, fontsize=10, pad=10, color='lightgrey', loc='left')
else:
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
plt.tight_layout(pad=0) plt.tight_layout(pad=0)

View File

@@ -87,7 +87,7 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
'filters': {'income': [income]} 'filters': {'income': [income]}
}) })
# Consumer segments - combine _A and _B options # Consumer segments - combine _A and _B options, and also include standalone
# Group options by base name (removing _A/_B suffix) # Group options by base name (removing _A/_B suffix)
consumer_groups = {} consumer_groups = {}
for consumer in survey.options_consumer: for consumer in survey.options_consumer:
@@ -101,10 +101,19 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
# Not an _A/_B option, keep as-is # Not an _A/_B option, keep as-is
consumer_groups[consumer] = [consumer] consumer_groups[consumer] = [consumer]
# Add combined _A+_B options
for base_name, options in consumer_groups.items(): for base_name, options in consumer_groups.items():
if len(options) > 1: # Only combine if there are multiple (_A and _B)
combinations.append({
'name': f'Consumer-{base_name}',
'filters': {'consumer': options}
})
# Add standalone options (including individual _A and _B)
for consumer in survey.options_consumer:
combinations.append({ combinations.append({
'name': f'Consumer-{base_name}', 'name': f'Consumer-{consumer}',
'filters': {'consumer': options} 'filters': {'consumer': [consumer]}
}) })
# Business Owner - one at a time # Business Owner - one at a time
@@ -114,20 +123,6 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
'filters': {'business_owner': [business_owner]} 'filters': {'business_owner': [business_owner]}
}) })
# Employment Status - one at a time
for employment_status in survey.options_employment_status:
combinations.append({
'name': f'Employment-{employment_status}',
'filters': {'employment_status': [employment_status]}
})
# Personal Products - one at a time
for personal_products in survey.options_personal_products:
combinations.append({
'name': f'Products-{personal_products}',
'filters': {'personal_products': [personal_products]}
})
# AI User - one at a time # AI User - one at a time
for ai_user in survey.options_ai_user: for ai_user in survey.options_ai_user:
combinations.append({ combinations.append({

View File

@@ -751,8 +751,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
self.filter_ethnicity:list = None self.filter_ethnicity:list = None
self.filter_income:list = None self.filter_income:list = None
self.filter_business_owner:list = None # QID4 self.filter_business_owner:list = None # QID4
self.filter_employment_status:list = None # QID13
self.filter_personal_products:list = None # QID14
self.filter_ai_user:list = None # QID22 self.filter_ai_user:list = None # QID22
self.filter_investable_assets:list = None # QID16 self.filter_investable_assets:list = None # QID16
self.filter_industry:list = None # QID17 self.filter_industry:list = None # QID17
@@ -844,8 +842,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else [] self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else [] self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else [] self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else []
self.options_employment_status = sorted(df['QID13'].drop_nulls().unique().to_list()) if 'QID13' in df.columns else []
self.options_personal_products = sorted(df['QID14'].drop_nulls().unique().to_list()) if 'QID14' in df.columns else []
self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else [] self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else []
self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else [] self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else []
self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else [] self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else []
@@ -865,7 +861,7 @@ class QualtricsSurvey(QualtricsPlotsMixin):
return q.select(QIDs).rename(rename_dict) return q.select(QIDs).rename(rename_dict)
def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, employment_status:list=None, personal_products:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame: def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame:
"""Filter data based on provided parameters """Filter data based on provided parameters
Possible parameters: Possible parameters:
@@ -875,8 +871,6 @@ class QualtricsSurvey(QualtricsPlotsMixin):
- ethnicity: list (QID3) - ethnicity: list (QID3)
- income: list (QID15) - income: list (QID15)
- business_owner: list (QID4) - business_owner: list (QID4)
- employment_status: list (QID13)
- personal_products: list (QID14)
- ai_user: list (QID22) - ai_user: list (QID22)
- investable_assets: list (QID16) - investable_assets: list (QID16)
- industry: list (QID17) - industry: list (QID17)
@@ -884,49 +878,41 @@ class QualtricsSurvey(QualtricsPlotsMixin):
Also saves the result to self.data_filtered. Also saves the result to self.data_filtered.
""" """
# Apply filters # Apply filters - skip if empty list (columns with all NULLs produce empty options)
self.filter_age = age self.filter_age = age
if age is not None: if age is not None and len(age) > 0:
q = q.filter(pl.col('QID1').is_in(age)) q = q.filter(pl.col('QID1').is_in(age))
self.filter_gender = gender self.filter_gender = gender
if gender is not None: if gender is not None and len(gender) > 0:
q = q.filter(pl.col('QID2').is_in(gender)) q = q.filter(pl.col('QID2').is_in(gender))
self.filter_consumer = consumer self.filter_consumer = consumer
if consumer is not None: if consumer is not None and len(consumer) > 0:
q = q.filter(pl.col('Consumer').is_in(consumer)) q = q.filter(pl.col('Consumer').is_in(consumer))
self.filter_ethnicity = ethnicity self.filter_ethnicity = ethnicity
if ethnicity is not None: if ethnicity is not None and len(ethnicity) > 0:
q = q.filter(pl.col('QID3').is_in(ethnicity)) q = q.filter(pl.col('QID3').is_in(ethnicity))
self.filter_income = income self.filter_income = income
if income is not None: if income is not None and len(income) > 0:
q = q.filter(pl.col('QID15').is_in(income)) q = q.filter(pl.col('QID15').is_in(income))
self.filter_business_owner = business_owner self.filter_business_owner = business_owner
if business_owner is not None: if business_owner is not None and len(business_owner) > 0:
q = q.filter(pl.col('QID4').is_in(business_owner)) q = q.filter(pl.col('QID4').is_in(business_owner))
self.filter_employment_status = employment_status
if employment_status is not None:
q = q.filter(pl.col('QID13').is_in(employment_status))
self.filter_personal_products = personal_products
if personal_products is not None:
q = q.filter(pl.col('QID14').is_in(personal_products))
self.filter_ai_user = ai_user self.filter_ai_user = ai_user
if ai_user is not None: if ai_user is not None and len(ai_user) > 0:
q = q.filter(pl.col('QID22').is_in(ai_user)) q = q.filter(pl.col('QID22').is_in(ai_user))
self.filter_investable_assets = investable_assets self.filter_investable_assets = investable_assets
if investable_assets is not None: if investable_assets is not None and len(investable_assets) > 0:
q = q.filter(pl.col('QID16').is_in(investable_assets)) q = q.filter(pl.col('QID16').is_in(investable_assets))
self.filter_industry = industry self.filter_industry = industry
if industry is not None: if industry is not None and len(industry) > 0:
q = q.filter(pl.col('QID17').is_in(industry)) q = q.filter(pl.col('QID17').is_in(industry))
self.data_filtered = q self.data_filtered = q