diff --git a/03_quant_report.script.py b/03_quant_report.script.py index ea0f1ca..96c7016 100644 --- a/03_quant_report.script.py +++ b/03_quant_report.script.py @@ -28,8 +28,6 @@ FILTER_CONFIG = { 'income': 'options_income', 'consumer': 'options_consumer', 'business_owner': 'options_business_owner', - 'employment_status': 'options_employment_status', - 'personal_products': 'options_personal_products', 'ai_user': 'options_ai_user', 'investable_assets': 'options_investable_assets', 'industry': 'options_industry', diff --git a/run_filter_combinations.py b/run_filter_combinations.py index e84a062..ec85cc2 100644 --- a/run_filter_combinations.py +++ b/run_filter_combinations.py @@ -87,7 +87,7 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: 'filters': {'income': [income]} }) - # Consumer segments - combine _A and _B options + # Consumer segments - combine _A and _B options, and also include standalone # Group options by base name (removing _A/_B suffix) consumer_groups = {} for consumer in survey.options_consumer: @@ -101,10 +101,19 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: # Not an _A/_B option, keep as-is consumer_groups[consumer] = [consumer] + # Add combined _A+_B options for base_name, options in consumer_groups.items(): + if len(options) > 1: # Only combine if there are multiple (_A and _B) + combinations.append({ + 'name': f'Consumer-{base_name}', + 'filters': {'consumer': options} + }) + + # Add standalone options (including individual _A and _B) + for consumer in survey.options_consumer: combinations.append({ - 'name': f'Consumer-{base_name}', - 'filters': {'consumer': options} + 'name': f'Consumer-{consumer}', + 'filters': {'consumer': [consumer]} }) # Business Owner - one at a time @@ -114,20 +123,6 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: 'filters': {'business_owner': [business_owner]} }) - # Employment Status - one at a time - for employment_status in survey.options_employment_status: - combinations.append({ - 'name': f'Employment-{employment_status}', - 'filters': {'employment_status': [employment_status]} - }) - - # Personal Products - one at a time - for personal_products in survey.options_personal_products: - combinations.append({ - 'name': f'Products-{personal_products}', - 'filters': {'personal_products': [personal_products]} - }) - # AI User - one at a time for ai_user in survey.options_ai_user: combinations.append({ diff --git a/utils.py b/utils.py index 3cdbf47..3a4d91d 100644 --- a/utils.py +++ b/utils.py @@ -751,8 +751,6 @@ class QualtricsSurvey(QualtricsPlotsMixin): self.filter_ethnicity:list = None self.filter_income:list = None self.filter_business_owner:list = None # QID4 - self.filter_employment_status:list = None # QID13 - self.filter_personal_products:list = None # QID14 self.filter_ai_user:list = None # QID22 self.filter_investable_assets:list = None # QID16 self.filter_industry:list = None # QID17 @@ -844,8 +842,6 @@ class QualtricsSurvey(QualtricsPlotsMixin): self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else [] self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else [] self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else [] - self.options_employment_status = sorted(df['QID13'].drop_nulls().unique().to_list()) if 'QID13' in df.columns else [] - self.options_personal_products = sorted(df['QID14'].drop_nulls().unique().to_list()) if 'QID14' in df.columns else [] self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else [] self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else [] self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else [] @@ -865,7 +861,7 @@ class QualtricsSurvey(QualtricsPlotsMixin): return q.select(QIDs).rename(rename_dict) - def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, employment_status:list=None, personal_products:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame: + def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame: """Filter data based on provided parameters Possible parameters: @@ -875,8 +871,6 @@ class QualtricsSurvey(QualtricsPlotsMixin): - ethnicity: list (QID3) - income: list (QID15) - business_owner: list (QID4) - - employment_status: list (QID13) - - personal_products: list (QID14) - ai_user: list (QID22) - investable_assets: list (QID16) - industry: list (QID17) @@ -884,49 +878,41 @@ class QualtricsSurvey(QualtricsPlotsMixin): Also saves the result to self.data_filtered. """ - # Apply filters + # Apply filters - skip if empty list (columns with all NULLs produce empty options) self.filter_age = age - if age is not None: + if age is not None and len(age) > 0: q = q.filter(pl.col('QID1').is_in(age)) self.filter_gender = gender - if gender is not None: + if gender is not None and len(gender) > 0: q = q.filter(pl.col('QID2').is_in(gender)) self.filter_consumer = consumer - if consumer is not None: + if consumer is not None and len(consumer) > 0: q = q.filter(pl.col('Consumer').is_in(consumer)) self.filter_ethnicity = ethnicity - if ethnicity is not None: + if ethnicity is not None and len(ethnicity) > 0: q = q.filter(pl.col('QID3').is_in(ethnicity)) self.filter_income = income - if income is not None: + if income is not None and len(income) > 0: q = q.filter(pl.col('QID15').is_in(income)) self.filter_business_owner = business_owner - if business_owner is not None: + if business_owner is not None and len(business_owner) > 0: q = q.filter(pl.col('QID4').is_in(business_owner)) - self.filter_employment_status = employment_status - if employment_status is not None: - q = q.filter(pl.col('QID13').is_in(employment_status)) - - self.filter_personal_products = personal_products - if personal_products is not None: - q = q.filter(pl.col('QID14').is_in(personal_products)) - self.filter_ai_user = ai_user - if ai_user is not None: + if ai_user is not None and len(ai_user) > 0: q = q.filter(pl.col('QID22').is_in(ai_user)) self.filter_investable_assets = investable_assets - if investable_assets is not None: + if investable_assets is not None and len(investable_assets) > 0: q = q.filter(pl.col('QID16').is_in(investable_assets)) self.filter_industry = industry - if industry is not None: + if industry is not None and len(industry) > 0: q = q.filter(pl.col('QID17').is_in(industry)) self.data_filtered = q