From e7166a79571184bb01b26dce315378bb70be53ae Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Tue, 3 Feb 2026 15:15:59 +0100 Subject: [PATCH] added 6 more filters --- 03_quant_report.script.py | 7 +++- README.md | 77 ++++++++++++++++++++++++++++---------- plots.py | 12 ++++++ run_filter_combinations.py | 42 +++++++++++++++++++++ utils.py | 55 +++++++++++++++++++++++---- 5 files changed, 165 insertions(+), 28 deletions(-) diff --git a/03_quant_report.script.py b/03_quant_report.script.py index 25e58e4..bef3975 100644 --- a/03_quant_report.script.py +++ b/03_quant_report.script.py @@ -27,7 +27,12 @@ FILTER_CONFIG = { 'ethnicity': 'options_ethnicity', 'income': 'options_income', 'consumer': 'options_consumer', - # Add new filters here: 'newfilter': 'options_newfilter', + 'business_owner': 'options_business_owner', + 'employment_status': 'options_employment_status', + 'personal_products': 'options_personal_products', + 'ai_user': 'options_ai_user', + 'investable_assets': 'options_investable_assets', + 'industry': 'options_industry', } def parse_cli_args(): diff --git a/README.md b/README.md index 0fbce1a..6044623 100644 --- a/README.md +++ b/README.md @@ -150,36 +150,50 @@ combinations.append({ ## Adding a New Filter Dimension -To add an entirely new filter dimension (e.g., a new demographic question), edit **only** `FILTER_CONFIG` in `03_quant_report.script.py`: +To add an entirely new filter dimension (e.g., a new demographic question), you need to update several files: ### Checklist -1. **Ensure `QualtricsSurvey`** has the corresponding `options_*` attribute and `filter_data()` accepts the parameter - -2. **Open** `03_quant_report.script.py` - -3. **Find** `FILTER_CONFIG` near the top of the file: +1. **Update `utils.py` — `load_data()`** to populate the `options_*` attribute: ```python -FILTER_CONFIG = { - 'age': 'options_age', - 'gender': 'options_gender', - 'ethnicity': 'options_ethnicity', - 'income': 'options_income', - 'consumer': 'options_consumer', - # Add new filters here: 'newfilter': 'options_newfilter', -} +# In load_data(), add after existing options: +self.options_region = sorted(df['QID99'].drop_nulls().unique().to_list()) if 'QID99' in df.columns else [] ``` -4. **Add** your new filter: +2. **Update `utils.py` — `filter_data()`** to accept and apply the filter: + +```python +# Add parameter to function signature: +def filter_data(self, q: pl.LazyFrame, ..., region:list=None) -> pl.LazyFrame: + +# Add filter logic in function body: +self.filter_region = region +if region is not None: + q = q.filter(pl.col('QID99').is_in(region)) +``` + +3. **Update `plots.py` — `_get_filter_slug()`** to include the filter in directory slugs: + +```python +# Add to the filters list: +('region', 'Reg', getattr(self, 'filter_region', None), 'options_region'), +``` + +4. **Update `plots.py` — `_get_filter_description()`** for human-readable descriptions: + +```python +# Add to the filters list: +('Region', getattr(self, 'filter_region', None), 'options_region'), +``` + +5. **Update `03_quant_report.script.py` — `FILTER_CONFIG`**: ```python FILTER_CONFIG = { 'age': 'options_age', 'gender': 'options_gender', - 'ethnicity': 'options_ethnicity', - 'income': 'options_income', - 'consumer': 'options_consumer', + # ... existing filters ... 'region': 'options_region', # ← New filter } ``` @@ -190,4 +204,29 @@ This **automatically**: - Passes it to `S.filter_data()` - Writes it to the `.txt` filter description file -5. **Update** `run_filter_combinations.py` to generate combinations for the new filter (optional) \ No newline at end of file +6. **Update `run_filter_combinations.py`** to generate combinations (optional): + +```python +# Add after existing filter loops: +for region in survey.options_region: + combinations.append({ + 'name': f'Region-{region}', + 'filters': {'region': [region]} + }) +``` + +### Currently Available Filters + +| CLI Argument | Options Attribute | QID Column | Description | +|--------------|-------------------|------------|-------------| +| `--age` | `options_age` | QID1 | Age groups | +| `--gender` | `options_gender` | QID2 | Gender | +| `--ethnicity` | `options_ethnicity` | QID3 | Ethnicity | +| `--income` | `options_income` | QID15 | Income brackets | +| `--consumer` | `options_consumer` | Consumer | Consumer segments | +| `--business_owner` | `options_business_owner` | QID4 | Business owner status | +| `--employment_status` | `options_employment_status` | QID13 | Employment status | +| `--personal_products` | `options_personal_products` | QID14 | Personal products | +| `--ai_user` | `options_ai_user` | QID22 | AI user status | +| `--investable_assets` | `options_investable_assets` | QID16 | Investable assets | +| `--industry` | `options_industry` | QID17 | Industry | \ No newline at end of file diff --git a/plots.py b/plots.py index 0bee376..76cb41f 100644 --- a/plots.py +++ b/plots.py @@ -45,6 +45,12 @@ class QualtricsPlotsMixin: ('consumer', 'Cons', getattr(self, 'filter_consumer', None), 'options_consumer'), ('ethnicity', 'Eth', getattr(self, 'filter_ethnicity', None), 'options_ethnicity'), ('income', 'Inc', getattr(self, 'filter_income', None), 'options_income'), + ('business_owner', 'BizOwn', getattr(self, 'filter_business_owner', None), 'options_business_owner'), + ('employment_status', 'Emp', getattr(self, 'filter_employment_status', None), 'options_employment_status'), + ('personal_products', 'Prod', getattr(self, 'filter_personal_products', None), 'options_personal_products'), + ('ai_user', 'AI', getattr(self, 'filter_ai_user', None), 'options_ai_user'), + ('investable_assets', 'InvAsts', getattr(self, 'filter_investable_assets', None), 'options_investable_assets'), + ('industry', 'Ind', getattr(self, 'filter_industry', None), 'options_industry'), ] for _, short_code, value, options_attr in filters: @@ -101,6 +107,12 @@ class QualtricsPlotsMixin: ('Consumer', getattr(self, 'filter_consumer', None), 'options_consumer'), ('Ethnicity', getattr(self, 'filter_ethnicity', None), 'options_ethnicity'), ('Income', getattr(self, 'filter_income', None), 'options_income'), + ('Business Owner', getattr(self, 'filter_business_owner', None), 'options_business_owner'), + ('Employment Status', getattr(self, 'filter_employment_status', None), 'options_employment_status'), + ('Personal Products', getattr(self, 'filter_personal_products', None), 'options_personal_products'), + ('AI User', getattr(self, 'filter_ai_user', None), 'options_ai_user'), + ('Investable Assets', getattr(self, 'filter_investable_assets', None), 'options_investable_assets'), + ('Industry', getattr(self, 'filter_industry', None), 'options_industry'), ] for display_name, value, options_attr in filters: diff --git a/run_filter_combinations.py b/run_filter_combinations.py index fe3ebd3..e84a062 100644 --- a/run_filter_combinations.py +++ b/run_filter_combinations.py @@ -107,6 +107,48 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: 'filters': {'consumer': options} }) + # Business Owner - one at a time + for business_owner in survey.options_business_owner: + combinations.append({ + 'name': f'BusinessOwner-{business_owner}', + 'filters': {'business_owner': [business_owner]} + }) + + # Employment Status - one at a time + for employment_status in survey.options_employment_status: + combinations.append({ + 'name': f'Employment-{employment_status}', + 'filters': {'employment_status': [employment_status]} + }) + + # Personal Products - one at a time + for personal_products in survey.options_personal_products: + combinations.append({ + 'name': f'Products-{personal_products}', + 'filters': {'personal_products': [personal_products]} + }) + + # AI User - one at a time + for ai_user in survey.options_ai_user: + combinations.append({ + 'name': f'AIUser-{ai_user}', + 'filters': {'ai_user': [ai_user]} + }) + + # Investable Assets - one at a time + for investable_assets in survey.options_investable_assets: + combinations.append({ + 'name': f'Assets-{investable_assets}', + 'filters': {'investable_assets': [investable_assets]} + }) + + # Industry - one at a time + for industry in survey.options_industry: + combinations.append({ + 'name': f'Industry-{industry}', + 'filters': {'industry': [industry]} + }) + return combinations diff --git a/utils.py b/utils.py index 963022c..3cdbf47 100644 --- a/utils.py +++ b/utils.py @@ -750,7 +750,12 @@ class QualtricsSurvey(QualtricsPlotsMixin): self.filter_consumer:list = None self.filter_ethnicity:list = None self.filter_income:list = None - + self.filter_business_owner:list = None # QID4 + self.filter_employment_status:list = None # QID13 + self.filter_personal_products:list = None # QID14 + self.filter_ai_user:list = None # QID22 + self.filter_investable_assets:list = None # QID16 + self.filter_industry:list = None # QID17 @@ -838,6 +843,12 @@ class QualtricsSurvey(QualtricsPlotsMixin): self.options_consumer = sorted(df['Consumer'].drop_nulls().unique().to_list()) if 'Consumer' in df.columns else [] self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else [] self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else [] + self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else [] + self.options_employment_status = sorted(df['QID13'].drop_nulls().unique().to_list()) if 'QID13' in df.columns else [] + self.options_personal_products = sorted(df['QID14'].drop_nulls().unique().to_list()) if 'QID14' in df.columns else [] + self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else [] + self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else [] + self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else [] return df.lazy() @@ -854,15 +865,21 @@ class QualtricsSurvey(QualtricsPlotsMixin): return q.select(QIDs).rename(rename_dict) - def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None) -> pl.LazyFrame: + def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, employment_status:list=None, personal_products:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame: """Filter data based on provided parameters Possible parameters: - - age: list of age groups to include - - gender: list - - consumer: list - - ethnicity: list - - income: list + - age: list of age groups to include (QID1) + - gender: list (QID2) + - consumer: list (Consumer) + - ethnicity: list (QID3) + - income: list (QID15) + - business_owner: list (QID4) + - employment_status: list (QID13) + - personal_products: list (QID14) + - ai_user: list (QID22) + - investable_assets: list (QID16) + - industry: list (QID17) Also saves the result to self.data_filtered. """ @@ -888,7 +905,29 @@ class QualtricsSurvey(QualtricsPlotsMixin): if income is not None: q = q.filter(pl.col('QID15').is_in(income)) - self + self.filter_business_owner = business_owner + if business_owner is not None: + q = q.filter(pl.col('QID4').is_in(business_owner)) + + self.filter_employment_status = employment_status + if employment_status is not None: + q = q.filter(pl.col('QID13').is_in(employment_status)) + + self.filter_personal_products = personal_products + if personal_products is not None: + q = q.filter(pl.col('QID14').is_in(personal_products)) + + self.filter_ai_user = ai_user + if ai_user is not None: + q = q.filter(pl.col('QID22').is_in(ai_user)) + + self.filter_investable_assets = investable_assets + if investable_assets is not None: + q = q.filter(pl.col('QID16').is_in(investable_assets)) + + self.filter_industry = industry + if industry is not None: + q = q.filter(pl.col('QID17').is_in(industry)) self.data_filtered = q return self.data_filtered