From e7166a79571184bb01b26dce315378bb70be53ae Mon Sep 17 00:00:00 2001
From: Luigi Maiorano <luigi.maiorano@qumo.io>
Date: Tue, 3 Feb 2026 15:15:59 +0100
Subject: [PATCH] added 6 more filters

---
 03_quant_report.script.py  |  7 +++-
 README.md                  | 77 ++++++++++++++++++++++++++++----------
 plots.py                   | 12 ++++++
 run_filter_combinations.py | 42 +++++++++++++++++++++
 utils.py                   | 55 +++++++++++++++++++++++----
 5 files changed, 165 insertions(+), 28 deletions(-)

diff --git a/03_quant_report.script.py b/03_quant_report.script.py
index 25e58e4..bef3975 100644
--- a/03_quant_report.script.py
+++ b/03_quant_report.script.py
@@ -27,7 +27,12 @@ FILTER_CONFIG = {
     'ethnicity': 'options_ethnicity',
     'income': 'options_income',
     'consumer': 'options_consumer',
-    # Add new filters here: 'newfilter': 'options_newfilter',
+    'business_owner': 'options_business_owner',
+    'employment_status': 'options_employment_status',
+    'personal_products': 'options_personal_products',
+    'ai_user': 'options_ai_user',
+    'investable_assets': 'options_investable_assets',
+    'industry': 'options_industry',
 }
 
 def parse_cli_args():
diff --git a/README.md b/README.md
index 0fbce1a..6044623 100644
--- a/README.md
+++ b/README.md
@@ -150,36 +150,50 @@ combinations.append({
 
 ## Adding a New Filter Dimension
 
-To add an entirely new filter dimension (e.g., a new demographic question), edit **only** `FILTER_CONFIG` in `03_quant_report.script.py`:
+To add an entirely new filter dimension (e.g., a new demographic question), you need to update several files:
 
 ### Checklist
 
-1. **Ensure `QualtricsSurvey`** has the corresponding `options_*` attribute and `filter_data()` accepts the parameter
-
-2. **Open** `03_quant_report.script.py`
-
-3. **Find** `FILTER_CONFIG` near the top of the file:
+1. **Update `utils.py` — `load_data()`** to populate the `options_*` attribute:
 
 ```python
-FILTER_CONFIG = {
-    'age': 'options_age',
-    'gender': 'options_gender',
-    'ethnicity': 'options_ethnicity',
-    'income': 'options_income',
-    'consumer': 'options_consumer',
-    # Add new filters here: 'newfilter': 'options_newfilter',
-}
+# In load_data(), add after existing options:
+self.options_region = sorted(df['QID99'].drop_nulls().unique().to_list()) if 'QID99' in df.columns else []
 ```
 
-4. **Add** your new filter:
+2. **Update `utils.py` — `filter_data()`** to accept and apply the filter:
+
+```python
+# Add parameter to function signature:
+def filter_data(self, q: pl.LazyFrame, ..., region:list=None) -> pl.LazyFrame:
+
+# Add filter logic in function body:
+self.filter_region = region
+if region is not None:
+    q = q.filter(pl.col('QID99').is_in(region))
+```
+
+3. **Update `plots.py` — `_get_filter_slug()`** to include the filter in directory slugs:
+
+```python
+# Add to the filters list:
+('region', 'Reg', getattr(self, 'filter_region', None), 'options_region'),
+```
+
+4. **Update `plots.py` — `_get_filter_description()`** for human-readable descriptions:
+
+```python
+# Add to the filters list:
+('Region', getattr(self, 'filter_region', None), 'options_region'),
+```
+
+5. **Update `03_quant_report.script.py` — `FILTER_CONFIG`**:
 
 ```python
 FILTER_CONFIG = {
     'age': 'options_age',
     'gender': 'options_gender',
-    'ethnicity': 'options_ethnicity',
-    'income': 'options_income',
-    'consumer': 'options_consumer',
+    # ... existing filters ...
     'region': 'options_region',  # ← New filter
 }
 ```
@@ -190,4 +204,29 @@ This **automatically**:
 - Passes it to `S.filter_data()`
 - Writes it to the `.txt` filter description file
 
-5. **Update** `run_filter_combinations.py` to generate combinations for the new filter (optional)
\ No newline at end of file
+6. **Update `run_filter_combinations.py`** to generate combinations (optional):
+
+```python
+# Add after existing filter loops:
+for region in survey.options_region:
+    combinations.append({
+        'name': f'Region-{region}',
+        'filters': {'region': [region]}
+    })
+```
+
+### Currently Available Filters
+
+| CLI Argument | Options Attribute | Source Column | Description |
+|--------------|-------------------|---------------|-------------|
+| `--age` | `options_age` | QID1 | Age groups |
+| `--gender` | `options_gender` | QID2 | Gender |
+| `--ethnicity` | `options_ethnicity` | QID3 | Ethnicity |
+| `--income` | `options_income` | QID15 | Income brackets |
+| `--consumer` | `options_consumer` | Consumer | Consumer segments |
+| `--business_owner` | `options_business_owner` | QID4 | Business owner status |
+| `--employment_status` | `options_employment_status` | QID13 | Employment status |
+| `--personal_products` | `options_personal_products` | QID14 | Personal products |
+| `--ai_user` | `options_ai_user` | QID22 | AI user status |
+| `--investable_assets` | `options_investable_assets` | QID16 | Investable assets |
+| `--industry` | `options_industry` | QID17 | Industry |
\ No newline at end of file
diff --git a/plots.py b/plots.py
index 0bee376..76cb41f 100644
--- a/plots.py
+++ b/plots.py
@@ -45,6 +45,12 @@ class QualtricsPlotsMixin:
             ('consumer', 'Cons', getattr(self, 'filter_consumer', None), 'options_consumer'),
             ('ethnicity', 'Eth', getattr(self, 'filter_ethnicity', None), 'options_ethnicity'),
             ('income', 'Inc', getattr(self, 'filter_income', None), 'options_income'),
+            ('business_owner', 'BizOwn', getattr(self, 'filter_business_owner', None), 'options_business_owner'),
+            ('employment_status', 'Emp', getattr(self, 'filter_employment_status', None), 'options_employment_status'),
+            ('personal_products', 'Prod', getattr(self, 'filter_personal_products', None), 'options_personal_products'),
+            ('ai_user', 'AI', getattr(self, 'filter_ai_user', None), 'options_ai_user'),
+            ('investable_assets', 'InvAsts', getattr(self, 'filter_investable_assets', None), 'options_investable_assets'),
+            ('industry', 'Ind', getattr(self, 'filter_industry', None), 'options_industry'),
         ]
         
         for _, short_code, value, options_attr in filters:
@@ -101,6 +107,12 @@ class QualtricsPlotsMixin:
             ('Consumer', getattr(self, 'filter_consumer', None), 'options_consumer'),
             ('Ethnicity', getattr(self, 'filter_ethnicity', None), 'options_ethnicity'),
             ('Income', getattr(self, 'filter_income', None), 'options_income'),
+            ('Business Owner', getattr(self, 'filter_business_owner', None), 'options_business_owner'),
+            ('Employment Status', getattr(self, 'filter_employment_status', None), 'options_employment_status'),
+            ('Personal Products', getattr(self, 'filter_personal_products', None), 'options_personal_products'),
+            ('AI User', getattr(self, 'filter_ai_user', None), 'options_ai_user'),
+            ('Investable Assets', getattr(self, 'filter_investable_assets', None), 'options_investable_assets'),
+            ('Industry', getattr(self, 'filter_industry', None), 'options_industry'),
         ]
         
         for display_name, value, options_attr in filters:
diff --git a/run_filter_combinations.py b/run_filter_combinations.py
index fe3ebd3..e84a062 100644
--- a/run_filter_combinations.py
+++ b/run_filter_combinations.py
@@ -107,6 +107,48 @@ def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
             'filters': {'consumer': options}
         })
     
+    # Business Owner - one at a time
+    for business_owner in survey.options_business_owner:
+        combinations.append({
+            'name': f'BusinessOwner-{business_owner}',
+            'filters': {'business_owner': [business_owner]}
+        })
+    
+    # Employment Status - one at a time
+    for employment_status in survey.options_employment_status:
+        combinations.append({
+            'name': f'Employment-{employment_status}',
+            'filters': {'employment_status': [employment_status]}
+        })
+    
+    # Personal Products - one at a time
+    for personal_products in survey.options_personal_products:
+        combinations.append({
+            'name': f'Products-{personal_products}',
+            'filters': {'personal_products': [personal_products]}
+        })
+    
+    # AI User - one at a time
+    for ai_user in survey.options_ai_user:
+        combinations.append({
+            'name': f'AIUser-{ai_user}',
+            'filters': {'ai_user': [ai_user]}
+        })
+    
+    # Investable Assets - one at a time
+    for investable_assets in survey.options_investable_assets:
+        combinations.append({
+            'name': f'Assets-{investable_assets}',
+            'filters': {'investable_assets': [investable_assets]}
+        })
+    
+    # Industry - one at a time
+    for industry in survey.options_industry:
+        combinations.append({
+            'name': f'Industry-{industry}',
+            'filters': {'industry': [industry]}
+        })
+    
     return combinations
 
 
diff --git a/utils.py b/utils.py
index 963022c..3cdbf47 100644
--- a/utils.py
+++ b/utils.py
@@ -750,7 +750,12 @@ class QualtricsSurvey(QualtricsPlotsMixin):
         self.filter_consumer:list = None
         self.filter_ethnicity:list = None
         self.filter_income:list = None
-
+        self.filter_business_owner:list = None      # QID4
+        self.filter_employment_status:list = None   # QID13
+        self.filter_personal_products:list = None   # QID14
+        self.filter_ai_user:list = None             # QID22
+        self.filter_investable_assets:list = None   # QID16
+        self.filter_industry:list = None            # QID17
         
     
     
@@ -838,6 +843,12 @@ class QualtricsSurvey(QualtricsPlotsMixin):
         self.options_consumer = sorted(df['Consumer'].drop_nulls().unique().to_list()) if 'Consumer' in df.columns else []
         self.options_ethnicity = sorted(df['QID3'].drop_nulls().unique().to_list()) if 'QID3' in df.columns else []
         self.options_income = sorted(df['QID15'].drop_nulls().unique().to_list()) if 'QID15' in df.columns else []
+        self.options_business_owner = sorted(df['QID4'].drop_nulls().unique().to_list()) if 'QID4' in df.columns else []
+        self.options_employment_status = sorted(df['QID13'].drop_nulls().unique().to_list()) if 'QID13' in df.columns else []
+        self.options_personal_products = sorted(df['QID14'].drop_nulls().unique().to_list()) if 'QID14' in df.columns else []
+        self.options_ai_user = sorted(df['QID22'].drop_nulls().unique().to_list()) if 'QID22' in df.columns else []
+        self.options_investable_assets = sorted(df['QID16'].drop_nulls().unique().to_list()) if 'QID16' in df.columns else []
+        self.options_industry = sorted(df['QID17'].drop_nulls().unique().to_list()) if 'QID17' in df.columns else []
         
         return df.lazy()
     
@@ -854,15 +865,21 @@ class QualtricsSurvey(QualtricsPlotsMixin):
         
         return q.select(QIDs).rename(rename_dict)
 
-    def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None) -> pl.LazyFrame:
+    def filter_data(self, q: pl.LazyFrame, age:list=None, gender:list=None, consumer:list=None, ethnicity:list=None, income:list=None, business_owner:list=None, employment_status:list=None, personal_products:list=None, ai_user:list=None, investable_assets:list=None, industry:list=None) -> pl.LazyFrame:
         """Filter data based on provided parameters
         
         Possible parameters:
-        - age: list of age groups to include
-        - gender: list
-        - consumer: list
-        - ethnicity: list
-        - income: list
+        - age: list of age groups to include (QID1)
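+        - gender: list of gender values to include (QID2)
+        - consumer: list of consumer segments to include (Consumer)
+        - ethnicity: list of ethnicity values to include (QID3)
+        - income: list of income brackets to include (QID15)
+        - business_owner: list of business owner statuses to include (QID4)
+        - employment_status: list of employment statuses to include (QID13)
+        - personal_products: list of personal products values to include (QID14)
+        - ai_user: list of AI user statuses to include (QID22)
+        - investable_assets: list of investable assets values to include (QID16)
+        - industry: list of industries to include (QID17)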
         
         Also saves the result to self.data_filtered.
         """
@@ -888,7 +905,29 @@ class QualtricsSurvey(QualtricsPlotsMixin):
         if income is not None:
             q = q.filter(pl.col('QID15').is_in(income))
         
-        self
+        self.filter_business_owner = business_owner
+        if business_owner is not None:
+            q = q.filter(pl.col('QID4').is_in(business_owner))
+        
+        self.filter_employment_status = employment_status
+        if employment_status is not None:
+            q = q.filter(pl.col('QID13').is_in(employment_status))
+        
+        self.filter_personal_products = personal_products
+        if personal_products is not None:
+            q = q.filter(pl.col('QID14').is_in(personal_products))
+        
+        self.filter_ai_user = ai_user
+        if ai_user is not None:
+            q = q.filter(pl.col('QID22').is_in(ai_user))
+        
+        self.filter_investable_assets = investable_assets
+        if investable_assets is not None:
+            q = q.filter(pl.col('QID16').is_in(investable_assets))
+        
+        self.filter_industry = industry
+        if industry is not None:
+            q = q.filter(pl.col('QID17').is_in(industry))
         
         self.data_filtered = q
         return self.data_filtered