Start automation of running filter combinations

This commit is contained in:
2026-02-03 14:33:09 +01:00
parent 840cb4e6dc
commit 8dd41dfc96
5 changed files with 354 additions and 3 deletions

View File

@@ -5,6 +5,8 @@ __generated_with = "0.19.7"
import marimo as mo import marimo as mo
import polars as pl import polars as pl
from pathlib import Path from pathlib import Path
import argparse
import json
from validation import check_progress, duration_validation, check_straight_liners from validation import check_progress, duration_validation, check_straight_liners
from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
@@ -12,6 +14,35 @@ import utils
from speaking_styles import SPEAKING_STYLES from speaking_styles import SPEAKING_STYLES
# %%
# CLI argument parsing for batch automation
# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
# When run in Jupyter: args will use defaults (all filters = None = all options selected)
def parse_cli_args():
    """Parse the optional demographic filter arguments used for batch runs.

    Each of ``--age``, ``--gender``, ``--ethnicity``, ``--income`` and
    ``--consumer`` takes a JSON-encoded list, e.g. ``--age '["18 to 21 years"]'``.

    Returns:
        argparse.Namespace with the attributes ``age``, ``gender``,
        ``ethnicity``, ``income`` and ``consumer``. Each attribute is the
        decoded list, or ``None`` when the argument was omitted or empty.
        Inside IPython/Jupyter the command line is not parsed at all and
        every attribute is ``None``.

    Raises:
        SystemExit: via ``parser.error`` (exit status 2) when a value is not
            valid JSON or does not decode to a list.
    """
    filter_help = {
        'age': 'JSON list of age groups',
        'gender': 'JSON list of genders',
        'ethnicity': 'JSON list of ethnicities',
        'income': 'JSON list of income groups',
        'consumer': 'JSON list of consumer segments',
    }

    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
    for name, help_text in filter_help.items():
        parser.add_argument(f'--{name}', type=str, default=None, help=help_text)

    # get_ipython only exists inside IPython/Jupyter; there sys.argv belongs
    # to the kernel, so skip parsing and report "no filters given".
    try:
        get_ipython()  # noqa: F821
    except NameError:
        pass
    else:
        return argparse.Namespace(**dict.fromkeys(filter_help))

    args = parser.parse_args()

    # Decode JSON strings to lists, rejecting anything that is not a list so a
    # mistyped value fails here with a usage message instead of downstream.
    for name in filter_help:
        raw = getattr(args, name)
        if not raw:
            setattr(args, name, None)
            continue
        try:
            values = json.loads(raw)
        except json.JSONDecodeError as exc:
            parser.error(f"--{name} is not valid JSON ({exc}): {raw!r}")
        if not isinstance(values, list):
            parser.error(f"--{name} must be a JSON list, got {type(values).__name__}: {raw!r}")
        setattr(args, name, values)
    return args
# Parsed once at module level; the filter cell further down reads its values from cli_args
cli_args = parse_cli_args()
# %% # %%
# file_browser = mo.ui.file_browser( # file_browser = mo.ui.file_browser(
@@ -68,7 +99,14 @@ BEST_CHOSEN_CHARACTER = "the_coach"
# %% # %%
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**")) # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
_d = S.filter_data(data_validated, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer']) # CLI args: None means "all options selected" (use S.options_* defaults)
# A filter that was not given on the command line (None) falls back to the
# survey's own option list for that dimension.
_filter_age = S.options_age if cli_args.age is None else cli_args.age
_filter_gender = S.options_gender if cli_args.gender is None else cli_args.gender
_filter_ethnicity = S.options_ethnicity if cli_args.ethnicity is None else cli_args.ethnicity
_filter_income = S.options_income if cli_args.income is None else cli_args.income
_filter_consumer = S.options_consumer if cli_args.consumer is None else cli_args.consumer
_d = S.filter_data(
    data_all,
    age=_filter_age,
    gender=_filter_gender,
    income=_filter_income,
    ethnicity=_filter_ethnicity,
    consumer=_filter_consumer,
)
# Stop execution and prevent other cells from running if no data is selected # Stop execution and prevent other cells from running if no data is selected
# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**")) # mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))

144
README.md
View File

@@ -1,5 +1,147 @@
# Voice Branding Quantitative Analysis
## Running Marimo Notebooks
Running on Ct-105 for shared access:
```bash
uv run marimo run 02_quant_analysis.py --headless --port 8080
```
---
## Batch Report Generation
The quant report can be run with different filter combinations via CLI or automated batch processing.
### Single Filter Run (CLI)
Run the report script directly with JSON-encoded filter arguments:
```bash
# Single consumer segment
uv run python 03_quant_report.script.py --consumer '["Starter"]'
# Single age group
uv run python 03_quant_report.script.py --age '["18 to 21 years"]'
# Multiple filters combined
uv run python 03_quant_report.script.py --age '["18 to 21 years", "22 to 24 years"]' --gender '["Male"]'
# All respondents (no filters = defaults to all options selected)
uv run python 03_quant_report.script.py
```
Available filter arguments:
- `--age` — JSON list of age groups
- `--gender` — JSON list of genders
- `--ethnicity` — JSON list of ethnicities
- `--income` — JSON list of income groups
- `--consumer` — JSON list of consumer segments
### Batch Runner (All Combinations)
Run all single-filter combinations automatically with progress tracking:
```bash
# Preview all combinations without running
uv run python run_filter_combinations.py --dry-run
# Run all combinations (shows progress bar)
uv run python run_filter_combinations.py
# Or use the registered CLI entry point
uv run quant-report-batch
uv run quant-report-batch --dry-run
```
This generates reports for:
- All Respondents (no filters)
- Each age group individually
- Each gender individually
- Each ethnicity individually
- Each income group individually
- Each consumer segment individually
Output figures are saved to `figures/<export_date>/<filter_slug>/`.
### Jupyter Notebook Debugging
The script auto-detects Jupyter/IPython environments. When running in VS Code's Jupyter extension, CLI args default to `None` (all options selected), so you can debug cell-by-cell normally.
---
## Adding Custom Filter Combinations
To add new filter combinations to the batch runner, edit `run_filter_combinations.py`:
### Checklist
1. **Open** `run_filter_combinations.py`
2. **Find** the `get_filter_combinations()` function
3. **Add** your combination to the list before the `return` statement:
```python
# Example: Add a specific age + consumer cross-filter
combinations.append({
    'name': 'Age-18to24_Consumer-Starter',  # Label shown in the progress output and failure summary
'filters': {
'age': ['18 to 21 years', '22 to 24 years'],
'consumer': ['Starter']
}
})
```
4. **Filter keys** must match CLI argument names:
- `age` — values from `survey.options_age`
- `gender` — values from `survey.options_gender`
- `ethnicity` — values from `survey.options_ethnicity`
- `income` — values from `survey.options_income`
- `consumer` — values from `survey.options_consumer`
5. **Check available values** by running:
```python
from utils import QualtricsSurvey
S = QualtricsSurvey('data/exports/2-2-26/...Labels.csv', 'data/exports/.../....qsf')
S.load_data()
print(S.options_age)
print(S.options_consumer)
# etc.
```
6. **Test** with dry-run first:
```bash
uv run python run_filter_combinations.py --dry-run
```
### Example: Adding Multiple Cross-Filters
```python
# In get_filter_combinations(), before return:
# Young professionals
combinations.append({
'name': 'Young_Professionals',
'filters': {
'age': ['22 to 24 years', '25 to 34 years'],
'consumer': ['Early Professional']
}
})
# High income males
combinations.append({
'name': 'High_Income_Male',
'filters': {
'income': ['$150,000 - $199,999', '$200,000 or more'],
'gender': ['Male']
}
})
```
### Notes
- **Empty filters dict** = all respondents (no filtering)
- **Omitted filter keys** = all options for that dimension selected
- **Output folder names** are auto-generated from active filters by `QualtricsSurvey.filter_data()`

View File

@@ -25,8 +25,12 @@ dependencies = [
"requests>=2.32.5", "requests>=2.32.5",
"scipy>=1.14.0", "scipy>=1.14.0",
"taguette>=1.5.1", "taguette>=1.5.1",
"tqdm>=4.66.0",
"vl-convert-python>=1.9.0.post1", "vl-convert-python>=1.9.0.post1",
"wordcloud>=1.9.5", "wordcloud>=1.9.5",
] ]
[project.scripts]
quant-report-batch = "run_filter_combinations:main"

165
run_filter_combinations.py Normal file
View File

@@ -0,0 +1,165 @@
#!/usr/bin/env python
"""
Batch runner for quant report with different filter combinations.
Runs 03_quant_report.script.py for each single-filter combination:
- Each age group (with all others active)
- Each gender (with all others active)
- Each ethnicity (with all others active)
- Each income group (with all others active)
- Each consumer segment (with all others active)
Usage:
uv run python run_filter_combinations.py
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
"""
import subprocess
import sys
import json
from pathlib import Path
from tqdm import tqdm
from utils import QualtricsSurvey
# Default data paths (same as in 03_quant_report.script.py)
# NOTE(review): these are relative paths, so they presumably resolve against the
# current working directory when the survey is loaded — confirm against QualtricsSurvey.
RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
# The report script is looked up next to this file, independent of the working directory
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
    """
    Build the list of report runs: one unfiltered run plus one run per option.

    Every run after the first restricts exactly ONE dimension to ONE value;
    dimensions that are left out of a run's ``filters`` dict stay unfiltered.

    Args:
        survey: Object exposing ``options_age``, ``options_gender``,
            ``options_ethnicity``, ``options_income`` and ``options_consumer``.

    Returns:
        List of ``{'name': ..., 'filters': {...}}`` dicts, starting with
        ``All_Respondents`` and followed by age, gender, ethnicity, income
        and consumer runs in that order.
    """
    # (name prefix, filter key) per dimension; the key doubles as the suffix
    # of the matching options_* attribute on the survey.
    dimensions = (
        ('Age', 'age'),
        ('Gender', 'gender'),
        ('Ethnicity', 'ethnicity'),
        ('Income', 'income'),
        ('Consumer', 'consumer'),
    )

    # The unfiltered baseline always comes first (empty dict = nothing restricted)
    runs = [{'name': 'All_Respondents', 'filters': {}}]

    for prefix, key in dimensions:
        runs.extend(
            {'name': f'{prefix}-{option}', 'filters': {key: [option]}}
            for option in getattr(survey, f'options_{key}')
        )
    return runs
def run_report(filters: dict, dry_run: bool = False) -> bool:
    """
    Run the report script once with the given filters.

    Args:
        filters: Dict of filter_name -> list of values. Every non-empty entry
            is passed to the script as ``--<filter_name> <JSON list>``; empty
            entries are left off the command line.
        dry_run: If True, just print the command without running it.

    Returns:
        True if the script exited with status 0 (always True for a dry run),
        False if it exited non-zero or could not be started.
    """
    import shlex  # local import: only needed to render the dry-run preview

    cmd = [sys.executable, str(REPORT_SCRIPT)]
    for filter_name, values in filters.items():
        if values:
            cmd.extend([f'--{filter_name}', json.dumps(values)])

    if dry_run:
        # shlex.join quotes the JSON arguments (spaces, double quotes), so the
        # printed command can be pasted into a shell unchanged.
        print(f" Would run: {shlex.join(cmd)}")
        return True

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent,
        )
    except Exception as e:
        # Deliberately broad: one report that cannot be launched must not
        # abort the remaining combinations of the batch.
        print(f"\n ERROR: {e}")
        return False

    if result.returncode != 0:
        # A Python traceback ends with the actual exception, so show the tail
        # of stderr rather than its first lines.
        print(f"\n ERROR: {result.stderr[-500:]}")
        return False
    return True
def main():
    """
    Entry point of the batch runner.

    Loads the survey to discover the available filter options, builds the
    list of filter combinations and runs the report script once per
    combination. With ``--dry-run`` the commands are only printed.

    Raises:
        SystemExit: with status 1 when at least one report run failed, so
            that shells and schedulers can detect a broken batch. A fully
            successful batch (or a dry run) returns normally.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
    args = parser.parse_args()

    # Load survey to get available filter options
    print("Loading survey to get filter options...")
    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    survey.load_data()  # Populates options_* attributes

    # Generate all combinations
    combinations = get_filter_combinations(survey)
    print(f"Generated {len(combinations)} filter combinations")

    if args.dry_run:
        print("\nDRY RUN - Commands that would be executed:")
        for combo in combinations:
            print(f"\n{combo['name']}:")
            run_report(combo['filters'], dry_run=True)
        return

    # Run each combination with progress bar; keep going after a failure so
    # one broken filter does not block the remaining reports.
    failed = []
    for combo in tqdm(combinations, desc="Running reports", unit="filter"):
        tqdm.write(f"Running: {combo['name']}")
        if not run_report(combo['filters']):
            failed.append(combo['name'])

    # Summary
    successful = len(combinations) - len(failed)
    print(f"\n{'='*50}")
    print(f"Completed: {successful}/{len(combinations)} successful")
    if failed:
        print(f"Failed: {', '.join(failed)}")
        # Non-zero exit status for both `python run_filter_combinations.py`
        # and the `quant-report-batch` console script.
        raise SystemExit(1)
# Run the batch when this file is executed directly as a script
if __name__ == '__main__':
    main()

2
uv.lock generated
View File

@@ -2075,6 +2075,7 @@ dependencies = [
{ name = "requests" }, { name = "requests" },
{ name = "scipy" }, { name = "scipy" },
{ name = "taguette" }, { name = "taguette" },
{ name = "tqdm" },
{ name = "vl-convert-python" }, { name = "vl-convert-python" },
{ name = "wordcloud" }, { name = "wordcloud" },
] ]
@@ -2101,6 +2102,7 @@ requires-dist = [
{ name = "requests", specifier = ">=2.32.5" }, { name = "requests", specifier = ">=2.32.5" },
{ name = "scipy", specifier = ">=1.14.0" }, { name = "scipy", specifier = ">=1.14.0" },
{ name = "taguette", specifier = ">=1.5.1" }, { name = "taguette", specifier = ">=1.5.1" },
{ name = "tqdm", specifier = ">=4.66.0" },
{ name = "vl-convert-python", specifier = ">=1.9.0.post1" }, { name = "vl-convert-python", specifier = ">=1.9.0.post1" },
{ name = "wordcloud", specifier = ">=1.9.5" }, { name = "wordcloud", specifier = ">=1.9.5" },
] ]