From 8dd41dfc9630b57d637d33d432b5e029a25920eb Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Tue, 3 Feb 2026 14:33:09 +0100 Subject: [PATCH] Start automation of running filter combinations --- 03_quant_report.script.py | 40 ++++++++- README.md | 146 +++++++++++++++++++++++++++++++- pyproject.toml | 4 + run_filter_combinations.py | 165 +++++++++++++++++++++++++++++++++++++ uv.lock | 2 + 5 files changed, 354 insertions(+), 3 deletions(-) create mode 100644 run_filter_combinations.py diff --git a/03_quant_report.script.py b/03_quant_report.script.py index 10d4a99..3f32c85 100644 --- a/03_quant_report.script.py +++ b/03_quant_report.script.py @@ -5,6 +5,8 @@ __generated_with = "0.19.7" import marimo as mo import polars as pl from pathlib import Path +import argparse +import json from validation import check_progress, duration_validation, check_straight_liners from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores @@ -12,6 +14,35 @@ import utils from speaking_styles import SPEAKING_STYLES +# %% +# CLI argument parsing for batch automation +# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]' +# When run in Jupyter: args will use defaults (all filters = None = all options selected) +def parse_cli_args(): + parser = argparse.ArgumentParser(description='Generate quant report with optional filters') + parser.add_argument('--age', type=str, default=None, help='JSON list of age groups') + parser.add_argument('--gender', type=str, default=None, help='JSON list of genders') + parser.add_argument('--ethnicity', type=str, default=None, help='JSON list of ethnicities') + parser.add_argument('--income', type=str, default=None, help='JSON list of income groups') + parser.add_argument('--consumer', type=str, default=None, help='JSON list of consumer segments') + + # Only parse if running as script (not in Jupyter/interactive) + try: 
+ # Detect IPython/Jupyter: get_ipython() is only defined there, so a NameError means a plain script run + get_ipython() # noqa: F821 + return argparse.Namespace(age=None, gender=None, ethnicity=None, income=None, consumer=None) + except NameError: + args = parser.parse_args() + # Decode each supplied JSON string; omitted (or empty) arguments stay None + args.age = json.loads(args.age) if args.age else None + args.gender = json.loads(args.gender) if args.gender else None + args.ethnicity = json.loads(args.ethnicity) if args.ethnicity else None + args.income = json.loads(args.income) if args.income else None + args.consumer = json.loads(args.consumer) if args.consumer else None + return args + +cli_args = parse_cli_args() + # %% # file_browser = mo.ui.file_browser( @@ -68,7 +99,14 @@ BEST_CHOSEN_CHARACTER = "the_coach" # %% # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**")) -_d = S.filter_data(data_validated, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer']) +# CLI args: None means "all options selected" (use S.options_* defaults) +_filter_age = cli_args.age if cli_args.age is not None else S.options_age +_filter_gender = cli_args.gender if cli_args.gender is not None else S.options_gender +_filter_ethnicity = cli_args.ethnicity if cli_args.ethnicity is not None else S.options_ethnicity +_filter_income = cli_args.income if cli_args.income is not None else S.options_income +_filter_consumer = cli_args.consumer if cli_args.consumer is not None else S.options_consumer + +_d = S.filter_data(data_all, age=_filter_age, gender=_filter_gender, income=_filter_income, ethnicity=_filter_ethnicity, consumer=_filter_consumer) # Stop execution and prevent other cells from running if no data is selected # mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**")) diff --git a/README.md b/README.md index 3eb074a..402d4f4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,147 @@ +# Voice Branding Quantitative 
Analysis + +## Running Marimo Notebooks + Running on Ct-105 for shared access: -``` +```bash uv run marimo run 02_quant_analysis.py --headless --port 8080 -``` \ No newline at end of file +``` + +--- + +## Batch Report Generation + +The quant report can be run with different filter combinations via CLI or automated batch processing. + +### Single Filter Run (CLI) + +Run the report script directly with JSON-encoded filter arguments: + +```bash +# Single consumer segment +uv run python 03_quant_report.script.py --consumer '["Starter"]' + +# Single age group +uv run python 03_quant_report.script.py --age '["18 to 21 years"]' + +# Multiple filters combined +uv run python 03_quant_report.script.py --age '["18 to 21 years", "22 to 24 years"]' --gender '["Male"]' + +# All respondents (no filters = defaults to all options selected) +uv run python 03_quant_report.script.py +``` + +Available filter arguments: +- `--age` — JSON list of age groups +- `--gender` — JSON list of genders +- `--ethnicity` — JSON list of ethnicities +- `--income` — JSON list of income groups +- `--consumer` — JSON list of consumer segments + +### Batch Runner (All Combinations) + +Run all single-filter combinations automatically with progress tracking: + +```bash +# Preview all combinations without running +uv run python run_filter_combinations.py --dry-run + +# Run all combinations (shows progress bar) +uv run python run_filter_combinations.py + +# Or use the registered CLI entry point +uv run quant-report-batch +uv run quant-report-batch --dry-run +``` + +This generates reports for: +- All Respondents (no filters) +- Each age group individually +- Each gender individually +- Each ethnicity individually +- Each income group individually +- Each consumer segment individually + +Output figures are saved to `figures/<export-name>/<filter-name>/`. + +### Jupyter Notebook Debugging + +The script auto-detects Jupyter/IPython environments. 
When running in VS Code's Jupyter extension, CLI args default to `None` (all options selected), so you can debug cell-by-cell normally. + +--- + +## Adding Custom Filter Combinations + +To add new filter combinations to the batch runner, edit `run_filter_combinations.py`: + +### Checklist + +1. **Open** `run_filter_combinations.py` + +2. **Find** the `get_filter_combinations()` function + +3. **Add** your combination to the list before the `return` statement: + +```python +# Example: Add a specific age + consumer cross-filter +combinations.append({ + 'name': 'Age-18to24_Consumer-Starter', # Used for output folder naming + 'filters': { + 'age': ['18 to 21 years', '22 to 24 years'], + 'consumer': ['Starter'] + } +}) +``` + +4. **Filter keys** must match CLI argument names: + - `age` — values from `survey.options_age` + - `gender` — values from `survey.options_gender` + - `ethnicity` — values from `survey.options_ethnicity` + - `income` — values from `survey.options_income` + - `consumer` — values from `survey.options_consumer` + +5. **Check available values** by running: +```python +from utils import QualtricsSurvey +S = QualtricsSurvey('data/exports/2-2-26/...Labels.csv', 'data/exports/.../....qsf') +S.load_data() +print(S.options_age) +print(S.options_consumer) +# etc. +``` + +6. 
**Test** with dry-run first: +```bash +uv run python run_filter_combinations.py --dry-run +``` + +### Example: Adding Multiple Cross-Filters + +```python +# In get_filter_combinations(), before return: + +# Young professionals +combinations.append({ + 'name': 'Young_Professionals', + 'filters': { + 'age': ['22 to 24 years', '25 to 34 years'], + 'consumer': ['Early Professional'] + } +}) + +# High income males +combinations.append({ + 'name': 'High_Income_Male', + 'filters': { + 'income': ['$150,000 - $199,999', '$200,000 or more'], + 'gender': ['Male'] + } +}) +``` + +### Notes + +- **Empty filters dict** = all respondents (no filtering) +- **Omitted filter keys** = all options for that dimension selected +- **Output folder names** are auto-generated from active filters by `QualtricsSurvey.filter_data()` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 685345d..c75d680 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,12 @@ dependencies = [ "requests>=2.32.5", "scipy>=1.14.0", "taguette>=1.5.1", + "tqdm>=4.66.0", "vl-convert-python>=1.9.0.post1", "wordcloud>=1.9.5", ] +[project.scripts] +quant-report-batch = "run_filter_combinations:main" + diff --git a/run_filter_combinations.py b/run_filter_combinations.py new file mode 100644 index 0000000..a50dd1d --- /dev/null +++ b/run_filter_combinations.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +""" +Batch runner for quant report with different filter combinations. 
+
+Runs 03_quant_report.script.py once unfiltered, then once per single-filter combination:
+- Each age group (with all others active)
+- Each gender (with all others active)
+- Each ethnicity (with all others active)
+- Each income group (with all others active)
+- Each consumer segment (with all others active)
+
+Usage:
+    uv run python run_filter_combinations.py
+    uv run python run_filter_combinations.py --dry-run  # Preview combinations without running
+"""
+
+import subprocess
+import sys
+import json
+from pathlib import Path
+
+from tqdm import tqdm
+
+from utils import QualtricsSurvey
+
+
+# Default data paths (same as in 03_quant_report.script.py)
+RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
+QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
+
+REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
+
+
+def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
+    """
+    Generate the unfiltered run plus all single-filter combinations.
+
+    Each filtered combination isolates ONE filter value while keeping all others at "all selected".
+    Returns a list of dicts, one per run, each with a 'name' label and a 'filters' kwargs dict.
+    """
+    combinations = []
+
+    # Add "All Respondents" run (no filters = all options selected)
+    combinations.append({
+        'name': 'All_Respondents',
+        'filters': {}  # Empty = use defaults (all selected)
+    })
+
+    # Age groups - one at a time
+    for age in survey.options_age:
+        combinations.append({
+            'name': f'Age-{age}',
+            'filters': {'age': [age]}
+        })
+
+    # Gender - one at a time
+    for gender in survey.options_gender:
+        combinations.append({
+            'name': f'Gender-{gender}',
+            'filters': {'gender': [gender]}
+        })
+
+    # Ethnicity - one at a time
+    for ethnicity in survey.options_ethnicity:
+        combinations.append({
+            'name': f'Ethnicity-{ethnicity}',
+            'filters': {'ethnicity': [ethnicity]}
+        })
+
+    # Income - one at a time
+    for income in survey.options_income:
+        combinations.append({
+            'name': f'Income-{income}',
+            'filters': {'income': [income]}
+        })
+
+    # Consumer segments - one at a time
+    for consumer in survey.options_consumer:
+        combinations.append({
+            'name': f'Consumer-{consumer}',
+            'filters': {'consumer': [consumer]}
+        })
+
+    return combinations
+
+
+def run_report(filters: dict, dry_run: bool = False) -> bool:
+    """
+    Run the report script in a subprocess with the given filters passed as JSON CLI arguments.
+
+    Args:
+        filters: Dict of filter_name -> list of values
+        dry_run: If True, just print command without running
+
+    Returns:
+        True if the script exited with status 0 (always True for a dry run), False otherwise
+    """
+    cmd = [sys.executable, str(REPORT_SCRIPT)]
+
+    for filter_name, values in filters.items():
+        if values:  # an empty value list adds no flag, so that filter is left unset
+            cmd.extend([f'--{filter_name}', json.dumps(values)])
+
+    if dry_run:
+        print(f" Would run: {' '.join(cmd)}")  # preview only: arguments are not shell-quoted
+        return True
+
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            cwd=Path(__file__).parent
+        )
+        if result.returncode != 0:
+            print(f"\n ERROR: {result.stderr[-500:]}")  # tail of stderr: a traceback ends with the actual exception
+            return False
+        return True
+    except Exception as e:
+        print(f"\n ERROR: {e}")
+        return False
+
+
+def main():  # CLI entry point; exits with status 1 if any report run failed
+    import argparse
+    parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
+    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
+    args = parser.parse_args()
+
+    # Load survey to get available filter options
+    print("Loading survey to get filter options...")
+    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
+    survey.load_data()  # Populates options_* attributes
+
+    # Generate all combinations
+    combinations = get_filter_combinations(survey)
+    print(f"Generated {len(combinations)} filter combinations")
+
+    if args.dry_run:
+        print("\nDRY RUN - Commands that would be executed:")
+        for combo in combinations:
+            print(f"\n{combo['name']}:")
+            run_report(combo['filters'], dry_run=True)
+        return
+
+    # Run each combination with progress bar
+    successful = 0
+    failed = []
+
+    for combo in tqdm(combinations, desc="Running reports", unit="filter"):
+        tqdm.write(f"Running: {combo['name']}")
+        if run_report(combo['filters']):
+            successful += 1
+        else:
+            failed.append(combo['name'])
+
+    # Summary
+    print(f"\n{'='*50}")
+    print(f"Completed: {successful}/{len(combinations)} successful")
+    if failed:
+        sys.exit(f"Failed: {', '.join(failed)}")  # message goes to stderr; non-zero exit lets callers detect failures
+
+
+if __name__ == '__main__':
+    main()
diff --git a/uv.lock b/uv.lock
index 5bad9a4..7797a31 100644 --- a/uv.lock +++ b/uv.lock @@ -2075,6 +2075,7 @@ dependencies = [ { name = "requests" }, { name = "scipy" }, { name = "taguette" }, + { name = "tqdm" }, { name = "vl-convert-python" }, { name = "wordcloud" }, ] @@ -2101,6 +2102,7 @@ requires-dist = [ { name = "requests", specifier = ">=2.32.5" }, { name = "scipy", specifier = ">=1.14.0" }, { name = "taguette", specifier = ">=1.5.1" }, + { name = "tqdm", specifier = ">=4.66.0" }, { name = "vl-convert-python", specifier = ">=1.9.0.post1" }, { name = "wordcloud", specifier = ">=1.9.5" }, ]