Start automation of running filter combinations

2026-02-03 14:33:09 +01:00
parent 840cb4e6dc
commit 8dd41dfc96
5 changed files with 354 additions and 3 deletions
--- a/03_quant_report.script.py
+++ b/03_quant_report.script.py
@@ -5,6 +5,8 @@ __generated_with = "0.19.7"
 import marimo as mo
 import polars as pl
 from pathlib import Path
 import argparse
 import json
 from validation import check_progress, duration_validation, check_straight_liners
 from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
@@ -12,6 +14,35 @@ import utils
 from speaking_styles import SPEAKING_STYLES
 # %%
 # CLI argument parsing for batch automation
 # When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
 # When run in Jupyter: args will use defaults (all filters = None = all options selected)
 def parse_cli_args():
    """Collect the demographic filters for this report run.

    Each of --age, --gender, --ethnicity, --income and --consumer takes a
    JSON-encoded list, e.g. --age '["18 to 21 years"]'. A filter that is
    not supplied (or is supplied as an empty string or JSON null) comes
    back as None.

    When ``get_ipython`` is defined (IPython/Jupyter session) the command
    line is not parsed at all and every filter is None.

    Returns:
        argparse.Namespace with attributes age, gender, ethnicity, income
        and consumer, each either a list or None.

    Raises:
        SystemExit: via ``parser.error`` when a supplied value is not valid
            JSON or does not decode to a list.
    """
    filter_help = {
        'age': 'JSON list of age groups',
        'gender': 'JSON list of genders',
        'ethnicity': 'JSON list of ethnicities',
        'income': 'JSON list of income groups',
        'consumer': 'JSON list of consumer segments',
    }

    # Only parse the command line when running as a script: get_ipython is
    # a builtin that exists only inside IPython/Jupyter sessions.
    try:
        get_ipython()  # noqa: F821
    except NameError:
        pass
    else:
        return argparse.Namespace(**dict.fromkeys(filter_help))

    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
    for name, help_text in filter_help.items():
        parser.add_argument(f'--{name}', type=str, default=None, help=help_text)
    args = parser.parse_args()

    # Decode JSON strings to lists, reporting bad input as a usage error
    # instead of letting a traceback (or a non-list value) through.
    for name in filter_help:
        raw = getattr(args, name)
        if not raw:
            setattr(args, name, None)
            continue
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError as exc:
            parser.error(f'--{name} must be a JSON list: {exc}')
        if decoded is not None and not isinstance(decoded, list):
            parser.error(f'--{name} must be a JSON list, got {type(decoded).__name__}')
        setattr(args, name, decoded)
    return args
 # Resolved once when this cell runs; the filter cell further down reads its values from cli_args.
 cli_args = parse_cli_args()
 # %%
 # file_browser = mo.ui.file_browser(
@@ -68,7 +99,14 @@ BEST_CHOSEN_CHARACTER = "the_coach"
 # %%
 # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
-_d = S.filter_data(data_validated, age=filter_form.value['age'], gender=filter_form.value['gender'], income=filter_form.value['income'], ethnicity=filter_form.value['ethnicity'], consumer=filter_form.value['consumer'])
+# CLI args: None means "all options selected" (use S.options_* defaults)
 # Fall back to the survey's full option list for every dimension the CLI left unset (None).
 # NOTE(review): the call this replaced filtered data_validated; confirm data_all is the intended input.
 _filter_age = S.options_age if cli_args.age is None else cli_args.age
 _filter_gender = S.options_gender if cli_args.gender is None else cli_args.gender
 _filter_ethnicity = S.options_ethnicity if cli_args.ethnicity is None else cli_args.ethnicity
 _filter_income = S.options_income if cli_args.income is None else cli_args.income
 _filter_consumer = S.options_consumer if cli_args.consumer is None else cli_args.consumer
 _d = S.filter_data(
    data_all,
    age=_filter_age,
    gender=_filter_gender,
    income=_filter_income,
    ethnicity=_filter_ethnicity,
    consumer=_filter_consumer,
 )
 # Stop execution and prevent other cells from running if no data is selected
 # mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
--- a/README.md
+++ b/README.md
@@ -1,5 +1,147 @@
 # Voice Branding Quantitative Analysis
 ## Running Marimo Notebooks
 Running on Ct-105 for shared access:
-```
+```bash
 uv run marimo run 02_quant_analysis.py --headless --port 8080
 ```
 ---
 ## Batch Report Generation
 The quant report can be run with different filter combinations via CLI or automated batch processing.
 ### Single Filter Run (CLI)
 Run the report script directly with JSON-encoded filter arguments:
 ```bash
 # Single consumer segment
 uv run python 03_quant_report.script.py --consumer '["Starter"]'
 # Single age group
 uv run python 03_quant_report.script.py --age '["18 to 21 years"]'
 # Multiple filters combined
 uv run python 03_quant_report.script.py --age '["18 to 21 years", "22 to 24 years"]' --gender '["Male"]'
 # All respondents (no filters = defaults to all options selected)
 uv run python 03_quant_report.script.py
 ```
 Available filter arguments:
 - `--age` — JSON list of age groups
 - `--gender` — JSON list of genders  
 - `--ethnicity` — JSON list of ethnicities
 - `--income` — JSON list of income groups
 - `--consumer` — JSON list of consumer segments
 ### Batch Runner (All Combinations)
 Run all single-filter combinations automatically with progress tracking:
 ```bash
 # Preview all combinations without running
 uv run python run_filter_combinations.py --dry-run
 # Run all combinations (shows progress bar)
 uv run python run_filter_combinations.py
 # Or use the registered CLI entry point
 uv run quant-report-batch
 uv run quant-report-batch --dry-run
 ```
 This generates reports for:
 - All Respondents (no filters)
 - Each age group individually
 - Each gender individually
 - Each ethnicity individually
 - Each income group individually
 - Each consumer segment individually
 Output figures are saved to `figures/<export_date>/<filter_slug>/`.
 ### Jupyter Notebook Debugging
 The script auto-detects Jupyter/IPython environments. When running in VS Code's Jupyter extension, CLI args default to `None` (all options selected), so you can debug cell-by-cell normally.
 ---
 ## Adding Custom Filter Combinations
 To add new filter combinations to the batch runner, edit `run_filter_combinations.py`:
 ### Checklist
 1. **Open** `run_filter_combinations.py`
 2. **Find** the `get_filter_combinations()` function
 3. **Add** your combination to the list before the `return` statement:
 ```python
 # Example: Add a specific age + consumer cross-filter
 combinations.append({
    'name': 'Age-18to24_Consumer-Starter',  # Label shown in the batch runner's progress and summary output
    'filters': {
        'age': ['18 to 21 years', '22 to 24 years'],
        'consumer': ['Starter']
    }
 })
 ```
 4. **Filter keys** must match CLI argument names:
   - `age` — values from `survey.options_age`
   - `gender` — values from `survey.options_gender`
   - `ethnicity` — values from `survey.options_ethnicity`
   - `income` — values from `survey.options_income`
   - `consumer` — values from `survey.options_consumer`
 5. **Check available values** by running:
 ```python
 from utils import QualtricsSurvey
 S = QualtricsSurvey('data/exports/2-2-26/...Labels.csv', 'data/exports/.../....qsf')
 S.load_data()
 print(S.options_age)
 print(S.options_consumer)
 # etc.
 ```
 6. **Test** with dry-run first:
 ```bash
 uv run python run_filter_combinations.py --dry-run
 ```
 ### Example: Adding Multiple Cross-Filters
 ```python
 # In get_filter_combinations(), before return:
 # Young professionals
 combinations.append({
    'name': 'Young_Professionals',
    'filters': {
        'age': ['22 to 24 years', '25 to 34 years'],
        'consumer': ['Early Professional']
    }
 })
 # High income males
 combinations.append({
    'name': 'High_Income_Male',
    'filters': {
        'income': ['$150,000 - $199,999', '$200,000 or more'],
        'gender': ['Male']
    }
 })
 ```
 ### Notes
 - **Empty filters dict** = all respondents (no filtering)
 - **Omitted filter keys** = all options for that dimension selected
 - **Output folder names** are auto-generated from active filters by `QualtricsSurvey.filter_data()`
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,8 +25,12 @@ dependencies = [
    "requests>=2.32.5",
    "scipy>=1.14.0",
    "taguette>=1.5.1",
    "tqdm>=4.66.0",
    "vl-convert-python>=1.9.0.post1",
    "wordcloud>=1.9.5",
 ]
 [project.scripts]
 quant-report-batch = "run_filter_combinations:main"
--- a/run_filter_combinations.py
+++ b/run_filter_combinations.py
@@ -0,0 +1,165 @@
 #!/usr/bin/env python
 """
 Batch runner for quant report with different filter combinations.
 Runs 03_quant_report.script.py once unfiltered (all respondents) and once for each single-filter combination:
 - Each age group (with all others active)
 - Each gender (with all others active)
 - Each ethnicity (with all others active)
 - Each income group (with all others active)
 - Each consumer segment (with all others active)
 Usage:
    uv run python run_filter_combinations.py
    uv run python run_filter_combinations.py --dry-run  # Preview combinations without running
 """
 import subprocess
 import sys
 import json
 from pathlib import Path
 from tqdm import tqdm
 from utils import QualtricsSurvey
 # Default data paths (same as in 03_quant_report.script.py)
 # NOTE(review): both data paths are relative and main() hands them to QualtricsSurvey unchanged,
 # so they presumably resolve against the caller's working directory — confirm before running
 # the batch from anywhere other than the repository root.
 RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
 QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
 # Report script that sits next to this file; run_report() launches it with this file's directory as cwd.
 REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
 def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
    """
    Build the list of report runs: one unfiltered run plus one run per option.

    Every run after the first restricts exactly one dimension (age, gender,
    ethnicity, income, consumer) to a single option read from the matching
    ``survey.options_*`` attribute; the other dimensions are left out of the
    filters dict.

    Returns a list of dicts, each with a 'name' label and a 'filters' dict
    mapping a filter key to a one-element list.
    """
    # (name prefix, filter key) per dimension, in the order the runs are emitted.
    dimensions = (
        ('Age', 'age'),
        ('Gender', 'gender'),
        ('Ethnicity', 'ethnicity'),
        ('Income', 'income'),
        ('Consumer', 'consumer'),
    )

    # The unfiltered baseline always comes first: an empty filters dict means no restriction.
    runs = [{'name': 'All_Respondents', 'filters': {}}]

    for prefix, key in dimensions:
        runs.extend(
            {'name': f'{prefix}-{option}', 'filters': {key: [option]}}
            for option in getattr(survey, f'options_{key}')
        )

    return runs
 def run_report(filters: dict, dry_run: bool = False) -> bool:
    """
    Run the report script once with the given filters.

    Every filter with a non-empty value becomes a ``--<name> <json list>``
    argument pair; filters whose value is empty or None are left off the
    command line.

    Args:
        filters: Dict of filter_name -> list of values
        dry_run: If True, just print the command without running it

    Returns:
        True if the script exited with status 0 (always True for a dry run),
        False if it exited non-zero or could not be started
    """
    import shlex  # local import: only needed to render the dry-run preview

    cmd = [sys.executable, str(REPORT_SCRIPT)]
    for filter_name, values in filters.items():
        if values:
            cmd.extend([f'--{filter_name}', json.dumps(values)])

    if dry_run:
        # The JSON arguments contain spaces and quotes; shlex.join quotes them
        # so the printed line can be pasted into a shell unchanged.
        print(f"  Would run: {shlex.join(cmd)}")
        return True

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Could not launch the script or decode its output; report and let the batch continue.
        print(f"\n  ERROR: {e}")
        return False

    if result.returncode != 0:
        # Show the tail of stderr: a Python traceback ends with the actual exception.
        print(f"\n  ERROR: {result.stderr[-500:]}")
        return False
    return True
 def main():
    """
    Command-line entry point: run the report for every generated filter combination.

    Loads the survey to read the available filter options, builds the
    combinations with get_filter_combinations(), then either previews the
    commands (--dry-run) or runs each one through run_report() under a
    progress bar and prints a summary.

    Exits with status 1 when at least one report run failed, so shells and
    schedulers can detect a partially failed batch.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
    args = parser.parse_args()

    # Load survey to get available filter options
    print("Loading survey to get filter options...")
    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    survey.load_data()  # Populates options_* attributes

    # Generate all combinations
    combinations = get_filter_combinations(survey)
    print(f"Generated {len(combinations)} filter combinations")

    if args.dry_run:
        print("\nDRY RUN - Commands that would be executed:")
        for combo in combinations:
            print(f"\n{combo['name']}:")
            run_report(combo['filters'], dry_run=True)
        return

    # Run each combination with progress bar, remembering which ones failed
    failed = []
    for combo in tqdm(combinations, desc="Running reports", unit="filter"):
        tqdm.write(f"Running: {combo['name']}")
        if not run_report(combo['filters']):
            failed.append(combo['name'])
    successful = len(combinations) - len(failed)

    # Summary
    print(f"\n{'='*50}")
    print(f"Completed: {successful}/{len(combinations)} successful")
    if failed:
        print(f"Failed: {', '.join(failed)}")
        # Non-zero exit status: a batch with failed runs must not look like a success.
        sys.exit(1)
 # Run the batch when executed as a script; the quant-report-batch entry point calls main() directly.
 if __name__ == '__main__':
    main()
--- a/uv.lock
+++ b/uv.lock
@@ -2075,6 +2075,7 @@ dependencies = [
    { name = "requests" },
    { name = "scipy" },
    { name = "taguette" },
    { name = "tqdm" },
    { name = "vl-convert-python" },
    { name = "wordcloud" },
 ]
@@ -2101,6 +2102,7 @@ requires-dist = [
    { name = "requests", specifier = ">=2.32.5" },
    { name = "scipy", specifier = ">=1.14.0" },
    { name = "taguette", specifier = ">=1.5.1" },
    { name = "tqdm", specifier = ">=4.66.0" },
    { name = "vl-convert-python", specifier = ">=1.9.0.post1" },
    { name = "wordcloud", specifier = ">=1.9.5" },
 ]