Compare commits
2 Commits
a162701e94
...
8dd41dfc96
| Author | SHA1 | Date | |
|---|---|---|---|
| 8dd41dfc96 | |||
| 840cb4e6dc |
632
03_quant_report.script.py
Normal file
632
03_quant_report.script.py
Normal file
@@ -0,0 +1,632 @@
|
||||
|
||||
__generated_with = "0.19.7"
|
||||
|
||||
# %%
|
||||
import marimo as mo
|
||||
import polars as pl
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from validation import check_progress, duration_validation, check_straight_liners
|
||||
from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||
import utils
|
||||
|
||||
from speaking_styles import SPEAKING_STYLES
|
||||
|
||||
# %%
|
||||
# CLI argument parsing for batch automation
|
||||
# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
|
||||
# When run in Jupyter: args will use defaults (all filters = None = all options selected)
|
||||
def parse_cli_args():
    """Parse optional demographic filter arguments for batch automation.

    Each flag takes a JSON-encoded list of labels, e.g.
    ``--age '["18 to 21 years"]'``.  When running interactively
    (Jupyter/IPython detected via ``get_ipython``), ``sys.argv`` belongs to
    the kernel, so CLI parsing is skipped and every filter defaults to
    ``None`` — downstream code treats ``None`` as "all options selected".

    Returns:
        argparse.Namespace with attributes ``age``, ``gender``,
        ``ethnicity``, ``income`` and ``consumer``; each is a list of
        label strings or ``None``.
    """
    filter_names = ('age', 'gender', 'ethnicity', 'income', 'consumer')

    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
    for name in filter_names:
        parser.add_argument(f'--{name}', type=str, default=None,
                            help=f'JSON list of {name} values')

    try:
        # Check if running in Jupyter by looking for ipykernel
        get_ipython()  # noqa: F821
        return argparse.Namespace(**{name: None for name in filter_names})
    except NameError:
        args = parser.parse_args()
        # Decode each JSON string into a list; absent flags stay None.
        for name in filter_names:
            raw = getattr(args, name)
            setattr(args, name, json.loads(raw) if raw else None)
        return args
|
||||
|
||||
# Resolve CLI filters once so the rest of the script treats them as constants.
cli_args = parse_cli_args()


# %%

# Interactive file picker (notebook-only); disabled for batch automation.
# file_browser = mo.ui.file_browser(
# initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
# )
# file_browser

# # %%
# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
# RESULTS_FILE = Path(file_browser.path(index=0))

# Fixed export paths used for batch runs (see README "Batch Report Generation").
RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'

# %%
# Load the Qualtrics export; S also exposes options_* lists, data
# transforms, and the plotting helpers used throughout this report.
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
try:
    data_all = S.load_data()
except NotImplementedError as e:
    # mo.stop(True, ...) halts the marimo notebook with a visible message
    # instead of letting the exception propagate.
    mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))

# %%
# Character whose trait set drives the "best character" correlation
# sections near the end of the report.
BEST_CHOSEN_CHARACTER = "the_coach"

# Interactive filter form (notebook-only); superseded by CLI args for batch runs.
# # %%
# filter_form = mo.md('''
# {age}
# {gender}
# {ethnicity}
# {income}
# {consumer}
# '''
# ).batch(
# age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
# gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
# ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
# income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
# consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
# ).form()
# mo.md(f'''
# ---

# # Data Filter

# {filter_form}
# ''')

# %%
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
# CLI args: None means "all options selected" (use S.options_* defaults)
_filter_age = cli_args.age if cli_args.age is not None else S.options_age
_filter_gender = cli_args.gender if cli_args.gender is not None else S.options_gender
_filter_ethnicity = cli_args.ethnicity if cli_args.ethnicity is not None else S.options_ethnicity
_filter_income = cli_args.income if cli_args.income is not None else S.options_income
_filter_consumer = cli_args.consumer if cli_args.consumer is not None else S.options_consumer

# Apply the demographic filters to the full lazy frame.
_d = S.filter_data(data_all, age=_filter_age, gender=_filter_gender, income=_filter_income, ethnicity=_filter_ethnicity, consumer=_filter_consumer)

# Stop execution and prevent other cells from running if no data is selected
# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
data = _d

# data = data_validated
# Materialize once so the lazy frame is evaluated (and shown in the notebook).
data.collect()
|
||||
|
||||
# %%


# %%
# Sanity check (disabled): all business owners should lack a 'Consumer type'.
# assert all([a is None for a in data_all.filter(pl.col('QID4') == 'Yes').collect()['Consumer'].unique()]) , "Not all business owners are missing 'Consumer type' in demographics."

# %%
mo.md(r"""
# Demographic Distributions
""")

# %%
# Demographic columns to plot. 'Bussiness_Owner' is the (misspelled)
# column name as it appears in the export; display titles correct the
# spelling via the .replace() call below.
demo_plot_cols = [
    'Age',
    'Gender',
    # 'Race/Ethnicity',
    'Bussiness_Owner',
    'Consumer'
]

# %%
# Render one distribution chart per demographic column and collect them
# into a single markdown cell.
_content = """

"""
for c in demo_plot_cols:
    _fig = S.plot_demographic_distribution(
        data=S.get_demographics(data)[0],
        column=c,
        title=f"{c.replace('Bussiness', 'Business').replace('_', ' ')} Distribution of Survey Respondents"
    )
    _content += f"""{mo.ui.altair_chart(_fig)}\n\n"""

mo.md(_content)
|
||||
|
||||
# %%
mo.md(r"""
---

# Brand Character Results
""")

# %%
mo.md(r"""
## Best performing: Original vs Refined frankenstein
""")

# %%
# Ranking of original vs refined ("frankenstein") character variants.
char_refine_rank = S.get_character_refine(data)[0]
# print(char_rank.collect().head())
print(char_refine_rank.collect().head())

# %%
mo.md(r"""
## Character ranking points
""")

# %%
mo.md(r"""
## Character ranking 1-2-3
""")

# %%
char_rank = S.get_character_ranking(data)[0]

# %%
# Weighted score: 1st place = 3 pts, 2nd = 2 pts, 3rd = 1 pt.
# NOTE: x_label previously said 'Voice' — a copy-paste from the voice
# section; this chart plots character personalities.
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Character Personality')

# %%
S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', title='Character Personality: Rankings Top 3')

# %%
mo.md(r"""
### Statistical Significance Character Ranking
""")

# %%
# Pairwise significance tests on character rankings, shown as a heatmap
# plus a per-character summary chart.
_pairwise_df, _meta = S.compute_ranking_significance(char_rank)

# print(_pairwise_df.columns)

mo.md(f"""

{mo.ui.altair_chart(S.plot_significance_heatmap(_pairwise_df, metadata=_meta))}

{mo.ui.altair_chart(S.plot_significance_summary(_pairwise_df, metadata=_meta))}
""")

# %%
mo.md(r"""
## Character Ranking: times 1st place
""")

# %%
S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality')

# %%
mo.md(r"""
## Prominent predefined personality traits wordcloud
""")

# %%
# Word cloud of the 8 traits each respondent picked as most prominent.
top8_traits = S.get_top_8_traits(data)[0]
S.plot_traits_wordcloud(
    data=top8_traits,
    column='Top_8_Traits',
    title="Most Prominent Personality Traits",
)
|
||||
|
||||
# %%
mo.md(r"""
## Trait frequency per brand character
""")

# %%
char_df = S.get_character_refine(data)[0]

# %%
from theme import ColorPalette

# Assuming you already have char_df (your data from get_character_refine or similar)
characters = ['Bank Teller', 'Familiar Friend', 'The Coach', 'Personal Assistant']
# Per-character (base, highlight) color pairs from the shared theme.
character_colors = {
    'Bank Teller': (ColorPalette.CHARACTER_BANK_TELLER, ColorPalette.CHARACTER_BANK_TELLER_HIGHLIGHT),
    'Familiar Friend': (ColorPalette.CHARACTER_FAMILIAR_FRIEND, ColorPalette.CHARACTER_FAMILIAR_FRIEND_HIGHLIGHT),
    'The Coach': (ColorPalette.CHARACTER_COACH, ColorPalette.CHARACTER_COACH_HIGHLIGHT),
    'Personal Assistant': (ColorPalette.CHARACTER_PERSONAL_ASSISTANT, ColorPalette.CHARACTER_PERSONAL_ASSISTANT_HIGHLIGHT),
}

# Build consistent sort order (by total frequency across all characters)
# so the four charts below share the same trait axis ordering.
all_trait_counts = {}
for char in characters:
    freq_df, _ = S.transform_character_trait_frequency(char_df, char)
    for row in freq_df.iter_rows(named=True):
        all_trait_counts[row['trait']] = all_trait_counts.get(row['trait'], 0) + row['count']

consistent_sort_order = sorted(all_trait_counts.keys(), key=lambda x: -all_trait_counts[x])

_content = """"""
# Generate 4 plots (one per character)
for char in characters:
    freq_df, _ = S.transform_character_trait_frequency(char_df, char)
    main_color, highlight_color = character_colors[char]
    chart = S.plot_single_character_trait_frequency(
        data=freq_df,
        character_name=char,
        bar_color=main_color,
        highlight_color=highlight_color,
        trait_sort_order=consistent_sort_order,
    )
    _content += f"""
{mo.ui.altair_chart(chart)}


"""

mo.md(_content)

# %%
mo.md(r"""
## Statistical significance best characters

zie chat
> voorbeeld: als de nr 1 en 2 niet significant verschillen maar wel van de nr 3 bijvoorbeeld is dat ook top. Beetje meedenkend over hoe ik het kan presenteren weetje wat ik bedoel?:)
>
""")
|
||||
|
||||
# %%


# %%


# %%
mo.md(r"""
---

# Spoken Voice Results
""")

# %%
# Toggle: color the voice charts by speaker gender.
COLOR_GENDER = True

# %%
mo.md(r"""
## Top 8 Most Chosen out of 18
""")

# %%
# Funnel data: 18 voices -> top 8 -> top 3 per respondent.
v_18_8_3 = S.get_18_8_3(data)[0]

# %%
S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Top 3 most chosen out of 8
""")

# %%
S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Voice Ranking Weighted Score
""")

# %%
# Weighted score over the top-3 ranking question: 1st=3, 2nd=2, 3rd=1.
top3_voices = S.get_top_3_voices(data)[0]
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)

# %%
S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Which voice is ranked best in the ranking question for top 3?

(not best 3 out of 8 question)
""")

# %%
S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
### Statistical significance for voice ranking
""")

# %%
# print(top3_voices.collect().head())

# %%
# Disabled: pairwise significance on voice rankings (kept for reference).
# _pairwise_df, _metadata = S.compute_ranking_significance(
# top3_voices,alpha=0.05,correction="none")

# # View significant pairs
# # print(pairwise_df.filter(pl.col('significant') == True))

# # Create heatmap visualization
# _heatmap = S.plot_significance_heatmap(
# _pairwise_df,
# metadata=_metadata,
# title="Weighted Voice Ranking Significance<br>(Pairwise Comparisons)"
# )

# # Create summary bar chart
# _summary = S.plot_significance_summary(
# _pairwise_df,
# metadata=_metadata
# )

# mo.md(f"""
# {mo.ui.altair_chart(_heatmap)}

# {mo.ui.altair_chart(_summary)}
# """)
|
||||
|
||||
# %%
## Voice Ranked 1st the most

# %%
S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', color_gender=COLOR_GENDER)

# %%
mo.md(r"""
## Voice Scale 1-10
""")

# %%
# Get your voice scale data (from notebook)
voice_1_10, _ = S.get_voice_scale_1_10(data)
S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER)

# %%
mo.md(r"""
### Statistical Significance (Scale 1-10)
""")

# %%
# Compute pairwise significance tests on the 1-10 ratings
# (Mann-Whitney U with Bonferroni correction).
pairwise_df, metadata = S.compute_pairwise_significance(
    voice_1_10,
    test_type="mannwhitney",  # or "ttest", "chi2", "auto"
    alpha=0.05,
    correction="bonferroni"  # or "holm", "none"
)

# View significant pairs
# print(pairwise_df.filter(pl.col('significant') == True))

# Create heatmap visualization
_heatmap = S.plot_significance_heatmap(
    pairwise_df,
    metadata=metadata,
    title="Voice Rating Significance<br>(Pairwise Comparisons)"
)

# Create summary bar chart
_summary = S.plot_significance_summary(
    pairwise_df,
    metadata=metadata
)

mo.md(f"""
{mo.ui.altair_chart(_heatmap)}

{mo.ui.altair_chart(_summary)}
""")
|
||||
|
||||
# %%


# %%
mo.md(r"""
## Ranking points for Voice per Chosen Brand Character

**missing mapping**
""")

# %%
mo.md(r"""
## Correlation Speaking Styles
""")

# %%
# Speaking-style answers are split across two question blocks
# (orange/red and green/blue); join them back together per respondent.
ss_or, choice_map_or = S.get_ss_orange_red(data)
ss_gb, choice_map_gb = S.get_ss_green_blue(data)

# Combine the data
ss_all = ss_or.join(ss_gb, on='_recordId')
_d = ss_all.collect()

choice_map = {**choice_map_or, **choice_map_gb}
# print(_d.head())
# print(choice_map)

# Long-format style data. Computed once and aliased — previously this
# transform ran twice with identical arguments for ss_long and df_style.
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
df_style = ss_long

# Join style traits with the 1-10 voice scale per respondent/voice.
vscales = S.get_voice_scale_1_10(data)[0]
df_scale_long = utils.process_voice_scale_data(vscales)

joined_scale = df_style.join(df_scale_long, on=["_recordId", "Voice"], how="inner")

# Join style traits with ranking points per respondent/voice.
df_ranking = utils.process_voice_ranking_data(top3_voices)
joined_ranking = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')

# %%
joined_ranking.head()
|
||||
|
||||
# %%
mo.md(r"""
### Colors vs Scale 1-10
""")

# %%
# Transform to get one row per color with average correlation
color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, SPEAKING_STYLES)
S.plot_speaking_style_color_correlation(
    data=color_corr_scale,
    title="Correlation: Speaking Style Colors and Voice Scale 1-10"
)

# %%
mo.md(r"""
### Colors vs Ranking Points
""")

# %%
# Same transform, but correlated against ranking points instead of the scale.
color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
    joined_ranking,
    SPEAKING_STYLES,
    target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
    data=color_corr_ranking,
    title="Correlation: Speaking Style Colors and Voice Ranking Points"
)

# %%
mo.md(r"""
### Individual Traits vs Scale 1-10
""")

# %%
# One correlation chart per speaking-style color, collected into markdown.
_content = """"""

for _style, _traits in SPEAKING_STYLES.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_correlation(
        data=joined_scale,
        style_color=_style,
        style_traits=_traits,
        title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
    )
    _content += f"""
#### Speaking Style **{_style}**:

{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Individual Traits vs Ranking Points
""")

# %%
# As above, but correlated against ranking points.
_content = """"""

for _style, _traits in SPEAKING_STYLES.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_ranking_correlation(
        data=joined_ranking,
        style_color=_style,
        style_traits=_traits,
        title=f"Correlation: Speaking Style {_style} and Voice Ranking Points",
    )
    _content += f"""
#### Speaking Style **{_style}**:

{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)
|
||||
|
||||
# %%
mo.md(r"""
## Correlations when "Best Brand Character" is chosen

Select only the traits that fit with that character
""")

# %%
# Trait list the winning character was originally defined with.
from reference import ORIGINAL_CHARACTER_TRAITS
chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER]

# %%
# Restrict the speaking styles to traits belonging to the chosen character.
STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits)

# %%
mo.md(r"""
### Individual Traits vs Ranking Points
""")

# %%
# One chart per remaining style color, against ranking points.
_content = ""
for _style, _traits in STYLES_SUBSET.items():
    _fig = S.plot_speaking_style_ranking_correlation(
        data=joined_ranking,
        style_color=_style,
        style_traits=_traits,
        title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Ranking Points"""
    )
    _content += f"""
{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Individual Traits vs Scale 1-10
""")

# %%
# One chart per remaining style color, against the 1-10 scale.
_content = """"""

for _style, _traits in STYLES_SUBSET.items():
    # print(f"Correlation plot for {style}...")
    _fig = S.plot_speaking_style_correlation(
        data=joined_scale,
        style_color=_style,
        style_traits=_traits,
        title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Scale 1-10""",
    )
    _content += f"""
{mo.ui.altair_chart(_fig)}


"""
mo.md(_content)

# %%
mo.md(r"""
### Colors vs Scale 1-10 (Best Character)
""")

# %%
# Transform to get one row per color with average correlation
_color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET)
S.plot_speaking_style_color_correlation(
    data=_color_corr_scale,
    title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Scale 1-10"""
)

# %%
mo.md(r"""
### Colors vs Ranking Points (Best Character)
""")

# %%
_color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
    joined_ranking,
    STYLES_SUBSET,
    target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
    data=_color_corr_ranking,
    title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Ranking Points"""
)
|
||||
@@ -22,7 +22,7 @@ def _():
|
||||
@app.cell
def _():
    # Paths for tagging figures into the PowerPoint report template.
    TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
    TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
    # TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
    TAG_IMAGE_DIR = Path('figures/2-2-26')
    # NOTE(review): TAG_TARGET is assigned but not returned from this cell —
    # presumably unused by downstream cells; confirm against the full notebook.
    return TAG_IMAGE_DIR, TAG_SOURCE
|
||||
|
||||
|
||||
144
README.md
144
README.md
@@ -1,5 +1,147 @@
|
||||
# Voice Branding Quantitative Analysis
|
||||
|
||||
## Running Marimo Notebooks
|
||||
|
||||
Running on Ct-105 for shared access:
|
||||
|
||||
```bash
uv run marimo run 02_quant_analysis.py --headless --port 8080
```
|
||||
|
||||
---
|
||||
|
||||
## Batch Report Generation
|
||||
|
||||
The quant report can be run with different filter combinations via CLI or automated batch processing.
|
||||
|
||||
### Single Filter Run (CLI)
|
||||
|
||||
Run the report script directly with JSON-encoded filter arguments:
|
||||
|
||||
```bash
|
||||
# Single consumer segment
|
||||
uv run python 03_quant_report.script.py --consumer '["Starter"]'
|
||||
|
||||
# Single age group
|
||||
uv run python 03_quant_report.script.py --age '["18 to 21 years"]'
|
||||
|
||||
# Multiple filters combined
|
||||
uv run python 03_quant_report.script.py --age '["18 to 21 years", "22 to 24 years"]' --gender '["Male"]'
|
||||
|
||||
# All respondents (no filters = defaults to all options selected)
|
||||
uv run python 03_quant_report.script.py
|
||||
```
|
||||
|
||||
Available filter arguments:
|
||||
- `--age` — JSON list of age groups
|
||||
- `--gender` — JSON list of genders
|
||||
- `--ethnicity` — JSON list of ethnicities
|
||||
- `--income` — JSON list of income groups
|
||||
- `--consumer` — JSON list of consumer segments
|
||||
|
||||
### Batch Runner (All Combinations)
|
||||
|
||||
Run all single-filter combinations automatically with progress tracking:
|
||||
|
||||
```bash
|
||||
# Preview all combinations without running
|
||||
uv run python run_filter_combinations.py --dry-run
|
||||
|
||||
# Run all combinations (shows progress bar)
|
||||
uv run python run_filter_combinations.py
|
||||
|
||||
# Or use the registered CLI entry point
|
||||
uv run quant-report-batch
|
||||
uv run quant-report-batch --dry-run
|
||||
```
|
||||
|
||||
This generates reports for:
|
||||
- All Respondents (no filters)
|
||||
- Each age group individually
|
||||
- Each gender individually
|
||||
- Each ethnicity individually
|
||||
- Each income group individually
|
||||
- Each consumer segment individually
|
||||
|
||||
Output figures are saved to `figures/<export_date>/<filter_slug>/`.
|
||||
|
||||
### Jupyter Notebook Debugging
|
||||
|
||||
The script auto-detects Jupyter/IPython environments. When running in VS Code's Jupyter extension, CLI args default to `None` (all options selected), so you can debug cell-by-cell normally.
|
||||
|
||||
---
|
||||
|
||||
## Adding Custom Filter Combinations
|
||||
|
||||
To add new filter combinations to the batch runner, edit `run_filter_combinations.py`:
|
||||
|
||||
### Checklist
|
||||
|
||||
1. **Open** `run_filter_combinations.py`
|
||||
|
||||
2. **Find** the `get_filter_combinations()` function
|
||||
|
||||
3. **Add** your combination to the list before the `return` statement:
|
||||
|
||||
```python
|
||||
# Example: Add a specific age + consumer cross-filter
|
||||
combinations.append({
|
||||
'name': 'Age-18to24_Consumer-Starter', # Used for output folder naming
|
||||
'filters': {
|
||||
'age': ['18 to 21 years', '22 to 24 years'],
|
||||
'consumer': ['Starter']
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
4. **Filter keys** must match CLI argument names:
|
||||
- `age` — values from `survey.options_age`
|
||||
- `gender` — values from `survey.options_gender`
|
||||
- `ethnicity` — values from `survey.options_ethnicity`
|
||||
- `income` — values from `survey.options_income`
|
||||
- `consumer` — values from `survey.options_consumer`
|
||||
|
||||
5. **Check available values** by running:
|
||||
```python
|
||||
from utils import QualtricsSurvey
|
||||
S = QualtricsSurvey('data/exports/2-2-26/...Labels.csv', 'data/exports/.../....qsf')
|
||||
S.load_data()
|
||||
print(S.options_age)
|
||||
print(S.options_consumer)
|
||||
# etc.
|
||||
```
|
||||
|
||||
6. **Test** with dry-run first:
|
||||
```bash
|
||||
uv run python run_filter_combinations.py --dry-run
|
||||
```
|
||||
|
||||
### Example: Adding Multiple Cross-Filters
|
||||
|
||||
```python
|
||||
# In get_filter_combinations(), before return:
|
||||
|
||||
# Young professionals
|
||||
combinations.append({
|
||||
'name': 'Young_Professionals',
|
||||
'filters': {
|
||||
'age': ['22 to 24 years', '25 to 34 years'],
|
||||
'consumer': ['Early Professional']
|
||||
}
|
||||
})
|
||||
|
||||
# High income males
|
||||
combinations.append({
|
||||
'name': 'High_Income_Male',
|
||||
'filters': {
|
||||
'income': ['$150,000 - $199,999', '$200,000 or more'],
|
||||
'gender': ['Male']
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
- **Empty filters dict** = all respondents (no filtering)
|
||||
- **Omitted filter keys** = all options for that dimension selected
|
||||
- **Output folder names** are auto-generated from active filters by `QualtricsSurvey.filter_data()`
|
||||
@@ -7,6 +7,7 @@ requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"altair>=6.0.0",
|
||||
"imagehash>=4.3.1",
|
||||
"jupyter>=1.1.1",
|
||||
"marimo>=0.18.0",
|
||||
"matplotlib>=3.10.8",
|
||||
"modin[dask]>=0.37.1",
|
||||
@@ -24,8 +25,12 @@ dependencies = [
|
||||
"requests>=2.32.5",
|
||||
"scipy>=1.14.0",
|
||||
"taguette>=1.5.1",
|
||||
"tqdm>=4.66.0",
|
||||
"vl-convert-python>=1.9.0.post1",
|
||||
"wordcloud>=1.9.5",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
quant-report-batch = "run_filter_combinations:main"
|
||||
|
||||
|
||||
|
||||
165
run_filter_combinations.py
Normal file
165
run_filter_combinations.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Batch runner for quant report with different filter combinations.
|
||||
|
||||
Runs 03_quant_report.script.py for each single-filter combination:
|
||||
- Each age group (with all others active)
|
||||
- Each gender (with all others active)
|
||||
- Each ethnicity (with all others active)
|
||||
- Each income group (with all others active)
|
||||
- Each consumer segment (with all others active)
|
||||
|
||||
Usage:
|
||||
uv run python run_filter_combinations.py
|
||||
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils import QualtricsSurvey
|
||||
|
||||
|
||||
# Default data paths (same as in 03_quant_report.script.py)
|
||||
RESULTS_FILE = 'data/exports/2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
|
||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||
|
||||
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
|
||||
|
||||
|
||||
def get_filter_combinations(survey: "QualtricsSurvey") -> list[dict]:
    """
    Generate all single-filter combinations.

    Each combination isolates ONE filter value while keeping all others at
    "all selected" (an omitted filter key means "all options" downstream).

    Args:
        survey: Loaded survey exposing ``options_<dimension>`` lists.

    Returns:
        List of dicts, each with 'name' (output-folder label) and
        'filters' (CLI kwarg name -> single-value list), starting with
        an unfiltered 'All_Respondents' entry.
    """
    # (CLI filter key, display prefix, option-list attribute) per dimension.
    dimensions = [
        ('age', 'Age', 'options_age'),
        ('gender', 'Gender', 'options_gender'),
        ('ethnicity', 'Ethnicity', 'options_ethnicity'),
        ('income', 'Income', 'options_income'),
        ('consumer', 'Consumer', 'options_consumer'),
    ]

    # "All Respondents" run first (no filters = all options selected).
    combinations = [{'name': 'All_Respondents', 'filters': {}}]

    # One combination per individual option value, per dimension.
    for key, prefix, attr in dimensions:
        for value in getattr(survey, attr):
            combinations.append({
                'name': f'{prefix}-{value}',
                'filters': {key: [value]},
            })

    return combinations
|
||||
|
||||
|
||||
def run_report(filters: dict, dry_run: bool = False) -> bool:
    """
    Run the report script with given filters.

    Args:
        filters: Dict of filter_name -> list of values
        dry_run: If True, just print command without running

    Returns:
        True if successful, False otherwise
    """
    # Build the subprocess command: one --<filter> flag per non-empty
    # value list, each JSON-encoded for the report script's CLI parser.
    cmd = [sys.executable, str(REPORT_SCRIPT)]
    for name, values in filters.items():
        if not values:
            continue
        cmd.append(f'--{name}')
        cmd.append(json.dumps(values))

    if dry_run:
        print(f" Would run: {' '.join(cmd)}")
        return True

    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=Path(__file__).parent
        )
    except Exception as e:
        print(f"\n ERROR: {e}")
        return False

    if completed.returncode == 0:
        return True
    # Show a truncated stderr excerpt so batch output stays readable.
    print(f"\n ERROR: {completed.stderr[:500]}")
    return False
|
||||
|
||||
|
||||
def main():
    """Entry point: run the report once per generated filter combination."""
    import argparse
    parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
    cli = parser.parse_args()

    # Loading the survey populates the options_* attribute lists that
    # get_filter_combinations() iterates over.
    print("Loading survey to get filter options...")
    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
    survey.load_data()

    combos = get_filter_combinations(survey)
    print(f"Generated {len(combos)} filter combinations")

    if cli.dry_run:
        # Preview mode: show each command without executing anything.
        print("\nDRY RUN - Commands that would be executed:")
        for combo in combos:
            print(f"\n{combo['name']}:")
            run_report(combo['filters'], dry_run=True)
        return

    # Execute every combination, tracking successes and failures.
    successful, failed = 0, []
    for combo in tqdm(combos, desc="Running reports", unit="filter"):
        tqdm.write(f"Running: {combo['name']}")
        if run_report(combo['filters']):
            successful += 1
        else:
            failed.append(combo['name'])

    # Final summary.
    print(f"\n{'='*50}")
    print(f"Completed: {successful}/{len(combos)} successful")
    if failed:
        print(f"Failed: {', '.join(failed)}")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user