fix sample size

This commit is contained in:
2026-02-03 20:48:34 +01:00
parent 9a587dcc4c
commit 36280a6ff8
3 changed files with 186 additions and 122 deletions

View File

@@ -12,6 +12,8 @@ Runs 03_quant_report.script.py for each single-filter combination:
Usage:
uv run python run_filter_combinations.py
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
uv run python run_filter_combinations.py --category age # Only run age combinations
uv run python run_filter_combinations.py --category consumer # Only run consumer segment combinations
"""
import subprocess
@@ -31,132 +33,151 @@ QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_P
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
def _single_value_combinations(prefix: str, filter_key: str, options) -> list[dict]:
    """Build one run per option, each selecting only that option for ``filter_key``."""
    return [
        {'name': f'{prefix}-{option}', 'filters': {filter_key: [option]}}
        for option in options
    ]


def _ethnicity_combinations(options) -> list[dict]:
    """Build one run per individual ethnicity value.

    Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino").
    Each run selects ALL options that contain the given individual value.
    """
    # Split every option exactly once; the parts are reused for each value below.
    parsed = [
        (option, [part.strip() for part in option.split(',')])
        for option in options
    ]
    values = {part for _, parts in parsed for part in parts}
    return [
        {
            'name': f'Ethnicity-{value}',
            'filters': {'ethnicity': [option for option, parts in parsed if value in parts]},
        }
        for value in sorted(values)  # sorted for a deterministic run order
    ]


def _consumer_combinations(options) -> list[dict]:
    """Build combined ``_A``+``_B`` consumer runs followed by one run per single option."""
    # Group options by base name (the option without its _A/_B suffix).
    groups = {}
    for option in options:
        if option.endswith(('_A', '_B')):
            groups.setdefault(option[:-2], []).append(option)
        else:
            # Not an _A/_B option: keep as its own single-member group.
            groups[option] = [option]

    # Combined runs only make sense when a base name has more than one member.
    combined = [
        {'name': f'Consumer-{base_name}', 'filters': {'consumer': members}}
        for base_name, members in groups.items()
        if len(members) > 1
    ]
    # Standalone runs cover every option, including the individual _A and _B ones.
    return combined + _single_value_combinations('Consumer', 'consumer', options)


def get_filter_combinations(survey: 'QualtricsSurvey', category: str = None) -> list[dict]:
    """
    Generate all single-filter combinations.

    Each combination isolates ONE filter value while keeping all others at "all selected".

    Args:
        survey: QualtricsSurvey instance with loaded data. Only the ``options_*``
            attribute of a section that is actually generated is read.
        category: Optional filter category to limit combinations to.
            Valid values: 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer',
            'business_owner', 'ai_user', 'investable_assets', 'industry'
            If None or 'all', generates all combinations. Any other value matches
            no section, so an empty list is returned.

    Returns:
        List of dicts, each with a 'name' and a 'filters' dict holding the filter
        kwargs for one run.
    """
    def wanted(section: str) -> bool:
        # A section is generated for an unrestricted run or when it is the one requested.
        return not category or category in ('all', section)

    combinations = []

    # "All Respondents" run (empty filters = use defaults, i.e. all options selected).
    # It is only part of an unrestricted run, never of a single-category run.
    if not category or category == 'all':
        combinations.append({
            'name': 'All_Respondents',
            'filters': {}
        })

    # Age groups - one at a time
    if wanted('age'):
        combinations += _single_value_combinations('Age', 'age', survey.options_age)

    # Gender - one at a time
    if wanted('gender'):
        combinations += _single_value_combinations('Gender', 'gender', survey.options_gender)

    # Ethnicity - grouped by individual values
    if wanted('ethnicity'):
        combinations += _ethnicity_combinations(survey.options_ethnicity)

    # Income - one at a time
    if wanted('income'):
        combinations += _single_value_combinations('Income', 'income', survey.options_income)

    # Consumer segments - combined _A and _B options, plus every standalone option
    if wanted('consumer'):
        combinations += _consumer_combinations(survey.options_consumer)

    # Business Owner - one at a time
    if wanted('business_owner'):
        combinations += _single_value_combinations(
            'BusinessOwner', 'business_owner', survey.options_business_owner
        )

    # AI User - one at a time, plus two grouped usage-frequency runs
    if wanted('ai_user'):
        combinations += _single_value_combinations('AIUser', 'ai_user', survey.options_ai_user)
        # Daily, more than once daily, and multiple times per week = frequent.
        # NOTE: these labels are hard-coded and not checked against survey.options_ai_user.
        combinations.append({
            'name': 'AIUser-Frequent',
            'filters': {'ai_user': [
                'Daily', 'More than once daily', 'Multiple times per week'
            ]}
        })
        combinations.append({
            'name': 'AIUser-Infrequent',
            'filters': {'ai_user': [
                'Once a month', 'Less than once a month', 'Once a week'
            ]}
        })

    # Investable Assets - one at a time
    if wanted('investable_assets'):
        combinations += _single_value_combinations(
            'Assets', 'investable_assets', survey.options_investable_assets
        )

    # Industry - one at a time
    if wanted('industry'):
        combinations += _single_value_combinations('Industry', 'industry', survey.options_industry)

    return combinations
@@ -207,6 +228,13 @@ def main():
import argparse
parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
parser.add_argument(
'--category',
choices=['all', 'age', 'gender', 'ethnicity', 'income', 'consumer',
'business_owner', 'ai_user', 'investable_assets', 'industry'],
default='all',
help='Filter category to run combinations for (default: all)'
)
args = parser.parse_args()
# Load survey to get available filter options
@@ -214,9 +242,10 @@ def main():
survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
survey.load_data() # Populates options_* attributes
# Generate all combinations
combinations = get_filter_combinations(survey)
print(f"Generated {len(combinations)} filter combinations")
# Generate combinations for specified category
combinations = get_filter_combinations(survey, category=args.category)
category_desc = f" for category '{args.category}'" if args.category != 'all' else ''
print(f"Generated {len(combinations)} filter combinations{category_desc}")
if args.dry_run:
print("\nDRY RUN - Commands that would be executed:")