diff --git a/04_PPTX_Update_Images.py b/04_PPTX_Update_Images.py index fe1c3e7..2ea79f7 100644 --- a/04_PPTX_Update_Images.py +++ b/04_PPTX_Update_Images.py @@ -26,9 +26,9 @@ def _(): @app.cell def _(): - TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2_3-2-18-15.pptx') + TAG_SOURCE = Path('data/reports/Perception-Research-Report_3-2-26_20-00.pptx') # TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx') - TAG_IMAGE_DIR = Path('figures/2-3-26_Copy-2-2-26') + TAG_IMAGE_DIR = Path('figures/debug') return TAG_IMAGE_DIR, TAG_SOURCE @@ -52,7 +52,7 @@ def _(): @app.cell def _(): - REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2_3-2-18-15.pptx') + REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_3-2-26_20-00.pptx') # REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx') NEW_IMAGES_DIR = Path('figures/debug') diff --git a/plots.py b/plots.py index 897ffa1..1922e0e 100644 --- a/plots.py +++ b/plots.py @@ -178,8 +178,8 @@ class QualtricsPlotsMixin: # Use UPPERCASE for category name to distinguish from values parts.append(f"{display_name.upper()}: {val_str}") - # Get sample size (stored by _ensure_dataframe) - sample_size = getattr(self, '_last_sample_size', None) + # Get sample size from the filtered dataset (not from transformed plot data) + sample_size = self._get_filtered_sample_size() sample_prefix = f"Sample size: {sample_size}" if sample_size is not None else "" if not parts: @@ -297,10 +297,7 @@ class QualtricsPlotsMixin: return chart def _ensure_dataframe(self, data: pl.LazyFrame | pl.DataFrame | None) -> pl.DataFrame: - """Ensure data is an eager DataFrame, collecting if necessary. - - Also stores the sample size on self._last_sample_size for use in filter descriptions. - """ + """Ensure data is an eager DataFrame, collecting if necessary.""" df = data if data is not None else getattr(self, 'data_filtered', None) if df is None: raise ValueError("No data provided and self.data_filtered is None.") @@ -308,9 +305,21 @@ class QualtricsPlotsMixin: if isinstance(df, pl.LazyFrame): df = df.collect() - # Store sample size for filter description - self._last_sample_size = df.height return df + + def _get_filtered_sample_size(self) -> int | None: + """Get the sample size from the filtered dataset (self.data_filtered). + + This returns the number of respondents in the filtered dataset, + not the size of any transformed/aggregated data passed to plot functions. + """ + data_filtered = getattr(self, 'data_filtered', None) + if data_filtered is None: + return None + + if isinstance(data_filtered, pl.LazyFrame): + return data_filtered.select(pl.len()).collect().item() + return data_filtered.height def _clean_voice_label(self, col_name: str) -> str: """Extract and clean voice name from column name for display. @@ -681,7 +690,7 @@ class QualtricsPlotsMixin: ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ] - chart = alt.Chart(stats_df).mark_bar().encode( + bars = alt.Chart(stats_df).mark_bar().encode( x=alt.X('item:N', title=x_label, sort='-y'), y=alt.Y('count:Q', title=y_label), color=alt.Color('gender_category:N', @@ -692,14 +701,27 @@ class QualtricsPlotsMixin: alt.Tooltip('count:Q', title='1st Place Votes'), alt.Tooltip('gender:N', title='Gender') ] - ).properties( + ) + + # Text overlay for counts + text = alt.Chart(stats_df).mark_text( + dy=-5, + color='black', + fontSize=10 + ).encode( + x=alt.X('item:N', sort='-y'), + y=alt.Y('count:Q'), + text=alt.Text('count:Q') + ) + + chart = (bars + text).properties( title=self._process_title(title), width=width or 800, height=height or getattr(self, 'plot_height', 400) ) else: # Bar chart with conditional color - chart = alt.Chart(stats_df).mark_bar().encode( + bars = alt.Chart(stats_df).mark_bar().encode( x=alt.X('item:N', title=x_label, sort='-y'), y=alt.Y('count:Q', title=y_label), color=alt.Color('category:N', @@ -710,7 +732,20 @@ class QualtricsPlotsMixin: alt.Tooltip('item:N', title='Item'), alt.Tooltip('count:Q', title='1st Place Votes') ] - ).properties( + ) + + # Text overlay for counts + text = alt.Chart(stats_df).mark_text( + dy=-5, + color='black', + fontSize=10 + ).encode( + x=alt.X('item:N', sort='-y'), + y=alt.Y('count:Q'), + text=alt.Text('count:Q') + ) + + chart = (bars + text).properties( title=self._process_title(title), width=width or 800, height=height or getattr(self, 'plot_height', 400) @@ -769,7 +804,7 @@ class QualtricsPlotsMixin: # Text overlay text = bars.mark_text( dy=-5, - color='white', + color='black', fontSize=11 ).encode( text='Weighted Score:Q' diff --git a/run_filter_combinations.py b/run_filter_combinations.py index f05527a..1b52179 100644 --- a/run_filter_combinations.py +++ b/run_filter_combinations.py @@ -12,6 +12,8 @@ Runs 03_quant_report.script.py for each single-filter combination: Usage: uv run python run_filter_combinations.py uv run python run_filter_combinations.py --dry-run # Preview combinations without running + uv run python run_filter_combinations.py --category age # Only run age combinations + uv run python run_filter_combinations.py --category consumer # Only run consumer segment combinations """ import subprocess @@ -31,132 +33,151 @@ QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_P REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py' -def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: +def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> list[dict]: """ Generate all single-filter combinations. Each combination isolates ONE filter value while keeping all others at "all selected". - Returns list of dicts with filter kwargs for each run. + + Args: + survey: QualtricsSurvey instance with loaded data + category: Optional filter category to limit combinations to. + Valid values: 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', + 'business_owner', 'ai_user', 'investable_assets', 'industry' + If None or 'all', generates all combinations. + + Returns: + List of dicts with filter kwargs for each run. """ combinations = [] # Add "All Respondents" run (no filters = all options selected) - combinations.append({ - 'name': 'All_Respondents', - 'filters': {} # Empty = use defaults (all selected) - }) + if not category or category == 'all': + combinations.append({ + 'name': 'All_Respondents', + 'filters': {} # Empty = use defaults (all selected) + }) # Age groups - one at a time - for age in survey.options_age: - combinations.append({ - 'name': f'Age-{age}', - 'filters': {'age': [age]} - }) - - # Gender - one at a time - for gender in survey.options_gender: - combinations.append({ - 'name': f'Gender-{gender}', - 'filters': {'gender': [gender]} - }) - - # Ethnicity - grouped by individual values - # Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino") - # Create filters that include ALL options containing each individual ethnicity value - ethnicity_values = set() - for ethnicity_option in survey.options_ethnicity: - # Split by comma and strip whitespace - values = [v.strip() for v in ethnicity_option.split(',')] - ethnicity_values.update(values) - - for ethnicity_value in sorted(ethnicity_values): - # Find all options that contain this value - matching_options = [ - opt for opt in survey.options_ethnicity - if ethnicity_value in [v.strip() for v in opt.split(',')] - ] - combinations.append({ - 'name': f'Ethnicity-{ethnicity_value}', - 'filters': {'ethnicity': matching_options} - }) - - # Income - one at a time - for income in survey.options_income: - combinations.append({ - 'name': f'Income-{income}', - 'filters': {'income': [income]} - }) - - # Consumer segments - combine _A and _B options, and also include standalone - # Group options by base name (removing _A/_B suffix) - consumer_groups = {} - for consumer in survey.options_consumer: - # Check if ends with _A or _B - if consumer.endswith('_A') or consumer.endswith('_B'): - base_name = consumer[:-2] # Remove last 2 chars (_A or _B) - if base_name not in consumer_groups: - consumer_groups[base_name] = [] - consumer_groups[base_name].append(consumer) - else: - # Not an _A/_B option, keep as-is - consumer_groups[consumer] = [consumer] - - # Add combined _A+_B options - for base_name, options in consumer_groups.items(): - if len(options) > 1: # Only combine if there are multiple (_A and _B) + if not category or category in ['all', 'age']: + for age in survey.options_age: combinations.append({ - 'name': f'Consumer-{base_name}', - 'filters': {'consumer': options} + 'name': f'Age-{age}', + 'filters': {'age': [age]} }) - # Add standalone options (including individual _A and _B) - for consumer in survey.options_consumer: - combinations.append({ - 'name': f'Consumer-{consumer}', - 'filters': {'consumer': [consumer]} - }) + # Gender - one at a time + if not category or category in ['all', 'gender']: + for gender in survey.options_gender: + combinations.append({ + 'name': f'Gender-{gender}', + 'filters': {'gender': [gender]} + }) + + # Ethnicity - grouped by individual values + if not category or category in ['all', 'ethnicity']: + # Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino") + # Create filters that include ALL options containing each individual ethnicity value + ethnicity_values = set() + for ethnicity_option in survey.options_ethnicity: + # Split by comma and strip whitespace + values = [v.strip() for v in ethnicity_option.split(',')] + ethnicity_values.update(values) + + for ethnicity_value in sorted(ethnicity_values): + # Find all options that contain this value + matching_options = [ + opt for opt in survey.options_ethnicity + if ethnicity_value in [v.strip() for v in opt.split(',')] + ] + combinations.append({ + 'name': f'Ethnicity-{ethnicity_value}', + 'filters': {'ethnicity': matching_options} + }) + + # Income - one at a time + if not category or category in ['all', 'income']: + for income in survey.options_income: + combinations.append({ + 'name': f'Income-{income}', + 'filters': {'income': [income]} + }) + + # Consumer segments - combine _A and _B options, and also include standalone + if not category or category in ['all', 'consumer']: + # Group options by base name (removing _A/_B suffix) + consumer_groups = {} + for consumer in survey.options_consumer: + # Check if ends with _A or _B + if consumer.endswith('_A') or consumer.endswith('_B'): + base_name = consumer[:-2] # Remove last 2 chars (_A or _B) + if base_name not in consumer_groups: + consumer_groups[base_name] = [] + consumer_groups[base_name].append(consumer) + else: + # Not an _A/_B option, keep as-is + consumer_groups[consumer] = [consumer] + + # Add combined _A+_B options + for base_name, options in consumer_groups.items(): + if len(options) > 1: # Only combine if there are multiple (_A and _B) + combinations.append({ + 'name': f'Consumer-{base_name}', + 'filters': {'consumer': options} + }) + + # Add standalone options (including individual _A and _B) + for consumer in survey.options_consumer: + combinations.append({ + 'name': f'Consumer-{consumer}', + 'filters': {'consumer': [consumer]} + }) # Business Owner - one at a time - for business_owner in survey.options_business_owner: - combinations.append({ - 'name': f'BusinessOwner-{business_owner}', - 'filters': {'business_owner': [business_owner]} - }) + if not category or category in ['all', 'business_owner']: + for business_owner in survey.options_business_owner: + combinations.append({ + 'name': f'BusinessOwner-{business_owner}', + 'filters': {'business_owner': [business_owner]} + }) # AI User - one at a time - for ai_user in survey.options_ai_user: + if not category or category in ['all', 'ai_user']: + for ai_user in survey.options_ai_user: + combinations.append({ + 'name': f'AIUser-{ai_user}', + 'filters': {'ai_user': [ai_user]} + }) + + # AI user daily, more than once daily, en multiple times a week = frequent combinations.append({ - 'name': f'AIUser-{ai_user}', - 'filters': {'ai_user': [ai_user]} + 'name': 'AIUser-Frequent', + 'filters': {'ai_user': [ + 'Daily', 'More than once daily', 'Multiple times per week' + ]} + }) + combinations.append({ + 'name': 'AIUser-Infrequent', + 'filters': {'ai_user': [ + 'Once a month', 'Less than once a month', 'Once a week' + ]} }) - - # AI user daily, more than once daily, en multiple times a week = frequent - combinations.append({ - 'name': 'AIUser-Frequent', - 'filters': {'ai_user': [ - 'Daily', 'More than once daily', 'Multiple times per week' - ]} - }) - combinations.append({ - 'name': 'AIUser-Infrequent', - 'filters': {'ai_user': [ - 'Once a month', 'Less than once a month', 'Once a week' - ]} - }) # Investable Assets - one at a time - for investable_assets in survey.options_investable_assets: - combinations.append({ - 'name': f'Assets-{investable_assets}', - 'filters': {'investable_assets': [investable_assets]} - }) + if not category or category in ['all', 'investable_assets']: + for investable_assets in survey.options_investable_assets: + combinations.append({ + 'name': f'Assets-{investable_assets}', + 'filters': {'investable_assets': [investable_assets]} + }) # Industry - one at a time - for industry in survey.options_industry: - combinations.append({ - 'name': f'Industry-{industry}', - 'filters': {'industry': [industry]} - }) + if not category or category in ['all', 'industry']: + for industry in survey.options_industry: + combinations.append({ + 'name': f'Industry-{industry}', + 'filters': {'industry': [industry]} + }) return combinations @@ -207,6 +228,13 @@ def main(): import argparse parser = argparse.ArgumentParser(description='Run quant report for all filter combinations') parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running') + parser.add_argument( + '--category', + choices=['all', 'age', 'gender', 'ethnicity', 'income', 'consumer', + 'business_owner', 'ai_user', 'investable_assets', 'industry'], + default='all', + help='Filter category to run combinations for (default: all)' + ) args = parser.parse_args() # Load survey to get available filter options @@ -214,9 +242,10 @@ def main(): survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE) survey.load_data() # Populates options_* attributes - # Generate all combinations - combinations = get_filter_combinations(survey) - print(f"Generated {len(combinations)} filter combinations") + # Generate combinations for specified category + combinations = get_filter_combinations(survey, category=args.category) + category_desc = f" for category '{args.category}'" if args.category != 'all' else '' + print(f"Generated {len(combinations)} filter combinations{category_desc}") if args.dry_run: print("\nDRY RUN - Commands that would be executed:")