fix sample size
This commit is contained in:
@@ -26,9 +26,9 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2_3-2-18-15.pptx')
|
TAG_SOURCE = Path('data/reports/Perception-Research-Report_3-2-26_20-00.pptx')
|
||||||
# TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
|
# TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
|
||||||
TAG_IMAGE_DIR = Path('figures/2-3-26_Copy-2-2-26')
|
TAG_IMAGE_DIR = Path('figures/debug')
|
||||||
return TAG_IMAGE_DIR, TAG_SOURCE
|
return TAG_IMAGE_DIR, TAG_SOURCE
|
||||||
|
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2_3-2-18-15.pptx')
|
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_3-2-26_20-00.pptx')
|
||||||
# REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx')
|
# REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx')
|
||||||
|
|
||||||
NEW_IMAGES_DIR = Path('figures/debug')
|
NEW_IMAGES_DIR = Path('figures/debug')
|
||||||
|
|||||||
61
plots.py
61
plots.py
@@ -178,8 +178,8 @@ class QualtricsPlotsMixin:
|
|||||||
# Use UPPERCASE for category name to distinguish from values
|
# Use UPPERCASE for category name to distinguish from values
|
||||||
parts.append(f"{display_name.upper()}: {val_str}")
|
parts.append(f"{display_name.upper()}: {val_str}")
|
||||||
|
|
||||||
# Get sample size (stored by _ensure_dataframe)
|
# Get sample size from the filtered dataset (not from transformed plot data)
|
||||||
sample_size = getattr(self, '_last_sample_size', None)
|
sample_size = self._get_filtered_sample_size()
|
||||||
sample_prefix = f"Sample size: {sample_size}" if sample_size is not None else ""
|
sample_prefix = f"Sample size: {sample_size}" if sample_size is not None else ""
|
||||||
|
|
||||||
if not parts:
|
if not parts:
|
||||||
@@ -297,10 +297,7 @@ class QualtricsPlotsMixin:
|
|||||||
return chart
|
return chart
|
||||||
|
|
||||||
def _ensure_dataframe(self, data: pl.LazyFrame | pl.DataFrame | None) -> pl.DataFrame:
|
def _ensure_dataframe(self, data: pl.LazyFrame | pl.DataFrame | None) -> pl.DataFrame:
|
||||||
"""Ensure data is an eager DataFrame, collecting if necessary.
|
"""Ensure data is an eager DataFrame, collecting if necessary."""
|
||||||
|
|
||||||
Also stores the sample size on self._last_sample_size for use in filter descriptions.
|
|
||||||
"""
|
|
||||||
df = data if data is not None else getattr(self, 'data_filtered', None)
|
df = data if data is not None else getattr(self, 'data_filtered', None)
|
||||||
if df is None:
|
if df is None:
|
||||||
raise ValueError("No data provided and self.data_filtered is None.")
|
raise ValueError("No data provided and self.data_filtered is None.")
|
||||||
@@ -308,10 +305,22 @@ class QualtricsPlotsMixin:
|
|||||||
if isinstance(df, pl.LazyFrame):
|
if isinstance(df, pl.LazyFrame):
|
||||||
df = df.collect()
|
df = df.collect()
|
||||||
|
|
||||||
# Store sample size for filter description
|
|
||||||
self._last_sample_size = df.height
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
def _get_filtered_sample_size(self) -> int | None:
|
||||||
|
"""Get the sample size from the filtered dataset (self.data_filtered).
|
||||||
|
|
||||||
|
This returns the number of respondents in the filtered dataset,
|
||||||
|
not the size of any transformed/aggregated data passed to plot functions.
|
||||||
|
"""
|
||||||
|
data_filtered = getattr(self, 'data_filtered', None)
|
||||||
|
if data_filtered is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if isinstance(data_filtered, pl.LazyFrame):
|
||||||
|
return data_filtered.select(pl.len()).collect().item()
|
||||||
|
return data_filtered.height
|
||||||
|
|
||||||
def _clean_voice_label(self, col_name: str) -> str:
|
def _clean_voice_label(self, col_name: str) -> str:
|
||||||
"""Extract and clean voice name from column name for display.
|
"""Extract and clean voice name from column name for display.
|
||||||
|
|
||||||
@@ -681,7 +690,7 @@ class QualtricsPlotsMixin:
|
|||||||
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
|
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
|
||||||
]
|
]
|
||||||
|
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
bars = alt.Chart(stats_df).mark_bar().encode(
|
||||||
x=alt.X('item:N', title=x_label, sort='-y'),
|
x=alt.X('item:N', title=x_label, sort='-y'),
|
||||||
y=alt.Y('count:Q', title=y_label),
|
y=alt.Y('count:Q', title=y_label),
|
||||||
color=alt.Color('gender_category:N',
|
color=alt.Color('gender_category:N',
|
||||||
@@ -692,14 +701,27 @@ class QualtricsPlotsMixin:
|
|||||||
alt.Tooltip('count:Q', title='1st Place Votes'),
|
alt.Tooltip('count:Q', title='1st Place Votes'),
|
||||||
alt.Tooltip('gender:N', title='Gender')
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
]
|
]
|
||||||
).properties(
|
)
|
||||||
|
|
||||||
|
# Text overlay for counts
|
||||||
|
text = alt.Chart(stats_df).mark_text(
|
||||||
|
dy=-5,
|
||||||
|
color='black',
|
||||||
|
fontSize=10
|
||||||
|
).encode(
|
||||||
|
x=alt.X('item:N', sort='-y'),
|
||||||
|
y=alt.Y('count:Q'),
|
||||||
|
text=alt.Text('count:Q')
|
||||||
|
)
|
||||||
|
|
||||||
|
chart = (bars + text).properties(
|
||||||
title=self._process_title(title),
|
title=self._process_title(title),
|
||||||
width=width or 800,
|
width=width or 800,
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Bar chart with conditional color
|
# Bar chart with conditional color
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
bars = alt.Chart(stats_df).mark_bar().encode(
|
||||||
x=alt.X('item:N', title=x_label, sort='-y'),
|
x=alt.X('item:N', title=x_label, sort='-y'),
|
||||||
y=alt.Y('count:Q', title=y_label),
|
y=alt.Y('count:Q', title=y_label),
|
||||||
color=alt.Color('category:N',
|
color=alt.Color('category:N',
|
||||||
@@ -710,7 +732,20 @@ class QualtricsPlotsMixin:
|
|||||||
alt.Tooltip('item:N', title='Item'),
|
alt.Tooltip('item:N', title='Item'),
|
||||||
alt.Tooltip('count:Q', title='1st Place Votes')
|
alt.Tooltip('count:Q', title='1st Place Votes')
|
||||||
]
|
]
|
||||||
).properties(
|
)
|
||||||
|
|
||||||
|
# Text overlay for counts
|
||||||
|
text = alt.Chart(stats_df).mark_text(
|
||||||
|
dy=-5,
|
||||||
|
color='black',
|
||||||
|
fontSize=10
|
||||||
|
).encode(
|
||||||
|
x=alt.X('item:N', sort='-y'),
|
||||||
|
y=alt.Y('count:Q'),
|
||||||
|
text=alt.Text('count:Q')
|
||||||
|
)
|
||||||
|
|
||||||
|
chart = (bars + text).properties(
|
||||||
title=self._process_title(title),
|
title=self._process_title(title),
|
||||||
width=width or 800,
|
width=width or 800,
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
@@ -769,7 +804,7 @@ class QualtricsPlotsMixin:
|
|||||||
# Text overlay
|
# Text overlay
|
||||||
text = bars.mark_text(
|
text = bars.mark_text(
|
||||||
dy=-5,
|
dy=-5,
|
||||||
color='white',
|
color='black',
|
||||||
fontSize=11
|
fontSize=11
|
||||||
).encode(
|
).encode(
|
||||||
text='Weighted Score:Q'
|
text='Weighted Score:Q'
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ Runs 03_quant_report.script.py for each single-filter combination:
|
|||||||
Usage:
|
Usage:
|
||||||
uv run python run_filter_combinations.py
|
uv run python run_filter_combinations.py
|
||||||
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
|
uv run python run_filter_combinations.py --dry-run # Preview combinations without running
|
||||||
|
uv run python run_filter_combinations.py --category age # Only run age combinations
|
||||||
|
uv run python run_filter_combinations.py --category consumer # Only run consumer segment combinations
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -31,132 +33,151 @@ QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_P
|
|||||||
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
|
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
|
||||||
|
|
||||||
|
|
||||||
def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]:
|
def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Generate all single-filter combinations.
|
Generate all single-filter combinations.
|
||||||
|
|
||||||
Each combination isolates ONE filter value while keeping all others at "all selected".
|
Each combination isolates ONE filter value while keeping all others at "all selected".
|
||||||
Returns list of dicts with filter kwargs for each run.
|
|
||||||
|
Args:
|
||||||
|
survey: QualtricsSurvey instance with loaded data
|
||||||
|
category: Optional filter category to limit combinations to.
|
||||||
|
Valid values: 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer',
|
||||||
|
'business_owner', 'ai_user', 'investable_assets', 'industry'
|
||||||
|
If None or 'all', generates all combinations.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of dicts with filter kwargs for each run.
|
||||||
"""
|
"""
|
||||||
combinations = []
|
combinations = []
|
||||||
|
|
||||||
# Add "All Respondents" run (no filters = all options selected)
|
# Add "All Respondents" run (no filters = all options selected)
|
||||||
combinations.append({
|
if not category or category == 'all':
|
||||||
'name': 'All_Respondents',
|
combinations.append({
|
||||||
'filters': {} # Empty = use defaults (all selected)
|
'name': 'All_Respondents',
|
||||||
})
|
'filters': {} # Empty = use defaults (all selected)
|
||||||
|
})
|
||||||
|
|
||||||
# Age groups - one at a time
|
# Age groups - one at a time
|
||||||
for age in survey.options_age:
|
if not category or category in ['all', 'age']:
|
||||||
combinations.append({
|
for age in survey.options_age:
|
||||||
'name': f'Age-{age}',
|
|
||||||
'filters': {'age': [age]}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Gender - one at a time
|
|
||||||
for gender in survey.options_gender:
|
|
||||||
combinations.append({
|
|
||||||
'name': f'Gender-{gender}',
|
|
||||||
'filters': {'gender': [gender]}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Ethnicity - grouped by individual values
|
|
||||||
# Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
|
|
||||||
# Create filters that include ALL options containing each individual ethnicity value
|
|
||||||
ethnicity_values = set()
|
|
||||||
for ethnicity_option in survey.options_ethnicity:
|
|
||||||
# Split by comma and strip whitespace
|
|
||||||
values = [v.strip() for v in ethnicity_option.split(',')]
|
|
||||||
ethnicity_values.update(values)
|
|
||||||
|
|
||||||
for ethnicity_value in sorted(ethnicity_values):
|
|
||||||
# Find all options that contain this value
|
|
||||||
matching_options = [
|
|
||||||
opt for opt in survey.options_ethnicity
|
|
||||||
if ethnicity_value in [v.strip() for v in opt.split(',')]
|
|
||||||
]
|
|
||||||
combinations.append({
|
|
||||||
'name': f'Ethnicity-{ethnicity_value}',
|
|
||||||
'filters': {'ethnicity': matching_options}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Income - one at a time
|
|
||||||
for income in survey.options_income:
|
|
||||||
combinations.append({
|
|
||||||
'name': f'Income-{income}',
|
|
||||||
'filters': {'income': [income]}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Consumer segments - combine _A and _B options, and also include standalone
|
|
||||||
# Group options by base name (removing _A/_B suffix)
|
|
||||||
consumer_groups = {}
|
|
||||||
for consumer in survey.options_consumer:
|
|
||||||
# Check if ends with _A or _B
|
|
||||||
if consumer.endswith('_A') or consumer.endswith('_B'):
|
|
||||||
base_name = consumer[:-2] # Remove last 2 chars (_A or _B)
|
|
||||||
if base_name not in consumer_groups:
|
|
||||||
consumer_groups[base_name] = []
|
|
||||||
consumer_groups[base_name].append(consumer)
|
|
||||||
else:
|
|
||||||
# Not an _A/_B option, keep as-is
|
|
||||||
consumer_groups[consumer] = [consumer]
|
|
||||||
|
|
||||||
# Add combined _A+_B options
|
|
||||||
for base_name, options in consumer_groups.items():
|
|
||||||
if len(options) > 1: # Only combine if there are multiple (_A and _B)
|
|
||||||
combinations.append({
|
combinations.append({
|
||||||
'name': f'Consumer-{base_name}',
|
'name': f'Age-{age}',
|
||||||
'filters': {'consumer': options}
|
'filters': {'age': [age]}
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add standalone options (including individual _A and _B)
|
# Gender - one at a time
|
||||||
for consumer in survey.options_consumer:
|
if not category or category in ['all', 'gender']:
|
||||||
combinations.append({
|
for gender in survey.options_gender:
|
||||||
'name': f'Consumer-{consumer}',
|
combinations.append({
|
||||||
'filters': {'consumer': [consumer]}
|
'name': f'Gender-{gender}',
|
||||||
})
|
'filters': {'gender': [gender]}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Ethnicity - grouped by individual values
|
||||||
|
if not category or category in ['all', 'ethnicity']:
|
||||||
|
# Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
|
||||||
|
# Create filters that include ALL options containing each individual ethnicity value
|
||||||
|
ethnicity_values = set()
|
||||||
|
for ethnicity_option in survey.options_ethnicity:
|
||||||
|
# Split by comma and strip whitespace
|
||||||
|
values = [v.strip() for v in ethnicity_option.split(',')]
|
||||||
|
ethnicity_values.update(values)
|
||||||
|
|
||||||
|
for ethnicity_value in sorted(ethnicity_values):
|
||||||
|
# Find all options that contain this value
|
||||||
|
matching_options = [
|
||||||
|
opt for opt in survey.options_ethnicity
|
||||||
|
if ethnicity_value in [v.strip() for v in opt.split(',')]
|
||||||
|
]
|
||||||
|
combinations.append({
|
||||||
|
'name': f'Ethnicity-{ethnicity_value}',
|
||||||
|
'filters': {'ethnicity': matching_options}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Income - one at a time
|
||||||
|
if not category or category in ['all', 'income']:
|
||||||
|
for income in survey.options_income:
|
||||||
|
combinations.append({
|
||||||
|
'name': f'Income-{income}',
|
||||||
|
'filters': {'income': [income]}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Consumer segments - combine _A and _B options, and also include standalone
|
||||||
|
if not category or category in ['all', 'consumer']:
|
||||||
|
# Group options by base name (removing _A/_B suffix)
|
||||||
|
consumer_groups = {}
|
||||||
|
for consumer in survey.options_consumer:
|
||||||
|
# Check if ends with _A or _B
|
||||||
|
if consumer.endswith('_A') or consumer.endswith('_B'):
|
||||||
|
base_name = consumer[:-2] # Remove last 2 chars (_A or _B)
|
||||||
|
if base_name not in consumer_groups:
|
||||||
|
consumer_groups[base_name] = []
|
||||||
|
consumer_groups[base_name].append(consumer)
|
||||||
|
else:
|
||||||
|
# Not an _A/_B option, keep as-is
|
||||||
|
consumer_groups[consumer] = [consumer]
|
||||||
|
|
||||||
|
# Add combined _A+_B options
|
||||||
|
for base_name, options in consumer_groups.items():
|
||||||
|
if len(options) > 1: # Only combine if there are multiple (_A and _B)
|
||||||
|
combinations.append({
|
||||||
|
'name': f'Consumer-{base_name}',
|
||||||
|
'filters': {'consumer': options}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Add standalone options (including individual _A and _B)
|
||||||
|
for consumer in survey.options_consumer:
|
||||||
|
combinations.append({
|
||||||
|
'name': f'Consumer-{consumer}',
|
||||||
|
'filters': {'consumer': [consumer]}
|
||||||
|
})
|
||||||
|
|
||||||
# Business Owner - one at a time
|
# Business Owner - one at a time
|
||||||
for business_owner in survey.options_business_owner:
|
if not category or category in ['all', 'business_owner']:
|
||||||
combinations.append({
|
for business_owner in survey.options_business_owner:
|
||||||
'name': f'BusinessOwner-{business_owner}',
|
combinations.append({
|
||||||
'filters': {'business_owner': [business_owner]}
|
'name': f'BusinessOwner-{business_owner}',
|
||||||
})
|
'filters': {'business_owner': [business_owner]}
|
||||||
|
})
|
||||||
|
|
||||||
# AI User - one at a time
|
# AI User - one at a time
|
||||||
for ai_user in survey.options_ai_user:
|
if not category or category in ['all', 'ai_user']:
|
||||||
combinations.append({
|
for ai_user in survey.options_ai_user:
|
||||||
'name': f'AIUser-{ai_user}',
|
combinations.append({
|
||||||
'filters': {'ai_user': [ai_user]}
|
'name': f'AIUser-{ai_user}',
|
||||||
})
|
'filters': {'ai_user': [ai_user]}
|
||||||
|
})
|
||||||
|
|
||||||
# AI user daily, more than once daily, and multiple times a week = frequent
|
# AI user daily, more than once daily, and multiple times a week = frequent
|
||||||
combinations.append({
|
combinations.append({
|
||||||
'name': 'AIUser-Frequent',
|
'name': 'AIUser-Frequent',
|
||||||
'filters': {'ai_user': [
|
'filters': {'ai_user': [
|
||||||
'Daily', 'More than once daily', 'Multiple times per week'
|
'Daily', 'More than once daily', 'Multiple times per week'
|
||||||
]}
|
]}
|
||||||
})
|
})
|
||||||
combinations.append({
|
combinations.append({
|
||||||
'name': 'AIUser-Infrequent',
|
'name': 'AIUser-Infrequent',
|
||||||
'filters': {'ai_user': [
|
'filters': {'ai_user': [
|
||||||
'Once a month', 'Less than once a month', 'Once a week'
|
'Once a month', 'Less than once a month', 'Once a week'
|
||||||
]}
|
]}
|
||||||
})
|
})
|
||||||
|
|
||||||
# Investable Assets - one at a time
|
# Investable Assets - one at a time
|
||||||
for investable_assets in survey.options_investable_assets:
|
if not category or category in ['all', 'investable_assets']:
|
||||||
combinations.append({
|
for investable_assets in survey.options_investable_assets:
|
||||||
'name': f'Assets-{investable_assets}',
|
combinations.append({
|
||||||
'filters': {'investable_assets': [investable_assets]}
|
'name': f'Assets-{investable_assets}',
|
||||||
})
|
'filters': {'investable_assets': [investable_assets]}
|
||||||
|
})
|
||||||
|
|
||||||
# Industry - one at a time
|
# Industry - one at a time
|
||||||
for industry in survey.options_industry:
|
if not category or category in ['all', 'industry']:
|
||||||
combinations.append({
|
for industry in survey.options_industry:
|
||||||
'name': f'Industry-{industry}',
|
combinations.append({
|
||||||
'filters': {'industry': [industry]}
|
'name': f'Industry-{industry}',
|
||||||
})
|
'filters': {'industry': [industry]}
|
||||||
|
})
|
||||||
|
|
||||||
return combinations
|
return combinations
|
||||||
|
|
||||||
@@ -207,6 +228,13 @@ def main():
|
|||||||
import argparse
|
import argparse
|
||||||
parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
|
parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
|
||||||
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
|
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
|
||||||
|
parser.add_argument(
|
||||||
|
'--category',
|
||||||
|
choices=['all', 'age', 'gender', 'ethnicity', 'income', 'consumer',
|
||||||
|
'business_owner', 'ai_user', 'investable_assets', 'industry'],
|
||||||
|
default='all',
|
||||||
|
help='Filter category to run combinations for (default: all)'
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Load survey to get available filter options
|
# Load survey to get available filter options
|
||||||
@@ -214,9 +242,10 @@ def main():
|
|||||||
survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
||||||
survey.load_data() # Populates options_* attributes
|
survey.load_data() # Populates options_* attributes
|
||||||
|
|
||||||
# Generate all combinations
|
# Generate combinations for specified category
|
||||||
combinations = get_filter_combinations(survey)
|
combinations = get_filter_combinations(survey, category=args.category)
|
||||||
print(f"Generated {len(combinations)} filter combinations")
|
category_desc = f" for category '{args.category}'" if args.category != 'all' else ''
|
||||||
|
print(f"Generated {len(combinations)} filter combinations{category_desc}")
|
||||||
|
|
||||||
if args.dry_run:
|
if args.dry_run:
|
||||||
print("\nDRY RUN - Commands that would be executed:")
|
print("\nDRY RUN - Commands that would be executed:")
|
||||||
|
|||||||
Reference in New Issue
Block a user