Compare commits

..

9 Commits

Author SHA1 Message Date
ad1d8c6e58 all plots offline update 2026-02-03 22:38:15 +01:00
f5b4c247b8 tidy plots 2026-02-03 22:12:17 +01:00
a35670aa72 fixed missing ai_user category 2026-02-03 21:13:29 +01:00
36280a6ff8 fix sample size 2026-02-03 20:48:34 +01:00
9a587dcc4c add ai-user filter combinations 2026-02-03 19:46:07 +01:00
9a49d1c690 added sample size to filter text 2026-02-03 19:16:39 +01:00
8f505da550 offline update 18-30 2026-02-03 18:43:20 +01:00
495b56307c fixed filter to none 2026-02-03 18:19:06 +01:00
1e76a82f24 fix wordcloud filter values 2026-02-03 17:41:12 +01:00
6 changed files with 484 additions and 224 deletions

5
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"chat.tools.terminal.autoApprove": {
"/home/luigi/Documents/VoiceBranding/JPMC/Phase-3/.venv/bin/python": true
}
}

View File

@@ -7,7 +7,7 @@ import polars as pl
from pathlib import Path from pathlib import Path
import argparse import argparse
import json import json
import re
from validation import check_progress, duration_validation, check_straight_liners from validation import check_progress, duration_validation, check_straight_liners
from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
import utils import utils
@@ -69,7 +69,7 @@ cli_args = parse_cli_args()
# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**")) # mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
# RESULTS_FILE = Path(file_browser.path(index=0)) # RESULTS_FILE = Path(file_browser.path(index=0))
RESULTS_FILE = 'data/exports/2-3-26_Copy-2-2-26/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv' RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf' QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
# %% # %%
@@ -114,14 +114,11 @@ BEST_CHOSEN_CHARACTER = "the_coach"
# %% # %%
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**")) # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
# CLI args: None means "all options selected" (use S.options_* defaults) # CLI args: None means "no filter applied" - filter_data() will skip None filters
# Build filter values dict dynamically from FILTER_CONFIG # Build filter values dict dynamically from FILTER_CONFIG
_active_filters = {} _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
for filter_name, options_attr in FILTER_CONFIG.items():
cli_value = getattr(cli_args, filter_name)
all_options = getattr(S, options_attr)
_active_filters[filter_name] = cli_value if cli_value is not None else all_options
# %%
_d = S.filter_data(data_all, **_active_filters) _d = S.filter_data(data_all, **_active_filters)
# Write filter description file if filter-name is provided # Write filter description file if filter-name is provided
@@ -142,14 +139,17 @@ if cli_args.filter_name and S.fig_save_dir:
all_options = getattr(S, options_attr) all_options = getattr(S, options_attr)
values = _active_filters[filter_name] values = _active_filters[filter_name]
display_name = filter_name.replace('_', ' ').title() display_name = filter_name.replace('_', ' ').title()
if values != all_options: # None means no filter applied (same as "All")
if values is not None and values != all_options:
_short_desc_parts.append(f"{display_name}: {', '.join(values)}") _short_desc_parts.append(f"{display_name}: {', '.join(values)}")
_filter_desc_lines.append(f" {display_name}: {', '.join(values)}") _filter_desc_lines.append(f" {display_name}: {', '.join(values)}")
else: else:
_filter_desc_lines.append(f" {display_name}: All") _filter_desc_lines.append(f" {display_name}: All")
# Write detailed description INSIDE the filter-slug directory # Write detailed description INSIDE the filter-slug directory
_filter_file = _filter_slug_dir / f"{cli_args.filter_name}.txt" # Sanitize filter name for filename usage (replace / and other chars)
_safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
_filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
_filter_file.write_text('\n'.join(_filter_desc_lines)) _filter_file.write_text('\n'.join(_filter_desc_lines))
# Append to summary index file at figures/<export_date>/filter_index.txt # Append to summary index file at figures/<export_date>/filter_index.txt

View File

@@ -21,9 +21,14 @@ def _():
@app.cell @app.cell
def _(): def _():
TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx') return
@app.cell
def _():
TAG_SOURCE = Path('/home/luigi/Documents/VoiceBranding/JPMC/Phase-3/data/reports/VOICE_Perception-Research-Report_3-2-26.pptx')
# TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx') # TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
TAG_IMAGE_DIR = Path('figures/2-2-26') TAG_IMAGE_DIR = Path('figures/debug')
return TAG_IMAGE_DIR, TAG_SOURCE return TAG_IMAGE_DIR, TAG_SOURCE
@@ -47,10 +52,10 @@ def _():
@app.cell @app.cell
def _(): def _():
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx') REPLACE_SOURCE = Path('/home/luigi/Documents/VoiceBranding/JPMC/Phase-3/data/reports/VOICE_Perception-Research-Report_3-2-26.pptx')
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx') # REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx')
NEW_IMAGES_DIR = Path('figures/2-2-26') NEW_IMAGES_DIR = Path('figures/debug')
return NEW_IMAGES_DIR, REPLACE_SOURCE return NEW_IMAGES_DIR, REPLACE_SOURCE

422
plots.py
View File

@@ -2,6 +2,7 @@
import re import re
import math import math
import textwrap
from pathlib import Path from pathlib import Path
import altair as alt import altair as alt
@@ -97,7 +98,11 @@ class QualtricsPlotsMixin:
return "_".join(parts) return "_".join(parts)
def _get_filter_description(self) -> str: def _get_filter_description(self) -> str:
"""Generate a human-readable description of active filters.""" """Generate a human-readable description of active filters.
Includes sample size (from _last_sample_size) prepended to the filter text.
Format: "Sample size: <n> | Filters: ..." or "Sample size: <n>" if no filters.
"""
parts = [] parts = []
# Mapping of attribute name to (display_name, value, options_attr) # Mapping of attribute name to (display_name, value, options_attr)
@@ -131,17 +136,62 @@ class QualtricsPlotsMixin:
if master_list and set(value) == set(master_list): if master_list and set(value) == set(master_list):
continue continue
# Use original values for display (full list) # Special handling for Ethnicity: detect single-value ethnicity filters
clean_values = [str(v) for v in value] # When filtering by one ethnicity (e.g., "White or Caucasian"), multiple options
val_str = ", ".join(clean_values) # may be selected (all options containing that value). Display just the common value
# only if ALL options containing that value are selected.
if display_name.lower() == 'ethnicity' and len(value) > 1 and master_list:
# Find common individual ethnicity values across all selected options
# Each option may be comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
value_sets = [
set(v.strip() for v in opt.split(','))
for opt in value
]
# Intersect all sets to find common values
common_values = value_sets[0]
for vs in value_sets[1:]:
common_values = common_values.intersection(vs)
# If exactly one common value, check if ALL options containing it are selected
if len(common_values) == 1:
common_val = common_values.pop()
# Find all options in master list that contain this common value
all_options_with_value = [
opt for opt in master_list
if common_val in [v.strip() for v in opt.split(',')]
]
# Only simplify if we selected ALL options containing this value
if set(value) == set(all_options_with_value):
val_str = common_val
else:
clean_values = [str(v) for v in value]
val_str = ", ".join(clean_values)
else:
# No single common value - fall back to full list
clean_values = [str(v) for v in value]
val_str = ", ".join(clean_values)
else:
# Use original values for display (full list)
clean_values = [str(v) for v in value]
val_str = ", ".join(clean_values)
# Use UPPERCASE for category name to distinguish from values # Use UPPERCASE for category name to distinguish from values
parts.append(f"{display_name.upper()}: {val_str}") parts.append(f"{display_name.upper()}: {val_str}")
# Get sample size from the filtered dataset (not from transformed plot data)
sample_size = self._get_filtered_sample_size()
sample_prefix = f"Sample size: {sample_size}" if sample_size is not None else ""
if not parts: if not parts:
return "" # No filters active - return just sample size (or empty string if no sample size)
return sample_prefix
# Join with clear separator - double space for visual break # Join with clear separator - double space for visual break
return "Filters: " + "".join(parts) filter_text = "Filters: " + "".join(parts)
if sample_prefix:
return f"{sample_prefix} | {filter_text}"
return filter_text
def _add_filter_footnote(self, chart: alt.Chart) -> alt.Chart: def _add_filter_footnote(self, chart: alt.Chart) -> alt.Chart:
"""Add a footnote with active filters to the chart. """Add a footnote with active filters to the chart.
@@ -253,8 +303,23 @@ class QualtricsPlotsMixin:
raise ValueError("No data provided and self.data_filtered is None.") raise ValueError("No data provided and self.data_filtered is None.")
if isinstance(df, pl.LazyFrame): if isinstance(df, pl.LazyFrame):
return df.collect() df = df.collect()
return df return df
def _get_filtered_sample_size(self) -> int | None:
"""Get the sample size from the filtered dataset (self.data_filtered).
This returns the number of respondents in the filtered dataset,
not the size of any transformed/aggregated data passed to plot functions.
"""
data_filtered = getattr(self, 'data_filtered', None)
if data_filtered is None:
return None
if isinstance(data_filtered, pl.LazyFrame):
return data_filtered.select(pl.len()).collect().item()
return data_filtered.height
def _clean_voice_label(self, col_name: str) -> str: def _clean_voice_label(self, col_name: str) -> str:
"""Extract and clean voice name from column name for display. """Extract and clean voice name from column name for display.
@@ -370,8 +435,8 @@ class QualtricsPlotsMixin:
# Base bar chart - use y2 to explicitly start bars at domain minimum # Base bar chart - use y2 to explicitly start bars at domain minimum
if color_gender: if color_gender:
bars = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('voice:N', title=x_label, sort='-y'), x=alt.X('voice:N', title=x_label, sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain), axis=alt.Axis(grid=True)),
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
color=alt.Color('gender:N', color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'], scale=alt.Scale(domain=['Male', 'Female'],
@@ -384,10 +449,15 @@ class QualtricsPlotsMixin:
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
) )
# Text overlay - inherit color from bars via mark_text
text = bars.mark_text(dy=-5, fontSize=10).encode(
text=alt.Text('count:Q')
)
else: else:
bars = alt.Chart(stats_df).mark_bar(color=color).encode( bars = alt.Chart(stats_df).mark_bar(color=color).encode(
x=alt.X('voice:N', title=x_label, sort='-y'), x=alt.X('voice:N', title=x_label, sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain), axis=alt.Axis(grid=True)),
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
tooltip=[ tooltip=[
alt.Tooltip('voice:N', title='Voice'), alt.Tooltip('voice:N', title='Voice'),
@@ -395,17 +465,17 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
) )
# Text overlay for counts # Text overlay for counts
text = alt.Chart(stats_df).mark_text( text = alt.Chart(stats_df).mark_text(
dy=-5, dy=-5,
color='black', color='black',
fontSize=10 fontSize=10
).encode( ).encode(
x=alt.X('voice:N', sort='-y'), x=alt.X('voice:N', sort='-y'),
y=alt.Y('average:Q'), y=alt.Y('average:Q'),
text=alt.Text('count:Q') text=alt.Text('count:Q')
) )
# Combine layers # Combine layers
chart = (bars + text).properties( chart = (bars + text).properties(
@@ -447,13 +517,16 @@ class QualtricsPlotsMixin:
# Convert to long format, sort by total # Convert to long format, sort by total
stats_df = pl.DataFrame(stats).to_pandas() stats_df = pl.DataFrame(stats).to_pandas()
# Compute explicit sort order by total (descending)
sort_order = stats_df.drop_duplicates('voice').sort_values('total', ascending=False)['voice'].tolist()
# Interactive legend selection - click to filter # Interactive legend selection - click to filter
selection = alt.selection_point(fields=['rank'], bind='legend') selection = alt.selection_point(fields=['rank'], bind='legend')
# Create stacked bar chart with interactive legend # Create stacked bar chart with interactive legend
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('voice:N', title=x_label, sort=alt.EncodingSortField(field='total', op='sum', order='descending')), x=alt.X('voice:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('rank:N', color=alt.Color('rank:N',
scale=alt.Scale(domain=['Rank 1 (1st Choice)', 'Rank 2 (2nd Choice)', 'Rank 3 (3rd Choice)'], scale=alt.Scale(domain=['Rank 1 (1st Choice)', 'Rank 2 (2nd Choice)', 'Rank 3 (3rd Choice)'],
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
@@ -465,7 +538,18 @@ class QualtricsPlotsMixin:
alt.Tooltip('rank:N', title='Rank'), alt.Tooltip('rank:N', title='Rank'),
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
).add_params(selection).properties( )
# Text layer showing totals on top of bars
text = alt.Chart(stats_df).transform_filter(
alt.datum.rank == 'Rank 1 (1st Choice)'
).mark_text(dy=-10, color='black').encode(
x=alt.X('voice:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q')
)
chart = alt.layer(bars, text).add_params(selection).properties(
title=self._process_title(title), title=self._process_title(title),
width=width or 800, width=width or 800,
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
@@ -518,6 +602,9 @@ class QualtricsPlotsMixin:
# Interactive legend selection - click to filter # Interactive legend selection - click to filter
selection = alt.selection_point(fields=['rank'], bind='legend') selection = alt.selection_point(fields=['rank'], bind='legend')
# Compute explicit sort order by total (descending)
sort_order = stats_df.drop_duplicates('item').sort_values('total', ascending=False)['item'].tolist()
if color_gender: if color_gender:
# Add gender_rank column for combined color encoding # Add gender_rank column for combined color encoding
stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank'] stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank']
@@ -532,9 +619,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3 ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('gender_rank:N', color=alt.Color('gender_rank:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)), legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)),
@@ -546,15 +633,11 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Count'), alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('rank:N', color=alt.Color('rank:N',
scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'],
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
@@ -566,12 +649,37 @@ class QualtricsPlotsMixin:
alt.Tooltip('rank:N', title='Rank'), alt.Tooltip('rank:N', title='Rank'),
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer showing totals on top of bars
if color_gender:
# Create a separate chart for totals with gender coloring
text_df = stats_df.drop_duplicates('item')[['item', 'total', 'gender']]
text = alt.Chart(text_df).mark_text(dy=-10).encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
)
else:
text = alt.Chart(stats_df).transform_filter(
alt.datum.rank_order == 1
).mark_text(dy=-10, color='black').encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q')
)
chart = alt.layer(bars, text).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -604,6 +712,7 @@ class QualtricsPlotsMixin:
# Convert and sort # Convert and sort
stats_df = pl.DataFrame(stats).sort('count', descending=True) stats_df = pl.DataFrame(stats).sort('count', descending=True)
sort_order = stats_df['item'].to_list()
# Add rank column for coloring (1-3 vs 4+) # Add rank column for coloring (1-3 vs 4+)
stats_df = stats_df.with_row_index('rank_index') stats_df = stats_df.with_row_index('rank_index')
@@ -625,9 +734,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -636,16 +745,30 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='1st Place Votes'), alt.Tooltip('count:Q', title='1st Place Votes'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).properties( )
# Create text layer with gender coloring using conditional
text = alt.Chart(stats_df).mark_text(dy=-5, fontSize=10).encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
)
chart = (bars + text).properties(
title=self._process_title(title), title=self._process_title(title),
width=width or 800, width=width or 800,
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
) )
else: else:
# Bar chart with conditional color # Bar chart with conditional color
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 3', 'Other'], scale=alt.Scale(domain=['Top 3', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -654,7 +777,20 @@ class QualtricsPlotsMixin:
alt.Tooltip('item:N', title='Item'), alt.Tooltip('item:N', title='Item'),
alt.Tooltip('count:Q', title='1st Place Votes') alt.Tooltip('count:Q', title='1st Place Votes')
] ]
).properties( )
# Text overlay for counts
text = alt.Chart(stats_df).mark_text(
dy=-5,
color='black',
fontSize=10
).encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q')
)
chart = (bars + text).properties(
title=self._process_title(title), title=self._process_title(title),
width=width or 800, width=width or 800,
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
@@ -680,6 +816,8 @@ class QualtricsPlotsMixin:
color_gender: If True, color bars by voice gender (blue=male, pink=female). color_gender: If True, color bars by voice gender (blue=male, pink=female).
""" """
weighted_df = self._ensure_dataframe(data).to_pandas() weighted_df = self._ensure_dataframe(data).to_pandas()
weighted_df.sort_values('Weighted Score', ascending=False, inplace=True)
sort_order = weighted_df['Character'].tolist()
if color_gender: if color_gender:
# Add gender column based on Character name # Add gender column based on Character name
@@ -687,8 +825,8 @@ class QualtricsPlotsMixin:
# Bar chart with gender coloring # Bar chart with gender coloring
bars = alt.Chart(weighted_df).mark_bar().encode( bars = alt.Chart(weighted_df).mark_bar().encode(
x=alt.X('Character:N', title=x_label, sort='-y'), x=alt.X('Character:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('Weighted Score:Q', title=y_label), y=alt.Y('Weighted Score:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender:N', color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'], scale=alt.Scale(domain=['Male', 'Female'],
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]), range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
@@ -702,8 +840,8 @@ class QualtricsPlotsMixin:
else: else:
# Bar chart # Bar chart
bars = alt.Chart(weighted_df).mark_bar(color=color).encode( bars = alt.Chart(weighted_df).mark_bar(color=color).encode(
x=alt.X('Character:N', title=x_label, sort='-y'), x=alt.X('Character:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('Weighted Score:Q', title=y_label), y=alt.Y('Weighted Score:Q', title=y_label, axis=alt.Axis(grid=True)),
tooltip=[ tooltip=[
alt.Tooltip('Character:N'), alt.Tooltip('Character:N'),
alt.Tooltip('Weighted Score:Q', title='Score') alt.Tooltip('Weighted Score:Q', title='Score')
@@ -713,7 +851,7 @@ class QualtricsPlotsMixin:
# Text overlay # Text overlay
text = bars.mark_text( text = bars.mark_text(
dy=-5, dy=-5,
color='white', color='black',
fontSize=11 fontSize=11
).encode( ).encode(
text='Weighted Score:Q' text='Weighted Score:Q'
@@ -771,8 +909,11 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
# Compute explicit sort order by count (descending)
sort_order = stats_df.sort_values('count', ascending=False)[target_column].tolist()
# Add gender column for all cases when color_gender is True (needed for text layer)
if color_gender: if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding # Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
@@ -784,9 +925,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -795,15 +936,23 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Selections'), alt.Tooltip('count:Q', title='Selections'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).properties( )
title=self._process_title(title),
width=width or 800, # Text layer with gender coloring using conditional
height=height or getattr(self, 'plot_height', 400) text = alt.Chart(stats_df).mark_text(dy=-10).encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 8', 'Other'], scale=alt.Scale(domain=['Top 8', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -812,11 +961,20 @@ class QualtricsPlotsMixin:
alt.Tooltip(f'{target_column}:N', title='Voice'), alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='Selections') alt.Tooltip('count:Q', title='Selections')
] ]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer with black color
text = alt.Chart(stats_df).mark_text(dy=-10, color='black').encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q')
)
chart = alt.layer(bars, text).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -863,8 +1021,11 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
# Compute explicit sort order by count (descending)
sort_order = stats_df.sort_values('count', ascending=False)[target_column].tolist()
# Add gender column for all cases when color_gender is True (needed for text layer)
if color_gender: if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding # Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
@@ -876,9 +1037,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -887,15 +1048,23 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='In Top 3'), alt.Tooltip('count:Q', title='In Top 3'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).properties( )
title=self._process_title(title),
width=width or 800, # Text layer with gender coloring using conditional
height=height or getattr(self, 'plot_height', 400) text = alt.Chart(stats_df).mark_text(dy=-10).encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 3', 'Other'], scale=alt.Scale(domain=['Top 3', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -904,11 +1073,20 @@ class QualtricsPlotsMixin:
alt.Tooltip(f'{target_column}:N', title='Voice'), alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='In Top 3') alt.Tooltip('count:Q', title='In Top 3')
] ]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer with black color
text = alt.Chart(stats_df).mark_text(dy=-10, color='black').encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q')
)
chart = alt.layer(bars, text).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -965,9 +1143,9 @@ class QualtricsPlotsMixin:
# Horizontal bar chart - use x2 to explicitly start bars at x=1 # Horizontal bar chart - use x2 to explicitly start bars at x=1
bars = alt.Chart(stats).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(stats).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('mean_score:Q', title='Average Score (1-5)', scale=alt.Scale(domain=[1, 5])), x=alt.X('mean_score:Q', title='Average Score (1-5)', scale=alt.Scale(domain=[1, 5]), axis=alt.Axis(grid=True)),
x2=alt.datum(1), # Bars start at x=1 (left edge of domain) x2=alt.datum(1), # Bars start at x=1 (left edge of domain)
y=alt.Y('Voice:N', title='Voice', sort='-x'), y=alt.Y('Voice:N', title='Voice', sort='-x', axis=alt.Axis(grid=False)),
tooltip=[ tooltip=[
alt.Tooltip('Voice:N'), alt.Tooltip('Voice:N'),
alt.Tooltip('mean_score:Q', title='Average', format='.2f'), alt.Tooltip('mean_score:Q', title='Average', format='.2f'),
@@ -1040,8 +1218,8 @@ class QualtricsPlotsMixin:
# Conditional color based on sign # Conditional color based on sign
chart = alt.Chart(plot_df).mark_bar().encode( chart = alt.Chart(plot_df).mark_bar().encode(
x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0)), x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0, grid=False)),
y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1])), y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1089,11 +1267,12 @@ class QualtricsPlotsMixin:
chart = alt.Chart(df.to_pandas()).mark_bar().encode( chart = alt.Chart(df.to_pandas()).mark_bar().encode(
x=alt.X('Color:N', x=alt.X('Color:N',
title=None, title=None,
axis=alt.Axis(labelAngle=0), axis=alt.Axis(labelAngle=0, grid=False),
sort=["Green", "Blue", "Orange", "Red"]), sort=["Green", "Blue", "Orange", "Red"]),
y=alt.Y('correlation:Q', y=alt.Y('correlation:Q',
title='Average Correlation', title='Average Correlation',
scale=alt.Scale(domain=[-1, 1])), scale=alt.Scale(domain=[-1, 1]),
axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1149,10 +1328,23 @@ class QualtricsPlotsMixin:
.with_columns(pl.col(column).fill_null("(No Response)")) .with_columns(pl.col(column).fill_null("(No Response)"))
.group_by(column) .group_by(column)
.agg(pl.len().alias("count")) .agg(pl.len().alias("count"))
.sort("count", descending=True)
.to_pandas() .to_pandas()
) )
# Apply sorting logic
if column == 'Age':
# Custom sort for Age ranges
# Example values: "18 to 21 years", "25 to 34 years", "70 years or more"
# Extract first number to sort by
stats_df['sort_key'] = stats_df[column].apply(
lambda x: int(re.search(r'\d+', str(x)).group()) if re.search(r'\d+', str(x)) else 999
)
# Use EncodingSortField for Age to avoid schema issues with list-based labels
sort_order = alt.EncodingSortField(field="sort_key", order="ascending")
else:
# Default sort by count descending
sort_order = '-x'
if stats_df.empty: if stats_df.empty:
return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N') return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
@@ -1160,22 +1352,31 @@ class QualtricsPlotsMixin:
total = stats_df['count'].sum() total = stats_df['count'].sum()
stats_df['percentage'] = (stats_df['count'] / total * 100).round(1) stats_df['percentage'] = (stats_df['count'] / total * 100).round(1)
# Clean y-labels by replacing underscores and wrapping long text
import textwrap
stats_df['clean_label'] = stats_df[column].astype(str).str.replace('_', ' ').apply(
lambda x: textwrap.wrap(x, width=25) if isinstance(x, str) else [str(x)]
)
# Calculate max lines for height adjustment
max_lines = stats_df['clean_label'].apply(len).max() if not stats_df.empty else 1
# Generate title if not provided # Generate title if not provided
if title is None: if title is None:
clean_col = column.replace('_', ' ').replace('/', ' / ') clean_col = column.replace('_', ' ').replace('/', ' / ')
title = f"Distribution: {clean_col}" title = f"Distribution: {clean_col}"
# Calculate appropriate height based on number of categories # Calculate appropriate height based on number of categories and wrapping
num_categories = len(stats_df) num_categories = len(stats_df)
bar_height = 18 # pixels per bar bar_height = max(20, max_lines * 15) # pixels per bar, scale with lines
calculated_height = max(120, num_categories * bar_height + 40) # min 120px, +40 for title/padding calculated_height = max(120, num_categories * bar_height + 40) # min 120px, +40 for title/padding
# Horizontal bar chart - categories on Y axis, counts on X axis # Horizontal bar chart - categories on Y axis, counts on X axis
bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=False)), x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=True)),
y=alt.Y(f'{column}:N', title=None, sort='-x', axis=alt.Axis(labelLimit=150)), y=alt.Y('clean_label:N', title=None, sort=sort_order, axis=alt.Axis(labelLimit=300, grid=False)),
tooltip=[ tooltip=[
alt.Tooltip(f'{column}:N', title=column.replace('_', ' ')), alt.Tooltip('clean_label:N', title=column.replace('_', ' ')),
alt.Tooltip('count:Q', title='Count'), alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('percentage:Q', title='Percentage', format='.1f') alt.Tooltip('percentage:Q', title='Percentage', format='.1f')
] ]
@@ -1191,7 +1392,7 @@ class QualtricsPlotsMixin:
color=ColorPalette.TEXT color=ColorPalette.TEXT
).encode( ).encode(
x='count:Q', x='count:Q',
y=alt.Y(f'{column}:N', sort='-x'), y=alt.Y('clean_label:N', sort=sort_order),
text='count:Q' text='count:Q'
) )
chart = (bars + text) chart = (bars + text)
@@ -1244,8 +1445,8 @@ class QualtricsPlotsMixin:
plot_df = pl.DataFrame(trait_correlations).to_pandas() plot_df = pl.DataFrame(trait_correlations).to_pandas()
chart = alt.Chart(plot_df).mark_bar().encode( chart = alt.Chart(plot_df).mark_bar().encode(
x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0)), x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0, grid=False)),
y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1])), y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1349,9 +1550,14 @@ class QualtricsPlotsMixin:
# Add title with filter subtitle (similar to _add_filter_footnote for Altair charts) # Add title with filter subtitle (similar to _add_filter_footnote for Altair charts)
filter_text = self._get_filter_description() filter_text = self._get_filter_description()
if filter_text: if filter_text:
# Title on top, filter subtitle below in light grey # Wrap filter text to prevent excessively long lines
fig.suptitle(title, fontsize=16, y=0.98, color=ColorPalette.TEXT) wrapped_lines = textwrap.wrap(filter_text, width=100)
ax.set_title(filter_text, fontsize=10, pad=10, color='lightgrey', loc='left') wrapped_text = '\n'.join(wrapped_lines)
# Use suptitle for main title (auto-positioned above axes)
fig.suptitle(title, fontsize=16, color=ColorPalette.TEXT, y=1.02)
# Use ax.set_title for filter text (positioned relative to axes, not figure)
ax.set_title(wrapped_text, fontsize=10, color='lightgrey', loc='left', pad=5)
else: else:
ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT) ax.set_title(title, fontsize=16, pad=20, color=ColorPalette.TEXT)
@@ -1420,8 +1626,8 @@ class QualtricsPlotsMixin:
x=alt.X('Trait:N', x=alt.X('Trait:N',
title=x_label, title=x_label,
sort=trait_order, sort=trait_order,
axis=alt.Axis(labelAngle=-45, labelLimit=200)), axis=alt.Axis(labelAngle=-45, labelLimit=200, grid=False)),
y=alt.Y('Count:Q', title=y_label), y=alt.Y('Count:Q', title=y_label, axis=alt.Axis(grid=True)),
xOffset='Character:N', xOffset='Character:N',
color=alt.Color('Character:N', color=alt.Color('Character:N',
scale=alt.Scale(domain=characters, scale=alt.Scale(domain=characters,
@@ -1537,8 +1743,8 @@ class QualtricsPlotsMixin:
y=alt.Y('trait:N', y=alt.Y('trait:N',
title=x_label, title=x_label,
sort=reversed_sort, sort=reversed_sort,
axis=alt.Axis(labelLimit=200)), axis=alt.Axis(labelLimit=200, grid=False)),
x=alt.X('count:Q', title=y_label), x=alt.X('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale( scale=alt.Scale(
domain=['Original Trait', 'Other Trait'], domain=['Original Trait', 'Other Trait'],
@@ -1877,8 +2083,8 @@ class QualtricsPlotsMixin:
tooltip_title = 'Mean Score' if has_means else 'Rank 1 %' if has_ranks else 'Score' tooltip_title = 'Mean Score' if has_means else 'Rank 1 %' if has_ranks else 'Score'
bars = alt.Chart(summary_df).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(summary_df).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('group:N', title='Group', sort='-y'), x=alt.X('group:N', title='Group', sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('sig_count:Q', title='# of Significant Differences'), y=alt.Y('sig_count:Q', title='# of Significant Differences', axis=alt.Axis(grid=True)),
tooltip=[ tooltip=[
alt.Tooltip('group:N', title='Group'), alt.Tooltip('group:N', title='Group'),
alt.Tooltip('sig_count:Q', title='Sig. Differences'), alt.Tooltip('sig_count:Q', title='Sig. Differences'),

View File

@@ -12,6 +12,8 @@ Runs 03_quant_report.script.py for each single-filter combination:
Usage: Usage:
uv run python run_filter_combinations.py uv run python run_filter_combinations.py
uv run python run_filter_combinations.py --dry-run # Preview combinations without running uv run python run_filter_combinations.py --dry-run # Preview combinations without running
uv run python run_filter_combinations.py --category age # Only run age combinations
uv run python run_filter_combinations.py --category consumer # Only run consumer segment combinations
""" """
import subprocess import subprocess
@@ -31,118 +33,151 @@ QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_P
REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py' REPORT_SCRIPT = Path(__file__).parent / '03_quant_report.script.py'
def get_filter_combinations(survey: QualtricsSurvey) -> list[dict]: def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> list[dict]:
""" """
Generate all single-filter combinations. Generate all single-filter combinations.
Each combination isolates ONE filter value while keeping all others at "all selected". Each combination isolates ONE filter value while keeping all others at "all selected".
Returns list of dicts with filter kwargs for each run.
Args:
survey: QualtricsSurvey instance with loaded data
category: Optional filter category to limit combinations to.
Valid values: 'all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer',
'business_owner', 'ai_user', 'investable_assets', 'industry'
If None or 'all_filters', generates all combinations; 'all' generates only the
unfiltered 'All_Respondents' run.
Returns:
List of dicts with filter kwargs for each run.
""" """
combinations = [] combinations = []
# Add "All Respondents" run (no filters = all options selected) # Add "All Respondents" run (no filters = all options selected)
combinations.append({ if not category or category in ['all_filters', 'all']:
'name': 'All_Respondents', combinations.append({
'filters': {} # Empty = use defaults (all selected) 'name': 'All_Respondents',
}) 'filters': {} # Empty = use defaults (all selected)
})
# Age groups - one at a time # Age groups - one at a time
for age in survey.options_age: if not category or category in ['all_filters', 'age']:
combinations.append({ for age in survey.options_age:
'name': f'Age-{age}',
'filters': {'age': [age]}
})
# Gender - one at a time
for gender in survey.options_gender:
combinations.append({
'name': f'Gender-{gender}',
'filters': {'gender': [gender]}
})
# Ethnicity - grouped by individual values
# Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
# Create filters that include ALL options containing each individual ethnicity value
ethnicity_values = set()
for ethnicity_option in survey.options_ethnicity:
# Split by comma and strip whitespace
values = [v.strip() for v in ethnicity_option.split(',')]
ethnicity_values.update(values)
for ethnicity_value in sorted(ethnicity_values):
# Find all options that contain this value
matching_options = [
opt for opt in survey.options_ethnicity
if ethnicity_value in [v.strip() for v in opt.split(',')]
]
combinations.append({
'name': f'Ethnicity-{ethnicity_value}',
'filters': {'ethnicity': matching_options}
})
# Income - one at a time
for income in survey.options_income:
combinations.append({
'name': f'Income-{income}',
'filters': {'income': [income]}
})
# Consumer segments - combine _A and _B options, and also include standalone
# Group options by base name (removing _A/_B suffix)
consumer_groups = {}
for consumer in survey.options_consumer:
# Check if ends with _A or _B
if consumer.endswith('_A') or consumer.endswith('_B'):
base_name = consumer[:-2] # Remove last 2 chars (_A or _B)
if base_name not in consumer_groups:
consumer_groups[base_name] = []
consumer_groups[base_name].append(consumer)
else:
# Not an _A/_B option, keep as-is
consumer_groups[consumer] = [consumer]
# Add combined _A+_B options
for base_name, options in consumer_groups.items():
if len(options) > 1: # Only combine if there are multiple (_A and _B)
combinations.append({ combinations.append({
'name': f'Consumer-{base_name}', 'name': f'Age-{age}',
'filters': {'consumer': options} 'filters': {'age': [age]}
}) })
# Add standalone options (including individual _A and _B) # Gender - one at a time
for consumer in survey.options_consumer: if not category or category in ['all_filters', 'gender']:
combinations.append({ for gender in survey.options_gender:
'name': f'Consumer-{consumer}', combinations.append({
'filters': {'consumer': [consumer]} 'name': f'Gender-{gender}',
}) 'filters': {'gender': [gender]}
})
# Ethnicity - grouped by individual values
if not category or category in ['all_filters', 'ethnicity']:
# Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
# Create filters that include ALL options containing each individual ethnicity value
ethnicity_values = set()
for ethnicity_option in survey.options_ethnicity:
# Split by comma and strip whitespace
values = [v.strip() for v in ethnicity_option.split(',')]
ethnicity_values.update(values)
for ethnicity_value in sorted(ethnicity_values):
# Find all options that contain this value
matching_options = [
opt for opt in survey.options_ethnicity
if ethnicity_value in [v.strip() for v in opt.split(',')]
]
combinations.append({
'name': f'Ethnicity-{ethnicity_value}',
'filters': {'ethnicity': matching_options}
})
# Income - one at a time
if not category or category in ['all_filters', 'income']:
for income in survey.options_income:
combinations.append({
'name': f'Income-{income}',
'filters': {'income': [income]}
})
# Consumer segments - combine _A and _B options, and also include standalone
if not category or category in ['all_filters', 'consumer']:
# Group options by base name (removing _A/_B suffix)
consumer_groups = {}
for consumer in survey.options_consumer:
# Check if ends with _A or _B
if consumer.endswith('_A') or consumer.endswith('_B'):
base_name = consumer[:-2] # Remove last 2 chars (_A or _B)
if base_name not in consumer_groups:
consumer_groups[base_name] = []
consumer_groups[base_name].append(consumer)
else:
# Not an _A/_B option, keep as-is
consumer_groups[consumer] = [consumer]
# Add combined _A+_B options
for base_name, options in consumer_groups.items():
if len(options) > 1: # Only combine if there are multiple (_A and _B)
combinations.append({
'name': f'Consumer-{base_name}',
'filters': {'consumer': options}
})
# Add standalone options (including individual _A and _B)
for consumer in survey.options_consumer:
combinations.append({
'name': f'Consumer-{consumer}',
'filters': {'consumer': [consumer]}
})
# Business Owner - one at a time # Business Owner - one at a time
for business_owner in survey.options_business_owner: if not category or category in ['all_filters', 'business_owner']:
combinations.append({ for business_owner in survey.options_business_owner:
'name': f'BusinessOwner-{business_owner}', combinations.append({
'filters': {'business_owner': [business_owner]} 'name': f'BusinessOwner-{business_owner}',
}) 'filters': {'business_owner': [business_owner]}
})
# AI User - one at a time # AI User - one at a time
for ai_user in survey.options_ai_user: if not category or category in ['all_filters', 'ai_user']:
for ai_user in survey.options_ai_user:
combinations.append({
'name': f'AIUser-{ai_user}',
'filters': {'ai_user': [ai_user]}
})
# AI users answering daily, more than once daily, or multiple times per week = frequent
combinations.append({ combinations.append({
'name': f'AIUser-{ai_user}', 'name': 'AIUser-Frequent',
'filters': {'ai_user': [ai_user]} 'filters': {'ai_user': [
'Daily', 'More than once daily', 'Multiple times per week'
]}
})
combinations.append({
'name': 'AIUser-RarelyNever',
'filters': {'ai_user': [
'Once a month', 'Less than once a month', 'Once a week', 'Rarely/Never'
]}
}) })
# Investable Assets - one at a time # Investable Assets - one at a time
for investable_assets in survey.options_investable_assets: if not category or category in ['all_filters', 'investable_assets']:
combinations.append({ for investable_assets in survey.options_investable_assets:
'name': f'Assets-{investable_assets}', combinations.append({
'filters': {'investable_assets': [investable_assets]} 'name': f'Assets-{investable_assets}',
}) 'filters': {'investable_assets': [investable_assets]}
})
# Industry - one at a time # Industry - one at a time
for industry in survey.options_industry: if not category or category in ['all_filters', 'industry']:
combinations.append({ for industry in survey.options_industry:
'name': f'Industry-{industry}', combinations.append({
'filters': {'industry': [industry]} 'name': f'Industry-{industry}',
}) 'filters': {'industry': [industry]}
})
return combinations return combinations
@@ -193,6 +228,12 @@ def main():
import argparse import argparse
parser = argparse.ArgumentParser(description='Run quant report for all filter combinations') parser = argparse.ArgumentParser(description='Run quant report for all filter combinations')
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running') parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
parser.add_argument(
'--category',
choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry'],
default='all_filters',
help='Filter category to run combinations for (default: all_filters)'
)
args = parser.parse_args() args = parser.parse_args()
# Load survey to get available filter options # Load survey to get available filter options
@@ -200,9 +241,10 @@ def main():
survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE) survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
survey.load_data() # Populates options_* attributes survey.load_data() # Populates options_* attributes
# Generate all combinations # Generate combinations for specified category
combinations = get_filter_combinations(survey) combinations = get_filter_combinations(survey, category=args.category)
print(f"Generated {len(combinations)} filter combinations") category_desc = f" for category '{args.category}'" if args.category != 'all' else ''
print(f"Generated {len(combinations)} filter combinations{category_desc}")
if args.dry_run: if args.dry_run:
print("\nDRY RUN - Commands that would be executed:") print("\nDRY RUN - Commands that would be executed:")

View File

@@ -879,40 +879,42 @@ class QualtricsSurvey(QualtricsPlotsMixin):
""" """
# Apply filters - skip if empty list (columns with all NULLs produce empty options) # Apply filters - skip if empty list (columns with all NULLs produce empty options)
# OR if all options are selected (to avoid dropping NULLs)
self.filter_age = age self.filter_age = age
if age is not None and len(age) > 0: if age is not None and len(age) > 0 and set(age) != set(self.options_age):
q = q.filter(pl.col('QID1').is_in(age)) q = q.filter(pl.col('QID1').is_in(age))
self.filter_gender = gender self.filter_gender = gender
if gender is not None and len(gender) > 0: if gender is not None and len(gender) > 0 and set(gender) != set(self.options_gender):
q = q.filter(pl.col('QID2').is_in(gender)) q = q.filter(pl.col('QID2').is_in(gender))
self.filter_consumer = consumer self.filter_consumer = consumer
if consumer is not None and len(consumer) > 0: if consumer is not None and len(consumer) > 0 and set(consumer) != set(self.options_consumer):
q = q.filter(pl.col('Consumer').is_in(consumer)) q = q.filter(pl.col('Consumer').is_in(consumer))
self.filter_ethnicity = ethnicity self.filter_ethnicity = ethnicity
if ethnicity is not None and len(ethnicity) > 0: if ethnicity is not None and len(ethnicity) > 0 and set(ethnicity) != set(self.options_ethnicity):
q = q.filter(pl.col('QID3').is_in(ethnicity)) q = q.filter(pl.col('QID3').is_in(ethnicity))
self.filter_income = income self.filter_income = income
if income is not None and len(income) > 0: if income is not None and len(income) > 0 and set(income) != set(self.options_income):
q = q.filter(pl.col('QID15').is_in(income)) q = q.filter(pl.col('QID15').is_in(income))
self.filter_business_owner = business_owner self.filter_business_owner = business_owner
if business_owner is not None and len(business_owner) > 0: if business_owner is not None and len(business_owner) > 0 and set(business_owner) != set(self.options_business_owner):
q = q.filter(pl.col('QID4').is_in(business_owner)) q = q.filter(pl.col('QID4').is_in(business_owner))
self.filter_ai_user = ai_user self.filter_ai_user = ai_user
if ai_user is not None and len(ai_user) > 0: if ai_user is not None and len(ai_user) > 0 and set(ai_user) != set(self.options_ai_user):
q = q.filter(pl.col('QID22').is_in(ai_user)) q = q.filter(pl.col('QID22').is_in(ai_user))
self.filter_investable_assets = investable_assets self.filter_investable_assets = investable_assets
if investable_assets is not None and len(investable_assets) > 0: if investable_assets is not None and len(investable_assets) > 0 and set(investable_assets) != set(self.options_investable_assets):
q = q.filter(pl.col('QID16').is_in(investable_assets)) q = q.filter(pl.col('QID16').is_in(investable_assets))
self.filter_industry = industry self.filter_industry = industry
if industry is not None and len(industry) > 0: if industry is not None and len(industry) > 0 and set(industry) != set(self.options_industry):
q = q.filter(pl.col('QID17').is_in(industry)) q = q.filter(pl.col('QID17').is_in(industry))
self.data_filtered = q self.data_filtered = q