tidy plots

This commit is contained in:
2026-02-03 21:51:29 +01:00
parent a35670aa72
commit f5b4c247b8
3 changed files with 224 additions and 117 deletions

View File

@@ -111,8 +111,6 @@ BEST_CHOSEN_CHARACTER = "the_coach"
# {filter_form} # {filter_form}
# ''') # ''')
# %%
print(len(data_all.collect()))
# %% # %%
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**")) # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))

312
plots.py
View File

@@ -435,8 +435,8 @@ class QualtricsPlotsMixin:
# Base bar chart - use y2 to explicitly start bars at domain minimum # Base bar chart - use y2 to explicitly start bars at domain minimum
if color_gender: if color_gender:
bars = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('voice:N', title=x_label, sort='-y'), x=alt.X('voice:N', title=x_label, sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain), axis=alt.Axis(grid=True)),
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
color=alt.Color('gender:N', color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'], scale=alt.Scale(domain=['Male', 'Female'],
@@ -449,10 +449,15 @@ class QualtricsPlotsMixin:
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
) )
# Text overlay - inherit color from bars via mark_text
text = bars.mark_text(dy=-5, fontSize=10).encode(
text=alt.Text('count:Q')
)
else: else:
bars = alt.Chart(stats_df).mark_bar(color=color).encode( bars = alt.Chart(stats_df).mark_bar(color=color).encode(
x=alt.X('voice:N', title=x_label, sort='-y'), x=alt.X('voice:N', title=x_label, sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain), axis=alt.Axis(grid=True)),
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
tooltip=[ tooltip=[
alt.Tooltip('voice:N', title='Voice'), alt.Tooltip('voice:N', title='Voice'),
@@ -460,17 +465,17 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
) )
# Text overlay for counts # Text overlay for counts
text = alt.Chart(stats_df).mark_text( text = alt.Chart(stats_df).mark_text(
dy=-5, dy=-5,
color='black', color='black',
fontSize=10 fontSize=10
).encode( ).encode(
x=alt.X('voice:N', sort='-y'), x=alt.X('voice:N', sort='-y'),
y=alt.Y('average:Q'), y=alt.Y('average:Q'),
text=alt.Text('count:Q') text=alt.Text('count:Q')
) )
# Combine layers # Combine layers
chart = (bars + text).properties( chart = (bars + text).properties(
@@ -512,13 +517,16 @@ class QualtricsPlotsMixin:
# Convert to long format, sort by total # Convert to long format, sort by total
stats_df = pl.DataFrame(stats).to_pandas() stats_df = pl.DataFrame(stats).to_pandas()
# Compute explicit sort order by total (descending)
sort_order = stats_df.drop_duplicates('voice').sort_values('total', ascending=False)['voice'].tolist()
# Interactive legend selection - click to filter # Interactive legend selection - click to filter
selection = alt.selection_point(fields=['rank'], bind='legend') selection = alt.selection_point(fields=['rank'], bind='legend')
# Create stacked bar chart with interactive legend # Create stacked bar chart with interactive legend
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('voice:N', title=x_label, sort=alt.EncodingSortField(field='total', op='sum', order='descending')), x=alt.X('voice:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('rank:N', color=alt.Color('rank:N',
scale=alt.Scale(domain=['Rank 1 (1st Choice)', 'Rank 2 (2nd Choice)', 'Rank 3 (3rd Choice)'], scale=alt.Scale(domain=['Rank 1 (1st Choice)', 'Rank 2 (2nd Choice)', 'Rank 3 (3rd Choice)'],
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
@@ -530,7 +538,18 @@ class QualtricsPlotsMixin:
alt.Tooltip('rank:N', title='Rank'), alt.Tooltip('rank:N', title='Rank'),
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
).add_params(selection).properties( )
# Text layer showing totals on top of bars
text = alt.Chart(stats_df).transform_filter(
alt.datum.rank == 'Rank 1 (1st Choice)'
).mark_text(dy=-10, color='black').encode(
x=alt.X('voice:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q')
)
chart = alt.layer(bars, text).add_params(selection).properties(
title=self._process_title(title), title=self._process_title(title),
width=width or 800, width=width or 800,
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
@@ -583,6 +602,9 @@ class QualtricsPlotsMixin:
# Interactive legend selection - click to filter # Interactive legend selection - click to filter
selection = alt.selection_point(fields=['rank'], bind='legend') selection = alt.selection_point(fields=['rank'], bind='legend')
# Compute explicit sort order by total (descending)
sort_order = stats_df.drop_duplicates('item').sort_values('total', ascending=False)['item'].tolist()
if color_gender: if color_gender:
# Add gender_rank column for combined color encoding # Add gender_rank column for combined color encoding
stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank'] stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank']
@@ -597,9 +619,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3 ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('gender_rank:N', color=alt.Color('gender_rank:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)), legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)),
@@ -611,15 +633,11 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Count'), alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero', axis=alt.Axis(grid=True)),
color=alt.Color('rank:N', color=alt.Color('rank:N',
scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'],
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
@@ -631,12 +649,37 @@ class QualtricsPlotsMixin:
alt.Tooltip('rank:N', title='Rank'), alt.Tooltip('rank:N', title='Rank'),
alt.Tooltip('count:Q', title='Count') alt.Tooltip('count:Q', title='Count')
] ]
).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer showing totals on top of bars
if color_gender:
# Create a separate chart for totals with gender coloring
text_df = stats_df.drop_duplicates('item')[['item', 'total', 'gender']]
text = alt.Chart(text_df).mark_text(dy=-10).encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
)
else:
text = alt.Chart(stats_df).transform_filter(
alt.datum.rank_order == 1
).mark_text(dy=-10, color='black').encode(
x=alt.X('item:N', sort=sort_order),
y=alt.Y('total:Q'),
text=alt.Text('total:Q')
)
chart = alt.layer(bars, text).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -669,6 +712,7 @@ class QualtricsPlotsMixin:
# Convert and sort # Convert and sort
stats_df = pl.DataFrame(stats).sort('count', descending=True) stats_df = pl.DataFrame(stats).sort('count', descending=True)
sort_order = stats_df['item'].to_list()
# Add rank column for coloring (1-3 vs 4+) # Add rank column for coloring (1-3 vs 4+)
stats_df = stats_df.with_row_index('rank_index') stats_df = stats_df.with_row_index('rank_index')
@@ -691,8 +735,8 @@ class QualtricsPlotsMixin:
] ]
bars = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -703,15 +747,16 @@ class QualtricsPlotsMixin:
] ]
) )
# Text overlay for counts # Create text layer with gender coloring using conditional
text = alt.Chart(stats_df).mark_text( text = alt.Chart(stats_df).mark_text(dy=-5, fontSize=10).encode(
dy=-5, x=alt.X('item:N', sort=sort_order),
color='black',
fontSize=10
).encode(
x=alt.X('item:N', sort='-y'),
y=alt.Y('count:Q'), y=alt.Y('count:Q'),
text=alt.Text('count:Q') text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
) )
chart = (bars + text).properties( chart = (bars + text).properties(
@@ -722,8 +767,8 @@ class QualtricsPlotsMixin:
else: else:
# Bar chart with conditional color # Bar chart with conditional color
bars = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'), x=alt.X('item:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 3', 'Other'], scale=alt.Scale(domain=['Top 3', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -740,7 +785,7 @@ class QualtricsPlotsMixin:
color='black', color='black',
fontSize=10 fontSize=10
).encode( ).encode(
x=alt.X('item:N', sort='-y'), x=alt.X('item:N', sort=sort_order),
y=alt.Y('count:Q'), y=alt.Y('count:Q'),
text=alt.Text('count:Q') text=alt.Text('count:Q')
) )
@@ -771,6 +816,8 @@ class QualtricsPlotsMixin:
color_gender: If True, color bars by voice gender (blue=male, pink=female). color_gender: If True, color bars by voice gender (blue=male, pink=female).
""" """
weighted_df = self._ensure_dataframe(data).to_pandas() weighted_df = self._ensure_dataframe(data).to_pandas()
weighted_df.sort_values('Weighted Score', ascending=False, inplace=True)
sort_order = weighted_df['Character'].tolist()
if color_gender: if color_gender:
# Add gender column based on Character name # Add gender column based on Character name
@@ -778,8 +825,8 @@ class QualtricsPlotsMixin:
# Bar chart with gender coloring # Bar chart with gender coloring
bars = alt.Chart(weighted_df).mark_bar().encode( bars = alt.Chart(weighted_df).mark_bar().encode(
x=alt.X('Character:N', title=x_label, sort='-y'), x=alt.X('Character:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('Weighted Score:Q', title=y_label), y=alt.Y('Weighted Score:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender:N', color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'], scale=alt.Scale(domain=['Male', 'Female'],
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]), range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
@@ -793,8 +840,8 @@ class QualtricsPlotsMixin:
else: else:
# Bar chart # Bar chart
bars = alt.Chart(weighted_df).mark_bar(color=color).encode( bars = alt.Chart(weighted_df).mark_bar(color=color).encode(
x=alt.X('Character:N', title=x_label, sort='-y'), x=alt.X('Character:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('Weighted Score:Q', title=y_label), y=alt.Y('Weighted Score:Q', title=y_label, axis=alt.Axis(grid=True)),
tooltip=[ tooltip=[
alt.Tooltip('Character:N'), alt.Tooltip('Character:N'),
alt.Tooltip('Weighted Score:Q', title='Score') alt.Tooltip('Weighted Score:Q', title='Score')
@@ -862,8 +909,11 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
# Compute explicit sort order by count (descending)
sort_order = stats_df.sort_values('count', ascending=False)[target_column].tolist()
# Add gender column for all cases when color_gender is True (needed for text layer)
if color_gender: if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding # Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
@@ -875,9 +925,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -886,15 +936,23 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='Selections'), alt.Tooltip('count:Q', title='Selections'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).properties( )
title=self._process_title(title),
width=width or 800, # Text layer with gender coloring using conditional
height=height or getattr(self, 'plot_height', 400) text = alt.Chart(stats_df).mark_text(dy=-10).encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 8', 'Other'], scale=alt.Scale(domain=['Top 8', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -903,11 +961,20 @@ class QualtricsPlotsMixin:
alt.Tooltip(f'{target_column}:N', title='Voice'), alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='Selections') alt.Tooltip('count:Q', title='Selections')
] ]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer with black color
text = alt.Chart(stats_df).mark_text(dy=-10, color='black').encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q')
)
chart = alt.layer(bars, text).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -954,8 +1021,11 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
# Compute explicit sort order by count (descending)
sort_order = stats_df.sort_values('count', ascending=False)[target_column].tolist()
# Add gender column for all cases when color_gender is True (needed for text layer)
if color_gender: if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding # Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
@@ -967,9 +1037,9 @@ class QualtricsPlotsMixin:
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
] ]
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('gender_category:N', color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors), scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
@@ -978,15 +1048,23 @@ class QualtricsPlotsMixin:
alt.Tooltip('count:Q', title='In Top 3'), alt.Tooltip('count:Q', title='In Top 3'),
alt.Tooltip('gender:N', title='Gender') alt.Tooltip('gender:N', title='Gender')
] ]
).properties( )
title=self._process_title(title),
width=width or 800, # Text layer with gender coloring using conditional
height=height or getattr(self, 'plot_height', 400) text = alt.Chart(stats_df).mark_text(dy=-10).encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q'),
color=alt.condition(
alt.datum.gender == 'Female',
alt.value(ColorPalette.GENDER_FEMALE),
alt.value(ColorPalette.GENDER_MALE)
)
) )
else: else:
chart = alt.Chart(stats_df).mark_bar().encode( bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort=sort_order, axis=alt.Axis(grid=False)),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale(domain=['Top 3', 'Other'], scale=alt.Scale(domain=['Top 3', 'Other'],
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
@@ -995,11 +1073,20 @@ class QualtricsPlotsMixin:
alt.Tooltip(f'{target_column}:N', title='Voice'), alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='In Top 3') alt.Tooltip('count:Q', title='In Top 3')
] ]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
) )
# Text layer with black color
text = alt.Chart(stats_df).mark_text(dy=-10, color='black').encode(
x=alt.X(f'{target_column}:N', sort=sort_order),
y=alt.Y('count:Q'),
text=alt.Text('count:Q')
)
chart = alt.layer(bars, text).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
@@ -1056,9 +1143,9 @@ class QualtricsPlotsMixin:
# Horizontal bar chart - use x2 to explicitly start bars at x=1 # Horizontal bar chart - use x2 to explicitly start bars at x=1
bars = alt.Chart(stats).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(stats).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('mean_score:Q', title='Average Score (1-5)', scale=alt.Scale(domain=[1, 5])), x=alt.X('mean_score:Q', title='Average Score (1-5)', scale=alt.Scale(domain=[1, 5]), axis=alt.Axis(grid=True)),
x2=alt.datum(1), # Bars start at x=1 (left edge of domain) x2=alt.datum(1), # Bars start at x=1 (left edge of domain)
y=alt.Y('Voice:N', title='Voice', sort='-x'), y=alt.Y('Voice:N', title='Voice', sort='-x', axis=alt.Axis(grid=False)),
tooltip=[ tooltip=[
alt.Tooltip('Voice:N'), alt.Tooltip('Voice:N'),
alt.Tooltip('mean_score:Q', title='Average', format='.2f'), alt.Tooltip('mean_score:Q', title='Average', format='.2f'),
@@ -1131,8 +1218,8 @@ class QualtricsPlotsMixin:
# Conditional color based on sign # Conditional color based on sign
chart = alt.Chart(plot_df).mark_bar().encode( chart = alt.Chart(plot_df).mark_bar().encode(
x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0)), x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0, grid=False)),
y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1])), y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1180,11 +1267,12 @@ class QualtricsPlotsMixin:
chart = alt.Chart(df.to_pandas()).mark_bar().encode( chart = alt.Chart(df.to_pandas()).mark_bar().encode(
x=alt.X('Color:N', x=alt.X('Color:N',
title=None, title=None,
axis=alt.Axis(labelAngle=0), axis=alt.Axis(labelAngle=0, grid=False),
sort=["Green", "Blue", "Orange", "Red"]), sort=["Green", "Blue", "Orange", "Red"]),
y=alt.Y('correlation:Q', y=alt.Y('correlation:Q',
title='Average Correlation', title='Average Correlation',
scale=alt.Scale(domain=[-1, 1])), scale=alt.Scale(domain=[-1, 1]),
axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1240,10 +1328,23 @@ class QualtricsPlotsMixin:
.with_columns(pl.col(column).fill_null("(No Response)")) .with_columns(pl.col(column).fill_null("(No Response)"))
.group_by(column) .group_by(column)
.agg(pl.len().alias("count")) .agg(pl.len().alias("count"))
.sort("count", descending=True)
.to_pandas() .to_pandas()
) )
# Apply sorting logic
if column == 'Age':
# Custom sort for Age ranges
# Example values: "18 to 21 years", "25 to 34 years", "70 years or more"
# Extract first number to sort by
stats_df['sort_key'] = stats_df[column].apply(
lambda x: int(re.search(r'\d+', str(x)).group()) if re.search(r'\d+', str(x)) else 999
)
# Use EncodingSortField for Age to avoid schema issues with list-based labels
sort_order = alt.EncodingSortField(field="sort_key", order="ascending")
else:
# Default sort by count descending
sort_order = '-x'
if stats_df.empty: if stats_df.empty:
return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N') return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
@@ -1251,22 +1352,31 @@ class QualtricsPlotsMixin:
total = stats_df['count'].sum() total = stats_df['count'].sum()
stats_df['percentage'] = (stats_df['count'] / total * 100).round(1) stats_df['percentage'] = (stats_df['count'] / total * 100).round(1)
# Clean y-labels by replacing underscores and wrapping long text
import textwrap
stats_df['clean_label'] = stats_df[column].astype(str).str.replace('_', ' ').apply(
lambda x: textwrap.wrap(x, width=25) if isinstance(x, str) else [str(x)]
)
# Calculate max lines for height adjustment
max_lines = stats_df['clean_label'].apply(len).max() if not stats_df.empty else 1
# Generate title if not provided # Generate title if not provided
if title is None: if title is None:
clean_col = column.replace('_', ' ').replace('/', ' / ') clean_col = column.replace('_', ' ').replace('/', ' / ')
title = f"Distribution: {clean_col}" title = f"Distribution: {clean_col}"
# Calculate appropriate height based on number of categories # Calculate appropriate height based on number of categories and wrapping
num_categories = len(stats_df) num_categories = len(stats_df)
bar_height = 18 # pixels per bar bar_height = max(20, max_lines * 15) # pixels per bar, scale with lines
calculated_height = max(120, num_categories * bar_height + 40) # min 120px, +40 for title/padding calculated_height = max(120, num_categories * bar_height + 40) # min 120px, +40 for title/padding
# Horizontal bar chart - categories on Y axis, counts on X axis # Horizontal bar chart - categories on Y axis, counts on X axis
bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(stats_df).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=False)), x=alt.X('count:Q', title='Count', axis=alt.Axis(grid=True)),
y=alt.Y(f'{column}:N', title=None, sort='-x', axis=alt.Axis(labelLimit=150)), y=alt.Y('clean_label:N', title=None, sort=sort_order, axis=alt.Axis(labelLimit=300, grid=False)),
tooltip=[ tooltip=[
alt.Tooltip(f'{column}:N', title=column.replace('_', ' ')), alt.Tooltip('clean_label:N', title=column.replace('_', ' ')),
alt.Tooltip('count:Q', title='Count'), alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('percentage:Q', title='Percentage', format='.1f') alt.Tooltip('percentage:Q', title='Percentage', format='.1f')
] ]
@@ -1282,7 +1392,7 @@ class QualtricsPlotsMixin:
color=ColorPalette.TEXT color=ColorPalette.TEXT
).encode( ).encode(
x='count:Q', x='count:Q',
y=alt.Y(f'{column}:N', sort='-x'), y=alt.Y('clean_label:N', sort=sort_order),
text='count:Q' text='count:Q'
) )
chart = (bars + text) chart = (bars + text)
@@ -1335,8 +1445,8 @@ class QualtricsPlotsMixin:
plot_df = pl.DataFrame(trait_correlations).to_pandas() plot_df = pl.DataFrame(trait_correlations).to_pandas()
chart = alt.Chart(plot_df).mark_bar().encode( chart = alt.Chart(plot_df).mark_bar().encode(
x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0)), x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0, grid=False)),
y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1])), y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
color=alt.condition( color=alt.condition(
alt.datum.correlation >= 0, alt.datum.correlation >= 0,
alt.value('green'), alt.value('green'),
@@ -1516,8 +1626,8 @@ class QualtricsPlotsMixin:
x=alt.X('Trait:N', x=alt.X('Trait:N',
title=x_label, title=x_label,
sort=trait_order, sort=trait_order,
axis=alt.Axis(labelAngle=-45, labelLimit=200)), axis=alt.Axis(labelAngle=-45, labelLimit=200, grid=False)),
y=alt.Y('Count:Q', title=y_label), y=alt.Y('Count:Q', title=y_label, axis=alt.Axis(grid=True)),
xOffset='Character:N', xOffset='Character:N',
color=alt.Color('Character:N', color=alt.Color('Character:N',
scale=alt.Scale(domain=characters, scale=alt.Scale(domain=characters,
@@ -1633,8 +1743,8 @@ class QualtricsPlotsMixin:
y=alt.Y('trait:N', y=alt.Y('trait:N',
title=x_label, title=x_label,
sort=reversed_sort, sort=reversed_sort,
axis=alt.Axis(labelLimit=200)), axis=alt.Axis(labelLimit=200, grid=False)),
x=alt.X('count:Q', title=y_label), x=alt.X('count:Q', title=y_label, axis=alt.Axis(grid=True)),
color=alt.Color('category:N', color=alt.Color('category:N',
scale=alt.Scale( scale=alt.Scale(
domain=['Original Trait', 'Other Trait'], domain=['Original Trait', 'Other Trait'],
@@ -1973,8 +2083,8 @@ class QualtricsPlotsMixin:
tooltip_title = 'Mean Score' if has_means else 'Rank 1 %' if has_ranks else 'Score' tooltip_title = 'Mean Score' if has_means else 'Rank 1 %' if has_ranks else 'Score'
bars = alt.Chart(summary_df).mark_bar(color=ColorPalette.PRIMARY).encode( bars = alt.Chart(summary_df).mark_bar(color=ColorPalette.PRIMARY).encode(
x=alt.X('group:N', title='Group', sort='-y'), x=alt.X('group:N', title='Group', sort='-y', axis=alt.Axis(grid=False)),
y=alt.Y('sig_count:Q', title='# of Significant Differences'), y=alt.Y('sig_count:Q', title='# of Significant Differences', axis=alt.Axis(grid=True)),
tooltip=[ tooltip=[
alt.Tooltip('group:N', title='Group'), alt.Tooltip('group:N', title='Group'),
alt.Tooltip('sig_count:Q', title='Sig. Differences'), alt.Tooltip('sig_count:Q', title='Sig. Differences'),

View File

@@ -52,14 +52,14 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
combinations = [] combinations = []
# Add "All Respondents" run (no filters = all options selected) # Add "All Respondents" run (no filters = all options selected)
if not category or category == 'all': if not category or category in ['all_filters', 'all']:
combinations.append({ combinations.append({
'name': 'All_Respondents', 'name': 'All_Respondents',
'filters': {} # Empty = use defaults (all selected) 'filters': {} # Empty = use defaults (all selected)
}) })
# Age groups - one at a time # Age groups - one at a time
if not category or category in ['all', 'age']: if not category or category in ['all_filters', 'age']:
for age in survey.options_age: for age in survey.options_age:
combinations.append({ combinations.append({
'name': f'Age-{age}', 'name': f'Age-{age}',
@@ -67,7 +67,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Gender - one at a time # Gender - one at a time
if not category or category in ['all', 'gender']: if not category or category in ['all_filters', 'gender']:
for gender in survey.options_gender: for gender in survey.options_gender:
combinations.append({ combinations.append({
'name': f'Gender-{gender}', 'name': f'Gender-{gender}',
@@ -75,7 +75,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Ethnicity - grouped by individual values # Ethnicity - grouped by individual values
if not category or category in ['all', 'ethnicity']: if not category or category in ['all_filters', 'ethnicity']:
# Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino") # Ethnicity options are comma-separated (e.g., "White or Caucasian, Hispanic or Latino")
# Create filters that include ALL options containing each individual ethnicity value # Create filters that include ALL options containing each individual ethnicity value
ethnicity_values = set() ethnicity_values = set()
@@ -96,7 +96,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Income - one at a time # Income - one at a time
if not category or category in ['all', 'income']: if not category or category in ['all_filters', 'income']:
for income in survey.options_income: for income in survey.options_income:
combinations.append({ combinations.append({
'name': f'Income-{income}', 'name': f'Income-{income}',
@@ -104,7 +104,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Consumer segments - combine _A and _B options, and also include standalone # Consumer segments - combine _A and _B options, and also include standalone
if not category or category in ['all', 'consumer']: if not category or category in ['all_filters', 'consumer']:
# Group options by base name (removing _A/_B suffix) # Group options by base name (removing _A/_B suffix)
consumer_groups = {} consumer_groups = {}
for consumer in survey.options_consumer: for consumer in survey.options_consumer:
@@ -134,7 +134,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Business Owner - one at a time # Business Owner - one at a time
if not category or category in ['all', 'business_owner']: if not category or category in ['all_filters', 'business_owner']:
for business_owner in survey.options_business_owner: for business_owner in survey.options_business_owner:
combinations.append({ combinations.append({
'name': f'BusinessOwner-{business_owner}', 'name': f'BusinessOwner-{business_owner}',
@@ -142,7 +142,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# AI User - one at a time # AI User - one at a time
if not category or category in ['all', 'ai_user']: if not category or category in ['all_filters', 'ai_user']:
for ai_user in survey.options_ai_user: for ai_user in survey.options_ai_user:
combinations.append({ combinations.append({
'name': f'AIUser-{ai_user}', 'name': f'AIUser-{ai_user}',
@@ -164,7 +164,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Investable Assets - one at a time # Investable Assets - one at a time
if not category or category in ['all', 'investable_assets']: if not category or category in ['all_filters', 'investable_assets']:
for investable_assets in survey.options_investable_assets: for investable_assets in survey.options_investable_assets:
combinations.append({ combinations.append({
'name': f'Assets-{investable_assets}', 'name': f'Assets-{investable_assets}',
@@ -172,7 +172,7 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
}) })
# Industry - one at a time # Industry - one at a time
if not category or category in ['all', 'industry']: if not category or category in ['all_filters', 'industry']:
for industry in survey.options_industry: for industry in survey.options_industry:
combinations.append({ combinations.append({
'name': f'Industry-{industry}', 'name': f'Industry-{industry}',
@@ -230,10 +230,9 @@ def main():
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running') parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
parser.add_argument( parser.add_argument(
'--category', '--category',
choices=['all', 'age', 'gender', 'ethnicity', 'income', 'consumer', choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry'],
'business_owner', 'ai_user', 'investable_assets', 'industry'], default=['all_filters'],
default='all', help='Filter category to run combinations for (default: all_filters)'
help='Filter category to run combinations for (default: all)'
) )
args = parser.parse_args() args = parser.parse_args()