diff --git a/03_quant_report.py b/03_quant_report.py index ac2f380..b29ac2e 100644 --- a/03_quant_report.py +++ b/03_quant_report.py @@ -458,6 +458,12 @@ def _(): return +@app.cell +def _(): + COLOR_GENDER = True + return (COLOR_GENDER,) + + @app.cell def _(): mo.md(r""" @@ -473,8 +479,8 @@ def _(S, data): @app.cell -def _(S, v_18_8_3): - S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice') +def _(COLOR_GENDER, S, v_18_8_3): + S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER) return @@ -487,8 +493,8 @@ def _(): @app.cell -def _(S, v_18_8_3): - S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice') +def _(COLOR_GENDER, S, v_18_8_3): + S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER) return @@ -508,8 +514,8 @@ def _(S, data): @app.cell -def _(S, top3_voices_weighted): - S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score
(1st = 3pts, 2nd = 2pts, 3rd = 1pt)") +def _(COLOR_GENDER, S, top3_voices_weighted): + S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score
(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", color_gender=COLOR_GENDER) return @@ -524,8 +530,8 @@ def _(): @app.cell -def _(S, top3_voices): - S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Voice Rankings (1st, 2nd, 3rd)") +def _(COLOR_GENDER, S, top3_voices): + S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER) return @@ -580,8 +586,8 @@ def _(): @app.cell -def _(S, top3_voices): - S.plot_most_ranked_1(top3_voices, title="Most Popular Voice
(Number of Times Ranked 1st)", x_label='Voice') +def _(COLOR_GENDER, S, top3_voices): + S.plot_most_ranked_1(top3_voices, title="Most Popular Voice
(Number of Times Ranked 1st)", x_label='Voice', color_gender=COLOR_GENDER) return @@ -594,10 +600,10 @@ def _(): @app.cell -def _(S, data): +def _(COLOR_GENDER, S, data): # Get your voice scale data (from notebook) voice_1_10, _ = S.get_voice_scale_1_10(data) - S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)") + S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER) return (voice_1_10,) diff --git a/04_PPTX_Update_Images.py b/04_PPTX_Update_Images.py index 0657bc6..3c53796 100644 --- a/04_PPTX_Update_Images.py +++ b/04_PPTX_Update_Images.py @@ -21,8 +21,8 @@ def _(): @app.cell def _(): - TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx') - TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx') + TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx') + TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx') TAG_IMAGE_DIR = Path('figures/2-2-26') return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET @@ -43,8 +43,8 @@ def _(): @app.cell def _(): - REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx') - REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx') + REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx') + REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx') NEW_IMAGES_DIR = Path('figures/2-2-26') return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET diff --git a/plots.py b/plots.py index 1a46f51..ca81be4 100644 --- a/plots.py +++ b/plots.py @@ -8,6 +8,7 @@ import altair as alt import pandas as pd import polars as pl from theme import ColorPalette +from reference import VOICE_GENDER_MAPPING import hashlib @@ -260,6 +261,61 @@ class QualtricsPlotsMixin: label = label.replace('_', ' ').strip() return label + def _get_voice_gender(self, voice_label: str) -> str: + """Get the gender of a voice from its label. + + Parameters: + voice_label: Voice label (e.g., 'V14', 'Voice 14', etc.) + + Returns: + 'Male' or 'Female', defaults to 'Male' if not found + """ + # Extract voice code (e.g., 'V14' from 'Voice 14' or 'V14') + voice_code = None + + # Try to find VXX pattern + match = re.search(r'V(\d+)', voice_label) + if match: + voice_code = f"V{match.group(1)}" + else: + # Try to extract number and prepend V + match = re.search(r'(\d+)', voice_label) + if match: + voice_code = f"V{match.group(1)}" + + if voice_code and voice_code in VOICE_GENDER_MAPPING: + return VOICE_GENDER_MAPPING[voice_code] + + return "Male" # Default to Male if unknown + + def _get_gender_color(self, gender: str, color_type: str = "primary") -> str: + """Get the appropriate color based on gender. + + Parameters: + gender: 'Male' or 'Female' + color_type: One of 'primary', 'rank_1', 'rank_2', 'rank_3', 'neutral' + + Returns: + Hex color string + """ + color_map = { + "Male": { + "primary": ColorPalette.GENDER_MALE, + "rank_1": ColorPalette.GENDER_MALE_RANK_1, + "rank_2": ColorPalette.GENDER_MALE_RANK_2, + "rank_3": ColorPalette.GENDER_MALE_RANK_3, + "neutral": ColorPalette.GENDER_MALE_NEUTRAL, + }, + "Female": { + "primary": ColorPalette.GENDER_FEMALE, + "rank_1": ColorPalette.GENDER_FEMALE_RANK_1, + "rank_2": ColorPalette.GENDER_FEMALE_RANK_2, + "rank_3": ColorPalette.GENDER_FEMALE_RANK_3, + "neutral": ColorPalette.GENDER_FEMALE_NEUTRAL, + } + } + return color_map.get(gender, color_map["Male"]).get(color_type, ColorPalette.PRIMARY) + def plot_average_scores_with_counts( self, data: pl.LazyFrame | pl.DataFrame | None = None, @@ -270,8 +326,13 @@ class QualtricsPlotsMixin: height: int | None = None, width: int | str | None = None, domain: list[float] | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Create a bar plot showing average scores and count of non-null values for each column.""" + """Create a bar plot showing average scores and count of non-null values for each column. + + Parameters: + color_gender: If True, color bars by voice gender (blue=male, pink=female). + """ df = self._ensure_dataframe(data) # Calculate stats for each column (exclude _recordId) @@ -280,10 +341,12 @@ class QualtricsPlotsMixin: avg_score = df[col].mean() non_null_count = df[col].drop_nulls().len() label = self._clean_voice_label(col) + gender = self._get_voice_gender(label) if color_gender else None stats.append({ 'voice': label, 'average': avg_score, - 'count': non_null_count + 'count': non_null_count, + 'gender': gender }) # Convert to pandas for Altair (sort by average descending) @@ -293,16 +356,33 @@ class QualtricsPlotsMixin: domain = [stats_df['average'].min(), stats_df['average'].max()] # Base bar chart - use y2 to explicitly start bars at domain minimum - bars = alt.Chart(stats_df).mark_bar(color=color).encode( - x=alt.X('voice:N', title=x_label, sort='-y'), - y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), - y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) - tooltip=[ - alt.Tooltip('voice:N', title='Voice'), - alt.Tooltip('average:Q', title='Average', format='.2f'), - alt.Tooltip('count:Q', title='Count') - ] - ) + if color_gender: + bars = alt.Chart(stats_df).mark_bar().encode( + x=alt.X('voice:N', title=x_label, sort='-y'), + y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), + y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) + color=alt.Color('gender:N', + scale=alt.Scale(domain=['Male', 'Female'], + range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]), + legend=alt.Legend(orient='top', direction='horizontal', title='Gender')), + tooltip=[ + alt.Tooltip('voice:N', title='Voice'), + alt.Tooltip('average:Q', title='Average', format='.2f'), + alt.Tooltip('count:Q', title='Count'), + alt.Tooltip('gender:N', title='Gender') + ] + ) + else: + bars = alt.Chart(stats_df).mark_bar(color=color).encode( + x=alt.X('voice:N', title=x_label, sort='-y'), + y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), + y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge) + tooltip=[ + alt.Tooltip('voice:N', title='Voice'), + alt.Tooltip('average:Q', title='Average', format='.2f'), + alt.Tooltip('count:Q', title='Count') + ] + ) # Text overlay for counts text = alt.Chart(stats_df).mark_text( @@ -390,8 +470,14 @@ class QualtricsPlotsMixin: y_label: str = "Number of Votes", height: int | None = None, width: int | str | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Create a stacked bar chart showing the distribution of rankings (1st to 3rd).""" + """Create a stacked bar chart showing the distribution of rankings (1st to 3rd). + + Parameters: + color_gender: If True, color bars by voice gender with rank intensity + (blue shades=male, pink shades=female). + """ df = self._ensure_dataframe(data) stats = [] @@ -406,10 +492,11 @@ class QualtricsPlotsMixin: if total > 0: label = self._clean_voice_label(col) - stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'rank1': r1}) - stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'rank1': r1}) - stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'rank1': r1}) - # stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'rank1': r1}) + gender = self._get_voice_gender(label) if color_gender else None + stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'total': total, 'gender': gender, 'rank_order': 1}) + stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'total': total, 'gender': gender, 'rank_order': 2}) + stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'total': total, 'gender': gender, 'rank_order': 3}) + # stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'total': total, 'gender': gender, 'rank_order': 4}) if not stats: return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N') @@ -419,25 +506,59 @@ class QualtricsPlotsMixin: # Interactive legend selection - click to filter selection = alt.selection_point(fields=['rank'], bind='legend') - chart = alt.Chart(stats_df).mark_bar().encode( - x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='rank1', order='descending')), - y=alt.Y('count:Q', title=y_label, stack='zero'), - color=alt.Color('rank:N', - scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], - range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), - legend=alt.Legend(orient='top', direction='horizontal', title=None)), - order=alt.Order('rank:N', sort='ascending'), - opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), - tooltip=[ - alt.Tooltip('item:N', title='Item'), - alt.Tooltip('rank:N', title='Rank'), - alt.Tooltip('count:Q', title='Count') + if color_gender: + # Add gender_rank column for combined color encoding + stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank'] + + # Define combined domain and range for gender + rank + domain = [ + 'Male - Rank 1 (Best)', 'Male - Rank 2', 'Male - Rank 3', + 'Female - Rank 1 (Best)', 'Female - Rank 2', 'Female - Rank 3' ] - ).add_params(selection).properties( - title=self._process_title(title), - width=width or 800, - height=height or getattr(self, 'plot_height', 400) - ) + range_colors = [ + ColorPalette.GENDER_MALE_RANK_1, ColorPalette.GENDER_MALE_RANK_2, ColorPalette.GENDER_MALE_RANK_3, + ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3 + ] + + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), + y=alt.Y('count:Q', title=y_label, stack='zero'), + color=alt.Color('gender_rank:N', + scale=alt.Scale(domain=domain, range=range_colors), + legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)), + order=alt.Order('rank_order:Q', sort='ascending'), + opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), + tooltip=[ + alt.Tooltip('item:N', title='Item'), + alt.Tooltip('rank:N', title='Rank'), + alt.Tooltip('count:Q', title='Count'), + alt.Tooltip('gender:N', title='Gender') + ] + ).add_params(selection).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) + else: + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')), + y=alt.Y('count:Q', title=y_label, stack='zero'), + color=alt.Color('rank:N', + scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], + range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), + legend=alt.Legend(orient='top', direction='horizontal', title=None)), + order=alt.Order('rank_order:Q', sort='ascending'), + opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), + tooltip=[ + alt.Tooltip('item:N', title='Item'), + alt.Tooltip('rank:N', title='Rank'), + alt.Tooltip('count:Q', title='Count') + ] + ).add_params(selection).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) chart = self._save_plot(chart, title) return chart @@ -450,8 +571,14 @@ class QualtricsPlotsMixin: y_label: str = "Count of 1st Place Rankings", height: int | None = None, width: int | str | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted.""" + """Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted. + + Parameters: + color_gender: If True, color bars by voice gender with highlight/neutral intensity + (blue shades=male, pink shades=female). + """ df = self._ensure_dataframe(data) stats = [] @@ -460,7 +587,8 @@ class QualtricsPlotsMixin: for col in ranking_cols: count_rank_1 = df.filter(pl.col(col) == 1).height label = self._clean_voice_label(col) - stats.append({'item': label, 'count': count_rank_1}) + gender = self._get_voice_gender(label) if color_gender else None + stats.append({'item': label, 'count': count_rank_1, 'gender': gender}) # Convert and sort stats_df = pl.DataFrame(stats).sort('count', descending=True) @@ -474,23 +602,51 @@ class QualtricsPlotsMixin: .alias('category') ).to_pandas() - # Bar chart with conditional color - chart = alt.Chart(stats_df).mark_bar().encode( - x=alt.X('item:N', title=x_label, sort='-y'), - y=alt.Y('count:Q', title=y_label), - color=alt.Color('category:N', - scale=alt.Scale(domain=['Top 3', 'Other'], - range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), - legend=None), - tooltip=[ - alt.Tooltip('item:N', title='Item'), - alt.Tooltip('count:Q', title='1st Place Votes') + if color_gender: + # Add gender_category column for combined color encoding + stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] + + # Define combined domain and range for gender + category + domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other'] + range_colors = [ + ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL, + ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ] - ).properties( - title=self._process_title(title), - width=width or 800, - height=height or getattr(self, 'plot_height', 400) - ) + + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X('item:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('gender_category:N', + scale=alt.Scale(domain=domain, range=range_colors), + legend=alt.Legend(orient='top', direction='horizontal', title=None)), + tooltip=[ + alt.Tooltip('item:N', title='Item'), + alt.Tooltip('count:Q', title='1st Place Votes'), + alt.Tooltip('gender:N', title='Gender') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) + else: + # Bar chart with conditional color + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X('item:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('category:N', + scale=alt.Scale(domain=['Top 3', 'Other'], + range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), + legend=None), + tooltip=[ + alt.Tooltip('item:N', title='Item'), + alt.Tooltip('count:Q', title='1st Place Votes') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) chart = self._save_plot(chart, title) return chart @@ -504,19 +660,43 @@ class QualtricsPlotsMixin: color: str = ColorPalette.PRIMARY, height: int | None = None, width: int | str | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Create a bar chart showing the weighted ranking score for each character.""" + """Create a bar chart showing the weighted ranking score for each character. + + Parameters: + color_gender: If True, color bars by voice gender (blue=male, pink=female). + """ weighted_df = self._ensure_dataframe(data).to_pandas() - # Bar chart - bars = alt.Chart(weighted_df).mark_bar(color=color).encode( - x=alt.X('Character:N', title=x_label, sort='-y'), - y=alt.Y('Weighted Score:Q', title=y_label), - tooltip=[ - alt.Tooltip('Character:N'), - alt.Tooltip('Weighted Score:Q', title='Score') - ] - ) + if color_gender: + # Add gender column based on Character name + weighted_df['gender'] = weighted_df['Character'].apply(self._get_voice_gender) + + # Bar chart with gender coloring + bars = alt.Chart(weighted_df).mark_bar().encode( + x=alt.X('Character:N', title=x_label, sort='-y'), + y=alt.Y('Weighted Score:Q', title=y_label), + color=alt.Color('gender:N', + scale=alt.Scale(domain=['Male', 'Female'], + range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]), + legend=alt.Legend(orient='top', direction='horizontal', title='Gender')), + tooltip=[ + alt.Tooltip('Character:N'), + alt.Tooltip('Weighted Score:Q', title='Score'), + alt.Tooltip('gender:N', title='Gender') + ] + ) + else: + # Bar chart + bars = alt.Chart(weighted_df).mark_bar(color=color).encode( + x=alt.X('Character:N', title=x_label, sort='-y'), + y=alt.Y('Weighted Score:Q', title=y_label), + tooltip=[ + alt.Tooltip('Character:N'), + alt.Tooltip('Weighted Score:Q', title='Score') + ] + ) # Text overlay text = bars.mark_text( @@ -545,8 +725,14 @@ class QualtricsPlotsMixin: y_label: str = "Number of Times Chosen", height: int | None = None, width: int | str | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Create a bar plot showing the frequency of voice selections.""" + """Create a bar plot showing the frequency of voice selections. + + Parameters: + color_gender: If True, color bars by voice gender with highlight/neutral intensity + (blue shades=male, pink shades=female). + """ df = self._ensure_dataframe(data) if target_column not in df.columns: @@ -573,22 +759,52 @@ class QualtricsPlotsMixin: .to_pandas() ) - chart = alt.Chart(stats_df).mark_bar().encode( - x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), - y=alt.Y('count:Q', title=y_label), - color=alt.Color('category:N', - scale=alt.Scale(domain=['Top 8', 'Other'], - range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), - legend=None), - tooltip=[ - alt.Tooltip(f'{target_column}:N', title='Voice'), - alt.Tooltip('count:Q', title='Selections') + if color_gender: + # Add gender column based on voice label + stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) + # Add gender_category column for combined color encoding + stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] + + # Define combined domain and range for gender + category + domain = ['Male - Top 8', 'Male - Other', 'Female - Top 8', 'Female - Other'] + range_colors = [ + ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL, + ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ] - ).properties( - title=self._process_title(title), - width=width or 800, - height=height or getattr(self, 'plot_height', 400) - ) + + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('gender_category:N', + scale=alt.Scale(domain=domain, range=range_colors), + legend=alt.Legend(orient='top', direction='horizontal', title=None)), + tooltip=[ + alt.Tooltip(f'{target_column}:N', title='Voice'), + alt.Tooltip('count:Q', title='Selections'), + alt.Tooltip('gender:N', title='Gender') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) + else: + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('category:N', + scale=alt.Scale(domain=['Top 8', 'Other'], + range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), + legend=None), + tooltip=[ + alt.Tooltip(f'{target_column}:N', title='Voice'), + alt.Tooltip('count:Q', title='Selections') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) chart = self._save_plot(chart, title) return chart @@ -602,8 +818,14 @@ class QualtricsPlotsMixin: y_label: str = "Count of Mentions in Top 3", height: int | None = None, width: int | str | None = None, + color_gender: bool = False, ) -> alt.Chart: - """Question: Which 3 voices are chosen the most out of 18?""" + """Question: Which 3 voices are chosen the most out of 18? + + Parameters: + color_gender: If True, color bars by voice gender with highlight/neutral intensity + (blue shades=male, pink shades=female). + """ df = self._ensure_dataframe(data) if target_column not in df.columns: @@ -629,22 +851,52 @@ class QualtricsPlotsMixin: .to_pandas() ) - chart = alt.Chart(stats_df).mark_bar().encode( - x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), - y=alt.Y('count:Q', title=y_label), - color=alt.Color('category:N', - scale=alt.Scale(domain=['Top 3', 'Other'], - range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), - legend=None), - tooltip=[ - alt.Tooltip(f'{target_column}:N', title='Voice'), - alt.Tooltip('count:Q', title='In Top 3') + if color_gender: + # Add gender column based on voice label + stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender) + # Add gender_category column for combined color encoding + stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category'] + + # Define combined domain and range for gender + category + domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other'] + range_colors = [ + ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL, + ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL ] - ).properties( - title=self._process_title(title), - width=width or 800, - height=height or getattr(self, 'plot_height', 400) - ) + + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('gender_category:N', + scale=alt.Scale(domain=domain, range=range_colors), + legend=alt.Legend(orient='top', direction='horizontal', title=None)), + tooltip=[ + alt.Tooltip(f'{target_column}:N', title='Voice'), + alt.Tooltip('count:Q', title='In Top 3'), + alt.Tooltip('gender:N', title='Gender') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) + else: + chart = alt.Chart(stats_df).mark_bar().encode( + x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), + y=alt.Y('count:Q', title=y_label), + color=alt.Color('category:N', + scale=alt.Scale(domain=['Top 3', 'Other'], + range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]), + legend=None), + tooltip=[ + alt.Tooltip(f'{target_column}:N', title='Voice'), + alt.Tooltip('count:Q', title='In Top 3') + ] + ).properties( + title=self._process_title(title), + width=width or 800, + height=height or getattr(self, 'plot_height', 400) + ) chart = self._save_plot(chart, title) return chart diff --git a/theme.py b/theme.py index 2c33533..9164769 100644 --- a/theme.py +++ b/theme.py @@ -59,6 +59,24 @@ class ColorPalette: "#457B9D", # Steel Blue ] + # Gender-based colors (Male = Blue tones, Female = Pink tones) + # Primary colors by gender + GENDER_MALE = "#0077B6" # Medium Blue (same as PRIMARY) + GENDER_FEMALE = "#B6007A" # Medium Pink + + # Ranking colors by gender (Darkest -> Lightest) + GENDER_MALE_RANK_1 = "#004C6D" # Dark Blue + GENDER_MALE_RANK_2 = "#0077B6" # Medium Blue + GENDER_MALE_RANK_3 = "#669BBC" # Light Steel Blue + + GENDER_FEMALE_RANK_1 = "#6D004C" # Dark Pink + GENDER_FEMALE_RANK_2 = "#B6007A" # Medium Pink + GENDER_FEMALE_RANK_3 = "#BC669B" # Light Pink + + # Neutral colors by gender (for non-highlighted items) + GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue + GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink + def jpmc_altair_theme(): """JPMC brand theme for Altair charts.""" diff --git a/utils.py b/utils.py index bc83663..2f0d34f 100644 --- a/utils.py +++ b/utils.py @@ -508,9 +508,16 @@ def update_ppt_alt_text(ppt_path: Union[str, Path], image_source_dir: Union[str, print(f"Error updating alt text for {original_path}: {e}") else: + # Check if image already has alt text set - if so, skip reporting as unmatched + existing_alt = _get_shape_alt_text(shape) + if existing_alt: + # Image already has alt text, no need to report as unmatched + continue + shape_id = getattr(shape, 'shape_id', getattr(shape, 'id', 'Unknown ID')) shape_name = shape.name if shape.name else f"Unnamed Shape (ID: {shape_id})" hash_type = "pHash" if use_perceptual_hash else "SHA1" + unmatched_images.append({ 'slide': i+1, 'shape_name': shape_name,