male/female colored plots

This commit is contained in:
2026-02-03 00:40:51 +01:00
parent 77fdd6e8f6
commit acf9c45844
5 changed files with 394 additions and 111 deletions

View File

@@ -458,6 +458,12 @@ def _():
return return
@app.cell
def _():
# Notebook-wide switch: passed as `color_gender=` to the S.plot_* calls in the
# cells below, so flipping it here toggles gender-based bar coloring everywhere.
COLOR_GENDER = True
return (COLOR_GENDER,)
@app.cell @app.cell
def _(): def _():
mo.md(r""" mo.md(r"""
@@ -473,8 +479,8 @@ def _(S, data):
@app.cell @app.cell
def _(S, v_18_8_3): def _(COLOR_GENDER, S, v_18_8_3):
S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice') S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER)
return return
@@ -487,8 +493,8 @@ def _():
@app.cell @app.cell
def _(S, v_18_8_3): def _(COLOR_GENDER, S, v_18_8_3):
S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice') S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER)
return return
@@ -508,8 +514,8 @@ def _(S, data):
@app.cell @app.cell
def _(S, top3_voices_weighted): def _(COLOR_GENDER, S, top3_voices_weighted):
S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)") S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", color_gender=COLOR_GENDER)
return return
@@ -524,8 +530,8 @@ def _():
@app.cell @app.cell
def _(S, top3_voices): def _(COLOR_GENDER, S, top3_voices):
S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Voice Rankings (1st, 2nd, 3rd)") S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER)
return return
@@ -580,8 +586,8 @@ def _():
@app.cell @app.cell
def _(S, top3_voices): def _(COLOR_GENDER, S, top3_voices):
S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice') S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', color_gender=COLOR_GENDER)
return return
@@ -594,10 +600,10 @@ def _():
@app.cell @app.cell
def _(S, data): def _(COLOR_GENDER, S, data):
# Get your voice scale data (from notebook) # Get your voice scale data (from notebook)
voice_1_10, _ = S.get_voice_scale_1_10(data) voice_1_10, _ = S.get_voice_scale_1_10(data)
S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)") S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER)
return (voice_1_10,) return (voice_1_10,)

View File

@@ -21,8 +21,8 @@ def _():
@app.cell @app.cell
def _(): def _():
TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx') TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx') TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
TAG_IMAGE_DIR = Path('figures/2-2-26') TAG_IMAGE_DIR = Path('figures/2-2-26')
return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET
@@ -43,8 +43,8 @@ def _():
@app.cell @app.cell
def _(): def _():
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx') REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx') REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx')
NEW_IMAGES_DIR = Path('figures/2-2-26') NEW_IMAGES_DIR = Path('figures/2-2-26')
return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET

280
plots.py
View File

@@ -8,6 +8,7 @@ import altair as alt
import pandas as pd import pandas as pd
import polars as pl import polars as pl
from theme import ColorPalette from theme import ColorPalette
from reference import VOICE_GENDER_MAPPING
import hashlib import hashlib
@@ -260,6 +261,61 @@ class QualtricsPlotsMixin:
label = label.replace('_', ' ').strip() label = label.replace('_', ' ').strip()
return label return label
def _get_voice_gender(self, voice_label: str) -> str:
"""Get the gender of a voice from its label.
Parameters:
voice_label: Voice label (e.g., 'V14', 'Voice 14', etc.)
Returns:
'Male' or 'Female', defaults to 'Male' if not found
"""
# Extract voice code (e.g., 'V14' from 'Voice 14' or 'V14')
voice_code = None
# Try to find VXX pattern
match = re.search(r'V(\d+)', voice_label)
if match:
voice_code = f"V{match.group(1)}"
else:
# Try to extract number and prepend V
match = re.search(r'(\d+)', voice_label)
if match:
voice_code = f"V{match.group(1)}"
if voice_code and voice_code in VOICE_GENDER_MAPPING:
return VOICE_GENDER_MAPPING[voice_code]
return "Male" # Default to Male if unknown
def _get_gender_color(self, gender: str, color_type: str = "primary") -> str:
    """Look up the palette color for a gender / color-type combination.

    Parameters:
        gender: 'Male' or 'Female'. Any other value uses the Male shades.
        color_type: One of 'primary', 'rank_1', 'rank_2', 'rank_3', 'neutral'.
            Any other value yields ``ColorPalette.PRIMARY``.

    Returns:
        The matching ``ColorPalette`` color value.
    """
    male_shades = {
        "primary": ColorPalette.GENDER_MALE,
        "rank_1": ColorPalette.GENDER_MALE_RANK_1,
        "rank_2": ColorPalette.GENDER_MALE_RANK_2,
        "rank_3": ColorPalette.GENDER_MALE_RANK_3,
        "neutral": ColorPalette.GENDER_MALE_NEUTRAL,
    }
    female_shades = {
        "primary": ColorPalette.GENDER_FEMALE,
        "rank_1": ColorPalette.GENDER_FEMALE_RANK_1,
        "rank_2": ColorPalette.GENDER_FEMALE_RANK_2,
        "rank_3": ColorPalette.GENDER_FEMALE_RANK_3,
        "neutral": ColorPalette.GENDER_FEMALE_NEUTRAL,
    }
    shades_by_gender = {"Male": male_shades, "Female": female_shades}

    # Unrecognized genders share the Male shades.
    try:
        shades = shades_by_gender[gender]
    except KeyError:
        shades = male_shades

    # Unrecognized color types resolve to the general primary color,
    # independent of gender.
    fallback = ColorPalette.PRIMARY
    if color_type in shades:
        return shades[color_type]
    return fallback
def plot_average_scores_with_counts( def plot_average_scores_with_counts(
self, self,
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
@@ -270,8 +326,13 @@ class QualtricsPlotsMixin:
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
domain: list[float] | None = None, domain: list[float] | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Create a bar plot showing average scores and count of non-null values for each column.""" """Create a bar plot showing average scores and count of non-null values for each column.
Parameters:
color_gender: If True, color bars by voice gender (blue=male, pink=female).
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
# Calculate stats for each column (exclude _recordId) # Calculate stats for each column (exclude _recordId)
@@ -280,10 +341,12 @@ class QualtricsPlotsMixin:
avg_score = df[col].mean() avg_score = df[col].mean()
non_null_count = df[col].drop_nulls().len() non_null_count = df[col].drop_nulls().len()
label = self._clean_voice_label(col) label = self._clean_voice_label(col)
gender = self._get_voice_gender(label) if color_gender else None
stats.append({ stats.append({
'voice': label, 'voice': label,
'average': avg_score, 'average': avg_score,
'count': non_null_count 'count': non_null_count,
'gender': gender
}) })
# Convert to pandas for Altair (sort by average descending) # Convert to pandas for Altair (sort by average descending)
@@ -293,6 +356,23 @@ class QualtricsPlotsMixin:
domain = [stats_df['average'].min(), stats_df['average'].max()] domain = [stats_df['average'].min(), stats_df['average'].max()]
# Base bar chart - use y2 to explicitly start bars at domain minimum # Base bar chart - use y2 to explicitly start bars at domain minimum
if color_gender:
bars = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('voice:N', title=x_label, sort='-y'),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)),
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'],
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
legend=alt.Legend(orient='top', direction='horizontal', title='Gender')),
tooltip=[
alt.Tooltip('voice:N', title='Voice'),
alt.Tooltip('average:Q', title='Average', format='.2f'),
alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('gender:N', title='Gender')
]
)
else:
bars = alt.Chart(stats_df).mark_bar(color=color).encode( bars = alt.Chart(stats_df).mark_bar(color=color).encode(
x=alt.X('voice:N', title=x_label, sort='-y'), x=alt.X('voice:N', title=x_label, sort='-y'),
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)), y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)),
@@ -390,8 +470,14 @@ class QualtricsPlotsMixin:
y_label: str = "Number of Votes", y_label: str = "Number of Votes",
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Create a stacked bar chart showing the distribution of rankings (1st to 3rd).""" """Create a stacked bar chart showing the distribution of rankings (1st to 3rd).
Parameters:
color_gender: If True, color bars by voice gender with rank intensity
(blue shades=male, pink shades=female).
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
stats = [] stats = []
@@ -406,10 +492,11 @@ class QualtricsPlotsMixin:
if total > 0: if total > 0:
label = self._clean_voice_label(col) label = self._clean_voice_label(col)
stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'rank1': r1}) gender = self._get_voice_gender(label) if color_gender else None
stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'rank1': r1}) stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'total': total, 'gender': gender, 'rank_order': 1})
stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'rank1': r1}) stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'total': total, 'gender': gender, 'rank_order': 2})
# stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'rank1': r1}) stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'total': total, 'gender': gender, 'rank_order': 3})
# stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'total': total, 'gender': gender, 'rank_order': 4})
if not stats: if not stats:
return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N') return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
@@ -419,14 +506,48 @@ class QualtricsPlotsMixin:
# Interactive legend selection - click to filter # Interactive legend selection - click to filter
selection = alt.selection_point(fields=['rank'], bind='legend') selection = alt.selection_point(fields=['rank'], bind='legend')
if color_gender:
# Add gender_rank column for combined color encoding
stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank']
# Define combined domain and range for gender + rank
domain = [
'Male - Rank 1 (Best)', 'Male - Rank 2', 'Male - Rank 3',
'Female - Rank 1 (Best)', 'Female - Rank 2', 'Female - Rank 3'
]
range_colors = [
ColorPalette.GENDER_MALE_RANK_1, ColorPalette.GENDER_MALE_RANK_2, ColorPalette.GENDER_MALE_RANK_3,
ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3
]
chart = alt.Chart(stats_df).mark_bar().encode( chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='rank1', order='descending')), x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')),
y=alt.Y('count:Q', title=y_label, stack='zero'),
color=alt.Color('gender_rank:N',
scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)),
order=alt.Order('rank_order:Q', sort='ascending'),
opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
tooltip=[
alt.Tooltip('item:N', title='Item'),
alt.Tooltip('rank:N', title='Rank'),
alt.Tooltip('count:Q', title='Count'),
alt.Tooltip('gender:N', title='Gender')
]
).add_params(selection).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
else:
chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')),
y=alt.Y('count:Q', title=y_label, stack='zero'), y=alt.Y('count:Q', title=y_label, stack='zero'),
color=alt.Color('rank:N', color=alt.Color('rank:N',
scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'], scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'],
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]), range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
legend=alt.Legend(orient='top', direction='horizontal', title=None)), legend=alt.Legend(orient='top', direction='horizontal', title=None)),
order=alt.Order('rank:N', sort='ascending'), order=alt.Order('rank_order:Q', sort='ascending'),
opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
tooltip=[ tooltip=[
alt.Tooltip('item:N', title='Item'), alt.Tooltip('item:N', title='Item'),
@@ -450,8 +571,14 @@ class QualtricsPlotsMixin:
y_label: str = "Count of 1st Place Rankings", y_label: str = "Count of 1st Place Rankings",
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted.""" """Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted.
Parameters:
color_gender: If True, color bars by voice gender with highlight/neutral intensity
(blue shades=male, pink shades=female).
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
stats = [] stats = []
@@ -460,7 +587,8 @@ class QualtricsPlotsMixin:
for col in ranking_cols: for col in ranking_cols:
count_rank_1 = df.filter(pl.col(col) == 1).height count_rank_1 = df.filter(pl.col(col) == 1).height
label = self._clean_voice_label(col) label = self._clean_voice_label(col)
stats.append({'item': label, 'count': count_rank_1}) gender = self._get_voice_gender(label) if color_gender else None
stats.append({'item': label, 'count': count_rank_1, 'gender': gender})
# Convert and sort # Convert and sort
stats_df = pl.DataFrame(stats).sort('count', descending=True) stats_df = pl.DataFrame(stats).sort('count', descending=True)
@@ -474,6 +602,34 @@ class QualtricsPlotsMixin:
.alias('category') .alias('category')
).to_pandas() ).to_pandas()
if color_gender:
# Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
# Define combined domain and range for gender + category
domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other']
range_colors = [
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
]
chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'),
y=alt.Y('count:Q', title=y_label),
color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
tooltip=[
alt.Tooltip('item:N', title='Item'),
alt.Tooltip('count:Q', title='1st Place Votes'),
alt.Tooltip('gender:N', title='Gender')
]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
else:
# Bar chart with conditional color # Bar chart with conditional color
chart = alt.Chart(stats_df).mark_bar().encode( chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X('item:N', title=x_label, sort='-y'), x=alt.X('item:N', title=x_label, sort='-y'),
@@ -504,10 +660,34 @@ class QualtricsPlotsMixin:
color: str = ColorPalette.PRIMARY, color: str = ColorPalette.PRIMARY,
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Create a bar chart showing the weighted ranking score for each character.""" """Create a bar chart showing the weighted ranking score for each character.
Parameters:
color_gender: If True, color bars by voice gender (blue=male, pink=female).
"""
weighted_df = self._ensure_dataframe(data).to_pandas() weighted_df = self._ensure_dataframe(data).to_pandas()
if color_gender:
# Add gender column based on Character name
weighted_df['gender'] = weighted_df['Character'].apply(self._get_voice_gender)
# Bar chart with gender coloring
bars = alt.Chart(weighted_df).mark_bar().encode(
x=alt.X('Character:N', title=x_label, sort='-y'),
y=alt.Y('Weighted Score:Q', title=y_label),
color=alt.Color('gender:N',
scale=alt.Scale(domain=['Male', 'Female'],
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
legend=alt.Legend(orient='top', direction='horizontal', title='Gender')),
tooltip=[
alt.Tooltip('Character:N'),
alt.Tooltip('Weighted Score:Q', title='Score'),
alt.Tooltip('gender:N', title='Gender')
]
)
else:
# Bar chart # Bar chart
bars = alt.Chart(weighted_df).mark_bar(color=color).encode( bars = alt.Chart(weighted_df).mark_bar(color=color).encode(
x=alt.X('Character:N', title=x_label, sort='-y'), x=alt.X('Character:N', title=x_label, sort='-y'),
@@ -545,8 +725,14 @@ class QualtricsPlotsMixin:
y_label: str = "Number of Times Chosen", y_label: str = "Number of Times Chosen",
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Create a bar plot showing the frequency of voice selections.""" """Create a bar plot showing the frequency of voice selections.
Parameters:
color_gender: If True, color bars by voice gender with highlight/neutral intensity
(blue shades=male, pink shades=female).
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
if target_column not in df.columns: if target_column not in df.columns:
@@ -573,6 +759,36 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
# Define combined domain and range for gender + category
domain = ['Male - Top 8', 'Male - Other', 'Female - Top 8', 'Female - Other']
range_colors = [
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
]
chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
y=alt.Y('count:Q', title=y_label),
color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
tooltip=[
alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='Selections'),
alt.Tooltip('gender:N', title='Gender')
]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
else:
chart = alt.Chart(stats_df).mark_bar().encode( chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label),
@@ -602,8 +818,14 @@ class QualtricsPlotsMixin:
y_label: str = "Count of Mentions in Top 3", y_label: str = "Count of Mentions in Top 3",
height: int | None = None, height: int | None = None,
width: int | str | None = None, width: int | str | None = None,
color_gender: bool = False,
) -> alt.Chart: ) -> alt.Chart:
"""Question: Which 3 voices are chosen the most out of 18?""" """Question: Which 3 voices are chosen the most out of 18?
Parameters:
color_gender: If True, color bars by voice gender with highlight/neutral intensity
(blue shades=male, pink shades=female).
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
if target_column not in df.columns: if target_column not in df.columns:
@@ -629,6 +851,36 @@ class QualtricsPlotsMixin:
.to_pandas() .to_pandas()
) )
if color_gender:
# Add gender column based on voice label
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
# Add gender_category column for combined color encoding
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
# Define combined domain and range for gender + category
domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other']
range_colors = [
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
]
chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
y=alt.Y('count:Q', title=y_label),
color=alt.Color('gender_category:N',
scale=alt.Scale(domain=domain, range=range_colors),
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
tooltip=[
alt.Tooltip(f'{target_column}:N', title='Voice'),
alt.Tooltip('count:Q', title='In Top 3'),
alt.Tooltip('gender:N', title='Gender')
]
).properties(
title=self._process_title(title),
width=width or 800,
height=height or getattr(self, 'plot_height', 400)
)
else:
chart = alt.Chart(stats_df).mark_bar().encode( chart = alt.Chart(stats_df).mark_bar().encode(
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'), x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
y=alt.Y('count:Q', title=y_label), y=alt.Y('count:Q', title=y_label),

View File

@@ -59,6 +59,24 @@ class ColorPalette:
"#457B9D", # Steel Blue "#457B9D", # Steel Blue
] ]
# Gender-based colors (Male = Blue tones, Female = Pink tones)
# Primary colors by gender
GENDER_MALE = "#0077B6" # Medium Blue (same as PRIMARY)
GENDER_FEMALE = "#B6007A" # Medium Pink
# Ranking colors by gender (Darkest -> Lightest)
GENDER_MALE_RANK_1 = "#004C6D" # Dark Blue
GENDER_MALE_RANK_2 = "#0077B6" # Medium Blue
GENDER_MALE_RANK_3 = "#669BBC" # Light Steel Blue
GENDER_FEMALE_RANK_1 = "#6D004C" # Dark Pink
GENDER_FEMALE_RANK_2 = "#B6007A" # Medium Pink
GENDER_FEMALE_RANK_3 = "#BC669B" # Light Pink
# Neutral colors by gender (for non-highlighted items)
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
def jpmc_altair_theme(): def jpmc_altair_theme():
"""JPMC brand theme for Altair charts.""" """JPMC brand theme for Altair charts."""

View File

@@ -508,9 +508,16 @@ def update_ppt_alt_text(ppt_path: Union[str, Path], image_source_dir: Union[str,
print(f"Error updating alt text for {original_path}: {e}") print(f"Error updating alt text for {original_path}: {e}")
else: else:
# Check if image already has alt text set - if so, skip reporting as unmatched
existing_alt = _get_shape_alt_text(shape)
if existing_alt:
# Image already has alt text, no need to report as unmatched
continue
shape_id = getattr(shape, 'shape_id', getattr(shape, 'id', 'Unknown ID')) shape_id = getattr(shape, 'shape_id', getattr(shape, 'id', 'Unknown ID'))
shape_name = shape.name if shape.name else f"Unnamed Shape (ID: {shape_id})" shape_name = shape.name if shape.name else f"Unnamed Shape (ID: {shape_id})"
hash_type = "pHash" if use_perceptual_hash else "SHA1" hash_type = "pHash" if use_perceptual_hash else "SHA1"
unmatched_images.append({ unmatched_images.append({
'slide': i+1, 'slide': i+1,
'shape_name': shape_name, 'shape_name': shape_name,