male/female colored plots
This commit is contained in:
@@ -458,6 +458,12 @@ def _():
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _():
|
||||||
|
# Notebook-wide switch: later cells pass this as `color_gender=` to the S.plot_* calls.
COLOR_GENDER = True
|
||||||
|
return (COLOR_GENDER,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
@@ -473,8 +479,8 @@ def _(S, data):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, v_18_8_3):
|
def _(COLOR_GENDER, S, v_18_8_3):
|
||||||
S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice')
|
S.plot_voice_selection_counts(v_18_8_3, title="Top 8 Voice Selection from 18 Voices", x_label='Voice', color_gender=COLOR_GENDER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -487,8 +493,8 @@ def _():
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, v_18_8_3):
|
def _(COLOR_GENDER, S, v_18_8_3):
|
||||||
S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice')
|
S.plot_top3_selection_counts(v_18_8_3, title="Top 3 Voice Selection Counts from 8 Voices", x_label='Voice', color_gender=COLOR_GENDER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -508,8 +514,8 @@ def _(S, data):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, top3_voices_weighted):
|
def _(COLOR_GENDER, S, top3_voices_weighted):
|
||||||
S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)")
|
S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", color_gender=COLOR_GENDER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -524,8 +530,8 @@ def _():
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, top3_voices):
|
def _(COLOR_GENDER, S, top3_voices):
|
||||||
S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Voice Rankings (1st, 2nd, 3rd)")
|
S.plot_ranking_distribution(top3_voices, x_label='Voice', title="Distribution of Top 3 Voice Rankings (1st, 2nd, 3rd)", color_gender=COLOR_GENDER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -580,8 +586,8 @@ def _():
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, top3_voices):
|
def _(COLOR_GENDER, S, top3_voices):
|
||||||
S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice')
|
S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', color_gender=COLOR_GENDER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -594,10 +600,10 @@ def _():
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(S, data):
|
def _(COLOR_GENDER, S, data):
|
||||||
# Get your voice scale data (from notebook)
|
# Get your voice scale data (from notebook)
|
||||||
voice_1_10, _ = S.get_voice_scale_1_10(data)
|
voice_1_10, _ = S.get_voice_scale_1_10(data)
|
||||||
S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)")
|
S.plot_average_scores_with_counts(voice_1_10, x_label='Voice', domain=[1,10], title="Voice General Impression (Scale 1-10)", color_gender=COLOR_GENDER)
|
||||||
return (voice_1_10,)
|
return (voice_1_10,)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
|
TAG_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
|
||||||
TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx')
|
TAG_TARGET = Path('data/reports/Perception-Research-Report_2-2_tagged.pptx')
|
||||||
TAG_IMAGE_DIR = Path('figures/2-2-26')
|
TAG_IMAGE_DIR = Path('figures/2-2-26')
|
||||||
return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET
|
return TAG_IMAGE_DIR, TAG_SOURCE, TAG_TARGET
|
||||||
|
|
||||||
@@ -43,8 +43,8 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _():
|
def _():
|
||||||
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2_tagged_2.pptx')
|
REPLACE_SOURCE = Path('data/reports/Perception-Research-Report_2-2.pptx')
|
||||||
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2.pptx')
|
REPLACE_TARGET = Path('data/reports/Perception-Research-Report_2-2_updated.pptx')
|
||||||
|
|
||||||
NEW_IMAGES_DIR = Path('figures/2-2-26')
|
NEW_IMAGES_DIR = Path('figures/2-2-26')
|
||||||
return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET
|
return NEW_IMAGES_DIR, REPLACE_SOURCE, REPLACE_TARGET
|
||||||
|
|||||||
442
plots.py
442
plots.py
@@ -8,6 +8,7 @@ import altair as alt
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import polars as pl
|
import polars as pl
|
||||||
from theme import ColorPalette
|
from theme import ColorPalette
|
||||||
|
from reference import VOICE_GENDER_MAPPING
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
@@ -260,6 +261,61 @@ class QualtricsPlotsMixin:
|
|||||||
label = label.replace('_', ' ').strip()
|
label = label.replace('_', ' ').strip()
|
||||||
return label
|
return label
|
||||||
|
|
||||||
|
def _get_voice_gender(self, voice_label: str) -> str:
    """Return the gender recorded for a voice label.

    The label is reduced to a ``V<number>`` code which is then looked up in
    ``VOICE_GENDER_MAPPING``. An explicit ``V<digits>`` token is preferred;
    if there is none, the first run of digits in the label is used.

    Parameters:
        voice_label: Voice label (e.g., 'V14', 'Voice 14', etc.). Non-string
            values (for example a null cell from a DataFrame column) are
            treated as unknown instead of raising.

    Returns:
        The value stored in ``VOICE_GENDER_MAPPING`` for the voice code
        (expected to be 'Male' or 'Female'); 'Male' when the label contains
        no digits or its code is not in the mapping.
    """
    default_gender = "Male"  # Default to Male if unknown

    # This method is handed to ``Series.apply`` by the plotting code, so a
    # null entry can arrive here; ``re.search`` would raise TypeError on it.
    if not isinstance(voice_label, str):
        return default_gender

    # Prefer an explicit VXX token, otherwise fall back to the first number.
    match = re.search(r'V(\d+)', voice_label) or re.search(r'(\d+)', voice_label)
    if match is None:
        return default_gender

    voice_code = f"V{match.group(1)}"
    if voice_code in VOICE_GENDER_MAPPING:
        return VOICE_GENDER_MAPPING[voice_code]

    return default_gender
|
||||||
|
|
||||||
|
def _get_gender_color(self, gender: str, color_type: str = "primary") -> str:
    """Look up the palette color for a gender / color-role pair.

    Parameters:
        gender: 'Male' or 'Female'; any other value uses the male palette.
        color_type: One of 'primary', 'rank_1', 'rank_2', 'rank_3', 'neutral'.

    Returns:
        The matching ``ColorPalette`` entry, or ``ColorPalette.PRIMARY`` when
        ``color_type`` is not one of the names listed above.
    """
    male_palette = {
        "primary": ColorPalette.GENDER_MALE,
        "rank_1": ColorPalette.GENDER_MALE_RANK_1,
        "rank_2": ColorPalette.GENDER_MALE_RANK_2,
        "rank_3": ColorPalette.GENDER_MALE_RANK_3,
        "neutral": ColorPalette.GENDER_MALE_NEUTRAL,
    }
    female_palette = {
        "primary": ColorPalette.GENDER_FEMALE,
        "rank_1": ColorPalette.GENDER_FEMALE_RANK_1,
        "rank_2": ColorPalette.GENDER_FEMALE_RANK_2,
        "rank_3": ColorPalette.GENDER_FEMALE_RANK_3,
        "neutral": ColorPalette.GENDER_FEMALE_NEUTRAL,
    }
    palettes_by_gender = {"Male": male_palette, "Female": female_palette}

    # Unknown genders share the male palette.
    try:
        chosen_palette = palettes_by_gender[gender]
    except KeyError:
        chosen_palette = male_palette

    if color_type in chosen_palette:
        return chosen_palette[color_type]
    return ColorPalette.PRIMARY
|
||||||
|
|
||||||
def plot_average_scores_with_counts(
|
def plot_average_scores_with_counts(
|
||||||
self,
|
self,
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
@@ -270,8 +326,13 @@ class QualtricsPlotsMixin:
|
|||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
domain: list[float] | None = None,
|
domain: list[float] | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Create a bar plot showing average scores and count of non-null values for each column."""
|
"""Create a bar plot showing average scores and count of non-null values for each column.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
# Calculate stats for each column (exclude _recordId)
|
# Calculate stats for each column (exclude _recordId)
|
||||||
@@ -280,10 +341,12 @@ class QualtricsPlotsMixin:
|
|||||||
avg_score = df[col].mean()
|
avg_score = df[col].mean()
|
||||||
non_null_count = df[col].drop_nulls().len()
|
non_null_count = df[col].drop_nulls().len()
|
||||||
label = self._clean_voice_label(col)
|
label = self._clean_voice_label(col)
|
||||||
|
gender = self._get_voice_gender(label) if color_gender else None
|
||||||
stats.append({
|
stats.append({
|
||||||
'voice': label,
|
'voice': label,
|
||||||
'average': avg_score,
|
'average': avg_score,
|
||||||
'count': non_null_count
|
'count': non_null_count,
|
||||||
|
'gender': gender
|
||||||
})
|
})
|
||||||
|
|
||||||
# Convert to pandas for Altair (sort by average descending)
|
# Convert to pandas for Altair (sort by average descending)
|
||||||
@@ -293,16 +356,33 @@ class QualtricsPlotsMixin:
|
|||||||
domain = [stats_df['average'].min(), stats_df['average'].max()]
|
domain = [stats_df['average'].min(), stats_df['average'].max()]
|
||||||
|
|
||||||
# Base bar chart - use y2 to explicitly start bars at domain minimum
|
# Base bar chart - use y2 to explicitly start bars at domain minimum
|
||||||
bars = alt.Chart(stats_df).mark_bar(color=color).encode(
|
if color_gender:
|
||||||
x=alt.X('voice:N', title=x_label, sort='-y'),
|
bars = alt.Chart(stats_df).mark_bar().encode(
|
||||||
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)),
|
x=alt.X('voice:N', title=x_label, sort='-y'),
|
||||||
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
|
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)),
|
||||||
tooltip=[
|
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
|
||||||
alt.Tooltip('voice:N', title='Voice'),
|
color=alt.Color('gender:N',
|
||||||
alt.Tooltip('average:Q', title='Average', format='.2f'),
|
scale=alt.Scale(domain=['Male', 'Female'],
|
||||||
alt.Tooltip('count:Q', title='Count')
|
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
|
||||||
]
|
legend=alt.Legend(orient='top', direction='horizontal', title='Gender')),
|
||||||
)
|
tooltip=[
|
||||||
|
alt.Tooltip('voice:N', title='Voice'),
|
||||||
|
alt.Tooltip('average:Q', title='Average', format='.2f'),
|
||||||
|
alt.Tooltip('count:Q', title='Count'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
bars = alt.Chart(stats_df).mark_bar(color=color).encode(
|
||||||
|
x=alt.X('voice:N', title=x_label, sort='-y'),
|
||||||
|
y=alt.Y('average:Q', title=y_label, scale=alt.Scale(domain=domain)),
|
||||||
|
y2=alt.datum(domain[0]), # Bars start at domain minimum (bottom edge)
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('voice:N', title='Voice'),
|
||||||
|
alt.Tooltip('average:Q', title='Average', format='.2f'),
|
||||||
|
alt.Tooltip('count:Q', title='Count')
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
# Text overlay for counts
|
# Text overlay for counts
|
||||||
text = alt.Chart(stats_df).mark_text(
|
text = alt.Chart(stats_df).mark_text(
|
||||||
@@ -390,8 +470,14 @@ class QualtricsPlotsMixin:
|
|||||||
y_label: str = "Number of Votes",
|
y_label: str = "Number of Votes",
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Create a stacked bar chart showing the distribution of rankings (1st to 3rd)."""
|
"""Create a stacked bar chart showing the distribution of rankings (1st to 3rd).
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender with rank intensity
|
||||||
|
(blue shades=male, pink shades=female).
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
stats = []
|
stats = []
|
||||||
@@ -406,10 +492,11 @@ class QualtricsPlotsMixin:
|
|||||||
|
|
||||||
if total > 0:
|
if total > 0:
|
||||||
label = self._clean_voice_label(col)
|
label = self._clean_voice_label(col)
|
||||||
stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'rank1': r1})
|
gender = self._get_voice_gender(label) if color_gender else None
|
||||||
stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'rank1': r1})
|
stats.append({'item': label, 'rank': 'Rank 1 (Best)', 'count': r1, 'total': total, 'gender': gender, 'rank_order': 1})
|
||||||
stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'rank1': r1})
|
stats.append({'item': label, 'rank': 'Rank 2', 'count': r2, 'total': total, 'gender': gender, 'rank_order': 2})
|
||||||
# stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'rank1': r1})
|
stats.append({'item': label, 'rank': 'Rank 3', 'count': r3, 'total': total, 'gender': gender, 'rank_order': 3})
|
||||||
|
# stats.append({'item': label, 'rank': 'Rank 4 (Worst)', 'count': r4, 'total': total, 'gender': gender, 'rank_order': 4})
|
||||||
|
|
||||||
if not stats:
|
if not stats:
|
||||||
return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
|
return alt.Chart(pd.DataFrame({'text': ['No data']})).mark_text().encode(text='text:N')
|
||||||
@@ -419,25 +506,59 @@ class QualtricsPlotsMixin:
|
|||||||
# Interactive legend selection - click to filter
|
# Interactive legend selection - click to filter
|
||||||
selection = alt.selection_point(fields=['rank'], bind='legend')
|
selection = alt.selection_point(fields=['rank'], bind='legend')
|
||||||
|
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
if color_gender:
|
||||||
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='rank1', order='descending')),
|
# Add gender_rank column for combined color encoding
|
||||||
y=alt.Y('count:Q', title=y_label, stack='zero'),
|
stats_df['gender_rank'] = stats_df['gender'] + ' - ' + stats_df['rank']
|
||||||
color=alt.Color('rank:N',
|
|
||||||
scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'],
|
# Define combined domain and range for gender + rank
|
||||||
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
|
domain = [
|
||||||
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
|
'Male - Rank 1 (Best)', 'Male - Rank 2', 'Male - Rank 3',
|
||||||
order=alt.Order('rank:N', sort='ascending'),
|
'Female - Rank 1 (Best)', 'Female - Rank 2', 'Female - Rank 3'
|
||||||
opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
|
|
||||||
tooltip=[
|
|
||||||
alt.Tooltip('item:N', title='Item'),
|
|
||||||
alt.Tooltip('rank:N', title='Rank'),
|
|
||||||
alt.Tooltip('count:Q', title='Count')
|
|
||||||
]
|
]
|
||||||
).add_params(selection).properties(
|
range_colors = [
|
||||||
title=self._process_title(title),
|
ColorPalette.GENDER_MALE_RANK_1, ColorPalette.GENDER_MALE_RANK_2, ColorPalette.GENDER_MALE_RANK_3,
|
||||||
width=width or 800,
|
ColorPalette.GENDER_FEMALE_RANK_1, ColorPalette.GENDER_FEMALE_RANK_2, ColorPalette.GENDER_FEMALE_RANK_3
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
]
|
||||||
)
|
|
||||||
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
|
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')),
|
||||||
|
y=alt.Y('count:Q', title=y_label, stack='zero'),
|
||||||
|
color=alt.Color('gender_rank:N',
|
||||||
|
scale=alt.Scale(domain=domain, range=range_colors),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title=None, columns=3)),
|
||||||
|
order=alt.Order('rank_order:Q', sort='ascending'),
|
||||||
|
opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('item:N', title='Item'),
|
||||||
|
alt.Tooltip('rank:N', title='Rank'),
|
||||||
|
alt.Tooltip('count:Q', title='Count'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
).add_params(selection).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
|
x=alt.X('item:N', title=x_label, sort=alt.EncodingSortField(field='total', order='descending')),
|
||||||
|
y=alt.Y('count:Q', title=y_label, stack='zero'),
|
||||||
|
color=alt.Color('rank:N',
|
||||||
|
scale=alt.Scale(domain=['Rank 1 (Best)', 'Rank 2', 'Rank 3'],
|
||||||
|
range=[ColorPalette.RANK_1, ColorPalette.RANK_2, ColorPalette.RANK_3]),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
|
||||||
|
order=alt.Order('rank_order:Q', sort='ascending'),
|
||||||
|
opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('item:N', title='Item'),
|
||||||
|
alt.Tooltip('rank:N', title='Rank'),
|
||||||
|
alt.Tooltip('count:Q', title='Count')
|
||||||
|
]
|
||||||
|
).add_params(selection).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
@@ -450,8 +571,14 @@ class QualtricsPlotsMixin:
|
|||||||
y_label: str = "Count of 1st Place Rankings",
|
y_label: str = "Count of 1st Place Rankings",
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted."""
|
"""Create a bar chart showing which item was ranked #1 the most. Top 3 highlighted.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender with highlight/neutral intensity
|
||||||
|
(blue shades=male, pink shades=female).
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
stats = []
|
stats = []
|
||||||
@@ -460,7 +587,8 @@ class QualtricsPlotsMixin:
|
|||||||
for col in ranking_cols:
|
for col in ranking_cols:
|
||||||
count_rank_1 = df.filter(pl.col(col) == 1).height
|
count_rank_1 = df.filter(pl.col(col) == 1).height
|
||||||
label = self._clean_voice_label(col)
|
label = self._clean_voice_label(col)
|
||||||
stats.append({'item': label, 'count': count_rank_1})
|
gender = self._get_voice_gender(label) if color_gender else None
|
||||||
|
stats.append({'item': label, 'count': count_rank_1, 'gender': gender})
|
||||||
|
|
||||||
# Convert and sort
|
# Convert and sort
|
||||||
stats_df = pl.DataFrame(stats).sort('count', descending=True)
|
stats_df = pl.DataFrame(stats).sort('count', descending=True)
|
||||||
@@ -474,23 +602,51 @@ class QualtricsPlotsMixin:
|
|||||||
.alias('category')
|
.alias('category')
|
||||||
).to_pandas()
|
).to_pandas()
|
||||||
|
|
||||||
# Bar chart with conditional color
|
if color_gender:
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
# Add gender_category column for combined color encoding
|
||||||
x=alt.X('item:N', title=x_label, sort='-y'),
|
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
|
||||||
y=alt.Y('count:Q', title=y_label),
|
|
||||||
color=alt.Color('category:N',
|
# Define combined domain and range for gender + category
|
||||||
scale=alt.Scale(domain=['Top 3', 'Other'],
|
domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other']
|
||||||
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
range_colors = [
|
||||||
legend=None),
|
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
|
||||||
tooltip=[
|
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
|
||||||
alt.Tooltip('item:N', title='Item'),
|
|
||||||
alt.Tooltip('count:Q', title='1st Place Votes')
|
|
||||||
]
|
]
|
||||||
).properties(
|
|
||||||
title=self._process_title(title),
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
width=width or 800,
|
x=alt.X('item:N', title=x_label, sort='-y'),
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
y=alt.Y('count:Q', title=y_label),
|
||||||
)
|
color=alt.Color('gender_category:N',
|
||||||
|
scale=alt.Scale(domain=domain, range=range_colors),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('item:N', title='Item'),
|
||||||
|
alt.Tooltip('count:Q', title='1st Place Votes'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Bar chart with conditional color
|
||||||
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
|
x=alt.X('item:N', title=x_label, sort='-y'),
|
||||||
|
y=alt.Y('count:Q', title=y_label),
|
||||||
|
color=alt.Color('category:N',
|
||||||
|
scale=alt.Scale(domain=['Top 3', 'Other'],
|
||||||
|
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
||||||
|
legend=None),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('item:N', title='Item'),
|
||||||
|
alt.Tooltip('count:Q', title='1st Place Votes')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
@@ -504,19 +660,43 @@ class QualtricsPlotsMixin:
|
|||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Create a bar chart showing the weighted ranking score for each character."""
|
"""Create a bar chart showing the weighted ranking score for each character.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
||||||
|
"""
|
||||||
weighted_df = self._ensure_dataframe(data).to_pandas()
|
weighted_df = self._ensure_dataframe(data).to_pandas()
|
||||||
|
|
||||||
# Bar chart
|
if color_gender:
|
||||||
bars = alt.Chart(weighted_df).mark_bar(color=color).encode(
|
# Add gender column based on Character name
|
||||||
x=alt.X('Character:N', title=x_label, sort='-y'),
|
weighted_df['gender'] = weighted_df['Character'].apply(self._get_voice_gender)
|
||||||
y=alt.Y('Weighted Score:Q', title=y_label),
|
|
||||||
tooltip=[
|
# Bar chart with gender coloring
|
||||||
alt.Tooltip('Character:N'),
|
bars = alt.Chart(weighted_df).mark_bar().encode(
|
||||||
alt.Tooltip('Weighted Score:Q', title='Score')
|
x=alt.X('Character:N', title=x_label, sort='-y'),
|
||||||
]
|
y=alt.Y('Weighted Score:Q', title=y_label),
|
||||||
)
|
color=alt.Color('gender:N',
|
||||||
|
scale=alt.Scale(domain=['Male', 'Female'],
|
||||||
|
range=[ColorPalette.GENDER_MALE, ColorPalette.GENDER_FEMALE]),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title='Gender')),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('Character:N'),
|
||||||
|
alt.Tooltip('Weighted Score:Q', title='Score'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Bar chart
|
||||||
|
bars = alt.Chart(weighted_df).mark_bar(color=color).encode(
|
||||||
|
x=alt.X('Character:N', title=x_label, sort='-y'),
|
||||||
|
y=alt.Y('Weighted Score:Q', title=y_label),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip('Character:N'),
|
||||||
|
alt.Tooltip('Weighted Score:Q', title='Score')
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
# Text overlay
|
# Text overlay
|
||||||
text = bars.mark_text(
|
text = bars.mark_text(
|
||||||
@@ -545,8 +725,14 @@ class QualtricsPlotsMixin:
|
|||||||
y_label: str = "Number of Times Chosen",
|
y_label: str = "Number of Times Chosen",
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Create a bar plot showing the frequency of voice selections."""
|
"""Create a bar plot showing the frequency of voice selections.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender with highlight/neutral intensity
|
||||||
|
(blue shades=male, pink shades=female).
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if target_column not in df.columns:
|
if target_column not in df.columns:
|
||||||
@@ -573,22 +759,52 @@ class QualtricsPlotsMixin:
|
|||||||
.to_pandas()
|
.to_pandas()
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
if color_gender:
|
||||||
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
# Add gender column based on voice label
|
||||||
y=alt.Y('count:Q', title=y_label),
|
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
|
||||||
color=alt.Color('category:N',
|
# Add gender_category column for combined color encoding
|
||||||
scale=alt.Scale(domain=['Top 8', 'Other'],
|
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
|
||||||
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
|
||||||
legend=None),
|
# Define combined domain and range for gender + category
|
||||||
tooltip=[
|
domain = ['Male - Top 8', 'Male - Other', 'Female - Top 8', 'Female - Other']
|
||||||
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
range_colors = [
|
||||||
alt.Tooltip('count:Q', title='Selections')
|
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
|
||||||
|
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
|
||||||
]
|
]
|
||||||
).properties(
|
|
||||||
title=self._process_title(title),
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
width=width or 800,
|
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
y=alt.Y('count:Q', title=y_label),
|
||||||
)
|
color=alt.Color('gender_category:N',
|
||||||
|
scale=alt.Scale(domain=domain, range=range_colors),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
||||||
|
alt.Tooltip('count:Q', title='Selections'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
|
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
||||||
|
y=alt.Y('count:Q', title=y_label),
|
||||||
|
color=alt.Color('category:N',
|
||||||
|
scale=alt.Scale(domain=['Top 8', 'Other'],
|
||||||
|
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
||||||
|
legend=None),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
||||||
|
alt.Tooltip('count:Q', title='Selections')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
@@ -602,8 +818,14 @@ class QualtricsPlotsMixin:
|
|||||||
y_label: str = "Count of Mentions in Top 3",
|
y_label: str = "Count of Mentions in Top 3",
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
|
color_gender: bool = False,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Question: Which 3 voices are chosen the most out of 18?"""
|
"""Question: Which 3 voices are chosen the most out of 18?
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
color_gender: If True, color bars by voice gender with highlight/neutral intensity
|
||||||
|
(blue shades=male, pink shades=female).
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if target_column not in df.columns:
|
if target_column not in df.columns:
|
||||||
@@ -629,22 +851,52 @@ class QualtricsPlotsMixin:
|
|||||||
.to_pandas()
|
.to_pandas()
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = alt.Chart(stats_df).mark_bar().encode(
|
if color_gender:
|
||||||
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
# Add gender column based on voice label
|
||||||
y=alt.Y('count:Q', title=y_label),
|
stats_df['gender'] = stats_df[target_column].apply(self._get_voice_gender)
|
||||||
color=alt.Color('category:N',
|
# Add gender_category column for combined color encoding
|
||||||
scale=alt.Scale(domain=['Top 3', 'Other'],
|
stats_df['gender_category'] = stats_df['gender'] + ' - ' + stats_df['category']
|
||||||
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
|
||||||
legend=None),
|
# Define combined domain and range for gender + category
|
||||||
tooltip=[
|
domain = ['Male - Top 3', 'Male - Other', 'Female - Top 3', 'Female - Other']
|
||||||
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
range_colors = [
|
||||||
alt.Tooltip('count:Q', title='In Top 3')
|
ColorPalette.GENDER_MALE, ColorPalette.GENDER_MALE_NEUTRAL,
|
||||||
|
ColorPalette.GENDER_FEMALE, ColorPalette.GENDER_FEMALE_NEUTRAL
|
||||||
]
|
]
|
||||||
).properties(
|
|
||||||
title=self._process_title(title),
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
width=width or 800,
|
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
||||||
height=height or getattr(self, 'plot_height', 400)
|
y=alt.Y('count:Q', title=y_label),
|
||||||
)
|
color=alt.Color('gender_category:N',
|
||||||
|
scale=alt.Scale(domain=domain, range=range_colors),
|
||||||
|
legend=alt.Legend(orient='top', direction='horizontal', title=None)),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
||||||
|
alt.Tooltip('count:Q', title='In Top 3'),
|
||||||
|
alt.Tooltip('gender:N', title='Gender')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
chart = alt.Chart(stats_df).mark_bar().encode(
|
||||||
|
x=alt.X(f'{target_column}:N', title=x_label, sort='-y'),
|
||||||
|
y=alt.Y('count:Q', title=y_label),
|
||||||
|
color=alt.Color('category:N',
|
||||||
|
scale=alt.Scale(domain=['Top 3', 'Other'],
|
||||||
|
range=[ColorPalette.PRIMARY, ColorPalette.NEUTRAL]),
|
||||||
|
legend=None),
|
||||||
|
tooltip=[
|
||||||
|
alt.Tooltip(f'{target_column}:N', title='Voice'),
|
||||||
|
alt.Tooltip('count:Q', title='In Top 3')
|
||||||
|
]
|
||||||
|
).properties(
|
||||||
|
title=self._process_title(title),
|
||||||
|
width=width or 800,
|
||||||
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
|
|||||||
18
theme.py
18
theme.py
@@ -59,6 +59,24 @@ class ColorPalette:
|
|||||||
"#457B9D", # Steel Blue
|
"#457B9D", # Steel Blue
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Gender-based colors (Male = Blue tones, Female = Pink tones)
|
||||||
|
# Primary colors by gender
|
||||||
|
GENDER_MALE = "#0077B6" # Medium Blue (same as PRIMARY)
|
||||||
|
GENDER_FEMALE = "#B6007A" # Medium Pink
|
||||||
|
|
||||||
|
# Ranking colors by gender (Darkest -> Lightest)
|
||||||
|
GENDER_MALE_RANK_1 = "#004C6D" # Dark Blue
|
||||||
|
GENDER_MALE_RANK_2 = "#0077B6" # Medium Blue
|
||||||
|
GENDER_MALE_RANK_3 = "#669BBC" # Light Steel Blue
|
||||||
|
|
||||||
|
GENDER_FEMALE_RANK_1 = "#6D004C" # Dark Pink
|
||||||
|
GENDER_FEMALE_RANK_2 = "#B6007A" # Medium Pink
|
||||||
|
GENDER_FEMALE_RANK_3 = "#BC669B" # Light Pink
|
||||||
|
|
||||||
|
# Neutral colors by gender (for non-highlighted items)
|
||||||
|
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
||||||
|
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
||||||
|
|
||||||
|
|
||||||
def jpmc_altair_theme():
|
def jpmc_altair_theme():
|
||||||
"""JPMC brand theme for Altair charts."""
|
"""JPMC brand theme for Altair charts."""
|
||||||
|
|||||||
7
utils.py
7
utils.py
@@ -508,9 +508,16 @@ def update_ppt_alt_text(ppt_path: Union[str, Path], image_source_dir: Union[str,
|
|||||||
print(f"Error updating alt text for {original_path}: {e}")
|
print(f"Error updating alt text for {original_path}: {e}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# Check if image already has alt text set - if so, skip reporting as unmatched
|
||||||
|
existing_alt = _get_shape_alt_text(shape)
|
||||||
|
if existing_alt:
|
||||||
|
# Image already has alt text, no need to report as unmatched
|
||||||
|
continue
|
||||||
|
|
||||||
shape_id = getattr(shape, 'shape_id', getattr(shape, 'id', 'Unknown ID'))
|
shape_id = getattr(shape, 'shape_id', getattr(shape, 'id', 'Unknown ID'))
|
||||||
shape_name = shape.name if shape.name else f"Unnamed Shape (ID: {shape_id})"
|
shape_name = shape.name if shape.name else f"Unnamed Shape (ID: {shape_id})"
|
||||||
hash_type = "pHash" if use_perceptual_hash else "SHA1"
|
hash_type = "pHash" if use_perceptual_hash else "SHA1"
|
||||||
|
|
||||||
unmatched_images.append({
|
unmatched_images.append({
|
||||||
'slide': i+1,
|
'slide': i+1,
|
||||||
'shape_name': shape_name,
|
'shape_name': shape_name,
|
||||||
|
|||||||
Reference in New Issue
Block a user