voice gender split correlation plots
This commit is contained in:
@@ -573,41 +573,24 @@ joined_scale_female = joined_scale.filter(pl.col("Voice").is_in(FEMALE_VOICES))
|
|||||||
joined_ranking_male = joined_ranking.filter(pl.col("Voice").is_in(MALE_VOICES))
|
joined_ranking_male = joined_ranking.filter(pl.col("Voice").is_in(MALE_VOICES))
|
||||||
joined_ranking_female = joined_ranking.filter(pl.col("Voice").is_in(FEMALE_VOICES))
|
joined_ranking_female = joined_ranking.filter(pl.col("Voice").is_in(FEMALE_VOICES))
|
||||||
|
|
||||||
# Colors vs Scale 1-10 (Male voices only)
|
# Colors vs Scale 1-10 (grouped by voice gender)
|
||||||
color_corr_scale_male, _ = utils.transform_speaking_style_color_correlation(joined_scale_male, SPEAKING_STYLES)
|
S.plot_speaking_style_color_correlation_by_gender(
|
||||||
S.plot_speaking_style_color_correlation(
|
data_male=joined_scale_male,
|
||||||
data=color_corr_scale_male,
|
data_female=joined_scale_female,
|
||||||
title="Correlation: Speaking Style Colors and Voice Scale 1-10 (Male Voices Only)"
|
speaking_styles=SPEAKING_STYLES,
|
||||||
|
target_column="Voice_Scale_Score",
|
||||||
|
title="Correlation: Speaking Style Colors and Voice Scale 1-10 (by Voice Gender)",
|
||||||
|
filename="correlation_speaking_style_and_voice_scale_1-10_by_voice_gender_color",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Colors vs Scale 1-10 (Female voices only)
|
# Colors vs Ranking Points (grouped by voice gender)
|
||||||
color_corr_scale_female, _ = utils.transform_speaking_style_color_correlation(joined_scale_female, SPEAKING_STYLES)
|
S.plot_speaking_style_color_correlation_by_gender(
|
||||||
S.plot_speaking_style_color_correlation(
|
data_male=joined_ranking_male,
|
||||||
data=color_corr_scale_female,
|
data_female=joined_ranking_female,
|
||||||
title="Correlation: Speaking Style Colors and Voice Scale 1-10 (Female Voices Only)"
|
speaking_styles=SPEAKING_STYLES,
|
||||||
)
|
target_column="Ranking_Points",
|
||||||
|
title="Correlation: Speaking Style Colors and Voice Ranking Points (by Voice Gender)",
|
||||||
# %%
|
filename="correlation_speaking_style_and_voice_ranking_points_by_voice_gender_color",
|
||||||
# Colors vs Ranking Points (Male voices only)
|
|
||||||
color_corr_ranking_male, _ = utils.transform_speaking_style_color_correlation(
|
|
||||||
joined_ranking_male,
|
|
||||||
SPEAKING_STYLES,
|
|
||||||
target_column="Ranking_Points"
|
|
||||||
)
|
|
||||||
S.plot_speaking_style_color_correlation(
|
|
||||||
data=color_corr_ranking_male,
|
|
||||||
title="Correlation: Speaking Style Colors and Voice Ranking Points (Male Voices Only)"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Colors vs Ranking Points (Female voices only)
|
|
||||||
color_corr_ranking_female, _ = utils.transform_speaking_style_color_correlation(
|
|
||||||
joined_ranking_female,
|
|
||||||
SPEAKING_STYLES,
|
|
||||||
target_column="Ranking_Points"
|
|
||||||
)
|
|
||||||
S.plot_speaking_style_color_correlation(
|
|
||||||
data=color_corr_ranking_female,
|
|
||||||
title="Correlation: Speaking Style Colors and Voice Ranking Points (Female Voices Only)"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
@@ -659,15 +642,17 @@ for _style, _traits in SPEAKING_STYLES.items():
|
|||||||
mo.md(_content)
|
mo.md(_content)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Individual Traits vs Scale 1-10 (Male voices only)
|
# Individual Traits vs Scale 1-10 (grouped by voice gender)
|
||||||
_content = """### Individual Traits vs Scale 1-10 (Male Voices Only)\n\n"""
|
_content = """### Individual Traits vs Scale 1-10 (by Voice Gender)\n\n"""
|
||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
_fig = S.plot_speaking_style_scale_correlation(
|
_fig = S.plot_speaking_style_scale_correlation_by_gender(
|
||||||
data=joined_scale_male,
|
data_male=joined_scale_male,
|
||||||
|
data_female=joined_scale_female,
|
||||||
style_color=_style,
|
style_color=_style,
|
||||||
style_traits=_traits,
|
style_traits=_traits,
|
||||||
title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10 (Male Voices Only)",
|
title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10 (by Voice Gender)",
|
||||||
|
filename=f"correlation_speaking_style_and_voice_scale_1-10_by_voice_gender_{_style.lower()}",
|
||||||
)
|
)
|
||||||
_content += f"""
|
_content += f"""
|
||||||
#### Speaking Style **{_style}**:
|
#### Speaking Style **{_style}**:
|
||||||
@@ -678,53 +663,17 @@ for _style, _traits in SPEAKING_STYLES.items():
|
|||||||
mo.md(_content)
|
mo.md(_content)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Individual Traits vs Scale 1-10 (Female voices only)
|
# Individual Traits vs Ranking Points (grouped by voice gender)
|
||||||
_content = """### Individual Traits vs Scale 1-10 (Female Voices Only)\n\n"""
|
_content = """### Individual Traits vs Ranking Points (by Voice Gender)\n\n"""
|
||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
_fig = S.plot_speaking_style_scale_correlation(
|
_fig = S.plot_speaking_style_ranking_correlation_by_gender(
|
||||||
data=joined_scale_female,
|
data_male=joined_ranking_male,
|
||||||
|
data_female=joined_ranking_female,
|
||||||
style_color=_style,
|
style_color=_style,
|
||||||
style_traits=_traits,
|
style_traits=_traits,
|
||||||
title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10 (Female Voices Only)",
|
title=f"Correlation: Speaking Style {_style} and Voice Ranking Points (by Voice Gender)",
|
||||||
)
|
filename=f"correlation_speaking_style_and_voice_ranking_points_by_voice_gender_{_style.lower()}",
|
||||||
_content += f"""
|
|
||||||
#### Speaking Style **{_style}**:
|
|
||||||
|
|
||||||
{mo.ui.altair_chart(_fig)}
|
|
||||||
|
|
||||||
"""
|
|
||||||
mo.md(_content)
|
|
||||||
|
|
||||||
# %%
|
|
||||||
# Individual Traits vs Ranking Points (Male voices only)
|
|
||||||
_content = """### Individual Traits vs Ranking Points (Male Voices Only)\n\n"""
|
|
||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
|
||||||
_fig = S.plot_speaking_style_ranking_correlation(
|
|
||||||
data=joined_ranking_male,
|
|
||||||
style_color=_style,
|
|
||||||
style_traits=_traits,
|
|
||||||
title=f"Correlation: Speaking Style {_style} and Voice Ranking Points (Male Voices Only)",
|
|
||||||
)
|
|
||||||
_content += f"""
|
|
||||||
#### Speaking Style **{_style}**:
|
|
||||||
|
|
||||||
{mo.ui.altair_chart(_fig)}
|
|
||||||
|
|
||||||
"""
|
|
||||||
mo.md(_content)
|
|
||||||
|
|
||||||
# %%
|
|
||||||
# Individual Traits vs Ranking Points (Female voices only)
|
|
||||||
_content = """### Individual Traits vs Ranking Points (Female Voices Only)\n\n"""
|
|
||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
|
||||||
_fig = S.plot_speaking_style_ranking_correlation(
|
|
||||||
data=joined_ranking_female,
|
|
||||||
style_color=_style,
|
|
||||||
style_traits=_traits,
|
|
||||||
title=f"Correlation: Speaking Style {_style} and Voice Ranking Points (Female Voices Only)",
|
|
||||||
)
|
)
|
||||||
_content += f"""
|
_content += f"""
|
||||||
#### Speaking Style **{_style}**:
|
#### Speaking Style **{_style}**:
|
||||||
|
|||||||
323
plots.py
323
plots.py
@@ -1256,6 +1256,237 @@ class QualtricsPlotsMixin:
|
|||||||
chart = self._save_plot(chart, title, filename=filename)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
|
def _create_gender_correlation_legend(self) -> alt.Chart:
    """Build the hand-made legend strip used by the gender correlation plots.

    The strip is one horizontal row meant to sit underneath the main chart:
        [■][■] Male     [■][■] Female

    Each gender gets two swatches (its positive and its negative correlation
    colour, taken from ``ColorPalette``) followed by a text label.

    Returns:
        The swatches and labels layered into a single 200x20 chart.
    """
    def hidden_x() -> alt.X:
        # Swatches and labels share the same axis-less 0-9 domain so they line up.
        return alt.X('x:Q', axis=None, scale=alt.Scale(domain=[0, 9]))

    # Male pair at x=0/1, female pair at x=5/6; the gap leaves room for the "Male" label.
    swatch_df = pd.DataFrame({
        "x": [0, 1, 5, 6],
        "color": [
            ColorPalette.CORR_MALE_POSITIVE,
            ColorPalette.CORR_MALE_NEGATIVE,
            ColorPalette.CORR_FEMALE_POSITIVE,
            ColorPalette.CORR_FEMALE_NEGATIVE,
        ],
    })
    swatches = (
        alt.Chart(swatch_df)
        .mark_rect(width=12, height=12)
        .encode(
            x=hidden_x(),
            y=alt.value(6),
            # scale=None: the column already holds the colour values to draw.
            color=alt.Color('color:N', scale=None),
        )
    )

    # Each caption starts just to the right of its pair of swatches.
    caption_df = pd.DataFrame({
        "x": [2.3, 7.3],
        "label": ["Male", "Female"],
    })
    captions = (
        alt.Chart(caption_df)
        .mark_text(align='left', baseline='middle', fontSize=11)
        .encode(
            x=hidden_x(),
            y=alt.value(6),
            text='label:N',
        )
    )

    return alt.layer(swatches, captions).properties(width=200, height=20)
|
||||||
|
|
||||||
|
def plot_speaking_style_scale_correlation_by_gender(
    self,
    style_color: str,
    style_traits: list[str],
    data_male: pl.LazyFrame | pl.DataFrame,
    data_female: pl.LazyFrame | pl.DataFrame,
    title: str | None = None,
    filename: str | None = None,
    width: int | str | None = None,
    height: int | None = None,
) -> alt.Chart:
    """Plot trait-score vs. Voice Scale correlations as grouped male/female bars.

    For every trait in ``style_traits`` and for each gender, the rows whose
    ``Right_Anchor`` equals the trait are selected and the correlation between
    ``score`` and ``Voice_Scale_Score`` is computed on the rows where both are
    non-null. A trait/gender pair with fewer than two such rows gets no bar.

    Args:
        style_color: The speaking style color (e.g., "Green", "Blue"); used in
            the default title and in the "no data" placeholder.
        style_traits: Traits belonging to this style. A ``|`` inside a trait
            name is shown as a line break on the x axis.
        data_male: Data filtered to male voices only.
        data_female: Data filtered to female voices only.
        title: Chart title; a default mentioning ``style_color`` is used when None.
        filename: Optional explicit filename, forwarded to ``self._save_plot``.
        width: Chart width (defaults to 800).
        height: Chart height (defaults to 350).

    Returns:
        The value returned by ``self._save_plot`` for the bar chart stacked on
        top of the custom gender legend. If no correlation could be computed at
        all, a plain text placeholder chart is returned instead and nothing is
        passed to ``self._save_plot``.
    """
    import math  # local import: only needed for the NaN guard below

    # Order matters: bars are emitted male first, then female, for each trait.
    frames_by_gender = (
        ("Male", self._ensure_dataframe(data_male)),
        ("Female", self._ensure_dataframe(data_female)),
    )

    if title is None:
        title = f"Speaking style {style_color} and voice scale 1-10 correlations (by Voice Gender)"

    trait_correlations = []
    for trait in style_traits:
        trait_display = trait.replace('|', '\n')

        for gender, df in frames_by_gender:
            valid = (
                df.filter(pl.col("Right_Anchor") == trait)
                .select(["score", "Voice_Scale_Score"])
                .drop_nulls()
            )
            # A correlation needs at least two complete observations.
            if valid.height <= 1:
                continue

            corr_val = valid.select(pl.corr("score", "Voice_Scale_Score")).item()
            # An undefined correlation (None, or NaN when a column has no
            # variance) is drawn as a zero-height bar instead of being
            # mislabelled as negative.
            if corr_val is None or math.isnan(corr_val):
                corr_val = 0.0

            trait_correlations.append({
                "trait_display": trait_display,
                "Gender": gender,
                "correlation": corr_val,
                "color_key": f"{gender}_Pos" if corr_val >= 0 else f"{gender}_Neg",
            })

    if not trait_correlations:
        return alt.Chart(pd.DataFrame({'text': [f"No data for {style_color} Style"]})).mark_text().encode(text='text:N')

    plot_df = pl.DataFrame(trait_correlations).to_pandas()

    main_chart = alt.Chart(plot_df).mark_bar().encode(
        x=alt.X('trait_display:N', title=None, axis=alt.Axis(labelAngle=0, grid=False)),
        xOffset='Gender:N',
        y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
        color=alt.Color(
            'color_key:N',
            scale=alt.Scale(
                domain=['Male_Pos', 'Female_Pos', 'Male_Neg', 'Female_Neg'],
                range=[
                    ColorPalette.CORR_MALE_POSITIVE,
                    ColorPalette.CORR_FEMALE_POSITIVE,
                    ColorPalette.CORR_MALE_NEGATIVE,
                    ColorPalette.CORR_FEMALE_NEGATIVE,
                ],
            ),
            # The built-in legend is suppressed; a custom one is appended below.
            legend=None,
        ),
        tooltip=[
            alt.Tooltip('trait_display:N', title='Trait'),
            alt.Tooltip('Gender:N'),
            alt.Tooltip('correlation:Q', format='.3f'),
        ],
    ).properties(
        title=self._process_title(title),
        width=width or 800,
        height=height or 350,
    )

    # Add custom legend below the chart
    legend = self._create_gender_correlation_legend()
    chart = alt.vconcat(main_chart, legend, spacing=10).resolve_scale(color='independent')

    chart = self._save_plot(chart, title, filename=filename)
    return chart
|
||||||
|
|
||||||
|
def plot_speaking_style_ranking_correlation_by_gender(
    self,
    style_color: str,
    style_traits: list[str],
    data_male: pl.LazyFrame | pl.DataFrame,
    data_female: pl.LazyFrame | pl.DataFrame,
    title: str | None = None,
    filename: str | None = None,
    width: int | str | None = None,
    height: int | None = None,
) -> alt.Chart:
    """Plot trait-score vs. Ranking Points correlations as grouped male/female bars.

    For every trait in ``style_traits`` and for each gender, the rows whose
    ``Right_Anchor`` equals the trait are selected and the correlation between
    ``score`` and ``Ranking_Points`` is computed on the rows where both are
    non-null. A trait/gender pair with fewer than two such rows gets no bar.

    Args:
        style_color: The speaking style color (e.g., "Green", "Blue"); used in
            the default title and in the "no data" placeholder.
        style_traits: Traits belonging to this style. A ``|`` inside a trait
            name is shown as a line break on the x axis.
        data_male: Data filtered to male voices only.
        data_female: Data filtered to female voices only.
        title: Chart title; a default mentioning ``style_color`` is used when None.
        filename: Optional explicit filename, forwarded to ``self._save_plot``.
        width: Chart width (defaults to 800).
        height: Chart height (defaults to 350).

    Returns:
        The value returned by ``self._save_plot`` for the bar chart stacked on
        top of the custom gender legend. If no correlation could be computed at
        all, a plain text placeholder chart is returned instead and nothing is
        passed to ``self._save_plot``.
    """
    import math  # local import: only needed for the NaN guard below

    # Order matters: bars are emitted male first, then female, for each trait.
    frames_by_gender = (
        ("Male", self._ensure_dataframe(data_male)),
        ("Female", self._ensure_dataframe(data_female)),
    )

    if title is None:
        title = f"Speaking style {style_color} and voice ranking points correlations (by Voice Gender)"

    trait_correlations = []
    for trait in style_traits:
        trait_display = trait.replace('|', '\n')

        for gender, df in frames_by_gender:
            valid = (
                df.filter(pl.col("Right_Anchor") == trait)
                .select(["score", "Ranking_Points"])
                .drop_nulls()
            )
            # A correlation needs at least two complete observations.
            if valid.height <= 1:
                continue

            corr_val = valid.select(pl.corr("score", "Ranking_Points")).item()
            # An undefined correlation (None, or NaN when a column has no
            # variance) is drawn as a zero-height bar instead of being
            # mislabelled as negative.
            if corr_val is None or math.isnan(corr_val):
                corr_val = 0.0

            trait_correlations.append({
                "trait_display": trait_display,
                "Gender": gender,
                "correlation": corr_val,
                "color_key": f"{gender}_Pos" if corr_val >= 0 else f"{gender}_Neg",
            })

    if not trait_correlations:
        return alt.Chart(pd.DataFrame({'text': [f"No data for {style_color} Style"]})).mark_text().encode(text='text:N')

    plot_df = pl.DataFrame(trait_correlations).to_pandas()

    main_chart = alt.Chart(plot_df).mark_bar().encode(
        x=alt.X('trait_display:N', title='Speaking Style Trait', axis=alt.Axis(labelAngle=0, grid=False)),
        xOffset='Gender:N',
        y=alt.Y('correlation:Q', title='Correlation', scale=alt.Scale(domain=[-1, 1]), axis=alt.Axis(grid=True)),
        color=alt.Color(
            'color_key:N',
            scale=alt.Scale(
                domain=['Male_Pos', 'Female_Pos', 'Male_Neg', 'Female_Neg'],
                range=[
                    ColorPalette.CORR_MALE_POSITIVE,
                    ColorPalette.CORR_FEMALE_POSITIVE,
                    ColorPalette.CORR_MALE_NEGATIVE,
                    ColorPalette.CORR_FEMALE_NEGATIVE,
                ],
            ),
            # The built-in legend is suppressed; a custom one is appended below.
            legend=None,
        ),
        tooltip=[
            alt.Tooltip('trait_display:N', title='Trait'),
            alt.Tooltip('Gender:N'),
            alt.Tooltip('correlation:Q', format='.3f'),
        ],
    ).properties(
        title=self._process_title(title),
        width=width or 800,
        height=height or 350,
    )

    # Add custom legend below the chart
    legend = self._create_gender_correlation_legend()
    chart = alt.vconcat(main_chart, legend, spacing=10).resolve_scale(color='independent')

    chart = self._save_plot(chart, title, filename=filename)
    return chart
|
||||||
|
|
||||||
def plot_speaking_style_color_correlation(
|
def plot_speaking_style_color_correlation(
|
||||||
self,
|
self,
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
@@ -1313,6 +1544,98 @@ class QualtricsPlotsMixin:
|
|||||||
chart = self._save_plot(chart, title, filename=filename)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
|
def plot_speaking_style_color_correlation_by_gender(
    self,
    data_male: pl.LazyFrame | pl.DataFrame,
    data_female: pl.LazyFrame | pl.DataFrame,
    speaking_styles: dict[str, list[str]],
    target_column: str = "Voice_Scale_Score",
    title: str = "Speaking Style Colors Correlation (by Voice Gender)",
    filename: str | None = None,
    width: int | str | None = None,
    height: int | None = None,
) -> alt.Chart:
    """Draw per-color correlations as side-by-side male/female bars.

    The per-color correlations are obtained separately for each gender from
    ``utils.transform_speaking_style_color_correlation`` (first element of its
    return value), tagged with the gender and a sign-dependent color key, and
    then concatenated into one table for plotting.

    Args:
        data_male: Data filtered to male voices only.
        data_female: Data filtered to female voices only.
        speaking_styles: Mapping of color name to its constituent traits.
        target_column: Column to correlate against ("Voice_Scale_Score" or
            "Ranking_Points").
        title: Chart title.
        filename: Optional explicit filename, forwarded to ``self._save_plot``.
        width: Chart width (defaults to 400).
        height: Chart height (defaults to 350).

    Returns:
        The value returned by ``self._save_plot`` for the bar chart stacked on
        top of the custom gender legend.
    """
    import utils

    def tag_with_gender(frame, gender: str):
        # Correlate one gender's data, then label each row with the gender and
        # with "<gender>_Pos" / "<gender>_Neg" depending on the correlation sign.
        per_color, _ = utils.transform_speaking_style_color_correlation(
            frame, speaking_styles, target_column=target_column
        )
        sign_key = (
            pl.when(pl.col("correlation") >= 0)
            .then(pl.lit(f"{gender}_Pos"))
            .otherwise(pl.lit(f"{gender}_Neg"))
        )
        return per_color.with_columns([
            pl.lit(gender).alias("Gender"),
            sign_key.alias("color_key"),
        ])

    male_frame = self._ensure_dataframe(data_male)
    female_frame = self._ensure_dataframe(data_female)

    # Male rows first, then female rows.
    combined = pl.concat([
        tag_with_gender(male_frame, "Male"),
        tag_with_gender(female_frame, "Female"),
    ])

    # The plot expects "Color", "correlation" and "n_traits" columns in the
    # table produced by the utils helper.
    x_encoding = alt.X(
        'Color:N',
        title='Speaking Style Color',
        axis=alt.Axis(labelAngle=0, grid=False),
        sort=["Green", "Blue", "Orange", "Red"],
    )
    y_encoding = alt.Y(
        'correlation:Q',
        title='Average Correlation',
        scale=alt.Scale(domain=[-1, 1]),
        axis=alt.Axis(grid=True),
    )
    color_encoding = alt.Color(
        'color_key:N',
        scale=alt.Scale(
            domain=['Male_Pos', 'Female_Pos', 'Male_Neg', 'Female_Neg'],
            range=[
                ColorPalette.CORR_MALE_POSITIVE,
                ColorPalette.CORR_FEMALE_POSITIVE,
                ColorPalette.CORR_MALE_NEGATIVE,
                ColorPalette.CORR_FEMALE_NEGATIVE,
            ],
        ),
        # The built-in legend is suppressed; a custom one is appended below.
        legend=None,
    )
    tooltips = [
        alt.Tooltip('Color:N', title='Speaking Style'),
        alt.Tooltip('Gender:N'),
        alt.Tooltip('correlation:Q', format='.3f', title='Avg Correlation'),
        alt.Tooltip('n_traits:Q', title='# Traits'),
    ]

    bars = alt.Chart(combined.to_pandas()).mark_bar().encode(
        x=x_encoding,
        xOffset='Gender:N',
        y=y_encoding,
        color=color_encoding,
        tooltip=tooltips,
    )
    main_chart = bars.properties(
        title=self._process_title(title),
        width=width or 400,
        height=height or 350,
    )

    # Stack the hand-made legend under the bars; it keeps its own color scale.
    legend = self._create_gender_correlation_legend()
    stacked = alt.vconcat(main_chart, legend, spacing=10).resolve_scale(color='independent')

    return self._save_plot(stacked, title, filename=filename)
|
||||||
|
|
||||||
def plot_demographic_distribution(
|
def plot_demographic_distribution(
|
||||||
self,
|
self,
|
||||||
column: str,
|
column: str,
|
||||||
|
|||||||
7
theme.py
7
theme.py
@@ -77,6 +77,13 @@ class ColorPalette:
|
|||||||
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
||||||
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
||||||
|
|
||||||
|
# Gender colors for correlation plots (green/red indicate +/- correlation)
|
||||||
|
# Male = darker shade, Female = lighter shade
|
||||||
|
CORR_MALE_POSITIVE = "#1B5E20" # Dark Green
|
||||||
|
CORR_FEMALE_POSITIVE = "#81C784" # Light Green
|
||||||
|
CORR_MALE_NEGATIVE = "#B71C1C" # Dark Red
|
||||||
|
CORR_FEMALE_NEGATIVE = "#E57373" # Light Red
|
||||||
|
|
||||||
# Speaking Style Colors (named after the style quadrant colors)
|
# Speaking Style Colors (named after the style quadrant colors)
|
||||||
STYLE_GREEN = "#2E7D32" # Forest Green
|
STYLE_GREEN = "#2E7D32" # Forest Green
|
||||||
STYLE_BLUE = "#1565C0" # Strong Blue
|
STYLE_BLUE = "#1565C0" # Strong Blue
|
||||||
|
|||||||
Reference in New Issue
Block a user