base correlations
This commit is contained in:
@@ -664,5 +664,140 @@ def _():
|
||||
return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
def _():
    # Section heading for the speaking-style correlation part of the notebook.
    mo.md(r"""
    ## Correlation Speaking Styles
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(S, data, top3_voices):
    # Build the two joined frames consumed by the correlation cells:
    # speaking-style data paired with the 1-10 voice scale data, and
    # speaking-style data paired with the voice ranking data.

    # Speaking-style responses arrive in two parts, each with its own choice map.
    ss_or, choice_map_or = S.get_ss_orange_red(data)
    ss_gb, choice_map_gb = S.get_ss_green_blue(data)

    # Combine the data
    ss_all = ss_or.join(ss_gb, on='_recordId')
    choice_map = {**choice_map_or, **choice_map_gb}

    # Process the combined speaking-style data once; both joins reuse the result.
    df_style = utils.process_speaking_style_data(ss_all, choice_map)

    # Only the first element returned by get_voice_scale_1_10 is needed here.
    vscales = S.get_voice_scale_1_10(data)[0]
    df_scale_long = utils.process_voice_scale_data(vscales)
    joined_scale = df_style.join(df_scale_long, on=["_recordId", "Voice"], how="inner")

    df_ranking = utils.process_voice_ranking_data(top3_voices)
    joined_ranking = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
    return joined_ranking, joined_scale
|
||||
|
||||
|
||||
@app.cell
def _():
    # Sub-heading for the color-level correlation against the 1-10 voice scale.
    mo.md(r"""
    ### Colors vs Scale 1-10
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(S, joined_scale):
    # Collapse the joined scale data to one row per speaking-style color
    # (target column left at the transform's default), then chart it.
    color_corr_scale = utils.transform_speaking_style_color_correlation(
        joined_scale,
        SPEAKING_STYLES,
    )[0]  # the second tuple element is not used in this cell
    S.plot_speaking_style_color_correlation(
        title="Correlation: Speaking Style Colors and Voice Scale 1-10",
        data=color_corr_scale,
    )
    return
|
||||
|
||||
|
||||
@app.cell
def _():
    # Sub-heading for the color-level correlation against ranking points.
    mo.md(r"""
    ### Colors vs Ranking Points
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(S, joined_ranking):
    # Same color-level aggregation as for the voice scale, but correlated
    # against the "Ranking_Points" column of the ranking join.
    color_corr_ranking = utils.transform_speaking_style_color_correlation(
        joined_ranking,
        SPEAKING_STYLES,
        target_column="Ranking_Points",
    )[0]  # the second tuple element is not used in this cell
    S.plot_speaking_style_color_correlation(
        title="Correlation: Speaking Style Colors and Voice Ranking Points",
        data=color_corr_ranking,
    )
    return
|
||||
|
||||
|
||||
@app.cell
def _():
    # Sub-heading for the per-trait correlation against the 1-10 voice scale.
    mo.md(r"""
    ### Individual Traits vs Scale 1-10
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(S, joined_scale):
    # Per-trait correlation charts against the 1-10 voice scale: one chart per
    # speaking-style color, all embedded into a single markdown output.
    _content = ""

    for _style, _traits in SPEAKING_STYLES.items():
        _fig = S.plot_speaking_style_correlation(
            data=joined_scale,
            style_color=_style,
            style_traits=_traits,
            # The title names the voice scale, matching the data plotted here.
            title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
        )
        _content += f"""
        #### Speaking Style **{_style}**:

        {mo.ui.altair_chart(_fig)}

        """
    mo.md(_content)
    return
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
def _():
    # Sub-heading for the per-trait correlation against ranking points.
    mo.md(r"""
    ### Individual Traits vs Ranking Points
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(S, joined_ranking):
    # Per-trait correlation charts against ranking points: one markdown
    # section per speaking-style color, concatenated into a single output.
    _sections = []

    for _style, _traits in SPEAKING_STYLES.items():
        _chart = S.plot_speaking_style_ranking_correlation(
            title=f"Correlation: Speaking Style {_style} and Voice Ranking Points",
            style_traits=_traits,
            style_color=_style,
            data=joined_ranking,
        )
        _sections.append(f"""
        #### Speaking Style **{_style}**:

        {mo.ui.altair_chart(_chart)}

        """)
    mo.md("".join(_sections))
    return
|
||||
|
||||
|
||||
# Run the app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
||||
|
||||
53
plots.py
53
plots.py
@@ -1048,6 +1048,59 @@ class QualtricsPlotsMixin:
|
||||
chart = self._save_plot(chart, title)
|
||||
return chart
|
||||
|
||||
def plot_speaking_style_color_correlation(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
    width: int | str | None = None,
    height: int | None = None,
) -> alt.Chart:
    """Draw a bar chart with one average-correlation bar per speaking style color.

    Original use-case: "I want to create high-level correlation plots between
    'green, blue, orange, red' speaking styles and the 'voice scale scores'.
    I want to go to one plot with one bar for each color."

    Args:
        data: Frame with columns [Color, correlation, n_traits], as produced by
            utils.transform_speaking_style_color_correlation.
        title: Chart title (supports <br> for line breaks).
        width: Chart width in pixels; falls back to 400 when not given.
        height: Chart height in pixels; falls back to 350 when not given.

    Returns:
        The chart as returned by ``self._save_plot``.
    """
    frame = self._ensure_dataframe(data)
    base = alt.Chart(frame.to_pandas())

    # Fixed color order on the x axis, correlation on a fixed [-1, 1] y scale.
    x_encoding = alt.X(
        'Color:N',
        title=None,
        axis=alt.Axis(labelAngle=0),
        sort=["Green", "Blue", "Orange", "Red"],
    )
    y_encoding = alt.Y(
        'correlation:Q',
        title='Average Correlation',
        scale=alt.Scale(domain=[-1, 1]),
    )

    # Bar fill reflects the sign of the correlation
    # (matches plot_speaking_style_correlation).
    sign_fill = alt.condition(
        alt.datum.correlation >= 0,
        alt.value('green'),
        alt.value('red'),
    )

    hover_fields = [
        alt.Tooltip('Color:N', title='Speaking Style'),
        alt.Tooltip('correlation:Q', format='.3f', title='Avg Correlation'),
        alt.Tooltip('n_traits:Q', title='# Traits'),
    ]

    bars = base.mark_bar().encode(
        x=x_encoding,
        y=y_encoding,
        color=sign_fill,
        tooltip=hover_fields,
    )
    sized = bars.properties(
        title=self._process_title(title),
        width=width or 400,
        height=height or 350,
    )

    return self._save_plot(sized, title)
|
||||
|
||||
def plot_demographic_distribution(
|
||||
self,
|
||||
column: str,
|
||||
|
||||
6
theme.py
6
theme.py
@@ -77,6 +77,12 @@ class ColorPalette:
|
||||
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
||||
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
||||
|
||||
# Speaking Style Colors (named after the style quadrant colors)
|
||||
STYLE_GREEN = "#2E7D32" # Forest Green
|
||||
STYLE_BLUE = "#1565C0" # Strong Blue
|
||||
STYLE_ORANGE = "#E07A00" # Burnt Orange
|
||||
STYLE_RED = "#C62828" # Deep Red
|
||||
|
||||
|
||||
def jpmc_altair_theme():
|
||||
"""JPMC brand theme for Altair charts."""
|
||||
|
||||
63
utils.py
63
utils.py
@@ -1676,6 +1676,69 @@ def join_voice_and_style_data(
|
||||
how="inner"
|
||||
)
|
||||
|
||||
|
||||
def transform_speaking_style_color_correlation(
    joined_df: pl.LazyFrame | pl.DataFrame,
    speaking_styles: dict[str, list[str]],
    target_column: str = "Voice_Scale_Score"
) -> tuple[pl.DataFrame, dict | None]:
    """Aggregate speaking style correlation by color (Green, Blue, Orange, Red).

    Original use-case: "I want to create high-level correlation plots between
    'green, blue, orange, red' speaking styles and the 'voice scale scores'.
    I want to go to one plot with one bar for each color."

    For every trait of a color, the correlation between ``score`` and
    ``target_column`` is computed over the rows whose ``Right_Anchor`` equals
    that trait; the per-color value is the plain mean of those trait
    correlations. Traits with fewer than two non-null rows, or whose
    correlation is undefined (null or NaN), are left out of the mean. Colors
    with no usable trait are omitted from the result.

    Parameters
    ----------
    joined_df : pl.LazyFrame or pl.DataFrame
        Pre-fetched data from joining speaking style data with target data.
        Must have columns: Right_Anchor, score, and the target_column.
        A LazyFrame is collected first.
    speaking_styles : dict
        Dictionary mapping color names to their constituent traits.
        Typically imported from speaking_styles.SPEAKING_STYLES
    target_column : str
        The column to correlate against speaking style scores.
        Default: "Voice_Scale_Score" (for voice scale 1-10)
        Alternative: "Ranking_Points" (for top 3 voice ranking)

    Returns
    -------
    tuple[pl.DataFrame, dict | None]
        (DataFrame with columns [Color, correlation, n_traits], None).
        The columns are present even when no color produced a correlation.
    """
    import math  # local import: only used to reject NaN correlations

    if isinstance(joined_df, pl.LazyFrame):
        joined_df = joined_df.collect()

    color_correlations = []

    for color, traits in speaking_styles.items():
        trait_corrs = []
        for trait in traits:
            # Rows for this specific trait with both values present
            valid_data = (
                joined_df
                .filter(pl.col("Right_Anchor") == trait)
                .select(["score", target_column])
                .drop_nulls()
            )

            # A correlation needs at least two observations
            if valid_data.height <= 1:
                continue

            corr_val = valid_data.select(pl.corr("score", target_column)).item()

            # An undefined correlation (e.g. a column without variance) can
            # come back as NaN; one NaN would turn the whole color average
            # into NaN, so such traits are skipped like null results.
            if corr_val is None or math.isnan(corr_val):
                continue

            trait_corrs.append(corr_val)

        # Average across all usable traits for this color
        if trait_corrs:
            color_correlations.append({
                "Color": color,
                "correlation": sum(trait_corrs) / len(trait_corrs),
                "n_traits": len(trait_corrs),
            })

    # Explicit schema keeps the documented columns even for an empty result,
    # so downstream plotting code can always reference them.
    result_df = pl.DataFrame(
        color_correlations,
        schema={
            "Color": pl.Utf8,
            "correlation": pl.Float64,
            "n_traits": pl.Int64,
        },
    )
    return result_df, None
|
||||
|
||||
|
||||
def process_voice_ranking_data(
|
||||
df: Union[pl.LazyFrame, pl.DataFrame]
|
||||
) -> pl.DataFrame:
|
||||
|
||||
Reference in New Issue
Block a user