base correlations
This commit is contained in:
@@ -664,5 +664,140 @@ def _():
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
# Markdown-only cell: emits the second-level heading that opens the
# speaking-style correlation part of the notebook.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ## Correlation Speaking Styles
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(S, data, top3_voices):
|
||||||
|
ss_or, choice_map_or = S.get_ss_orange_red(data)
|
||||||
|
ss_gb, choice_map_gb = S.get_ss_green_blue(data)
|
||||||
|
|
||||||
|
# Combine the data
|
||||||
|
ss_all = ss_or.join(ss_gb, on='_recordId')
|
||||||
|
_d = ss_all.collect()
|
||||||
|
|
||||||
|
choice_map = {**choice_map_or, **choice_map_gb}
|
||||||
|
# print(_d.head())
|
||||||
|
# print(choice_map)
|
||||||
|
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
|
||||||
|
|
||||||
|
df_style = utils.process_speaking_style_data(ss_all, choice_map)
|
||||||
|
|
||||||
|
vscales = S.get_voice_scale_1_10(data)[0]
|
||||||
|
df_scale_long = utils.process_voice_scale_data(vscales)
|
||||||
|
|
||||||
|
joined_scale = df_style.join(df_scale_long, on=["_recordId", "Voice"], how="inner")
|
||||||
|
|
||||||
|
df_ranking = utils.process_voice_ranking_data(top3_voices)
|
||||||
|
joined_ranking = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
|
||||||
|
return joined_ranking, joined_scale
|
||||||
|
|
||||||
|
|
||||||
|
# Markdown-only cell: sub-heading for the color-level correlation against the
# 1-10 voice scale. Marked hide_code=True like the notebook's other
# markdown-only heading cells.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ### Colors vs Scale 1-10
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, joined_scale):
    # Collapse the per-trait correlations into one averaged row per color.
    # The helper returns a (frame, extra) pair; only the frame is needed.
    color_corr_scale = utils.transform_speaking_style_color_correlation(
        joined_df=joined_scale,
        speaking_styles=SPEAKING_STYLES,
    )[0]
    S.plot_speaking_style_color_correlation(
        title="Correlation: Speaking Style Colors and Voice Scale 1-10",
        data=color_corr_scale,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Markdown-only cell: sub-heading for the color-level correlation against
# ranking points. Marked hide_code=True like the notebook's other
# markdown-only heading cells.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ### Colors vs Ranking Points
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, joined_ranking):
    # One averaged correlation per color, this time against ranking points.
    # The helper returns a (frame, extra) pair; only the frame is needed.
    color_corr_ranking = utils.transform_speaking_style_color_correlation(
        joined_df=joined_ranking,
        speaking_styles=SPEAKING_STYLES,
        target_column="Ranking_Points",
    )[0]
    S.plot_speaking_style_color_correlation(
        title="Correlation: Speaking Style Colors and Voice Ranking Points",
        data=color_corr_ranking,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Markdown-only cell: sub-heading for the per-trait correlation against the
# 1-10 voice scale. Marked hide_code=True like the notebook's other
# markdown-only heading cells.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ### Individual Traits vs Scale 1-10
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, joined_scale):
    # Build one markdown section per speaking-style color, each embedding the
    # per-trait correlation chart against the 1-10 voice scale.
    _content = ""

    for _style, _traits in SPEAKING_STYLES.items():
        _fig = S.plot_speaking_style_correlation(
            data=joined_scale,
            style_color=_style,
            style_traits=_traits,
            # This cell plots the scale data, so the title must say so. The
            # previous "Voice Ranking Points" wording was copied from the
            # ranking cell and mislabelled every chart here.
            # NOTE(review): if the plot helper saves charts keyed by title,
            # the duplicate titles would also have collided - confirm.
            title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
        )
        _content += f"""
        #### Speaking Style **{_style}**:

        {mo.ui.altair_chart(_fig)}

        """
    mo.md(_content)
    return
|
||||||
|
|
||||||
|
|
||||||
|
# Markdown-only cell: emits the sub-heading for the per-trait correlation
# charts against ranking points.
@app.cell(hide_code=True)
def _():
    mo.md(r"""
    ### Individual Traits vs Ranking Points
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(S, joined_ranking):
    # Collect one markdown section per speaking-style color, each embedding
    # its per-trait ranking-correlation chart, then render them together.
    _sections = []
    for _color, _color_traits in SPEAKING_STYLES.items():
        _chart = S.plot_speaking_style_ranking_correlation(
            data=joined_ranking,
            style_color=_color,
            style_traits=_color_traits,
            title=f"Correlation: Speaking Style {_color} and Voice Ranking Points",
        )
        _sections.append(f"""
        #### Speaking Style **{_color}**:

        {mo.ui.altair_chart(_chart)}

        """)
    mo.md("".join(_sections))
    return
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run()
|
app.run()
|
||||||
|
|||||||
53
plots.py
53
plots.py
@@ -1048,6 +1048,59 @@ class QualtricsPlotsMixin:
|
|||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
|
def plot_speaking_style_color_correlation(
    self,
    data: pl.LazyFrame | pl.DataFrame | None = None,
    title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
    width: int | str | None = None,
    height: int | None = None,
) -> alt.Chart:
    """Draw a bar chart with one bar per speaking-style color.

    This is the high-level companion to the per-trait correlation plots:
    each of the Green, Blue, Orange and Red styles is summarised by a
    single averaged correlation value.

    Args:
        data: Frame with columns [Color, correlation, n_traits], as
            produced by utils.transform_speaking_style_color_correlation.
        title: Chart title (supports <br> for line breaks).
        width: Chart width in pixels; 400 is used when not given.
        height: Chart height in pixels; 350 is used when not given.

    Returns:
        The Altair chart, as returned by self._save_plot.
    """
    frame = self._ensure_dataframe(data)

    # Fixed color order on the x axis, horizontal tick labels.
    x_encoding = alt.X(
        "Color:N",
        title=None,
        axis=alt.Axis(labelAngle=0),
        sort=["Green", "Blue", "Orange", "Red"],
    )
    # Correlations are shown on the full [-1, 1] range.
    y_encoding = alt.Y(
        "correlation:Q",
        title="Average Correlation",
        scale=alt.Scale(domain=[-1, 1]),
    )
    # Bar fill depends on the sign of the value
    # (matches plot_speaking_style_correlation).
    sign_color = alt.condition(
        alt.datum.correlation >= 0,
        alt.value("green"),
        alt.value("red"),
    )
    hover_fields = [
        alt.Tooltip("Color:N", title="Speaking Style"),
        alt.Tooltip("correlation:Q", format=".3f", title="Avg Correlation"),
        alt.Tooltip("n_traits:Q", title="# Traits"),
    ]

    chart = (
        alt.Chart(frame.to_pandas())
        .mark_bar()
        .encode(
            x=x_encoding,
            y=y_encoding,
            color=sign_color,
            tooltip=hover_fields,
        )
        .properties(
            title=self._process_title(title),
            width=width or 400,
            height=height or 350,
        )
    )

    return self._save_plot(chart, title)
|
||||||
|
|
||||||
def plot_demographic_distribution(
|
def plot_demographic_distribution(
|
||||||
self,
|
self,
|
||||||
column: str,
|
column: str,
|
||||||
|
|||||||
6
theme.py
6
theme.py
@@ -77,6 +77,12 @@ class ColorPalette:
|
|||||||
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
|
||||||
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
|
||||||
|
|
||||||
|
# Speaking Style Colors (named after the style quadrant colors)
|
||||||
|
STYLE_GREEN = "#2E7D32" # Forest Green
|
||||||
|
STYLE_BLUE = "#1565C0" # Strong Blue
|
||||||
|
STYLE_ORANGE = "#E07A00" # Burnt Orange
|
||||||
|
STYLE_RED = "#C62828" # Deep Red
|
||||||
|
|
||||||
|
|
||||||
def jpmc_altair_theme():
|
def jpmc_altair_theme():
|
||||||
"""JPMC brand theme for Altair charts."""
|
"""JPMC brand theme for Altair charts."""
|
||||||
|
|||||||
63
utils.py
63
utils.py
@@ -1676,6 +1676,69 @@ def join_voice_and_style_data(
|
|||||||
how="inner"
|
how="inner"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def transform_speaking_style_color_correlation(
    joined_df: pl.LazyFrame | pl.DataFrame,
    speaking_styles: dict[str, list[str]],
    target_column: str = "Voice_Scale_Score"
) -> tuple[pl.DataFrame, dict | None]:
    """Aggregate speaking style correlation by color (Green, Blue, Orange, Red).

    Original use-case: "I want to create high-level correlation plots between
    'green, blue, orange, red' speaking styles and the 'voice scale scores'.
    I want to go to one plot with one bar for each color."

    For every trait of a color, the correlation between ``score`` and
    ``target_column`` is computed over the rows whose ``Right_Anchor`` equals
    that trait. The color's value is the plain mean of those per-trait
    correlations.

    Traits are skipped when they have fewer than two complete rows or when
    their correlation is undefined (null or NaN, e.g. a constant column), so
    one degenerate trait cannot turn the whole color average into NaN. Colors
    without any usable trait are omitted from the result.

    Parameters
    ----------
    joined_df : pl.LazyFrame or pl.DataFrame
        Pre-fetched data from joining speaking style data with target data.
        Must have columns: Right_Anchor, score, and the target_column.
        A LazyFrame is collected first.
    speaking_styles : dict
        Dictionary mapping color names to their constituent traits.
        Typically imported from speaking_styles.SPEAKING_STYLES
    target_column : str
        The column to correlate against speaking style scores.
        Default: "Voice_Scale_Score" (for voice scale 1-10)
        Alternative: "Ranking_Points" (for top 3 voice ranking)

    Returns
    -------
    tuple[pl.DataFrame, dict | None]
        (DataFrame with columns [Color, correlation, n_traits], None).
        The three columns are present even when no color produced a value.
    """
    import math  # function-scope: only needed for the NaN check below

    if isinstance(joined_df, pl.LazyFrame):
        joined_df = joined_df.collect()

    color_correlations = []

    for color, traits in speaking_styles.items():
        trait_corrs = []
        for trait in traits:
            # Complete (score, target) pairs for this specific trait.
            valid_data = (
                joined_df
                .filter(pl.col("Right_Anchor") == trait)
                .select(["score", target_column])
                .drop_nulls()
            )

            # A correlation needs at least two observations.
            if valid_data.height < 2:
                continue

            corr_val = valid_data.select(pl.corr("score", target_column)).item()

            # Zero variance yields NaN, which would poison the average.
            if corr_val is None or math.isnan(corr_val):
                continue

            trait_corrs.append(corr_val)

        # Average across all usable traits for this color.
        if trait_corrs:
            color_correlations.append({
                "Color": color,
                "correlation": sum(trait_corrs) / len(trait_corrs),
                "n_traits": len(trait_corrs),
            })

    # Explicit schema so an empty result still has the documented columns
    # (a bare pl.DataFrame([]) has none).
    result_df = pl.DataFrame(
        color_correlations,
        schema={
            "Color": pl.Utf8,
            "correlation": pl.Float64,
            "n_traits": pl.Int64,
        },
    )
    return result_df, None
|
||||||
|
|
||||||
|
|
||||||
def process_voice_ranking_data(
|
def process_voice_ranking_data(
|
||||||
df: Union[pl.LazyFrame, pl.DataFrame]
|
df: Union[pl.LazyFrame, pl.DataFrame]
|
||||||
) -> pl.DataFrame:
|
) -> pl.DataFrame:
|
||||||
|
|||||||
Reference in New Issue
Block a user