diff --git a/03_quant_report.py b/03_quant_report.py
index 5ef8436..a902439 100644
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -664,5 +664,140 @@ def _():
return
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Correlation Speaking Styles
+    """)
+    return
+
+
+@app.cell
+def _(S, data, top3_voices):
+    # Speaking-style responses are fetched as two halves (orange/red and
+    # green/blue), each paired with its own choice map.
+    ss_or, choice_map_or = S.get_ss_orange_red(data)
+    ss_gb, choice_map_gb = S.get_ss_green_blue(data)
+
+    # Merge both halves on `_recordId` and combine their choice maps.
+    ss_all = ss_or.join(ss_gb, on='_recordId')
+    choice_map = {**choice_map_or, **choice_map_gb}
+
+    # Processed once and reused as the left side of both joins below.
+    df_style = utils.process_speaking_style_data(ss_all, choice_map)
+
+    # Inner-join the style data with the 1-10 voice scale data.
+    vscales = S.get_voice_scale_1_10(data)[0]
+    df_scale_long = utils.process_voice_scale_data(vscales)
+    joined_scale = df_style.join(df_scale_long, on=["_recordId", "Voice"], how="inner")
+
+    # Inner-join the same style data with the processed voice ranking data.
+    df_ranking = utils.process_voice_ranking_data(top3_voices)
+    joined_ranking = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
+
+    return joined_ranking, joined_scale
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Colors vs Scale 1-10
+    """)
+    return
+
+
+@app.cell
+def _(S, joined_scale):
+    # One row per color: mean per-trait correlation with the default target, Voice_Scale_Score.
+    color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, SPEAKING_STYLES)
+    S.plot_speaking_style_color_correlation(
+        data=color_corr_scale,
+        title="Correlation: Speaking Style Colors and Voice Scale 1-10"
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Colors vs Ranking Points
+    """)
+    return
+
+
+@app.cell
+def _(S, joined_ranking):
+    # One row per style color: the mean of its per-trait correlations, here
+    # computed against Ranking_Points instead of the default target column.
+    color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
+        joined_ranking, SPEAKING_STYLES, target_column="Ranking_Points"
+    )
+    S.plot_speaking_style_color_correlation(
+        data=color_corr_ranking,
+        title="Correlation: Speaking Style Colors and Voice Ranking Points"
+    )
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Individual Traits vs Scale 1-10
+    """)
+    return
+
+
+@app.cell
+def _(S, joined_scale):
+    _content = ""
+
+    for _style, _traits in SPEAKING_STYLES.items():
+        # One chart per style color; the title names the scale, not the ranking.
+        _fig = S.plot_speaking_style_correlation(
+            data=joined_scale,
+            style_color=_style,
+            style_traits=_traits,
+            title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10",
+        )
+        _content += f"""
+        #### Speaking Style **{_style}**:
+
+        {mo.ui.altair_chart(_fig)}
+
+        """
+    mo.md(_content)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Individual Traits vs Ranking Points
+    """)
+    return
+
+
+@app.cell
+def _(S, joined_ranking):
+    # Collect one markdown section (heading + chart) per speaking style color.
+    _sections = []
+    for _style, _traits in SPEAKING_STYLES.items():
+        _fig = S.plot_speaking_style_ranking_correlation(
+            data=joined_ranking,
+            style_color=_style,
+            style_traits=_traits,
+            title=f"Correlation: Speaking Style {_style} and Voice Ranking Points",
+        )
+        _sections.append(f"""
+        #### Speaking Style **{_style}**:
+
+        {mo.ui.altair_chart(_fig)}
+
+        """)
+    # Concatenate in iteration order and render as a single markdown document.
+    mo.md("".join(_sections))
+    return
+
+
if __name__ == "__main__":
app.run()
diff --git a/plots.py b/plots.py
index ca81be4..0bee376 100644
--- a/plots.py
+++ b/plots.py
@@ -1048,6 +1048,59 @@ class QualtricsPlotsMixin:
chart = self._save_plot(chart, title)
return chart
+    def plot_speaking_style_color_correlation(
+        self,
+        data: pl.LazyFrame | pl.DataFrame | None = None,
+        title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
+        width: int | str | None = None,
+        height: int | None = None,
+    ) -> alt.Chart:
+        """Plot high-level correlation showing one bar per speaking style color.
+
+        Original use-case: "I want to create high-level correlation plots between
+        'green, blue, orange, red' speaking styles and the 'voice scale scores'.
+        I want to go to one plot with one bar for each color."
+
+        Args:
+            data: DataFrame with columns [Color, correlation, n_traits] from
+                utils.transform_speaking_style_color_correlation
+            title: Chart title (supports <br> for line breaks)
+            width: Chart width; falls back to 400 when not given
+            height: Chart height in pixels; falls back to 350 when not given
+
+        Returns:
+            Altair chart with one bar per speaking style color
+        """
+        df = self._ensure_dataframe(data)
+
+        # Conditional color based on sign (matches plot_speaking_style_correlation)
+        chart = alt.Chart(df.to_pandas()).mark_bar().encode(
+            x=alt.X('Color:N',
+                    title=None,
+                    axis=alt.Axis(labelAngle=0),
+                    sort=["Green", "Blue", "Orange", "Red"]),
+            y=alt.Y('correlation:Q',
+                    title='Average Correlation',
+                    scale=alt.Scale(domain=[-1, 1])),
+            color=alt.condition(
+                alt.datum.correlation >= 0,
+                alt.value('green'),
+                alt.value('red')
+            ),
+            tooltip=[
+                alt.Tooltip('Color:N', title='Speaking Style'),
+                alt.Tooltip('correlation:Q', format='.3f', title='Avg Correlation'),
+                alt.Tooltip('n_traits:Q', title='# Traits')
+            ]
+        ).properties(
+            title=self._process_title(title),
+            width=width or 400,
+            height=height or 350
+        )
+
+        chart = self._save_plot(chart, title)
+        return chart
+
def plot_demographic_distribution(
self,
column: str,
diff --git a/theme.py b/theme.py
index 9164769..9ad9914 100644
--- a/theme.py
+++ b/theme.py
@@ -77,6 +77,12 @@ class ColorPalette:
GENDER_MALE_NEUTRAL = "#B8C9D9" # Grey-Blue
GENDER_FEMALE_NEUTRAL = "#D9B8C9" # Grey-Pink
+ # Speaking Style Colors (named after the style quadrant colors)
+ STYLE_GREEN = "#2E7D32" # Forest Green
+ STYLE_BLUE = "#1565C0" # Strong Blue
+ STYLE_ORANGE = "#E07A00" # Burnt Orange
+ STYLE_RED = "#C62828" # Deep Red
+
def jpmc_altair_theme():
"""JPMC brand theme for Altair charts."""
diff --git a/utils.py b/utils.py
index 2f0d34f..f784f3d 100644
--- a/utils.py
+++ b/utils.py
@@ -1676,6 +1676,69 @@ def join_voice_and_style_data(
how="inner"
)
+
+def transform_speaking_style_color_correlation(
+    joined_df: pl.LazyFrame | pl.DataFrame,
+    speaking_styles: dict[str, list[str]],
+    target_column: str = "Voice_Scale_Score"
+) -> tuple[pl.DataFrame, dict | None]:
+    """Aggregate speaking style correlation by color (Green, Blue, Orange, Red).
+
+    Original use-case: "I want to create high-level correlation plots between
+    'green, blue, orange, red' speaking styles and the 'voice scale scores'.
+    I want to go to one plot with one bar for each color."
+
+    The correlation between ``score`` and ``target_column`` is computed per trait
+    (rows whose ``Right_Anchor`` equals the trait) and averaged per color. Traits
+    with fewer than two complete rows or an undefined (null/NaN) correlation are
+    skipped; a color with no usable trait is omitted from the result.
+
+    Parameters
+    ----------
+    joined_df : pl.LazyFrame or pl.DataFrame
+        Pre-fetched data from joining speaking style data with target data.
+        Must have columns: Right_Anchor, score, and the target_column
+    speaking_styles : dict
+        Dictionary mapping color names to their constituent traits.
+        Typically imported from speaking_styles.SPEAKING_STYLES
+    target_column : str
+        The column to correlate against speaking style scores.
+        Default: "Voice_Scale_Score" (for voice scale 1-10)
+        Alternative: "Ranking_Points" (for top 3 voice ranking)
+
+    Returns
+    -------
+    tuple[pl.DataFrame, dict | None]
+        (DataFrame with columns [Color, correlation, n_traits], None)
+    """
+    import math  # local import: only needed for the NaN check below
+
+    if isinstance(joined_df, pl.LazyFrame):
+        joined_df = joined_df.collect()
+
+    color_correlations = []
+    for color, traits in speaking_styles.items():
+        trait_corrs = []
+        for trait in traits:
+            subset = joined_df.filter(pl.col("Right_Anchor") == trait)
+            valid_data = subset.select(["score", target_column]).drop_nulls()
+            if valid_data.height > 1:
+                corr_val = valid_data.select(pl.corr("score", target_column)).item()
+                # A constant column yields NaN, which would poison the color mean.
+                if corr_val is not None and not math.isnan(corr_val):
+                    trait_corrs.append(corr_val)
+        if trait_corrs:
+            color_correlations.append({
+                "Color": color,
+                "correlation": sum(trait_corrs) / len(trait_corrs),
+                "n_traits": len(trait_corrs),
+            })
+
+    # Explicit schema keeps the expected columns even when nothing was computed.
+    schema = {"Color": pl.Utf8, "correlation": pl.Float64, "n_traits": pl.Int64}
+    return pl.DataFrame(color_correlations, schema=schema), None
+
+
def process_voice_ranking_data(
df: Union[pl.LazyFrame, pl.DataFrame]
) -> pl.DataFrame: