correlation start
This commit is contained in:
220
plots.py
220
plots.py
@@ -854,3 +854,223 @@ def plot_speaking_style_trait_scores(
|
||||
font=dict(size=11)
|
||||
)
|
||||
return fig
|
||||
|
||||
def _plot_trait_correlation_bars(
    df: pl.DataFrame,
    style_color: str,
    style_traits: list[str],
    target_col: str,
    title: str,
) -> go.Figure:
    """
    Build a bar chart of per-trait Pearson correlations against ``target_col``.

    Shared implementation behind `plot_speaking_style_correlation` and
    `plot_speaking_style_ranking_correlation`; the two public functions differ
    only in the column they correlate the trait score with and in their
    default title.

    Parameters
    ----------
    df : pl.DataFrame
        Dataframe containing 'Right_Anchor', 'score' and ``target_col``.
    style_color : str
        Style name; only used in the title of the empty "No data" figure.
    style_traits : list[str]
        Trait descriptions, matched exactly against 'Right_Anchor'.
    target_col : str
        Name of the column correlated with 'score'.
    title : str
        Title of the resulting figure.

    Returns
    -------
    go.Figure
        One bar per trait that has at least two non-null (score, target)
        pairs. Bars are green for correlations >= 0 and red otherwise.
        If no trait qualifies, an empty figure titled
        "No data for {style_color} Style" is returned.
    """
    # Function-scope import: the module's top-level import block is not
    # guaranteed to provide `math`.
    import math

    traits_with_data = []
    correlations = []

    # 1. Calculate correlations
    for trait in style_traits:
        # Exact match against Right_Anchor, which holds the positive trait
        # description; rows with a null in either column are excluded.
        pairs = (
            df.filter(pl.col("Right_Anchor") == trait)
            .select(["score", target_col])
            .drop_nulls()
        )

        # A correlation needs at least two observations.
        if pairs.height < 2:
            continue

        corr_val = pairs.select(pl.corr("score", target_col)).item()

        # A missing or non-finite result (e.g. NaN when one column has zero
        # variance) is plotted as 0.0 instead of leaking "nan" into the
        # bar label and the sign-based colouring.
        if corr_val is None or not math.isfinite(corr_val):
            corr_val = 0.0

        traits_with_data.append(trait)
        correlations.append(corr_val)

    # 2. Nothing to plot: return an empty figure carrying only a title
    if not correlations:
        fig = go.Figure()
        fig.update_layout(title=f"No data for {style_color} Style")
        return fig

    # 3. Build the bar chart
    # Trait texts look like "A | B | C"; break at each "|" so the long
    # labels stay readable on a flat (non-rotated) x-axis.
    x_labels = [
        "<br>".join(part.strip() for part in trait.split("|"))
        for trait in traits_with_data
    ]
    colors = ["green" if val >= 0 else "red" for val in correlations]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=x_labels,
        y=correlations,
        text=[f"{val:.2f}" for val in correlations],
        textposition='outside',
        marker_color=colors,
        hovertemplate="<b>%{customdata}</b><br>Correlation: %{y:.2f}<extra></extra>",
        customdata=traits_with_data,  # Full, unwrapped trait text on hover
    ))

    fig.update_layout(
        title=title,
        yaxis_title="Correlation",
        yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
        xaxis=dict(tickangle=0),  # Keep labels flat
        height=400,
        width=1000,
        template="plotly_white",
        showlegend=False,
    )

    return fig


def plot_speaking_style_correlation(
    df: pl.DataFrame,
    style_color: str,
    style_traits: list[str],
    title: str = "Speaking style and voice scale 1-10 correlations",
) -> go.Figure:
    """
    Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10) using a Bar Chart.
    Each bar represents one trait.

    Parameters
    ----------
    df : pl.DataFrame
        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Voice_Scale_Score'.
    style_color : str
        The name of the style (e.g., 'Green', 'Blue'); used in the title of
        the empty figure returned when no trait has enough data.
    style_traits : list[str]
        List of trait descriptions (positive side) to include in the plot.
        These should match the 'Right_Anchor' column values.
    title : str, optional
        Title for the plot.

    Returns
    -------
    go.Figure
        Bar chart with one bar per trait (green for correlation >= 0, red
        otherwise). Traits with fewer than two non-null data points are
        omitted; an undefined correlation is shown as 0.
    """
    return _plot_trait_correlation_bars(
        df, style_color, style_traits, "Voice_Scale_Score", title
    )


def plot_speaking_style_ranking_correlation(
    df: pl.DataFrame,
    style_color: str,
    style_traits: list[str],
    title: str = None
) -> go.Figure:
    """
    Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).
    Each bar represents one trait.

    Parameters
    ----------
    df : pl.DataFrame
        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Ranking_Points'.
    style_color : str
        The name of the style (e.g., 'Green', 'Blue'); used in the default
        title and in the title of the empty figure returned when no trait
        has enough data.
    style_traits : list[str]
        List of trait descriptions (positive side) to include in the plot.
        These should match the 'Right_Anchor' column values.
    title : str, optional
        Custom title for the plot. If None, uses default.

    Returns
    -------
    go.Figure
        Bar chart with one bar per trait (green for correlation >= 0, red
        otherwise). Traits with fewer than two non-null data points are
        omitted; an undefined correlation is shown as 0.
    """
    if title is None:
        title = f"Speaking style {style_color} and voice ranking points correlations"

    return _plot_trait_correlation_bars(
        df, style_color, style_traits, "Ranking_Points", title
    )
|
||||
|
||||
Reference in New Issue
Block a user