correlation start

This commit is contained in:
2026-01-27 17:22:16 +01:00
parent 393c527656
commit fd4cb4b596
9 changed files with 5375 additions and 24 deletions

220
plots.py
View File

@@ -854,3 +854,223 @@ def plot_speaking_style_trait_scores(
font=dict(size=11)
)
return fig
def plot_speaking_style_correlation(
    df: pl.DataFrame,
    style_color: str,
    style_traits: list[str],
    title="Speaking style and voice scale 1-10 correlations"
) -> go.Figure:
    """
    Plot the correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10) as a bar chart.

    Each bar represents one trait. Bars are green for correlations >= 0 and
    red for negative correlations.

    Parameters
    ----------
    df : pl.DataFrame
        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Voice_Scale_Score'.
    style_color : str
        The name of the style (e.g., 'Green', 'Blue'). Only used in the title
        of the placeholder figure returned when no trait has enough data.
    style_traits : list[str]
        List of trait descriptions (positive side) to include in the plot.
        These must match the 'Right_Anchor' column values exactly.
    title : str, optional
        Title of the plot.

    Returns
    -------
    go.Figure
        Bar chart with one bar per trait that has at least two non-null
        (score, Voice_Scale_Score) pairs. If no trait qualifies, an empty
        figure titled "No data for {style_color} Style" is returned.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level import list.
    import math

    # 1. Calculate one Pearson correlation per trait.
    traits = []
    correlations = []
    for trait in style_traits:
        # 'Right_Anchor' holds the positive trait description; exact match.
        valid_data = (
            df.filter(pl.col("Right_Anchor") == trait)
            .select(["score", "Voice_Scale_Score"])
            .drop_nulls()
        )
        # A correlation needs at least two observations.
        if valid_data.height <= 1:
            continue

        corr_val = valid_data.select(pl.corr("score", "Voice_Scale_Score")).item()
        # The correlation is undefined (None or NaN) when e.g. one of the
        # columns is constant. Fall back to 0.0 so the bar is not labelled
        # "nan" and coloured as if it were negative.
        if corr_val is None or math.isnan(corr_val):
            corr_val = 0.0

        traits.append(trait)
        correlations.append(corr_val)

    # 2. Nothing to plot: return an empty figure that says so.
    if not traits:
        fig = go.Figure()
        fig.update_layout(title=f"No data for {style_color} Style")
        return fig

    # 3. Build the bar chart.
    # Traits look like "A | B | C"; break at each "|" so the long labels
    # stay readable on a flat x-axis.
    x_labels = ["<br>".join(part.strip() for part in trait.split("|")) for trait in traits]
    colors = ["green" if value >= 0 else "red" for value in correlations]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=x_labels,
        y=correlations,
        text=[f"{value:.2f}" for value in correlations],
        textposition='outside',
        marker_color=colors,
        hovertemplate="<b>%{customdata}</b><br>Correlation: %{y:.2f}<extra></extra>",
        customdata=traits  # Full, unwrapped trait text on hover
    ))

    fig.update_layout(
        title=title,
        yaxis_title="Correlation",
        yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
        xaxis=dict(tickangle=0),  # Keep labels flat
        height=400,
        width=1000,
        template="plotly_white",
        showlegend=False
    )
    return fig
def plot_speaking_style_ranking_correlation(
    df: pl.DataFrame,
    style_color: str,
    style_traits: list[str],
    title: str = None
) -> go.Figure:
    """
    Plot the correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3) as a bar chart.

    Each bar represents one trait. Bars are green for correlations >= 0 and
    red for negative correlations.

    Parameters
    ----------
    df : pl.DataFrame
        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Ranking_Points'.
    style_color : str
        The name of the style (e.g., 'Green', 'Blue'). Used in the default
        title and in the title of the placeholder figure returned when no
        trait has enough data.
    style_traits : list[str]
        List of trait descriptions (positive side) to include in the plot.
        These must match the 'Right_Anchor' column values exactly.
    title : str, optional
        Custom title for the plot. If None, uses
        "Speaking style {style_color} and voice ranking points correlations".

    Returns
    -------
    go.Figure
        Bar chart with one bar per trait that has at least two non-null
        (score, Ranking_Points) pairs. If no trait qualifies, an empty
        figure titled "No data for {style_color} Style" is returned.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level import list.
    import math

    if title is None:
        title = f"Speaking style {style_color} and voice ranking points correlations"

    # 1. Calculate one Pearson correlation per trait.
    traits = []
    correlations = []
    for trait in style_traits:
        # 'Right_Anchor' holds the positive trait description; exact match.
        valid_data = (
            df.filter(pl.col("Right_Anchor") == trait)
            .select(["score", "Ranking_Points"])
            .drop_nulls()
        )
        # A correlation needs at least two observations.
        if valid_data.height <= 1:
            continue

        corr_val = valid_data.select(pl.corr("score", "Ranking_Points")).item()
        # The correlation is undefined (None or NaN) when e.g. one of the
        # columns is constant. Fall back to 0.0 so the bar is not labelled
        # "nan" and coloured as if it were negative.
        if corr_val is None or math.isnan(corr_val):
            corr_val = 0.0

        traits.append(trait)
        correlations.append(corr_val)

    # 2. Nothing to plot: return an empty figure that says so.
    if not traits:
        fig = go.Figure()
        fig.update_layout(title=f"No data for {style_color} Style")
        return fig

    # 3. Build the bar chart.
    # Traits look like "A | B | C"; break at each "|" so the long labels
    # stay readable on a flat x-axis.
    x_labels = ["<br>".join(part.strip() for part in trait.split("|")) for trait in traits]
    colors = ["green" if value >= 0 else "red" for value in correlations]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=x_labels,
        y=correlations,
        text=[f"{value:.2f}" for value in correlations],
        textposition='outside',
        marker_color=colors,
        hovertemplate="<b>%{customdata}</b><br>Correlation: %{y:.2f}<extra></extra>",
        customdata=traits  # Full, unwrapped trait text on hover
    ))

    fig.update_layout(
        title=title,
        yaxis_title="Correlation",
        yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
        xaxis=dict(tickangle=0),  # Keep labels flat
        height=400,
        width=1000,
        template="plotly_white",
        showlegend=False
    )
    return fig