correlation start

2026-01-27 17:22:16 +01:00
parent 393c527656
commit fd4cb4b596
9 changed files with 5375 additions and 24 deletions
--- a/plots.py
+++ b/plots.py
@@ -854,3 +854,223 @@ def plot_speaking_style_trait_scores(
        font=dict(size=11)
    )
    return fig
+
+def plot_speaking_style_correlation(
+    df: pl.DataFrame,
+    style_color: str,
+    style_traits: list[str],
+    title: str = "Speaking style and voice scale 1-10 correlations"
+) -> go.Figure:
+    """
+    Plot the correlation between speaking style trait scores and voice scale
+    scores as a bar chart, with one bar per trait.
+
+    For every entry of ``style_traits`` the rows whose 'Right_Anchor' equals
+    that entry are selected, rows with a null 'score' or 'Voice_Scale_Score'
+    are dropped, and the Pearson correlation of the two columns is computed.
+    Traits with fewer than two remaining rows are left out of the chart.
+    An undefined correlation (None or NaN) is plotted as 0.0.
+    Bars are green for correlations >= 0 and red otherwise.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Voice_Scale_Score'.
+    style_color : str
+        The name of the style (e.g., 'Green', 'Blue'). Only used in the title
+        of the placeholder figure returned when no trait has enough data.
+    style_traits : list[str]
+        List of trait descriptions (positive side) to include in the plot.
+        These must match the 'Right_Anchor' column values exactly.
+    title : str, optional
+        Title of the plot.
+
+    Returns
+    -------
+    go.Figure
+        The bar chart, or an empty figure titled
+        "No data for <style_color> Style" when no trait could be correlated.
+    """
+    import math  # stdlib; imported locally so this function is self-contained
+
+    def wrap_text_at_pipe(text):
+        # Break "A | B | C" into one line per part so long labels fit on the axis.
+        return "<br>".join(part.strip() for part in text.split("|"))
+
+    axis_labels = []
+    full_names = []
+    correlations = []
+
+    # 1. Calculate Correlations
+    for trait in style_traits:
+        # Exact match against Right_Anchor, which holds the positive trait
+        # description; rows with a null in either column are discarded.
+        valid_data = (
+            df.filter(pl.col("Right_Anchor") == trait)
+            .select(["score", "Voice_Scale_Score"])
+            .drop_nulls()
+        )
+        if valid_data.height < 2:
+            continue
+
+        corr_val = valid_data.select(pl.corr("score", "Voice_Scale_Score")).item()
+        # A column without variance yields NaN rather than None; treat every
+        # undefined correlation as 0.0 instead of drawing a bar labelled "nan".
+        if corr_val is None or math.isnan(corr_val):
+            corr_val = 0.0
+
+        axis_labels.append(wrap_text_at_pipe(trait))
+        full_names.append(trait)
+        correlations.append(corr_val)
+
+    # 2. Build Plot
+    fig = go.Figure()
+    if not correlations:
+        # Nothing to show: return an empty figure that says so in its title.
+        fig.update_layout(title=f"No data for {style_color} Style")
+        return fig
+
+    fig.add_trace(go.Bar(
+        x=axis_labels,
+        y=correlations,
+        text=[f"{val:.2f}" for val in correlations],
+        textposition='outside',
+        # Colour encodes the sign of the correlation.
+        marker_color=["green" if val >= 0 else "red" for val in correlations],
+        hovertemplate="<b>%{customdata}</b><br>Correlation: %{y:.2f}<extra></extra>",
+        customdata=full_names  # Full, unwrapped trait text on hover
+    ))
+
+    fig.update_layout(
+        title=title,
+        yaxis_title="Correlation",
+        yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
+        xaxis=dict(tickangle=0),  # Keep the wrapped labels horizontal
+        height=400,
+        width=1000,
+        template="plotly_white",
+        showlegend=False
+    )
+
+    return fig
+
+
+def plot_speaking_style_ranking_correlation(
+    df: pl.DataFrame,
+    style_color: str,
+    style_traits: list[str],
+    title: str = None
+) -> go.Figure:
+    """
+    Plot the correlation between speaking style trait scores and voice
+    ranking points as a bar chart, with one bar per trait.
+
+    For every entry of ``style_traits`` the rows whose 'Right_Anchor' equals
+    that entry are selected, rows with a null 'score' or 'Ranking_Points'
+    are dropped, and the Pearson correlation of the two columns is computed.
+    Traits with fewer than two remaining rows are left out of the chart.
+    An undefined correlation (None or NaN) is plotted as 0.0.
+    Bars are green for correlations >= 0 and red otherwise.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Ranking_Points'.
+    style_color : str
+        The name of the style (e.g., 'Green', 'Blue'). Used in the default
+        title and in the title of the placeholder figure returned when no
+        trait has enough data.
+    style_traits : list[str]
+        List of trait descriptions (positive side) to include in the plot.
+        These must match the 'Right_Anchor' column values exactly.
+    title : str, optional
+        Custom title for the plot. If None, uses
+        "Speaking style <style_color> and voice ranking points correlations".
+
+    Returns
+    -------
+    go.Figure
+        The bar chart, or an empty figure titled
+        "No data for <style_color> Style" when no trait could be correlated.
+    """
+    import math  # stdlib; imported locally so this function is self-contained
+
+    if title is None:
+        title = f"Speaking style {style_color} and voice ranking points correlations"
+
+    def wrap_text_at_pipe(text):
+        # Break "A | B | C" into one line per part so long labels fit on the axis.
+        return "<br>".join(part.strip() for part in text.split("|"))
+
+    axis_labels = []
+    full_names = []
+    correlations = []
+
+    # 1. Calculate Correlations
+    for trait in style_traits:
+        # Exact match against Right_Anchor, which holds the positive trait
+        # description; rows with a null in either column are discarded.
+        valid_data = (
+            df.filter(pl.col("Right_Anchor") == trait)
+            .select(["score", "Ranking_Points"])
+            .drop_nulls()
+        )
+        if valid_data.height < 2:
+            continue
+
+        corr_val = valid_data.select(pl.corr("score", "Ranking_Points")).item()
+        # A column without variance yields NaN rather than None; treat every
+        # undefined correlation as 0.0 instead of drawing a bar labelled "nan".
+        if corr_val is None or math.isnan(corr_val):
+            corr_val = 0.0
+
+        axis_labels.append(wrap_text_at_pipe(trait))
+        full_names.append(trait)
+        correlations.append(corr_val)
+
+    # 2. Build Plot
+    fig = go.Figure()
+    if not correlations:
+        # Nothing to show: return an empty figure that says so in its title.
+        fig.update_layout(title=f"No data for {style_color} Style")
+        return fig
+
+    fig.add_trace(go.Bar(
+        x=axis_labels,
+        y=correlations,
+        text=[f"{val:.2f}" for val in correlations],
+        textposition='outside',
+        # Colour encodes the sign of the correlation.
+        marker_color=["green" if val >= 0 else "red" for val in correlations],
+        hovertemplate="<b>%{customdata}</b><br>Correlation: %{y:.2f}<extra></extra>",
+        customdata=full_names  # Full, unwrapped trait text on hover
+    ))
+
+    fig.update_layout(
+        title=title,
+        yaxis_title="Correlation",
+        yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
+        xaxis=dict(tickangle=0),  # Keep the wrapped labels horizontal
+        height=400,
+        width=1000,
+        template="plotly_white",
+        showlegend=False
+    )
+
+    return fig