# JPMC-quant/plots.py

"""Plotting functions for Voice Branding analysis."""

import plotly.graph_objects as go
import polars as pl
from theme import ColorPalette


def plot_average_scores_with_counts(
    df: pl.DataFrame,
    title: str = "General Impression (1-10)<br>Per Voice with Number of Participants Who Rated It",
    x_label: str = "Stimuli",
    y_label: str = "Average General Impression Rating (1-10)",
    color: str = ColorPalette.PRIMARY,
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar plot showing average scores and count of non-null values for each column.

    Every column except ``_recordId`` becomes one bar. The bar height is the
    column mean, and the number of non-null values is printed inside the bar.
    Bars are ordered by mean, highest first; columns whose mean is null are
    placed at the end.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing numeric columns to analyze. A ``_recordId``
        column, if present, is ignored.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    color : str, optional
        Bar color (hex code or named color).
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when ``df`` has no
        columns other than ``_recordId``.
    """
    value_cols = [c for c in df.columns if c != '_recordId']

    # Without any value column the stats frame would have no 'average' column
    # and the sort below would raise; return an empty figure instead, as
    # plot_character_ranking_distribution does.
    if not value_cols:
        return go.Figure()

    # Mean and number of non-null answers per column
    stats = [
        {
            'column': col,
            'average': df[col].mean(),
            'count': df[col].drop_nulls().len(),
        }
        for col in value_cols
    ]

    # Highest average first. nulls_last keeps columns without any average
    # (mean() returned None) from being placed ahead of the real results.
    stats_df = pl.DataFrame(stats).sort('average', descending=True, nulls_last=True)

    # Extract voice identifiers from column names (e.g., "V14" from "Voice_Scale_1_10__V14")
    labels = [col.split('__')[-1] if '__' in col else col for col in stats_df['column']]

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=labels,
        y=stats_df['average'],
        text=stats_df['count'],
        textposition='inside',
        textfont=dict(size=10, color='black'),
        marker_color=color,
        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<br>Count: %{text}<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            # Fixed axis range so charts are comparable across calls
            range=[0, 10],
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11)
    )

    return fig


def plot_top3_ranking_distribution(
    df: pl.DataFrame,
    title: str = "Top 3 Rankings Distribution<br>Count of 1st, 2nd, and 3rd Place Votes per Voice",
    x_label: str = "Voices",
    y_label: str = "Number of Mentions in Top 3",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a stacked bar chart showing how often each voice was ranked 1st, 2nd, or 3rd.

    The total height of the bar represents the popularity (frequency of being in Top 3),
    while the segments show the quality of those rankings. Bars are ordered by
    total mentions, ties broken by the number of 1st-place votes.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing ranking columns (values 1, 2, 3). A ``_recordId``
        column, if present, is ignored.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when no column
        received any rank 1, 2 or 3 vote.
    """
    stats = []
    for col in [c for c in df.columns if c != '_recordId']:
        # Count the rows holding exactly 1, 2 and 3 in this column
        rank1 = df.filter(pl.col(col) == 1).height
        rank2 = df.filter(pl.col(col) == 2).height
        rank3 = df.filter(pl.col(col) == 3).height
        total = rank1 + rank2 + rank3

        # Columns without a single top-3 vote are left off the chart
        if total > 0:
            stats.append({
                'column': col,
                'Rank 1': rank1,
                'Rank 2': rank2,
                'Rank 3': rank3,
                'Total': total
            })

    # With no rows the stats frame has no 'Total' column and the sort below
    # would raise; return an empty figure instead, as
    # plot_character_ranking_distribution does.
    if not stats:
        return go.Figure()

    # Sort by Total count descending (most popular overall), tie-break on Rank 1
    stats_df = pl.DataFrame(stats).sort(['Total', 'Rank 1'], descending=[True, True])

    # Extract voice identifiers from column names
    labels = [col.split('__')[-1] if '__' in col else col for col in stats_df['column']]

    fig = go.Figure()

    # Traces are added in stack order: Rank 1 at the bottom, then Rank 2, then
    # Rank 3, so the "first choice" volume shares a common baseline across bars.
    rank_traces = (
        ('Rank 1', 'Rank 1 (1st Choice)', ColorPalette.RANK_1),
        ('Rank 2', 'Rank 2 (2nd Choice)', ColorPalette.RANK_2),
        ('Rank 3', 'Rank 3 (3rd Choice)', ColorPalette.RANK_3),
    )
    for rank_key, legend_name, bar_color in rank_traces:
        fig.add_trace(go.Bar(
            name=legend_name,
            x=labels,
            y=stats_df[rank_key],
            marker_color=bar_color,
            hovertemplate='<b>%{x}</b><br>' + rank_key + ': %{y}<extra></extra>'
        ))

    fig.update_layout(
        barmode='stack',
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        # Horizontal legend placed just above the plot area, right-aligned
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            traceorder="normal"
        ),
        font=dict(size=11)
    )

    return fig


def plot_character_ranking_distribution(
    df: pl.DataFrame,
    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
    x_label: str = "Character Personality",
    y_label: str = "Number of Votes",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Build a stacked bar chart of rank 1-4 vote counts per character personality.

    Only columns whose name contains ``Character_Ranking`` are considered, and
    a column is dropped when it holds none of the values 1-4. Bars are ordered
    by the number of rank 1 votes, with rank 2 votes as the tie-breaker.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing character ranking columns (prefix 'Character_Ranking').
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object; an empty figure when no ranking column has votes.
    """
    rank_keys = ('Rank 1', 'Rank 2', 'Rank 3', 'Rank 4')

    rows = []
    for name in df.columns:
        if 'Character_Ranking' not in name:
            continue

        # Number of rows equal to each place value (1..4) in this column
        tallies = {
            key: df.filter(pl.col(name) == place).height
            for place, key in enumerate(rank_keys, start=1)
        }
        if sum(tallies.values()) > 0:
            rows.append({'column': name, **tallies})

    if not rows:
        return go.Figure()

    # Most rank 1 votes first; rank 2 votes break ties
    ordered = pl.DataFrame(rows).sort(['Rank 1', 'Rank 2'], descending=[True, True])

    # "Character_Ranking_The_Coach" -> "The Coach"
    labels = []
    for name in ordered['column']:
        without_prefix = name.replace('Character_Ranking_', '')
        labels.append(without_prefix.replace('_', ' ').strip())

    # (data column, legend entry, segment color), listed in stacking order
    segments = [
        ('Rank 1', 'Rank 1 (Best)', ColorPalette.RANK_1),
        ('Rank 2', 'Rank 2', ColorPalette.RANK_2),
        ('Rank 3', 'Rank 3', ColorPalette.RANK_3),
        ('Rank 4', 'Rank 4 (Worst)', ColorPalette.RANK_4),
    ]

    fig = go.Figure()
    for key, legend_name, segment_color in segments:
        bar = go.Bar(
            name=legend_name,
            x=labels,
            y=ordered[key],
            marker_color=segment_color,
            hovertemplate='<b>%{x}</b><br>' + key + ': %{y}<extra></extra>',
        )
        fig.add_trace(bar)

    grid_style = dict(showgrid=True, gridcolor=ColorPalette.GRID)
    # Horizontal legend sitting just above the plot area, right-aligned
    legend_style = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        traceorder="normal",
    )

    fig.update_layout(
        barmode='stack',
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(grid_style, tickangle=-45),
        yaxis=dict(grid_style),
        legend=legend_style,
        font=dict(size=11),
    )

    return fig


def plot_most_ranked_1_character(
    df: pl.DataFrame,
    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
    x_label: str = "Character Personality",
    y_label: str = "Count of 1st Place Rankings",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar chart showing which character personality was ranked #1 the most.

    Only columns whose name contains ``Character_Ranking`` are considered.
    Each such column becomes one bar (including columns with zero 1st-place
    votes), ordered by the number of 1st-place votes, highest first.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing character ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when ``df`` has no
        character ranking columns.
    """
    # Identify columns related to Character Ranking
    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    # Without ranking columns the stats frame would have no 'count' column and
    # the sort below would raise; return an empty figure instead, as
    # plot_character_ranking_distribution does.
    if not ranking_cols:
        return go.Figure()

    # Number of rows holding the value 1 in each ranking column
    stats = [
        {
            'column': col,
            'count': df.filter(pl.col(col) == 1).height,
        }
        for col in ranking_cols
    ]

    # Sort by count descending
    stats_df = pl.DataFrame(stats).sort('count', descending=True)

    # Clean up labels, e.g. "Character_Ranking_The_Coach" -> "The Coach"
    labels = [
        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
        for col in stats_df['column']
    ]

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=labels,
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='inside',
        textfont=dict(size=10, color='white'),
        marker_color=ColorPalette.PRIMARY,
        hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11)
    )

    return fig


def plot_weighted_ranking_score(
    weighted_df: pl.DataFrame,
    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
    x_label: str = "Character Personality",
    y_label: str = "Total Weighted Score",
    color: str = ColorPalette.PRIMARY,
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Draw one bar per character showing its weighted ranking score.

    The scores are taken as given: this function does no weighting or sorting
    of its own, it plots the rows of ``weighted_df`` in their existing order.

    Parameters
    ----------
    weighted_df : pl.DataFrame
        DataFrame with a ``Character`` column (bar labels) and a
        ``Weighted Score`` column (bar heights and in-bar text).
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    color : str, optional
        Bar color.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object.
    """
    characters = weighted_df['Character']
    scores = weighted_df['Weighted Score']

    # The score is both the bar height and the label printed inside the bar
    score_bars = go.Bar(
        x=characters,
        y=scores,
        text=scores,
        textposition='inside',
        textfont=dict(size=11, color='white'),
        marker_color=color,
        hovertemplate='<b>%{x}</b><br>Score: %{y}<extra></extra>',
    )

    fig = go.Figure(data=[score_bars])

    grid_style = dict(showgrid=True, gridcolor=ColorPalette.GRID)
    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(grid_style, tickangle=-45),
        yaxis=dict(grid_style),
        font=dict(size=11),
    )

    return fig