From 424355f4a1ad1a2c363fd7629a19151e7463848d Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Fri, 23 Jan 2026 10:50:26 +0100 Subject: [PATCH] Character personality ranking complete --- 01_ingest_qualtrics_export.py | 8 + 02_quant_analysis.py | 63 +++++++- plots.py | 296 ++++++++++++++++++++++++++++++++++ theme.py | 1 + utils.py | 60 ++++++- 5 files changed, 421 insertions(+), 7 deletions(-) diff --git a/01_ingest_qualtrics_export.py b/01_ingest_qualtrics_export.py index 14c347c..9ad93ff 100644 --- a/01_ingest_qualtrics_export.py +++ b/01_ingest_qualtrics_export.py @@ -44,6 +44,14 @@ def _(survey): return +app._unparsable_cell( + r""" + data. + """, + name="_" +) + + @app.cell def _(mo): mo.md(r""" diff --git a/02_quant_analysis.py b/02_quant_analysis.py index 7db89ff..6ffa7f4 100644 --- a/02_quant_analysis.py +++ b/02_quant_analysis.py @@ -11,15 +11,20 @@ def _(): from pathlib import Path from validation import check_progress, duration_validation - from utils import JPMCSurvey, combine_exclusive_columns - from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution + from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores + from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score return ( JPMCSurvey, Path, + calculate_weighted_ranking_scores, check_progress, duration_validation, mo, plot_average_scores_with_counts, + plot_character_ranking_distribution, + plot_most_ranked_1_character, + plot_top3_ranking_distribution, + plot_weighted_ranking_score, ) @@ -108,12 +113,49 @@ def _(mo): mo.md(r""" ## Character personality ranking - 1. Which character personality is ranked best? - 2. Which character personality is ranked number 1 the most? + ### 1. Which character personality is ranked best? 
@app.cell
def _(data, survey):
    # Collect the character-ranking subset once; the plotting cells below all
    # take `char_rank` as their input.
    char_rank = survey.get_character_ranking(data)[0].collect()
    return (char_rank,)


@app.cell
def _(char_rank, plot_character_ranking_distribution):
    # Stacked distribution of 1st-4th place votes per character personality.
    plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
    return


@app.cell
def _(mo):
    mo.md(r"""
    ### 2. Which character personality is ranked number 1 the most?
    """)
    return


@app.cell
def _(
    calculate_weighted_ranking_scores,
    char_rank,
    plot_weighted_ranking_score,
):
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
    # The bars are character personalities, not voices, so label the axis
    # accordingly (the label was previously copied over from the voice plots).
    plot_weighted_ranking_score(char_rank_weighted, x_label='Character Personality', width=1000)
    return


@app.cell
def _(char_rank, plot_most_ranked_1_character):
    # Count of 1st-place rankings per character personality.
    plot_most_ranked_1_character(char_rank, x_label='Character Personality', width=1000)
    return


@app.cell
def _(data, survey):
    # Quick look at the 18 -> 8 -> 3 voice-selection subset.
    v_18_8_3 = survey.get_18_8_3(data)[0].collect()
    print(v_18_8_3.head())
    return


@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
    plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
    return
def plot_character_ranking_distribution(
    df: pl.DataFrame,
    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
    x_label: str = "Character Personality",
    y_label: str = "Number of Votes",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a stacked bar chart of how often each character personality was
    ranked 1st, 2nd, 3rd and 4th.

    Bars are ordered by the number of Rank 1 votes (ties broken by Rank 2
    votes), so the most-preferred personality appears first.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame whose character ranking columns contain 'Character_Ranking'
        in their name and hold the rank (1-4) given by each respondent.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when no ranking
        column has any vote at ranks 1-4.
    """
    # (rank value, legend entry, bar colour), listed in stacking order.
    rank_specs = [
        (1, 'Rank 1 (Best)', ColorPalette.RANK_1),
        (2, 'Rank 2', ColorPalette.RANK_2),
        (3, 'Rank 3', ColorPalette.RANK_3),
        (4, 'Rank 4 (Worst)', ColorPalette.RANK_4),
    ]

    # Ranking columns are recognised by name; any other column is ignored.
    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    stats = []
    for col in ranking_cols:
        # Number of respondents who gave this character each rank.
        counts = {
            f'Rank {rank}': df.filter(pl.col(col) == rank).height
            for rank, _, _ in rank_specs
        }
        # Leave out characters that received no votes at all.
        if sum(counts.values()) > 0:
            stats.append({'column': col, **counts})

    if not stats:
        return go.Figure()

    # Most 1st-place votes first; 2nd-place votes break ties.
    stats_df = pl.DataFrame(stats).sort(['Rank 1', 'Rank 2'], descending=[True, True])

    # Turn column names into display labels,
    # e.g. "Character_Ranking_The_Coach" -> "The Coach".
    labels = [
        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
        for col in stats_df['column']
    ]

    fig = go.Figure()

    # One stacked segment per rank; "<br>" is Plotly's line-break markup.
    for rank, legend_name, colour in rank_specs:
        fig.add_trace(go.Bar(
            name=legend_name,
            x=labels,
            y=stats_df[f'Rank {rank}'].to_list(),
            marker_color=colour,
            hovertemplate=f'%{{x}}<br>Rank {rank}: %{{y}}',
        ))

    fig.update_layout(
        barmode='stack',
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45,
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            traceorder="normal",
        ),
        font=dict(size=11),
    )

    return fig
def plot_most_ranked_1_character(
    df: pl.DataFrame,
    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
    x_label: str = "Character Personality",
    y_label: str = "Count of 1st Place Rankings",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar chart showing which character personality was ranked #1 the most.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame whose character ranking columns contain 'Character_Ranking'
        in their name and hold the rank given by each respondent.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when `df` has no
        character ranking columns.
    """
    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    # Without ranking columns there is nothing to count, and sorting a
    # column-less frame by 'count' would raise; return an empty figure
    # instead, as plot_character_ranking_distribution does.
    if not ranking_cols:
        return go.Figure()

    # Number of respondents who put each character in 1st place.
    stats = [
        {'column': col, 'count': df.filter(pl.col(col) == 1).height}
        for col in ranking_cols
    ]

    # Most 1st-place votes first.
    stats_df = pl.DataFrame(stats).sort('count', descending=True)

    # Turn column names into display labels,
    # e.g. "Character_Ranking_The_Coach" -> "The Coach".
    labels = [
        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
        for col in stats_df['column']
    ]
    counts = stats_df['count'].to_list()

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=labels,
        y=counts,
        text=counts,
        textposition='inside',
        textfont=dict(size=10, color='white'),
        marker_color=ColorPalette.PRIMARY,
        # "<br>" is Plotly's line-break markup.
        hovertemplate='%{x}<br>1st Place Votes: %{y}',
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45,
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
        ),
        font=dict(size=11),
    )

    return fig
def plot_weighted_ranking_score(
    weighted_df: pl.DataFrame,
    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
    x_label: str = "Character Personality",
    y_label: str = "Total Weighted Score",
    color: str = ColorPalette.PRIMARY,
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar chart showing the weighted ranking score for each character.

    Bars are drawn in the row order of `weighted_df`; this function does not
    sort or aggregate.

    Parameters
    ----------
    weighted_df : pl.DataFrame
        DataFrame with a 'Character' column (bar labels) and a
        'Weighted Score' column (bar heights).
    title : str, optional
        Plot title. The default text states a 3/2/1 point scheme; pass a
        different title if the scores were computed with other weights.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    color : str, optional
        Bar color.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object.
    """
    characters = weighted_df['Character'].to_list()
    scores = weighted_df['Weighted Score'].to_list()

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=characters,
        y=scores,
        # Print the score inside each bar as well as on hover.
        text=scores,
        textposition='inside',
        textfont=dict(size=11, color='white'),
        marker_color=color,
        # "<br>" is Plotly's line-break markup.
        hovertemplate='%{x}<br>Score: %{y}',
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45,
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
        ),
        font=dict(size=11),
    )

    return fig
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
    """
    Calculate weighted scores for character rankings.
    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
    Any other rank, and missing values, contribute no points.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame whose character ranking columns contain 'Character_Ranking'
        in their name and hold the rank given by each respondent.

    Returns
    -------
    pl.DataFrame
        DataFrame with columns 'Character' and 'Weighted Score', sorted by
        score (highest first). Empty, but with both columns present, when
        `df` has no character ranking columns.
    """
    # Points awarded per rank.
    rank_points = {1: 3, 2: 2, 3: 1}

    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    scores = [
        {
            # e.g. "Character_Ranking_The_Coach" -> "The Coach"
            'Character': col.replace('Character_Ranking_', '').replace('_', ' ').strip(),
            # Sum over ranks of (respondents giving that rank) * (points for it).
            'Weighted Score': sum(
                df.filter(pl.col(col) == rank).height * points
                for rank, points in rank_points.items()
            ),
        }
        for col in ranking_cols
    ]

    # The explicit schema keeps both columns when `scores` is empty; without
    # it the frame would have no columns and the sort below would raise.
    return pl.DataFrame(
        scores,
        schema={'Character': pl.Utf8, 'Weighted Score': pl.Int64},
    ).sort('Weighted Score', descending=True)