diff --git a/01_ingest_qualtrics_export.py b/01_ingest_qualtrics_export.py
index 14c347c..9ad93ff 100644
--- a/01_ingest_qualtrics_export.py
+++ b/01_ingest_qualtrics_export.py
@@ -44,6 +44,14 @@ def _(survey):
return
+# NOTE(review): this cell's source is the incomplete expression `data.`, which is
+# not valid Python and is therefore stored through app._unparsable_cell. It looks
+# like a half-typed line that was committed by accident - confirm and remove.
+app._unparsable_cell(
+ r"""
+ data.
+ """,
+ name="_"
+)
+
+
@app.cell
def _(mo):
mo.md(r"""
diff --git a/02_quant_analysis.py b/02_quant_analysis.py
index 7db89ff..6ffa7f4 100644
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -11,15 +11,20 @@ def _():
from pathlib import Path
from validation import check_progress, duration_validation
- from utils import JPMCSurvey, combine_exclusive_columns
- from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution
+ from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
+ from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
return (
JPMCSurvey,
Path,
+ calculate_weighted_ranking_scores,
check_progress,
duration_validation,
mo,
plot_average_scores_with_counts,
+ plot_character_ranking_distribution,
+ plot_most_ranked_1_character,
+ plot_top3_ranking_distribution,
+ plot_weighted_ranking_score,
)
@@ -108,12 +113,49 @@ def _(mo):
mo.md(r"""
## Character personality ranking
- 1. Which character personality is ranked best?
- 2. Which character personality is ranked number 1 the most?
+ ### 1. Which character personality is ranked best?
""")
return
+@app.cell
+def _(data, survey):
+    # Take the first element of the getter's result and materialise it with .collect().
+    _ranking_query = survey.get_character_ranking(data)[0]
+    char_rank = _ranking_query.collect()
+
+    return (char_rank,)
+
+
+@app.cell
+def _(char_rank, plot_character_ranking_distribution):
+    # Stacked distribution of rank votes per character personality.
+    plot_character_ranking_distribution(
+        char_rank,
+        x_label='Character Personality',
+        width=1000,
+    )
+    return
+
+
+@app.cell
+def _(mo):
+ mo.md(r"""
+ ### 2. Which character personality is ranked number 1 the most?
+ """)
+ return
+
+
+@app.cell
+def _(
+    calculate_weighted_ranking_scores,
+    char_rank,
+    plot_weighted_ranking_score,
+):
+    # Collapse the per-respondent rankings into one weighted score per character,
+    # then chart it. The x-axis shows character personalities (not voices), so the
+    # axis label has to say so.
+    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
+    plot_weighted_ranking_score(char_rank_weighted, x_label='Character Personality', width=1000)
+    return
+
+
+@app.cell
+def _(char_rank, plot_most_ranked_1_character):
+    # Bar chart of how often each character personality was ranked 1st.
+    plot_most_ranked_1_character(
+        char_rank,
+        x_label='Character Personality',
+        width=1000,
+    )
+    return
+
+
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
@@ -122,6 +164,13 @@ def _(mo):
return
+@app.cell
+def _(data, survey):
+    # Materialise the first element returned by get_18_8_3 and print its head.
+    _subset_query = survey.get_18_8_3(data)[0]
+    v_18_8_3 = _subset_query.collect()
+    _preview = v_18_8_3.head()
+    print(_preview)
+    return
+
+
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
@@ -147,6 +196,12 @@ def _(mo):
return
+@app.cell
+def _(plot_top3_ranking_distribution, top3_voices):
+    # Distribution of top-3 ranking votes per voice.
+    plot_top3_ranking_distribution(
+        top3_voices,
+        x_label='Voice',
+        width=1000,
+    )
+    return
+
+
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
diff --git a/plots.py b/plots.py
index 06d21f6..ff80272 100644
--- a/plots.py
+++ b/plots.py
@@ -5,6 +5,7 @@ import polars as pl
from theme import ColorPalette
+
def plot_average_scores_with_counts(
df: pl.DataFrame,
title: str = "General Impression (1-10)
Per Voice with Number of Participants Who Rated It",
@@ -213,3 +214,298 @@ def plot_top3_ranking_distribution(
)
return fig
+
+
+def plot_character_ranking_distribution(
+    df: pl.DataFrame,
+    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
+    x_label: str = "Character Personality",
+    y_label: str = "Number of Votes",
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Create a stacked bar chart showing the distribution of rankings (1st to 4th) for character personalities.
+
+    Bars are ordered by the number of Rank 1 votes (ties broken by Rank 2 votes),
+    both descending, so the most frequently top-ranked personality comes first.
+    Columns that received no rank 1-4 votes at all are left out of the chart.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame whose ranking columns contain 'Character_Ranking' in their name;
+        each such column holds the rank (1-4) given to one character.
+    title : str, optional
+        Plot title. '<br>' is Plotly's line-break marker.
+    x_label : str, optional
+        X-axis label.
+    y_label : str, optional
+        Y-axis label.
+    height : int, optional
+        Plot height in pixels.
+    width : int, optional
+        Plot width in pixels.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object. An empty figure is returned when no ranking column
+        has any rank 1-4 votes.
+    """
+    # One entry per stacked segment, in stacking order:
+    # (legend name, stats key / hover label, rank value, bar colour).
+    # Rank 4 uses a neutral grey to keep the focus on the top ranks.
+    rank_specs = [
+        ('Rank 1 (Best)', 'Rank 1', 1, ColorPalette.RANK_1),
+        ('Rank 2', 'Rank 2', 2, ColorPalette.RANK_2),
+        ('Rank 3', 'Rank 3', 3, ColorPalette.RANK_3),
+        ('Rank 4 (Worst)', 'Rank 4', 4, ColorPalette.RANK_4),
+    ]
+
+    # Identify columns related to Character Ranking
+    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+
+    stats = []
+    for col in ranking_cols:
+        # Number of rows in which this character received each rank
+        counts = {
+            key: df.filter(pl.col(col) == rank).height
+            for _, key, rank, _ in rank_specs
+        }
+        if sum(counts.values()) > 0:
+            stats.append({'column': col, **counts})
+
+    if not stats:
+        return go.Figure()
+
+    # Sort by Rank 1 (most "best" votes) descending to show the winner first,
+    # with Rank 2 as the secondary key.
+    stats_df = pl.DataFrame(stats).sort(['Rank 1', 'Rank 2'], descending=[True, True])
+
+    # Clean up labels: remove prefix and underscores,
+    # e.g. "Character_Ranking_The_Coach" -> "The Coach"
+    labels = [
+        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+        for col in stats_df['column']
+    ]
+
+    fig = go.Figure()
+
+    for trace_name, key, _, colour in rank_specs:
+        fig.add_trace(go.Bar(
+            name=trace_name,
+            x=labels,
+            y=stats_df[key],
+            marker_color=colour,
+            hovertemplate='%{x}<br>' + key + ': %{y}'
+        ))
+
+    fig.update_layout(
+        barmode='stack',
+        title=title,
+        xaxis_title=x_label,
+        yaxis_title=y_label,
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        xaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            tickangle=-45
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID
+        ),
+        legend=dict(
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=1,
+            traceorder="normal"
+        ),
+        font=dict(size=11)
+    )
+
+    return fig
+
+
+def plot_most_ranked_1_character(
+    df: pl.DataFrame,
+    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
+    x_label: str = "Character Personality",
+    y_label: str = "Count of 1st Place Rankings",
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Create a bar chart showing which character personality was ranked #1 the most.
+
+    Bars are ordered by their number of 1st-place rankings, descending.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame whose ranking columns contain 'Character_Ranking' in their name.
+    title : str, optional
+        Plot title. '<br>' is Plotly's line-break marker.
+    x_label : str, optional
+        X-axis label.
+    y_label : str, optional
+        Y-axis label.
+    height : int, optional
+        Plot height in pixels.
+    width : int, optional
+        Plot width in pixels.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object. An empty figure is returned when `df` has no
+        character ranking columns.
+    """
+    # Identify columns related to Character Ranking
+    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+
+    if not ranking_cols:
+        # With nothing to count, the stats frame would have no 'count' column
+        # and the sort below would raise; return an empty figure instead.
+        return go.Figure()
+
+    # Count occurrences of rank 1 per character column
+    stats = [
+        {'column': col, 'count': df.filter(pl.col(col) == 1).height}
+        for col in ranking_cols
+    ]
+
+    # Sort by count descending
+    stats_df = pl.DataFrame(stats).sort('count', descending=True)
+
+    # Clean up labels, e.g. "Character_Ranking_The_Coach" -> "The Coach"
+    labels = [
+        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+        for col in stats_df['column']
+    ]
+
+    fig = go.Figure()
+
+    fig.add_trace(go.Bar(
+        x=labels,
+        y=stats_df['count'],
+        text=stats_df['count'],
+        textposition='inside',
+        textfont=dict(size=10, color='white'),
+        marker_color=ColorPalette.PRIMARY,
+        hovertemplate='%{x}<br>1st Place Votes: %{y}'
+    ))
+
+    fig.update_layout(
+        title=title,
+        xaxis_title=x_label,
+        yaxis_title=y_label,
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        xaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            tickangle=-45
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID
+        ),
+        font=dict(size=11)
+    )
+
+    return fig
+
+
+
+def plot_weighted_ranking_score(
+    weighted_df: pl.DataFrame,
+    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
+    x_label: str = "Character Personality",
+    y_label: str = "Total Weighted Score",
+    color: str = ColorPalette.PRIMARY,
+    height: int = 500,
+    width: int = 1000,
+) -> go.Figure:
+    """
+    Create a bar chart showing the weighted ranking score for each character.
+
+    Bars are drawn in the row order of `weighted_df`; no sorting is done here.
+
+    Parameters
+    ----------
+    weighted_df : pl.DataFrame
+        DataFrame with a 'Character' column (bar labels) and a 'Weighted Score'
+        column (bar heights, also printed inside each bar).
+    title : str, optional
+        Plot title. '<br>' is Plotly's line-break marker.
+    x_label : str, optional
+        X-axis label.
+    y_label : str, optional
+        Y-axis label.
+    color : str, optional
+        Bar color.
+    height : int, optional
+        Plot height in pixels.
+    width : int, optional
+        Plot width in pixels.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure object.
+    """
+
+    fig = go.Figure()
+
+    fig.add_trace(go.Bar(
+        x=weighted_df['Character'],
+        y=weighted_df['Weighted Score'],
+        text=weighted_df['Weighted Score'],
+        textposition='inside',
+        textfont=dict(size=11, color='white'),
+        marker_color=color,
+        hovertemplate='%{x}<br>Score: %{y}'
+    ))
+
+    fig.update_layout(
+        title=title,
+        xaxis_title=x_label,
+        yaxis_title=y_label,
+        height=height,
+        width=width,
+        plot_bgcolor=ColorPalette.BACKGROUND,
+        xaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID,
+            tickangle=-45
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor=ColorPalette.GRID
+        ),
+        font=dict(size=11)
+    )
+
+    return fig
\ No newline at end of file
diff --git a/theme.py b/theme.py
index be5658a..589292f 100644
--- a/theme.py
+++ b/theme.py
@@ -14,6 +14,7 @@ class ColorPalette:
RANK_1 = "#004C6D" # Dark Blue (1st Choice)
RANK_2 = "#008493" # Teal (2nd Choice)
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
+ RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
# General UI elements
TEXT = "black"
diff --git a/utils.py b/utils.py
index 8432093..c8435b5 100644
--- a/utils.py
+++ b/utils.py
@@ -55,6 +55,45 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
+def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
+    """
+    Calculate weighted scores for character rankings.
+    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
+    Any other rank contributes nothing to the score.
+
+    Parameters
+    ----------
+    df : pl.DataFrame
+        DataFrame whose ranking columns contain 'Character_Ranking' in their name.
+
+    Returns
+    -------
+    pl.DataFrame
+        DataFrame with columns 'Character' and 'Weighted Score', sorted by score
+        (highest first). Empty, with the same two columns, when `df` has no
+        character ranking columns.
+    """
+    # Points awarded for each rank value.
+    rank_points = {1: 3, 2: 2, 3: 1}
+
+    # Identify columns related to Character Ranking
+    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
+
+    if not ranking_cols:
+        # A frame built from an empty list has no columns, so sorting it by
+        # 'Weighted Score' would raise; return an empty, typed frame instead.
+        return pl.DataFrame(schema={'Character': pl.Utf8, 'Weighted Score': pl.Int64})
+
+    scores = []
+    for col in ranking_cols:
+        # Sum of (number of rows with this rank) * (points for this rank)
+        weighted_score = sum(
+            df.filter(pl.col(col) == rank).height * points
+            for rank, points in rank_points.items()
+        )
+
+        # Clean name, e.g. "Character_Ranking_The_Coach" -> "The Coach"
+        clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
+
+        scores.append({
+            'Character': clean_name,
+            'Weighted Score': weighted_score
+        })
+
+    return pl.DataFrame(scores).sort('Weighted Score', descending=True)
+
+
class JPMCSurvey:
"""Class to handle JPMorgan Chase survey data."""
@@ -249,9 +288,19 @@ class JPMCSurvey:
rename_dict = {
'QID29': '18-8_Set-A',
'QID101': '18-8_Set-B',
- 'QID36_0_GROUP': '8-3_Ranked'
+ 'QID36_0_GROUP': '3_Ranked'
}
- return self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict), None
+
+ subset = self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict)
+
+ # Combine 18-8 Set A and Set B into single column
+ subset = subset.with_columns(
+ pl.coalesce(['18-8_Set-A', '18-8_Set-B']).alias('8_Combined')
+ )
+ # Change order of columns
+ subset = subset.select(['_recordId', '18-8_Set-A', '18-8_Set-B', '8_Combined', '3_Ranked'])
+
+ return subset, None
def get_voice_scale_1_10(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
@@ -363,4 +412,9 @@ class JPMCSurvey:
"""
QIDs = ['QID44', 'QID97', 'QID95', 'QID96']
- return self._get_subset(q, QIDs, rename_cols=True), None
\ No newline at end of file
+ return self._get_subset(q, QIDs, rename_cols=True), None
+
+
+
+
+