Character personality ranking complete
This commit is contained in:
@@ -44,6 +44,14 @@ def _(survey):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
app._unparsable_cell(
|
||||||
|
r"""
|
||||||
|
data.
|
||||||
|
""",
|
||||||
|
name="_"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
|
|||||||
@@ -11,15 +11,20 @@ def _():
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from validation import check_progress, duration_validation
|
from validation import check_progress, duration_validation
|
||||||
from utils import JPMCSurvey, combine_exclusive_columns
|
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||||
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution
|
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
|
||||||
return (
|
return (
|
||||||
JPMCSurvey,
|
JPMCSurvey,
|
||||||
Path,
|
Path,
|
||||||
|
calculate_weighted_ranking_scores,
|
||||||
check_progress,
|
check_progress,
|
||||||
duration_validation,
|
duration_validation,
|
||||||
mo,
|
mo,
|
||||||
plot_average_scores_with_counts,
|
plot_average_scores_with_counts,
|
||||||
|
plot_character_ranking_distribution,
|
||||||
|
plot_most_ranked_1_character,
|
||||||
|
plot_top3_ranking_distribution,
|
||||||
|
plot_weighted_ranking_score,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -108,12 +113,49 @@ def _(mo):
|
|||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
## Character personality ranking
|
## Character personality ranking
|
||||||
|
|
||||||
1. Which character personality is ranked best?
|
### 1. Which character personality is ranked best?
|
||||||
2. Which character personality is ranked number 1 the most?
|
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(data, survey):
    # get_character_ranking returns an indexable result; only its first item
    # is used, and it is materialised with .collect().
    _ranking_result = survey.get_character_ranking(data)
    char_rank = _ranking_result[0].collect()
    return (char_rank,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(char_rank, plot_character_ranking_distribution):
    # Stacked distribution of rank 1-4 votes per character personality.
    plot_character_ranking_distribution(
        char_rank,
        x_label='Character Personality',
        width=1000,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(mo):
    # Section heading for the "ranked number 1" analysis.
    mo.md(
        r"""
    ### 2. Which character personality is ranked number 1 the most?
    """
    )
    return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(
|
||||||
|
calculate_weighted_ranking_scores,
|
||||||
|
char_rank,
|
||||||
|
plot_weighted_ranking_score,
|
||||||
|
):
|
||||||
|
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
|
||||||
|
plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(char_rank, plot_most_ranked_1_character):
    # Bar chart of how often each character personality was ranked first.
    plot_most_ranked_1_character(
        char_rank,
        x_label='Character Personality',
        width=1000,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
@@ -122,6 +164,13 @@ def _(mo):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(data, survey):
    # get_18_8_3 returns an indexable result; its first item is collected
    # and the first rows are printed for inspection.
    _subset_result = survey.get_18_8_3(data)
    v_18_8_3 = _subset_result[0].collect()
    print(v_18_8_3.head())
    return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
@@ -147,6 +196,12 @@ def _(mo):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(plot_top3_ranking_distribution, top3_voices):
    # Ranking distribution chart for the top-3 voices.
    plot_top3_ranking_distribution(
        top3_voices,
        x_label='Voice',
        width=1000,
    )
    return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
|
|||||||
296
plots.py
296
plots.py
@@ -5,6 +5,7 @@ import polars as pl
|
|||||||
from theme import ColorPalette
|
from theme import ColorPalette
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def plot_average_scores_with_counts(
|
def plot_average_scores_with_counts(
|
||||||
df: pl.DataFrame,
|
df: pl.DataFrame,
|
||||||
title: str = "General Impression (1-10)<br>Per Voice with Number of Participants Who Rated It",
|
title: str = "General Impression (1-10)<br>Per Voice with Number of Participants Who Rated It",
|
||||||
@@ -213,3 +214,298 @@ def plot_top3_ranking_distribution(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def plot_character_ranking_distribution(
    df: pl.DataFrame,
    title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
    x_label: str = "Character Personality",
    y_label: str = "Number of Votes",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Draw a stacked bar chart of how often each character received rank 1 to 4.

    Every column whose name contains 'Character_Ranking' is treated as one
    character. Columns without any vote at ranks 1-4 are left out. The
    remaining bars are ordered by their Rank 1 count, then their Rank 2
    count (both descending), so the most-preferred character comes first.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame holding the character ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        The stacked bar chart, or an empty figure when no ranking column
        has any vote.
    """
    rank_keys = ('Rank 1', 'Rank 2', 'Rank 3', 'Rank 4')

    tallies = []
    for name in df.columns:
        if 'Character_Ranking' not in name:
            continue
        # One filtered row count per rank position (1 = best ... 4 = worst).
        votes = {
            key: df.filter(pl.col(name) == place).height
            for place, key in enumerate(rank_keys, start=1)
        }
        if sum(votes.values()) > 0:
            tallies.append({'column': name, **votes})

    if len(tallies) == 0:
        return go.Figure()

    # Winner first: most Rank 1 votes, ties broken by Rank 2 votes.
    ordered = pl.DataFrame(tallies).sort(
        ['Rank 1', 'Rank 2'],
        descending=[True, True],
    )

    # Human-readable tick labels, e.g. "Character_Ranking_The_Coach" -> "The Coach".
    tick_labels = []
    for raw_name in ordered['column']:
        tick_labels.append(
            raw_name.replace('Character_Ranking_', '').replace('_', ' ').strip()
        )

    # (tally column, legend entry, bar colour), listed in stacking order.
    # The lowest rank uses a neutral grey to keep the focus on the top ranks.
    trace_specs = (
        ('Rank 1', 'Rank 1 (Best)', ColorPalette.RANK_1),
        ('Rank 2', 'Rank 2', ColorPalette.RANK_2),
        ('Rank 3', 'Rank 3', ColorPalette.RANK_3),
        ('Rank 4', 'Rank 4 (Worst)', ColorPalette.RANK_4),
    )

    fig = go.Figure()
    for key, legend_name, colour in trace_specs:
        fig.add_trace(go.Bar(
            name=legend_name,
            x=tick_labels,
            y=ordered[key],
            marker_color=colour,
            hovertemplate='<b>%{x}</b><br>' + key + ': %{y}<extra></extra>',
        ))

    x_axis = {
        'showgrid': True,
        'gridcolor': ColorPalette.GRID,
        'tickangle': -45,
    }
    y_axis = {
        'showgrid': True,
        'gridcolor': ColorPalette.GRID,
    }
    # Horizontal legend placed just above the plot area, right-aligned.
    legend = {
        'orientation': "h",
        'yanchor': "bottom",
        'y': 1.02,
        'xanchor': "right",
        'x': 1,
        'traceorder': "normal",
    }
    fig.update_layout(
        barmode='stack',
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=x_axis,
        yaxis=y_axis,
        legend=legend,
        font={'size': 11},
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def plot_most_ranked_1_character(
    df: pl.DataFrame,
    title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
    x_label: str = "Character Personality",
    y_label: str = "Count of 1st Place Rankings",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar chart showing which character personality was ranked #1 the most.

    Every column whose name contains 'Character_Ranking' is treated as one
    character; bars are ordered by their number of rank-1 votes, descending.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing character ranking columns.
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when `df` has no
        character ranking columns.
    """
    # Identify columns related to Character Ranking
    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    if not ranking_cols:
        # Without any ranking column the stats frame would have no 'count'
        # column and sorting it would raise; return an empty figure instead.
        return go.Figure()

    # Count occurrences of rank 1 per character column
    stats = [
        {'column': col, 'count': df.filter(pl.col(col) == 1).height}
        for col in ranking_cols
    ]

    # Sort by count descending
    stats_df = pl.DataFrame(stats).sort('count', descending=True)

    # Clean up labels, e.g. "Character_Ranking_The_Coach" -> "The Coach"
    labels = [
        col.replace('Character_Ranking_', '').replace('_', ' ').strip()
        for col in stats_df['column']
    ]

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=labels,
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='inside',
        textfont=dict(size=10, color='white'),
        marker_color=ColorPalette.PRIMARY,
        hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11)
    )

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def plot_weighted_ranking_score(
    weighted_df: pl.DataFrame,
    title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
    x_label: str = "Character Personality",
    y_label: str = "Total Weighted Score",
    color: str = ColorPalette.PRIMARY,
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Draw one bar per character showing its weighted ranking score.

    Bars appear in the row order of `weighted_df`; no sorting is done here.

    Parameters
    ----------
    weighted_df : pl.DataFrame
        DataFrame with a 'Character' column (bar labels) and a
        'Weighted Score' column (bar heights, also printed inside each bar).
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    color : str, optional
        Bar color.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object.
    """
    characters = weighted_df['Character']
    scores = weighted_df['Weighted Score']

    score_bars = go.Bar(
        x=characters,
        y=scores,
        text=scores,
        textposition='inside',
        textfont={'size': 11, 'color': 'white'},
        marker_color=color,
        hovertemplate='<b>%{x}</b><br>Score: %{y}<extra></extra>',
    )

    fig = go.Figure()
    fig.add_trace(score_bars)

    x_axis = {
        'showgrid': True,
        'gridcolor': ColorPalette.GRID,
        'tickangle': -45,
    }
    y_axis = {
        'showgrid': True,
        'gridcolor': ColorPalette.GRID,
    }
    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=x_axis,
        yaxis=y_axis,
        font={'size': 11},
    )

    return fig
|
||||||
1
theme.py
1
theme.py
@@ -14,6 +14,7 @@ class ColorPalette:
|
|||||||
RANK_1 = "#004C6D" # Dark Blue (1st Choice)
|
RANK_1 = "#004C6D" # Dark Blue (1st Choice)
|
||||||
RANK_2 = "#008493" # Teal (2nd Choice)
|
RANK_2 = "#008493" # Teal (2nd Choice)
|
||||||
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
|
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
|
||||||
|
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
|
||||||
|
|
||||||
# General UI elements
|
# General UI elements
|
||||||
TEXT = "black"
|
TEXT = "black"
|
||||||
|
|||||||
60
utils.py
60
utils.py
@@ -55,6 +55,45 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
    """
    Calculate weighted scores for character rankings.

    Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
    Any other rank value (or a missing value) contributes nothing.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing character ranking columns, i.e. columns whose
        name contains 'Character_Ranking'.

    Returns
    -------
    pl.DataFrame
        DataFrame with columns 'Character' and 'Weighted Score', sorted by
        score in descending order. When `df` has no character ranking
        columns, an empty frame with those two columns is returned.
    """
    # Points awarded per rank position; ranks not listed here score nothing.
    rank_points = {1: 3, 2: 2, 3: 1}

    # Identify columns related to Character Ranking
    ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]

    if not ranking_cols:
        # A frame built from an empty list has no 'Weighted Score' column, so
        # sorting it would raise. Return a typed, empty result instead.
        return pl.DataFrame(
            schema={'Character': pl.Utf8, 'Weighted Score': pl.Int64}
        )

    scores = [
        {
            # Clean name, e.g. "Character_Ranking_The_Coach" -> "The Coach"
            'Character': col.replace('Character_Ranking_', '').replace('_', ' ').strip(),
            # (Count of Rank 1 * 3) + (Count of Rank 2 * 2) + (Count of Rank 3 * 1)
            'Weighted Score': sum(
                points * df.filter(pl.col(col) == rank).height
                for rank, points in rank_points.items()
            ),
        }
        for col in ranking_cols
    ]

    return pl.DataFrame(scores).sort('Weighted Score', descending=True)
|
||||||
|
|
||||||
|
|
||||||
class JPMCSurvey:
|
class JPMCSurvey:
|
||||||
"""Class to handle JPMorgan Chase survey data."""
|
"""Class to handle JPMorgan Chase survey data."""
|
||||||
|
|
||||||
@@ -249,9 +288,19 @@ class JPMCSurvey:
|
|||||||
rename_dict = {
|
rename_dict = {
|
||||||
'QID29': '18-8_Set-A',
|
'QID29': '18-8_Set-A',
|
||||||
'QID101': '18-8_Set-B',
|
'QID101': '18-8_Set-B',
|
||||||
'QID36_0_GROUP': '8-3_Ranked'
|
'QID36_0_GROUP': '3_Ranked'
|
||||||
}
|
}
|
||||||
return self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict), None
|
|
||||||
|
subset = self._get_subset(q, QIDs, rename_cols=False).rename(rename_dict)
|
||||||
|
|
||||||
|
# Combine 18-8 Set A and Set B into single column
|
||||||
|
subset = subset.with_columns(
|
||||||
|
pl.coalesce(['18-8_Set-A', '18-8_Set-B']).alias('8_Combined')
|
||||||
|
)
|
||||||
|
# Change order of columns
|
||||||
|
subset = subset.select(['_recordId', '18-8_Set-A', '18-8_Set-B', '8_Combined', '3_Ranked'])
|
||||||
|
|
||||||
|
return subset, None
|
||||||
|
|
||||||
|
|
||||||
def get_voice_scale_1_10(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
|
def get_voice_scale_1_10(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
|
||||||
@@ -363,4 +412,9 @@ class JPMCSurvey:
|
|||||||
"""
|
"""
|
||||||
QIDs = ['QID44', 'QID97', 'QID95', 'QID96']
|
QIDs = ['QID44', 'QID97', 'QID95', 'QID96']
|
||||||
|
|
||||||
return self._get_subset(q, QIDs, rename_cols=True), None
|
return self._get_subset(q, QIDs, rename_cols=True), None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user