speaking style trait scores vertical

This commit is contained in:
2026-01-23 12:26:47 +01:00
parent 424355f4a1
commit 84a0f8052e
5 changed files with 615 additions and 90 deletions

384
plots.py
View File

@@ -216,22 +216,22 @@ def plot_top3_ranking_distribution(
return fig
def plot_character_ranking_distribution(
def plot_ranking_distribution(
df: pl.DataFrame,
title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
x_label: str = "Character Personality",
title: str = "Rankings Distribution<br>(1st to 4th Place)",
x_label: str = "Item",
y_label: str = "Number of Votes",
height: int = 500,
width: int = 1000,
) -> go.Figure:
"""
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for character personalities.
Sorted by the number of Rank 1 votes to highlight the 'Best' options.
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
Sorted by the number of Rank 1 votes.
Parameters
----------
df : pl.DataFrame
DataFrame containing character ranking columns (prefix 'Character_Ranking').
DataFrame containing ranking columns.
title : str, optional
Plot title.
x_label : str, optional
@@ -249,8 +249,8 @@ def plot_character_ranking_distribution(
Plotly figure object.
"""
stats = []
# Identify columns related to Character Ranking (excluding ID)
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
# Identify ranking columns (assume all columns except _recordId)
ranking_cols = [c for c in df.columns if c != '_recordId']
for col in ranking_cols:
# Count occurrences of each rank (1, 2, 3, 4)
@@ -280,7 +280,7 @@ def plot_character_ranking_distribution(
# Clean up labels: Remove prefix and underscores
# e.g. "Character_Ranking_The_Coach" -> "The Coach"
labels = [
col.replace('Character_Ranking_', '').replace('_', ' ').strip()
col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
for col in stats_df['column']
]
@@ -354,21 +354,22 @@ def plot_character_ranking_distribution(
return fig
def plot_most_ranked_1_character(
def plot_most_ranked_1(
df: pl.DataFrame,
title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
x_label: str = "Character Personality",
title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
x_label: str = "Item",
y_label: str = "Count of 1st Place Rankings",
height: int = 500,
width: int = 1000,
) -> go.Figure:
"""
Create a bar chart showing which character personality was ranked #1 the most.
Create a bar chart showing which item (character/voice) was ranked #1 the most.
Top 3 items are highlighted.
Parameters
----------
df : pl.DataFrame
DataFrame containing character ranking columns.
DataFrame containing ranking columns.
title : str, optional
Plot title.
x_label : str, optional
@@ -386,8 +387,8 @@ def plot_most_ranked_1_character(
Plotly figure object.
"""
stats = []
# Identify columns related to Character Ranking
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
# Identify ranking columns (assume all columns except _recordId)
ranking_cols = [c for c in df.columns if c != '_recordId']
for col in ranking_cols:
# Count occurrences of rank 1
@@ -403,19 +404,25 @@ def plot_most_ranked_1_character(
# Clean up labels
labels = [
col.replace('Character_Ranking_', '').replace('_', ' ').strip()
col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
for col in stats_df['column']
]
fig = go.Figure()
# Assign colors: Top 3 get PRIMARY (Blue), others get NEUTRAL (Grey)
colors = [
ColorPalette.PRIMARY if i < 3 else ColorPalette.NEUTRAL
for i in range(len(stats_df))
]
fig = go.Figure()
fig.add_trace(go.Bar(
x=labels,
y=stats_df['count'],
text=stats_df['count'],
textposition='inside',
textfont=dict(size=10, color='white'),
marker_color=ColorPalette.PRIMARY,
marker_color=colors,
hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
))
@@ -444,7 +451,7 @@ def plot_most_ranked_1_character(
def plot_weighted_ranking_score(
weighted_df: pl.DataFrame,
title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
x_label: str = "Character Personality",
y_label: str = "Total Weighted Score",
color: str = ColorPalette.PRIMARY,
@@ -508,4 +515,339 @@ def plot_weighted_ranking_score(
font=dict(size=11)
)
return fig
return fig
def plot_voice_selection_counts(
    df: pl.DataFrame,
    target_column: str = "8_Combined",
    title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Number of Times Chosen",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar plot showing how often each voice was selected.

    Each cell of ``target_column`` holds a comma-separated list of voices
    (e.g. "Voice 1, Voice 2"). The cells are split, whitespace-trimmed and
    counted; bars are ordered by count (highest first) and the first 8 bars
    are drawn in the primary colour, the rest in the neutral colour.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the selection column.
    target_column : str, optional
        Name of the column containing comma-separated voice selections.
        Defaults to "8_Combined".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when
        ``target_column`` is not present in ``df``.
    """
    if target_column not in df.columns:
        return go.Figure()

    highlighted_bars = 8

    # One row per individual selection: split the comma-separated cell,
    # explode the resulting list, and trim whitespace so that "Voice 1" and
    # " Voice 1" are counted as the same voice.
    selections = (
        df.select(pl.col(target_column))
        .drop_nulls()
        .with_columns(pl.col(target_column).str.split(","))
        .explode(target_column)
        .with_columns(pl.col(target_column).str.strip_chars())
        .filter(pl.col(target_column) != "")
    )

    # group_by does not guarantee any group order, so sorting on "count"
    # alone would leave tied voices in an arbitrary order and make the
    # highlighted set vary between runs. The voice name is used as a
    # secondary key to keep the bar order reproducible.
    stats_df = (
        selections.group_by(target_column)
        .agg(pl.len().alias("count"))
        .sort(["count", target_column], descending=[True, False])
    )

    # Leading bars get PRIMARY, the remainder NEUTRAL.
    n_bars = stats_df.height
    n_primary = min(highlighted_bars, n_bars)
    colors = (
        [ColorPalette.PRIMARY] * n_primary
        + [ColorPalette.NEUTRAL] * (n_bars - n_primary)
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats_df[target_column],
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='outside',
        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>Selections: %{y}<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11),
    )
    return fig
def plot_top3_selection_counts(
    df: pl.DataFrame,
    target_column: str = "3_Ranked",
    title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Count of Mentions in Top 3",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar plot showing how often each voice appears in a Top 3.

    Research question: which 3 voices are chosen the most out of 18, i.e.
    how many times does each voice end up in a participant's Top 3? This is
    based on the survey question where participants pick 3 of the 8 voices
    they selected earlier (out of 18 stimuli).

    Each cell of ``target_column`` holds a comma-separated list of voices.
    The cells are split, whitespace-trimmed and counted; bars are ordered by
    count (highest first) and the first 3 bars are drawn in the primary
    colour, the rest in the neutral colour.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the ranking column (comma-separated strings).
    target_column : str, optional
        Name of the column containing comma-separated Top 3 voice selections.
        Defaults to "3_Ranked".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when
        ``target_column`` is not present in ``df``.
    """
    if target_column not in df.columns:
        return go.Figure()

    highlighted_bars = 3

    # One row per individual mention: split the comma-separated cell,
    # explode the resulting list, and trim whitespace so differently padded
    # spellings of the same voice are counted together.
    mentions = (
        df.select(pl.col(target_column))
        .drop_nulls()
        .with_columns(pl.col(target_column).str.split(","))
        .explode(target_column)
        .with_columns(pl.col(target_column).str.strip_chars())
        .filter(pl.col(target_column) != "")
    )

    # group_by does not guarantee any group order, so sorting on "count"
    # alone would leave tied voices in an arbitrary order and make the
    # highlighted Top 3 vary between runs. The voice name is used as a
    # secondary key to keep the bar order reproducible.
    stats_df = (
        mentions.group_by(target_column)
        .agg(pl.len().alias("count"))
        .sort(["count", target_column], descending=[True, False])
    )

    # Leading bars get PRIMARY, the remainder NEUTRAL.
    n_bars = stats_df.height
    n_primary = min(highlighted_bars, n_bars)
    colors = (
        [ColorPalette.PRIMARY] * n_primary
        + [ColorPalette.NEUTRAL] * (n_bars - n_primary)
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats_df[target_column],
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='outside',
        marker_color=colors,
        hovertemplate='<b>%{x}</b><br>In Top 3: %{y} times<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID
        ),
        font=dict(size=11),
    )
    return fig
def _first_non_null(df: pl.DataFrame, column: str):
    """Return the first non-null value of ``column``, or None if the column is missing or all null."""
    if column not in df.columns:
        return None
    values = df.get_column(column).drop_nulls()
    return values[0] if len(values) > 0 else None


def plot_speaking_style_trait_scores(
    df: pl.DataFrame,
    trait_description: str = None,
    left_anchor: str = None,
    right_anchor: str = None,
    title: str = "Speaking Style Trait Analysis",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Plot scores for a single speaking style trait across multiple voices.

    Draws one vertical bar per Voice showing its average score, ordered from
    highest to lowest average. The number of non-null scores behind each
    average is printed on the bar. The DataFrame is expected to be filtered
    to a single trait/description before being passed in.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing at least 'Voice' and 'score' columns.
        Produced by utils.process_speaking_style_data and filtered.
    trait_description : str, optional
        Description of the trait being analyzed (e.g. "Indifferent : Attentive").
        If not provided, it is built from the two anchors when both are
        available, otherwise taken from the first non-null value of the
        'Description' column, otherwise left empty.
    left_anchor : str, optional
        Label for the lower end of the scale (e.g. "Indifferent").
        If not provided, the first non-null value of the 'Left_Anchor'
        column is used when that column exists.
    right_anchor : str, optional
        Label for the upper end of the scale (e.g. "Attentive").
        If not provided, the first non-null value of the 'Right_Anchor'
        column is used when that column exists.
    title : str, optional
        Plot title.
    height : int, optional
        Plot height.
    width : int, optional
        Plot width.

    Returns
    -------
    go.Figure
        Plotly figure object. An empty figure is returned when ``df`` is
        empty or lacks the 'Voice' or 'score' column.
    """
    required_cols = ("Voice", "score")
    if df.is_empty() or any(col not in df.columns for col in required_cols):
        return go.Figure()

    # Mean score and respondent count per voice. "Voice" is a secondary sort
    # key because group_by gives no ordering guarantee; without it, voices
    # with equal means would swap places between runs.
    stats = (
        df.filter(pl.col("score").is_not_null())
        .group_by("Voice")
        .agg([
            pl.col("score").mean().alias("mean_score"),
            pl.col("score").count().alias("count")
        ])
        .sort(["mean_score", "Voice"], descending=[True, False])
    )

    # Resolve each anchor independently so that a DataFrame carrying only one
    # of the two anchor columns neither raises nor loses the available label.
    if left_anchor is None:
        left_anchor = _first_non_null(df, "Left_Anchor")
    if right_anchor is None:
        right_anchor = _first_non_null(df, "Right_Anchor")

    if trait_description is None:
        if left_anchor and right_anchor:
            # Only the part before the first '|' of each anchor is displayed.
            trait_description = f"{left_anchor.split('|')[0]} vs. {right_anchor.split('|')[0]}"
        else:
            description = _first_non_null(df, "Description")
            trait_description = "" if description is None else description

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats["Voice"],
        y=stats["mean_score"],
        text=stats["count"],
        textposition='inside',
        texttemplate='%{text}',  # respondent count printed on the bar
        marker_color=ColorPalette.PRIMARY,
        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<br>Count: %{text}<extra></extra>'
    ))

    # Anchor labels sit just right of the plot area (x is in paper
    # coordinates), aligned with the scale values 1 and 5 on the y axis.
    annotations = []
    for scale_value, anchor in ((1, left_anchor), (5, right_anchor)):
        if anchor:
            annotations.append(dict(
                xref='paper', yref='y',
                x=1.01, y=scale_value,
                xanchor='left', yanchor='middle',
                text=f"<b>{scale_value}: {anchor.split('|')[0]}</b>",
                showarrow=False,
                font=dict(size=10, color='gray')
            ))

    fig.update_layout(
        title=dict(
            text=f"{title}<br><sub>{trait_description}</sub><br><sub>(Numbers on bars indicate respondent count)</sub>",
            y=0.92
        ),
        xaxis_title="Voice",
        yaxis_title="Average Score (1-5)",
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        yaxis=dict(
            range=[1, 5],
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            zeroline=False
        ),
        xaxis=dict(
            showgrid=False
        ),
        margin=dict(r=150),  # room on the right for the anchor labels
        annotations=annotations,
        font=dict(size=11)
    )
    return fig