speaking style trait scores vertical
This commit is contained in:
@@ -12,7 +12,10 @@ def _():
|
|||||||
|
|
||||||
from validation import check_progress, duration_validation
|
from validation import check_progress, duration_validation
|
||||||
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||||
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_character_ranking_distribution, plot_most_ranked_1_character, plot_weighted_ranking_score
|
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
|
||||||
|
|
||||||
|
import plots as plts
|
||||||
|
import utils as utl
|
||||||
return (
|
return (
|
||||||
JPMCSurvey,
|
JPMCSurvey,
|
||||||
Path,
|
Path,
|
||||||
@@ -20,27 +23,23 @@ def _():
|
|||||||
check_progress,
|
check_progress,
|
||||||
duration_validation,
|
duration_validation,
|
||||||
mo,
|
mo,
|
||||||
|
pl,
|
||||||
plot_average_scores_with_counts,
|
plot_average_scores_with_counts,
|
||||||
plot_character_ranking_distribution,
|
plot_most_ranked_1,
|
||||||
plot_most_ranked_1_character,
|
plot_ranking_distribution,
|
||||||
plot_top3_ranking_distribution,
|
plot_top3_ranking_distribution,
|
||||||
|
plot_top3_selection_counts,
|
||||||
|
plot_voice_selection_counts,
|
||||||
plot_weighted_ranking_score,
|
plot_weighted_ranking_score,
|
||||||
|
plts,
|
||||||
|
utl,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
|
||||||
def _(mo):
|
|
||||||
mo.md(r"""
|
|
||||||
# Load Data
|
|
||||||
""")
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(Path, mo):
|
def _():
|
||||||
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
|
RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
|
||||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||||
mo.md(f"**Dataset:** `{Path(RESULTS_FILE).name}`")
|
|
||||||
return QSF_FILE, RESULTS_FILE
|
return QSF_FILE, RESULTS_FILE
|
||||||
|
|
||||||
|
|
||||||
@@ -52,17 +51,30 @@ def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
|
|||||||
return data_all, survey
|
return data_all, survey
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell
|
||||||
def _(mo):
|
def _(Path, RESULTS_FILE, data_all, mo):
|
||||||
mo.md(r"""
|
mo.md(f"""
|
||||||
## Data Validation
|
# Load Data
|
||||||
|
|
||||||
|
**Dataset:** `{Path(RESULTS_FILE).name}`
|
||||||
|
|
||||||
|
{mo.ui.table(data_all.collect())}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell(hide_code=True)
|
||||||
def _(check_progress, data_all):
|
def _(check_progress, data_all, duration_validation, mo):
|
||||||
check_progress(data_all)
|
mo.md(f"""
|
||||||
|
## Data Validation
|
||||||
|
|
||||||
|
{check_progress(data_all)}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{duration_validation(data_all)}
|
||||||
|
|
||||||
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -112,8 +124,6 @@ def _(mo):
|
|||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
## Character personality ranking
|
## Character personality ranking
|
||||||
|
|
||||||
### 1. Which character personality is ranked best?
|
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -126,15 +136,23 @@ def _(data, survey):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(char_rank, plot_character_ranking_distribution):
|
def _(char_rank, mo, plot_top3_ranking_distribution):
|
||||||
plot_character_ranking_distribution(char_rank, x_label='Character Personality', width=1000)
|
mo.md(f"""
|
||||||
|
### 1. Which character personality is ranked best?
|
||||||
|
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
|
||||||
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo):
|
def _(char_rank, mo, plot_most_ranked_1):
|
||||||
mo.md(r"""
|
mo.md(f"""
|
||||||
### 2. Which character personality is ranked number 1 the most?
|
### 2. Which character personality is ranked 1st the most?
|
||||||
|
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -143,16 +161,18 @@ def _(mo):
|
|||||||
def _(
    calculate_weighted_ranking_scores,
    char_rank,
    mo,
    plot_weighted_ranking_score,
):
    # Weighted score per character (1st = 3 pts, 2nd = 2 pts, 3rd = 1 pt).
    char_rank_weighted = calculate_weighted_ranking_scores(char_rank)

    # Underscore prefix keeps the figure private to this marimo cell.
    _fig = plot_weighted_ranking_score(
        char_rank_weighted,
        title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
        # These are character rankings, so the axis must not be labelled
        # 'Voice' (copy-paste slip); match the other char_rank plots.
        x_label='Character Personality',
        width=1000,
    )

    mo.md(f"""
    ### 3. Which character personality most popular based on weighted scores?


    {mo.ui.plotly(_fig)}
    """)
    return
|
||||||
|
|
||||||
|
|
||||||
@@ -167,51 +187,74 @@ def _(mo):
|
|||||||
@app.cell
|
@app.cell
|
||||||
def _(data, survey):
|
def _(data, survey):
|
||||||
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
|
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
|
||||||
print(v_18_8_3.head())
|
# print(v_18_8_3.head())
|
||||||
return
|
return (v_18_8_3,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo, plot_voice_selection_counts, v_18_8_3):
|
||||||
mo.md(r"""
|
mo.md(f"""
|
||||||
Which 8 voices are chosen the most out of 18?
|
### Which 8 voices are chosen the most out of 18?
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo, plot_top3_selection_counts, v_18_8_3):
|
||||||
mo.md(r"""
|
mo.md(f"""
|
||||||
Which 3 voices are chosen the most out of 18? How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
### Which 3 voices are chosen the most out of 18?
|
||||||
|
|
||||||
|
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(
|
||||||
mo.md(r"""
|
calculate_weighted_ranking_scores,
|
||||||
Which voice is ranked best in the ranking question for top 3.? (so not best 3 out of 8 question)
|
data,
|
||||||
|
mo,
|
||||||
|
plot_ranking_distribution,
|
||||||
|
survey,
|
||||||
|
):
|
||||||
|
top3_voices = survey.get_top_3_voices(data)[0].collect()
|
||||||
|
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
|
||||||
|
|
||||||
|
mo.md(f"""
|
||||||
|
### Which voice is ranked best in the ranking question for top 3?
|
||||||
|
|
||||||
|
(not best 3 out of 8 question)
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
|
||||||
|
|
||||||
|
""")
|
||||||
|
return top3_voices, top3_voices_weighted
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(mo, plot_weighted_ranking_score, top3_voices_weighted):
|
||||||
|
mo.md(f"""
|
||||||
|
### Most popular **voice** based on weighted scores?
|
||||||
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
||||||
|
Distribution of the rankings for each voice:
|
||||||
|
|
||||||
|
{mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(plot_top3_ranking_distribution, top3_voices):
|
def _(mo, plot_most_ranked_1, top3_voices):
|
||||||
plot_top3_ranking_distribution(top3_voices, x_label='Voice', width=1000)
|
mo.md(f"""
|
||||||
return
|
### Which voice is ranked number 1 the most?
|
||||||
|
|
||||||
|
(not always the voice with most points)
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
{mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
|
||||||
def _(mo):
|
|
||||||
mo.md(r"""
|
|
||||||
Which voice is ranked number 1 the most? (not always the voice with most points)
|
|
||||||
|
|
||||||
- Each of the 350 participants gives exactly one 1st-place vote.
|
|
||||||
- Total Rank-1 votes = 350.
|
|
||||||
- Voices are sorted from most to least 1st-place votes.
|
|
||||||
- The top 3 voices with the most Rank-1 votes are colored blue.
|
|
||||||
- This can differ from the points-based winners (3–2–1 totals), because a voice may receive many 2nd/3rd places but fewer 1st places.
|
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -235,6 +278,56 @@ def _(mo):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(data, survey):
    # Each getter returns the frame for one colour pair plus its choice map.
    ss_or, choice_map_or = survey.get_ss_orange_red(data)
    ss_gb, choice_map_gb = survey.get_ss_green_blue(data)

    # Combine the data: one row per respondent, matched on the record id.
    # The frame is intentionally NOT collected here; nothing in this cell
    # consumes the materialised result, so downstream cells decide when to.
    ss_all = ss_or.join(ss_gb, on='_recordId')

    # Later mapping wins on duplicate keys (green/blue overrides orange/red).
    choice_map = {**choice_map_or, **choice_map_gb}
    print(choice_map)
    return choice_map, ss_all
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(choice_map, ss_all, utl):
    # Hand the combined speaking-style frame and the merged choice map to the
    # shared processing helper; the result feeds the per-trait cells below.
    ss_long = utl.process_speaking_style_data(ss_all, choice_map)

    # Bare expression: marimo renders it as this cell's output.
    ss_long
    return (ss_long,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(pl, ss_long):
    # Trait to inspect; compared verbatim against the "Description" column.
    target_trait = "Indifferent | Unfocussed | Detached:Attentive | Helpful | Caring | Deliberate"

    # Cell-local predicate selecting only the rows for that trait.
    _is_target_trait = pl.col("Description") == target_trait
    trait_data = ss_long.filter(_is_target_trait)

    # Bare expression: marimo renders it as this cell's output.
    trait_data
    return target_trait, trait_data
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(plts, target_trait, trait_data):
    # Show the two poles of the trait with an arrow instead of the raw ":".
    _plot_title = target_trait.replace(":", " ↔ ")

    # The call is the last expression so marimo renders the figure.
    plts.plot_speaking_style_trait_scores(trait_data, title=_plot_title)
    return
|
||||||
|
|
||||||
|
|
||||||
|
app._unparsable_cell(
|
||||||
|
"""
|
||||||
|
for trait in ss_long.select(\"Description\").unique().to_series().to_list():
|
||||||
|
trait_data = ss_long.filter(pl.col(\"Description\") == trait)
|
||||||
|
mo.md(f\"\"\"
|
||||||
|
""",
|
||||||
|
name="_"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
|
|||||||
380
plots.py
380
plots.py
@@ -216,22 +216,22 @@ def plot_top3_ranking_distribution(
|
|||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_character_ranking_distribution(
|
def plot_ranking_distribution(
|
||||||
df: pl.DataFrame,
|
df: pl.DataFrame,
|
||||||
title: str = "Character Personality Rankings<br>Distribution of Votes (1st to 4th Place)",
|
title: str = "Rankings Distribution<br>(1st to 4th Place)",
|
||||||
x_label: str = "Character Personality",
|
x_label: str = "Item",
|
||||||
y_label: str = "Number of Votes",
|
y_label: str = "Number of Votes",
|
||||||
height: int = 500,
|
height: int = 500,
|
||||||
width: int = 1000,
|
width: int = 1000,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for character personalities.
|
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
|
||||||
Sorted by the number of Rank 1 votes to highlight the 'Best' options.
|
Sorted by the number of Rank 1 votes.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df : pl.DataFrame
|
df : pl.DataFrame
|
||||||
DataFrame containing character ranking columns (prefix 'Character_Ranking').
|
DataFrame containing ranking columns.
|
||||||
title : str, optional
|
title : str, optional
|
||||||
Plot title.
|
Plot title.
|
||||||
x_label : str, optional
|
x_label : str, optional
|
||||||
@@ -249,8 +249,8 @@ def plot_character_ranking_distribution(
|
|||||||
Plotly figure object.
|
Plotly figure object.
|
||||||
"""
|
"""
|
||||||
stats = []
|
stats = []
|
||||||
# Identify columns related to Character Ranking (excluding ID)
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
|
|
||||||
for col in ranking_cols:
|
for col in ranking_cols:
|
||||||
# Count occurrences of each rank (1, 2, 3, 4)
|
# Count occurrences of each rank (1, 2, 3, 4)
|
||||||
@@ -280,7 +280,7 @@ def plot_character_ranking_distribution(
|
|||||||
# Clean up labels: Remove prefix and underscores
|
# Clean up labels: Remove prefix and underscores
|
||||||
# e.g. "Character_Ranking_The_Coach" -> "The Coach"
|
# e.g. "Character_Ranking_The_Coach" -> "The Coach"
|
||||||
labels = [
|
labels = [
|
||||||
col.replace('Character_Ranking_', '').replace('_', ' ').strip()
|
col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
|
||||||
for col in stats_df['column']
|
for col in stats_df['column']
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -354,21 +354,22 @@ def plot_character_ranking_distribution(
|
|||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_most_ranked_1_character(
|
def plot_most_ranked_1(
|
||||||
df: pl.DataFrame,
|
df: pl.DataFrame,
|
||||||
title: str = "Most Popular Character Personality<br>(Number of Times Ranked 1st)",
|
title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
|
||||||
x_label: str = "Character Personality",
|
x_label: str = "Item",
|
||||||
y_label: str = "Count of 1st Place Rankings",
|
y_label: str = "Count of 1st Place Rankings",
|
||||||
height: int = 500,
|
height: int = 500,
|
||||||
width: int = 1000,
|
width: int = 1000,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a bar chart showing which character personality was ranked #1 the most.
|
Create a bar chart showing which item (character/voice) was ranked #1 the most.
|
||||||
|
Top 3 items are highlighted.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df : pl.DataFrame
|
df : pl.DataFrame
|
||||||
DataFrame containing character ranking columns.
|
DataFrame containing ranking columns.
|
||||||
title : str, optional
|
title : str, optional
|
||||||
Plot title.
|
Plot title.
|
||||||
x_label : str, optional
|
x_label : str, optional
|
||||||
@@ -386,8 +387,8 @@ def plot_most_ranked_1_character(
|
|||||||
Plotly figure object.
|
Plotly figure object.
|
||||||
"""
|
"""
|
||||||
stats = []
|
stats = []
|
||||||
# Identify columns related to Character Ranking
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
|
|
||||||
for col in ranking_cols:
|
for col in ranking_cols:
|
||||||
# Count occurrences of rank 1
|
# Count occurrences of rank 1
|
||||||
@@ -403,10 +404,16 @@ def plot_most_ranked_1_character(
|
|||||||
|
|
||||||
# Clean up labels
|
# Clean up labels
|
||||||
labels = [
|
labels = [
|
||||||
col.replace('Character_Ranking_', '').replace('_', ' ').strip()
|
col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
|
||||||
for col in stats_df['column']
|
for col in stats_df['column']
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Assign colors: Top 3 get PRIMARY (Blue), others get NEUTRAL (Grey)
|
||||||
|
colors = [
|
||||||
|
ColorPalette.PRIMARY if i < 3 else ColorPalette.NEUTRAL
|
||||||
|
for i in range(len(stats_df))
|
||||||
|
]
|
||||||
|
|
||||||
fig = go.Figure()
|
fig = go.Figure()
|
||||||
|
|
||||||
fig.add_trace(go.Bar(
|
fig.add_trace(go.Bar(
|
||||||
@@ -415,7 +422,7 @@ def plot_most_ranked_1_character(
|
|||||||
text=stats_df['count'],
|
text=stats_df['count'],
|
||||||
textposition='inside',
|
textposition='inside',
|
||||||
textfont=dict(size=10, color='white'),
|
textfont=dict(size=10, color='white'),
|
||||||
marker_color=ColorPalette.PRIMARY,
|
marker_color=colors,
|
||||||
hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
|
hovertemplate='<b>%{x}</b><br>1st Place Votes: %{y}<extra></extra>'
|
||||||
))
|
))
|
||||||
|
|
||||||
@@ -444,7 +451,7 @@ def plot_most_ranked_1_character(
|
|||||||
|
|
||||||
def plot_weighted_ranking_score(
|
def plot_weighted_ranking_score(
|
||||||
weighted_df: pl.DataFrame,
|
weighted_df: pl.DataFrame,
|
||||||
title: str = "Character Popularity Score<br>(Weighted: 1st=3pts, 2nd=2pts, 3rd=1pt)",
|
title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
|
||||||
x_label: str = "Character Personality",
|
x_label: str = "Character Personality",
|
||||||
y_label: str = "Total Weighted Score",
|
y_label: str = "Total Weighted Score",
|
||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
@@ -509,3 +516,338 @@ def plot_weighted_ranking_score(
|
|||||||
)
|
)
|
||||||
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def _plot_selection_counts(
    df: pl.DataFrame,
    target_column: str,
    highlight_top_n: int,
    hovertemplate: str,
    title: str,
    x_label: str,
    y_label: str,
    height: int,
    width: int,
) -> go.Figure:
    """Count comma-separated selections in one column and plot them as bars.

    Shared implementation behind ``plot_voice_selection_counts`` and
    ``plot_top3_selection_counts``. The ``highlight_top_n`` most frequent
    items are drawn in the primary colour, the rest in the neutral colour.
    Returns an empty figure when ``target_column`` is not in ``df``.
    """
    if target_column not in df.columns:
        return go.Figure()

    selection = pl.col(target_column)
    stats_df = (
        df.select(selection)
        .drop_nulls()
        # "Voice 1, Voice 2" -> ["Voice 1", " Voice 2"] -> one row per voice
        .with_columns(selection.str.split(","))
        .explode(target_column)
        # Strip so that "Voice 1" and " Voice 1" are counted together
        .with_columns(selection.str.strip_chars())
        .filter(selection != "")
        .group_by(target_column)
        .agg(pl.len().alias("count"))
        # group_by does not guarantee row order, so break ties on the name;
        # otherwise the set of highlighted bars could differ between runs.
        .sort(["count", target_column], descending=[True, False])
    )

    n_highlighted = min(highlight_top_n, stats_df.height)
    colors = (
        [ColorPalette.PRIMARY] * n_highlighted
        + [ColorPalette.NEUTRAL] * (stats_df.height - n_highlighted)
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats_df[target_column],
        y=stats_df['count'],
        text=stats_df['count'],
        textposition='outside',
        marker_color=colors,
        hovertemplate=hovertemplate,
    ))
    fig.update_layout(
        title=title,
        xaxis_title=x_label,
        yaxis_title=y_label,
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        xaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            tickangle=-45,
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor=ColorPalette.GRID,
        ),
        font=dict(size=11),
    )
    return fig


def plot_voice_selection_counts(
    df: pl.DataFrame,
    target_column: str = "8_Combined",
    title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Number of Times Chosen",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Create a bar plot showing the frequency of voice selections.

    Takes a column containing comma-separated values (e.g. "Voice 1, Voice 2..."),
    counts occurrences, and highlights the top 8 most frequent voices.
    Voices with equal counts are ordered by name so the result is stable.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the selection column.
    target_column : str, optional
        Name of the column containing comma-separated voice selections.
        Defaults to "8_Combined".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. Empty if `target_column` is not in `df`.
    """
    return _plot_selection_counts(
        df,
        target_column,
        highlight_top_n=8,
        hovertemplate='<b>%{x}</b><br>Selections: %{y}<extra></extra>',
        title=title,
        x_label=x_label,
        y_label=y_label,
        height=height,
        width=width,
    )


def plot_top3_selection_counts(
    df: pl.DataFrame,
    target_column: str = "3_Ranked",
    title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
    x_label: str = "Voice",
    y_label: str = "Count of Mentions in Top 3",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Question: Which 3 voices are chosen the most out of 18?

    How many times does each voice end up in the top 3?
    (this is based on the survey question where participants need to choose 3 out
    of the earlier selected 8 voices). So how often each of the 18 stimuli ended
    up in participants' Top 3, after they first selected 8 out of 18.
    The 3 most frequent voices are highlighted; voices with equal counts are
    ordered by name so the result is stable.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing the ranking column (comma-separated strings).
    target_column : str, optional
        Name of the column containing comma-separated Top 3 voice selections.
        Defaults to "3_Ranked".
    title : str, optional
        Plot title.
    x_label : str, optional
        X-axis label.
    y_label : str, optional
        Y-axis label.
    height : int, optional
        Plot height in pixels.
    width : int, optional
        Plot width in pixels.

    Returns
    -------
    go.Figure
        Plotly figure object. Empty if `target_column` is not in `df`.
    """
    return _plot_selection_counts(
        df,
        target_column,
        highlight_top_n=3,
        hovertemplate='<b>%{x}</b><br>In Top 3: %{y} times<extra></extra>',
        title=title,
        x_label=x_label,
        y_label=y_label,
        height=height,
        width=width,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _first_non_null(df: pl.DataFrame, column: str):
    """Return the first non-null value of ``column``, or None if absent/all null."""
    if column not in df.columns:
        return None
    values = df.get_column(column).drop_nulls()
    return values[0] if len(values) > 0 else None


def _anchor_annotation(scale_value: int, label: str) -> dict:
    """Build the annotation placed right of the plot area at ``scale_value`` on the y axis."""
    return dict(
        xref='paper', yref='y',
        x=1.01, y=scale_value,
        xanchor='left', yanchor='middle',
        text=f"<b>{scale_value}: {label}</b>",
        showarrow=False,
        font=dict(size=10, color='gray'),
    )


def plot_speaking_style_trait_scores(
    df: pl.DataFrame,
    trait_description: str = None,
    left_anchor: str = None,
    right_anchor: str = None,
    title: str = "Speaking Style Trait Analysis",
    height: int = 500,
    width: int = 1000,
) -> go.Figure:
    """
    Plot scores for a single speaking style trait across multiple voices.

    The plot shows the average score per Voice, sorted by score (highest
    first). It expects the DataFrame to contain 'Voice' and 'score' columns,
    typically filtered for a single trait/description. Rows with a null
    score are ignored.

    Parameters
    ----------
    df : pl.DataFrame
        DataFrame containing at least 'Voice' and 'score' columns.
        Produced by utils.process_speaking_style_data and filtered.
    trait_description : str, optional
        Description of the trait being analyzed (e.g. "Indifferent : Attentive").
        If not provided, it is built from the anchors, falling back to the
        first non-null 'Description' value, and finally to an empty string.
    left_anchor : str, optional
        Label for the lower end of the scale (e.g. "Indifferent").
        If not provided, the first non-null 'Left_Anchor' value in df is used
        when that column exists.
    right_anchor : str, optional
        Label for the upper end of the scale (e.g. "Attentive").
        If not provided, the first non-null 'Right_Anchor' value in df is used
        when that column exists.
    title : str, optional
        Plot title.
    height : int, optional
        Plot height.
    width : int, optional
        Plot width.

    Returns
    -------
    go.Figure
        Plotly figure object. Empty if df is empty or lacks 'Voice'/'score'.
    """
    if df.is_empty():
        return go.Figure()
    if not {"Voice", "score"}.issubset(df.columns):
        return go.Figure()

    # Mean score and number of non-null answers per voice, best first.
    stats = (
        df.filter(pl.col("score").is_not_null())
        .group_by("Voice")
        .agg([
            pl.col("score").mean().alias("mean_score"),
            pl.col("score").count().alias("count"),
        ])
        .sort("mean_score", descending=True)
    )

    # Look each anchor up independently: the previous code read
    # 'Right_Anchor' after only checking that 'Left_Anchor' exists, which
    # raised when the frame carried just one of the two columns.
    if left_anchor is None:
        left_anchor = _first_non_null(df, "Left_Anchor")
    if right_anchor is None:
        right_anchor = _first_non_null(df, "Right_Anchor")

    # Anchors look like "Indifferent | Unfocussed | Detached"; show only the
    # first term, stripped so no stray space around "|" leaks into labels.
    left_label = left_anchor.split('|')[0].strip() if left_anchor else None
    right_label = right_anchor.split('|')[0].strip() if right_anchor else None

    if trait_description is None:
        if left_label and right_label:
            trait_description = f"{left_label} vs. {right_label}"
        else:
            trait_description = _first_non_null(df, "Description") or ""

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=stats["Voice"],
        y=stats["mean_score"],
        text=stats["count"],
        textposition='inside',
        texttemplate='%{text}',  # Count on bar
        marker_color=ColorPalette.PRIMARY,
        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<br>Count: %{text}<extra></extra>',
    ))

    # Anchor labels sit right of the plot at the scale's end points (1 and 5).
    annotations = []
    if left_label:
        annotations.append(_anchor_annotation(1, left_label))
    if right_label:
        annotations.append(_anchor_annotation(5, right_label))

    fig.update_layout(
        title=dict(
            text=f"{title}<br><sub>{trait_description}</sub><br><sub>(Numbers on bars indicate respondent count)</sub>",
            y=0.92,
        ),
        xaxis_title="Voice",
        yaxis_title="Average Score (1-5)",
        height=height,
        width=width,
        plot_bgcolor=ColorPalette.BACKGROUND,
        yaxis=dict(
            range=[1, 5],
            showgrid=True,
            gridcolor=ColorPalette.GRID,
            zeroline=False,
        ),
        xaxis=dict(
            showgrid=False,
        ),
        margin=dict(r=150),  # room for the anchor labels outside the plot area
        annotations=annotations,
        font=dict(size=11),
    )
    return fig
|
||||||
|
|||||||
3
theme.py
3
theme.py
@@ -16,6 +16,9 @@ class ColorPalette:
|
|||||||
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
|
RANK_3 = "#5AAE95" # Sea Green (3rd Choice)
|
||||||
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
|
RANK_4 = "#9E9E9E" # Grey (4th Choice / Worst)
|
||||||
|
|
||||||
|
# Neutral color for unhighlighted comparison items
|
||||||
|
NEUTRAL = "#D3D3D3" # Light Grey
|
||||||
|
|
||||||
# General UI elements
|
# General UI elements
|
||||||
TEXT = "black"
|
TEXT = "black"
|
||||||
GRID = "lightgray"
|
GRID = "lightgray"
|
||||||
|
|||||||
100
utils.py
100
utils.py
@@ -3,7 +3,6 @@ from pathlib import Path
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import Union
|
from typing import Union
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def extract_voice_label(html_str: str) -> str:
|
def extract_voice_label(html_str: str) -> str:
|
||||||
@@ -57,13 +56,13 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
|
|||||||
|
|
||||||
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
||||||
"""
|
"""
|
||||||
Calculate weighted scores for character rankings.
|
Calculate weighted scores for character or voice rankings.
|
||||||
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
|
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df : pl.DataFrame
|
df : pl.DataFrame
|
||||||
DataFrame containing character ranking columns.
|
DataFrame containing character/ voice ranking columns.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -71,8 +70,8 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
|||||||
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
|
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
|
||||||
"""
|
"""
|
||||||
scores = []
|
scores = []
|
||||||
# Identify columns related to Character Ranking
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if 'Character_Ranking' in c]
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
|
|
||||||
for col in ranking_cols:
|
for col in ranking_cols:
|
||||||
# Calculate score:
|
# Calculate score:
|
||||||
@@ -84,7 +83,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
|||||||
weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
|
weighted_score = (r1_count * 3) + (r2_count * 2) + (r3_count * 1)
|
||||||
|
|
||||||
# Clean name
|
# Clean name
|
||||||
clean_name = col.replace('Character_Ranking_', '').replace('_', ' ').strip()
|
clean_name = col.replace('Character_Ranking_', '').replace('Top_3_Voices_ranking__', '').replace('_', ' ').strip()
|
||||||
|
|
||||||
scores.append({
|
scores.append({
|
||||||
'Character': clean_name,
|
'Character': clean_name,
|
||||||
@@ -415,6 +414,95 @@ class JPMCSurvey:
|
|||||||
return self._get_subset(q, QIDs, rename_cols=True), None
|
return self._get_subset(q, QIDs, rename_cols=True), None
|
||||||
|
|
||||||
|
|
||||||
|
def process_speaking_style_data(
|
||||||
|
df: Union[pl.LazyFrame, pl.DataFrame],
|
||||||
|
trait_map: dict[str, str]
|
||||||
|
) -> pl.DataFrame:
|
||||||
|
"""
|
||||||
|
Process speaking style columns from wide to long format and map trait descriptions.
|
||||||
|
|
||||||
|
Parses columns with format: SS_{StyleGroup}__{Voice}__{ChoiceID}
|
||||||
|
Example: SS_Orange_Red__V14__Choice_1
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
df : pl.LazyFrame or pl.DataFrame
|
||||||
|
Input dataframe containing SS_* columns.
|
||||||
|
trait_map : dict
|
||||||
|
Dictionary mapping column names to trait descriptions.
|
||||||
|
Keys should be full column names like "SS_Orange_Red__V14__Choice_1".
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pl.DataFrame
|
||||||
|
Long-format dataframe with columns:
|
||||||
|
_recordId, Voice, Style_Group, Choice_ID, Description, Score, Left_Anchor, Right_Anchor
|
||||||
|
"""
|
||||||
|
# Normalize input to LazyFrame
|
||||||
|
lf = df.lazy() if isinstance(df, pl.DataFrame) else df
|
||||||
|
|
||||||
|
# 1. Melt SS_ columns
|
||||||
|
melted = lf.melt(
|
||||||
|
id_vars=["_recordId"],
|
||||||
|
value_vars=pl.col("^SS_.*$"),
|
||||||
|
variable_name="full_col_name",
|
||||||
|
value_name="score"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Extract components from column name
|
||||||
|
# Regex captures: Style_Group (e.g. SS_Orange_Red), Voice (e.g. V14), Choice_ID (e.g. Choice_1)
|
||||||
|
pattern = r"^(?P<Style_Group>SS_.+?)__(?P<Voice>.+?)__(?P<Choice_ID>Choice_\d+)$"
|
||||||
|
|
||||||
|
processed = melted.with_columns(
|
||||||
|
pl.col("full_col_name").str.extract_groups(pattern)
|
||||||
|
).unnest("full_col_name")
|
||||||
|
|
||||||
|
# 3. Create Mapping Lookup from the provided dictionary
|
||||||
|
# We map (Style_Group, Choice_ID) -> Description
|
||||||
|
mapping_data = []
|
||||||
|
seen = set()
|
||||||
|
|
||||||
|
for col_name, desc in trait_map.items():
|
||||||
|
match = re.match(pattern, col_name)
|
||||||
|
if match:
|
||||||
|
groups = match.groupdict()
|
||||||
|
key = (groups["Style_Group"], groups["Choice_ID"])
|
||||||
|
|
||||||
|
if key not in seen:
|
||||||
|
# Parse description into anchors if possible (Left : Right)
|
||||||
|
parts = desc.split(':')
|
||||||
|
left_anchor = parts[0].strip() if len(parts) > 0 else ""
|
||||||
|
right_anchor = parts[1].strip() if len(parts) > 1 else ""
|
||||||
|
|
||||||
|
mapping_data.append({
|
||||||
|
"Style_Group": groups["Style_Group"],
|
||||||
|
"Choice_ID": groups["Choice_ID"],
|
||||||
|
"Description": desc,
|
||||||
|
"Left_Anchor": left_anchor,
|
||||||
|
"Right_Anchor": right_anchor
|
||||||
|
})
|
||||||
|
seen.add(key)
|
||||||
|
|
||||||
|
if not mapping_data:
|
||||||
|
return processed.collect()
|
||||||
|
|
||||||
|
mapping_lf = pl.LazyFrame(mapping_data)
|
||||||
|
|
||||||
|
# 4. Join Data with Mapping
|
||||||
|
result = processed.join(
|
||||||
|
mapping_lf,
|
||||||
|
on=["Style_Group", "Choice_ID"],
|
||||||
|
how="left"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 5. Cast score to Int
|
||||||
|
result = result.with_columns(
|
||||||
|
pl.col("score").cast(pl.Int64, strict=False)
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.collect()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ import polars as pl
|
|||||||
def check_progress(data):
|
def check_progress(data):
|
||||||
"""Check if all responses are complete based on 'progress' column."""
|
"""Check if all responses are complete based on 'progress' column."""
|
||||||
if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
|
if data.collect().select(pl.col('progress').unique()).shape[0] == 1:
|
||||||
return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
|
return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """
|
||||||
|
|
||||||
return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
|
return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
|
||||||
|
|
||||||
|
|
||||||
def duration_validation(data):
|
def duration_validation(data):
|
||||||
@@ -30,10 +30,9 @@ def duration_validation(data):
|
|||||||
outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
|
outlier_data = _d.filter(pl.col('outlier_duration') == True).collect()
|
||||||
|
|
||||||
if outlier_data.shape[0] == 0:
|
if outlier_data.shape[0] == 0:
|
||||||
return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")
|
return "### Duration Outliers: \n\n✅ No duration outliers detected"
|
||||||
|
|
||||||
return mo.md(f"""
|
return f"""### Duration Outliers:
|
||||||
### Duration Outliers:
|
|
||||||
|
|
||||||
**⚠️ Potential outliers detected based on response duration ⚠️**
|
**⚠️ Potential outliers detected based on response duration ⚠️**
|
||||||
|
|
||||||
@@ -50,5 +49,5 @@ def duration_validation(data):
|
|||||||
|
|
||||||
**⚠️ NOTE: These have not been removed from the dataset ⚠️**
|
**⚠️ NOTE: These have not been removed from the dataset ⚠️**
|
||||||
|
|
||||||
""")
|
"""
|
||||||
|
|
||||||
Reference in New Issue
Block a user