move plots to mixin class of JPMCSurvey to simplify file saving
This commit is contained in:
@@ -12,9 +12,6 @@ def _():
|
|||||||
|
|
||||||
from validation import check_progress, duration_validation
|
from validation import check_progress, duration_validation
|
||||||
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
|
||||||
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
|
|
||||||
|
|
||||||
import plots
|
|
||||||
import utils
|
import utils
|
||||||
|
|
||||||
from speaking_styles import SPEAKING_STYLES
|
from speaking_styles import SPEAKING_STYLES
|
||||||
@@ -27,13 +24,6 @@ def _():
|
|||||||
duration_validation,
|
duration_validation,
|
||||||
mo,
|
mo,
|
||||||
pl,
|
pl,
|
||||||
plot_most_ranked_1,
|
|
||||||
plot_ranking_distribution,
|
|
||||||
plot_top3_ranking_distribution,
|
|
||||||
plot_top3_selection_counts,
|
|
||||||
plot_voice_selection_counts,
|
|
||||||
plot_weighted_ranking_score,
|
|
||||||
plots,
|
|
||||||
utils,
|
utils,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -47,10 +37,10 @@ def _():
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
|
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
|
||||||
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
|
||||||
data_all = survey.load_data()
|
data_all = S.load_data()
|
||||||
data_all.collect()
|
data_all.collect()
|
||||||
return data_all, survey
|
return S, data_all
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
@@ -108,18 +98,22 @@ def _(mo):
|
|||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(data_all, mo):
|
def _(data_all, mo):
|
||||||
data_all_collected = data_all.collect()
|
data_all_collected = data_all.collect()
|
||||||
ages = mo.ui.multiselect(options=data_all_collected["QID1"], value=data_all_collected["QID1"].unique(), label="Select Age Group(s):")
|
age = mo.ui.multiselect(options=data_all_collected["QID1"], value=data_all_collected["QID1"].unique(), label="Select Age Group(s):")
|
||||||
income = mo.ui.multiselect(data_all_collected["QID15"], value=data_all_collected["QID15"], label="Select Income Group(s):")
|
income = mo.ui.multiselect(data_all_collected["QID15"], value=data_all_collected["QID15"], label="Select Income Group(s):")
|
||||||
gender = mo.ui.multiselect(data_all_collected["QID2"], value=data_all_collected["QID2"], label="Select Gender(s)")
|
gender = mo.ui.multiselect(data_all_collected["QID2"], value=data_all_collected["QID2"], label="Select Gender(s)")
|
||||||
ethnicity = mo.ui.multiselect(data_all_collected["QID3"], value=data_all_collected["QID3"], label="Select Ethnicities:")
|
ethnicity = mo.ui.multiselect(data_all_collected["QID3"], value=data_all_collected["QID3"], label="Select Ethnicities:")
|
||||||
consumer = mo.ui.multiselect(data_all_collected["Consumer"], value=data_all_collected["Consumer"], label="Select Consumer Groups:")
|
consumer = mo.ui.multiselect(data_all_collected["Consumer"], value=data_all_collected["Consumer"], label="Select Consumer Groups:")
|
||||||
|
return age, consumer, ethnicity, gender, income
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(age, consumer, ethnicity, gender, income, mo):
|
||||||
|
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
# Data Filters
|
# Data Filters
|
||||||
|
|
||||||
|
|
||||||
{ages}
|
{age}
|
||||||
|
|
||||||
{gender}
|
{gender}
|
||||||
|
|
||||||
@@ -130,12 +124,14 @@ def _(data_all, mo):
|
|||||||
{consumer}
|
{consumer}
|
||||||
|
|
||||||
""")
|
""")
|
||||||
return ages, consumer, ethnicity, gender, income
|
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(ages, consumer, data_all, ethnicity, gender, income, survey):
|
def _(S, age, consumer, data_all, ethnicity, gender, income):
|
||||||
data = survey.filter_data(data_all, age=ages.value, gender=gender.value, income=income.value, ethnicity=ethnicity.value, consumer=consumer.value)
|
data = S.filter_data(data_all, age=age.value, gender=gender.value, income=income.value, ethnicity=ethnicity.value, consumer=consumer.value)
|
||||||
data.collect()
|
data.collect()
|
||||||
return (data,)
|
return (data,)
|
||||||
|
|
||||||
@@ -159,49 +155,42 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(data, survey):
|
def _(S, data):
|
||||||
char_rank = survey.get_character_ranking(data)[0].collect()
|
char_rank = S.get_character_ranking(data)[0]
|
||||||
return (char_rank,)
|
return (char_rank,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(char_rank, mo, plot_top3_ranking_distribution, survey):
|
def _(S, char_rank, mo):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### 1. Which character personality is ranked best?
|
### 1. Which character personality is ranked best?
|
||||||
|
|
||||||
|
|
||||||
{mo.ui.plotly(plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_top3_ranking_distribution(char_rank, x_label='Character Personality', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(char_rank, mo, plot_most_ranked_1, survey):
|
def _(S, char_rank, mo):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### 2. Which character personality is ranked 1st the most?
|
### 2. Which character personality is ranked 1st the most?
|
||||||
|
|
||||||
|
|
||||||
{mo.ui.plotly(plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_most_ranked_1(char_rank, title="Most Popular Character<br>(Number of Times Ranked 1st)", x_label='Character Personality', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(
|
def _(S, calculate_weighted_ranking_scores, char_rank, mo):
|
||||||
calculate_weighted_ranking_scores,
|
|
||||||
char_rank,
|
|
||||||
mo,
|
|
||||||
plot_weighted_ranking_score,
|
|
||||||
survey,
|
|
||||||
):
|
|
||||||
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
|
char_rank_weighted = calculate_weighted_ranking_scores(char_rank)
|
||||||
# plot_weighted_ranking_score(char_rank_weighted, x_label='Voice', width=1000)
|
|
||||||
|
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### 3. Which character personality most popular based on weighted scores?
|
### 3. Which character personality most popular based on weighted scores?
|
||||||
|
|
||||||
|
|
||||||
{mo.ui.plotly(plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_weighted_ranking_score(char_rank_weighted, title="Most Popular Character - Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)", x_label='Voice', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -215,73 +204,73 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(data, survey):
|
def _(S, data):
|
||||||
v_18_8_3 = survey.get_18_8_3(data)[0].collect()
|
v_18_8_3 = S.get_18_8_3(data)[0].collect()
|
||||||
# print(v_18_8_3.head())
|
# print(v_18_8_3.head())
|
||||||
return (v_18_8_3,)
|
return (v_18_8_3,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, plot_voice_selection_counts, survey, v_18_8_3):
|
def _(S, mo, v_18_8_3):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### Which 8 voices are chosen the most out of 18?
|
### Which 8 voices are chosen the most out of 18?
|
||||||
|
|
||||||
{mo.ui.plotly(plot_voice_selection_counts(v_18_8_3, height=500, width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_voice_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, plot_top3_selection_counts, survey, v_18_8_3):
|
def _(S, mo, v_18_8_3):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### Which 3 voices are chosen the most out of 18?
|
### Which 3 voices are chosen the most out of 18?
|
||||||
|
|
||||||
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
How many times does each voice end up in the top 3? ( this is based on the survey question where participants need to choose 3 out of the earlier selected 8 voices. So how often each of the 18 stimuli ended up in participants’ Top 3, after they first selected 8 out of 18.
|
||||||
|
|
||||||
{mo.ui.plotly(plot_top3_selection_counts(v_18_8_3, height=500, width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_top3_selection_counts(v_18_8_3, height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(calculate_weighted_ranking_scores, data, survey):
|
def _(S, calculate_weighted_ranking_scores, data):
|
||||||
top3_voices = survey.get_top_3_voices(data)[0].collect()
|
top3_voices = S.get_top_3_voices(data)[0]
|
||||||
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
|
top3_voices_weighted = calculate_weighted_ranking_scores(top3_voices)
|
||||||
return top3_voices, top3_voices_weighted
|
return top3_voices, top3_voices_weighted
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo, plot_ranking_distribution, survey, top3_voices):
|
def _(S, mo, top3_voices):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### Which voice is ranked best in the ranking question for top 3?
|
### Which voice is ranked best in the ranking question for top 3?
|
||||||
|
|
||||||
(not best 3 out of 8 question)
|
(not best 3 out of 8 question)
|
||||||
|
|
||||||
{mo.ui.plotly(plot_ranking_distribution(top3_voices, x_label='Voice', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_ranking_distribution(top3_voices, x_label='Voice', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo, plot_weighted_ranking_score, survey, top3_voices_weighted):
|
def _(S, mo, top3_voices_weighted):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### Most popular **voice** based on weighted scores?
|
### Most popular **voice** based on weighted scores?
|
||||||
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
- E.g. 1 point for place 3. 2 points for place 2 and 3 points for place 1. The voice with most points is ranked best.
|
||||||
Distribution of the rankings for each voice:
|
Distribution of the rankings for each voice:
|
||||||
|
|
||||||
{mo.ui.plotly(plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_weighted_ranking_score(top3_voices_weighted, title="Most Popular Voice - Weighted Popularity Score<br>(1st = 3pts, 2nd = 2pts, 3rd = 1pt)", height=500, width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo, plot_most_ranked_1, survey, top3_voices):
|
def _(S, mo, top3_voices):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### Which voice is ranked number 1 the most?
|
### Which voice is ranked number 1 the most?
|
||||||
|
|
||||||
(not always the voice with most points)
|
(not always the voice with most points)
|
||||||
|
|
||||||
{mo.ui.plotly(plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_most_ranked_1(top3_voices, title="Most Popular Voice<br>(Number of Times Ranked 1st)", x_label='Voice', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -297,9 +286,9 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(data, survey, utils):
|
def _(S, data, utils):
|
||||||
ss_or, choice_map_or = survey.get_ss_orange_red(data)
|
ss_or, choice_map_or = S.get_ss_orange_red(data)
|
||||||
ss_gb, choice_map_gb = survey.get_ss_green_blue(data)
|
ss_gb, choice_map_gb = S.get_ss_green_blue(data)
|
||||||
|
|
||||||
# Combine the data
|
# Combine the data
|
||||||
ss_all = ss_or.join(ss_gb, on='_recordId')
|
ss_all = ss_or.join(ss_gb, on='_recordId')
|
||||||
@@ -313,7 +302,7 @@ def _(data, survey, utils):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo, pl, plots, ss_long, survey):
|
def _(S, mo, pl, ss_long):
|
||||||
content = """### How does each voice score for each “speaking style labeled trait”?"""
|
content = """### How does each voice score for each “speaking style labeled trait”?"""
|
||||||
|
|
||||||
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
|
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
|
||||||
@@ -322,7 +311,7 @@ def _(mo, pl, plots, ss_long, survey):
|
|||||||
content += f"""
|
content += f"""
|
||||||
### {i+1}) {trait.replace(":", " ↔ ")}
|
### {i+1}) {trait.replace(":", " ↔ ")}
|
||||||
|
|
||||||
{mo.ui.plotly(plots.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", " ↔ "), height=550, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", " ↔ "), height=550))}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
mo.md(content)
|
mo.md(content)
|
||||||
@@ -338,18 +327,18 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(data, survey):
|
def _(S, data):
|
||||||
vscales = survey.get_voice_scale_1_10(data)[0].collect()
|
vscales = S.get_voice_scale_1_10(data)[0]
|
||||||
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
|
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
|
||||||
return (vscales,)
|
return (vscales,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(mo, plots, survey, vscales):
|
def _(S, mo, vscales):
|
||||||
mo.md(f"""
|
mo.md(f"""
|
||||||
### How does each voice score on a scale from 1-10?
|
### How does each voice score on a scale from 1-10?
|
||||||
|
|
||||||
{mo.ui.plotly(plots.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000, results_dir=survey.fig_save_dir))}
|
{mo.ui.plotly(S.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -394,7 +383,7 @@ def _(mo):
|
|||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(choice_map, ss_all, utils, vscales):
|
def _(choice_map, ss_all, utils, vscales):
|
||||||
df_style = utils.process_speaking_style_data(ss_all.collect(), choice_map)
|
df_style = utils.process_speaking_style_data(ss_all, choice_map)
|
||||||
df_voice_long = utils.process_voice_scale_data(vscales)
|
df_voice_long = utils.process_voice_scale_data(vscales)
|
||||||
|
|
||||||
joined_df = df_style.join(df_voice_long, on=["_recordId", "Voice"], how="inner")
|
joined_df = df_style.join(df_voice_long, on=["_recordId", "Voice"], how="inner")
|
||||||
@@ -403,19 +392,18 @@ def _(choice_map, ss_all, utils, vscales):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(SPEAKING_STYLES, joined_df, mo, plots, survey):
|
def _(S, SPEAKING_STYLES, joined_df, mo):
|
||||||
_content = """### Total Results
|
_content = """### Total Results
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for style, traits in SPEAKING_STYLES.items():
|
for style, traits in SPEAKING_STYLES.items():
|
||||||
# print(f"Correlation plot for {style}...")
|
# print(f"Correlation plot for {style}...")
|
||||||
fig = plots.plot_speaking_style_correlation(
|
fig = S.plot_speaking_style_correlation(
|
||||||
df=joined_df,
|
data=joined_df,
|
||||||
style_color=style,
|
style_color=style,
|
||||||
style_traits=traits,
|
style_traits=traits,
|
||||||
title=f"Correlation: Speaking Style {style} and Voice Scale 1-10",
|
title=f"Correlation: Speaking Style {style} and Voice Scale 1-10"
|
||||||
results_dir=survey.fig_save_dir
|
|
||||||
)
|
)
|
||||||
_content += f"""
|
_content += f"""
|
||||||
#### Speaking Style **{style}**:
|
#### Speaking Style **{style}**:
|
||||||
@@ -470,7 +458,7 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(SPEAKING_STYLES, df_style, mo, plots, survey, top3_voices, utils):
|
def _(S, SPEAKING_STYLES, df_style, mo, top3_voices, utils):
|
||||||
df_ranking = utils.process_voice_ranking_data(top3_voices)
|
df_ranking = utils.process_voice_ranking_data(top3_voices)
|
||||||
joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
|
joined = df_style.join(df_ranking, on=['_recordId', 'Voice'], how='inner')
|
||||||
|
|
||||||
@@ -480,7 +468,7 @@ def _(SPEAKING_STYLES, df_style, mo, plots, survey, top3_voices, utils):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
_fig = plots.plot_speaking_style_ranking_correlation(joined, _style, _traits, results_dir=survey.fig_save_dir)
|
_fig = S.plot_speaking_style_ranking_correlation(data=joined, style_color=_style, style_traits=_traits)
|
||||||
_content += f"""
|
_content += f"""
|
||||||
|
|
||||||
#### Speaking Style **{_style}**:
|
#### Speaking Style **{_style}**:
|
||||||
|
|||||||
427
plots.py
427
plots.py
@@ -8,7 +8,10 @@ import polars as pl
|
|||||||
from theme import ColorPalette
|
from theme import ColorPalette
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_filename(title: str) -> str:
|
class JPMCPlotsMixin:
|
||||||
|
"""Mixin class for plotting functions in JPMCSurvey."""
|
||||||
|
|
||||||
|
def _sanitize_filename(self, title: str) -> str:
|
||||||
"""Convert plot title to a safe filename."""
|
"""Convert plot title to a safe filename."""
|
||||||
# Remove HTML tags
|
# Remove HTML tags
|
||||||
clean = re.sub(r'<[^>]+>', ' ', title)
|
clean = re.sub(r'<[^>]+>', ' ', title)
|
||||||
@@ -21,53 +24,41 @@ def _sanitize_filename(title: str) -> str:
|
|||||||
# Lowercase and limit length
|
# Lowercase and limit length
|
||||||
return clean.lower()[:100]
|
return clean.lower()[:100]
|
||||||
|
|
||||||
|
def _save_plot(self, fig: go.Figure, title: str) -> None:
|
||||||
def _save_plot(fig: go.Figure, results_dir: str | None, title: str) -> None:
|
"""Save plot to PNG file if fig_save_dir is set."""
|
||||||
"""Save plot to PNG file if results_dir is provided."""
|
if hasattr(self, 'fig_save_dir') and self.fig_save_dir:
|
||||||
if results_dir:
|
path = Path(self.fig_save_dir)
|
||||||
path = Path(results_dir)
|
if not path.exists():
|
||||||
path.mkdir(parents=True, exist_ok=True)
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
filename = f"{_sanitize_filename(title)}.png"
|
|
||||||
|
filename = f"{self._sanitize_filename(title)}.png"
|
||||||
fig.write_image(path / filename, width=fig.layout.width, height=fig.layout.height)
|
fig.write_image(path / filename, width=fig.layout.width, height=fig.layout.height)
|
||||||
|
|
||||||
|
def _ensure_dataframe(self, data: pl.LazyFrame | pl.DataFrame | None) -> pl.DataFrame:
|
||||||
|
"""Ensure data is an eager DataFrame, collecting if necessary."""
|
||||||
|
df = data if data is not None else getattr(self, 'data_filtered', None)
|
||||||
|
if df is None:
|
||||||
|
raise ValueError("No data provided and self.data_filtered is None.")
|
||||||
|
|
||||||
|
if isinstance(df, pl.LazyFrame):
|
||||||
|
return df.collect()
|
||||||
|
return df
|
||||||
|
|
||||||
def plot_average_scores_with_counts(
|
def plot_average_scores_with_counts(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "General Impression (1-10)<br>Per Voice with Number of Participants Who Rated It",
|
title: str = "General Impression (1-10)<br>Per Voice with Number of Participants Who Rated It",
|
||||||
x_label: str = "Stimuli",
|
x_label: str = "Stimuli",
|
||||||
y_label: str = "Average General Impression Rating (1-10)",
|
y_label: str = "Average General Impression Rating (1-10)",
|
||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a bar plot showing average scores and count of non-null values for each column.
|
Create a bar plot showing average scores and count of non-null values for each column.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing numeric columns to analyze.
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
color : str, optional
|
|
||||||
Bar color (hex code or named color).
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
# Calculate average and count of non-null values for each column
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
# Exclude _recordId column
|
# Exclude _recordId column
|
||||||
stats = []
|
stats = []
|
||||||
for col in [c for c in df.columns if c != '_recordId']:
|
for col in [c for c in df.columns if c != '_recordId']:
|
||||||
@@ -102,8 +93,8 @@ def plot_average_scores_with_counts(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -118,45 +109,23 @@ def plot_average_scores_with_counts(
|
|||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_top3_ranking_distribution(
|
def plot_top3_ranking_distribution(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Top 3 Rankings Distribution<br>Count of 1st, 2nd, and 3rd Place Votes per Voice",
|
title: str = "Top 3 Rankings Distribution<br>Count of 1st, 2nd, and 3rd Place Votes per Voice",
|
||||||
x_label: str = "Voices",
|
x_label: str = "Voices",
|
||||||
y_label: str = "Number of Mentions in Top 3",
|
y_label: str = "Number of Mentions in Top 3",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a stacked bar chart showing how often each voice was ranked 1st, 2nd, or 3rd.
|
Create a stacked bar chart showing how often each voice was ranked 1st, 2nd, or 3rd.
|
||||||
|
|
||||||
The total height of the bar represents the popularity (frequency of being in Top 3),
|
|
||||||
while the segments show the quality of those rankings.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing ranking columns (values 1, 2, 3).
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
# Exclude _recordId column
|
# Exclude _recordId column
|
||||||
stats = []
|
stats = []
|
||||||
for col in [c for c in df.columns if c != '_recordId']:
|
for col in [c for c in df.columns if c != '_recordId']:
|
||||||
@@ -219,8 +188,8 @@ def plot_top3_ranking_distribution(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -242,43 +211,24 @@ def plot_top3_ranking_distribution(
|
|||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_ranking_distribution(
|
def plot_ranking_distribution(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Rankings Distribution<br>(1st to 4th Place)",
|
title: str = "Rankings Distribution<br>(1st to 4th Place)",
|
||||||
x_label: str = "Item",
|
x_label: str = "Item",
|
||||||
y_label: str = "Number of Votes",
|
y_label: str = "Number of Votes",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
|
Create a stacked bar chart showing the distribution of rankings (1st to 4th) for characters or voices.
|
||||||
Sorted by the number of Rank 1 votes.
|
Sorted by the number of Rank 1 votes.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing ranking columns.
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
stats = []
|
stats = []
|
||||||
# Identify ranking columns (assume all columns except _recordId)
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if c != '_recordId']
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
@@ -359,8 +309,8 @@ def plot_ranking_distribution(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -382,43 +332,24 @@ def plot_ranking_distribution(
|
|||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_most_ranked_1(
|
def plot_most_ranked_1(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
|
title: str = "Most Popular Choice<br>(Number of Times Ranked 1st)",
|
||||||
x_label: str = "Item",
|
x_label: str = "Item",
|
||||||
y_label: str = "Count of 1st Place Rankings",
|
y_label: str = "Count of 1st Place Rankings",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a bar chart showing which item (character/voice) was ranked #1 the most.
|
Create a bar chart showing which item (character/voice) was ranked #1 the most.
|
||||||
Top 3 items are highlighted.
|
Top 3 items are highlighted.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing ranking columns.
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
stats = []
|
stats = []
|
||||||
# Identify ranking columns (assume all columns except _recordId)
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if c != '_recordId']
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
@@ -463,8 +394,8 @@ def plot_most_ranked_1(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -478,46 +409,23 @@ def plot_most_ranked_1(
|
|||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def plot_weighted_ranking_score(
|
def plot_weighted_ranking_score(
|
||||||
weighted_df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
|
title: str = "Weighted Popularity Score<br>(1st=3pts, 2nd=2pts, 3rd=1pt)",
|
||||||
x_label: str = "Character Personality",
|
x_label: str = "Character Personality",
|
||||||
y_label: str = "Total Weighted Score",
|
y_label: str = "Total Weighted Score",
|
||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a bar chart showing the weighted ranking score for each character.
|
Create a bar chart showing the weighted ranking score for each character.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
weighted_df : pl.DataFrame
|
|
||||||
DataFrame of weighted scores to plot (presumably the output of calculate_weighted_ranking_scores).
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
color : str, optional
|
|
||||||
Bar color.
|
|
||||||
height : int, optional
|
|
||||||
Plot height.
|
|
||||||
width : int, optional
|
|
||||||
Plot width.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
weighted_df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
fig = go.Figure()
|
fig = go.Figure()
|
||||||
|
|
||||||
@@ -535,8 +443,8 @@ def plot_weighted_ranking_score(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -550,48 +458,24 @@ def plot_weighted_ranking_score(
|
|||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_voice_selection_counts(
|
def plot_voice_selection_counts(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
target_column: str = "8_Combined",
|
target_column: str = "8_Combined",
|
||||||
title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
|
title: str = "Most Frequently Chosen Voices<br>(Top 8 Highlighted)",
|
||||||
x_label: str = "Voice",
|
x_label: str = "Voice",
|
||||||
y_label: str = "Number of Times Chosen",
|
y_label: str = "Number of Times Chosen",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Create a bar plot showing the frequency of voice selections.
|
Create a bar plot showing the frequency of voice selections.
|
||||||
Takes a column containing comma-separated values (e.g. "Voice 1, Voice 2..."),
|
|
||||||
counts occurrences, and highlights the top 8 most frequent voices.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing the selection column.
|
|
||||||
target_column : str, optional
|
|
||||||
Name of the column containing comma-separated voice selections.
|
|
||||||
Defaults to "8_Combined".
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if target_column not in df.columns:
|
if target_column not in df.columns:
|
||||||
return go.Figure()
|
return go.Figure()
|
||||||
|
|
||||||
@@ -634,8 +518,8 @@ def plot_voice_selection_counts(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -649,51 +533,24 @@ def plot_voice_selection_counts(
|
|||||||
font=dict(size=11),
|
font=dict(size=11),
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_top3_selection_counts(
|
def plot_top3_selection_counts(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
target_column: str = "3_Ranked",
|
target_column: str = "3_Ranked",
|
||||||
title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
|
title: str = "Most Frequently Chosen Top 3 Voices<br>(Top 3 Highlighted)",
|
||||||
x_label: str = "Voice",
|
x_label: str = "Voice",
|
||||||
y_label: str = "Count of Mentions in Top 3",
|
y_label: str = "Count of Mentions in Top 3",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Question: Which 3 voices are chosen the most out of 18?
|
Question: Which 3 voices are chosen the most out of 18?
|
||||||
|
|
||||||
How many times does each voice end up in the top 3?
|
|
||||||
(this is based on the survey question where participants need to choose 3 out
|
|
||||||
of the earlier selected 8 voices). So how often each of the 18 stimuli ended
|
|
||||||
up in participants' Top 3, after they first selected 8 out of 18.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing the ranking column (comma-separated strings).
|
|
||||||
target_column : str, optional
|
|
||||||
Name of the column containing comma-separated Top 3 voice selections.
|
|
||||||
Defaults to "3_Ranked".
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
x_label : str, optional
|
|
||||||
X-axis label.
|
|
||||||
y_label : str, optional
|
|
||||||
Y-axis label.
|
|
||||||
height : int, optional
|
|
||||||
Plot height in pixels.
|
|
||||||
width : int, optional
|
|
||||||
Plot width in pixels.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if target_column not in df.columns:
|
if target_column not in df.columns:
|
||||||
return go.Figure()
|
return go.Figure()
|
||||||
|
|
||||||
@@ -732,8 +589,8 @@ def plot_top3_selection_counts(
|
|||||||
title=title,
|
title=title,
|
||||||
xaxis_title=x_label,
|
xaxis_title=x_label,
|
||||||
yaxis_title=y_label,
|
yaxis_title=y_label,
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
showgrid=True,
|
showgrid=True,
|
||||||
@@ -747,53 +604,24 @@ def plot_top3_selection_counts(
|
|||||||
font=dict(size=11),
|
font=dict(size=11),
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_speaking_style_trait_scores(
|
def plot_speaking_style_trait_scores(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
trait_description: str = None,
|
trait_description: str = None,
|
||||||
left_anchor: str = None,
|
left_anchor: str = None,
|
||||||
right_anchor: str = None,
|
right_anchor: str = None,
|
||||||
title: str = "Speaking Style Trait Analysis",
|
title: str = "Speaking Style Trait Analysis",
|
||||||
height: int = 500,
|
height: int | None = None,
|
||||||
width: int = 1000,
|
width: int | None = None,
|
||||||
results_dir: str | None = None,
|
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Plot scores for a single speaking style trait across multiple voices.
|
Plot scores for a single speaking style trait across multiple voices.
|
||||||
|
|
||||||
The plot shows the average score per Voice, sorted by score.
|
|
||||||
It expects the DataFrame to contain 'Voice' and 'score' columns,
|
|
||||||
typically filtered for a single trait/description.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
DataFrame containing at least 'Voice' and 'score' columns.
|
|
||||||
Produced by utils.process_speaking_style_data and filtered.
|
|
||||||
trait_description : str, optional
|
|
||||||
Description of the trait being analyzed (e.g. "Indifferent : Attentive").
|
|
||||||
If not provided, it will be constructed from annotations.
|
|
||||||
left_anchor : str, optional
|
|
||||||
Label for the lower end of the scale (e.g. "Indifferent").
|
|
||||||
If not provided, attempts to read 'Left_Anchor' column from df.
|
|
||||||
right_anchor : str, optional
|
|
||||||
Label for the upper end of the scale (e.g. "Attentive").
|
|
||||||
If not provided, attempts to read 'Right_Anchor' column from df.
|
|
||||||
title : str, optional
|
|
||||||
Plot title.
|
|
||||||
height : int, optional
|
|
||||||
Plot height.
|
|
||||||
width : int, optional
|
|
||||||
Plot width.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
Plotly figure object.
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if df.is_empty():
|
if df.is_empty():
|
||||||
return go.Figure()
|
return go.Figure()
|
||||||
|
|
||||||
@@ -878,8 +706,8 @@ def plot_speaking_style_trait_scores(
|
|||||||
),
|
),
|
||||||
xaxis_title="Average Score (1-5)",
|
xaxis_title="Average Score (1-5)",
|
||||||
yaxis_title="Voice",
|
yaxis_title="Voice",
|
||||||
height=height,
|
height=height if height else getattr(self, 'plot_height', 500),
|
||||||
width=width,
|
width=width if width else getattr(self, 'plot_width', 1000),
|
||||||
plot_bgcolor=ColorPalette.BACKGROUND,
|
plot_bgcolor=ColorPalette.BACKGROUND,
|
||||||
xaxis=dict(
|
xaxis=dict(
|
||||||
range=[1, 5],
|
range=[1, 5],
|
||||||
@@ -894,34 +722,23 @@ def plot_speaking_style_trait_scores(
|
|||||||
annotations=annotations,
|
annotations=annotations,
|
||||||
font=dict(size=11)
|
font=dict(size=11)
|
||||||
)
|
)
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def plot_speaking_style_correlation(
|
def plot_speaking_style_correlation(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
style_color: str,
|
style_color: str,
|
||||||
style_traits: list[str],
|
style_traits: list[str],
|
||||||
title=f"Speaking style and voice scale 1-10 correlations",
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
results_dir: str | None = None,
|
title: str | None = None,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10) using a Bar Chart.
|
Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10) using a Bar Chart.
|
||||||
Each bar represents one trait.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Voice_Scale_Score'.
|
|
||||||
style_color : str
|
|
||||||
The name of the style (e.g., 'Green', 'Blue') for title and coloring.
|
|
||||||
style_traits : list[str]
|
|
||||||
List of trait descriptions (positive side) to include in the plot.
|
|
||||||
These should match the 'Right_Anchor' column values.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
|
if title is None:
|
||||||
|
title = f"Speaking style and voice scale 1-10 correlations"
|
||||||
|
|
||||||
trait_correlations = []
|
trait_correlations = []
|
||||||
|
|
||||||
@@ -940,13 +757,7 @@ def plot_speaking_style_correlation(
|
|||||||
# Calculate Pearson Correlation
|
# Calculate Pearson Correlation
|
||||||
corr_val = valid_data.select(pl.corr("score", "Voice_Scale_Score")).item()
|
corr_val = valid_data.select(pl.corr("score", "Voice_Scale_Score")).item()
|
||||||
|
|
||||||
# Trait Label for Plot (Use the provided list text, maybe truncated or wrapped later)
|
# Trait Label for Plot
|
||||||
trait_label = f"Trait {i+1}: {trait}"
|
|
||||||
# Or just "Trait {i+1}" and put full text in hover or subtitle?
|
|
||||||
# User example showed "Trait 1", "Trait 2".
|
|
||||||
# User request said "Use the traits directly".
|
|
||||||
# Let's use the trait text as the x-axis label, perhaps wrapped.
|
|
||||||
|
|
||||||
trait_correlations.append({
|
trait_correlations.append({
|
||||||
"trait_full": trait,
|
"trait_full": trait,
|
||||||
"trait_short": f"Trait {i+1}",
|
"trait_short": f"Trait {i+1}",
|
||||||
@@ -982,17 +793,6 @@ def plot_speaking_style_correlation(
|
|||||||
customdata=plot_df["trait_full"] # Full text on hover
|
customdata=plot_df["trait_full"] # Full text on hover
|
||||||
))
|
))
|
||||||
|
|
||||||
# 3. Add Trait Descriptions as Subtitle or Annotation?
|
|
||||||
# Or put on X-axis? The traits are long strings "Friendly | Conversational ...".
|
|
||||||
# User's example has "Trait 1", "Trait 2" on axis.
|
|
||||||
# But user specifically said "Use the traits directly".
|
|
||||||
# This might mean "Don't map choice 1->Green, choice 2->Blue dynamically, trusting indices. Instead use the text match".
|
|
||||||
# It might ALSO mean "Show the text on the chart".
|
|
||||||
# The example image has simple "Trait X" labels.
|
|
||||||
# I will stick to "Trait X" on axis but add the legend/list in the title or as annotations,
|
|
||||||
# OR better: Use the full text on X-axis but with <br> wrapping.
|
|
||||||
# Given the length ("Optimistic | Benevolent | Positive | Appreciative"), wrapping is needed.
|
|
||||||
|
|
||||||
# Wrap text at the "|" separator for cleaner line breaks
|
# Wrap text at the "|" separator for cleaner line breaks
|
||||||
def wrap_text_at_pipe(text):
|
def wrap_text_at_pipe(text):
|
||||||
parts = [p.strip() for p in text.split("|")]
|
parts = [p.strip() for p in text.split("|")]
|
||||||
@@ -1008,43 +808,26 @@ def plot_speaking_style_correlation(
|
|||||||
yaxis_title="Correlation",
|
yaxis_title="Correlation",
|
||||||
yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
|
yaxis=dict(range=[-1, 1], zeroline=True, zerolinecolor="black"),
|
||||||
xaxis=dict(tickangle=0), # Keep flat if possible
|
xaxis=dict(tickangle=0), # Keep flat if possible
|
||||||
height=400,
|
height=400, # Use fixed default from original
|
||||||
width=1000,
|
width=1000,
|
||||||
template="plotly_white",
|
template="plotly_white",
|
||||||
showlegend=False
|
showlegend=False
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def plot_speaking_style_ranking_correlation(
|
def plot_speaking_style_ranking_correlation(
|
||||||
df: pl.DataFrame,
|
self,
|
||||||
style_color: str,
|
style_color: str,
|
||||||
style_traits: list[str],
|
style_traits: list[str],
|
||||||
title: str = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
results_dir: str | None = None,
|
title: str | None = None,
|
||||||
) -> go.Figure:
|
) -> go.Figure:
|
||||||
"""
|
"""
|
||||||
Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).
|
Plots the correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).
|
||||||
Each bar represents one trait.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
df : pl.DataFrame
|
|
||||||
Joined dataframe containing 'Right_Anchor', 'score' (Trait Score), and 'Ranking_Points'.
|
|
||||||
style_color : str
|
|
||||||
The name of the style (e.g., 'Green', 'Blue') for title and coloring.
|
|
||||||
style_traits : list[str]
|
|
||||||
List of trait descriptions (positive side) to include in the plot.
|
|
||||||
These should match the 'Right_Anchor' column values.
|
|
||||||
title : str, optional
|
|
||||||
Custom title for the plot. If None, uses default.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
go.Figure
|
|
||||||
"""
|
"""
|
||||||
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if title is None:
|
if title is None:
|
||||||
title = f"Speaking style {style_color} and voice ranking points correlations"
|
title = f"Speaking style {style_color} and voice ranking points correlations"
|
||||||
@@ -1118,5 +901,5 @@ def plot_speaking_style_ranking_correlation(
|
|||||||
showlegend=False
|
showlegend=False
|
||||||
)
|
)
|
||||||
|
|
||||||
_save_plot(fig, results_dir, title)
|
self._save_plot(fig, title)
|
||||||
return fig
|
return fig
|
||||||
|
|||||||
33
utils.py
33
utils.py
@@ -4,6 +4,9 @@ import pandas as pd
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
from plots import JPMCPlotsMixin
|
||||||
|
|
||||||
|
import marimo as mo
|
||||||
|
|
||||||
def extract_voice_label(html_str: str) -> str:
|
def extract_voice_label(html_str: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -54,7 +57,7 @@ def combine_exclusive_columns(df: pl.DataFrame, id_col: str = "_recordId", targe
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
def calculate_weighted_ranking_scores(df: pl.LazyFrame) -> pl.DataFrame:
|
||||||
"""
|
"""
|
||||||
Calculate weighted scores for character or voice rankings.
|
Calculate weighted scores for character or voice rankings.
|
||||||
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
|
Points system: 1st place = 3 pts, 2nd place = 2 pts, 3rd place = 1 pt.
|
||||||
@@ -69,6 +72,9 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
|||||||
pl.DataFrame
|
pl.DataFrame
|
||||||
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
|
DataFrame with columns 'Character' and 'Weighted Score', sorted by score.
|
||||||
"""
|
"""
|
||||||
|
if isinstance(df, pl.LazyFrame):
|
||||||
|
df = df.collect()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
# Identify ranking columns (assume all columns except _recordId)
|
# Identify ranking columns (assume all columns except _recordId)
|
||||||
ranking_cols = [c for c in df.columns if c != '_recordId']
|
ranking_cols = [c for c in df.columns if c != '_recordId']
|
||||||
@@ -93,7 +99,7 @@ def calculate_weighted_ranking_scores(df: pl.DataFrame) -> pl.DataFrame:
|
|||||||
return pl.DataFrame(scores).sort('Weighted Score', descending=True)
|
return pl.DataFrame(scores).sort('Weighted Score', descending=True)
|
||||||
|
|
||||||
|
|
||||||
class JPMCSurvey:
|
class JPMCSurvey(JPMCPlotsMixin):
|
||||||
"""Class to handle JPMorgan Chase survey data."""
|
"""Class to handle JPMorgan Chase survey data."""
|
||||||
|
|
||||||
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
|
def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
|
||||||
@@ -113,6 +119,18 @@ class JPMCSurvey:
|
|||||||
if not self.fig_save_dir.exists():
|
if not self.fig_save_dir.exists():
|
||||||
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
|
self.fig_save_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
self.data_filtered = None
|
||||||
|
self.plot_height = 500
|
||||||
|
self.plot_width = 1000
|
||||||
|
|
||||||
|
# Filter values
|
||||||
|
self.filter_age:list = None
|
||||||
|
self.filter_gender:list = None
|
||||||
|
self.filter_consumer:list = None
|
||||||
|
self.filter_ethnicity:list = None
|
||||||
|
self.filter_income:list = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_qid_descr_map(self) -> dict:
|
def _extract_qid_descr_map(self) -> dict:
|
||||||
"""Extract mapping of Qualtrics ImportID to Question Description from results file."""
|
"""Extract mapping of Qualtrics ImportID to Question Description from results file."""
|
||||||
@@ -217,25 +235,32 @@ class JPMCSurvey:
|
|||||||
- ethnicity: list
|
- ethnicity: list
|
||||||
- income: list
|
- income: list
|
||||||
|
|
||||||
Returns filtered polars LazyFrame.
|
Returns the filtered polars LazyFrame and also saves it to self.data_filtered.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Apply filters
|
||||||
if age is not None:
|
if age is not None:
|
||||||
|
self.filter_age = age
|
||||||
q = q.filter(pl.col('QID1').is_in(age))
|
q = q.filter(pl.col('QID1').is_in(age))
|
||||||
|
|
||||||
if gender is not None:
|
if gender is not None:
|
||||||
|
self.filter_gender = gender
|
||||||
q = q.filter(pl.col('QID2').is_in(gender))
|
q = q.filter(pl.col('QID2').is_in(gender))
|
||||||
|
|
||||||
if consumer is not None:
|
if consumer is not None:
|
||||||
|
self.filter_consumer = consumer
|
||||||
q = q.filter(pl.col('Consumer').is_in(consumer))
|
q = q.filter(pl.col('Consumer').is_in(consumer))
|
||||||
|
|
||||||
if ethnicity is not None:
|
if ethnicity is not None:
|
||||||
|
self.filter_ethnicity = ethnicity
|
||||||
q = q.filter(pl.col('QID3').is_in(ethnicity))
|
q = q.filter(pl.col('QID3').is_in(ethnicity))
|
||||||
|
|
||||||
if income is not None:
|
if income is not None:
|
||||||
|
self.filter_income = income
|
||||||
q = q.filter(pl.col('QID15').is_in(income))
|
q = q.filter(pl.col('QID15').is_in(income))
|
||||||
|
|
||||||
return q
|
self.data_filtered = q
|
||||||
|
return self.data_filtered
|
||||||
|
|
||||||
def get_demographics(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
|
def get_demographics(self, q: pl.LazyFrame) -> Union[pl.LazyFrame, None]:
|
||||||
"""Extract columns containing the demographics.
|
"""Extract columns containing the demographics.
|
||||||
|
|||||||
Reference in New Issue
Block a user