correlation start

This commit is contained in:
2026-01-27 17:22:16 +01:00
parent 393c527656
commit fd4cb4b596
9 changed files with 5375 additions and 24 deletions

View File

@@ -14,25 +14,27 @@ def _():
from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
from plots import plot_average_scores_with_counts, plot_top3_ranking_distribution, plot_ranking_distribution, plot_most_ranked_1, plot_weighted_ranking_score, plot_voice_selection_counts, plot_top3_selection_counts
import plots as plts
import utils as utl
import plots
import utils
from speaking_styles import SPEAKING_STYLES
return (
JPMCSurvey,
Path,
SPEAKING_STYLES,
calculate_weighted_ranking_scores,
check_progress,
duration_validation,
mo,
pl,
plot_average_scores_with_counts,
plot_most_ranked_1,
plot_ranking_distribution,
plot_top3_ranking_distribution,
plot_top3_selection_counts,
plot_voice_selection_counts,
plot_weighted_ranking_score,
plts,
utl,
plots,
utils,
)
@@ -47,7 +49,7 @@ def _():
def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
data_all = survey.load_data()
data_all.collect()
# data_all.collect()
return data_all, survey
@@ -298,7 +300,7 @@ def _(mo):
@app.cell
def _(data, survey, utl):
def _(data, survey, utils):
ss_or, choice_map_or = survey.get_ss_orange_red(data)
ss_gb, choice_map_gb = survey.get_ss_green_blue(data)
@@ -309,12 +311,12 @@ def _(data, survey, utl):
choice_map = {**choice_map_or, **choice_map_gb}
# print(_d.head())
# print(choice_map)
ss_long = utl.process_speaking_style_data(ss_all, choice_map)
return (ss_long,)
ss_long = utils.process_speaking_style_data(ss_all, choice_map)
return choice_map, ss_all, ss_long
@app.cell
def _(mo, pl, plts, ss_long):
def _(mo, pl, plots, ss_long):
content = """### How does each voice score for each “speaking style labeled trait”?"""
for i, trait in enumerate(ss_long.select("Description").unique().to_series().to_list()):
@@ -323,7 +325,7 @@ def _(mo, pl, plts, ss_long):
content += f"""
### {i+1}) {trait.replace(":", "")}
{mo.ui.plotly(plts.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", ""), height=550))}
{mo.ui.plotly(plots.plot_speaking_style_trait_scores(trait_d, title=trait.replace(":", ""), height=550))}
"""
mo.md(content)
@@ -339,17 +341,17 @@ def _(mo):
@app.cell
def _(data, mo, plot_average_scores_with_counts, survey):
def _(data, mo, plots, survey):
vscales = survey.get_voice_scale_1_10(data)[0].collect()
plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
# plot_average_scores_with_counts(vscales, x_label='Voice', width=1000)
mo.md(f"""
### How does each voice score on a scale from 1-10?
{mo.ui.plotly(plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
{mo.ui.plotly(plots.plot_average_scores_with_counts(vscales, x_label='Voice', width=1000))}
""")
return
return (vscales,)
@app.cell(hide_code=True)
@@ -373,16 +375,57 @@ def _(mo):
return
@app.cell
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
### Total Results
### How to Interpret These Correlation Results
Each bar represents the Pearson correlation coefficient (r) between a speaking style trait rating (1-5 scale) and the overall Voice Scale rating (1-10).
- [ ] 4 correlation diagrams
**Reading the Chart**
| Correlation Value | Interpretation |
|-----------|----------|
| r > 0 (Green bars)| Positive correlation — voices rated higher on this trait tend to receive higher Voice Scale scores|
| r < 0 (Red bars)| Negative correlation — voices rated higher on this trait tend to receive lower Voice Scale scores|
| r ≈ 0| No relationship — this trait doesn't predict Voice Scale ratings|
""")
return
@app.cell
def _(choice_map, ss_all, utils, vscales):
    # Build the table used by the correlation cells: speaking-style ratings
    # joined to voice-scale ratings.
    # NOTE(review): `ss_all` is collected before processing here, whereas the
    # earlier `ss_long` cell passes it uncollected -- confirm which form
    # `process_speaking_style_data` expects.
    _join_keys = ["_recordId", "Voice"]

    df_style = utils.process_speaking_style_data(
        ss_all.collect(),
        choice_map,
    )
    df_voice_long = utils.process_voice_scale_data(vscales)

    # Inner join on the shared keys, so only key pairs found in both tables
    # remain (presumably one row per respondent/voice -- verify).
    joined_df = df_style.join(
        df_voice_long,
        how="inner",
        on=_join_keys,
    )
    return df_style, joined_df
@app.cell
def _(SPEAKING_STYLES, joined_df, mo, plots):
    # Render one correlation chart per entry in SPEAKING_STYLES, comparing the
    # speaking-style trait columns of `joined_df` with the voice-scale rating.
    #
    # All working names are underscore-prefixed so marimo keeps them local to
    # this cell. Unprefixed names (`style`, `traits`, `fig`) would be
    # notebook-global definitions and clash with any other cell that defines
    # the same names; the ranking-correlation cell already follows this
    # convention.
    _content = """### Total Results
"""

    for _style, _traits in SPEAKING_STYLES.items():
        _fig = plots.plot_speaking_style_correlation(
            df=joined_df,
            style_color=_style,
            style_traits=_traits,
            title=f"Correlation: Speaking Style {_style} and Voice Scale 1-10"
        )
        # Each figure is embedded as an interactive plotly element under its
        # own sub-heading.
        _content += f"""
#### Speaking Style **{_style}**:
{mo.ui.plotly(_fig)}
"""

    # Last expression of the cell: this is what marimo displays.
    mo.md(_content)
    return
@app.cell
def _(mo):
mo.md(r"""
@@ -425,6 +468,30 @@ def _(mo):
return
@app.cell
def _(SPEAKING_STYLES, df_style, mo, plots, top3_voices, utils):
    # Correlate speaking-style trait ratings with voice ranking data, one
    # chart per entry in SPEAKING_STYLES.
    df_ranking = utils.process_voice_ranking_data(top3_voices)

    # Inner join: only key pairs present in both the style ratings and the
    # ranking table are kept.
    joined = df_style.join(
        df_ranking,
        on=["_recordId", "Voice"],
        how="inner",
    )

    # Collect the markdown fragments first and concatenate once at the end.
    _sections = ["""## Correlations Voice Speaking Styles <-> Voice Ranking Points
"""]
    for _style_name, _style_traits in SPEAKING_STYLES.items():
        _chart = plots.plot_speaking_style_ranking_correlation(
            joined, _style_name, _style_traits
        )
        _sections.append(f"""
#### Speaking Style **{_style_name}**:
{mo.ui.plotly(_chart)}
""")

    # Last expression of the cell: this is what marimo displays.
    mo.md("".join(_sections))
    return
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""