correlation plots for best bc

This commit is contained in:
2026-02-04 10:46:31 +01:00
parent ad1d8c6e58
commit e17646eb70
2 changed files with 164 additions and 78 deletions

View File

@@ -570,7 +570,7 @@ _content = """"""
for _style, _traits in SPEAKING_STYLES.items(): for _style, _traits in SPEAKING_STYLES.items():
# print(f"Correlation plot for {style}...") # print(f"Correlation plot for {style}...")
_fig = S.plot_speaking_style_correlation( _fig = S.plot_speaking_style_scale_correlation(
data=joined_scale, data=joined_scale,
style_color=_style, style_color=_style,
style_traits=_traits, style_traits=_traits,
@@ -609,86 +609,145 @@ for _style, _traits in SPEAKING_STYLES.items():
mo.md(_content) mo.md(_content)
# %% # %%
mo.md(r""" # ## Correlations when "Best Brand Character" is chosen
## Correlations when "Best Brand Character" is chosen # For each of the 4 brand characters, filter the dataset to only those respondents
# who selected that character as their #1 choice.
Select only the traits that fit with that character
""")
# %% # %%
from reference import ORIGINAL_CHARACTER_TRAITS # Prepare character-filtered data subsets
chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER] char_rank_for_filter = S.get_character_ranking(data)[0].collect()
# Maps each brand character's display name to the ranking column that is
# compared against 1 to select respondents who ranked that character first.
# The keys are also iterated to produce one set of plots per character.
CHARACTER_FILTER_MAP = {
'Familiar Friend': 'Character_Ranking_Familiar_Friend',
'The Coach': 'Character_Ranking_The_Coach',
'Personal Assistant': 'Character_Ranking_The_Personal_Assistant',
'Bank Teller': 'Character_Ranking_The_Bank_Teller',
}
def get_filtered_data_for_character(char_name: str) -> tuple[pl.DataFrame, pl.DataFrame, int]:
    """Restrict the joined datasets to respondents who ranked ``char_name`` first.

    Args:
        char_name: A key of ``CHARACTER_FILTER_MAP``; it selects the ranking
            column to test.

    Returns:
        A ``(filtered_scale, filtered_ranking, n)`` tuple: ``joined_scale`` and
        ``joined_ranking`` inner-joined on ``_recordId`` against the matching
        respondents, plus the row count of that respondent selection.

    Raises:
        KeyError: If ``char_name`` is not a key of ``CHARACTER_FILTER_MAP``.
    """
    ranking_column = CHARACTER_FILTER_MAP[char_name]

    # Keep only the record ids of respondents whose rank for this character is 1.
    is_first_choice = pl.col(ranking_column) == 1
    first_choice_ids = char_rank_for_filter.filter(is_first_choice).select('_recordId')
    respondent_count = first_choice_ids.height

    # The inner join drops every row whose respondent is not in the selection.
    scale_subset = joined_scale.join(first_choice_ids, on='_recordId', how='inner')
    ranking_subset = joined_ranking.join(first_choice_ids, on='_recordId', how='inner')
    return scale_subset, ranking_subset, respondent_count
def _char_filename(char_name: str, suffix: str) -> str:
"""Generate filename for character-filtered plots (without n-value).
Format: bc_ranked_1_{suffix}__{char_slug}
This groups all plot types together in directory listings.
"""
char_slug = char_name.lower().replace(' ', '_')
return f"bc_ranked_1_{suffix}__{char_slug}"
# %% # %%
STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits) # ### Voice Weighted Ranking Score (by Best Character)
for char_name in CHARACTER_FILTER_MAP:
# %% _, _, n = get_filtered_data_for_character(char_name)
mo.md(r""" # Get top3 voices for this character subset using _recordIds
### Individual Traits vs Ranking Points respondents = char_rank_for_filter.filter(
""") pl.col(CHARACTER_FILTER_MAP[char_name]) == 1
).select('_recordId')
# %% # Collect top3_voices if it's a LazyFrame, then join
_content = "" top3_df = top3_voices.collect() if isinstance(top3_voices, pl.LazyFrame) else top3_voices
for _style, _traits in STYLES_SUBSET.items(): filtered_top3 = top3_df.join(respondents, on='_recordId', how='inner')
_fig = S.plot_speaking_style_ranking_correlation( weighted = calculate_weighted_ranking_scores(filtered_top3)
data=joined_ranking, S.plot_weighted_ranking_score(
style_color=_style, data=weighted,
style_traits=_traits, title=f'"{char_name}" Ranked #1 (n={n})<br>Most Popular Voice - Weighted Score (1st=3pts, 2nd=2pts, 3rd=1pt)',
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Ranking Points""" filename=_char_filename(char_name, "voice_weighted_ranking_score"),
color_gender=COLOR_GENDER,
) )
_content += f"""
{mo.ui.altair_chart(_fig)}
"""
mo.md(_content)
# %% # %%
mo.md(r""" # ### Voice Scale 1-10 Average Scores (by Best Character)
### Individual Traits vs Scale 1-10 for char_name in CHARACTER_FILTER_MAP:
""") _, _, n = get_filtered_data_for_character(char_name)
# Get voice scale data for this character subset using _recordIds
# %% respondents = char_rank_for_filter.filter(
_content = """""" pl.col(CHARACTER_FILTER_MAP[char_name]) == 1
).select('_recordId')
for _style, _traits in STYLES_SUBSET.items(): # Collect voice_1_10 if it's a LazyFrame, then join
# print(f"Correlation plot for {style}...") voice_1_10_df = voice_1_10.collect() if isinstance(voice_1_10, pl.LazyFrame) else voice_1_10
_fig = S.plot_speaking_style_correlation( filtered_voice_1_10 = voice_1_10_df.join(respondents, on='_recordId', how='inner')
data=joined_scale, S.plot_average_scores_with_counts(
style_color=_style, data=filtered_voice_1_10,
style_traits=_traits, title=f'"{char_name}" Ranked #1 (n={n})<br>Voice General Impression (Scale 1-10)',
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Scale 1-10""", filename=_char_filename(char_name, "voice_scale_1-10"),
x_label='Voice',
domain=[1, 10],
color_gender=COLOR_GENDER,
) )
_content += f"""
{mo.ui.altair_chart(_fig)}
"""
mo.md(_content)
# %% # %%
mo.md(r""" # ### Speaking Style Colors vs Scale 1-10 (only for Best Character)
### Colors vs Scale 1-10 (Best Character) for char_name in CHARACTER_FILTER_MAP:
""") if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
continue
filtered_scale, _, n = get_filtered_data_for_character(char_name)
color_corr, _ = utils.transform_speaking_style_color_correlation(filtered_scale, SPEAKING_STYLES)
S.plot_speaking_style_color_correlation(
data=color_corr,
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: Speaking Style Colors vs Voice Scale 1-10',
filename=_char_filename(char_name, "colors_vs_voice_scale_1-10"),
)
# %% # %%
# Transform to get one row per color with average correlation # ### Speaking Style Colors vs Ranking Points (only for Best Character)
_color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET) for char_name in CHARACTER_FILTER_MAP:
S.plot_speaking_style_color_correlation( if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
data=_color_corr_scale, continue
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Scale 1-10"""
) _, filtered_ranking, n = get_filtered_data_for_character(char_name)
color_corr, _ = utils.transform_speaking_style_color_correlation(
filtered_ranking, SPEAKING_STYLES, target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
data=color_corr,
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: Speaking Style Colors vs Voice Ranking Points',
filename=_char_filename(char_name, "colors_vs_voice_ranking_points"),
)
# %% # %%
mo.md(r""" # ### Individual Traits vs Scale 1-10 (only for Best Character)
### Colors vs Ranking Points (Best Character) for _style, _traits in SPEAKING_STYLES.items():
""") print(f"--- Speaking Style: {_style} ---")
for char_name in CHARACTER_FILTER_MAP:
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
continue
filtered_scale, _, n = get_filtered_data_for_character(char_name)
S.plot_speaking_style_scale_correlation(
data=filtered_scale,
style_color=_style,
style_traits=_traits,
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: {_style} vs Voice Scale 1-10',
filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_scale_1-10"),
)
# %%
# ### Individual Traits vs Ranking Points (only for Best Character)
for _style, _traits in SPEAKING_STYLES.items():
print(f"--- Speaking Style: {_style} ---")
for char_name in CHARACTER_FILTER_MAP:
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
continue
_, filtered_ranking, n = get_filtered_data_for_character(char_name)
S.plot_speaking_style_ranking_correlation(
data=filtered_ranking,
style_color=_style,
style_traits=_traits,
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: {_style} vs Voice Ranking Points',
filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_ranking_points"),
)
# %% # %%
_color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
joined_ranking,
STYLES_SUBSET,
target_column="Ranking_Points"
)
S.plot_speaking_style_color_correlation(
data=_color_corr_ranking,
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Ranking Points"""
)

View File

@@ -253,9 +253,15 @@ class QualtricsPlotsMixin:
return chart.properties(title=title_config) return chart.properties(title=title_config)
def _save_plot(self, chart: alt.Chart, title: str) -> alt.Chart: def _save_plot(self, chart: alt.Chart, title: str, filename: str | None = None) -> alt.Chart:
"""Save chart to PNG file if fig_save_dir is set. """Save chart to PNG file if fig_save_dir is set.
Args:
chart: The Altair chart to save
title: Chart title (used for filename if filename not provided)
filename: Optional explicit filename (without extension). If provided,
this is used instead of deriving from title.
Returns the (potentially modified) chart with filter footnote added. Returns the (potentially modified) chart with filter footnote added.
""" """
# Add filter footnote - returns combined chart if filters active # Add filter footnote - returns combined chart if filters active
@@ -271,7 +277,9 @@ class QualtricsPlotsMixin:
if not path.exists(): if not path.exists():
path.mkdir(parents=True, exist_ok=True) path.mkdir(parents=True, exist_ok=True)
filename = f"{self._sanitize_filename(title)}.png" # Use explicit filename if provided, otherwise derive from title
base_name = filename if filename else self._sanitize_filename(title)
filename = f"{base_name}.png"
filepath = path / filename filepath = path / filename
# Use vl_convert directly with theme config for consistent rendering # Use vl_convert directly with theme config for consistent rendering
@@ -397,6 +405,7 @@ class QualtricsPlotsMixin:
self, self,
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
title: str = "General Impression (1-10)\nPer Voice with Number of Participants Who Rated It", title: str = "General Impression (1-10)\nPer Voice with Number of Participants Who Rated It",
filename: str | None = None,
x_label: str = "Stimuli", x_label: str = "Stimuli",
y_label: str = "Average General Impression Rating (1-10)", y_label: str = "Average General Impression Rating (1-10)",
color: str = ColorPalette.PRIMARY, color: str = ColorPalette.PRIMARY,
@@ -408,6 +417,7 @@ class QualtricsPlotsMixin:
"""Create a bar plot showing average scores and count of non-null values for each column. """Create a bar plot showing average scores and count of non-null values for each column.
Parameters: Parameters:
filename: Optional explicit filename (without extension) for saving.
color_gender: If True, color bars by voice gender (blue=male, pink=female). color_gender: If True, color bars by voice gender (blue=male, pink=female).
""" """
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
@@ -484,7 +494,7 @@ class QualtricsPlotsMixin:
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
) )
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title, filename=filename)
return chart return chart
def plot_top3_ranking_distribution( def plot_top3_ranking_distribution(
@@ -803,6 +813,7 @@ class QualtricsPlotsMixin:
self, self,
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
title: str = "Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)", title: str = "Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)",
filename: str | None = None,
x_label: str = "Character Personality", x_label: str = "Character Personality",
y_label: str = "Total Weighted Score", y_label: str = "Total Weighted Score",
color: str = ColorPalette.PRIMARY, color: str = ColorPalette.PRIMARY,
@@ -813,6 +824,7 @@ class QualtricsPlotsMixin:
"""Create a bar chart showing the weighted ranking score for each character. """Create a bar chart showing the weighted ranking score for each character.
Parameters: Parameters:
filename: Optional explicit filename (without extension) for saving.
color_gender: If True, color bars by voice gender (blue=male, pink=female). color_gender: If True, color bars by voice gender (blue=male, pink=female).
""" """
weighted_df = self._ensure_dataframe(data).to_pandas() weighted_df = self._ensure_dataframe(data).to_pandas()
@@ -863,7 +875,7 @@ class QualtricsPlotsMixin:
height=height or getattr(self, 'plot_height', 400) height=height or getattr(self, 'plot_height', 400)
) )
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title, filename=filename)
return chart return chart
def plot_voice_selection_counts( def plot_voice_selection_counts(
@@ -1179,16 +1191,22 @@ class QualtricsPlotsMixin:
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title)
return chart return chart
def plot_speaking_style_correlation( def plot_speaking_style_scale_correlation(
self, self,
style_color: str, style_color: str,
style_traits: list[str], style_traits: list[str],
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
title: str | None = None, title: str | None = None,
filename: str | None = None,
width: int | str | None = None, width: int | str | None = None,
height: int | None = None, height: int | None = None,
) -> alt.Chart: ) -> alt.Chart:
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10).""" """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10).
Args:
filename: Optional explicit filename (without extension) for saving.
If not provided, filename is derived from title.
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
if title is None: if title is None:
@@ -1235,13 +1253,14 @@ class QualtricsPlotsMixin:
height=height or 350 height=height or 350
) )
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title, filename=filename)
return chart return chart
def plot_speaking_style_color_correlation( def plot_speaking_style_color_correlation(
self, self,
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)", title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
filename: str | None = None,
width: int | str | None = None, width: int | str | None = None,
height: int | None = None, height: int | None = None,
) -> alt.Chart: ) -> alt.Chart:
@@ -1255,6 +1274,8 @@ class QualtricsPlotsMixin:
data: DataFrame with columns [Color, correlation, n_traits] from data: DataFrame with columns [Color, correlation, n_traits] from
utils.transform_speaking_style_color_correlation utils.transform_speaking_style_color_correlation
title: Chart title (supports <br> for line breaks) title: Chart title (supports <br> for line breaks)
filename: Optional explicit filename (without extension) for saving.
If not provided, filename is derived from title.
width: Chart width in pixels width: Chart width in pixels
height: Chart height in pixels height: Chart height in pixels
@@ -1289,7 +1310,7 @@ class QualtricsPlotsMixin:
height=height or 350 height=height or 350
) )
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title, filename=filename)
return chart return chart
def plot_demographic_distribution( def plot_demographic_distribution(
@@ -1415,10 +1436,16 @@ class QualtricsPlotsMixin:
style_traits: list[str], style_traits: list[str],
data: pl.LazyFrame | pl.DataFrame | None = None, data: pl.LazyFrame | pl.DataFrame | None = None,
title: str | None = None, title: str | None = None,
filename: str | None = None,
width: int | str | None = None, width: int | str | None = None,
height: int | None = None, height: int | None = None,
) -> alt.Chart: ) -> alt.Chart:
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).""" """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).
Args:
filename: Optional explicit filename (without extension) for saving.
If not provided, filename is derived from title.
"""
df = self._ensure_dataframe(data) df = self._ensure_dataframe(data)
if title is None: if title is None:
@@ -1462,7 +1489,7 @@ class QualtricsPlotsMixin:
height=height or 350 height=height or 350
) )
chart = self._save_plot(chart, title) chart = self._save_plot(chart, title, filename=filename)
return chart return chart
def plot_traits_wordcloud( def plot_traits_wordcloud(