correlation plots for best bc
This commit is contained in:
@@ -570,7 +570,7 @@ _content = """"""
|
|||||||
|
|
||||||
for _style, _traits in SPEAKING_STYLES.items():
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
# print(f"Correlation plot for {style}...")
|
# print(f"Correlation plot for {style}...")
|
||||||
_fig = S.plot_speaking_style_correlation(
|
_fig = S.plot_speaking_style_scale_correlation(
|
||||||
data=joined_scale,
|
data=joined_scale,
|
||||||
style_color=_style,
|
style_color=_style,
|
||||||
style_traits=_traits,
|
style_traits=_traits,
|
||||||
@@ -609,86 +609,145 @@ for _style, _traits in SPEAKING_STYLES.items():
|
|||||||
mo.md(_content)
|
mo.md(_content)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
mo.md(r"""
|
# ## Correlations when "Best Brand Character" is chosen
|
||||||
## Correlations when "Best Brand Character" is chosen
|
# For each of the 4 brand characters, filter the dataset to only those respondents
|
||||||
|
# who selected that character as their #1 choice.
|
||||||
Select only the traits that fit with that character
|
|
||||||
""")
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
from reference import ORIGINAL_CHARACTER_TRAITS
|
# Prepare character-filtered data subsets
|
||||||
chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER]
|
char_rank_for_filter = S.get_character_ranking(data)[0].collect()
|
||||||
|
|
||||||
# %%
|
CHARACTER_FILTER_MAP = {
|
||||||
STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits)
|
'Familiar Friend': 'Character_Ranking_Familiar_Friend',
|
||||||
|
'The Coach': 'Character_Ranking_The_Coach',
|
||||||
|
'Personal Assistant': 'Character_Ranking_The_Personal_Assistant',
|
||||||
|
'Bank Teller': 'Character_Ranking_The_Bank_Teller',
|
||||||
|
}
|
||||||
|
|
||||||
# %%
|
def get_filtered_data_for_character(char_name: str) -> tuple[pl.DataFrame, pl.DataFrame, int]:
|
||||||
mo.md(r"""
|
"""Filter joined_scale and joined_ranking to respondents who ranked char_name #1."""
|
||||||
### Individual Traits vs Ranking Points
|
col = CHARACTER_FILTER_MAP[char_name]
|
||||||
""")
|
respondents = char_rank_for_filter.filter(pl.col(col) == 1).select('_recordId')
|
||||||
|
n = respondents.height
|
||||||
|
filtered_scale = joined_scale.join(respondents, on='_recordId', how='inner')
|
||||||
|
filtered_ranking = joined_ranking.join(respondents, on='_recordId', how='inner')
|
||||||
|
return filtered_scale, filtered_ranking, n
|
||||||
|
|
||||||
# %%
|
def _char_filename(char_name: str, suffix: str) -> str:
|
||||||
_content = ""
|
"""Generate filename for character-filtered plots (without n-value).
|
||||||
for _style, _traits in STYLES_SUBSET.items():
|
|
||||||
_fig = S.plot_speaking_style_ranking_correlation(
|
|
||||||
data=joined_ranking,
|
|
||||||
style_color=_style,
|
|
||||||
style_traits=_traits,
|
|
||||||
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Ranking Points"""
|
|
||||||
)
|
|
||||||
_content += f"""
|
|
||||||
{mo.ui.altair_chart(_fig)}
|
|
||||||
|
|
||||||
|
Format: bc_ranked_1_{suffix}__{char_slug}
|
||||||
|
This groups all plot types together in directory listings.
|
||||||
"""
|
"""
|
||||||
mo.md(_content)
|
char_slug = char_name.lower().replace(' ', '_')
|
||||||
|
return f"bc_ranked_1_{suffix}__{char_slug}"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
mo.md(r"""
|
# ### Voice Weighted Ranking Score (by Best Character)
|
||||||
### Individual Traits vs Scale 1-10
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
""")
|
_, _, n = get_filtered_data_for_character(char_name)
|
||||||
|
# Get top3 voices for this character subset using _recordIds
|
||||||
# %%
|
respondents = char_rank_for_filter.filter(
|
||||||
_content = """"""
|
pl.col(CHARACTER_FILTER_MAP[char_name]) == 1
|
||||||
|
).select('_recordId')
|
||||||
for _style, _traits in STYLES_SUBSET.items():
|
# Collect top3_voices if it's a LazyFrame, then join
|
||||||
# print(f"Correlation plot for {style}...")
|
top3_df = top3_voices.collect() if isinstance(top3_voices, pl.LazyFrame) else top3_voices
|
||||||
_fig = S.plot_speaking_style_correlation(
|
filtered_top3 = top3_df.join(respondents, on='_recordId', how='inner')
|
||||||
data=joined_scale,
|
weighted = calculate_weighted_ranking_scores(filtered_top3)
|
||||||
style_color=_style,
|
S.plot_weighted_ranking_score(
|
||||||
style_traits=_traits,
|
data=weighted,
|
||||||
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Scale 1-10""",
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Most Popular Voice - Weighted Score (1st=3pts, 2nd=2pts, 3rd=1pt)',
|
||||||
|
filename=_char_filename(char_name, "voice_weighted_ranking_score"),
|
||||||
|
color_gender=COLOR_GENDER,
|
||||||
)
|
)
|
||||||
_content += f"""
|
|
||||||
{mo.ui.altair_chart(_fig)}
|
|
||||||
|
|
||||||
"""
|
|
||||||
mo.md(_content)
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
mo.md(r"""
|
# ### Voice Scale 1-10 Average Scores (by Best Character)
|
||||||
### Colors vs Scale 1-10 (Best Character)
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
""")
|
_, _, n = get_filtered_data_for_character(char_name)
|
||||||
|
# Get voice scale data for this character subset using _recordIds
|
||||||
|
respondents = char_rank_for_filter.filter(
|
||||||
|
pl.col(CHARACTER_FILTER_MAP[char_name]) == 1
|
||||||
|
).select('_recordId')
|
||||||
|
# Collect voice_1_10 if it's a LazyFrame, then join
|
||||||
|
voice_1_10_df = voice_1_10.collect() if isinstance(voice_1_10, pl.LazyFrame) else voice_1_10
|
||||||
|
filtered_voice_1_10 = voice_1_10_df.join(respondents, on='_recordId', how='inner')
|
||||||
|
S.plot_average_scores_with_counts(
|
||||||
|
data=filtered_voice_1_10,
|
||||||
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Voice General Impression (Scale 1-10)',
|
||||||
|
filename=_char_filename(char_name, "voice_scale_1-10"),
|
||||||
|
x_label='Voice',
|
||||||
|
domain=[1, 10],
|
||||||
|
color_gender=COLOR_GENDER,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Transform to get one row per color with average correlation
|
# ### Speaking Style Colors vs Scale 1-10 (only for Best Character)
|
||||||
_color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET)
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
|
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
|
||||||
|
continue
|
||||||
|
|
||||||
|
filtered_scale, _, n = get_filtered_data_for_character(char_name)
|
||||||
|
color_corr, _ = utils.transform_speaking_style_color_correlation(filtered_scale, SPEAKING_STYLES)
|
||||||
S.plot_speaking_style_color_correlation(
|
S.plot_speaking_style_color_correlation(
|
||||||
data=_color_corr_scale,
|
data=color_corr,
|
||||||
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Scale 1-10"""
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: Speaking Style Colors vs Voice Scale 1-10',
|
||||||
|
filename=_char_filename(char_name, "colors_vs_voice_scale_1-10"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
mo.md(r"""
|
# ### Speaking Style Colors vs Ranking Points (only for Best Character)
|
||||||
### Colors vs Ranking Points (Best Character)
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
""")
|
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
|
||||||
|
continue
|
||||||
|
|
||||||
# %%
|
_, filtered_ranking, n = get_filtered_data_for_character(char_name)
|
||||||
_color_corr_ranking, _ = utils.transform_speaking_style_color_correlation(
|
color_corr, _ = utils.transform_speaking_style_color_correlation(
|
||||||
joined_ranking,
|
filtered_ranking, SPEAKING_STYLES, target_column="Ranking_Points"
|
||||||
STYLES_SUBSET,
|
|
||||||
target_column="Ranking_Points"
|
|
||||||
)
|
)
|
||||||
S.plot_speaking_style_color_correlation(
|
S.plot_speaking_style_color_correlation(
|
||||||
data=_color_corr_ranking,
|
data=color_corr,
|
||||||
title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Ranking Points"""
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: Speaking Style Colors vs Voice Ranking Points',
|
||||||
|
filename=_char_filename(char_name, "colors_vs_voice_ranking_points"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# ### Individual Traits vs Scale 1-10 (only for Best Character)
|
||||||
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
|
print(f"--- Speaking Style: {_style} ---")
|
||||||
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
|
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
|
||||||
|
continue
|
||||||
|
|
||||||
|
filtered_scale, _, n = get_filtered_data_for_character(char_name)
|
||||||
|
S.plot_speaking_style_scale_correlation(
|
||||||
|
data=filtered_scale,
|
||||||
|
style_color=_style,
|
||||||
|
style_traits=_traits,
|
||||||
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: {_style} vs Voice Scale 1-10',
|
||||||
|
filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_scale_1-10"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# ### Individual Traits vs Ranking Points (only for Best Character)
|
||||||
|
for _style, _traits in SPEAKING_STYLES.items():
|
||||||
|
print(f"--- Speaking Style: {_style} ---")
|
||||||
|
for char_name in CHARACTER_FILTER_MAP:
|
||||||
|
if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER:
|
||||||
|
continue
|
||||||
|
|
||||||
|
_, filtered_ranking, n = get_filtered_data_for_character(char_name)
|
||||||
|
S.plot_speaking_style_ranking_correlation(
|
||||||
|
data=filtered_ranking,
|
||||||
|
style_color=_style,
|
||||||
|
style_traits=_traits,
|
||||||
|
title=f'"{char_name}" Ranked #1 (n={n})<br>Correlation: {_style} vs Voice Ranking Points',
|
||||||
|
filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_ranking_points"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# %%
|
||||||
|
|||||||
47
plots.py
47
plots.py
@@ -253,9 +253,15 @@ class QualtricsPlotsMixin:
|
|||||||
|
|
||||||
return chart.properties(title=title_config)
|
return chart.properties(title=title_config)
|
||||||
|
|
||||||
def _save_plot(self, chart: alt.Chart, title: str) -> alt.Chart:
|
def _save_plot(self, chart: alt.Chart, title: str, filename: str | None = None) -> alt.Chart:
|
||||||
"""Save chart to PNG file if fig_save_dir is set.
|
"""Save chart to PNG file if fig_save_dir is set.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
chart: The Altair chart to save
|
||||||
|
title: Chart title (used for filename if filename not provided)
|
||||||
|
filename: Optional explicit filename (without extension). If provided,
|
||||||
|
this is used instead of deriving from title.
|
||||||
|
|
||||||
Returns the (potentially modified) chart with filter footnote added.
|
Returns the (potentially modified) chart with filter footnote added.
|
||||||
"""
|
"""
|
||||||
# Add filter footnote - returns combined chart if filters active
|
# Add filter footnote - returns combined chart if filters active
|
||||||
@@ -271,7 +277,9 @@ class QualtricsPlotsMixin:
|
|||||||
if not path.exists():
|
if not path.exists():
|
||||||
path.mkdir(parents=True, exist_ok=True)
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
filename = f"{self._sanitize_filename(title)}.png"
|
# Use explicit filename if provided, otherwise derive from title
|
||||||
|
base_name = filename if filename else self._sanitize_filename(title)
|
||||||
|
filename = f"{base_name}.png"
|
||||||
filepath = path / filename
|
filepath = path / filename
|
||||||
|
|
||||||
# Use vl_convert directly with theme config for consistent rendering
|
# Use vl_convert directly with theme config for consistent rendering
|
||||||
@@ -397,6 +405,7 @@ class QualtricsPlotsMixin:
|
|||||||
self,
|
self,
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "General Impression (1-10)\nPer Voice with Number of Participants Who Rated It",
|
title: str = "General Impression (1-10)\nPer Voice with Number of Participants Who Rated It",
|
||||||
|
filename: str | None = None,
|
||||||
x_label: str = "Stimuli",
|
x_label: str = "Stimuli",
|
||||||
y_label: str = "Average General Impression Rating (1-10)",
|
y_label: str = "Average General Impression Rating (1-10)",
|
||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
@@ -408,6 +417,7 @@ class QualtricsPlotsMixin:
|
|||||||
"""Create a bar plot showing average scores and count of non-null values for each column.
|
"""Create a bar plot showing average scores and count of non-null values for each column.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
filename: Optional explicit filename (without extension) for saving.
|
||||||
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
||||||
"""
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
@@ -484,7 +494,7 @@ class QualtricsPlotsMixin:
|
|||||||
height=height or getattr(self, 'plot_height', 400)
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_top3_ranking_distribution(
|
def plot_top3_ranking_distribution(
|
||||||
@@ -803,6 +813,7 @@ class QualtricsPlotsMixin:
|
|||||||
self,
|
self,
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)",
|
title: str = "Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)",
|
||||||
|
filename: str | None = None,
|
||||||
x_label: str = "Character Personality",
|
x_label: str = "Character Personality",
|
||||||
y_label: str = "Total Weighted Score",
|
y_label: str = "Total Weighted Score",
|
||||||
color: str = ColorPalette.PRIMARY,
|
color: str = ColorPalette.PRIMARY,
|
||||||
@@ -813,6 +824,7 @@ class QualtricsPlotsMixin:
|
|||||||
"""Create a bar chart showing the weighted ranking score for each character.
|
"""Create a bar chart showing the weighted ranking score for each character.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
filename: Optional explicit filename (without extension) for saving.
|
||||||
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
color_gender: If True, color bars by voice gender (blue=male, pink=female).
|
||||||
"""
|
"""
|
||||||
weighted_df = self._ensure_dataframe(data).to_pandas()
|
weighted_df = self._ensure_dataframe(data).to_pandas()
|
||||||
@@ -863,7 +875,7 @@ class QualtricsPlotsMixin:
|
|||||||
height=height or getattr(self, 'plot_height', 400)
|
height=height or getattr(self, 'plot_height', 400)
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_voice_selection_counts(
|
def plot_voice_selection_counts(
|
||||||
@@ -1179,16 +1191,22 @@ class QualtricsPlotsMixin:
|
|||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_speaking_style_correlation(
|
def plot_speaking_style_scale_correlation(
|
||||||
self,
|
self,
|
||||||
style_color: str,
|
style_color: str,
|
||||||
style_traits: list[str],
|
style_traits: list[str],
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str | None = None,
|
title: str | None = None,
|
||||||
|
filename: str | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10)."""
|
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: Optional explicit filename (without extension) for saving.
|
||||||
|
If not provided, filename is derived from title.
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if title is None:
|
if title is None:
|
||||||
@@ -1235,13 +1253,14 @@ class QualtricsPlotsMixin:
|
|||||||
height=height or 350
|
height=height or 350
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_speaking_style_color_correlation(
|
def plot_speaking_style_color_correlation(
|
||||||
self,
|
self,
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
|
title: str = "Speaking Style and Voice Scale 1-10 Correlations<br>(Average by Color)",
|
||||||
|
filename: str | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
@@ -1255,6 +1274,8 @@ class QualtricsPlotsMixin:
|
|||||||
data: DataFrame with columns [Color, correlation, n_traits] from
|
data: DataFrame with columns [Color, correlation, n_traits] from
|
||||||
utils.transform_speaking_style_color_correlation
|
utils.transform_speaking_style_color_correlation
|
||||||
title: Chart title (supports <br> for line breaks)
|
title: Chart title (supports <br> for line breaks)
|
||||||
|
filename: Optional explicit filename (without extension) for saving.
|
||||||
|
If not provided, filename is derived from title.
|
||||||
width: Chart width in pixels
|
width: Chart width in pixels
|
||||||
height: Chart height in pixels
|
height: Chart height in pixels
|
||||||
|
|
||||||
@@ -1289,7 +1310,7 @@ class QualtricsPlotsMixin:
|
|||||||
height=height or 350
|
height=height or 350
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_demographic_distribution(
|
def plot_demographic_distribution(
|
||||||
@@ -1415,10 +1436,16 @@ class QualtricsPlotsMixin:
|
|||||||
style_traits: list[str],
|
style_traits: list[str],
|
||||||
data: pl.LazyFrame | pl.DataFrame | None = None,
|
data: pl.LazyFrame | pl.DataFrame | None = None,
|
||||||
title: str | None = None,
|
title: str | None = None,
|
||||||
|
filename: str | None = None,
|
||||||
width: int | str | None = None,
|
width: int | str | None = None,
|
||||||
height: int | None = None,
|
height: int | None = None,
|
||||||
) -> alt.Chart:
|
) -> alt.Chart:
|
||||||
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3)."""
|
"""Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: Optional explicit filename (without extension) for saving.
|
||||||
|
If not provided, filename is derived from title.
|
||||||
|
"""
|
||||||
df = self._ensure_dataframe(data)
|
df = self._ensure_dataframe(data)
|
||||||
|
|
||||||
if title is None:
|
if title is None:
|
||||||
@@ -1462,7 +1489,7 @@ class QualtricsPlotsMixin:
|
|||||||
height=height or 350
|
height=height or 350
|
||||||
)
|
)
|
||||||
|
|
||||||
chart = self._save_plot(chart, title)
|
chart = self._save_plot(chart, title, filename=filename)
|
||||||
return chart
|
return chart
|
||||||
|
|
||||||
def plot_traits_wordcloud(
|
def plot_traits_wordcloud(
|
||||||
|
|||||||
Reference in New Issue
Block a user