From e17646eb703b3b1fb88cfa10d691c277193404d8 Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Wed, 4 Feb 2026 10:46:31 +0100 Subject: [PATCH] correlation plots for best bc --- 03_quant_report.script.py | 193 +++++++++++++++++++++++++------------- plots.py | 49 +++++++--- 2 files changed, 164 insertions(+), 78 deletions(-) diff --git a/03_quant_report.script.py b/03_quant_report.script.py index 498dc66..396facb 100644 --- a/03_quant_report.script.py +++ b/03_quant_report.script.py @@ -570,7 +570,7 @@ _content = """""" for _style, _traits in SPEAKING_STYLES.items(): # print(f"Correlation plot for {style}...") - _fig = S.plot_speaking_style_correlation( + _fig = S.plot_speaking_style_scale_correlation( data=joined_scale, style_color=_style, style_traits=_traits, @@ -609,86 +609,145 @@ for _style, _traits in SPEAKING_STYLES.items(): mo.md(_content) # %% -mo.md(r""" -## Correlations when "Best Brand Character" is chosen - -Select only the traits that fit with that character -""") +# ## Correlations when "Best Brand Character" is chosen +# For each of the 4 brand characters, filter the dataset to only those respondents +# who selected that character as their #1 choice. # %% -from reference import ORIGINAL_CHARACTER_TRAITS -chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER] +# Prepare character-filtered data subsets +char_rank_for_filter = S.get_character_ranking(data)[0].collect() + +CHARACTER_FILTER_MAP = { + 'Familiar Friend': 'Character_Ranking_Familiar_Friend', + 'The Coach': 'Character_Ranking_The_Coach', + 'Personal Assistant': 'Character_Ranking_The_Personal_Assistant', + 'Bank Teller': 'Character_Ranking_The_Bank_Teller', +} + +def get_filtered_data_for_character(char_name: str) -> tuple[pl.DataFrame, pl.DataFrame, int]: + """Filter joined_scale and joined_ranking to respondents who ranked char_name #1.""" + col = CHARACTER_FILTER_MAP[char_name] + respondents = char_rank_for_filter.filter(pl.col(col) == 1).select('_recordId') + n = respondents.height + filtered_scale = joined_scale.join(respondents, on='_recordId', how='inner') + filtered_ranking = joined_ranking.join(respondents, on='_recordId', how='inner') + return filtered_scale, filtered_ranking, n + +def _char_filename(char_name: str, suffix: str) -> str: + """Generate filename for character-filtered plots (without n-value). + + Format: bc_ranked_1_{suffix}__{char_slug} + This groups all plot types together in directory listings. + """ + char_slug = char_name.lower().replace(' ', '_') + return f"bc_ranked_1_{suffix}__{char_slug}" + + # %% -STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits) - -# %% -mo.md(r""" -### Individual Traits vs Ranking Points -""") - -# %% -_content = "" -for _style, _traits in STYLES_SUBSET.items(): - _fig = S.plot_speaking_style_ranking_correlation( - data=joined_ranking, - style_color=_style, - style_traits=_traits, - title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Ranking Points""" +# ### Voice Weighted Ranking Score (by Best Character) +for char_name in CHARACTER_FILTER_MAP: + _, _, n = get_filtered_data_for_character(char_name) + # Get top3 voices for this character subset using _recordIds + respondents = char_rank_for_filter.filter( + pl.col(CHARACTER_FILTER_MAP[char_name]) == 1 + ).select('_recordId') + # Collect top3_voices if it's a LazyFrame, then join + top3_df = top3_voices.collect() if isinstance(top3_voices, pl.LazyFrame) else top3_voices + filtered_top3 = top3_df.join(respondents, on='_recordId', how='inner') + weighted = calculate_weighted_ranking_scores(filtered_top3) + S.plot_weighted_ranking_score( + data=weighted, + title=f'"{char_name}" Ranked #1 (n={n})
Most Popular Voice - Weighted Score (1st=3pts, 2nd=2pts, 3rd=1pt)', + filename=_char_filename(char_name, "voice_weighted_ranking_score"), + color_gender=COLOR_GENDER, ) - _content += f""" -{mo.ui.altair_chart(_fig)} - -""" -mo.md(_content) # %% -mo.md(r""" -### Individual Traits vs Scale 1-10 -""") - -# %% -_content = """""" - -for _style, _traits in STYLES_SUBSET.items(): - # print(f"Correlation plot for {style}...") - _fig = S.plot_speaking_style_correlation( - data=joined_scale, - style_color=_style, - style_traits=_traits, - title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style {_style} and Voice Scale 1-10""", +# ### Voice Scale 1-10 Average Scores (by Best Character) +for char_name in CHARACTER_FILTER_MAP: + _, _, n = get_filtered_data_for_character(char_name) + # Get voice scale data for this character subset using _recordIds + respondents = char_rank_for_filter.filter( + pl.col(CHARACTER_FILTER_MAP[char_name]) == 1 + ).select('_recordId') + # Collect voice_1_10 if it's a LazyFrame, then join + voice_1_10_df = voice_1_10.collect() if isinstance(voice_1_10, pl.LazyFrame) else voice_1_10 + filtered_voice_1_10 = voice_1_10_df.join(respondents, on='_recordId', how='inner') + S.plot_average_scores_with_counts( + data=filtered_voice_1_10, + title=f'"{char_name}" Ranked #1 (n={n})
Voice General Impression (Scale 1-10)', + filename=_char_filename(char_name, "voice_scale_1-10"), + x_label='Voice', + domain=[1, 10], + color_gender=COLOR_GENDER, ) - _content += f""" -{mo.ui.altair_chart(_fig)} -""" -mo.md(_content) + # %% -mo.md(r""" -### Colors vs Scale 1-10 (Best Character) -""") +# ### Speaking Style Colors vs Scale 1-10 (only for Best Character) +for char_name in CHARACTER_FILTER_MAP: + if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER: + continue + + filtered_scale, _, n = get_filtered_data_for_character(char_name) + color_corr, _ = utils.transform_speaking_style_color_correlation(filtered_scale, SPEAKING_STYLES) + S.plot_speaking_style_color_correlation( + data=color_corr, + title=f'"{char_name}" Ranked #1 (n={n})
Correlation: Speaking Style Colors vs Voice Scale 1-10', + filename=_char_filename(char_name, "colors_vs_voice_scale_1-10"), + ) # %% -# Transform to get one row per color with average correlation -_color_corr_scale, _ = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET) -S.plot_speaking_style_color_correlation( - data=_color_corr_scale, - title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Scale 1-10""" -) +# ### Speaking Style Colors vs Ranking Points (only for Best Character) +for char_name in CHARACTER_FILTER_MAP: + if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER: + continue + + _, filtered_ranking, n = get_filtered_data_for_character(char_name) + color_corr, _ = utils.transform_speaking_style_color_correlation( + filtered_ranking, SPEAKING_STYLES, target_column="Ranking_Points" + ) + S.plot_speaking_style_color_correlation( + data=color_corr, + title=f'"{char_name}" Ranked #1 (n={n})
Correlation: Speaking Style Colors vs Voice Ranking Points', + filename=_char_filename(char_name, "colors_vs_voice_ranking_points"), + ) # %% -mo.md(r""" -### Colors vs Ranking Points (Best Character) -""") +# ### Individual Traits vs Scale 1-10 (only for Best Character) +for _style, _traits in SPEAKING_STYLES.items(): + print(f"--- Speaking Style: {_style} ---") + for char_name in CHARACTER_FILTER_MAP: + if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER: + continue + + filtered_scale, _, n = get_filtered_data_for_character(char_name) + S.plot_speaking_style_scale_correlation( + data=filtered_scale, + style_color=_style, + style_traits=_traits, + title=f'"{char_name}" Ranked #1 (n={n})
Correlation: {_style} vs Voice Scale 1-10', + filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_scale_1-10"), + ) + +# %% +# ### Individual Traits vs Ranking Points (only for Best Character) +for _style, _traits in SPEAKING_STYLES.items(): + print(f"--- Speaking Style: {_style} ---") + for char_name in CHARACTER_FILTER_MAP: + if char_name.lower().replace(' ', '_') != BEST_CHOSEN_CHARACTER: + continue + + _, filtered_ranking, n = get_filtered_data_for_character(char_name) + S.plot_speaking_style_ranking_correlation( + data=filtered_ranking, + style_color=_style, + style_traits=_traits, + title=f'"{char_name}" Ranked #1 (n={n})
Correlation: {_style} vs Voice Ranking Points', + filename=_char_filename(char_name, f"{_style.lower()}_vs_voice_ranking_points"), + ) + # %% -_color_corr_ranking, _ = utils.transform_speaking_style_color_correlation( - joined_ranking, - STYLES_SUBSET, - target_column="Ranking_Points" -) -S.plot_speaking_style_color_correlation( - data=_color_corr_ranking, - title=f"""Brand Character "{BEST_CHOSEN_CHARACTER.replace('_', ' ').title()}" - Correlation: Speaking Style Colors and Voice Ranking Points""" -) \ No newline at end of file diff --git a/plots.py b/plots.py index f0244b7..bf03a9a 100644 --- a/plots.py +++ b/plots.py @@ -253,9 +253,15 @@ class QualtricsPlotsMixin: return chart.properties(title=title_config) - def _save_plot(self, chart: alt.Chart, title: str) -> alt.Chart: + def _save_plot(self, chart: alt.Chart, title: str, filename: str | None = None) -> alt.Chart: """Save chart to PNG file if fig_save_dir is set. + Args: + chart: The Altair chart to save + title: Chart title (used for filename if filename not provided) + filename: Optional explicit filename (without extension). If provided, + this is used instead of deriving from title. + Returns the (potentially modified) chart with filter footnote added. """ # Add filter footnote - returns combined chart if filters active @@ -270,8 +276,10 @@ class QualtricsPlotsMixin: if not path.exists(): path.mkdir(parents=True, exist_ok=True) - - filename = f"{self._sanitize_filename(title)}.png" + + # Use explicit filename if provided, otherwise derive from title + base_name = filename if filename else self._sanitize_filename(title) + filename = f"{base_name}.png" filepath = path / filename # Use vl_convert directly with theme config for consistent rendering @@ -397,6 +405,7 @@ class QualtricsPlotsMixin: self, data: pl.LazyFrame | pl.DataFrame | None = None, title: str = "General Impression (1-10)\nPer Voice with Number of Participants Who Rated It", + filename: str | None = None, x_label: str = "Stimuli", y_label: str = "Average General Impression Rating (1-10)", color: str = ColorPalette.PRIMARY, @@ -408,6 +417,7 @@ class QualtricsPlotsMixin: """Create a bar plot showing average scores and count of non-null values for each column. Parameters: + filename: Optional explicit filename (without extension) for saving. color_gender: If True, color bars by voice gender (blue=male, pink=female). """ df = self._ensure_dataframe(data) @@ -484,7 +494,7 @@ class QualtricsPlotsMixin: height=height or getattr(self, 'plot_height', 400) ) - chart = self._save_plot(chart, title) + chart = self._save_plot(chart, title, filename=filename) return chart def plot_top3_ranking_distribution( @@ -803,6 +813,7 @@ class QualtricsPlotsMixin: self, data: pl.LazyFrame | pl.DataFrame | None = None, title: str = "Weighted Popularity Score\n(1st=3pts, 2nd=2pts, 3rd=1pt)", + filename: str | None = None, x_label: str = "Character Personality", y_label: str = "Total Weighted Score", color: str = ColorPalette.PRIMARY, @@ -813,6 +824,7 @@ class QualtricsPlotsMixin: """Create a bar chart showing the weighted ranking score for each character. Parameters: + filename: Optional explicit filename (without extension) for saving. color_gender: If True, color bars by voice gender (blue=male, pink=female). """ weighted_df = self._ensure_dataframe(data).to_pandas() @@ -863,7 +875,7 @@ class QualtricsPlotsMixin: height=height or getattr(self, 'plot_height', 400) ) - chart = self._save_plot(chart, title) + chart = self._save_plot(chart, title, filename=filename) return chart def plot_voice_selection_counts( @@ -1179,16 +1191,22 @@ class QualtricsPlotsMixin: chart = self._save_plot(chart, title) return chart - def plot_speaking_style_correlation( + def plot_speaking_style_scale_correlation( self, style_color: str, style_traits: list[str], data: pl.LazyFrame | pl.DataFrame | None = None, title: str | None = None, + filename: str | None = None, width: int | str | None = None, height: int | None = None, ) -> alt.Chart: - """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10).""" + """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Scale (1-10). + + Args: + filename: Optional explicit filename (without extension) for saving. + If not provided, filename is derived from title. + """ df = self._ensure_dataframe(data) if title is None: @@ -1235,13 +1253,14 @@ class QualtricsPlotsMixin: height=height or 350 ) - chart = self._save_plot(chart, title) + chart = self._save_plot(chart, title, filename=filename) return chart def plot_speaking_style_color_correlation( self, data: pl.LazyFrame | pl.DataFrame | None = None, title: str = "Speaking Style and Voice Scale 1-10 Correlations
(Average by Color)", + filename: str | None = None, width: int | str | None = None, height: int | None = None, ) -> alt.Chart: @@ -1255,6 +1274,8 @@ class QualtricsPlotsMixin: data: DataFrame with columns [Color, correlation, n_traits] from utils.transform_speaking_style_color_correlation title: Chart title (supports
for line breaks) + filename: Optional explicit filename (without extension) for saving. + If not provided, filename is derived from title. width: Chart width in pixels height: Chart height in pixels @@ -1289,7 +1310,7 @@ class QualtricsPlotsMixin: height=height or 350 ) - chart = self._save_plot(chart, title) + chart = self._save_plot(chart, title, filename=filename) return chart def plot_demographic_distribution( @@ -1415,10 +1436,16 @@ class QualtricsPlotsMixin: style_traits: list[str], data: pl.LazyFrame | pl.DataFrame | None = None, title: str | None = None, + filename: str | None = None, width: int | str | None = None, height: int | None = None, ) -> alt.Chart: - """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3).""" + """Plots correlation between Speaking Style Trait Scores (1-5) and Voice Ranking Points (0-3). + + Args: + filename: Optional explicit filename (without extension) for saving. + If not provided, filename is derived from title. + """ df = self._ensure_dataframe(data) if title is None: @@ -1462,7 +1489,7 @@ class QualtricsPlotsMixin: height=height or 350 ) - chart = self._save_plot(chart, title) + chart = self._save_plot(chart, title, filename=filename) return chart def plot_traits_wordcloud(