From 190e4fbdc4310eda91983fcbc5547c05badc40fc Mon Sep 17 00:00:00 2001
From: Luigi Maiorano <luigi.maiorano@qumo.io>
Date: Tue, 3 Feb 2026 01:59:26 +0100
Subject: [PATCH] finished correlation plots and generic voice plots

---
 03_quant_report.py | 130 +++++++++++++++++++++++++++++++++++++++++++++
 utils.py           |  15 ++++++
 2 files changed, 145 insertions(+)

diff --git a/03_quant_report.py b/03_quant_report.py
index a902439..62067c2 100644
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -187,6 +187,12 @@ def _(S):
     return
 
 
+@app.cell
+def _():
+    BEST_CHOSEN_CHARACTER = "the_coach"  # key into ORIGINAL_CHARACTER_TRAITS; also title-cased for the chart titles below
+    return (BEST_CHOSEN_CHARACTER,)
+
+
 @app.cell
 def _(data_validated):
     # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
@@ -698,6 +704,12 @@ def _(S, data, top3_voices):
     return joined_ranking, joined_scale
 
 
+@app.cell
+def _(joined_ranking):
+    joined_ranking.head()  # inspection aid: .head() preview, displayed by marimo as the cell's last expression
+    return
+
+
 @app.cell
 def _():
     mo.md(r"""
@@ -799,5 +811,123 @@ def _(S, joined_ranking):
     return
 
 
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ## Correlations when "Best Brand Character" is chosen
+
+    Select only the traits that fit with that character
+    """)
+    return
+
+
+@app.cell
+def _(BEST_CHOSEN_CHARACTER):
+    from reference import ORIGINAL_CHARACTER_TRAITS
+    chosen_bc_traits = ORIGINAL_CHARACTER_TRAITS[BEST_CHOSEN_CHARACTER]  # traits of the chosen character; used downstream as the keywords for utils.filter_speaking_styles
+    return (chosen_bc_traits,)
+
+
+@app.cell
+def _(chosen_bc_traits):
+    STYLES_SUBSET = utils.filter_speaking_styles(SPEAKING_STYLES, chosen_bc_traits)  # per color, only traits containing one of the character's keywords; colors without a match are dropped
+    return (STYLES_SUBSET,)
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Individual Traits vs Ranking Points
+    """)
+    return
+
+
+@app.cell
+def _(BEST_CHOSEN_CHARACTER, S, STYLES_SUBSET, joined_ranking):
+    # Readable character name (e.g. "the_coach" -> "The Coach"), computed once for every title.
+    _label = BEST_CHOSEN_CHARACTER.replace('_', ' ').title()
+    _charts = []
+    for _color, _color_traits in STYLES_SUBSET.items():
+        _chart = S.plot_speaking_style_ranking_correlation(
+            data=joined_ranking, style_color=_color, style_traits=_color_traits,
+            title=f"""Brand Character "{_label}" - Correlation: Speaking Style {_color} and Voice Ranking Points""",
+        )
+        _charts.append(f"""
+    {mo.ui.altair_chart(_chart)}
+
+    """)
+    mo.md("".join(_charts))  # one markdown output holding every chart; last expression, so marimo renders it
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Individual Traits vs Scale 1-10
+    """)
+    return
+
+
+@app.cell
+def _(BEST_CHOSEN_CHARACTER, S, STYLES_SUBSET, joined_scale):
+    # Readable character name (e.g. "the_coach" -> "The Coach"), computed once for every title.
+    _label = BEST_CHOSEN_CHARACTER.replace('_', ' ').title()
+    _charts = []
+    for _color, _color_traits in STYLES_SUBSET.items():
+        _chart = S.plot_speaking_style_correlation(
+            data=joined_scale,
+            style_color=_color,
+            style_traits=_color_traits,
+            title=f"""Brand Character "{_label}" - Correlation: Speaking Style {_color} and Voice Scale 1-10""",
+        )
+        _charts.append(f"""
+    {mo.ui.altair_chart(_chart)}
+
+    """)
+    mo.md("".join(_charts))  # one markdown output holding every chart; last expression, so marimo renders it
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Colors vs Scale 1-10 (Best Character)
+    """)
+    return
+
+
+@app.cell
+def _(BEST_CHOSEN_CHARACTER, S, STYLES_SUBSET, joined_scale):
+    # One row per color with its average correlation; the second return value is not needed here.
+    _per_color, _unused = utils.transform_speaking_style_color_correlation(joined_scale, STYLES_SUBSET)
+    _label = BEST_CHOSEN_CHARACTER.replace('_', ' ').title()
+    _title = f"""Brand Character "{_label}" - Correlation: Speaking Style Colors and Voice Scale 1-10"""
+    # Bare last expression: marimo shows whatever the plot call returns as the cell output.
+    S.plot_speaking_style_color_correlation(data=_per_color, title=_title)
+    return
+
+
+@app.cell(hide_code=True)
+def _():
+    mo.md(r"""
+    ### Colors vs Ranking Points (Best Character)
+    """)
+    return
+
+
+@app.cell
+def _(BEST_CHOSEN_CHARACTER, S, STYLES_SUBSET, joined_ranking):
+    # Color-level transform of the ranking data, correlated against target_column="Ranking_Points".
+    _per_color, _unused = utils.transform_speaking_style_color_correlation(
+        joined_ranking, STYLES_SUBSET, target_column="Ranking_Points"
+    )
+    # Readable character name for the title, e.g. "the_coach" -> "The Coach".
+    _label = BEST_CHOSEN_CHARACTER.replace('_', ' ').title()
+    _title = f"""Brand Character "{_label}" - Correlation: Speaking Style Colors and Voice Ranking Points"""
+    # Bare last expression: marimo shows whatever the plot call returns as the cell output.
+    S.plot_speaking_style_color_correlation(data=_per_color, title=_title)
+    return
+
+
 if __name__ == "__main__":
     app.run()
diff --git a/utils.py b/utils.py
index f784f3d..7f5e9ff 100644
--- a/utils.py
+++ b/utils.py
@@ -1825,3 +1825,18 @@ def split_consumer_groups(df: Union[pl.LazyFrame, pl.DataFrame], col: str = "Con
         groups[group] = df_clean.filter(pl.col(group_col_alias) == group)
         
     return groups
+
+
+
+def filter_speaking_styles(speaking_styles: dict, keywords: list[str]) -> dict:
+    """Keep, per color, only the traits that contain at least one keyword.
+
+    Case-insensitive substring match. Blank keywords are ignored (``"" in s`` is
+    always true), a lone ``str`` is one keyword, and colors left empty are dropped.
+    """
+    if isinstance(keywords, str):
+        keywords = [keywords]
+    needles = [kw.lower() for kw in keywords if kw.strip()]  # lowered once, not once per trait
+    matches = ((color, [t for t in traits if any(n in t.lower() for n in needles)])
+               for color, traits in speaking_styles.items())
+    return {color: kept for color, kept in matches if kept}
\ No newline at end of file