correlation start

2026-01-27 17:22:16 +01:00
parent 393c527656
commit fd4cb4b596
9 changed files with 5375 additions and 24 deletions
--- a/example_correlation_plots.py
+++ b/example_correlation_plots.py
@@ -0,0 +1,60 @@
+
+import polars as pl
+from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
+from plots import plot_speaking_style_correlation
+from speaking_styles import SPEAKING_STYLES
+
+# 1. Initialize Survey and Load Data
+# Both inputs are hard-coded relative paths (presumably resolved against the working directory).
+# They assume the export file structure observed in the workspace:
+# Data: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv
+# QSF: data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf
+
+RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
+QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"
+
+try:
+    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+except TypeError:
+    # Only TypeError is caught (e.g. a constructor signature mismatch); any other exception type propagates without this hint
+    print("Error initializing survey with paths. Checking signature...")
+    # Debugging aid only, not a fallback: the original exception is re-raised unchanged
+    raise
+
+data = survey.load_data()
+
+# 2. Extract Data
+# Speaking Styles: each getter returns two values; the second is passed to process_speaking_style_data below
+ss_gb, map_gb = survey.get_ss_green_blue(data)
+ss_or, map_or = survey.get_ss_orange_red(data)
+
+# Voice Scale 1-10 (the second return value is not needed here and is discarded)
+voice_scale, _ = survey.get_voice_scale_1_10(data)
+
+# 3. Process Dataframes (Wide to Long)
+# Note: process_speaking_style_data handles the melt and parsing
+# No explicit .collect() is done here: the plotting functions usually expect eager DataFrames,
+# and the `utils.py` helpers currently already return eager DFs (they end with `return result.collect()`)
+
+df_style_gb = process_speaking_style_data(ss_gb, map_gb)
+df_style_or = process_speaking_style_data(ss_or, map_or)
+
+# Stack the green/blue and orange/red style rows into a single dataframe
+df_style_all = pl.concat([df_style_gb, df_style_or])
+
+# Process Voice Scale
+df_voice_long = process_voice_scale_data(voice_scale)
+
+# 4. Join Style + Voice Data
+joined_df = join_voice_and_style_data(df_style_all, df_voice_long)
+
+# 5. Generate one plot per style color (each SPEAKING_STYLES key is paired with its traits)
+for style, traits in SPEAKING_STYLES.items():
+    print(f"Generating plot for {style}...")
+    fig = plot_speaking_style_correlation(
+        df=joined_df,
+        style_color=style,
+        style_traits=traits
+    )
+    fig.show()
+    # In Marimo/Jupyter, display the figure with 'fig' or 'mo.ui.plotly(fig)' instead of fig.show()