correlation start

This commit is contained in:
2026-01-27 17:22:16 +01:00
parent 393c527656
commit fd4cb4b596
9 changed files with 5375 additions and 24 deletions

View File

@@ -0,0 +1,60 @@
import polars as pl
from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
from plots import plot_speaking_style_correlation
from speaking_styles import SPEAKING_STYLES
# ---------------------------------------------------------------------------
# Step 1: build the survey object and load the response data.
#
# Both inputs live in the 2026-01-21 OneDrive export under "Soft Launch Data":
#   * RESULTS_FILE - the "Values" CSV export
#   * QSF_FILE     - the matching .qsf file
# ---------------------------------------------------------------------------
RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"

try:
    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
except TypeError:
    # Debugging aid only: print a hint and let the original exception
    # propagate. Only TypeError is intercepted here (e.g. a constructor
    # signature mismatch); every other exception type passes through untouched.
    # NOTE(review): an earlier comment suggested this also covers a missing
    # file - that would normally not surface as TypeError; confirm.
    print("Error initializing survey with paths. Checking signature...")
    raise

data = survey.load_data()
# ---------------------------------------------------------------------------
# Step 2: pull the relevant question groups out of the loaded data.
# ---------------------------------------------------------------------------
# Speaking-style questions are fetched in two groups (green/blue and
# orange/red). Each getter returns a pair: the style data plus a companion
# object (named map_* here) that is handed to process_speaking_style_data.
ss_gb, map_gb = survey.get_ss_green_blue(data)
ss_or, map_or = survey.get_ss_orange_red(data)

# Voice ratings on the 1-10 scale; the second return value is discarded.
voice_scale, _ = survey.get_voice_scale_1_10(data)

# ---------------------------------------------------------------------------
# Step 3: reshape each extract (wide -> long, per the helper names/notes).
# ---------------------------------------------------------------------------
# NOTE(review): no explicit .collect() is done here; this relies on the utils
# helpers already returning eager DataFrames - confirm against utils.py.
df_style_gb = process_speaking_style_data(ss_gb, map_gb)
df_style_or = process_speaking_style_data(ss_or, map_or)

# Stack the two style groups into a single frame.
df_style_all = pl.concat([df_style_gb, df_style_or])

df_voice_long = process_voice_scale_data(voice_scale)

# ---------------------------------------------------------------------------
# Step 4: combine the style data with the voice-scale data.
# ---------------------------------------------------------------------------
joined_df = join_voice_and_style_data(df_style_all, df_voice_long)
# ---------------------------------------------------------------------------
# Step 5: one correlation figure per entry in SPEAKING_STYLES.
# ---------------------------------------------------------------------------
for style, traits in SPEAKING_STYLES.items():
    print(f"Generating plot for {style}...")
    fig = plot_speaking_style_correlation(
        df=joined_df, style_color=style, style_traits=traits
    )
    # Script usage: display the figure right away. In Marimo/Jupyter, end the
    # cell with `fig` (or wrap it as `mo.ui.plotly(fig)`) instead.
    fig.show()