# JPMC-quant/example_correlation_plots.py


import polars as pl
from utils import QualtricsSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
from plots import plot_speaking_style_correlation
from speaking_styles import SPEAKING_STYLES

# 1. Initialize Survey and Load Data
# The survey is built from two soft-launch artifacts, both given as relative
# paths: the exported response values (CSV) and the survey definition (QSF).
RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Values.csv"
QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"

try:
    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
except TypeError:
    # Debugging aid only: a TypeError here is taken to mean the constructor
    # does not accept the (results, qsf) call above. Announce it and re-raise
    # the original exception unchanged. Any other failure (e.g. a missing
    # file) is not intercepted and propagates without the message.
    print("Error initializing survey with paths. Checking signature...")
    raise
else:
    # Reached only when the survey object was constructed successfully.
    data = survey.load_data()

# 2. Extract Data
# Speaking styles come in two groups (green/blue and orange/red). Each getter
# returns a pair: the extracted data and a companion mapping object that is
# handed to the processing step below.
ss_gb, map_gb = survey.get_ss_green_blue(data)
ss_or, map_or = survey.get_ss_orange_red(data)

# Voice Scale 1-10: only the first element of the returned pair is used;
# the second is discarded.
voice_scale, _ = survey.get_voice_scale_1_10(data)

# 3. Process Dataframes (Wide to Long)
# Per the original author's note, process_speaking_style_data performs the
# melt and parsing and already returns an eager DataFrame (utils.py ends with
# `return result.collect()`), so no extra `.collect()` is applied here.
# NOTE(review): confirm against the current utils.py implementation.

df_style_gb = process_speaking_style_data(ss_gb, map_gb)
df_style_or = process_speaking_style_data(ss_or, map_or)

# Combine both style dataframes into a single frame covering every style group
df_style_all = pl.concat([df_style_gb, df_style_or])

# Process Voice Scale (variable name suggests a long-format result; see utils.py)
df_voice_long = process_voice_scale_data(voice_scale)

# 4. Join Style + Voice Data
# The joined frame is the single input shared by every plot generated below.
joined_df = join_voice_and_style_data(df_style_all, df_voice_long)

# 5. Generate Plots for each Style Color
# One figure is produced per entry of SPEAKING_STYLES: the key is passed as
# the style colour and the value as that style's traits. Every iteration uses
# the same joined dataframe.
for style, traits in SPEAKING_STYLES.items():
    # Progress message so it is clear which style is being plotted.
    print(f"Generating plot for {style}...")
    fig = plot_speaking_style_correlation(
        df=joined_df,
        style_color=style,
        style_traits=traits
    )
    # Display each figure as soon as it is built.
    fig.show()
    # If in Marimo/Jupyter, just 'fig' or 'mo.ui.plotly(fig)'
    # (presumably a Plotly figure, given the mo.ui.plotly hint — confirm in plots.py)