From 6ba30ff0418d4b736ea592ef1c6106e3a138c1a3 Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Mon, 2 Feb 2026 17:21:57 +0100 Subject: [PATCH] add copilot instructions and rename classes --- .github/copilot-instructions.md | 105 ++++++++++++++++++++++++++ 00_qualtrics_validation.py | 2 +- 02_quant_analysis.py | 8 +- 03_quant_report.py | 4 +- 99_example_ingest_qualtrics_export.py | 8 +- docs/altair-migration-plan.md | 12 +-- docs/wordcloud-usage.md | 4 +- example_correlation_plots.py | 4 +- plots.py | 4 +- utils.py | 4 +- validation.py | 4 +- wordclouds.py | 2 +- 12 files changed, 133 insertions(+), 28 deletions(-) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..f7478f4 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,105 @@ +# Voice Branding Quantitative Analysis - Copilot Instructions + +## Project Overview +Qualtrics survey analysis for brand personality research. Analyzes voice samples (V04-V91) across speaking style traits, character rankings, and demographic segments. Uses **Marimo notebooks** for interactive analysis and **Polars** for data processing. + +## Architecture + +### Core Components +- **`QualtricsSurvey`** (`utils.py`): Main class combining data loading, filtering, and plotting via `QualtricsPlotsMixin` +- **Marimo notebooks** (`0X_*.py`): Interactive apps run via `uv run marimo run .py` +- **Data exports** (`data/exports//`): Qualtrics CSVs with `_Labels.csv` and `_Values.csv` variants +- **QSF files**: Qualtrics survey definitions for mapping QIDs to question text + +### Data Flow +``` +Qualtrics CSV (3-row header) → QualtricsSurvey.load_data() → LazyFrame with QID columns + ↓ + filter_data() → get_*() methods → plot_*() methods → figures/// +``` + +## ⚠️ Critical AI Agent Rules + +1. **NEVER modify Marimo notebooks directly** - The `XX_*.py` files are Marimo notebooks and should not be edited by AI agents +2. 
**NEVER run Marimo notebooks for debugging** - These are interactive apps, not test scripts +3. **For debugging**: Create a standalone temporary Python script (e.g., `debug_temp.py`) to test functions +4. **Reading notebooks is OK** - You may read notebook files to understand how functions are used. Ask the user which notebook they're working in for context +5. **No changelog markdown files** - Do not create new markdown files to document small changes or describe new usage + +## Key Patterns + +### Polars LazyFrames +Always work with `pl.LazyFrame` until visualization; call `.collect()` only when needed: +```python +data = S.load_data() # Returns LazyFrame +subset, meta = S.get_voice_scale_1_10(data) # Returns (LazyFrame, Optional[dict]) +df = subset.collect() # Materialize for plotting +``` + +### Column Naming Convention +Survey columns follow patterns that encode voice/trait info: +- `SS_Green_Blue__V14__Choice_1` → Speaking Style, Voice 14, Trait 1 +- `Voice_Scale_1_10__V48` → 1-10 rating for Voice 48 +- `Top_3_Voices_ranking__V77` → Ranking position for Voice 77 + +### Filter State & Figure Output +`QualtricsSurvey` stores filter state and auto-generates output paths: +```python +S.filter_data(data, consumer=['Early Professional']) +# Plots save to: figures//Cons-Early_Professional/.png +``` + +### Getter Methods Return Tuples +All `get_*()` methods return `(LazyFrame, Optional[metadata])`: +```python +df, choices_map = S.get_ss_green_blue(data) # choices_map has trait descriptions +df, _ = S.get_character_ranking(data) # Second element may be None +``` + +## Development Commands + +```bash +# Run interactive analysis notebook +uv run marimo run 02_quant_analysis.py --port 8080 + +# Edit notebook in editor mode +uv run marimo edit 02_quant_analysis.py + +# Headless mode for shared access +uv run marimo run 02_quant_analysis.py --headless --port 8080 +``` + +## Important Files + +| File | Purpose | +|------|---------| +| `utils.py` | `QualtricsSurvey` class, 
data transformations, PPTX utilities | +| `plots.py` | `QualtricsPlotsMixin` with all Altair plotting methods | +| `theme.py` | `ColorPalette` and `jpmc_altair_theme()` for consistent styling | +| `validation.py` | Data quality checks (progress, duration outliers, straight-liners) | +| `speaking_styles.py` | `SPEAKING_STYLES` dict mapping colors to trait groups | + +## Conventions + +### Altair Charts & Colors +- **ALL colors MUST come from `theme.py`** - Use `ColorPalette.PRIMARY`, `ColorPalette.RANK_1`, etc. +- If a new color is needed, add it to `ColorPalette` in `theme.py` first, then use it +- Never hardcode hex colors directly in plotting code +- Charts auto-save via `_save_plot()` when `fig_save_dir` is set +- Filter footnotes added automatically via `_add_filter_footnote()` + +### QSF Parsing +Use `_get_qsf_question_by_QID()` to extract question config: +```python +cfg = self._get_qsf_question_by_QID('QID27')['Payload'] +recode_map = cfg['RecodeValues'] # Maps choice numbers to values +``` + +### PPTX Image Replacement +Images matched by perceptual hash (not filename); alt-text encodes figure path: +```python +utils.update_ppt_alt_text(ppt_path, image_source_dir) # Tag images with alt-text +utils.pptx_replace_named_image(ppt, target_tag, new_image) # Replace by alt-text +``` + +This is a process that should be run manually by the user ONLY. 
\ No newline at end of file diff --git a/00_qualtrics_validation.py b/00_qualtrics_validation.py index 5847b32..f6f6518 100644 --- a/00_qualtrics_validation.py +++ b/00_qualtrics_validation.py @@ -27,7 +27,7 @@ def _(Path): @app.cell def _(qsf_file, results_file, utils): - survey = utils.JPMCSurvey(results_file, qsf_file) + survey = utils.QualtricsSurvey(results_file, qsf_file) data_all = survey.load_data() return (survey,) diff --git a/02_quant_analysis.py b/02_quant_analysis.py index f6b5bcb..fdc6cac 100644 --- a/02_quant_analysis.py +++ b/02_quant_analysis.py @@ -11,12 +11,12 @@ def _(): from pathlib import Path from validation import check_progress, duration_validation, check_straight_liners - from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores + from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores import utils from speaking_styles import SPEAKING_STYLES return ( - JPMCSurvey, + QualtricsSurvey, Path, SPEAKING_STYLES, calculate_weighted_ranking_scores, @@ -49,8 +49,8 @@ def _(Path, file_browser, mo): @app.cell -def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo): - S = JPMCSurvey(RESULTS_FILE, QSF_FILE) +def _(QualtricsSurvey, QSF_FILE, RESULTS_FILE, mo): + S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) try: data_all = S.load_data() except NotImplementedError as e: diff --git a/03_quant_report.py b/03_quant_report.py index 5a4ebb8..7db0901 100644 --- a/03_quant_report.py +++ b/03_quant_report.py @@ -9,7 +9,7 @@ with app.setup: from pathlib import Path from validation import check_progress, duration_validation, check_straight_liners - from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores + from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores import utils from speaking_styles import SPEAKING_STYLES @@ -35,7 +35,7 @@ def _(file_browser): @app.cell def _(QSF_FILE, RESULTS_FILE): - S = JPMCSurvey(RESULTS_FILE, 
QSF_FILE) + S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) try: data_all = S.load_data() except NotImplementedError as e: diff --git a/99_example_ingest_qualtrics_export.py b/99_example_ingest_qualtrics_export.py index 54c0a23..f88b77c 100644 --- a/99_example_ingest_qualtrics_export.py +++ b/99_example_ingest_qualtrics_export.py @@ -10,8 +10,8 @@ def _(): import polars as pl from pathlib import Path - from utils import JPMCSurvey, combine_exclusive_columns - return JPMCSurvey, combine_exclusive_columns, mo, pl + from utils import QualtricsSurvey, combine_exclusive_columns + return QualtricsSurvey, combine_exclusive_columns, mo, pl @app.cell @@ -29,8 +29,8 @@ def _(): @app.cell -def _(JPMCSurvey, QSF_FILE, RESULTS_FILE): - survey = JPMCSurvey(RESULTS_FILE, QSF_FILE) +def _(QualtricsSurvey, QSF_FILE, RESULTS_FILE): + survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE) data = survey.load_data() data.collect() return data, survey diff --git a/docs/altair-migration-plan.md b/docs/altair-migration-plan.md index 3c87863..9898bde 100644 --- a/docs/altair-migration-plan.md +++ b/docs/altair-migration-plan.md @@ -1,4 +1,4 @@ -# Altair Migration Plan: Plotly → Altair for JPMCPlotsMixin +# Altair Migration Plan: Plotly → Altair for QualtricsPlotsMixin **Date:** January 28, 2026 **Status:** Not Started @@ -22,9 +22,9 @@ Current Plotly implementation has a critical layout issue: filter annotations ov ## Current System Analysis ### File Structure -- **`plots.py`** - Contains `JPMCPlotsMixin` class with 10 plotting methods +- **`plots.py`** - Contains `QualtricsPlotsMixin` class with 10 plotting methods - **`theme.py`** - Contains `ColorPalette` class with all styling constants -- **`utils.py`** - Contains `JPMCSurvey` class that mixes in `JPMCPlotsMixin` +- **`utils.py`** - Contains `QualtricsSurvey` class that mixes in `QualtricsPlotsMixin` ### Color Palette (from theme.py) ```python @@ -1140,10 +1140,10 @@ uv remove plotly kaleido ```python import marimo as mo import polars as pl 
-from utils import JPMCSurvey +from utils import QualtricsSurvey # Load sample data -survey = JPMCSurvey() +survey = QualtricsSurvey() survey.load_data('path/to/data') survey.fig_save_dir = 'figures/altair_test' @@ -1244,7 +1244,7 @@ After completing all tasks, verify the following: ### Regression Testing - [ ] Existing Marimo notebooks still work - [ ] Data filtering still works (`filter_data()`) -- [ ] `JPMCSurvey` class initialization unchanged +- [ ] `QualtricsSurvey` class initialization unchanged - [ ] No breaking changes to public API ### Documentation diff --git a/docs/wordcloud-usage.md b/docs/wordcloud-usage.md index 857f41b..562c1d4 100644 --- a/docs/wordcloud-usage.md +++ b/docs/wordcloud-usage.md @@ -5,14 +5,14 @@ This example shows how to use the `create_traits_wordcloud` function to visualiz ## Basic Usage in Jupyter/Marimo Notebook ```python -from utils import JPMCSurvey, create_traits_wordcloud +from utils import QualtricsSurvey, create_traits_wordcloud from pathlib import Path # Load your survey data RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv" QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf" -S = JPMCSurvey(RESULTS_FILE, QSF_FILE) +S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) data = S.load_data() # Get Top 3 Traits data diff --git a/example_correlation_plots.py b/example_correlation_plots.py index 0ab5ab1..bed7754 100644 --- a/example_correlation_plots.py +++ b/example_correlation_plots.py @@ -1,6 +1,6 @@ import polars as pl -from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data +from utils import QualtricsSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data from plots import plot_speaking_style_correlation from speaking_styles import SPEAKING_STYLES @@ -14,7 +14,7 @@ RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Bra QSF_FILE = 
"data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf" try: - survey = JPMCSurvey(RESULTS_FILE, QSF_FILE) + survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE) except TypeError: # Fallback if signature is different or file not found (just in case) print("Error initializing survey with paths. Checking signature...") diff --git a/plots.py b/plots.py index 6aaf6a9..a85cae2 100644 --- a/plots.py +++ b/plots.py @@ -11,8 +11,8 @@ from theme import ColorPalette import hashlib -class JPMCPlotsMixin: - """Mixin class for plotting functions in JPMCSurvey.""" +class QualtricsPlotsMixin: + """Mixin class for plotting functions in QualtricsSurvey.""" def _process_title(self, title: str) -> str | list[str]: """Process title to handle <br> tags for Altair.""" diff --git a/utils.py b/utils.py index 01a0917..98e6b2d 100644 --- a/utils.py +++ b/utils.py @@ -11,7 +11,7 @@ from io import BytesIO import imagehash from PIL import Image -from plots import JPMCPlotsMixin +from plots import QualtricsPlotsMixin from pptx import Presentation @@ -514,7 +514,7 @@ def normalize_global_values(df: pl.DataFrame, target_cols: list[str]) -> pl.Data return res.lazy() if was_lazy else res -class JPMCSurvey(JPMCPlotsMixin): +class QualtricsSurvey(QualtricsPlotsMixin): """Class to handle JPMorgan Chase survey data.""" def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]): diff --git a/validation.py b/validation.py index 2efc416..c082bae 100644 --- a/validation.py +++ b/validation.py @@ -323,12 +323,12 @@ def check_straight_liners(data, max_score=3): if __name__ == "__main__": - from utils import JPMCSurvey + from utils import QualtricsSurvey RESULTS_FILE = "data/exports/OneDrive_2026-01-28/1-28-26 Afternoon/JPMC_Chase Brand Personality_Quant Round 1_January 28, 2026_Afternoon_Labels.csv" QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf" - S = JPMCSurvey(RESULTS_FILE, QSF_FILE) + S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) data = S.load_data() # print("Checking Green Blue:") diff --git a/wordclouds.py b/wordclouds.py index 1a62b73..f9625fe 100644 --- a/wordclouds.py +++ b/wordclouds.py @@ -1,6 +1,6 @@ """Word cloud utilities for Voice Branding analysis. -The main wordcloud function is available as a method on JPMCSurvey: +The main wordcloud function is available as a method on QualtricsSurvey: S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...') This module provides standalone imports for backwards compatibility.