From 6ba30ff0418d4b736ea592ef1c6106e3a138c1a3 Mon Sep 17 00:00:00 2001
From: Luigi Maiorano <luigi.maiorano@qumo.io>
Date: Mon, 2 Feb 2026 17:21:57 +0100
Subject: [PATCH] add copilot instructions and rename classes

---
 .github/copilot-instructions.md       | 105 ++++++++++++++++++++++++++
 00_qualtrics_validation.py            |   2 +-
 02_quant_analysis.py                  |   8 +-
 03_quant_report.py                    |   4 +-
 99_example_ingest_qualtrics_export.py |   8 +-
 docs/altair-migration-plan.md         |  12 +--
 docs/wordcloud-usage.md               |   4 +-
 example_correlation_plots.py          |   4 +-
 plots.py                              |   4 +-
 utils.py                              |   4 +-
 validation.py                         |   4 +-
 wordclouds.py                         |   2 +-
 12 files changed, 133 insertions(+), 28 deletions(-)
 create mode 100644 .github/copilot-instructions.md
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..f7478f4
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,105 @@
+# Voice Branding Quantitative Analysis - Copilot Instructions
+
+## Project Overview
+Qualtrics survey analysis for brand personality research. Analyzes voice samples (V04-V91) across speaking style traits, character rankings, and demographic segments. Uses **Marimo notebooks** for interactive analysis and **Polars** for data processing.
+
+## Architecture
+
+### Core Components
+- **`QualtricsSurvey`** (`utils.py`): Main class combining data loading, filtering, and plotting via `QualtricsPlotsMixin`
+- **Marimo notebooks** (`XX_*.py`, e.g. `02_quant_analysis.py`): Interactive apps run via `uv run marimo run <file>.py`
+- **Data exports** (`data/exports/<date>/`): Qualtrics CSVs with `_Labels.csv` and `_Values.csv` variants
+- **QSF files**: Qualtrics survey definitions for mapping QIDs to question text
+
+### Data Flow
+```
+Qualtrics CSV (3-row header) → QualtricsSurvey.load_data() → LazyFrame with QID columns
+                                      ↓
+                           filter_data() → get_*() methods → plot_*() methods → figures/<export>/<filter>/
+```
+
+## ⚠️ Critical AI Agent Rules
+
+1. **NEVER modify Marimo notebooks directly** - The `XX_*.py` files are Marimo notebooks and should not be edited by AI agents
+2. **NEVER run Marimo notebooks for debugging** - These are interactive apps, not test scripts
+3. **For debugging**: Create a standalone temporary Python script (e.g., `debug_temp.py`) to test functions
+4. **Reading notebooks is OK** - You may read notebook files to understand how functions are used. Ask the user which notebook they're working in for context
+5. **No changelog markdown files** - Do not create new markdown files to document small changes or describe new usage
+
+## Key Patterns
+
+### Polars LazyFrames
+Always work with `pl.LazyFrame` until visualization; call `.collect()` only when needed:
+```python
+data = S.load_data()  # Returns LazyFrame
+subset, meta = S.get_voice_scale_1_10(data)  # Returns (LazyFrame, Optional[dict])
+df = subset.collect()  # Materialize for plotting
+```
+
+### Column Naming Convention
+Survey columns follow patterns that encode voice/trait info:
+- `SS_Green_Blue__V14__Choice_1` → Speaking Style, Voice 14, Trait 1
+- `Voice_Scale_1_10__V48` → 1-10 rating for Voice 48
+- `Top_3_Voices_ranking__V77` → Ranking position for Voice 77
+
+### Filter State & Figure Output
+`QualtricsSurvey` stores filter state and auto-generates output paths:
+```python
+S.filter_data(data, consumer=['Early Professional'])
+# Plots save to: figures/<export>/Cons-Early_Professional/<plot_name>.png
+```
+
+### Getter Methods Return Tuples
+All `get_*()` methods return `(LazyFrame, Optional[metadata])`:
+```python
+df, choices_map = S.get_ss_green_blue(data)  # choices_map has trait descriptions
+df, _ = S.get_character_ranking(data)  # Second element may be None
+```
+
+## Development Commands
+
+```bash
+# Run interactive analysis notebook
+uv run marimo run 02_quant_analysis.py --port 8080
+
+# Edit notebook in editor mode
+uv run marimo edit 02_quant_analysis.py
+
+# Headless mode for shared access
+uv run marimo run 02_quant_analysis.py --headless --port 8080
+```
+
+## Important Files
+
+| File | Purpose |
+|------|---------|
+| `utils.py` | `QualtricsSurvey` class, data transformations, PPTX utilities |
+| `plots.py` | `QualtricsPlotsMixin` with all Altair plotting methods |
+| `theme.py` | `ColorPalette` and `jpmc_altair_theme()` for consistent styling |
+| `validation.py` | Data quality checks (progress, duration outliers, straight-liners) |
+| `speaking_styles.py` | `SPEAKING_STYLES` dict mapping colors to trait groups |
+
+## Conventions
+
+### Altair Charts & Colors
+- **ALL colors MUST come from `theme.py`** - Use `ColorPalette.PRIMARY`, `ColorPalette.RANK_1`, etc.
+- If a new color is needed, add it to `ColorPalette` in `theme.py` first, then use it
+- Never hardcode hex colors directly in plotting code
+- Charts auto-save via `_save_plot()` when `fig_save_dir` is set
+- Filter footnotes added automatically via `_add_filter_footnote()`
+
+### QSF Parsing
+Use `_get_qsf_question_by_QID()` to extract question config:
+```python
+cfg = self._get_qsf_question_by_QID('QID27')['Payload']
+recode_map = cfg['RecodeValues']  # Maps choice numbers to values
+```
+
+### PPTX Image Replacement
+Images matched by perceptual hash (not filename); alt-text encodes figure path:
+```python
+utils.update_ppt_alt_text(ppt_path, image_source_dir)  # Tag images with alt-text
+utils.pptx_replace_named_image(ppt, target_tag, new_image)  # Replace by alt-text
+```
+
+This process should be run manually by the user ONLY.
\ No newline at end of file
diff --git a/00_qualtrics_validation.py b/00_qualtrics_validation.py
index 5847b32..f6f6518 100644
--- a/00_qualtrics_validation.py
+++ b/00_qualtrics_validation.py
@@ -27,7 +27,7 @@ def _(Path):
 
 @app.cell
 def _(qsf_file, results_file, utils):
-    survey = utils.JPMCSurvey(results_file, qsf_file)
+    survey = utils.QualtricsSurvey(results_file, qsf_file)
     data_all = survey.load_data()
     return (survey,)
 
diff --git a/02_quant_analysis.py b/02_quant_analysis.py
index f6b5bcb..fdc6cac 100644
--- a/02_quant_analysis.py
+++ b/02_quant_analysis.py
@@ -11,12 +11,12 @@ def _():
     from pathlib import Path
 
     from validation import check_progress, duration_validation, check_straight_liners
-    from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
+    from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
     import utils
 
     from speaking_styles import SPEAKING_STYLES
     return (
-        JPMCSurvey,
+        QualtricsSurvey,
         Path,
         SPEAKING_STYLES,
         calculate_weighted_ranking_scores,
@@ -49,8 +49,8 @@ def _(Path, file_browser, mo):
 
 
 @app.cell
-def _(JPMCSurvey, QSF_FILE, RESULTS_FILE, mo):
-    S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+def _(QualtricsSurvey, QSF_FILE, RESULTS_FILE, mo):
+    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
     try:
         data_all = S.load_data()
     except NotImplementedError as e:
diff --git a/03_quant_report.py b/03_quant_report.py
index 5a4ebb8..7db0901 100644
--- a/03_quant_report.py
+++ b/03_quant_report.py
@@ -9,7 +9,7 @@ with app.setup:
     from pathlib import Path
 
     from validation import check_progress, duration_validation, check_straight_liners
-    from utils import JPMCSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
+    from utils import QualtricsSurvey, combine_exclusive_columns, calculate_weighted_ranking_scores
     import utils
 
     from speaking_styles import SPEAKING_STYLES
@@ -35,7 +35,7 @@ def _(file_browser):
 
 @app.cell
 def _(QSF_FILE, RESULTS_FILE):
-    S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
     try:
         data_all = S.load_data()
     except NotImplementedError as e:
diff --git a/99_example_ingest_qualtrics_export.py b/99_example_ingest_qualtrics_export.py
index 54c0a23..f88b77c 100644
--- a/99_example_ingest_qualtrics_export.py
+++ b/99_example_ingest_qualtrics_export.py
@@ -10,8 +10,8 @@ def _():
     import polars as pl
     from pathlib import Path
 
-    from utils import JPMCSurvey, combine_exclusive_columns
-    return JPMCSurvey, combine_exclusive_columns, mo, pl
+    from utils import QualtricsSurvey, combine_exclusive_columns
+    return QualtricsSurvey, combine_exclusive_columns, mo, pl
 
 
 @app.cell
@@ -29,8 +29,8 @@ def _():
 
 
 @app.cell
-def _(JPMCSurvey, QSF_FILE, RESULTS_FILE):
-    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+def _(QualtricsSurvey, QSF_FILE, RESULTS_FILE):
+    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
     data = survey.load_data()
     data.collect()
     return data, survey
diff --git a/docs/altair-migration-plan.md b/docs/altair-migration-plan.md
index 3c87863..9898bde 100644
--- a/docs/altair-migration-plan.md
+++ b/docs/altair-migration-plan.md
@@ -1,4 +1,4 @@
-# Altair Migration Plan: Plotly → Altair for JPMCPlotsMixin
+# Altair Migration Plan: Plotly → Altair for QualtricsPlotsMixin
 
 **Date:** January 28, 2026  
 **Status:** Not Started  
@@ -22,9 +22,9 @@ Current Plotly implementation has a critical layout issue: filter annotations ov
 ## Current System Analysis
 
 ### File Structure
-- **`plots.py`** - Contains `JPMCPlotsMixin` class with 10 plotting methods
+- **`plots.py`** - Contains `QualtricsPlotsMixin` class with 10 plotting methods
 - **`theme.py`** - Contains `ColorPalette` class with all styling constants
-- **`utils.py`** - Contains `JPMCSurvey` class that mixes in `JPMCPlotsMixin`
+- **`utils.py`** - Contains `QualtricsSurvey` class that mixes in `QualtricsPlotsMixin`
 
 ### Color Palette (from theme.py)
 ```python
@@ -1140,10 +1140,10 @@ uv remove plotly kaleido
 ```python
 import marimo as mo
 import polars as pl
-from utils import JPMCSurvey
+from utils import QualtricsSurvey
 
 # Load sample data
-survey = JPMCSurvey()
+survey = QualtricsSurvey()
 survey.load_data('path/to/data')
 survey.fig_save_dir = 'figures/altair_test'
 
@@ -1244,7 +1244,7 @@ After completing all tasks, verify the following:
 ### Regression Testing
 - [ ] Existing Marimo notebooks still work
 - [ ] Data filtering still works (`filter_data()`)
-- [ ] `JPMCSurvey` class initialization unchanged
+- [ ] `QualtricsSurvey` class initialization unchanged
 - [ ] No breaking changes to public API
 
 ### Documentation
diff --git a/docs/wordcloud-usage.md b/docs/wordcloud-usage.md
index 857f41b..562c1d4 100644
--- a/docs/wordcloud-usage.md
+++ b/docs/wordcloud-usage.md
@@ -5,14 +5,14 @@ This example shows how to use the `create_traits_wordcloud` function to visualiz
 ## Basic Usage in Jupyter/Marimo Notebook
 
 ```python
-from utils import JPMCSurvey, create_traits_wordcloud
+from utils import QualtricsSurvey, create_traits_wordcloud
 from pathlib import Path
 
 # Load your survey data
 RESULTS_FILE = "data/exports/1-23-26/JPMC_Chase Brand Personality_Quant Round 1_January 23, 2026_Labels.csv"
 QSF_FILE = "data/19-dec_V1_quant_incl_shani_comments.qsf"
 
-S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
 data = S.load_data()
 
 # Get Top 3 Traits data
diff --git a/example_correlation_plots.py b/example_correlation_plots.py
index 0ab5ab1..bed7754 100644
--- a/example_correlation_plots.py
+++ b/example_correlation_plots.py
@@ -1,6 +1,6 @@
 
 import polars as pl
-from utils import JPMCSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
+from utils import QualtricsSurvey, process_speaking_style_data, process_voice_scale_data, join_voice_and_style_data
 from plots import plot_speaking_style_correlation
 from speaking_styles import SPEAKING_STYLES
 
@@ -14,7 +14,7 @@ RESULTS_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Bra
 QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"
 
 try:
-    survey = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+    survey = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
 except TypeError:
     # Fallback if signature is different or file not found (just in case)
     print("Error initializing survey with paths. Checking signature...")
diff --git a/plots.py b/plots.py
index 6aaf6a9..a85cae2 100644
--- a/plots.py
+++ b/plots.py
@@ -11,8 +11,8 @@ from theme import ColorPalette
 
 import hashlib
 
-class JPMCPlotsMixin:
-    """Mixin class for plotting functions in JPMCSurvey."""
+class QualtricsPlotsMixin:
+    """Mixin class for plotting functions in QualtricsSurvey."""
 
     def _process_title(self, title: str) -> str | list[str]:
         """Process title to handle <br> tags for Altair."""
diff --git a/utils.py b/utils.py
index 01a0917..98e6b2d 100644
--- a/utils.py
+++ b/utils.py
@@ -11,7 +11,7 @@ from io import BytesIO
 import imagehash
 from PIL import Image
 
-from plots import JPMCPlotsMixin
+from plots import QualtricsPlotsMixin
 
 
 from pptx import Presentation
@@ -514,7 +514,7 @@ def normalize_global_values(df: pl.DataFrame, target_cols: list[str]) -> pl.Data
     return res.lazy() if was_lazy else res
 
 
-class JPMCSurvey(JPMCPlotsMixin):
+class QualtricsSurvey(QualtricsPlotsMixin):
     """Class to handle JPMorgan Chase survey data."""
     
     def __init__(self, data_path: Union[str, Path], qsf_path: Union[str, Path]):
diff --git a/validation.py b/validation.py
index 2efc416..c082bae 100644
--- a/validation.py
+++ b/validation.py
@@ -323,12 +323,12 @@ def check_straight_liners(data, max_score=3):
 
 if __name__ == "__main__":
     
-    from utils import JPMCSurvey
+    from utils import QualtricsSurvey
     
     RESULTS_FILE = "data/exports/OneDrive_2026-01-28/1-28-26 Afternoon/JPMC_Chase Brand Personality_Quant Round 1_January 28, 2026_Afternoon_Labels.csv"
     QSF_FILE = "data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf"
     
-    S = JPMCSurvey(RESULTS_FILE, QSF_FILE)
+    S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
     data = S.load_data()
     
     # print("Checking Green Blue:")
diff --git a/wordclouds.py b/wordclouds.py
index 1a62b73..f9625fe 100644
--- a/wordclouds.py
+++ b/wordclouds.py
@@ -1,6 +1,6 @@
 """Word cloud utilities for Voice Branding analysis.
 
-The main wordcloud function is available as a method on JPMCSurvey:
+The main wordcloud function is available as a method on QualtricsSurvey:
     S.plot_traits_wordcloud(data, column='Top_3_Traits', title='...')
 
 This module provides standalone imports for backwards compatibility.