From af9a15ccb0006ce943b2b2e5a6acd1626f87dfc5 Mon Sep 17 00:00:00 2001
From: Luigi Maiorano <luigi.maiorano@qumo.io>
Date: Thu, 5 Feb 2026 10:14:53 +0100
Subject: [PATCH] renamed notebooks and added significance test

---
 ...ort.script.py => XX_quant_report.script.py |  69 ++--------
 XX_statistical_significance.script.py         | 128 ++++++++++++++++++
 2 files changed, 142 insertions(+), 55 deletions(-)
 rename 03_quant_report.script.py => XX_quant_report.script.py (93%)
 create mode 100644 XX_statistical_significance.script.py

diff --git a/03_quant_report.script.py b/XX_quant_report.script.py
similarity index 93%
rename from 03_quant_report.script.py
rename to XX_quant_report.script.py
index 8705de8..eaeca7a 100644
--- a/03_quant_report.script.py
+++ b/XX_quant_report.script.py
@@ -14,6 +14,15 @@ import utils
 
 from speaking_styles import SPEAKING_STYLES
 
+# %% Fixed Variables
+
+# RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
+RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
+QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
+
+BEST_CHOSEN_CHARACTER = "the_coach"
+
+
 # %%
 # CLI argument parsing for batch automation
 # When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
@@ -45,12 +54,9 @@ def parse_cli_args():
     # Only parse if running as script (not in Jupyter/interactive)
     try:
         # Check if running in Jupyter by looking for ipykernel
-        get_ipython()  # noqa: F821
+        get_ipython()  # noqa: F821 # type: ignore
         # Return namespace with all filters set to None
         no_filters = {f: None for f in FILTER_CONFIG}
-        
-        # debugging age
-        no_filters['age'] = FILTER_CONFIG['age']
         return argparse.Namespace(**no_filters, filter_name=None)
     except NameError:
         args = parser.parse_args()
@@ -62,19 +68,7 @@ def parse_cli_args():
 
 cli_args = parse_cli_args()
 
-# %%
 
-# file_browser = mo.ui.file_browser(
-#     initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
-# )
-# file_browser
-
-# # %%
-# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
-# RESULTS_FILE = Path(file_browser.path(index=0))
-
-RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
-QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
 
 # %%
 S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
@@ -83,46 +77,15 @@ try:
 except NotImplementedError as e:
     mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))
 
-# %%
-BEST_CHOSEN_CHARACTER = "the_coach"
 
-# # %%
-# filter_form = mo.md('''
+# %% Build filtered dataset based on CLI args
 
-
-
-# {age}
-
-# {gender}
-
-# {ethnicity}
-
-# {income}
-
-# {consumer}
-# '''
-# ).batch(
-#     age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
-#     gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
-#     ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
-#     income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
-#     consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
-# ).form()
-# mo.md(f'''
-# ---
-
-# # Data Filter
-
-# {filter_form}
-# ''')
-
-# %%
-# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
 # CLI args: None means "no filter applied" - filter_data() will skip None filters
+
 # Build filter values dict dynamically from FILTER_CONFIG
 _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
 
-# %%
+# %% Apply filters
 _d = S.filter_data(data_all, **_active_filters)
 
 # Write filter description file if filter-name is provided
@@ -174,14 +137,10 @@ if cli_args.filter_name and S.fig_save_dir:
         _header += "-" * 80 + "\n"
         _summary_file.write_text(_header + _summary_line)
 
-# Stop execution and prevent other cells from running if no data is selected
-# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
+# Save to logical variable name for further analysis
 data = _d
-
-# data = data_validated
 data.collect()
 
-# %%
 
 
 # %%
diff --git a/XX_statistical_significance.script.py b/XX_statistical_significance.script.py
new file mode 100644
index 0000000..8d921d3
--- /dev/null
+++ b/XX_statistical_significance.script.py
@@ -0,0 +1,128 @@
+"""Extra statistical significance analyses for quant report."""
+# %% Imports
+
+from utils import QualtricsSurvey
+import polars as pl
+import argparse
+import json
+import re
+
+
+
+
+# %% Fixed Variables
+RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
+QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
+
+
+# %% CLI argument parsing for batch automation
+# When run as script: uv run XX_statistical_significance.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
+# Central filter configuration - add new filters here only
+# Format: 'cli_arg_name': 'QualtricsSurvey.options_* attribute name'
+FILTER_CONFIG = {
+    'age': 'options_age',
+    'gender': 'options_gender',
+    'ethnicity': 'options_ethnicity',
+    'income': 'options_income',
+    'consumer': 'options_consumer',
+    'business_owner': 'options_business_owner',
+    'ai_user': 'options_ai_user',
+    'investable_assets': 'options_investable_assets',
+    'industry': 'options_industry',
+}
+
+def parse_cli_args():
+    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
+    
+    # Dynamically add filter arguments from config
+    for filter_name in FILTER_CONFIG:
+        parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
+    
+    parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
+    
+    # Only parse if running as script (not in Jupyter/interactive)
+    try:
+        # get_ipython() is only defined under IPython/Jupyter; in a plain script the name lookup raises NameError
+        get_ipython()  # noqa: F821 # type: ignore
+        # Return namespace with all filters set to None
+        no_filters = {f: None for f in FILTER_CONFIG}
+        return argparse.Namespace(**no_filters, filter_name=None)
+    except NameError:
+        args = parser.parse_args()
+        # Decode each JSON-encoded filter argument; unset or empty values become None (no filter)
+        for filter_name in FILTER_CONFIG:
+            val = getattr(args, filter_name)
+            setattr(args, filter_name, json.loads(val) if val else None)
+        return args
+
+cli_args = parse_cli_args()
+
+
+# %%
+S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
+data_all = S.load_data()
+
+
+# %% Build filtered dataset based on CLI args
+
+# CLI args: None means "no filter applied" - filter_data() will skip None filters
+
+# Build filter values dict dynamically from FILTER_CONFIG
+_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
+
+_d = S.filter_data(data_all, **_active_filters)
+
+# Write filter description file if filter-name is provided
+if cli_args.filter_name and S.fig_save_dir:
+    # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.)
+    _filter_slug = S._get_filter_slug()
+    _filter_slug_dir = S.fig_save_dir / _filter_slug
+    _filter_slug_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Build filter description
+    _filter_desc_lines = [
+        f"Filter: {cli_args.filter_name}",
+        "",
+        "Applied Filters:",
+    ]
+    _short_desc_parts = []
+    for filter_name, options_attr in FILTER_CONFIG.items():
+        all_options = getattr(S, options_attr)
+        values = _active_filters[filter_name]
+        display_name = filter_name.replace('_', ' ').title()
+        # None means no filter applied (same as "All")
+        if values is not None and values != all_options:
+            _short_desc_parts.append(f"{display_name}: {', '.join(values)}")
+            _filter_desc_lines.append(f"  {display_name}: {', '.join(values)}")
+        else:
+            _filter_desc_lines.append(f"  {display_name}: All")
+    
+    # Write detailed description INSIDE the filter-slug directory
+    # Sanitize filter name for filename usage (replace / and other chars)
+    _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
+    _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
+    _filter_file.write_text('\n'.join(_filter_desc_lines))
+    
+    # Append to summary index file at figures/<export_date>/filter_index.txt
+    _summary_file = S.fig_save_dir / "filter_index.txt"
+    _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents"
+    _summary_line = f"{_filter_slug}  |  {cli_args.filter_name}  |  {_short_desc}\n"
+    
+    # Append or create the summary file
+    if _summary_file.exists():
+        _existing = _summary_file.read_text()
+        # Avoid duplicate entries: skip the append if the slug already appears anywhere in the file (substring match)
+        if _filter_slug not in _existing:
+            with _summary_file.open('a') as f:
+                f.write(_summary_line)
+    else:
+        _header = "Filter Index\n" + "=" * 80 + "\n\n"
+        _header += "Directory  |  Filter Name  |  Description\n"
+        _header += "-" * 80 + "\n"
+        _summary_file.write_text(_header + _summary_line)
+
+# Save to logical variable name for further analysis
+data = _d
+data.collect()
+
+# %%