renamed notebooks and added significance test

2026-02-05 10:14:53 +01:00
parent a3cf9f103d
commit af9a15ccb0
2 changed files with 142 additions and 55 deletions
--- a/XX_quant_report.script.py
+++ b/XX_quant_report.script.py
@@ -14,6 +14,15 @@ import utils

 from speaking_styles import SPEAKING_STYLES

+# %% Fixed Variables
+# NOTE(review): the active RESULTS_FILE is the debug export (February 2); the 2-4-26 export is commented out - confirm before batch runs.
+# RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
+RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
+QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
+
+BEST_CHOSEN_CHARACTER = "the_coach"
+
+
 # %%
 # CLI argument parsing for batch automation
 # When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
@@ -45,12 +54,9 @@ def parse_cli_args():
    # Only parse if running as script (not in Jupyter/interactive)
    try:
        # Check if running in Jupyter by looking for ipykernel
-        get_ipython()  # noqa: F821
+        get_ipython()  # noqa: F821 # type: ignore
        # Return namespace with all filters set to None
        no_filters = {f: None for f in FILTER_CONFIG}
-        
-        # debugging age
-        no_filters['age'] = FILTER_CONFIG['age']
        return argparse.Namespace(**no_filters, filter_name=None)
    except NameError:
        args = parser.parse_args()
@@ -62,19 +68,7 @@ def parse_cli_args():

 cli_args = parse_cli_args()

-# %%

-# file_browser = mo.ui.file_browser(
-#     initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
-# )
-# file_browser
-
-# # %%
-# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
-# RESULTS_FILE = Path(file_browser.path(index=0))
-
-RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
-QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'

 # %%
 S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
@@ -83,46 +77,15 @@ try:
 except NotImplementedError as e:
    mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))

-# %%
-BEST_CHOSEN_CHARACTER = "the_coach"

-# # %%
-# filter_form = mo.md('''
+# %% Build filtered dataset based on CLI args

-
-
-# {age}
-
-# {gender}
-
-# {ethnicity}
-
-# {income}
-
-# {consumer}
-# '''
-# ).batch(
-#     age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
-#     gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
-#     ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
-#     income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
-#     consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
-# ).form()
-# mo.md(f'''
-# ---
-
-# # Data Filter
-
-# {filter_form}
-# ''')
-
-# %%
-# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
 # CLI args: None means "no filter applied" - filter_data() will skip None filters
+
 # Build filter values dict dynamically from FILTER_CONFIG
 _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}

-# %%
+# %% Apply filters
 _d = S.filter_data(data_all, **_active_filters)

 # Write filter description file if filter-name is provided
@@ -174,14 +137,10 @@ if cli_args.filter_name and S.fig_save_dir:
        _header += "-" * 80 + "\n"
        _summary_file.write_text(_header + _summary_line)

-# Stop execution and prevent other cells from running if no data is selected
-# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
+# Save to logical variable name for further analysis
 data = _d
-
-# data = data_validated
 data.collect()

-# %%


 # %%
--- a/XX_statistical_significance.script.py
+++ b/XX_statistical_significance.script.py
@@ -0,0 +1,128 @@
+"""Extra statistical significance analyses for quant report."""
+# %% Imports
+
+from utils import QualtricsSurvey
+import polars as pl
+import argparse
+import json
+import re
+
+
+
+
+# %% Fixed Variables
+RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'  # first argument to QualtricsSurvey
+QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'  # second argument to QualtricsSurvey
+
+
+# %% CLI argument parsing for batch automation
+# When run as script: uv run XX_statistical_significance.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
+# Central filter configuration - add new filters here only (each key becomes a --<key> CLI flag and a filter_data() keyword)
+# Format: 'cli_arg_name': 'QualtricsSurvey.options_* attribute name'
+FILTER_CONFIG = {
+    'age': 'options_age',
+    'gender': 'options_gender',
+    'ethnicity': 'options_ethnicity',
+    'income': 'options_income',
+    'consumer': 'options_consumer',
+    'business_owner': 'options_business_owner',
+    'ai_user': 'options_ai_user',
+    'investable_assets': 'options_investable_assets',
+    'industry': 'options_industry',
+}
+
+def parse_cli_args():
+    """Parse the filter CLI flags; return an all-None namespace when running inside IPython/Jupyter."""
+    parser = argparse.ArgumentParser(description='Run extra statistical significance analyses for the quant report with optional filters')
+    for filter_name in FILTER_CONFIG:
+        parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
+    parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
+    # Only parse if running as script: get_ipython is defined only under IPython/Jupyter
+    try:
+        get_ipython()  # noqa: F821 # type: ignore
+        return argparse.Namespace(**{f: None for f in FILTER_CONFIG}, filter_name=None)
+    except NameError:
+        args = parser.parse_args()
+    # Decode JSON strings to lists; malformed input becomes a usage error instead of a traceback
+    for filter_name in FILTER_CONFIG:
+        val = getattr(args, filter_name)
+        try:
+            parsed = json.loads(val) if val else None
+        except json.JSONDecodeError as err:
+            parser.error(f'--{filter_name} is not valid JSON: {err}')
+        if parsed is not None and not isinstance(parsed, list):
+            parser.error(f'--{filter_name} must be a JSON list, got {type(parsed).__name__}')
+        setattr(args, filter_name, parsed)
+    return args
+
+cli_args = parse_cli_args()  # every filter attribute is None when running under IPython/Jupyter
+
+
+# %% Load survey data
+S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
+data_all = S.load_data()
+
+
+# %% Build filtered dataset based on CLI args
+
+# CLI args: None means "no filter applied" - filter_data() will skip None filters
+
+# Build filter values dict dynamically from FILTER_CONFIG
+_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
+# The FILTER_CONFIG keys are passed to filter_data() as keyword argument names
+_d = S.filter_data(data_all, **_active_filters)
+
+# Write filter description file if filter-name is provided (and a figure directory is configured)
+if cli_args.filter_name and S.fig_save_dir:
+    # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.)
+    _filter_slug = S._get_filter_slug()
+    _filter_slug_dir = S.fig_save_dir / _filter_slug
+    _filter_slug_dir.mkdir(parents=True, exist_ok=True)
+
+    # Build filter description: one line per configured filter, "All" when unfiltered
+    _filter_desc_lines = [
+        f"Filter: {cli_args.filter_name}",
+        "",
+        "Applied Filters:",
+    ]
+    _short_desc_parts = []
+    for filter_name, options_attr in FILTER_CONFIG.items():
+        all_options = getattr(S, options_attr)
+        values = _active_filters[filter_name]
+        display_name = filter_name.replace('_', ' ').title()
+        # None means no filter applied (same as "All"); map(str, ...) tolerates non-string JSON values
+        if values is not None and values != all_options:
+            _short_desc_parts.append(f"{display_name}: {', '.join(map(str, values))}")
+            _filter_desc_lines.append(f"  {display_name}: {', '.join(map(str, values))}")
+        else:
+            _filter_desc_lines.append(f"  {display_name}: All")
+
+    # Write detailed description INSIDE the filter-slug directory
+    # Sanitize filter name for filename usage (replace / and other chars)
+    _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
+    _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
+    _filter_file.write_text('\n'.join(_filter_desc_lines))
+
+    # Append to summary index file at figures/<export_date>/filter_index.txt
+    _summary_file = S.fig_save_dir / "filter_index.txt"
+    _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents"
+    _summary_line = f"{_filter_slug}  |  {cli_args.filter_name}  |  {_short_desc}\n"
+
+    # Append or create the summary file
+    if _summary_file.exists():
+        _existing = _summary_file.read_text()
+        # Avoid duplicate entries for same slug: compare the first column exactly, not as a substring of the file
+        if not any(_l.split("  |  ", 1)[0] == _filter_slug for _l in _existing.splitlines()):
+            with _summary_file.open('a') as f:
+                f.write(_summary_line)
+    else:
+        _header = "Filter Index\n" + "=" * 80 + "\n\n"
+        _header += "Directory  |  Filter Name  |  Description\n"
+        _header += "-" * 80 + "\n"
+        _summary_file.write_text(_header + _summary_line)
+
+# Save to logical variable name for further analysis
+data = _d
+data.collect()  # NOTE(review): return value is unused here - presumably kept for notebook cell display; confirm
+
+# %%