renamed notebooks and added significance test

2026-02-05 10:14:53 +01:00
parent a3cf9f103d
commit af9a15ccb0
2 changed files with 142 additions and 55 deletions
--- a/XX_quant_report.script.py
+++ b/XX_quant_report.script.py
@@ -14,6 +14,15 @@ import utils
 from speaking_styles import SPEAKING_STYLES
 # %% Fixed Variables
 # RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
 RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
 QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
 BEST_CHOSEN_CHARACTER = "the_coach"
 # %%
 # CLI argument parsing for batch automation
 # When run as script: python XX_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
@@ -45,12 +54,9 @@ def parse_cli_args():
    # Only parse if running as script (not in Jupyter/interactive)
    try:
        # Check if running in Jupyter by looking for ipykernel
-        get_ipython()  # noqa: F821
+        get_ipython()  # noqa: F821 # type: ignore
        # Return namespace with all filters set to None
        no_filters = {f: None for f in FILTER_CONFIG}
        # debugging age
        no_filters['age'] = FILTER_CONFIG['age']
        return argparse.Namespace(**no_filters, filter_name=None)
    except NameError:
        args = parser.parse_args()
@@ -62,19 +68,7 @@ def parse_cli_args():
 cli_args = parse_cli_args()
 # %%
 # file_browser = mo.ui.file_browser(
 #     initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
 # )
 # file_browser
 # # %%
 # mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
 # RESULTS_FILE = Path(file_browser.path(index=0))
 RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
 QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
 # %%
 S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
@@ -83,46 +77,15 @@ try:
 except NotImplementedError as e:
    mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))
 # %%
 BEST_CHOSEN_CHARACTER = "the_coach"
-# # %%
+# %% Build filtered dataset based on CLI args
 # filter_form = mo.md('''
 # {age}
 # {gender}
 # {ethnicity}
 # {income}
 # {consumer}
 # '''
 # ).batch(
 #     age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
 #     gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
 #     ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
 #     income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
 #     consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
 # ).form()
 # mo.md(f'''
 # ---
 # # Data Filter
 # {filter_form}
 # ''')
 # %%
 # mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
 # CLI args: None means "no filter applied" - filter_data() will skip None filters
 # Build filter values dict dynamically from FILTER_CONFIG
 _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
-# %%
+# %% Apply filters
 _d = S.filter_data(data_all, **_active_filters)
 # Write filter description file if filter-name is provided
@@ -174,14 +137,10 @@ if cli_args.filter_name and S.fig_save_dir:
        _header += "-" * 80 + "\n"
        _summary_file.write_text(_header + _summary_line)
-# Stop execution and prevent other cells from running if no data is selected
+# Save to logical variable name for further analysis
 # mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
 data = _d
 # data = data_validated
 data.collect()
 # %%
 # %%
--- a/XX_statistical_significance.script.py
+++ b/XX_statistical_significance.script.py
@@ -0,0 +1,128 @@
 """Extra statistical significance analyses for quant report."""
 # %% Imports
 from utils import QualtricsSurvey
 import polars as pl
 import argparse
 import json
 import re
 # %% Fixed Variables
 RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
 QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
 # %% CLI argument parsing for batch automation
 # When run as script: uv run XX_statistical_significance.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
 # Central filter configuration - add new filters here only.
 # Format: 'cli_arg_name': 'QualtricsSurvey.options_* attribute name'
 # Each key is used three ways further down in this script:
 #   * as a `--<key>` command-line option (see parse_cli_args),
 #   * as a keyword argument passed to S.filter_data(),
 #   * as the label source for the filter description file.
 # Each value is the attribute name read from the QualtricsSurvey instance to
 # obtain the full list of options for that filter.
 FILTER_CONFIG = {
    'age': 'options_age',
    'gender': 'options_gender',
    'ethnicity': 'options_ethnicity',
    'income': 'options_income',
    'consumer': 'options_consumer',
    'business_owner': 'options_business_owner',
    'ai_user': 'options_ai_user',
    'investable_assets': 'options_investable_assets',
    'industry': 'options_industry',
 }
 def parse_cli_args():
    """Parse the optional filter arguments used for batch automation.

    Every key of FILTER_CONFIG becomes a ``--<name>`` option whose value is a
    JSON list of the options to keep, e.g. ``--age '["18 to 21 years"]'``.
    ``--filter-name`` labels the filter combination.

    Returns:
        argparse.Namespace with one attribute per FILTER_CONFIG key (a list,
        or None when that filter is not applied) plus ``filter_name``. Inside
        an IPython/Jupyter session the command line is not parsed and every
        attribute is None.

    A filter value that is not valid JSON, or that decodes to something other
    than a list (or null), ends the run through ``parser.error`` (usage
    message, exit status 2) instead of a traceback or a silently wrong filter.
    """
    parser = argparse.ArgumentParser(description='Run statistical significance analyses with optional filters')
    # Dynamically add filter arguments from config
    for filter_name in FILTER_CONFIG:
        parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
    parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
    # Only parse if running as script (not in Jupyter/interactive).
    # get_ipython only exists inside IPython/Jupyter sessions; the NameError
    # raised by a plain interpreter is what selects command-line parsing.
    try:
        get_ipython()  # noqa: F821 # type: ignore
    except NameError:
        pass
    else:
        # Interactive session: return namespace with all filters set to None
        no_filters = {f: None for f in FILTER_CONFIG}
        return argparse.Namespace(**no_filters, filter_name=None)
    args = parser.parse_args()
    # Parse JSON strings to lists
    for filter_name in FILTER_CONFIG:
        raw = getattr(args, filter_name)
        if not raw:
            # Option omitted (or given as an empty string): filter not applied
            setattr(args, filter_name, None)
            continue
        try:
            values = json.loads(raw)
        except json.JSONDecodeError as exc:
            parser.error(f"--{filter_name} must be a JSON list, got {raw!r}: {exc}")
        # JSON null keeps meaning "no filter"; any other non-list value (for
        # example a bare string) would later be iterated item by item.
        if values is not None and not isinstance(values, list):
            parser.error(f"--{filter_name} must be a JSON list, got {raw!r}")
        setattr(args, filter_name, values)
    return args
 # Parse the filters once at start-up; in an IPython/Jupyter session
 # parse_cli_args() returns every filter as None.
 cli_args = parse_cli_args()
 # %%
 # Build the survey object from the two paths defined under "Fixed Variables"
 # and load the unfiltered data.
 S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
 data_all = S.load_data()
 # %% Build filtered dataset based on CLI args
 # CLI args: None means "no filter applied" - filter_data() will skip None filters
 # Build filter values dict dynamically from FILTER_CONFIG
 # (keys are the FILTER_CONFIG names, values are the parsed CLI lists or None)
 _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
 _d = S.filter_data(data_all, **_active_filters)
 # Write filter description file if filter-name is provided
 # (S.fig_save_dir must also be set: both output files are written beneath it)
 if cli_args.filter_name and S.fig_save_dir:
    # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.)
    _filter_slug = S._get_filter_slug()
    _filter_slug_dir = S.fig_save_dir / _filter_slug
    _filter_slug_dir.mkdir(parents=True, exist_ok=True)
    # Build filter description
    _filter_desc_lines = [
        f"Filter: {cli_args.filter_name}",
        "",
        "Applied Filters:",
    ]
    _short_desc_parts = []
    for filter_name, options_attr in FILTER_CONFIG.items():
        all_options = getattr(S, options_attr)
        values = _active_filters[filter_name]
        display_name = filter_name.replace('_', ' ').title()
        # None means no filter applied (same as "All")
        if values is not None and values != all_options:
            # str() each entry so non-string JSON values (e.g. numbers) cannot
            # make the join raise TypeError; build the text once for both outputs.
            _joined_values = ', '.join(str(v) for v in values)
            _short_desc_parts.append(f"{display_name}: {_joined_values}")
            _filter_desc_lines.append(f"  {display_name}: {_joined_values}")
        else:
            _filter_desc_lines.append(f"  {display_name}: All")
    # Write detailed description INSIDE the filter-slug directory
    # Sanitize filter name for filename usage (replace / and other chars)
    _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
    _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
    _filter_file.write_text('\n'.join(_filter_desc_lines))
    # Append to summary index file at figures/<export_date>/filter_index.txt
    _summary_file = S.fig_save_dir / "filter_index.txt"
    _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents"
    _summary_line = f"{_filter_slug}  |  {cli_args.filter_name}  |  {_short_desc}\n"
    # Append or create the summary file
    if _summary_file.exists():
        _existing = _summary_file.read_text()
        # Avoid duplicate entries for same slug. Compare against the first
        # column of every line: a substring test over the whole file would also
        # match a slug that is a prefix of another slug or that merely appears
        # in a filter name or description, and the new entry would be dropped.
        _indexed_slugs = {_line.split("  |  ", 1)[0] for _line in _existing.splitlines()}
        if _filter_slug not in _indexed_slugs:
            with _summary_file.open('a') as f:
                f.write(_summary_line)
    else:
        _header = "Filter Index\n" + "=" * 80 + "\n\n"
        _header += "Directory  |  Filter Name  |  Description\n"
        _header += "-" * 80 + "\n"
        _summary_file.write_text(_header + _summary_line)
 # Save to logical variable name for further analysis
 data = _d
 # NOTE(review): the return value of collect() is not assigned, so `data`
 # itself is unchanged by this line. Presumably this is the cell's last
 # expression so that interactive runs display the result - confirm.
 data.collect()
 # %%