From af9a15ccb0006ce943b2b2e5a6acd1626f87dfc5 Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Thu, 5 Feb 2026 10:14:53 +0100 Subject: [PATCH] renamed notebooks and added significance test --- ...ort.script.py => XX_quant_report.script.py | 69 ++-------- XX_statistical_significance.script.py | 128 ++++++++++++++++++ 2 files changed, 142 insertions(+), 55 deletions(-) rename 03_quant_report.script.py => XX_quant_report.script.py (93%) create mode 100644 XX_statistical_significance.script.py diff --git a/03_quant_report.script.py b/XX_quant_report.script.py similarity index 93% rename from 03_quant_report.script.py rename to XX_quant_report.script.py index 8705de8..eaeca7a 100644 --- a/03_quant_report.script.py +++ b/XX_quant_report.script.py @@ -14,6 +14,15 @@ import utils from speaking_styles import SPEAKING_STYLES +# %% Fixed Variables + +# RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv' +RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv' +QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf' + +BEST_CHOSEN_CHARACTER = "the_coach" + + # %% # CLI argument parsing for batch automation # When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]' @@ -45,12 +54,9 @@ def parse_cli_args(): # Only parse if running as script (not in Jupyter/interactive) try: # Check if running in Jupyter by looking for ipykernel - get_ipython() # noqa: F821 + get_ipython() # noqa: F821 # type: ignore # Return namespace with all filters set to None no_filters = {f: None for f in FILTER_CONFIG} - - # debugging age - no_filters['age'] = FILTER_CONFIG['age'] return argparse.Namespace(**no_filters, filter_name=None) except NameError: args = parser.parse_args() @@ -62,19 +68,7 @@ def parse_cli_args(): cli_args = parse_cli_args() -# %% -# file_browser = mo.ui.file_browser( -# initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File" -# ) -# file_browser - -# # %% -# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**")) -# RESULTS_FILE = Path(file_browser.path(index=0)) - -RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv' -QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf' # %% S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) @@ -83,46 +77,15 @@ try: except NotImplementedError as e: mo.stop(True, mo.md(f"**⚠️ {str(e)}**")) -# %% -BEST_CHOSEN_CHARACTER = "the_coach" -# # %% -# filter_form = mo.md(''' +# %% Build filtered dataset based on CLI args - - -# {age} - -# {gender} - -# {ethnicity} - -# {income} - -# {consumer} -# ''' -# ).batch( -# age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"), -# gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"), -# ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"), -# income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"), -# consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:") -# ).form() -# mo.md(f''' -# --- - -# # Data Filter - -# {filter_form} -# ''') - -# %% -# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**")) # CLI args: None means "no filter applied" - filter_data() will skip None filters + # Build filter values dict dynamically from FILTER_CONFIG _active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG} -# %% +# %% Apply filters _d = S.filter_data(data_all, **_active_filters) # Write filter description file if filter-name is provided @@ -174,14 +137,10 @@ if cli_args.filter_name and S.fig_save_dir: _header += "-" * 80 + "\n" _summary_file.write_text(_header + _summary_line) -# Stop execution and prevent other cells from running if no data is selected -# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**")) +# Save to logical variable name for further analysis data = _d - -# data = data_validated data.collect() -# %% # %% diff --git a/XX_statistical_significance.script.py b/XX_statistical_significance.script.py new file mode 100644 index 0000000..8d921d3 --- /dev/null +++ b/XX_statistical_significance.script.py @@ -0,0 +1,128 @@ +"""Extra statistical significance analyses for quant report.""" +# %% Imports + +from utils import QualtricsSurvey +import polars as pl +import argparse +import json +import re + + + + +# %% Fixed Variables +RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv' +QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf' + + +# %% CLI argument parsing for batch automation +# When run as script: uv run XX_statistical_significance.script.py --age '["18 +# Central filter configuration - add new filters here only +# Format: 'cli_arg_name': 'QualtricsSurvey.options_* attribute name' +FILTER_CONFIG = { + 'age': 'options_age', + 'gender': 'options_gender', + 'ethnicity': 'options_ethnicity', + 'income': 'options_income', + 'consumer': 'options_consumer', + 'business_owner': 'options_business_owner', + 'ai_user': 'options_ai_user', + 'investable_assets': 'options_investable_assets', + 'industry': 'options_industry', +} + +def parse_cli_args(): + parser = argparse.ArgumentParser(description='Generate quant report with optional filters') + + # Dynamically add filter arguments from config + for filter_name in FILTER_CONFIG: + parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values') + + parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)') + + # Only parse if running as script (not in Jupyter/interactive) + try: + # Check if running in Jupyter by looking for ipykernel + get_ipython() # noqa: F821 # type: ignore + # Return namespace with all filters set to None + no_filters = {f: None for f in FILTER_CONFIG} + return argparse.Namespace(**no_filters, filter_name=None) + except NameError: + args = parser.parse_args() + # Parse JSON strings to lists + for filter_name in FILTER_CONFIG: + val = getattr(args, filter_name) + setattr(args, filter_name, json.loads(val) if val else None) + return args + +cli_args = parse_cli_args() + + +# %% +S = QualtricsSurvey(RESULTS_FILE, QSF_FILE) +data_all = S.load_data() + + +# %% Build filtered dataset based on CLI args + +# CLI args: None means "no filter applied" - filter_data() will skip None filters + +# Build filter values dict dynamically from FILTER_CONFIG +_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG} + +_d = S.filter_data(data_all, **_active_filters) + +# Write filter description file if filter-name is provided +if cli_args.filter_name and S.fig_save_dir: + # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.) + _filter_slug = S._get_filter_slug() + _filter_slug_dir = S.fig_save_dir / _filter_slug + _filter_slug_dir.mkdir(parents=True, exist_ok=True) + + # Build filter description + _filter_desc_lines = [ + f"Filter: {cli_args.filter_name}", + "", + "Applied Filters:", + ] + _short_desc_parts = [] + for filter_name, options_attr in FILTER_CONFIG.items(): + all_options = getattr(S, options_attr) + values = _active_filters[filter_name] + display_name = filter_name.replace('_', ' ').title() + # None means no filter applied (same as "All") + if values is not None and values != all_options: + _short_desc_parts.append(f"{display_name}: {', '.join(values)}") + _filter_desc_lines.append(f" {display_name}: {', '.join(values)}") + else: + _filter_desc_lines.append(f" {display_name}: All") + + # Write detailed description INSIDE the filter-slug directory + # Sanitize filter name for filename usage (replace / and other chars) + _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name) + _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt" + _filter_file.write_text('\n'.join(_filter_desc_lines)) + + # Append to summary index file at figures//filter_index.txt + _summary_file = S.fig_save_dir / "filter_index.txt" + _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents" + _summary_line = f"{_filter_slug} | {cli_args.filter_name} | {_short_desc}\n" + + # Append or create the summary file + if _summary_file.exists(): + _existing = _summary_file.read_text() + # Avoid duplicate entries for same slug + if _filter_slug not in _existing: + with _summary_file.open('a') as f: + f.write(_summary_line) + else: + _header = "Filter Index\n" + "=" * 80 + "\n\n" + _header += "Directory | Filter Name | Description\n" + _header += "-" * 80 + "\n" + _summary_file.write_text(_header + _summary_line) + +# Save to logical variable name for further analysis +data = _d +data.collect() + +# %%