renamed notebooks and added significance test

This commit is contained in:
2026-02-05 10:14:53 +01:00
parent a3cf9f103d
commit af9a15ccb0
2 changed files with 142 additions and 55 deletions

View File

@@ -0,0 +1,128 @@
"""Extra statistical significance analyses for quant report."""
# %% Imports
from utils import QualtricsSurvey
import polars as pl
import argparse
import json
import re
# %% Fixed Variables
# Input files handed to QualtricsSurvey further down:
#   RESULTS_FILE - the survey results export (CSV).
#   QSF_FILE     - the matching Qualtrics .qsf file.
# NOTE(review): both paths are relative; presumably they are resolved against
# the current working directory, so the script has to be launched from the
# project root - confirm.
RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
# %% CLI argument parsing for batch automation
# When run as a script, every filter is passed as a JSON list, for example:
#   uv run XX_statistical_significance.script.py --age '["<age option>", ...]'
#
# Single source of truth for the available filters: register new filters here
# and nowhere else.
# Each entry maps a CLI argument name to the name of the QualtricsSurvey
# ``options_*`` attribute that belongs to that filter.
FILTER_CONFIG = {
    'age': 'options_age',
    'gender': 'options_gender',
    'ethnicity': 'options_ethnicity',
    'income': 'options_income',
    'consumer': 'options_consumer',
    'business_owner': 'options_business_owner',
    'ai_user': 'options_ai_user',
    'investable_assets': 'options_investable_assets',
    'industry': 'options_industry',
}
def parse_cli_args(argv=None, filter_config=None):
    """Parse the command-line filter options of the report script.

    One ``--<name>`` option is registered per configured filter; each takes a
    JSON list of values. ``--filter-name`` labels the filter combination.

    Args:
        argv: Argument strings to parse. ``None`` (the default) keeps the
            original behaviour: inside IPython/Jupyter an all-``None``
            namespace is returned without parsing, otherwise
            ``sys.argv[1:]`` is parsed. An explicit list is always parsed.
        filter_config: Mapping whose keys are the filter names to expose as
            options. Defaults to the module-level ``FILTER_CONFIG``.

    Returns:
        argparse.Namespace: one attribute per filter (a list, or ``None``
        when the option was omitted or empty) plus ``filter_name``.

    Raises:
        SystemExit: raised through ``parser.error`` (exit status 2) when a
            filter value is not valid JSON or does not decode to a list.
    """
    filters = FILTER_CONFIG if filter_config is None else filter_config

    if argv is None:
        # ``get_ipython`` only exists as a builtin inside IPython/Jupyter; a
        # NameError therefore means "plain script". Only this probe is
        # guarded so that unrelated NameErrors are not swallowed.
        try:
            get_ipython()  # noqa: F821  # type: ignore
        except NameError:
            pass
        else:
            # Interactive session: there is no command line to read, so
            # report "no filter" for everything.
            return argparse.Namespace(**dict.fromkeys(filters), filter_name=None)

    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')
    # Dynamically add filter arguments from config
    for name in filters:
        parser.add_argument(f'--{name}', type=str, default=None, help=f'JSON list of {name} values')
    parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')

    args = parser.parse_args(argv)

    # Decode the JSON strings into lists; omitted/empty options become None.
    for name in filters:
        raw = getattr(args, name)
        if not raw:
            setattr(args, name, None)
            continue
        try:
            values = json.loads(raw)
        except json.JSONDecodeError as err:
            # Report as a normal usage error instead of a traceback.
            parser.error(f'--{name} expects a JSON list, got invalid JSON: {err}')
        if not isinstance(values, list):
            # A bare string would later be iterated character by character.
            parser.error(f'--{name} expects a JSON list, got {type(values).__name__}')
        setattr(args, name, values)
    return args
# Parsed once at import/cell-run time; exposes one attribute per FILTER_CONFIG
# key plus `filter_name`.
cli_args = parse_cli_args()
# %%
# Survey wrapper built from the two input files, then the unfiltered data set.
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
data_all = S.load_data()
# %% Build filtered dataset based on CLI args
# CLI args: None means "no filter applied" - filter_data() will skip None filters
# (NOTE(review): that skipping happens inside filter_data(), which is not
# visible here - confirm.)
# Build filter values dict dynamically from FILTER_CONFIG so that every
# configured filter is forwarded as a keyword argument of the same name.
_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
_d = S.filter_data(data_all, **_active_filters)
# Write filter description files if --filter-name is provided and the survey
# object has a figure directory. Two files are produced:
#   <fig_save_dir>/<filter slug>/<sanitised filter name>.txt  - full description
#   <fig_save_dir>/filter_index.txt                           - one line per slug
if cli_args.filter_name and S.fig_save_dir:
    # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.)
    _filter_slug = S._get_filter_slug()
    _filter_slug_dir = S.fig_save_dir / _filter_slug
    _filter_slug_dir.mkdir(parents=True, exist_ok=True)

    # Build filter description
    _filter_desc_lines = [
        f"Filter: {cli_args.filter_name}",
        "",
        "Applied Filters:",
    ]
    _short_desc_parts = []
    for filter_name, options_attr in FILTER_CONFIG.items():
        all_options = getattr(S, options_attr)
        values = _active_filters[filter_name]
        display_name = filter_name.replace('_', ' ').title()
        # None means no filter applied (same as "All")
        if values is not None and values != all_options:
            # str() each value so that non-string JSON values (e.g. numbers)
            # do not make join() raise TypeError; build the text only once.
            _entry = f"{display_name}: {', '.join(map(str, values))}"
            _short_desc_parts.append(_entry)
            _filter_desc_lines.append(f" {_entry}")
        else:
            _filter_desc_lines.append(f" {display_name}: All")

    # Write detailed description INSIDE the filter-slug directory
    # Sanitize filter name for filename usage (replace / and other chars)
    _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
    _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
    # Explicit UTF-8 so the output does not depend on the platform locale.
    _filter_file.write_text('\n'.join(_filter_desc_lines), encoding="utf-8")

    # Append to summary index file at figures/<export_date>/filter_index.txt
    _summary_file = S.fig_save_dir / "filter_index.txt"
    _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents"
    _summary_line = f"{_filter_slug} | {cli_args.filter_name} | {_short_desc}\n"

    # Append or create the summary file
    if _summary_file.exists():
        _existing = _summary_file.read_text(encoding="utf-8")
        # Avoid duplicate entries for the same slug. Compare against the
        # leading "<slug> | " field of each line: a plain substring test on
        # the whole file would treat "Cons" as already present as soon as
        # "Cons-Starter" (or any name/description containing it) is indexed.
        _slug_prefix = f"{_filter_slug} | "
        if not any(_line.startswith(_slug_prefix) for _line in _existing.splitlines()):
            with _summary_file.open('a', encoding="utf-8") as f:
                f.write(_summary_line)
    else:
        _header = "Filter Index\n" + "=" * 80 + "\n\n"
        _header += "Directory | Filter Name | Description\n"
        _header += "-" * 80 + "\n"
        _summary_file.write_text(_header + _summary_line, encoding="utf-8")
# Save to logical variable name for further analysis
data = _d
# NOTE(review): the value returned by collect() is not assigned, so it is
# discarded when this file runs as a plain script. Presumably `data` is a lazy
# polars frame and this trailing expression exists to display the collected
# result when the cell is run interactively - confirm before relying on
# `data` being materialised.
data.collect()
# %%