renamed notebooks and added significance test
This commit is contained in:
@@ -14,6 +14,15 @@ import utils
|
|||||||
|
|
||||||
from speaking_styles import SPEAKING_STYLES
|
from speaking_styles import SPEAKING_STYLES
|
||||||
|
|
||||||
|
# %% Fixed Variables
|
||||||
|
|
||||||
|
# RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
|
||||||
|
RESULTS_FILE = 'data/exports/debug/JPMC_Chase Brand Personality_Quant Round 1_February 2, 2026_Labels.csv'
|
||||||
|
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
||||||
|
|
||||||
|
BEST_CHOSEN_CHARACTER = "the_coach"
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# CLI argument parsing for batch automation
|
# CLI argument parsing for batch automation
|
||||||
# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
|
# When run as script: python 03_quant_report.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
|
||||||
@@ -45,12 +54,9 @@ def parse_cli_args():
|
|||||||
# Only parse if running as script (not in Jupyter/interactive)
|
# Only parse if running as script (not in Jupyter/interactive)
|
||||||
try:
|
try:
|
||||||
# Check if running in Jupyter by looking for ipykernel
|
# Check if running in Jupyter by looking for ipykernel
|
||||||
get_ipython() # noqa: F821
|
get_ipython() # noqa: F821 # type: ignore
|
||||||
# Return namespace with all filters set to None
|
# Return namespace with all filters set to None
|
||||||
no_filters = {f: None for f in FILTER_CONFIG}
|
no_filters = {f: None for f in FILTER_CONFIG}
|
||||||
|
|
||||||
# debugging age
|
|
||||||
no_filters['age'] = FILTER_CONFIG['age']
|
|
||||||
return argparse.Namespace(**no_filters, filter_name=None)
|
return argparse.Namespace(**no_filters, filter_name=None)
|
||||||
except NameError:
|
except NameError:
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -62,19 +68,7 @@ def parse_cli_args():
|
|||||||
|
|
||||||
cli_args = parse_cli_args()
|
cli_args = parse_cli_args()
|
||||||
|
|
||||||
# %%
|
|
||||||
|
|
||||||
# file_browser = mo.ui.file_browser(
|
|
||||||
# initial_path="./data/exports", multiple=False, restrict_navigation=True, filetypes=[".csv"], label="Select 'Labels' File"
|
|
||||||
# )
|
|
||||||
# file_browser
|
|
||||||
|
|
||||||
# # %%
|
|
||||||
# mo.stop(file_browser.path(index=0) is None, mo.md("**⚠️ Please select a `_Labels.csv` file above to proceed**"))
|
|
||||||
# RESULTS_FILE = Path(file_browser.path(index=0))
|
|
||||||
|
|
||||||
RESULTS_FILE = 'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv'
|
|
||||||
QSF_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf'
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
|
||||||
@@ -83,46 +77,15 @@ try:
|
|||||||
except NotImplementedError as e:
|
except NotImplementedError as e:
|
||||||
mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))
|
mo.stop(True, mo.md(f"**⚠️ {str(e)}**"))
|
||||||
|
|
||||||
# %%
|
|
||||||
BEST_CHOSEN_CHARACTER = "the_coach"
|
|
||||||
|
|
||||||
# # %%
|
# %% Build filtered dataset based on CLI args
|
||||||
# filter_form = mo.md('''
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# {age}
|
|
||||||
|
|
||||||
# {gender}
|
|
||||||
|
|
||||||
# {ethnicity}
|
|
||||||
|
|
||||||
# {income}
|
|
||||||
|
|
||||||
# {consumer}
|
|
||||||
# '''
|
|
||||||
# ).batch(
|
|
||||||
# age=mo.ui.multiselect(options=S.options_age, value=S.options_age, label="Select Age Group(s):"),
|
|
||||||
# gender=mo.ui.multiselect(options=S.options_gender, value=S.options_gender, label="Select Gender(s):"),
|
|
||||||
# ethnicity=mo.ui.multiselect(options=S.options_ethnicity, value=S.options_ethnicity, label="Select Ethnicities:"),
|
|
||||||
# income=mo.ui.multiselect(options=S.options_income, value=S.options_income, label="Select Income Group(s):"),
|
|
||||||
# consumer=mo.ui.multiselect(options=S.options_consumer, value=S.options_consumer, label="Select Consumer Groups:")
|
|
||||||
# ).form()
|
|
||||||
# mo.md(f'''
|
|
||||||
# ---
|
|
||||||
|
|
||||||
# # Data Filter
|
|
||||||
|
|
||||||
# {filter_form}
|
|
||||||
# ''')
|
|
||||||
|
|
||||||
# %%
|
|
||||||
# mo.stop(filter_form.value is None, mo.md("**Please submit filter above to proceed**"))
|
|
||||||
# CLI args: None means "no filter applied" - filter_data() will skip None filters
|
# CLI args: None means "no filter applied" - filter_data() will skip None filters
|
||||||
|
|
||||||
# Build filter values dict dynamically from FILTER_CONFIG
|
# Build filter values dict dynamically from FILTER_CONFIG
|
||||||
_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
|
_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}
|
||||||
|
|
||||||
# %%
|
# %% Apply filters
|
||||||
_d = S.filter_data(data_all, **_active_filters)
|
_d = S.filter_data(data_all, **_active_filters)
|
||||||
|
|
||||||
# Write filter description file if filter-name is provided
|
# Write filter description file if filter-name is provided
|
||||||
@@ -174,14 +137,10 @@ if cli_args.filter_name and S.fig_save_dir:
|
|||||||
_header += "-" * 80 + "\n"
|
_header += "-" * 80 + "\n"
|
||||||
_summary_file.write_text(_header + _summary_line)
|
_summary_file.write_text(_header + _summary_line)
|
||||||
|
|
||||||
# Stop execution and prevent other cells from running if no data is selected
|
# Save to logical variable name for further analysis
|
||||||
# mo.stop(len(_d.collect()) == 0, mo.md("**No Data available for current filter combination**"))
|
|
||||||
data = _d
|
data = _d
|
||||||
|
|
||||||
# data = data_validated
|
|
||||||
data.collect()
|
data.collect()
|
||||||
|
|
||||||
# %%
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
128
XX_statistical_significance.script.py
Normal file
128
XX_statistical_significance.script.py
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
"""Extra statistical significance analyses for quant report."""
|
||||||
|
# %% Imports
|
||||||
|
|
||||||
|
from utils import QualtricsSurvey
|
||||||
|
import polars as pl
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% Fixed Variables
|
||||||
|
# Inputs handed to QualtricsSurvey below: the exported "Labels" CSV with the
# responses, and the .qsf survey definition file.
RESULTS_FILE, QSF_FILE = (
    'data/exports/2-4-26/JPMC_Chase Brand Personality_Quant Round 1_February 4, 2026_Labels.csv',
    'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase_Brand_Personality_Quant_Round_1.qsf',
)
|
||||||
|
|
||||||
|
|
||||||
|
# %% CLI argument parsing for batch automation
|
||||||
|
# When run as script: uv run XX_statistical_significance.script.py --age '["18 to 21 years"]' --consumer '["Starter"]'
|
||||||
|
# Central filter configuration - add new filters here only
|
||||||
|
# Format: 'cli_arg_name': 'QualtricsSurvey.options_* attribute name'
|
||||||
|
# Each key is used both as the CLI flag name (--<key>) and as the attribute
# name on the parsed argparse namespace; each value names the attribute on the
# survey object that is read to get that filter's full option list.
FILTER_CONFIG = dict(
    age='options_age',
    gender='options_gender',
    ethnicity='options_ethnicity',
    income='options_income',
    consumer='options_consumer',
    business_owner='options_business_owner',
    ai_user='options_ai_user',
    investable_assets='options_investable_assets',
    industry='options_industry',
)
|
||||||
|
|
||||||
|
def parse_cli_args():
    """Parse the filter options supplied on the command line.

    One ``--<name>`` option is registered for every key of ``FILTER_CONFIG``;
    each takes a JSON list encoded as a string. ``--filter-name`` is a free-text
    label for the filter combination.

    Returns:
        argparse.Namespace: one attribute per filter, holding the decoded list
        or ``None`` when the option was omitted, empty, or JSON ``null``, plus
        ``filter_name``. When ``get_ipython`` is defined (interactive session),
        the command line is not read and every attribute is ``None``.

    Exits through ``parser.error`` (usage message, status 2) when a filter
    value is not valid JSON or decodes to something other than a list.
    """
    parser = argparse.ArgumentParser(description='Generate quant report with optional filters')

    # Dynamically add filter arguments from config
    for filter_name in FILTER_CONFIG:
        parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')

    parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')

    # Only parse if running as script (not in Jupyter/interactive).
    # get_ipython only exists inside IPython/Jupyter; a NameError means we are
    # running as a plain script. Keep the try body to the probe alone so an
    # unrelated NameError is never mistaken for "not interactive".
    try:
        get_ipython()  # noqa: F821  # type: ignore
    except NameError:
        pass
    else:
        # Interactive: return namespace with all filters set to None
        no_filters = {f: None for f in FILTER_CONFIG}
        return argparse.Namespace(**no_filters, filter_name=None)

    args = parser.parse_args()

    # Parse JSON strings to lists
    for filter_name in FILTER_CONFIG:
        raw = getattr(args, filter_name)
        if not raw:
            # Option omitted or passed as an empty string: no filter.
            setattr(args, filter_name, None)
            continue
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as exc:
            parser.error(f"--{filter_name} expects a JSON list such as '[\"value\"]', got {raw!r} ({exc})")
        # A bare JSON string would later be joined character by character when
        # the filter description is built, so only lists (or null) are accepted.
        if parsed is not None and not isinstance(parsed, list):
            parser.error(f"--{filter_name} expects a JSON list such as '[\"value\"]', got {raw!r}")
        setattr(args, filter_name, parsed)
    return args
|
||||||
|
|
||||||
|
cli_args = parse_cli_args()


# %%
S = QualtricsSurvey(RESULTS_FILE, QSF_FILE)
data_all = S.load_data()


# %% Build filtered dataset based on CLI args

# CLI args: None means "no filter applied" - filter_data() will skip None filters

# Build filter values dict dynamically from FILTER_CONFIG
_active_filters = {filter_name: getattr(cli_args, filter_name) for filter_name in FILTER_CONFIG}

_d = S.filter_data(data_all, **_active_filters)

# Write filter description file if filter-name is provided
if cli_args.filter_name and S.fig_save_dir:
    # Get the filter slug (e.g., "All_Respondents", "Cons-Starter", etc.)
    _filter_slug = S._get_filter_slug()
    _filter_slug_dir = S.fig_save_dir / _filter_slug
    _filter_slug_dir.mkdir(parents=True, exist_ok=True)

    # Build filter description: a detailed multi-line form for the per-filter
    # file and a short one-line form for the index.
    _filter_desc_lines = [
        f"Filter: {cli_args.filter_name}",
        "",
        "Applied Filters:",
    ]
    _short_desc_parts = []
    for filter_name, options_attr in FILTER_CONFIG.items():
        all_options = getattr(S, options_attr)
        values = _active_filters[filter_name]
        display_name = filter_name.replace('_', ' ').title()
        # None means no filter applied (same as "All")
        if values is not None and values != all_options:
            _short_desc_parts.append(f"{display_name}: {', '.join(values)}")
            _filter_desc_lines.append(f"  {display_name}: {', '.join(values)}")
        else:
            _filter_desc_lines.append(f"  {display_name}: All")

    # Write detailed description INSIDE the filter-slug directory
    # Sanitize filter name for filename usage (replace / and other chars)
    _safe_filter_name = re.sub(r'[^\w\s-]', '_', cli_args.filter_name)
    _filter_file = _filter_slug_dir / f"{_safe_filter_name}.txt"
    _filter_file.write_text('\n'.join(_filter_desc_lines))

    # Append to summary index file at figures/<export_date>/filter_index.txt
    _summary_file = S.fig_save_dir / "filter_index.txt"
    _short_desc = "; ".join(_short_desc_parts) if _short_desc_parts else "All Respondents"
    _summary_line = f"{_filter_slug} | {cli_args.filter_name} | {_short_desc}\n"

    # Append or create the summary file
    if _summary_file.exists():
        # Avoid duplicate entries for same slug. Compare against the first
        # column of each index row rather than searching the whole text: a
        # substring search would treat a slug as already indexed whenever it
        # merely appears inside a longer slug, a filter name, or a description.
        _indexed_slugs = {
            _row.split(" | ", 1)[0]
            for _row in _summary_file.read_text().splitlines()
            if " | " in _row
        }
        if _filter_slug not in _indexed_slugs:
            with _summary_file.open('a') as f:
                f.write(_summary_line)
    else:
        _header = "Filter Index\n" + "=" * 80 + "\n\n"
        _header += "Directory | Filter Name | Description\n"
        _header += "-" * 80 + "\n"
        _summary_file.write_text(_header + _summary_line)

# Save to logical variable name for further analysis
data = _d
data.collect()
|
||||||
|
|
||||||
|
# %%
|
||||||
Reference in New Issue
Block a user