SL (straight-liner) filter
This commit is contained in:
@@ -48,8 +48,9 @@ def parse_cli_args():
|
||||
parser.add_argument(f'--{filter_name}', type=str, default=None, help=f'JSON list of {filter_name} values')
|
||||
|
||||
parser.add_argument('--filter-name', type=str, default=None, help='Name for this filter combination (used for .txt description file)')
|
||||
parser.add_argument('--figures-dir', type=str, default=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
|
||||
parser.add_argument('--figures-dir', type=str, default=f'figures/{Path(RESULTS_FILE).parts[2]}', help='Override the default figures directory')
|
||||
parser.add_argument('--best-character', type=str, default="the_coach", help='Slug of the best chosen character (default: "the_coach")')
|
||||
parser.add_argument('--sl-threshold', type=int, default=None, help='Exclude respondents who straight-lined >= N question groups (e.g. 3 removes anyone with 3+ straight-lined groups)')
|
||||
|
||||
# Only parse if running as script (not in Jupyter/interactive)
|
||||
try:
|
||||
@@ -57,7 +58,7 @@ def parse_cli_args():
|
||||
get_ipython() # noqa: F821 # type: ignore
|
||||
# Return namespace with all filters set to None
|
||||
no_filters = {f: None for f in FILTER_CONFIG}
|
||||
return argparse.Namespace(**no_filters, filter_name=None, figures_dir=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', best_character="the_coach")
|
||||
return argparse.Namespace(**no_filters, filter_name=None, figures_dir=f'figures/statistical_significance/{Path(RESULTS_FILE).parts[2]}', best_character="the_coach", sl_threshold=None)
|
||||
except NameError:
|
||||
args = parser.parse_args()
|
||||
# Parse JSON strings to lists
|
||||
@@ -138,6 +139,41 @@ if cli_args.filter_name and S.fig_save_dir:
|
||||
_header += "-" * 80 + "\n"
|
||||
_summary_file.write_text(_header + _summary_line)
|
||||
|
||||
# %% Apply straight-liner threshold filter (if specified)
# When --sl-threshold N is given, drop every respondent who straight-lined
# N or more question groups across the speaking style and voice scale
# questions. Without the flag this cell leaves `_d` untouched.
if cli_args.sl_threshold is not None:
    _sl_n = cli_args.sl_threshold
    # Kept on the Survey object so the filter slug/description include it.
    S.sl_threshold = _sl_n
    print(f"Applying straight-liner filter: excluding respondents with ≥{_sl_n} straight-lined question groups...")
    _n_before = _d.select(pl.len()).collect().item()

    # Pull each question group (columns renamed by the Survey helpers) and
    # line them up per respondent for check_straight_liners.
    _sl_ss_or, _ = S.get_ss_orange_red(_d)
    _sl_ss_gb, _ = S.get_ss_green_blue(_d)
    _sl_vs, _ = S.get_voice_scale_1_10(_d)
    _sl_all_q = _sl_ss_or.join(_sl_ss_gb, on='_recordId').join(_sl_vs, on='_recordId')

    # NOTE(review): max_score=5 is passed for all three groups, including the
    # one coming from get_voice_scale_1_10 — confirm this is intended.
    _, _sl_df = check_straight_liners(_sl_all_q, max_score=5)

    if _sl_df is None or _sl_df.is_empty():
        print(" No straight-liners detected — no respondents removed.")
    else:
        # One row per respondent with the number of straight-lined groups.
        _sl_per_respondent = _sl_df.group_by("Record ID").agg(
            pl.len().alias("sl_count")
        )
        # Respondents at or above the threshold, keyed like the main data.
        _sl_counts = _sl_per_respondent.filter(
            pl.col("sl_count") >= _sl_n
        ).select(pl.col("Record ID").alias("_recordId"))

        # Anti-join keeps only the respondents that are NOT in _sl_counts.
        _sl_kept = _d.collect().join(_sl_counts, on="_recordId", how="anti")
        _d = _sl_kept.lazy()

        # Keep the Survey object in sync so the reported sample size is correct.
        S.data_filtered = _d
        _n_after = _d.select(pl.len()).collect().item()
        print(f" Removed {_n_before - _n_after} respondents ({_n_before} → {_n_after})")

# Expose the (possibly filtered) frame under its analysis-facing name.
data = _d
data.collect()
|
||||
|
||||
Reference in New Issue
Block a user