53 lines
2.0 KiB
Python
53 lines
2.0 KiB
Python
import marimo as mo
|
|
import polars as pl
|
|
|
|
|
|
def check_progress(data: "pl.LazyFrame") -> str:
    """Report whether every response has a 'progress' value of 100.

    Args:
        data: Lazy frame containing a 'progress' column. The comparison
            below assumes the column is numeric -- confirm against the
            upstream schema.

    Returns:
        A markdown string: the success message when every row has
        progress == 100, otherwise the warning message.
    """
    # Collect only the column we need instead of materialising the whole frame.
    progress = data.select(pl.col('progress')).collect().get_column('progress')

    # The previous check only tested that the column held a single distinct
    # value, so a dataset where every response was stuck at e.g. 50 (or null)
    # was reported as complete. Compare against 100 explicitly; nulls count
    # as incomplete. An empty frame keeps reporting the warning, as before.
    all_complete = progress.len() > 0 and bool(
        (progress == 100).fill_null(False).all()
    )

    if all_complete:
        return """### Responses Complete: \n\n✅ All responses are complete (progress = 100) """

    return "### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️"
|
|
|
|
|
|
def duration_validation(data: "pl.LazyFrame") -> str:
    """Flag responses whose 'duration' lies more than 3 standard deviations from the mean.

    Args:
        data: Lazy frame containing a numeric 'duration' column (the report
            text labels the values as seconds).

    Returns:
        A markdown string. When no outliers are found (or when there are too
        few durations to compute a standard deviation) the "no outliers"
        message is returned; otherwise a summary of the thresholds followed
        by a table of the flagged rows. The input data is never modified.
    """
    no_outliers_message = "### Duration Outliers: \n\n✅ No duration outliers detected"

    duration_stats = data.select(
        pl.col('duration').mean().alias('mean_duration'),
        pl.col('duration').std().alias('std_duration'),
    ).collect()
    mean_duration = duration_stats['mean_duration'][0]
    std_duration = duration_stats['std_duration'][0]

    # With zero or one non-null durations polars yields null for the mean
    # and/or sample standard deviation; the threshold arithmetic below would
    # then raise TypeError. No outlier can be defined in that case.
    if mean_duration is None or std_duration is None:
        return no_outliers_message

    upper_outlier_threshold = mean_duration + 3 * std_duration
    lower_outlier_threshold = mean_duration - 3 * std_duration

    is_outlier = (
        (pl.col('duration') > upper_outlier_threshold)
        | (pl.col('duration') < lower_outlier_threshold)
    )

    # Keep the flag column so the rendered table shows why each row is listed.
    outlier_data = (
        data.with_columns(is_outlier.alias('outlier_duration'))
        .filter(pl.col('outlier_duration'))
        .collect()
    )

    if outlier_data.shape[0] == 0:
        return no_outliers_message

    # Built line by line so the markdown never picks up source-code
    # indentation (leading spaces would turn lines into code blocks).
    return (
        "### Duration Outliers:\n"
        "\n"
        "**⚠️ Potential outliers detected based on response duration ⚠️**\n"
        "\n"
        f"- Mean Duration: {mean_duration:.2f} seconds (approximately {mean_duration/60:.2f} minutes)\n"
        f"- Standard Deviation of Duration: {std_duration:.2f} seconds\n"
        f"- Upper Outlier Threshold (Mean + 3*Std): {upper_outlier_threshold:.2f} seconds\n"
        f"- Lower Outlier Threshold (Mean - 3*Std): {lower_outlier_threshold:.2f} seconds\n"
        f"- Number of Outlier Responses: {outlier_data.shape[0]}\n"
        "\n"
        "Outliers:\n"
        "\n"
        f"{mo.ui.table(outlier_data)}\n"
        "\n"
        "\n"
        "**⚠️ NOTE: These have not been removed from the dataset ⚠️**\n"
        "\n"
    )
|
|
|