setup complete framework of analysis

This commit is contained in:
2026-01-23 09:53:59 +01:00
parent 42f2d775c7
commit 5327b50ab0
5 changed files with 465 additions and 6 deletions

54
validation.py Normal file
View File

@@ -0,0 +1,54 @@
import marimo as mo
import polars as pl
def check_progress(data):
    """Report whether every response has reached progress = 100.

    Args:
        data: Lazy polars frame (it is filtered and then ``.collect()``-ed)
            containing a numeric 'progress' column.

    Returns:
        A marimo markdown element: a success message when no row has a
        progress value other than 100, otherwise a warning message.

    Note:
        A row counts as incomplete when its progress differs from 100 or is
        null. A frame with no rows has no incomplete responses and therefore
        yields the success message.
    """
    # Checking for a single unique value is not enough: a dataset where every
    # response is stuck at e.g. 50 would also have exactly one unique value.
    # Nulls are tested explicitly because `!= 100` evaluates to null for them
    # and the filter would silently drop those rows.
    not_complete = (pl.col('progress') != 100) | pl.col('progress').is_null()
    incomplete_count = data.filter(not_complete).collect().height

    if incomplete_count == 0:
        return mo.md("""### Responses Complete: \n\n✅ All responses are complete (progress = 100) """)
    return mo.md("### Responses Complete: \n\n⚠️ There are incomplete responses (progress < 100) ⚠️")
def duration_validation(data):
    """Flag responses whose duration is more than 3 standard deviations from the mean.

    Args:
        data: Lazy polars frame (queries are built on it and then
            ``.collect()``-ed) containing a numeric 'duration' column. The
            report labels the values as seconds.

    Returns:
        A marimo markdown element:
        - a notice when the mean or standard deviation cannot be computed,
        - a success message when no response falls outside mean +/- 3 * std,
        - otherwise a report with the statistics, both thresholds, the
          number of outliers and a table of the outlier rows.

    Note:
        Outliers are only reported; the input data is not modified.
    """
    duration_stats = data.select(
        pl.col('duration').mean().alias('mean_duration'),
        pl.col('duration').std().alias('std_duration'),
    ).collect()
    mean_duration = duration_stats['mean_duration'][0]
    std_duration = duration_stats['std_duration'][0]

    # polars yields null (None) for the mean of an empty/all-null column and
    # for the sample standard deviation of fewer than two values. Without this
    # guard the threshold arithmetic below would raise a TypeError.
    if mean_duration is None or std_duration is None:
        return mo.md("### Duration Outliers: \n\n⚠️ Not enough duration data to check for outliers ⚠️")

    upper_outlier_threshold = mean_duration + 3 * std_duration
    lower_outlier_threshold = mean_duration - 3 * std_duration

    is_outlier = (
        (pl.col('duration') > upper_outlier_threshold)
        | (pl.col('duration') < lower_outlier_threshold)
    )
    # Keep the 'outlier_duration' flag column so it shows up in the table.
    outlier_data = (
        data.with_columns(is_outlier.alias('outlier_duration'))
        .filter(pl.col('outlier_duration'))
        .collect()
    )

    if outlier_data.height == 0:
        return mo.md("### Duration Outliers: \n\n✅ No duration outliers detected")

    return mo.md(f"""
    ### Duration Outliers:
    **⚠️ Potential outliers detected based on response duration ⚠️**
    - Mean Duration: {mean_duration:.2f} seconds (approximately {mean_duration/60:.2f} minutes)
    - Standard Deviation of Duration: {std_duration:.2f} seconds
    - Upper Outlier Threshold (Mean + 3*Std): {upper_outlier_threshold:.2f} seconds
    - Lower Outlier Threshold (Mean - 3*Std): {lower_outlier_threshold:.2f} seconds
    - Number of Outlier Responses: {outlier_data.shape[0]}
    Outliers:
    {mo.ui.table(outlier_data)}
    **⚠️ NOTE: These have not been removed from the dataset ⚠️**
    """)