missing data analysis

This commit is contained in:
2026-02-10 14:24:26 +01:00
parent 14e28cf368
commit 9dfab75925
5 changed files with 1477 additions and 7 deletions

View File

@@ -179,10 +179,25 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
'filters': {'industry': [industry]}
})
# Voice ranking completeness filter
# These use a special flag rather than demographic filters, so we store
# the mode in a dedicated key that run_report passes as --voice-ranking-filter.
if not category or category in ['all_filters', 'voice_ranking']:
combinations.append({
'name': 'VoiceRanking-OnlyMissing',
'filters': {},
'voice_ranking_filter': 'only-missing',
})
combinations.append({
'name': 'VoiceRanking-ExcludeMissing',
'filters': {},
'voice_ranking_filter': 'exclude-missing',
})
return combinations
def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None) -> bool:
def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None, voice_ranking_filter: str = None) -> bool:
"""
Run the report script with given filters.
@@ -191,6 +206,9 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
name: Name for this filter combination (used for .txt description file)
dry_run: If True, just print command without running
sl_threshold: If set, exclude respondents with >= N straight-lined question groups
voice_ranking_filter: If set, filter by voice ranking completeness.
'only-missing' keeps only respondents missing QID98 data,
'exclude-missing' removes them.
Returns:
True if successful, False otherwise
@@ -205,6 +223,10 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
if sl_threshold is not None:
cmd.extend(['--sl-threshold', str(sl_threshold)])
# Pass voice ranking filter if specified
if voice_ranking_filter is not None:
cmd.extend(['--voice-ranking-filter', voice_ranking_filter])
for filter_name, values in filters.items():
if values:
cmd.extend([f'--{filter_name}', json.dumps(values)])
@@ -235,7 +257,7 @@ def main():
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
parser.add_argument(
'--category',
choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry'],
choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry', 'voice_ranking'],
default='all_filters',
help='Filter category to run combinations for (default: all_filters)'
)
@@ -259,7 +281,7 @@ def main():
print("\nDRY RUN - Commands that would be executed:")
for combo in combinations:
print(f"\n{combo['name']}:")
run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold)
run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter'))
return
# Run each combination with progress bar
@@ -268,7 +290,7 @@ def main():
for combo in tqdm(combinations, desc="Running reports", unit="filter"):
tqdm.write(f"Running: {combo['name']}")
if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold):
if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter')):
successful += 1
else:
failed.append(combo['name'])