missing data analysis

2026-02-10 14:24:26 +01:00
parent 14e28cf368
commit 9dfab75925
5 changed files with 1477 additions and 7 deletions
--- a/run_filter_combinations.py
+++ b/run_filter_combinations.py
@@ -179,10 +179,25 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
                'filters': {'industry': [industry]}
            })
    
+    # Voice ranking completeness filters
+    # These use a special flag rather than demographic filters, so each combo stores
+    # the mode in a dedicated key that main() forwards to run_report (--voice-ranking-filter).
+    if not category or category in ['all_filters', 'voice_ranking']:
+        combinations.append({
+            'name': 'VoiceRanking-OnlyMissing',
+            'filters': {},
+            'voice_ranking_filter': 'only-missing',
+        })
+        combinations.append({
+            'name': 'VoiceRanking-ExcludeMissing',
+            'filters': {},
+            'voice_ranking_filter': 'exclude-missing',
+        })
+    
    return combinations


-def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None) -> bool:
+def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None, voice_ranking_filter: str = None) -> bool:
    """
    Run the report script with given filters.
    
@@ -191,6 +206,9 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
        name: Name for this filter combination (used for .txt description file)
        dry_run: If True, just print command without running
        sl_threshold: If set, exclude respondents with >= N straight-lined question groups
+        voice_ranking_filter: If set, filter by voice ranking completeness.
+            'only-missing' keeps only respondents missing QID98 data,
+            'exclude-missing' removes them.
        
    Returns:
        True if successful, False otherwise
@@ -205,6 +223,10 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
    if sl_threshold is not None:
        cmd.extend(['--sl-threshold', str(sl_threshold)])
    
+    # Pass voice ranking filter if specified
+    if voice_ranking_filter is not None:
+        cmd.extend(['--voice-ranking-filter', voice_ranking_filter])
+    
    for filter_name, values in filters.items():
        if values:
            cmd.extend([f'--{filter_name}', json.dumps(values)])
@@ -235,7 +257,7 @@ def main():
    parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
    parser.add_argument(
        '--category',
-        choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry'],
+        choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry', 'voice_ranking'],
        default='all_filters',
        help='Filter category to run combinations for (default: all_filters)'
    )
@@ -259,7 +281,7 @@ def main():
        print("\nDRY RUN - Commands that would be executed:")
        for combo in combinations:
            print(f"\n{combo['name']}:")
-            run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold)
+            run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter'))
        return
    
    # Run each combination with progress bar
@@ -268,7 +290,7 @@ def main():
    
    for combo in tqdm(combinations, desc="Running reports", unit="filter"):
        tqdm.write(f"Running: {combo['name']}")
-        if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold):
+        if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter')):
            successful += 1
        else:
            failed.append(combo['name'])