missing data analysis
This commit is contained in:
@@ -179,10 +179,25 @@ def get_filter_combinations(survey: QualtricsSurvey, category: str = None) -> li
|
||||
'filters': {'industry': [industry]}
|
||||
})
|
||||
|
||||
# Voice ranking completeness filter
|
||||
# These use a special flag rather than demographic filters, so we store
|
||||
# the mode in a dedicated 'voice_ranking_filter' key; main() forwards it to run_report, which passes it on as --voice-ranking-filter.
|
||||
if not category or category in ['all_filters', 'voice_ranking']:
|
||||
combinations.append({
|
||||
'name': 'VoiceRanking-OnlyMissing',
|
||||
'filters': {},
|
||||
'voice_ranking_filter': 'only-missing',
|
||||
})
|
||||
combinations.append({
|
||||
'name': 'VoiceRanking-ExcludeMissing',
|
||||
'filters': {},
|
||||
'voice_ranking_filter': 'exclude-missing',
|
||||
})
|
||||
|
||||
return combinations
|
||||
|
||||
|
||||
def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None) -> bool:
|
||||
def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_threshold: int = None, voice_ranking_filter: str = None) -> bool:
|
||||
"""
|
||||
Run the report script with given filters.
|
||||
|
||||
@@ -191,6 +206,9 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
|
||||
name: Name for this filter combination (used for .txt description file)
|
||||
dry_run: If True, just print command without running
|
||||
sl_threshold: If set, exclude respondents with >= N straight-lined question groups
|
||||
voice_ranking_filter: If set, filter by voice ranking completeness.
|
||||
'only-missing' keeps only respondents missing QID98 data,
|
||||
'exclude-missing' removes them.
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
@@ -205,6 +223,10 @@ def run_report(filters: dict, name: str = None, dry_run: bool = False, sl_thresh
|
||||
if sl_threshold is not None:
|
||||
cmd.extend(['--sl-threshold', str(sl_threshold)])
|
||||
|
||||
# Pass voice ranking filter if specified
|
||||
if voice_ranking_filter is not None:
|
||||
cmd.extend(['--voice-ranking-filter', voice_ranking_filter])
|
||||
|
||||
for filter_name, values in filters.items():
|
||||
if values:
|
||||
cmd.extend([f'--{filter_name}', json.dumps(values)])
|
||||
@@ -235,7 +257,7 @@ def main():
|
||||
parser.add_argument('--dry-run', action='store_true', help='Preview combinations without running')
|
||||
parser.add_argument(
|
||||
'--category',
|
||||
choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry'],
|
||||
choices=['all_filters', 'all', 'age', 'gender', 'ethnicity', 'income', 'consumer', 'business_owner', 'ai_user', 'investable_assets', 'industry', 'voice_ranking'],
|
||||
default='all_filters',
|
||||
help='Filter category to run combinations for (default: all_filters)'
|
||||
)
|
||||
@@ -259,7 +281,7 @@ def main():
|
||||
print("\nDRY RUN - Commands that would be executed:")
|
||||
for combo in combinations:
|
||||
print(f"\n{combo['name']}:")
|
||||
run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold)
|
||||
run_report(combo['filters'], name=combo['name'], dry_run=True, sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter'))
|
||||
return
|
||||
|
||||
# Run each combination with progress bar
|
||||
@@ -268,7 +290,7 @@ def main():
|
||||
|
||||
for combo in tqdm(combinations, desc="Running reports", unit="filter"):
|
||||
tqdm.write(f"Running: {combo['name']}")
|
||||
if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold):
|
||||
if run_report(combo['filters'], name=combo['name'], sl_threshold=args.sl_threshold, voice_ranking_filter=combo.get('voice_ranking_filter')):
|
||||
successful += 1
|
||||
else:
|
||||
failed.append(combo['name'])
|
||||
|
||||
Reference in New Issue
Block a user