74 lines
2.6 KiB
Python
74 lines
2.6 KiB
Python
import ast
import json
from pathlib import Path
from typing import Union

import pandas as pd
import polars as pl
|
|
|
|
def extract_qid(val):
    """Extract the 'ImportId' from a string representation of a dictionary.

    The export-metadata cells this helper is applied to look like
    '{"ImportId":"startDate","timeZone":"America/Denver"}'.

    Parameters:
        val: Cell value to inspect. Only strings that start with '{' and
            end with '}' are parsed; anything else is ignored.

    Returns:
        The value stored under 'ImportId', or None when `val` is not a
        brace-delimited string (for example NaN from an empty cell).

    Raises:
        KeyError: If the parsed dictionary has no 'ImportId' key.
        ValueError, SyntaxError: If the text is neither valid JSON nor a
            valid Python literal.
    """
    if not (isinstance(val, str) and val.startswith('{') and val.endswith('}')):
        return None

    # The text comes straight out of a data file, so it must never reach
    # eval(). JSON is tried first because that is the documented cell format
    # (and eval() cannot read JSON's true/false/null anyway); literal_eval
    # keeps single-quoted Python dict literals working without executing code.
    try:
        parsed = json.loads(val)
    except json.JSONDecodeError:
        parsed = ast.literal_eval(val)

    return parsed['ImportId']
|
|
|
|
|
|
def extract_qid_descr_map(results_file: Union[str, Path]) -> dict:
    """Extract mapping of Qualtrics ImportID to Question Description from results file.

    Expected layout of a regular export:
        row 1: Qualtrics Editor question names (ie 'B_VOICE SEL. 18-8')
        row 2: the question content (used as the description)
        row 3: the Export Metadata
               (ie '{"ImportId":"startDate","timeZone":"America/Denver"}')

    Parameters:
        results_file (str | Path): Path to the results CSV file.

    Returns:
        dict: ImportID -> question description. Columns whose metadata cell
        cannot be parsed by `extract_qid` are keyed by whatever that helper
        returns for them.

    Raises:
        ValueError: If a regular export has fewer than three rows.
    """
    results_file = Path(results_file)

    # NOTE(review): this one export is special-cased by file name. The code
    # maps its header row to its first data row, so presumably that file
    # already uses ImportIds as column headers - confirm against the file.
    if '1_1-16-2026' in results_file.as_posix():
        first_row = pd.read_csv(results_file, nrows=1).iloc[0]
        return first_row.to_dict()

    # Read rows 2 and 3 as plain data instead of using row 2 as a pandas
    # header: a header row gets de-duplicated ('X' -> 'X.1') and blank cells
    # are renamed 'Unnamed: N', which would corrupt the descriptions.
    # dtype=str / keep_default_na=False keep every cell exactly as written.
    rows = pd.read_csv(
        results_file,
        header=None,
        skiprows=1,
        nrows=2,
        dtype=str,
        keep_default_na=False,
    )
    if len(rows) < 2:
        raise ValueError(
            f"{results_file} does not contain the question-content and "
            "export-metadata rows (rows 2 and 3)."
        )

    descriptions = rows.iloc[0]
    import_ids = rows.iloc[1].map(extract_qid)

    return dict(zip(import_ids, descriptions))
|
|
|
|
|
|
def load_csv_with_qid_headers(file_path: Union[str, Path]) -> pl.DataFrame:
    """
    Load CSV where column headers are in row 3 as dict strings with ImportId.

    The 3rd row contains metadata like '{"ImportId":"startDate","timeZone":"America/Denver"}'.
    This function extracts the ImportId from each column and uses it as the column name.
    All rows from row 4 onwards are returned as data.

    Parameters:
        file_path (str | Path): Path to the CSV file to load.

    Returns:
        pl.DataFrame: Polars DataFrame with ImportId as column names.

    Raises:
        ValueError: If any column of row 3 yields no ImportId.
    """
    file_path = Path(file_path)

    # Read the 3rd row (index 2) which contains the metadata dictionaries.
    # header=None returns the raw cell values instead of treating them as
    # column names.
    meta_row = pd.read_csv(file_path, nrows=1, skiprows=2, header=None).iloc[0]

    # Extract ImportIds from each column value in this row
    new_columns = [extract_qid(cell) for cell in meta_row]

    # Fail early with a readable message rather than letting the column
    # assignment below reject a missing name.
    missing = [pos for pos, name in enumerate(new_columns, start=1) if name is None]
    if missing:
        raise ValueError(
            f"No ImportId found in row 3 of {file_path} for column position(s): {missing}"
        )

    # Read the actual data starting from row 4. has_header=False is required:
    # by default polars parses the first line after the skipped rows as a
    # header, which would silently drop the first response row once the
    # columns are renamed.
    df = pl.read_csv(file_path, skip_rows=3, has_header=False)

    # Rename columns with the extracted ImportIds
    df.columns = new_columns

    return df