"""Helpers for reading Qualtrics CSV exports keyed by question ImportId."""

import ast
import json
from pathlib import Path
from typing import Any, Union

import pandas as pd
import polars as pl

# Results files whose path contains this fragment are read differently by
# extract_qid_descr_map (see that function's docstring).
_DIRECT_MAP_PATH_TAG = '1_1-16-2026'


def _parse_export_metadata(text: str) -> Any:
    """Parse a ``{...}`` metadata string as JSON, else as a Python literal.

    Raises:
        ValueError: If the text is neither valid JSON nor a Python literal.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Not JSON (e.g. single-quoted keys); try a Python dict literal below.
        pass
    try:
        # literal_eval only accepts literals, so unlike eval() it cannot run
        # code embedded in the CSV.
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError) as err:
        raise ValueError(f"Cannot parse export metadata: {text!r}") from err


def extract_qid(val: Any) -> Any:
    """Extract the 'ImportId' from a string representation of a dictionary.

    Args:
        val: A mapping with an ``'ImportId'`` key, or its string form such as
            ``'{"ImportId":"startDate","timeZone":"America/Denver"}'``.

    Returns:
        The value stored under ``'ImportId'``.

    Raises:
        ValueError: If a ``{...}`` string cannot be parsed.
        TypeError: If ``val`` is not, and does not parse to, something that
            can be indexed by ``'ImportId'`` (e.g. an empty/NaN cell).
        KeyError: If the parsed mapping has no ``'ImportId'`` key.
    """
    if isinstance(val, str):
        text = val.strip()
        if text.startswith('{') and text.endswith('}'):
            val = _parse_export_metadata(text)
    try:
        return val['ImportId']
    except TypeError as err:
        raise TypeError(
            f"Expected export metadata with an 'ImportId' key, got {val!r}"
        ) from err


def extract_qid_descr_map(results_file: Union[str, Path]) -> dict:
    """Extract mapping of Qualtrics ImportID to Question Description from results file.

    Two layouts are handled:

    * If the file path contains ``'1_1-16-2026'``, the CSV header and its
      first data row are returned directly as ``{column name: first-row value}``
      (presumably that export already has ImportIDs as its header -- TODO
      confirm against the file).
    * Otherwise the file is expected to have three header rows: row 1 with the
      Qualtrics Editor question names (e.g. 'B_VOICE SEL. 18-8'), row 2 with
      the question content, and row 3 with the export metadata
      (e.g. '{"ImportId":"startDate","timeZone":"America/Denver"}').

    Args:
        results_file: Path to the results CSV.

    Returns:
        dict mapping each ImportID to its question description.

    Raises:
        ValueError: If the export-metadata row is missing or unparsable.
    """
    results_file = Path(results_file)

    if _DIRECT_MAP_PATH_TAG in results_file.as_posix():
        first_row = pd.read_csv(results_file, nrows=1)
        return first_row.iloc[0].to_dict()

    # Skip row 1 so that row 2 (question content) becomes the column labels
    # and row 3 (export metadata) is the single data row that is read.
    descr_and_meta = pd.read_csv(results_file, nrows=1, skiprows=1)
    if descr_and_meta.empty:
        raise ValueError(
            f"No export-metadata row found in {results_file.as_posix()}"
        )

    metadata_row = descr_and_meta.iloc[0]
    return {
        extract_qid(metadata): description
        for description, metadata in metadata_row.items()
    }


def load_csv_with_qid_headers(file_path: Union[str, Path]) -> pl.DataFrame:
    """
    Load CSV where column headers are in row 3 as dict strings with ImportId.

    The 3rd row contains metadata like '{"ImportId":"startDate","timeZone":"America/Denver"}'.
    This function extracts the ImportId from each column and uses it as the column name.

    Parameters:
        file_path (Path): Path to the CSV file to load.

    Returns:
        pl.DataFrame: Polars DataFrame with ImportId as column names,
        containing every row after the three header rows.
    """
    file_path = Path(file_path)

    # Read the 3rd row (index 2) which contains the metadata dictionaries.
    # header=None yields the raw cell values instead of using them as labels.
    metadata_row = pd.read_csv(file_path, nrows=1, skiprows=2, header=None)

    # Extract ImportIds from each column value in this row.
    new_columns = [extract_qid(cell) for cell in metadata_row.iloc[0]]

    # Read the actual data starting from row 4. has_header=False is required:
    # polars parses the header at the skip_rows offset, so without it the
    # first response row would be consumed as a header and silently dropped
    # when the columns are renamed below.
    df = pl.read_csv(file_path, skip_rows=3, has_header=False)

    # Rename columns with the extracted ImportIds.
    df.columns = new_columns

    return df