Validation of data exports with reference file

This commit is contained in:
2026-01-22 08:51:27 +01:00
parent 62b57ae862
commit 18ada6ca66
5 changed files with 9899 additions and 23 deletions

View File

@@ -10,39 +10,53 @@ def _():
import polars as pl
import sqlite3
from pathlib import Path
return Path, pl, sqlite3
return Path, pl
@app.cell
# NOTE(review): the next two lines look like the pre-change cell header and
# body from a diff whose +/- markers were stripped; the cell that follows
# redefines `_` without parameters -- confirm which version is live.
def _(Path, pl):
results_file = Path('./data/VB_Qualtrics_labels.csv')
def _():
# Configuration cell: returns the path string of the export CSV as
# RESULTS_FILE. The commented-out assignment below is an alternative export
# path kept for switching by hand.
# RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
return (RESULTS_FILE,)
@app.cell
def _(Path, RESULTS_FILE, pl):
# Wrap RESULTS_FILE in a Path and read it as CSV into a polars DataFrame.
results_file = Path(RESULTS_FILE)
df = pl.read_csv(results_file, skip_rows=0)
# Bare expression; presumably the notebook displays it as the cell output.
df
# NOTE(review): two consecutive return statements. Read literally, the first
# one wins and `results_file` is never returned, yet a later cell takes
# `results_file` as a parameter. This looks like old/new diff lines with the
# +/- markers stripped -- confirm that `return df, results_file` is the live one.
return (df,)
return df, results_file
@app.cell
# NOTE(review): two `def` headers in a row; the first (taking `sqlite3`)
# appears to be the pre-change header of this cell -- confirm.
def _(df, sqlite3):
def _(df, pl, results_file):
# Build a one-row DataFrame holding the set of CSV column names and the
# POSIX-style path of the file they came from.
colset = set(df.columns)
this_df_verify = pl.DataFrame({'column_names': [colset], 'results_file': results_file.as_posix()})
this_df_verify
return (this_df_verify,)
# NOTE(review): everything below follows a `return` and uses `sqlite3`, which
# the second header does not receive; it looks like the removed table-creation
# code from the previous version of this cell -- confirm before reusing it.
# NOTE(review): the CSV column names are interpolated into the SQL text
# unquoted, so names containing spaces or punctuation would presumably break
# the CREATE TABLE statement -- verify against the real export headers.
# Create table if not exists with columns from csv
with sqlite3.connect("data/qualtrics_JP.db") as conn:
# interact with database
q= f'''
CREATE TABLE IF NOT EXISTS qualtrics_raw(
{', '.join(list(df.columns))}
);
'''
print(q)
conn.execute(q)
@app.cell
def _(Path, pl, this_df_verify):
    # Maintain the running verification record on disk: one row per validated
    # export, appended to ./data/exports/verification.csv.
    #
    # Path           -- pathlib.Path, passed in from the imports cell.
    # pl             -- the polars module.
    # this_df_verify -- one-row DataFrame describing the current export.
    verification_record = Path('./data/exports/verification.csv')
    if verification_record.exists():
        # Extend the stored record with the row for the current export.
        # NOTE(review): assumes the stored CSV and this_df_verify have
        # compatible schemas for a vertical concat -- confirm.
        verify_df = pl.concat(
            [pl.read_csv(verification_record), this_df_verify],
            how='vertical',
        )
    else:
        # First run: the record starts out as just this row.
        verify_df = this_df_verify
    # Persist in both branches. Previously the file was written only when it
    # already existed, so a first run never created it and the append branch
    # could not be reached from a clean state.
    # NOTE(review): assumes every column of verify_df can be serialised to
    # CSV -- confirm for the `column_names` column.
    verify_df.write_csv(verification_record)
    # Bare expression kept as the last statement before the return;
    # presumably the notebook displays it as the cell output.
    verify_df
    return
@app.cell
def _():
    # Build a SQLAlchemy engine for the local SQLite database file.
    # Nothing is returned, so neither name is handed on by this cell.
    import sqlalchemy

    DATABASE_URL = "sqlite:///./data/qualtrics_JP.db"
    engine = sqlalchemy.create_engine(
        DATABASE_URL,
    )
    return