Validation of data exports with reference file
This commit is contained in:
@@ -10,39 +10,53 @@ def _():
|
||||
import polars as pl
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
return Path, pl, sqlite3
|
||||
return Path, pl
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(Path, pl):
|
||||
results_file = Path('./data/VB_Qualtrics_labels.csv')
|
||||
def _():
|
||||
# RESULTS_FILE = 'data/exports/OneDrive_2026-01-21/Soft Launch Data/JPMC_Chase Brand Personality_Quant Round 1_January 21, 2026_Soft Launch_Labels.csv'
|
||||
RESULTS_FILE = 'data/exports/OneDrive_1_1-16-2026/JPMC_Chase Brand Personality_Quant Round 1_TestData_Labels.csv'
|
||||
return (RESULTS_FILE,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(Path, RESULTS_FILE, pl):
|
||||
results_file = Path(RESULTS_FILE)
|
||||
df = pl.read_csv(results_file, skip_rows=0)
|
||||
df
|
||||
return (df,)
|
||||
return df, results_file
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(df, sqlite3):
|
||||
def _(df, pl, results_file):
|
||||
colset = set(df.columns)
|
||||
this_df_verify = pl.DataFrame({'column_names': [colset], 'results_file': results_file.as_posix()})
|
||||
this_df_verify
|
||||
return (this_df_verify,)
|
||||
|
||||
# Create table if not exists with columns from csv
|
||||
with sqlite3.connect("data/qualtrics_JP.db") as conn:
|
||||
# interact with database
|
||||
q= f'''
|
||||
CREATE TABLE IF NOT EXISTS qualtrics_raw(
|
||||
{', '.join(list(df.columns))}
|
||||
);
|
||||
'''
|
||||
print(q)
|
||||
conn.execute(q)
|
||||
|
||||
@app.cell
def _(Path, pl, this_df_verify):
    # Running log of verified export files: one row is added per run.
    verification_record = Path('./data/exports/verification.csv')

    if verification_record.exists():
        # Append this run's record to the history saved by earlier runs.
        verify_df = pl.concat(
            [pl.read_csv(verification_record), this_df_verify],
            how='vertical',
        )
    else:
        # First run: there is no history yet, so this record starts the log.
        verify_df = this_df_verify

    # Persist on both paths. Previously the file was only written when it
    # already existed, so a first run never created it and the log could
    # never start accumulating.
    # NOTE(review): 'column_names' is built upstream from a Python set;
    # confirm polars can serialise that column to CSV and that its dtype
    # matches what read_csv returns, otherwise concat/write_csv may raise.
    verify_df.write_csv(verification_record)

    verify_df
    return
|
||||
|
||||
|
||||
@app.cell
def _():
    # Imported inside the cell so the dependency stays local to it.
    import sqlalchemy

    # SQLite database file, addressed relative to the working directory.
    DATABASE_URL = "sqlite:///./data/qualtrics_JP.db"

    # Build the engine for that database; nothing is exported from this cell.
    engine = sqlalchemy.create_engine(DATABASE_URL)
    return
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user