basic parsing working
This commit is contained in:
86
04_Results_Aggregation.py
Normal file
86
04_Results_Aggregation.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import marimo
|
||||
|
||||
# marimo version string recorded in this notebook file.
__generated_with = "0.18.3"
# Notebook application object; every cell below registers itself via @app.cell.
app = marimo.App(width="medium")
|
||||
|
||||
|
||||
@app.cell
def _():
    # Setup cell: imports shared with the other cells plus the data directories.
    import marimo as mo
    import pandas as pd
    from pathlib import Path

    # Directory the cells below glob for their input CSV files.
    INPUT_DIR = Path("./data/processing/03_sentiment_analysis")
    # Directory created by this cell; nothing else in this file reads it yet.
    WORKING_DIR = Path('./data/processing/04_sentiment_aggregation')

    # exist_ok=True replaces the former exists()-then-mkdir() pair, which could
    # raise FileExistsError if the directory appeared between the two calls.
    # NOTE(review): if the path exists but is a regular file, this now raises
    # instead of silently continuing.
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
    return INPUT_DIR, mo, pd
|
||||
|
||||
|
||||
# Markdown-only cell: renders the "Voices" section heading.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Voices
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(INPUT_DIR, mo):
    # Sorted so the option order is stable between runs; the order in which
    # glob yields directory entries is filesystem-dependent.
    voice_csv_files = sorted(INPUT_DIR.glob("*voice*.csv"))
    # Option label (file name without extension) -> path string that becomes
    # the selected value.
    file_options = {f.stem: str(f) for f in voice_csv_files}

    voice_multiselect = mo.ui.multiselect(options=file_options, label="Select Voice CSV Files for Aggregation")
    # Bare expression: the widget is the cell's displayed output.
    voice_multiselect
    return (voice_multiselect,)
|
||||
|
||||
|
||||
@app.cell
def _(mo, voice_multiselect):
    # Place the selector next to a readout of its current value.
    # Underscore-prefixed so the name stays private to this cell.
    _readout = mo.md(f"Has value: {voice_multiselect.value}")
    mo.hstack([voice_multiselect, _readout])
    return
|
||||
|
||||
|
||||
@app.cell
def _(pd, voice_multiselect):
    # Sum the selected voice CSVs element-wise, aligned on the key column and
    # on column names.
    KEY_COL = "_context"

    def _read_voice_csv(path: str) -> pd.DataFrame:
        """Load one CSV indexed by KEY_COL with all remaining columns numeric."""
        indexed = pd.read_csv(path).set_index(KEY_COL)
        # errors="coerce": values that cannot be parsed as numbers become NaN.
        return indexed.apply(pd.to_numeric, errors="coerce")

    def aggregate_voice_data(files: list[str]) -> pd.DataFrame:
        """Return the element-wise sum of the given CSV files.

        An empty selection yields an empty DataFrame. Otherwise the first
        file seeds the running total and each later file is added to it;
        a cell present on only one side of an addition counts as 0 on the
        other. KEY_COL is restored as a regular column in the result.
        """
        if not files:
            return pd.DataFrame()

        first, *rest = files
        total = _read_voice_csv(first)
        for extra in rest:
            total = total.add(_read_voice_csv(extra), fill_value=0)
        return total.reset_index()

    master_df = aggregate_voice_data(voice_multiselect.value)
    # Bare expression: the aggregated table is the cell's displayed output.
    master_df
    return
|
||||
|
||||
|
||||
# Markdown-only cell: renders the "Characters" section heading.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Characters
    """)
    return
|
||||
|
||||
|
||||
@app.cell
def _(INPUT_DIR):
    # Sorted so the listing is stable between runs; the order in which glob
    # yields directory entries is filesystem-dependent.
    char_csv_files = sorted(INPUT_DIR.glob("*character*.csv"))
    # Bare expression: the list of matching paths is the cell's displayed output.
    char_csv_files
    return
|
||||
|
||||
|
||||
# Run the notebook's cells when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
||||
Reference in New Issue
Block a user