import marimo

__generated_with = "0.18.3"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    import pandas as pd
    from pathlib import Path

    # Input comes from the sentiment-analysis step; this notebook's own
    # working directory is created on first run.
    INPUT_DIR = Path("./data/processing/03_sentiment_analysis")
    WORKING_DIR = Path('./data/processing/04_sentiment_aggregation')
    # exist_ok avoids the check-then-create race of `if not exists(): mkdir()`.
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
    return INPUT_DIR, mo, pd


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Voices
    """)
    return


@app.cell
def _(INPUT_DIR, mo):
    # Offer every "*voice*.csv" file in INPUT_DIR for selection. Sorting keeps
    # the option order stable, since glob order depends on the filesystem.
    voice_csv_files = sorted(INPUT_DIR.glob("*voice*.csv"))
    # Option label is the file stem; option value is the path as a string.
    file_options = {f.stem: str(f) for f in voice_csv_files}

    voice_multiselect = mo.ui.multiselect(
        options=file_options,
        label="Select Voice CSV Files for Aggregation",
    )
    voice_multiselect
    return (voice_multiselect,)


@app.cell
def _(mo, voice_multiselect):
    # Show the widget next to its current value for quick inspection.
    mo.hstack([voice_multiselect, mo.md(f"Has value: {voice_multiselect.value}")])
    return


@app.cell
def _(pd, voice_multiselect):
    # Load all voice CSV files and aggregate them so that each row-column pair is summed
    KEY_COL = "_context"

    def _read_voice_csv(path: str) -> pd.DataFrame:
        """Read one voice CSV, indexed by KEY_COL, with all columns numeric.

        Values that cannot be parsed as numbers become NaN
        (``errors="coerce"``) instead of raising.
        """
        df = pd.read_csv(path).set_index(KEY_COL)
        df = df.apply(pd.to_numeric, errors="coerce")
        return df

    def aggregate_voice_data(files: list[str]) -> pd.DataFrame:
        """Sum the given voice CSV files element-wise, aligned on KEY_COL.

        Args:
            files: Paths of the CSV files to combine.

        Returns:
            A DataFrame with KEY_COL restored as a regular column, or an
            empty DataFrame when ``files`` is empty.
        """
        if not files:
            return pd.DataFrame()

        master = _read_voice_csv(files[0])
        for path in files[1:]:
            # fill_value=0 treats a cell missing from one frame as 0, so rows
            # or columns present in only some files are kept rather than
            # turned into NaN.
            master = master.add(_read_voice_csv(path), fill_value=0)
        return master.reset_index()

    master_df = aggregate_voice_data(voice_multiselect.value)
    master_df
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Characters
    """)
    return


@app.cell
def _(INPUT_DIR):
    # List the "*character*.csv" files in INPUT_DIR, sorted so the displayed
    # order is stable across runs.
    char_csv_files = sorted(INPUT_DIR.glob("*character*.csv"))
    char_csv_files
    return


if __name__ == "__main__":
    app.run()