import marimo

__generated_with = "0.18.3"
app = marimo.App(width="medium")


@app.cell
def _():
    # Notebook setup: shared imports plus the input/output directories used by
    # every later cell.
    import marimo as mo
    import pandas as pd
    from pathlib import Path

    # Project-local helper; its implementation is not part of this notebook.
    from utils import create_sentiment_matrix

    # CSV files offered in the dropdown below are read from INPUT_DIR; the
    # matrices produced by this notebook are written to WORKING_DIR.
    INPUT_DIR = Path("./data/processing/02_taguette_postprocess")
    WORKING_DIR = Path("./data/processing/03_sentiment_analysis")

    # exist_ok=True makes this a single idempotent call, avoiding the
    # check-then-create race of a separate exists() test.
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
    return INPUT_DIR, Path, WORKING_DIR, create_sentiment_matrix, mo, pd


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Load Sentiment CSV
    """)
    return


@app.cell
def _(INPUT_DIR, mo):
    # Sort so the dropdown order is stable; glob() yields files in arbitrary
    # filesystem order.
    csv_files = sorted(INPUT_DIR.glob("*.csv"))

    # Dropdown label -> value: file stem shown to the user, full path as value.
    file_options = {f.stem: str(f) for f in csv_files}

    sentiment_csv = mo.ui.dropdown(
        options=file_options,
        label="Select Sentiment CSV File",
        full_width=True
    )
    sentiment_csv
    return (sentiment_csv,)


@app.cell
def _(Path, mo, pd, sentiment_csv):
    # No file is selected when the notebook first loads, so the dropdown value
    # is None. Halt this cell (and every cell depending on it) with a hint
    # instead of failing on Path(None).
    mo.stop(
        sentiment_csv.value is None,
        mo.md("**Select a sentiment CSV file above to continue.**"),
    )

    input_csv_name = Path(sentiment_csv.value).stem

    # The file stem is split on underscores: the last piece is used as the
    # timestamp and the first piece as the document identifier. Both are
    # reused to name the output files below.
    name_parts = input_csv_name.split('_')
    timestamp = name_parts[-1]
    doc = name_parts[0]

    sentiment_df = pd.read_csv(sentiment_csv.value)
    sentiment_df
    return doc, sentiment_df, timestamp


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Phase 1: Individual interview analysis

    - Create sentiment matrices for each interview (document)
    - Save the intermediate results to file in the `WORKING_DIR`
    """)
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 1.1: Voice Sample vs. Theme Sentiment Matrix

    For each interview (document), create a matrix where:
    - Rows represent the different Voices (based on '_V-' tags)
    - Columns represent the different VoiceThemes(based on 'VT -' tags)
    - Each cell contains the aggregated sentiment score (sum) for that Voice/Theme combination
    """)
    return


@app.cell
def _(create_sentiment_matrix, sentiment_df):
    # Voice matrix: '_V-' tags select the rows and 'VT - ' tags the columns
    # (see the description above; aggregation happens inside the helper).
    voice_matrix = create_sentiment_matrix(sentiment_df, column_prefix='VT - ', row_prefix='_V-')
    voice_matrix
    return (voice_matrix,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    SAVE TO CSV
    """)
    return


@app.cell
def _(WORKING_DIR, doc, timestamp, voice_matrix):
    # Save to CSV; the file name carries the document id and timestamp parsed
    # from the input file name.
    voice_filename = WORKING_DIR / f"{doc}_voice_theme_matrix_{timestamp}.csv"

    voice_matrix.to_csv(voice_filename)
    print(f"Saved to '{voice_filename}'")
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 1.2: Character Sample vs. Theme Sentiment Matrix

    For each interview (document), create a matrix where:
    - Rows represent the different Characters (based on '_C-' tags)
    - Columns represent the different CharacterThemes (based on 'CT -' tags)
    - Each cell contains the aggregated sentiment score (sum) for that Character/Theme combination
    """)
    return


@app.cell
def _(create_sentiment_matrix, sentiment_df):
    # Character matrix: '_C-' tags select the rows and 'CT - ' tags the
    # columns (see the description above; aggregation happens in the helper).
    character_matrix = create_sentiment_matrix(sentiment_df, column_prefix='CT - ', row_prefix='_C-')
    character_matrix
    return (character_matrix,)


@app.cell
def _(WORKING_DIR, character_matrix, doc, timestamp):
    # Save to CSV, using the same naming scheme as the voice matrix.
    character_filename = WORKING_DIR / f"{doc}_character_theme_matrix_{timestamp}.csv"

    character_matrix.to_csv(character_filename)
    print(f"Saved to '{character_filename}'")
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 1.3: Chase Brand Sentiment

    TODO: not sure we have enough supporting data for this yet
    """)
    return


if __name__ == "__main__":
    app.run()