# Files
# Interview-Analysis/03_Sentiment_Analysis.py
# 2025-12-11 12:56:23 +01:00
#
# 147 lines
# 3.5 KiB
# Python

import marimo
# Version string recorded in the notebook file (presumably the marimo release
# that generated it — confirm before editing by hand).
__generated_with = "0.18.3"
# Application object that every `@app.cell` function below registers with.
app = marimo.App(width="medium")
@app.cell
def _():
    # Notebook setup: imports shared with the other cells, plus the input and
    # output directories for this processing step.
    import marimo as mo
    import pandas as pd
    from pathlib import Path
    from utils import create_sentiment_matrix

    INPUT_DIR = Path("./data/processing/02_taguette_postprocess")
    WORKING_DIR = Path('./data/processing/03_sentiment_analysis')

    # exist_ok=True makes creation idempotent and removes the race between a
    # separate exists() check and mkdir(). It also fails loudly
    # (FileExistsError) if the path exists but is not a directory, instead of
    # silently continuing.
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
    return INPUT_DIR, Path, WORKING_DIR, create_sentiment_matrix, mo, pd
# Markdown cell (declared with hide_code=True): section heading for the CSV
# loading step.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
# Load Sentiment CSV
""")
    return
@app.cell
def _(INPUT_DIR, mo):
    # Offer every CSV file found directly in INPUT_DIR as a dropdown option,
    # labelled by file stem and mapped to its path string.
    # glob() makes no ordering guarantee, so sort the paths to keep the option
    # list stable between runs and machines.
    csv_files = sorted(INPUT_DIR.glob("*.csv"))
    file_options = {f.stem: str(f) for f in csv_files}
    sentiment_csv = mo.ui.dropdown(
        options=file_options,
        label="Select Sentiment CSV File",
        full_width=True,
    )
    # Bare expression: leaves the widget as the cell's last value.
    sentiment_csv
    return (sentiment_csv,)
@app.cell
def _(Path, pd, sentiment_csv):
    # Derive the document id and timestamp from the selected file name: they
    # are the first and last '_'-separated fields of the stem, respectively.
    input_csv_name = Path(sentiment_csv.value).stem
    _name_fields = input_csv_name.split('_')
    doc, timestamp = _name_fields[0], _name_fields[-1]

    # Load the selected CSV and leave the frame as the cell's last value.
    sentiment_df = pd.read_csv(sentiment_csv.value)
    sentiment_df
    return doc, sentiment_df, timestamp
# Markdown cell (declared with hide_code=True): introduces Phase 1, the
# per-interview sentiment matrices that are saved under WORKING_DIR.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
# Phase 1: Individual interview analysis
- Create sentiment matrices for each interview (document)
- Save the intermediate results to file in the `WORKING_DIR`
""")
    return
# Markdown cell (declared with hide_code=True): describes the Voice vs.
# VoiceTheme matrix layout (rows, columns, cell values) for Step 1.1.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
## Step 1.1: Voice Sample vs. Theme Sentiment Matrix
For each interview (document), create a matrix where:
- Rows represent the different Voices (based on '_V-' tags)
- Columns represent the different VoiceThemes(based on 'VT -' tags)
- Each cell contains the aggregated sentiment score (sum) for that Voice/Theme combination
""")
    return
@app.cell
def _(create_sentiment_matrix, sentiment_df):
    # Step 1.1: build the matrix from the loaded sentiment data, selecting
    # rows by the '_V-' prefix and columns by the 'VT - ' prefix.
    voice_matrix = create_sentiment_matrix(
        sentiment_df,
        row_prefix='_V-',
        column_prefix='VT - ',
    )
    voice_matrix
    return (voice_matrix,)
# Markdown cell (declared with hide_code=True): label for the save step that
# follows.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
SAVE TO CSV
""")
    return
@app.cell
def _(WORKING_DIR, doc, timestamp, voice_matrix):
    # Write the voice/theme matrix into WORKING_DIR; the file name carries the
    # document id and timestamp taken from the input file name.
    voice_filename = WORKING_DIR.joinpath(
        f"{doc}_voice_theme_matrix_{timestamp}.csv"
    )
    voice_matrix.to_csv(voice_filename)
    print(f"Saved to '{voice_filename}'")
    return
# Markdown cell (declared with hide_code=True): describes the Character vs.
# CharacterTheme matrix layout (rows, columns, cell values) for Step 1.2.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
## Step 1.2: Character Sample vs. Theme Sentiment Matrix
For each interview (document), create a matrix where:
- Rows represent the different Characters (based on '_C-' tags)
- Columns represent the different CharacterThemes (based on 'CT -' tags)
- Each cell contains the aggregated sentiment score (sum) for that Character/Theme combination
""")
    return
@app.cell
def _(create_sentiment_matrix, sentiment_df):
    # Step 1.2: build the matrix from the loaded sentiment data, selecting
    # rows by the '_C-' prefix and columns by the 'CT - ' prefix.
    character_matrix = create_sentiment_matrix(
        sentiment_df,
        row_prefix='_C-',
        column_prefix='CT - ',
    )
    character_matrix
    return (character_matrix,)
@app.cell
def _(WORKING_DIR, character_matrix, doc, timestamp):
    # Write the character/theme matrix into WORKING_DIR; the file name carries
    # the document id and timestamp taken from the input file name.
    character_filename = WORKING_DIR.joinpath(
        f"{doc}_character_theme_matrix_{timestamp}.csv"
    )
    character_matrix.to_csv(character_filename)
    print(f"Saved to '{character_filename}'")
    return
# Markdown cell (declared with hide_code=True): placeholder for Step 1.3;
# no analysis code for this step appears in the file yet.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
## Step 1.3: Chase Brand Sentiment
TODO: not sure we have enough supporting data for this yet
""")
    return
# Run the notebook's app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()