# Source file: Interview-Analysis/01_Taguette-Pre-Process.py
# Listing metadata: 115 lines, 2.5 KiB, Python

import marimo
# Version string recorded in this notebook file (presumably the marimo release
# that generated it — confirm against marimo's file-format docs).
__generated_with = "0.18.3"
# Notebook application object; every @app.cell function below is registered on it.
app = marimo.App(width="medium")
@app.cell
def _():
    # Shared imports for the notebook. Only the names in the return tuple are
    # handed on from this cell; `pd` is imported here but not returned.
    from pathlib import Path

    import marimo as mo
    import pandas as pd

    from utils import cpc_smb_to_markdown, csv_to_markdown
    return Path, cpc_smb_to_markdown, csv_to_markdown, mo
@app.cell
def _(Path):
    # Transcript folders, given as relative paths: the raw inputs and the
    # cleaned outputs.
    INPUT_DIR, OUTPUT_DIR = (
        Path("data/transcripts/raw"),
        Path("data/transcripts/clean"),
    )
    # Make sure the output folder (and any missing parents) exists; this is a
    # no-op when it is already there.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    return INPUT_DIR, OUTPUT_DIR
@app.cell
def _(INPUT_DIR, mo):
    # Offer every CSV found directly inside INPUT_DIR in a dropdown.
    # glob() yields entries in filesystem-dependent order, so sort them to keep
    # the option list stable from run to run.
    csv_files = sorted(INPUT_DIR.glob("*.csv"))
    # Options map the file stem (shown as the label) to the path as a string.
    file_options = {f.stem: str(f) for f in csv_files}
    file_dropdown = mo.ui.dropdown(
        options=file_options,
        label="Select CSV Transcript",
        full_width=True,
    )
    file_dropdown
    return (file_dropdown,)
@app.cell
def _(Path, cpc_smb_to_markdown, csv_to_markdown):
    def jpmc_transcript_to_md(filepath):
        """Convert a transcript file to Markdown, trying two converters in turn.

        `csv_to_markdown` is attempted first; if it raises, `cpc_smb_to_markdown`
        is tried on the same file.

        Args:
            filepath: Location of the transcript, passed unchanged to both
                converters.

        Returns:
            The result of the first converter that succeeds.

        Raises:
            ValueError: If both converters fail. The message carries both error
                texts and the second failure is attached as the cause.
        """
        try:
            return csv_to_markdown(filepath)
        except Exception as primary_err:
            # NOTE(review): any failure of the first converter triggers the
            # fallback; narrow this once the converters' exception types are known.
            try:
                return cpc_smb_to_markdown(filepath)
            except Exception as fallback_err:
                # Chain explicitly so the traceback shows the fallback failure
                # as the direct cause of this error.
                raise ValueError(
                    f"Failed to process file {filepath} with errors: {primary_err}, {fallback_err}"
                ) from fallback_err
    return (jpmc_transcript_to_md,)
@app.cell(hide_code=True)
def _(file_dropdown, jpmc_transcript_to_md, mo):
    # Preview: render the converted Markdown for the selected file, or an empty
    # Markdown element while nothing is selected.
    md_content = jpmc_transcript_to_md(file_dropdown.value) if file_dropdown.value else ""
    preview = mo.md(md_content)
    preview
    return
@app.cell
def _(mo):
    # Run button whose value is checked before the selected transcript is
    # converted and written to disk.
    convert_btn = mo.ui.run_button(
        label="Convert to Markdown",
    )
    convert_btn
    return (convert_btn,)
@app.cell
def _(OUTPUT_DIR, Path, convert_btn, file_dropdown, jpmc_transcript_to_md, mo):
    # Convert the selected transcript and save it as OUTPUT_DIR/<stem>.md, but
    # only when the button value is truthy and a file is selected. Otherwise the
    # cell shows an empty Markdown element and `saved_md` stays None.
    result = mo.md("")
    saved_md = None
    if convert_btn.value and file_dropdown.value:
        saved_md = jpmc_transcript_to_md(file_dropdown.value)
        _out_path = OUTPUT_DIR / (Path(file_dropdown.value).stem + ".md")
        # Write UTF-8 explicitly: the default is the platform locale encoding,
        # which can fail on (or mis-encode) non-ASCII transcript text.
        _out_path.write_text(saved_md, encoding="utf-8")
        result = mo.callout(f"✅ Saved to `{_out_path}`", kind="success")
    result
    return (saved_md,)
@app.cell
def _(mo, saved_md):
    # Show the Markdown that was just saved under a heading; while nothing has
    # been saved (or the saved text is empty) show an empty Markdown element.
    saved_preview = (
        mo.vstack([mo.md("### Saved Markdown Preview"), mo.md(saved_md)])
        if saved_md
        else mo.md("")
    )
    saved_preview
    return
@app.cell(hide_code=True)
def _(mo):
    # Static Markdown cell: a "Taguette" heading and a link for uploading and
    # processing with Taguette.
    mo.md(r"""
# Taguette
Upload and process using taguette: http://taguette.tail44fa00.ts.net/
""")
    return
@app.cell
def _():
    # Empty cell: defines nothing and returns nothing.
    return
if __name__ == "__main__":
    # Run the notebook app when this file is executed directly as a script.
    app.run()