preview md

This commit is contained in:
2025-12-08 11:31:03 +01:00
parent ab4ee4b34a
commit 60d2876725

View File

@@ -34,40 +34,44 @@ def _(INPUT_DIR, mo):
return (file_dropdown,) return (file_dropdown,)
@app.function(hide_code=True)
def csv_to_markdown(df):
    """Convert a transcript DataFrame to markdown, merging consecutive same-speaker turns.

    Args:
        df: DataFrame with a "Speaker" column and a "Transcript" column.
            Rows are processed in the order ``df.iterrows()`` yields them.

    Returns:
        A markdown string: the "# Interview Transcript" heading followed by
        one "**Speaker**: text" block per run of consecutive rows sharing the
        same speaker. Blocks, and the merged turns inside a block, are
        separated by a blank line.
    """
    # The separator is bound to a name because a backslash inside an f-string
    # replacement field is a SyntaxError on Python versions before 3.12.
    paragraph_sep = "\n\n"

    lines = ["# Interview Transcript"]
    # Speaker of the block currently being accumulated (None before the first row).
    prev_speaker = None
    # Text of the consecutive turns spoken by prev_speaker.
    merged_text = []

    for _, row in df.iterrows():
        speaker = row["Speaker"]
        # str() is applied to every value, so non-string cells are rendered
        # through their string form.
        # NOTE(review): a missing Transcript cell read as NaN would appear as
        # the literal text "nan" -- confirm whether that is acceptable.
        text = str(row["Transcript"]).strip()

        if speaker == prev_speaker:
            # Same speaker continues: extend the current block.
            merged_text.append(text)
            continue

        # Speaker changed: flush the previous block before starting a new one.
        if prev_speaker is not None:
            lines.append(f"**{prev_speaker}**: {paragraph_sep.join(merged_text)}")
        prev_speaker = speaker
        merged_text = [text]

    # Flush the final speaker's block (nothing to flush for an empty frame).
    if prev_speaker is not None:
        lines.append(f"**{prev_speaker}**: {paragraph_sep.join(merged_text)}")

    # Blank line between the heading and every block.
    return paragraph_sep.join(lines)
@app.cell @app.cell
def _(file_dropdown, mo, pd): def _(file_dropdown, mo, pd):
def csv_to_markdown(df):
    """Convert a transcript DataFrame to markdown, merging consecutive same-speaker turns.

    Args:
        df: DataFrame with a "Speaker" column and a "Transcript" column.
            Rows are processed in the order ``df.iterrows()`` yields them.

    Returns:
        A markdown string: the "# Interview Transcript" heading followed by
        one "**Speaker**: text" block per run of consecutive rows sharing the
        same speaker. Merged turns inside a block are separated by a single
        newline; every block ends with two newlines.
    """
    # The separator is bound to a name because a backslash inside an f-string
    # replacement field is a SyntaxError on Python versions before 3.12.
    turn_sep = "\n"

    # Plain string literal: there are no placeholders, so no f-prefix is needed.
    lines = ["# Interview Transcript\n"]
    # Speaker of the block currently being accumulated (None before the first row).
    prev_speaker = None
    # Text of the consecutive turns spoken by prev_speaker.
    merged_text = []

    for _, row in df.iterrows():
        speaker = row["Speaker"]
        # str() is applied to every value, so non-string cells are rendered
        # through their string form.
        text = str(row["Transcript"]).strip()

        if speaker == prev_speaker:
            # Same speaker continues: extend the current block.
            merged_text.append(text)
            continue

        # Speaker changed: flush the previous block before starting a new one.
        if prev_speaker is not None:
            lines.append(f"**{prev_speaker}**: {turn_sep.join(merged_text)}\n\n")
        prev_speaker = speaker
        merged_text = [text]

    # Flush the final speaker's block (nothing to flush for an empty frame).
    if prev_speaker is not None:
        lines.append(f"**{prev_speaker}**: {turn_sep.join(merged_text)}\n\n")

    return "\n".join(lines)
# Preview # Preview
preview = mo.md("") preview = mo.md("")
if file_dropdown.value: if file_dropdown.value:
@@ -76,7 +80,7 @@ def _(file_dropdown, mo, pd):
preview = mo.md(md_content) preview = mo.md(md_content)
preview preview
return (csv_to_markdown,) return
@app.cell @app.cell
@@ -87,17 +91,30 @@ def _(mo):
@app.cell @app.cell
def _(OUTPUT_DIR, Path, convert_btn, csv_to_markdown, file_dropdown, mo, pd): def _(OUTPUT_DIR, Path, convert_btn, file_dropdown, mo, pd):
result = mo.md("") result = mo.md("")
saved_md = None
if convert_btn.value and file_dropdown.value: if convert_btn.value and file_dropdown.value:
_df = pd.read_csv(file_dropdown.value) _df = pd.read_csv(file_dropdown.value)
_md = csv_to_markdown(_df) saved_md = csv_to_markdown(_df)
_out_path = OUTPUT_DIR / (Path(file_dropdown.value).stem + ".md") _out_path = OUTPUT_DIR / (Path(file_dropdown.value).stem + ".md")
_out_path.write_text(_md) _out_path.write_text(saved_md)
result = mo.callout(f"✅ Saved to `{_out_path}`", kind="success") result = mo.callout(f"✅ Saved to `{_out_path}`", kind="success")
result result
return (saved_md,)
@app.cell
def _(mo, saved_md):
saved_preview = mo.md("")
if saved_md:
saved_preview = mo.vstack([
mo.md("### Saved Markdown Preview"),
mo.md(saved_md)
])
saved_preview
return return