import marimo

__generated_with = "0.18.0"
app = marimo.App(width="medium")


@app.cell
def _():
    # Shared setup: imports, model selection, and the client object that the
    # LLM cells below call `generate()` on.
    import marimo as mo
    from pathlib import Path
    from utils import connect_qumo_ollama, load_srt

    VM_NAME = 'hiperf-gpu'
    MODEL = 'llama3.3:70b'

    client = connect_qumo_ollama(VM_NAME)
    return MODEL, Path, client, load_srt, mo


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    # Interview Transcript Thematic Analysis

    This notebook loads interview transcripts (SRT files) and runs thematic analysis using LLMs.
    """)
    return


@app.cell
def _(Path, mo):
    # Load transcript from SRT file
    TRANSCRIPT_DIR = Path("data/transcripts")
    # Sorted so the dropdown order is stable; glob() order depends on the
    # filesystem.
    srt_files = sorted(TRANSCRIPT_DIR.glob("*.srt"))

    # File selector: shows the bare file name, yields the full path as value.
    file_dropdown = mo.ui.dropdown(
        options={f.name: str(f) for f in srt_files},
        label="Select transcript file"
    )
    file_dropdown
    return (file_dropdown,)


@app.cell
def _(file_dropdown, load_srt, mo):
    # Load and display transcript preview
    transcript_raw = ""
    if file_dropdown.value:
        transcript_raw = load_srt(file_dropdown.value)

    # Only show an ellipsis when the preview is actually truncated (the
    # underscore prefix keeps these names local to this cell).
    _preview = transcript_raw[:2000]
    _ellipsis = "..." if len(transcript_raw) > len(_preview) else ""

    # The markdown is written flush-left on purpose: the interpolated
    # transcript lines carry no indentation, so indenting the template would
    # leave the headings indented after dedent and render them as code.
    mo.md(f"""
## Transcript Preview

**File:** `{file_dropdown.value or 'None selected'}`
**Length:** {len(transcript_raw)} characters, ~{len(transcript_raw.split())} words

Show first 2000 characters

```
{_preview}{_ellipsis}
```
""")
    return (transcript_raw,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 1: Infer Speaker Roles

    The model will analyze the transcript to identify who is the interviewer and who is the interviewee.
    """)
    return


@app.cell
def _(mo, transcript_raw):
    # Infer speaker roles from transcript context.
    # Only the first 4000 characters of the transcript are included in the
    # prompt.
    role_inference_prompt = f"""Analyze this interview transcript and identify the role of each speaker.

Based on the conversation context, determine who is:
- The interviewer(s) - asking questions, guiding the conversation
- The interviewee(s) - providing answers, sharing expertise/opinions

Return ONLY a simple mapping in this exact format (one per line):
SPEAKER_XX: Role - Brief description

For example:
SPEAKER_00: Interviewer - Michael from the voice branding team
SPEAKER_01: Interviewee - Head of Digital Design

{transcript_raw[:4000]}
"""

    infer_roles_button = mo.ui.run_button(label="Infer Speaker Roles")
    infer_roles_button
    return infer_roles_button, role_inference_prompt


@app.cell
def _(MODEL, client, infer_roles_button, mo, role_inference_prompt):
    # The model is only called when the run button was clicked; otherwise a
    # placeholder hint is rendered.
    inferred_roles_text = ""
    if infer_roles_button.value:
        response = client.generate(model=MODEL, prompt=role_inference_prompt)
        inferred_roles_text = response.response

    mo.md(f"""
### Inferred Roles

{inferred_roles_text if inferred_roles_text else "_Click 'Infer Speaker Roles' to analyze the transcript_"}
""")
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 2: Confirm or Edit Speaker Roles

    Review the inferred roles below and make corrections if needed.
    """)
    return


@app.cell
def _(mo, transcript_raw):
    import re

    # Extract unique speakers from transcript
    speakers = sorted(set(re.findall(r'(SPEAKER_\d+):', transcript_raw)))

    # Create one editable text input per speaker. The inputs are wrapped in
    # mo.ui.dictionary rather than a plain dict: widgets held in ordinary
    # Python containers do not trigger re-runs of dependent cells, whereas a
    # ui.dictionary is itself a UI element whose `.value` maps
    # speaker id -> current text.
    role_inputs = mo.ui.dictionary({
        speaker: mo.ui.text(
            value=speaker,
            label=speaker,
            full_width=True
        )
        for speaker in speakers
    })

    mo.md("### Edit Speaker Labels\n\nEnter the name/role for each speaker:")
    return re, role_inputs


@app.cell
def _(role_inputs):
    # Display role inputs as a form. The dictionary element itself must be
    # rendered so that edits are routed back to `role_inputs.value`.
    role_inputs
    return


@app.cell
def _(mo, re, role_inputs, transcript_raw):
    # Apply role labels to transcript.
    # Only speakers whose label was changed to something non-empty are
    # rewritten.
    _labels = {
        speaker_id: label
        for speaker_id, label in role_inputs.value.items()
        if label and label != speaker_id
    }

    # Single pass over the transcript: each "SPEAKER_NN:" marker is looked up
    # once, so a label that happens to look like another speaker id is never
    # rewritten a second time (which chained str.replace calls would do).
    labeled_transcript = re.sub(
        r'(SPEAKER_\d+):',
        lambda match: f"{_labels.get(match.group(1), match.group(1))}:",
        transcript_raw,
    )

    # Build role mapping summary
    role_mapping = "\n".join(
        f"- {speaker_id} → {label}"
        for speaker_id, label in sorted(role_inputs.value.items())
    )

    mo.md(f"""
### Role Mapping Applied

{role_mapping}
""")
    return labeled_transcript, role_mapping


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
    ## Step 3: Thematic Analysis

    Configure your analysis task and run the thematic analysis.
    """)
    return


@app.cell
def _(mo):
    # Editable analysis task prompt
    analysis_task_input = mo.ui.text_area(
        value="""Perform a thematic analysis of this interview transcript.

Identify and describe:
1. **Key Themes** - Major topics and ideas that emerge from the conversation
2. **Supporting Quotes** - Direct quotes that exemplify each theme (include speaker attribution)
3. **Insights** - Notable observations or implications from the discussion

Focus on themes related to:
- Brand voice and tone strategy
- Customer experience priorities
- Design system and consistency
- AI/conversational interface considerations""",
        label="Analysis Task",
        full_width=True,
        rows=12
    )
    analysis_task_input
    return (analysis_task_input,)


@app.cell
def _(analysis_task_input, labeled_transcript, mo, role_mapping):
    # Build full analysis prompt from the role mapping, the user-editable
    # task text, and the relabeled transcript.
    full_analysis_prompt = f"""You are an expert qualitative researcher specializing in thematic analysis of interview data.

## Speaker Roles
{role_mapping}

## Task
{analysis_task_input.value}

## Interview Transcript
'''
{labeled_transcript}
'''

Provide your analysis in well-structured markdown format."""

    run_analysis_button = mo.ui.run_button(label="Run Thematic Analysis")

    mo.vstack([
        mo.md(f"**Prompt length:** ~{len(full_analysis_prompt.split())} words"),
        run_analysis_button
    ])
    return full_analysis_prompt, run_analysis_button


@app.cell
def _(full_analysis_prompt, mo):
    # Render the exact prompt that will be sent, for review before running.
    mo.md(rf"""
# Full Analysis Prompt

---

{full_analysis_prompt}
""")
    return


@app.cell
def _(MODEL, client, full_analysis_prompt, mo, run_analysis_button):
    # The model is only called when the run button was clicked; otherwise a
    # placeholder hint is rendered.
    analysis_response = ""
    if run_analysis_button.value:
        response_2 = client.generate(model=MODEL, prompt=full_analysis_prompt)
        analysis_response = response_2.response

    mo.md(f"""
## Analysis Results

{analysis_response if analysis_response else "_Click 'Run Thematic Analysis' to generate analysis_"}
""")
    return


if __name__ == "__main__":
    app.run()