diff --git a/Thematic_Analysis.py b/Thematic_Analysis.py
index 8e502d3..f89b92b 100644
--- a/Thematic_Analysis.py
+++ b/Thematic_Analysis.py
@@ -213,7 +213,7 @@ def _(mo):
 @app.cell
 def _(themes_input):
     # Parse themes into a clean Python list
-    raw_lines = themes_input.value.splitlines() if themes_input and themes_input.value else []
+    raw_lines = themes_input.value.splitlines() if themes_input.value else []
     theme_list = [t.strip() for t in raw_lines if t.strip()]
     return (theme_list,)
 
@@ -237,7 +237,7 @@ def _(Path, mo):
 
     label_button = mo.ui.run_button(label="Run Theme Labelling for This Interview")
     label_button
-    return OUTPUT_DIR, label_button
+    return
 
 
 @app.cell
@@ -247,9 +247,9 @@ def _(
     Path,
     client,
     file_dropdown,
-    theme_label_button,
     labeled_transcript,
     mo,
+    theme_label_button,
     theme_list,
 ):
     import json
@@ -358,9 +358,175 @@ def _(
 
     mo.md(f"""### Theme Labelling Status
 
-{status}
-""")
-    return theme_label_results
+    {status}
+    """)
+    return
+
+
+@app.cell
+def _(Path, mo):
+    # Step 3c: Locate the labeled transcripts (assumed precomputed)
+    LABELED_DIR = Path("data/labeled_transcripts")
+    LABELED_DIR.mkdir(parents=True, exist_ok=True)
+
+    labeled_files = sorted(LABELED_DIR.glob("*.json"))
+
+    _summary = mo.md(f"""### Step 3c: Use Pre-Labeled Transcripts
+
+    Found **{len(labeled_files)}** labeled transcript files in `{LABELED_DIR}`.
+    These will be used to aggregate themes across all interviews.
+    """)
+
+    # marimo renders only the last expression of a cell, so the summary and
+    # the file list are stacked into one output instead of two statements.
+    mo.vstack([_summary, labeled_files])
+    return (labeled_files,)
+
+
+@app.cell
+def _(labeled_files):
+    # Parse every labeled transcript into one flat list of records.
+    # Working names are underscore-prefixed so marimo keeps them local to
+    # this cell: a global (including an imported module) may be defined in
+    # only one cell, and `json` is already imported by the labelling cell.
+    import json as _json
+
+    all_labeled_records = []
+    for _path in labeled_files:
+        try:
+            _data = _json.loads(_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # Unreadable file, bad encoding or invalid JSON: skip it.
+            continue
+        if not isinstance(_data, dict):
+            # Valid JSON that is not an object carries no labels.
+            continue
+
+        try:
+            _relevance = float(_data.get("relevance", 0.0))
+        except (TypeError, ValueError):
+            _relevance = 0.0
+
+        # Keep only section objects so later `.get` calls cannot fail.
+        _raw_sections = _data.get("sections")
+        _sections = (
+            [_s for _s in _raw_sections if isinstance(_s, dict)]
+            if isinstance(_raw_sections, list)
+            else []
+        )
+
+        all_labeled_records.append(
+            {
+                # Fall back to the file-name part before "__".
+                "interview_id": _data.get("interview_id")
+                or _path.stem.split("__", 1)[0],
+                "theme": _data.get("theme", ""),
+                "present": bool(_data.get("present", False)),
+                "relevance": _relevance,
+                "sections": _sections,
+            }
+        )
+    return (all_labeled_records,)
+
+
+@app.cell
+def _(all_labeled_records, mo):
+    # Derive full theme and interview sets
+    all_themes = sorted({r["theme"] for r in all_labeled_records if r["theme"]})
+    all_interviews = sorted({r["interview_id"] for r in all_labeled_records})
+
+    theme_selector = mo.ui.dropdown(
+        options={t: t for t in all_themes},
+        label="Select theme to explore across all interviews",
+    )
+
+    # Stack heading and dropdown; a bare `mo.md(...)` statement that is not
+    # the cell's last expression would never be shown.
+    mo.vstack(
+        [
+            mo.md("### Step 3d: Explore Themes Across All Labeled Transcripts"),
+            theme_selector,
+        ]
+    )
+    return all_interviews, theme_selector
+
+
+@app.cell
+def _(all_interviews, all_labeled_records, mo, theme_selector):
+    # Summarise the selected theme across all labeled transcripts.
+    import statistics
+
+    selected_theme = theme_selector.value
+    theme_summary = {}
+    theme_sections = []
+
+    if selected_theme:
+        theme_records = [
+            r for r in all_labeled_records if r["theme"] == selected_theme
+        ]
+
+        # Count distinct interviews so several label files for one interview
+        # cannot push the share above 1.0.
+        interviews_with_theme = {
+            r["interview_id"] for r in theme_records if r["present"]
+        }
+        relevances = [r["relevance"] for r in theme_records if r["present"]]
+
+        theme_summary = {
+            "theme": selected_theme,
+            "num_interviews": len(all_interviews),
+            "num_interviews_with_theme": len(interviews_with_theme),
+            "share_of_interviews_with_theme": (
+                len(interviews_with_theme) / len(all_interviews)
+                if all_interviews
+                else 0.0
+            ),
+            "avg_relevance_if_present": (
+                statistics.mean(relevances) if relevances else 0.0
+            ),
+        }
+
+        # One row per labeled section; comprehension variables stay out of
+        # the cell's globals, unlike top-level `for` loop variables.
+        theme_sections = [
+            {
+                "interview_id": r["interview_id"],
+                "speaker": s.get("speaker", ""),
+                "section_text": s.get("section_text", ""),
+                "relevance": r["relevance"],
+            }
+            for r in theme_records
+            for s in r["sections"]
+        ]
+
+    _overview = mo.md(
+        f"""#### Theme Overview: `{selected_theme or "None selected"}`
+
+        - Total interviews: **{len(all_interviews)}**
+        - Interviews where theme is present: **{theme_summary.get("num_interviews_with_theme", 0)}**
+        - Share of interviews with theme: **{theme_summary.get("share_of_interviews_with_theme", 0.0):.2f}**
+        - Avg. relevance (when present): **{theme_summary.get("avg_relevance_if_present", 0.0):.2f}**
+        """
+    )
+
+    if theme_sections:
+        _details = mo.ui.table(
+            [
+                {
+                    "Interview": s["interview_id"],
+                    "Speaker": s["speaker"],
+                    "Relevance": f"{s['relevance']:.2f}",
+                    "Section": s["section_text"],
+                }
+                for s in theme_sections
+            ]
+        )
+    else:
+        _details = mo.md("_No sections for this theme yet._")
+
+    # Only a cell's last expression is rendered, so combine both parts.
+    mo.vstack([_overview, _details])
+    return
 
 
 @app.cell