Started on looping through all interviews
This commit is contained in:
@@ -213,7 +213,7 @@ def _(mo):
|
|||||||
@app.cell
|
@app.cell
|
||||||
def _(themes_input):
|
def _(themes_input):
|
||||||
# Parse themes into a clean Python list
|
# Parse themes into a clean Python list
|
||||||
raw_lines = themes_input.value.splitlines() if themes_input and themes_input.value else []
|
raw_lines = themes_input.value.splitlines() if themes_input.value and themes_input.value else []
|
||||||
theme_list = [t.strip() for t in raw_lines if t.strip()]
|
theme_list = [t.strip() for t in raw_lines if t.strip()]
|
||||||
return (theme_list,)
|
return (theme_list,)
|
||||||
|
|
||||||
@@ -237,7 +237,7 @@ def _(Path, mo):
|
|||||||
|
|
||||||
label_button = mo.ui.run_button(label="Run Theme Labelling for This Interview")
|
label_button = mo.ui.run_button(label="Run Theme Labelling for This Interview")
|
||||||
label_button
|
label_button
|
||||||
return OUTPUT_DIR, label_button
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
@@ -247,9 +247,9 @@ def _(
|
|||||||
Path,
|
Path,
|
||||||
client,
|
client,
|
||||||
file_dropdown,
|
file_dropdown,
|
||||||
theme_label_button,
|
|
||||||
labeled_transcript,
|
labeled_transcript,
|
||||||
mo,
|
mo,
|
||||||
|
theme_label_button,
|
||||||
theme_list,
|
theme_list,
|
||||||
):
|
):
|
||||||
import json
|
import json
|
||||||
@@ -360,7 +360,140 @@ def _(
|
|||||||
|
|
||||||
{status}
|
{status}
|
||||||
""")
|
""")
|
||||||
return theme_label_results
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(Path, mo):
    # Step 3c: Locate all labeled transcripts (assumed precomputed).
    #
    # Creates the labeled-transcript directory if it is missing, collects its
    # `*.json` files in sorted order, and exposes them as `labeled_files`.
    LABELED_DIR = Path("data/labeled_transcripts")
    LABELED_DIR.mkdir(parents=True, exist_ok=True)

    labeled_files = sorted(LABELED_DIR.glob("*.json"))

    _intro = mo.md(f"""### Step 3c: Use Pre-Labeled Transcripts

    Found **{len(labeled_files)}** labeled transcript files in `{LABELED_DIR}`.
    These will be used to aggregate themes across all interviews.
    """)

    # marimo only renders the last expression of a cell, so the intro text and
    # the file list are stacked into one output instead of discarding the intro.
    mo.vstack([_intro, labeled_files])
    return (labeled_files,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(labeled_files):
    # Parse every labeled-transcript JSON file into a flat list of records.
    #
    # Each record has the keys: interview_id, theme, present, relevance, sections.
    # Files that cannot be read or parsed, or whose top-level JSON value is not
    # an object, are skipped (best effort).
    #
    # All helper names are underscore-prefixed so marimo keeps them local to this
    # cell; plain names (for example `json`, which another cell also imports)
    # would count as notebook globals and cause a multiple-definition error.
    import json as _json

    all_labeled_records = []
    for _path in labeled_files:
        try:
            _data = _json.loads(_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: unreadable file. ValueError covers both malformed JSON
            # (json.JSONDecodeError) and bad encodings (UnicodeDecodeError).
            continue

        if not isinstance(_data, dict):
            # A JSON list/scalar has no `.get`; treat it like an unreadable file.
            continue

        # Fall back to the file-name prefix before "__" when no id is stored.
        _interview_id = _data.get("interview_id") or _path.stem.split("__", 1)[0]

        try:
            _relevance = float(_data.get("relevance", 0.0))
        except (TypeError, ValueError):
            _relevance = 0.0

        _sections = _data.get("sections") or []
        if not isinstance(_sections, list):
            _sections = []

        all_labeled_records.append(
            {
                "interview_id": _interview_id,
                "theme": _data.get("theme", ""),
                "present": bool(_data.get("present", False)),
                "relevance": _relevance,
                "sections": _sections,
            }
        )
    return (all_labeled_records,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(all_labeled_records, mo):
    # Step 3d: Derive the full theme and interview sets and offer a theme picker.
    #
    # `all_themes` holds every non-empty theme value found in the records and
    # `all_interviews` every interview id, both de-duplicated and sorted.
    all_themes = sorted({r["theme"] for r in all_labeled_records if r["theme"]})
    all_interviews = sorted({r["interview_id"] for r in all_labeled_records})

    theme_selector = mo.ui.dropdown(
        options={t: t for t in all_themes},
        label="Select theme to explore across all interviews",
    )

    # marimo only renders the last expression of a cell, so the heading and the
    # dropdown are stacked into one output instead of discarding the heading.
    mo.vstack(
        [
            mo.md("### Step 3d: Explore Themes Across All Labeled Transcripts"),
            theme_selector,
        ]
    )
    return all_interviews, theme_selector
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
def _(all_interviews, all_labeled_records, mo, theme_selector):
    # Summarise the theme chosen in `theme_selector` across all labeled records
    # and list every transcript section tagged with it.
    #
    # Temporaries are underscore-prefixed (or live inside comprehensions) so
    # marimo keeps them local to this cell rather than treating them as
    # notebook globals that could clash with other cells.
    import statistics as _statistics

    selected_theme = theme_selector.value
    theme_summary = {}
    theme_sections = []

    if selected_theme:
        _theme_records = [
            rec for rec in all_labeled_records if rec["theme"] == selected_theme
        ]

        # Count distinct interviews, not records, so several label files for the
        # same interview cannot inflate the count or push the share above 1.
        _interviews_with_theme = {
            rec["interview_id"] for rec in _theme_records if rec["present"]
        }
        _relevances = [rec["relevance"] for rec in _theme_records if rec["present"]]

        theme_summary = {
            "theme": selected_theme,
            "num_interviews": len(all_interviews),
            "num_interviews_with_theme": len(_interviews_with_theme),
            "share_of_interviews_with_theme": (
                len(_interviews_with_theme) / len(all_interviews)
                if all_interviews
                else 0.0
            ),
            "avg_relevance_if_present": (
                _statistics.mean(_relevances) if _relevances else 0.0
            ),
        }

        # One row per section; every section carries its record's relevance.
        # Non-dict section entries are ignored because they have no `.get`.
        theme_sections = [
            {
                "interview_id": rec["interview_id"],
                "speaker": sec.get("speaker", ""),
                "section_text": sec.get("section_text", ""),
                "relevance": rec["relevance"],
            }
            for rec in _theme_records
            for sec in rec["sections"]
            if isinstance(sec, dict)
        ]

    _overview = mo.md(
        f"""#### Theme Overview: `{selected_theme or "None selected"}`

    - Total interviews: **{len(all_interviews)}**
    - Interviews where theme is present: **{theme_summary.get("num_interviews_with_theme", 0)}**
    - Share of interviews with theme: **{theme_summary.get("share_of_interviews_with_theme", 0.0):.2f}**
    - Avg. relevance (when present): **{theme_summary.get("avg_relevance_if_present", 0.0):.2f}**
    """
    )

    if theme_sections:
        _details = mo.ui.table(
            [
                {
                    "Interview": row["interview_id"],
                    "Speaker": row["speaker"],
                    "Relevance": f"{row['relevance']:.2f}",
                    "Section": row["section_text"],
                }
                for row in theme_sections
            ]
        )
    else:
        _details = mo.md("_No sections for this theme yet._")

    # marimo renders only the last *expression* of a cell; an if/else statement
    # produces no output, so both parts are emitted as one stacked expression.
    mo.vstack([_overview, _details])
    return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
|
|||||||
Reference in New Issue
Block a user