New prompt for thematic analysis added
This commit is contained in:
@@ -13,8 +13,8 @@ def _():
|
||||
VM_NAME = 'hiperf-gpu'
|
||||
MODEL = 'llama3.3:70b'
|
||||
|
||||
client = connect_qumo_ollama(VM_NAME)
|
||||
return MODEL, Path, client, load_srt, mo
|
||||
#client = connect_qumo_ollama(VM_NAME)
|
||||
return MODEL, Path, load_srt, mo
|
||||
|
||||
|
||||
@app.cell(hide_code=True)
|
||||
@@ -186,6 +186,183 @@ def _(mo):
|
||||
return
|
||||
|
||||
|
||||
@app.cell
def _(mo):
    # Step 3a: Define themes for labelling.
    #
    # Creates a text area pre-filled with example themes (one per line) and
    # exposes it as `themes_input` so other cells can read
    # `themes_input.value`.
    themes_input = mo.ui.text_area(
        value="""brand voice and tone
customer experience priorities
design system and consistency
AI and conversational interfaces""",
        label="Themes (one per line)",
        full_width=True,
        rows=6,
    )

    # marimo renders only the last expression of a cell. A bare `mo.md(...)`
    # statement placed before the widget is evaluated and then discarded, so
    # the heading and the input are stacked into a single output instead.
    mo.vstack(
        [
            mo.md("""### Step 3a: Define Themes

            Enter one theme per line. These will be used to
            label each interview transcript. Themes may overlap; the
            same section can relate to multiple themes.
            """),
            themes_input,
        ]
    )
    return (themes_input,)
|
||||
|
||||
|
||||
@app.cell
def _(themes_input):
    # Turn the text-area contents into a list of theme names: one entry per
    # line, surrounding whitespace removed, blank lines dropped. A falsy
    # widget or an empty value yields an empty list.
    if themes_input and themes_input.value:
        raw_lines = themes_input.value.splitlines()
    else:
        raw_lines = []

    theme_list = list(filter(None, map(str.strip, raw_lines)))
    return (theme_list,)
|
||||
|
||||
|
||||
@app.cell
def _(Path, mo):
    # Step 3b setup: output directory and the button that triggers labelling.
    #
    # The names defined here must match what the labelling cell requests as
    # parameters (`OUTPUT_THEME_DIR`, `theme_label_button`); marimo wires
    # cells together purely by variable name.
    OUTPUT_THEME_DIR = Path("data/labels")
    # Create the directory up front so the labelling cell can write into it.
    OUTPUT_THEME_DIR.mkdir(parents=True, exist_ok=True)

    theme_label_button = mo.ui.run_button(label="Run Theme Labelling for This Interview")

    # Only the last expression of a cell is rendered, so the explanatory
    # markdown and the button are stacked into one output rather than
    # leaving the markdown as a discarded intermediate statement.
    mo.vstack(
        [
            mo.md(f"""### Step 3b: LLM-based Theme Labelling

            This step runs an LLM over the current interview transcript
            for each defined theme and saves one JSON file per theme
            for this interview in `{OUTPUT_THEME_DIR}`.

            For each theme, the model will return full sections of the
            conversation (multi-sentence chunks, not just short quotes)
            that are about that theme.
            """),
            theme_label_button,
        ]
    )
    return OUTPUT_THEME_DIR, theme_label_button
|
||||
|
||||
|
||||
@app.cell
def _(
    MODEL,
    OUTPUT_THEME_DIR,
    Path,
    client,
    file_dropdown,
    theme_label_button,
    labeled_transcript,
    mo,
    theme_list,
):
    # Step 3b: run the LLM once per theme over the current transcript and
    # write one JSON file per (interview, theme) into OUTPUT_THEME_DIR.
    #
    # Defines `theme_label_results`: a dict mapping each theme name to the
    # normalised result object (theme, present, relevance, sections).
    #
    # NOTE(review): `client` must be provided by another cell; the setup
    # cell visible in this diff appears to stop returning it - confirm.
    import json
    from datetime import datetime
    # Underscore alias keeps the name cell-local, so it cannot clash with a
    # `timezone` defined by another cell.
    from datetime import timezone as _timezone

    def _build_prompt(theme_name, transcript):
        """Return the labelling prompt for one theme, transcript included."""
        # The literal is delimited with ''' because the prompt itself wraps
        # the transcript in triple double-quotes; delimiting with the same
        # characters ends the string early and drops the transcript.
        return f'''You are an expert qualitative researcher.

You will analyse a single interview transcript for ONE specific theme.

Theme: "{theme_name}"

Tasks:
1. Decide if the theme is present in this interview.
2. If present, estimate how relevant it is on a 0–1 scale
where 0 = not mentioned, 0.5 = moderately important,
1 = central theme of the interview.
3. Identify all sections of the conversation that are
primarily about this theme. A section can span multiple
consecutive utterances and should form a coherent piece
of the dialogue about the theme, not just a single
sentence.

Each section should include:
- the dominant speaker label (or "mixed" if multiple)
- the full section text (one or more sentences)

Return your answer ONLY as a JSON object with this schema:
{{
"theme": string, // the theme name
"present": bool, // whether the theme appears
"relevance": float, // 0.0–1.0
"sections": [
{{
"speaker": string, // main speaker label for the section
"section_text": string // full section text about the theme
}}
]
}}

Transcript:
"""
{transcript}
"""
'''

    def _parse_response(raw, theme_name):
        """Return the JSON object in `raw`, or a parse-error placeholder."""
        candidates = [raw]
        try:
            # Fallback: the outermost {...} span, for replies that wrap the
            # JSON in prose or a code fence.
            candidates.append(raw[raw.index("{"):raw.rindex("}") + 1])
        except ValueError:
            pass  # no braces at all; only the raw text is tried

        for text in candidates:
            try:
                obj = json.loads(text)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            # Valid JSON that is not an object (list, string, number) cannot
            # be normalised below, so it is treated as a parse failure.
            if isinstance(obj, dict):
                return obj

        return {
            "theme": theme_name,
            "present": False,
            "relevance": 0.0,
            "sections": [],
            "_parse_error": True,
            "_raw": raw,
        }

    theme_label_results = {}

    if theme_label_button.value and file_dropdown.value and theme_list:
        interview_id = Path(file_dropdown.value).stem

        for theme in theme_list:
            prompt = _build_prompt(theme, labeled_transcript)

            response = client.generate(model=MODEL, prompt=prompt)
            raw_text = response.response.strip()

            parsed = _parse_response(raw_text, theme)

            # Normalise fields
            parsed["theme"] = parsed.get("theme", theme)
            parsed["present"] = bool(parsed.get("present", False))
            try:
                _relevance = float(parsed.get("relevance", 0.0))
            except (TypeError, ValueError):
                _relevance = 0.0
            # Keep relevance inside the 0-1 scale the prompt asks for; this
            # also maps NaN to 0.0 so the written file stays valid JSON.
            parsed["relevance"] = min(1.0, max(0.0, _relevance))
            if not isinstance(parsed.get("sections"), list):
                parsed["sections"] = []

            theme_label_results[theme] = parsed

            # Write per-interview-per-theme JSON file. Themes are free-form
            # user text, so anything that is not alphanumeric, '.', '_' or
            # '-' (spaces, path separators, ...) is replaced with '_'.
            _safe_theme = "".join(
                _ch if _ch.isalnum() or _ch in "._-" else "_" for _ch in theme
            )
            out_path = OUTPUT_THEME_DIR / f"{interview_id}__{_safe_theme}.json"
            out_data = {
                "interview_id": interview_id,
                "theme": parsed["theme"],
                "present": parsed["present"],
                "relevance": parsed["relevance"],
                "sections": parsed["sections"],
                # Same "<naive ISO timestamp>Z" format as before, without the
                # deprecated datetime.utcnow().
                "generated_at": datetime.now(_timezone.utc).replace(tzinfo=None).isoformat() + "Z",
            }
            out_path.write_text(json.dumps(out_data, ensure_ascii=False, indent=2), encoding="utf-8")

    if theme_label_button.value:
        if not file_dropdown.value:
            status = "No transcript selected."
        elif not theme_list:
            status = "No themes defined. Please add at least one theme."
        else:
            status = f"Labelled {len(theme_label_results)} themes for current interview. JSON files written to '{OUTPUT_THEME_DIR}'."
    else:
        status = "Click 'Run Theme Labelling for This Interview' to start."

    # Last expression of the cell: rendered as the cell's output.
    mo.md(f"""### Theme Labelling Status

    {status}
    """)
    return theme_label_results
|
||||
|
||||
|
||||
@app.cell
|
||||
def _(mo):
|
||||
# Editable analysis task prompt
|
||||
|
||||
Reference in New Issue
Block a user