import marimo

# Version string stored alongside the notebook; presumably the marimo release
# that last wrote this file -- confirm before editing by hand.
__generated_with = "0.18.1"
# Notebook application object; every cell below registers itself on it
# through the `@app.cell` decorator.
app = marimo.App(width="medium")


@app.cell
def _():
    # Setup cell: imports, notebook-wide configuration, and the model client.
    # Everything listed in the return tuple is consumed by later cells.
    import marimo as mo
    import json
    import pandas as pd
    import re
    from pathlib import Path
    from utils import connect_qumo_ollama, load_srt

    # Configuration
    # VM_NAME is handed to connect_qumo_ollama() below; MODEL is the model
    # name passed to client.generate() in the map and reduce cells.
    VM_NAME = 'hiperf-gpu'
    MODEL = 'llama3.3:70b'
    # Directory that is globbed for *.srt transcripts by the file selector.
    TRANSCRIPT_DIR = Path("data/transcripts")
    # Destination of the codebook written by the save cell.
    OUTPUT_FILE = Path("master_codebook.json")

    # NOTE(review): connect_qumo_ollama is a project helper from `utils`;
    # presumably it opens a connection to an Ollama server on the named VM
    # when this cell runs -- confirm against utils.
    client = connect_qumo_ollama(VM_NAME)
    return (
        MODEL,
        OUTPUT_FILE,
        TRANSCRIPT_DIR,
        client,
        json,
        load_srt,
        mo,
        pd,
        re,
    )


@app.cell
def _(mo):
    # Static introduction rendered at the top of the notebook; it outlines the
    # four workflow steps implemented by the cells that follow.
    mo.md(r"""
    # Stage 1: Theme Discovery

    **Goal:** Identify recurring themes across a sample of interviews.

    1.  **Select Transcripts:** Choose 4-5 representative interviews.
    2.  **Extract Topics:** The AI will analyze each transcript to find key topics.
    3.  **Synthesize Themes:** Topics are grouped into a Master Codebook.
    4.  **Refine & Save:** Edit the definitions and save the `master_codebook.json`.
    """)
    return


@app.cell
def _(TRANSCRIPT_DIR, mo):
    # File Selection
    #
    # Offers every *.srt file found directly inside TRANSCRIPT_DIR. The
    # selector shows file names, while its value is the list of matching
    # path strings (the dict maps label -> value).

    # Path.glob yields entries in filesystem order, which is not stable
    # across machines; sort so the option list is deterministic.
    srt_files = sorted(TRANSCRIPT_DIR.glob("*.srt"))
    file_options = {_path.name: str(_path) for _path in srt_files}

    file_selector = mo.ui.multiselect(
        options=file_options,
        label="Select Transcripts (Recommended: 4-5)",
        full_width=True,
    )
    file_selector
    return (file_selector,)


@app.cell
def _(file_selector, mo):
    # Show how many transcripts are currently picked in the selector.
    _selected_count = len(file_selector.value)
    mo.md(f"**Selected:** {_selected_count} files")
    return


@app.cell
def _(mo):
    # Trigger for the discovery pipeline; the map and reduce cells read
    # `start_discovery_btn.value` to decide whether to do any work.
    _button_label = "Start Discovery Process"
    start_discovery_btn = mo.ui.run_button(label=_button_label)
    start_discovery_btn
    return (start_discovery_btn,)


@app.cell
def _(
    MODEL,
    client,
    file_selector,
    json,
    load_srt,
    mo,
    re,
    start_discovery_btn,
):
    # Map Phase: Extract Topics per Transcript
    #
    # For each selected transcript, ask the model for a JSON list of topic
    # strings and pool them in `extracted_topics` for the reduce cell.
    # A file that cannot be read, generated for, or parsed is recorded and
    # reported instead of aborting the batch or being dropped silently.
    extracted_topics = []
    status_callout = mo.md("")

    # Upper bound on transcript characters sent to the model per file.
    _max_context_chars = 15000
    _failed_files = []

    if start_discovery_btn.value and file_selector.value:
        with mo.status.spinner("Analyzing transcripts...") as _spinner:
            for _filepath in file_selector.value:
                try:
                    _transcript = load_srt(_filepath)
                except Exception as _err:
                    # One unreadable transcript must not stop the others.
                    print(f"Error processing {_filepath}: {_err}")
                    _failed_files.append(_filepath)
                    continue

                # Truncate for discovery if too long (optional, but good for speed)
                # Using first 15k chars usually gives enough context for high-level themes
                _context = _transcript[:_max_context_chars]

                _prompt = f"""
                Analyze this interview transcript and list the top 5-7 key topics or themes discussed.
                Focus on: Brand voice, Customer experience, Design systems, and AI.

                Return ONLY a JSON list of strings. Example: ["Inconsistent Tone", "Mobile Latency", "AI Trust"]

                Transcript:
                {_context}...
                """

                try:
                    _response = client.generate(model=MODEL, prompt=_prompt)
                    # Find JSON list in response
                    _match = re.search(r'\[.*\]', _response.response, re.DOTALL)
                    if _match is None:
                        raise ValueError("no JSON list found in model response")
                    _topics = json.loads(_match.group(0))
                except Exception as _err:
                    print(f"Error processing {_filepath}: {_err}")
                    _failed_files.append(_filepath)
                    continue

                # The model may return nested objects or numbers; keep only
                # non-empty strings so the reduce cell can join them safely.
                _clean_topics = [
                    _topic.strip()
                    for _topic in (_topics if isinstance(_topics, list) else [])
                    if isinstance(_topic, str) and _topic.strip()
                ]
                if _clean_topics:
                    extracted_topics.extend(_clean_topics)
                else:
                    print(f"Error processing {_filepath}: response contained no topic strings")
                    _failed_files.append(_filepath)

        _total_files = len(file_selector.value)
        if _failed_files:
            # Surface partial or total failure instead of claiming success.
            status_callout = mo.callout(
                f"⚠️ Extracted {len(extracted_topics)} raw topics, but "
                f"{len(_failed_files)} of {_total_files} files yielded no topics: "
                f"{', '.join(str(_p) for _p in _failed_files)}",
                kind="warn",
            )
        else:
            status_callout = mo.callout(
                f"✅ Extracted {len(extracted_topics)} raw topics from {_total_files} files.",
                kind="success",
            )
    elif start_discovery_btn.value:
        status_callout = mo.callout("Please select at least one file.", kind="warn")

    status_callout
    return (extracted_topics,)


@app.cell
def _(MODEL, client, extracted_topics, json, mo, re, start_discovery_btn):
    # Reduce Phase: Synthesize Themes
    #
    # Sends the pooled raw topics to the model and expects back a JSON array
    # of {"Theme", "Definition", "Color"} objects. `suggested_themes` stays
    # empty until discovery has run; if the reply cannot be turned into a
    # list of objects, a single error row carrying the raw reply is produced
    # so the problem is visible in the editor.
    suggested_themes = []

    if start_discovery_btn.value and extracted_topics:
        with mo.status.spinner("Synthesizing Master Codebook...") as _spinner:
            # Topics originate from model output; coerce to str so a stray
            # non-string item cannot make join() raise TypeError.
            _topics_str = ", ".join(str(_topic) for _topic in extracted_topics)

            _synthesis_prompt = f"""
            You are a qualitative data architect. 

            I have a list of raw topics extracted from multiple interviews:
            [{_topics_str}]

            Task:
            1. Group these into 5-8 distinct, high-level Themes.
            2. Create a definition for each theme.
            3. Assign a hex color code to each.
            4. ALWAYS include a theme named "Other" for miscellaneous insights.

            Return a JSON object with this structure:
            [
                {{"Theme": "Theme Name", "Definition": "Description...", "Color": "#HEXCODE"}},
                ...
            ]
            """

            _response = client.generate(model=MODEL, prompt=_synthesis_prompt)

            _parsed = None
            _match = re.search(r'\[.*\]', _response.response, re.DOTALL)
            if _match:
                try:
                    _parsed = json.loads(_match.group(0))
                except json.JSONDecodeError:
                    # Only malformed JSON is expected here; anything else
                    # should surface as a real error.
                    _parsed = None

            # Accept only a list of objects -- that is the shape the editor
            # cell turns into a DataFrame.
            if isinstance(_parsed, list) and all(isinstance(_row, dict) for _row in _parsed):
                suggested_themes = _parsed
            else:
                suggested_themes = [{"Theme": "Error parsing JSON", "Definition": _response.response, "Color": "#000000"}]

    return (suggested_themes,)


@app.cell
def _(mo, pd, suggested_themes):
    # Interactive Editor
    #
    # Presents the suggested themes (or a one-row placeholder when nothing
    # has been generated yet) in an editable table. The edited table is
    # exposed to the save cell as `theme_editor`.

    # Default empty structure if nothing generated yet
    _placeholder_rows = [
        {"Theme": "Example Theme", "Definition": "Description here...", "Color": "#CCCCCC"}
    ]
    _initial_data = suggested_themes if suggested_themes else _placeholder_rows

    df_themes = pd.DataFrame(_initial_data)

    # NOTE: `column_config=...`, `num_rows="dynamic"` and
    # `mo.ui.column.color_picker` are Streamlit-style options that marimo's
    # data_editor does not provide, so passing them fails before the editor
    # renders. Colors are edited as plain hex strings in the "Color" column.
    theme_editor = mo.ui.data_editor(
        df_themes,
        label="Master Codebook Editor",
    )

    mo.vstack([
        mo.md("### Review & Refine Codebook"),
        mo.md("Edit the themes below. You can add rows, change colors, or refine definitions."),
        theme_editor
    ])
    return (theme_editor,)


@app.cell
def _(mo):
    # Save trigger.
    #
    # The button is defined in its own cell on purpose: marimo re-runs the
    # cells that *reference* a UI element when it is clicked, not the cell
    # that defines it. Reading `save_btn.value` in the defining cell would
    # therefore never observe a click.
    save_btn = mo.ui.run_button(label="Save Master Codebook")
    save_btn
    return (save_btn,)


@app.cell
def _(OUTPUT_FILE, json, mo, save_btn, theme_editor):
    # Write the edited codebook to OUTPUT_FILE when the save button is
    # clicked, and show a confirmation callout afterwards.
    save_message = mo.md("")

    if save_btn.value:
        # Convert the edited table to a list of dicts (one per theme row)
        _codebook = theme_editor.value.to_dict(orient="records")

        # Explicit encoding so the file does not depend on the platform default.
        with open(OUTPUT_FILE, "w", encoding="utf-8") as _f:
            json.dump(_codebook, _f, indent=2)

        save_message = mo.callout(f"✅ Saved {len(_codebook)} themes to `{OUTPUT_FILE}`", kind="success")

    save_message
    return


# Allow the notebook to be executed directly as a script.
if __name__ == "__main__":
    app.run()