architecture overview for afstemming + reference

2025-12-01 16:58:11 +01:00
parent 9499d6c068
commit 21e4ab366a
4 changed files with 281 additions and 2 deletions
--- a/Architecture_Overview.py
+++ b/Architecture_Overview.py
@@ -0,0 +1,127 @@
+import marimo
+
+__generated_with = "0.18.0"  # presumably the marimo version that wrote this file; confirm before hand-editing
+app = marimo.App(width="medium")  # App instance whose `.cell` decorator wraps every cell function below
+
+
+@app.cell
+def _():  # Setup cell: imports marimo under the alias `mo`.
+    import marimo as mo
+    return (mo,)  # hands `mo` back in a 1-tuple; the later cells take `mo` as their parameter
+
+
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(r"""
+    # Interview Analysis Pipeline Architecture
+
+    **Project Goal:** Synthesize insights from 26 stakeholder interviews into a unified report.
+
+    **Input:** 26 Interview Transcripts (`.srt`)
+    **Output:** Comprehensive Qualitative Analysis Report
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Markdown cell: "High-Level Workflow" heading and a one-sentence summary of the 3-stage pipeline.
+    mo.md(r"""
+    ## High-Level Workflow
+
+    The analysis follows a structured **3-Stage Pipeline** to ensure consistency across all interviews while leveraging the reasoning capabilities of Large Language Models (LLMs).
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Markdown cell: Stage 1 (Discovery) - sample input, process steps, and the `master_codebook.json` output.
+    mo.md(r"""
+    ## Stage 1: Discovery (Theme Definition)
+
+    **Goal:** Establish the "Rules of the Game" to ensure consistent analysis.
+
+    1.  **Input:** A representative sample of 4-5 interviews.
+    2.  **Process:**
+        *   Exploratory analysis to identify recurring topics.
+        *   Grouping topics into **Themes**.
+        *   Defining the **"Other"** category for emerging insights that don't fit established themes.
+    3.  **Output:** `master_codebook.json`
+        *   Contains Theme Names, Definitions, and Color Codes.
+        *   Serves as the strict instruction set for the AI in Stage 2.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Diagram cell: Mermaid flowchart (`graph TD`) of the pipeline, from raw transcripts to the final report.
+    mo.mermaid("""
+    graph TD
+        A[Raw Transcripts] -->|Sample 4-5| B(Stage 1: Discovery)
+        B -->|Generate| C[Master Codebook]
+        C -->|Input| D(Stage 2: Theme Coding)
+        A -->|All 26 Files| D
+        D -->|Extract| E[Structured Dataset]
+        E -->|Aggregate| F(Stage 3: Synthesis)
+        F -->|Generate| G[Final Report]
+
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Markdown cell: Stage 2 (Structured Theme Coding) - inputs, per-quote extraction, and the `coded_segments.csv` columns.
+    mo.md(r"""
+    ## Stage 2: Structured Theme Coding (Extraction)
+
+    **Goal:** Convert unstructured text into a structured dataset.
+
+    1.  **Input:** All 26 Transcripts + `master_codebook.json`.
+    2.  **Process:**
+        *   The LLM analyzes each transcript segment-by-segment.
+        *   It extracts specific quotes that match a Theme Definition.
+        *   **Granular Sentiment Analysis:** For each quote, the model identifies:
+            *   **Subject:** The specific topic/object being discussed (e.g., "Login Flow", "Brand Tone").
+            *   **Sentiment:** Positive / Neutral / Negative.
+    3.  **Output:** `coded_segments.csv`
+        *   Columns: `Source_File`, `Speaker`, `Theme`, `Quote`, `Subject`, `Sentiment`, `Context`.
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Markdown cell: Stage 3 (Synthesis & Reporting) - input dataset, synthesis steps, and final report sections.
+    mo.md(r"""
+    ## Stage 3: Synthesis & Reporting
+
+    **Goal:** Derive conclusions from the aggregated data.
+
+    1.  **Input:** `coded_segments.csv` (The consolidated dataset).
+    2.  **Process:**
+        *   **Theme Synthesis:** All quotes for "Theme A" are analyzed together to find patterns, contradictions, and consensus.
+        *   **"Other" Review:** The "Other" category is manually or computationally reviewed to identify missed signals.
+        *   **Global Synthesis:** Cross-theme analysis to build the final narrative.
+    3.  **Output:** Final Report
+        *   Executive Summary
+        *   Theme-by-Theme Deep Dives (with supporting quotes)
+        *   Strategic Recommendations
+    """)
+    return
+
+
+@app.cell(hide_code=True)
+def _(mo):  # Markdown cell: "Technical Infrastructure" table listing model, compute, orchestration and storage.
+    mo.md(r"""
+    ## Technical Infrastructure
+
+    | Component | Specification | Role |
+    |-----------|---------------|------|
+    | **Model** | `llama3.3:70b` | Primary reasoning engine (128k context) |
+    | **Compute** | NVIDIA H100 (80GB) | High-performance inference |
+    | **Orchestration** | Python + Marimo | Pipeline management and UI |
+    | **Storage** | Local JSON/CSV | Data persistence |
+    """)
+    return
+
+
+if __name__ == "__main__":  # only when this file is executed directly rather than imported
+    app.run()