keywords

2025-12-16 14:39:54 -08:00
parent a5ffd8315e
commit 12e14e3c9b
7 changed files with 787 additions and 8 deletions
--- a/utils/keyword_analysis.py
+++ b/utils/keyword_analysis.py
@@ -0,0 +1,78 @@
+import pandas as pd
+
+from ollama import Client
+import json
+
+
+
+def ollama_keyword_extraction(content, tag, client: Client, model) -> list:
+    """
+    Perform sentiment analysis using Ollama model.
+
+    Parameters:
+    - content: Text content to analyze
+    - tag: Tag indicating the type of sentiment analysis (e.g., 'VT - Positive')
+
+    Returns:
+    - sentiment score and reason
+    """
+    
+    # Construct prompt for Ollama model
+    prompt = f"""
+### Role
+You are a qualitative data analyst. Your task is to extract keywords from a user quote to build a semantic word cluster.
+
+### Guidelines
+1. **Quantity:** Extract **1-5** high-value keywords. If the quote only contains 1 valid insight, return only 1 keyword. Do not force extra words.
+2. **Specificity:** Avoid vague, single nouns (e.g., "tech", "choice", "system"). Instead, capture the descriptor (e.g., "tech-forward", "payment choice", "legacy system").
+3. **Adjectives:** Standalone adjectives are acceptable if they are strong descriptors (e.g., "reliable", "trustworthy", "professional").
+4. **Normalize:** Convert verbs to present tense and nouns to singular.
+5. **Output Format:** Return a single JSON object with the key "keywords" containing a list of strings.
+
+### Examples
+
+**Input Context:** Chase as a Brand
+**Input Quote:** "I would describe it as, you know, like the next big thing, like, you know, tech forward, you know, customer service forward, and just hating that availability."
+**Output:** {{ "keywords": ["tech forward", "customer service focused", "availability"] }}
+
+**Input Context:** App Usability
+**Input Quote:** "There are so many options when I try to pay, it's confusing."
+**Output:** {{ "keywords": ["confusing", "payment options"] }}
+
+**Input Context:** Investment Tools
+**Input Quote:** "It is just really reliable."
+**Output:** {{ "keywords": ["reliable"] }}
+
+### Input Data
+**Context/Theme:** {tag}
+**Quote:** "{content}"
+
+### Output
+```json
+"""
+
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            resp = client.generate(
+                model=model,
+                prompt=prompt,
+                format='json',
+            )
+            
+            response_text = resp.response.strip()
+
+            # Extract JSON from response
+            start_index = response_text.find('{')
+            
+            if start_index == -1:
+                raise ValueError("No JSON found")
+
+            response_json, _ = json.JSONDecoder().raw_decode(response_text[start_index:])
+            keywords = response_json.get('keywords', [])
+            return [keywords]
+        
+        except Exception as e:
+            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}. Output was: {response_text}")
+            if attempt == max_retries - 1:
+                return [[]]