91 lines
3.0 KiB
Python
91 lines
3.0 KiB
Python
import pandas as pd
|
|
|
|
from ollama import Client
|
|
import json
|
|
|
|
|
|
def worker_extraction(row, host, model):
    """Run keyword extraction for one row using a client created for this call.

    Parameters:
    - row: object indexable by 'content' and 'tag' (presumably a pandas
      DataFrame row -- confirm against the caller)
    - host: passed to ``Client(host=...)``
    - model: model name forwarded to ``ollama_keyword_extraction``

    Returns:
    - whatever ``ollama_keyword_extraction`` returns for this row
    """
    # A fresh client per call, so workers/threads never share one instance.
    per_call_client = Client(host=host)

    request = {
        "content": row['content'],
        "tag": row['tag'],
        "client": per_call_client,
        "model": model,
    }
    return ollama_keyword_extraction(**request)
|
|
|
|
|
|
def _build_keyword_prompt(content, tag) -> str:
    """Render the few-shot keyword-extraction prompt for one quote and its theme."""
    return f"""
### Role
You are a qualitative data analyst. Your task is to extract keywords from a user quote to build a semantic word cluster.

### Guidelines
1. **Quantity:** Extract **1-5** high-value keywords. If the quote only contains 1 valid insight, return only 1 keyword. Do not force extra words.
2. **Specificity:** Avoid vague, single nouns (e.g., "tech", "choice", "system"). Instead, capture the descriptor (e.g., "tech-forward", "payment choice", "legacy system").
3. **Adjectives:** Standalone adjectives are acceptable if they are strong descriptors (e.g., "reliable", "trustworthy", "professional").
4. **Normalize:** Convert verbs to present tense and nouns to singular.
5. **Output Format:** Return a single JSON object with the key "keywords" containing a list of strings.

### Examples

**Input Context:** Chase as a Brand
**Input Quote:** "I would describe it as, you know, like the next big thing, like, you know, tech forward, you know, customer service forward, and just hating that availability."
**Output:** {{ "keywords": ["tech forward", "customer service focused", "availability"] }}

**Input Context:** App Usability
**Input Quote:** "There are so many options when I try to pay, it's confusing."
**Output:** {{ "keywords": ["confusing", "payment options"] }}

**Input Context:** Investment Tools
**Input Quote:** "It is just really reliable."
**Output:** {{ "keywords": ["reliable"] }}

### Input Data
**Context/Theme:** {tag}
**Quote:** "{content}"

### Output
```json
"""


def ollama_keyword_extraction(content, tag, client: "Client", model, max_retries: int = 3) -> list:
    """
    Extract keywords from a quote using an Ollama model.

    Parameters:
    - content: Quote text to extract keywords from
    - tag: Context/theme of the quote, inserted into the prompt
    - client: Object exposing ``generate(model=..., prompt=..., format=...)``
      whose result has a ``response`` string attribute
    - model: Model name passed to ``client.generate``
    - max_retries: Number of attempts before giving up (default 3)

    Returns:
    - A single-element list wrapping the keyword list, i.e. ``[[kw, ...]]``;
      ``[[]]`` when every attempt fails. (The extra nesting is kept from the
      original contract -- presumably for the caller's row assignment; confirm.)
    """
    prompt = _build_keyword_prompt(content, tag)

    for attempt in range(max_retries):
        # Reset on every attempt so the failure log below never hits an
        # unbound name (generate() raised) or reports a previous attempt's text.
        response_text = None
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
                format='json',
            )
            response_text = resp.response.strip()

            # Skip anything the model emitted before the JSON object.
            start_index = response_text.find('{')
            if start_index == -1:
                raise ValueError("No JSON found")

            # raw_decode tolerates trailing text after the first JSON object.
            response_json, _ = json.JSONDecoder().raw_decode(response_text[start_index:])
            keywords = response_json.get('keywords', [])

            # Keep the return shape stable: a lone string becomes a one-item
            # list; any other non-list value counts as a failed attempt.
            if isinstance(keywords, str):
                keywords = [keywords]
            elif not isinstance(keywords, list):
                raise ValueError(f"'keywords' is not a list: {keywords!r}")

            return [keywords]

        except Exception as e:
            # Best-effort: log the failure and retry rather than abort the caller.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}. Output was: {response_text}")

    # All attempts exhausted (or max_retries <= 0).
    return [[]]