Files
Interview-Analysis/utils/sentiment_analysis.py

136 lines
4.1 KiB
Python

import random
import pandas as pd
from ollama import Client
import json
def dummy_sentiment_analysis(content, tag):
    """Return a placeholder sentiment without calling any model.

    Tags that begin with 'VT -' or 'CT -' receive a random score drawn from
    -1, 0 and 1. Every other tag receives the string 'test' in place of a
    score. ``content`` is not inspected.

    Returns:
    - (sentiment, reason) tuple
    """
    scored_prefixes = ('VT -', 'CT -')
    if not tag.startswith(scored_prefixes):
        return 'test', 'not applicable'
    score = random.choice([-1, 0, 1])  # Random sentiment for testing
    return score, 'random dummy sentiment'
def _build_sentiment_prompt(content, theme, theme_description):
    """Render the instruction prompt for one quote/theme pair."""
    return f"""
# Role
You are an expert in sentiment analysis. Your task is to analyze the sentiment of a quote in relation to a specific theme.
# Input
Theme: `{theme}`
Theme Description: `{theme_description}`
Quote:
```
{content}
```
# Instructions
1. Analyze the sentiment of the quote specifically regarding the theme.
2. Extract relevant keywords or phrases from the quote. Prioritize specific descriptors found in the text that match or relate to the theme.
3. Assign a sentiment score:
- -1: Negative (complaint, dissatisfaction, criticism)
- 0: Neutral (factual, mixed, or no strong opinion)
- 1: Positive (praise, satisfaction, agreement)
4. Provide a concise reason (max 10 words).
# Constraints
- Return ONLY a valid JSON object.
- Do not use Markdown formatting (no ```json blocks).
- Do not write any Python code or explanations outside the JSON.
- If the quote is irrelevant to the theme, return sentiment 0.
# Response Format
{{
"keywords": ["<list_of_keywords>"],
"sentiment": <integer_score>,
"reason": "<string_reason>"
}}
# Examples
Example 1:
Theme: `Speed`
Quote: `It was a little slow for me.`
Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "Dissatisfaction with speed"}}
Example 2:
Theme: `Price`
Quote: `It costs $50.`
Response: {{"keywords": [], "sentiment": 0, "reason": "Factual statement"}}
Example 3:
Theme: `Friendliness`
Quote: `Sound very welcoming.`
Response: {{"keywords": ["welcoming"], "sentiment": 1, "reason": "Positive descriptor used"}}
"""


def _parse_sentiment_response(response_text):
    """Decode the first JSON object embedded in the model's reply text."""
    start_index = response_text.find('{')
    if start_index == -1 or '}' not in response_text:
        raise ValueError("No JSON found")
    # raw_decode stops at the end of the first complete JSON value, so text
    # the model appends after the object (even text containing braces) no
    # longer invalidates an otherwise well-formed answer. Because decoding
    # starts on '{', a successful result is always a dict.
    response_json, _ = json.JSONDecoder().raw_decode(response_text, start_index)
    return response_json


def ollama_sentiment_analysis(content, theme, theme_description, client: Client, model) -> "tuple[list[str], int | str | None, str]":
    """
    Perform sentiment analysis of a quote against a theme using an Ollama model.

    The model is asked to reply with a JSON object holding 'keywords',
    'sentiment' and 'reason'. The call is attempted up to three times; an
    attempt fails when the request raises or no JSON object can be decoded
    from the reply.

    Parameters:
    - content: Text content (the quote) to analyze
    - theme: Name of the theme the quote is judged against
    - theme_description: Description of the theme, inserted into the prompt
    - client: Ollama client whose ``generate`` method is called
    - model: Model name passed through to ``client.generate``

    Returns:
    - (keywords, sentiment, reason) tuple. Values are taken from the model's
      JSON as-is; missing keys fall back to ``[]``, ``'test'`` and
      ``'no reason provided'`` respectively. If every attempt fails the
      result is ``([], None, 'parsing error')``.
    """
    prompt = _build_sentiment_prompt(content, theme, theme_description)
    max_retries = 3
    for attempt in range(max_retries):
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
            )
            response_json = _parse_sentiment_response(resp.response.strip())
        except Exception as e:
            # Deliberately broad: any failure of the request or of decoding
            # the reply is treated as a failed attempt and retried.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
            continue
        keywords = response_json.get('keywords', [])
        sentiment = response_json.get('sentiment', 'test')
        reason = response_json.get('reason', 'no reason provided')
        return keywords, sentiment, reason
    # All attempts failed.
    return [], None, 'parsing error'
if __name__ == "__main__":
    # Manual smoke test: score a few sample quotes against a local Ollama server.
    client = Client(
        host="http://localhost:11434"
    )
    sentiment_df = pd.DataFrame({
        'content': [
            "I love this product!",
            "This is the worst service ever.",
            "It's okay, not great but not terrible."
        ],
        'tag': [
            'VT - Personal Experience',
            'VT - Personal Experience',
            'VT - Personal Experience'
        ],
        # Demo-only placeholder text for the prompt's "Theme Description" field.
        'tag_description': [
            "First-hand experience with the product or service",
            "First-hand experience with the product or service",
            "First-hand experience with the product or service"
        ],
        'manual_analysis': [False, False, True]
    })
    # Only rows not flagged for manual analysis are sent to the model.
    pending = sentiment_df[~sentiment_df['manual_analysis']]
    # ollama_sentiment_analysis takes (content, theme, theme_description,
    # client, model) and returns three values, so all five arguments are
    # supplied and the three results are given explicit column names.
    results = pending.apply(
        lambda row: pd.Series(
            ollama_sentiment_analysis(
                row['content'],
                row['tag'],
                row['tag_description'],
                client,
                model='llama3.2:latest',
            ),
            index=['keywords', 'sentiment', 'reason'],
        ),
        axis=1,
    )
    # Index-aligned left join: skipped rows get NaN in the result columns.
    sentiment_df = sentiment_df.join(results)
    print(sentiment_df.head())