136 lines
4.1 KiB
Python
136 lines
4.1 KiB
Python
import random
|
|
import pandas as pd
|
|
|
|
from ollama import Client
|
|
import json
|
|
|
|
def dummy_sentiment_analysis(content, tag):
    """Return a placeholder ``(sentiment, reason)`` pair without calling a model.

    Tags that start with ``'VT -'`` or ``'CT -'`` receive a randomly chosen
    score from ``-1``, ``0`` or ``1`` together with the reason
    ``'random dummy sentiment'``. Any other tag yields the fixed pair
    ``('test', 'not applicable')``.

    Parameters:
    - content: Accepted for signature compatibility; it is not inspected.
    - tag: Tag string whose prefix decides which branch is taken.

    Returns:
    - tuple of (sentiment, reason)
    """
    scored_prefixes = ('VT -', 'CT -')
    if not tag.startswith(scored_prefixes):
        return 'test', 'not applicable'

    # Random sentiment for testing
    score = random.choice([-1, 0, 1])
    return score, 'random dummy sentiment'
|
|
|
|
|
|
|
|
def _build_sentiment_prompt(content, theme, theme_description):
    """Render the instruction prompt for one quote/theme pair."""
    return f"""
# Role
You are an expert in sentiment analysis. Your task is to analyze the sentiment of a quote in relation to a specific theme.

# Input
Theme: `{theme}`
Theme Description: `{theme_description}`
Quote:
```
{content}
```

# Instructions
1. Analyze the sentiment of the quote specifically regarding the theme.
2. Extract relevant keywords or phrases from the quote. Prioritize specific descriptors found in the text that match or relate to the theme.
3. Assign a sentiment score:
- -1: Negative (complaint, dissatisfaction, criticism)
- 0: Neutral (factual, mixed, or no strong opinion)
- 1: Positive (praise, satisfaction, agreement)
4. Provide a concise reason (max 10 words).

# Constraints
- Return ONLY a valid JSON object.
- Do not use Markdown formatting (no ```json blocks).
- Do not write any Python code or explanations outside the JSON.
- If the quote is irrelevant to the theme, return sentiment 0.

# Response Format
{{
"keywords": ["<list_of_keywords>"],
"sentiment": <integer_score>,
"reason": "<string_reason>"
}}

# Examples

Example 1:
Theme: `Speed`
Quote: `It was a little slow for me.`
Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "Dissatisfaction with speed"}}

Example 2:
Theme: `Price`
Quote: `It costs $50.`
Response: {{"keywords": [], "sentiment": 0, "reason": "Factual statement"}}

Example 3:
Theme: `Friendliness`
Quote: `Sound very welcoming.`
Response: {{"keywords": ["welcoming"], "sentiment": 1, "reason": "Positive descriptor used"}}
"""


def _extract_json_object(text):
    """Return the first JSON object embedded in ``text``.

    Decoding is attempted at each ``{`` in turn and stops at the end of the
    first object that parses, so prose or extra braces after the object do not
    break parsing (slicing from the first ``{`` to the last ``}`` would).

    Raises:
    - ValueError: if ``text`` contains no ``{`` at all.
    - json.JSONDecodeError: if no ``{`` starts a valid JSON object (the error
      from the last attempt is raised).
    """
    decoder = json.JSONDecoder()
    last_error = None
    start = text.find('{')
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError as err:
            last_error = err
            start = text.find('{', start + 1)
        else:
            return parsed
    if last_error is not None:
        raise last_error
    raise ValueError("No JSON found")


def ollama_sentiment_analysis(content, theme, theme_description, client: "Client", model) -> "tuple[list[str], int | str | None, str]":
    """
    Perform theme-specific sentiment analysis on a quote using an Ollama model.

    The model is asked to answer with a JSON object holding ``keywords``,
    ``sentiment`` and ``reason``. The request is attempted up to three times;
    any exception (request failure or unparsable reply) counts as a failed
    attempt and is printed.

    Parameters:
    - content: Text content (the quote) to analyze
    - theme: Theme the sentiment is judged against
    - theme_description: Description of the theme, included in the prompt
    - client: Object exposing ``generate(model=..., prompt=...)`` whose result
      has a ``response`` string attribute
    - model: Model name forwarded to ``client.generate``

    Returns:
    - tuple of (keywords, sentiment, reason). Values are taken from the model's
      JSON as-is; missing keys fall back to ``[]``, ``'test'`` and
      ``'no reason provided'`` respectively. If every attempt fails the result
      is ``([], None, 'parsing error')``.
    """
    prompt = _build_sentiment_prompt(content, theme, theme_description)

    max_retries = 3
    for attempt in range(max_retries):
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
            )
            response_json = _extract_json_object(resp.response.strip())
        except Exception as e:
            # Deliberately broad: a transport error and a malformed reply are
            # both treated as "try again" rather than crashing the caller.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
            continue

        keywords = response_json.get('keywords', [])
        sentiment = response_json.get('sentiment', 'test')
        reason = response_json.get('reason', 'no reason provided')
        return keywords, sentiment, reason

    # All attempts failed.
    return [], None, 'parsing error'
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: runs the Ollama-backed analysis over a tiny in-memory
    # frame. Talks to the host configured below, so it only works when that
    # server is reachable and serves the requested model.
    client = Client(
        host="http://localhost:11434"
    )

    sentiment_df = pd.DataFrame({
        'content': [
            "I love this product!",
            "This is the worst service ever.",
            "It's okay, not great but not terrible."
        ],
        'tag': [
            'VT - Personal Experience',
            'VT - Personal Experience',
            'VT - Personal Experience'
        ],
        'manual_analysis': [False, False, True]
    })

    # The frame has no description column, so the demo uses one placeholder
    # description for every row's tag.
    demo_theme_description = "Statements about the speaker's own experience"

    # ollama_sentiment_analysis returns (keywords, sentiment, reason), so three
    # target columns are needed. Rows flagged for manual analysis are skipped
    # and end up with missing values in these columns.
    result_columns = ['keywords', 'sentiment', 'reason']
    sentiment_df[result_columns] = sentiment_df[~sentiment_df['manual_analysis']].apply(
        lambda row: pd.Series(
            ollama_sentiment_analysis(
                row['content'],
                row['tag'],
                demo_theme_description,
                client,
                model='llama3.2:latest',
            ),
            index=result_columns,
        ),
        axis=1
    )

    print(sentiment_df.head())
|
|
|