basic parsing working
This commit is contained in:
128
utils/sentiment_analysis.py
Normal file
128
utils/sentiment_analysis.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import random
|
||||
import pandas as pd
|
||||
|
||||
from ollama import Client
|
||||
import json
|
||||
|
||||
def dummy_sentiment_analysis(content, tag):
    """Return a placeholder sentiment result without calling any model.

    Parameters:
    - content: Text content to analyze (unused; kept so the signature lines
      up with the real analyzers).
    - tag: Tag attached to the content. Only tags starting with 'VT -' or
      'CT -' receive a sentiment score.

    Returns:
    - (score, reason): for 'VT -' / 'CT -' tags, a random score drawn from
      -1, 0, 1 together with 'random dummy sentiment'; for every other tag
      the placeholder pair ('test', 'not applicable').
    """
    # A missing tag may arrive as None or a float NaN rather than a string;
    # treat anything that is not a string as "not applicable" instead of
    # failing on .startswith().
    if isinstance(tag, str) and tag.startswith(('VT -', 'CT -')):
        return random.choice([-1, 0, 1]), 'random dummy sentiment'  # Random sentiment for testing

    return 'test', 'not applicable'
|
||||
|
||||
|
||||
|
||||
def _coerce_sentiment(value):
    """Turn -1/0/1 given as a string or float into an int; pass anything else through."""
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float, str)):
        try:
            number = float(value)
        except (ValueError, OverflowError):
            return value
        if number in (-1.0, 0.0, 1.0):
            return int(number)
    return value


def ollama_sentiment_analysis(content, theme, client: "Client", model) -> "tuple[list[str], int | None, str]":
    """
    Perform sentiment analysis using Ollama model.

    Parameters:
    - content: Text content (the quote) to analyze
    - theme: Theme the sentiment is judged against (e.g., 'Speed')
    - client: Ollama client; only `client.generate(model=..., prompt=...)` is called
    - model: Name of the model passed through to `client.generate`

    Returns:
    - (keywords, sentiment, reason)
      - keywords: list of keywords reported by the model ([] if absent)
      - sentiment: -1, 0 or 1; the placeholder 'test' if the model omitted
        the field; None if the response could not be parsed
      - reason: the model's short explanation, 'no reason provided' if the
        field is absent, or 'parsing error' if the response could not be parsed

    Exceptions raised by `client.generate` itself are not caught here.
    """
    # The prompt text is kept flush-left so that no source-code indentation
    # ends up in the text sent to the model.
    prompt = f"""
# Instructions
You are an expert in sentiment analysis and natural language processing. You are given a quote from an interview along with a theme tag. Your task is to analyze the sentiment expressed in the quote in relation to the provided theme, and provide a short explanation for your assessment (max 10 words).

You need to deliver three pieces of information:
1. A list of keywords from the quote quantify or qualify the theme, and that influenced your sentiment analysis (if any).
2. A sentiment score: -1 for negative, 0 for neutral, and 1 for positive sentiment.
3. A brief reason (max 10 words) explaining your sentiment score.


# Guidelines
Keywords should be directly relevant to the theme.

The reason should be extremely concise and to the point:
- Does not need to be a full sentence.
- Sentiment itself does not need to be stated in the explanation.
- If keywords are present in the quote that directly capture the sentiment, give that as the reason..


# Input

Theme: `{theme}`

Quote:
```
{content}
```

# Response Format
Provide your response in the following JSON format:
{{
"keywords": ["<list_of_relevant_keywords_if_any>"],
"sentiment": <sentiment_score>,
"reason": "<brief_explanation_max_10_words>"
}}


# Examples

** Example 1**
- Theme: `Speed`
- Quote: `It just was a little toned down. It was almost like he was talking like this. You know? It almost kind of this was a little slow for me.`

- Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "States speed is slow, indicates dissatisfaction"}}

** Example 2**
- Theme: `Friendliness / Empathy`
- Quote: `Sound very welcoming`

- Response: {{ "keywords": ["welcoming"], "sentiment": 1, "reason": "Uses 'welcoming'" }}

"""

    resp = client.generate(
        model=model,
        prompt=prompt,
    )

    # Best-effort parsing: any failure while reading or decoding the reply is
    # reported and mapped to the 'parsing error' result instead of raising.
    try:
        response_text = resp.response.strip()

        # Extract JSON from response. Decoding starts at the first '{' and
        # stops at the end of that object, so commentary the model adds
        # before or after the JSON (even if it contains braces) is ignored.
        start_index = response_text.find('{')
        if start_index == -1:
            raise ValueError("no JSON object found in response")
        response_json, _ = json.JSONDecoder().raw_decode(response_text, start_index)

        keywords = response_json.get('keywords', [])
        if keywords is None:
            keywords = []
        elif isinstance(keywords, str):
            # A lone keyword returned as a bare string would otherwise be
            # iterated character by character downstream.
            keywords = [keywords]

        sentiment = _coerce_sentiment(response_json.get('sentiment', 'test'))
        reason = response_json.get('reason', 'no reason provided')
        return keywords, sentiment, reason
    except Exception as e:
        print(f"Error parsing response: {e}")
        return [], None, 'parsing error'
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: sends a few sample quotes to an Ollama server at
    # localhost:11434 and prints the resulting table.
    client = Client(
        host="http://localhost:11434"
    )

    sentiment_df = pd.DataFrame({
        'content': [
            "I love this product!",
            "This is the worst service ever.",
            "It's okay, not great but not terrible."
        ],
        'tag': [
            'VT - Personal Experience',
            'VT - Personal Experience',
            'VT - Personal Experience'
        ],
        'manual_analysis': [False, False, True]
    })

    # ollama_sentiment_analysis returns three values (keywords, sentiment,
    # reason), so three result columns are needed; assigning the result to
    # only two columns raises "Columns must be same length as key".
    result_columns = ['keywords', 'sentiment', 'reason']

    # Only rows not flagged for manual analysis are sent to the model.
    model_results = sentiment_df[~sentiment_df['manual_analysis']].apply(
        lambda row: pd.Series(
            ollama_sentiment_analysis(row['content'], row['tag'], client, model='llama3.2:latest'),
            index=result_columns,
        ),
        axis=1
    )

    # The join aligns on the row index, so manually analysed rows keep NaN in
    # the result columns.
    sentiment_df = sentiment_df.join(model_results)

    print(sentiment_df.head())
|
||||
|
||||
Reference in New Issue
Block a user