# Interview-Analysis/utils/sentiment_analysis.py

import random
import pandas as pd

from ollama import Client
import json

def dummy_sentiment_analysis(content, tag):
    """
    Produce a placeholder sentiment result without calling any model.

    Parameters:
    - content: Text content (accepted for signature parity, not inspected)
    - tag: Tag of the quote; only tags starting with 'VT -' or 'CT -' are scored

    Returns:
    - (score, reason): a random score from -1, 0, 1 with a fixed reason for
      scored tags, otherwise the fixed pair ('test', 'not applicable')
    """
    scored_prefixes = ('VT -', 'CT -')

    # Guard clause: tags outside the scored families get the fixed placeholder.
    if not tag.startswith(scored_prefixes):
        return 'test', 'not applicable'

    score = random.choice([-1, 0, 1])  # Random sentiment for testing
    return score, 'random dummy sentiment'


def _coerce_sentiment(value):
    """Convert numeric-looking sentiment values (e.g. "1", -1.0) to int; return anything else unchanged."""
    # bool is a subclass of int; a JSON true/false is not a score, so it is passed through as-is.
    if isinstance(value, (bool, int)):
        return value
    if isinstance(value, float) and value.is_integer():
        return int(value)
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return value
    return value


def ollama_sentiment_analysis(content, theme, theme_description, client: "Client", model) -> "tuple[list[str], int | str | None, str]":
    """
    Perform theme-specific sentiment analysis of a quote using an Ollama model.

    Parameters:
    - content: Text content (the quote) to analyze
    - theme: Name of the theme the sentiment is judged against
    - theme_description: Short description of the theme, included in the prompt
    - client: Object exposing `generate(model=..., prompt=...)` whose result has
      a `.response` string attribute (an `ollama.Client`)
    - model: Model identifier passed through to `client.generate`

    Returns:
    - (keywords, sentiment, reason):
      - keywords: value of the "keywords" field, or [] if absent
      - sentiment: value of the "sentiment" field, with integer-like strings
        and integral floats converted to int; 'test' if the field is absent;
        None if every attempt failed
      - reason: value of the "reason" field, 'no reason provided' if absent,
        or 'parsing error' if every attempt failed

    The call is attempted up to 3 times; each failure is printed. The function
    does not raise on client or parsing errors.
    """
    prompt = f"""
    # Role
    You are an expert in sentiment analysis. Your task is to analyze the sentiment of a quote in relation to a specific theme.

    # Input
    Theme: `{theme}`
    Theme Description: `{theme_description}`
    Quote:
    ```
    {content}
    ```

    # Instructions
    1. Analyze the sentiment of the quote specifically regarding the theme.
    2. Extract relevant keywords or phrases from the quote. Prioritize specific descriptors found in the text that match or relate to the theme.
    3. Assign a sentiment score:
       - -1: Negative (complaint, dissatisfaction, criticism)
       - 0: Neutral (factual, mixed, or no strong opinion)
       - 1: Positive (praise, satisfaction, agreement)
    4. Provide a concise reason (max 10 words).

    # Constraints
    - Return ONLY a valid JSON object.
    - Do not use Markdown formatting (no ```json blocks).
    - Do not write any Python code or explanations outside the JSON.
    - If the quote is irrelevant to the theme, return sentiment 0.

    # Response Format
    {{
        "keywords": ["<list_of_keywords>"],
        "sentiment": <integer_score>,
        "reason": "<string_reason>"
    }}

    # Examples

    Example 1:
    Theme: `Speed`
    Quote: `It was a little slow for me.`
    Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "Dissatisfaction with speed"}}

    Example 2:
    Theme: `Price`
    Quote: `It costs $50.`
    Response: {{"keywords": [], "sentiment": 0, "reason": "Factual statement"}}

    Example 3:
    Theme: `Friendliness`
    Quote: `Sound very welcoming.`
    Response: {{"keywords": ["welcoming"], "sentiment": 1, "reason": "Positive descriptor used"}}
    """

    max_retries = 3
    for attempt in range(max_retries):
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
            )
            response_text = resp.response.strip()

            # The model may wrap the JSON in extra prose, so keep only the span
            # from the first '{' to the last '}'.
            start_index = response_text.find('{')
            end_index = response_text.rfind('}') + 1
            if start_index == -1 or end_index == 0:
                raise ValueError("No JSON found")

            response_json = json.loads(response_text[start_index:end_index])
        except Exception as e:
            # Deliberately broad: this is a best-effort retry boundary covering
            # both client/transport failures and malformed model output.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
            continue

        # A successfully parsed string that starts with '{' is always a JSON
        # object, so response_json is a dict here.
        keywords = response_json.get('keywords', [])
        sentiment = _coerce_sentiment(response_json.get('sentiment', 'test'))
        reason = response_json.get('reason', 'no reason provided')
        return keywords, sentiment, reason

    # Every attempt failed.
    return [], None, 'parsing error'


if __name__ == "__main__":
    # Manual smoke test. It talks to an Ollama server at localhost:11434 and
    # uses the 'llama3.2:latest' model, so both must be available to run it.

    client = Client(
            host="http://localhost:11434"
        )

    sentiment_df = pd.DataFrame({
        'content': [
            "I love this product!",
            "This is the worst service ever.",
            "It's okay, not great but not terrible."
        ],
        'tag': [
            'VT - Personal Experience',
            'VT - Personal Experience',
            'VT - Personal Experience'
        ],
        'manual_analysis': [False, False, True]
    })

    # Demo-only description for the single theme used above.
    theme_description = "The speaker's own first-hand experience"

    # ollama_sentiment_analysis returns three values, so three columns are
    # needed to receive them.
    result_columns = ['keywords', 'sentiment', 'reason']

    # Only rows not flagged for manual analysis are sent to the model; the
    # assignment aligns on the index, so skipped rows end up with NaN.
    sentiment_df[result_columns] = sentiment_df[~sentiment_df['manual_analysis']].apply(
        lambda row: pd.Series(
            ollama_sentiment_analysis(
                row['content'],
                row['tag'],
                theme_description,
                client,
                model='llama3.2:latest',
            ),
            index=result_columns,
        ),
        axis=1
    )

    print(sentiment_df.head())