import json

import pandas as pd
from ollama import Client


def worker_extraction(row, host, model):
    """Run keyword extraction for one row using a freshly created client.

    Parameters:
    - row: Mapping-like record; its 'content' and 'tag' entries are read
      (presumably a pandas row -- confirm against the caller).
    - host: Passed to ``Client(host=...)``.
    - model: Model name forwarded to ``ollama_keyword_extraction``.

    Returns:
    - Whatever ``ollama_keyword_extraction`` returns for this row.
    """
    # Instantiate local client for this specific worker/thread
    local_client = Client(host=host)
    return ollama_keyword_extraction(
        content=row['content'],
        tag=row['tag'],
        client=local_client,
        model=model,
    )


def ollama_keyword_extraction(content, tag, client: Client, model) -> list:
    """
    Extract keywords from a quote using an Ollama model.

    Parameters:
    - content: Text of the quote to extract keywords from
    - tag: Context/theme of the quote, inserted into the prompt
    - client: Ollama client used to call ``generate``
    - model: Name of the model to query

    Returns:
    - A one-element list wrapping the value of the "keywords" key of the
      model's JSON answer (an empty list when the key is absent), e.g.
      [["reliable"]]. After three failed attempts, returns [[]].
    """
    # Construct prompt for Ollama model.
    # The text is kept flush-left so that no source indentation leaks into
    # the prompt sent to the model.
    prompt = f"""
### Role
You are a qualitative data analyst. Your task is to extract keywords from a user quote to build a semantic word cluster.

### Guidelines
1. **Quantity:** Extract **1-5** high-value keywords. If the quote only contains 1 valid insight, return only 1 keyword. Do not force extra words.
2. **Specificity:** Avoid vague, single nouns (e.g., "tech", "choice", "system"). Instead, capture the descriptor (e.g., "tech-forward", "payment choice", "legacy system").
3. **Adjectives:** Standalone adjectives are acceptable if they are strong descriptors (e.g., "reliable", "trustworthy", "professional").
4. **Normalize:** Convert verbs to present tense and nouns to singular.
5. **Output Format:** Return a single JSON object with the key "keywords" containing a list of strings.

### Examples
**Input Context:** Chase as a Brand
**Input Quote:** "I would describe it as, you know, like the next big thing, like, you know, tech forward, you know, customer service forward, and just hating that availability."
**Output:**
{{ "keywords": ["tech forward", "customer service focused", "availability"] }}

**Input Context:** App Usability
**Input Quote:** "There are so many options when I try to pay, it's confusing."
**Output:**
{{ "keywords": ["confusing", "payment options"] }}

**Input Context:** Investment Tools
**Input Quote:** "It is just really reliable."
**Output:**
{{ "keywords": ["reliable"] }}

### Input Data
**Context/Theme:** {tag}
**Quote:** "{content}"

### Output
```json
"""

    max_retries = 3
    for attempt in range(max_retries):
        # Reset per attempt so the failure message below never references an
        # unbound name (when generate() itself raises) or a stale response
        # left over from an earlier attempt.
        response_text = ""
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
                format='json',
            )
            response_text = resp.response.strip()

            # Extract JSON from response: decode the first object that starts
            # at the first '{' and ignore any trailing text.
            start_index = response_text.find('{')
            if start_index == -1:
                raise ValueError("No JSON found")

            response_json, _ = json.JSONDecoder().raw_decode(
                response_text[start_index:]
            )
            if not isinstance(response_json, dict):
                raise ValueError("JSON response is not an object")

            keywords = response_json.get('keywords', [])
            return [keywords]
        except Exception as e:
            # Deliberately broad: any failure (transport, decoding, response
            # shape) is reported and the request is retried.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}. Output was: {response_text}")

    # Every attempt failed: fall back to an empty keyword list.
    return [[]]