import json
import random

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import pandas as pd
from ollama import Client


def blue_color_func(
        word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random hex colour from the darker part of the 'Blues' colormap.

    ``word``, ``font_size``, ``position``, ``orientation`` and ``**kwargs``
    are accepted but not used.
    NOTE(review): the signature looks like a word-cloud ``color_func``
    callback -- confirm against the caller.

    Parameters:
    - random_state: Object exposing ``uniform(a, b)``; when falsy (e.g.
      ``None``) the global ``random`` module is used instead.

    Returns:
    - Hex colour string such as ``'#08306b'``.
    """
    # Use the supplied random source for reproducibility when one is given.
    rng = random_state or random

    # 0.0 is white/light and 1.0 is dark blue; sampling from 0.4 upwards keeps
    # the colour away from the pale end of the colormap.
    lightest, darkest = 0.4, 1.0
    shade = rng.uniform(lightest, darkest)

    return mcolors.to_hex(plt.cm.Blues(shade))


def worker_extraction(row, host, model):
    """Extract keywords for one row using a freshly created Ollama client.

    Parameters:
    - row: Mapping-like object indexed by ``'content'`` and ``'tag'``
      (presumably a DataFrame row -- verify against the caller).
    - host: Host passed to ``Client(host=...)``.
    - model: Model name forwarded to ``ollama_keyword_extraction``.

    Returns:
    - Whatever ``ollama_keyword_extraction`` returns for this row.
    """
    # A client is created per call so each worker/thread uses its own instance.
    worker_client = Client(host=host)
    return ollama_keyword_extraction(
        content=row['content'],
        tag=row['tag'],
        client=worker_client,
        model=model,
    )


def _build_prompt(content, tag) -> str:
    """Render the keyword-extraction prompt for one quote and its context tag."""
    # Prompt optimized for small models (Llama 3.2):
    # - Fewer rules, prioritized by importance
    # - Explicit verbatim instruction (prevents truncation errors)
    # - Examples that reinforce exact copying
    # - Positive framing (do X) instead of negative (don't do Y)
    # - Minimal formatting overhead
    #
    # The trailing backslashes are line continuations inside the literal: the
    # segments are joined by the single space before each backslash, so the
    # rendered text is unchanged while the source stays readable.
    return f"""Extract keywords from interview quotes for thematic analysis. \
RULES (in priority order): \
1. Extract only keywords RELEVANT to the given context. Ignore off-topic content. Do NOT invent keywords. \
2. Use words from the quote, but generalize for clustering (e.g., "not youthful" → "traditional"). \
3. Extract 1-5 keywords or short phrases that capture key themes. \
4. Prefer descriptive phrases over vague single words (e.g., "tech forward" not "tech"). \n\
EXAMPLES: \
Context: Chase as a Brand \
Quote: "It's definitely not, like, youthful or trendy." \
Output: {{"keywords": ["traditional", "established"]}} \
Context: App Usability \
Quote: "There are so many options when I try to pay, it's confusing." \
Output: {{"keywords": ["confusing", "overwhelming options"]}} \
Context: Brand Perception \
Quote: "I would say reliable, trustworthy, kind of old-school." \
Output: {{"keywords": ["reliable", "trustworthy", "old-school"]}} \
NOW EXTRACT KEYWORDS: \
Context: {tag} \
Quote: "{content}" \
Output:"""


def _parse_keywords(response_text: str) -> list:
    """Return the ``keywords`` list from the first JSON object in the text.

    Raises:
    - ValueError: if the text contains no ``{``, the JSON is malformed
      (``json.JSONDecodeError`` is a ``ValueError``), or ``keywords`` is not
      a list.
    """
    start_index = response_text.find('{')
    if start_index == -1:
        raise ValueError("No JSON found")

    # raw_decode stops at the end of the first JSON value, so any text the
    # model appends after the object is ignored.
    payload, _ = json.JSONDecoder().raw_decode(response_text[start_index:])

    keywords = payload.get('keywords', [])
    if not isinstance(keywords, list):
        raise ValueError(
            f"'keywords' must be a list, got {type(keywords).__name__}")
    return keywords


def ollama_keyword_extraction(
        content, tag, client: Client, model, max_retries: int = 3) -> list:
    """
    Extract thematic keywords from a quote using an Ollama model.

    Parameters:
    - content: Quote text to extract keywords from
    - tag: Context label inserted into the prompt (e.g., 'App Usability')
    - client: Ollama client used to call ``generate``
    - model: Name of the model to query
    - max_retries: Number of attempts before giving up (default 3)

    Returns:
    - A single-element list whose only item is the list of keywords;
      ``[[]]`` when every attempt fails.
    """
    prompt = _build_prompt(content, tag)

    for attempt in range(max_retries):
        # Reset per attempt so the failure message below never references an
        # unbound name (when generate() itself raises) or stale output from a
        # previous attempt.
        response_text = None
        try:
            resp = client.generate(
                model=model,
                prompt=prompt,
                format='json',
            )
            response_text = resp.response.strip()
            return [_parse_keywords(response_text)]
        except Exception as e:
            # Deliberately broad: extraction is best-effort, so any failure
            # (transport error, bad JSON, wrong shape) triggers a retry.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}. Output was: {response_text}")

    return [[]]