LLM processing of sentiment

This commit is contained in:
2025-12-12 14:28:51 +01:00
parent e576f98cce
commit ccc5154b93
5 changed files with 135 additions and 83 deletions

View File

@@ -70,13 +70,13 @@ def csv_to_markdown(df):
return "\n\n".join(lines)
@app.cell
@app.cell(hide_code=True)
def _(file_dropdown, mo, pd):
# Preview
preview = mo.md("")
if file_dropdown.value:
df = pd.read_csv(file_dropdown.value)
md_content = csv_to_markdown(df)
md_content = csv_to_markdown(df.head(10))
preview = mo.md(md_content)
preview

View File

@@ -18,7 +18,7 @@ def _():
client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False)
TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results')
TAGUETTE_EXPORT_DIR = Path('./data/processing/02_taguette_export')
WORKING_DIR = Path('./data/processing/02_taguette_postprocess')
if not WORKING_DIR.exists():
@@ -47,13 +47,18 @@ def _():
@app.cell(hide_code=True)
def _(TAGUETTE_EXPORT_DIR, mo):
mo.md(rf"""
# Step 1: Export All Highlights out of Taguette
# Step 1: Export Data out of Taguette
1. Go to: http://taguette.tail44fa00.ts.net/project/1
2. Select 'Highlights' on left
3. Select 'See all highlights'
4. Top right 'Export this view' > 'CSV'
5. Save to '{TAGUETTE_EXPORT_DIR}/all_tags.csv'
**Highlights**
1. Go to: https://taguette.qumo.io/project/1
2. Select 'Highlights' (left side) > 'See all highlights' > 'Export this view' (top right) > 'CSV'
3. Save to '{TAGUETTE_EXPORT_DIR}/all_tags.csv'
**Tags Codebook**
1. Select 'Project Info' (left side) > 'Export codebook' > 'CSV'
2. Save to '{TAGUETTE_EXPORT_DIR}/codebook.csv'
_NOTE: Sometimes you need to explicitly allow 'Unsafe Download' in the browser's download manager_
""")
return
@@ -67,13 +72,21 @@ def _(mo):
@app.cell
def _(pd):
all_tags_df = pd.read_csv('data/transcripts/taguette_results/all_tags.csv')
def _(TAGUETTE_EXPORT_DIR, pd):
all_tags_df = pd.read_csv(f'{TAGUETTE_EXPORT_DIR}/all_tags.csv')
all_tags_df['_seq_id'] = range(len(all_tags_df))
all_tags_df.head(20)
all_tags_df
return (all_tags_df,)
@app.cell
def _(TAGUETTE_EXPORT_DIR, pd):
codebook_df = pd.read_csv(f'{TAGUETTE_EXPORT_DIR}/codebook.csv')
codebook_df.rename(columns={'description': 'theme_description'}, inplace=True)
codebook_df
return (codebook_df,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
@@ -255,30 +268,51 @@ def _(mo):
@app.cell
def _(client, model_select, pd, sentiment_df):
# for now, create an empty sentiment column with randomized dummy values for testing
# only for 'VT -' and 'CT -' tags
def _(mo):
start_processing_btn = mo.ui.button(
label="Start Sentiment Extraction",
kind="warn",
on_click=lambda val: True
)
start_processing_btn
return (start_processing_btn,)
@app.cell
def _(
client,
codebook_df,
mo,
model_select,
pd,
sentiment_df,
start_processing_btn,
):
from utils import dummy_sentiment_analysis, ollama_sentiment_analysis
# Only run on rows without manual_analysis
# add theme_description to be used in LLM prompt
_df = sentiment_df.merge(codebook_df, on='tag', how='left', suffixes=('', '_codebook'))
# sentiment_df[['sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply(
# lambda row: pd.Series(dummy_sentiment_analysis(row['content'], row['tag'])),
# axis=1
# )
# Wait for start processing button
mo.stop(not start_processing_btn.value, "Click button above to start processing")
sentiment_df[['keywords', 'sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply(
lambda row: pd.Series(ollama_sentiment_analysis(row['content'], row['theme'], client=client, model=model_select.value)),
sentiment_df[['keywords', 'sentiment', 'reason']] = _df[~_df['manual_analysis']].apply(
lambda row: pd.Series(ollama_sentiment_analysis(
content=row['content'],
theme=row['theme'],
theme_description=row['theme_description'],
client=client,
model=model_select.value
)),
axis=1
)
return
@app.cell
def _(sentiment_df):
def _(mo, sentiment_df):
mo.stop(('sentiment' not in sentiment_df.columns), "Run above cells to extract sentiment analysis")
sentiment_df.loc[~sentiment_df['manual_analysis'], ['theme', 'content', 'sentiment', 'reason', 'keywords']]
return
@@ -318,6 +352,13 @@ def _(mo, sentiment_df):
return rows_to_edit, split_rows_editor
@app.cell
def _(split_rows_editor):
split_rows_editor
return
@app.cell(hide_code=True)
def _(mo, rows_to_edit, split_rows_editor):
if split_rows_editor is not None:

View File

@@ -32,7 +32,7 @@ def _(INPUT_DIR, mo):
file_options = {f.stem: str(f) for f in voice_csv_files}
voice_multiselect = mo.ui.multiselect(options=file_options, label="Select Voice CSV Files for Aggregation")
voice_multiselect
return (voice_multiselect,)

View File

@@ -17,6 +17,8 @@ services:
# c) Explicitly override: docker compose run --gpus all ollama
# 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the
# 'devices' section further down as a fallback (less portable).
## UNCOMMENT THE FOLLOWING BLOCK FOR NVIDIA GPU SUPPORT ###
deploy:
resources:
reservations:
@@ -29,6 +31,8 @@ services:
# Visible devices / capabilities for the NVIDIA container runtime
- NVIDIA_VISIBLE_DEVICES=all
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
## ---------- END GPU SUPPORT BLOCK ------------###
# Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors):
# devices:

View File

@@ -12,7 +12,7 @@ def dummy_sentiment_analysis(content, tag):
def ollama_sentiment_analysis(content, theme, client: Client, model) -> tuple[list[str], int, str]:
def ollama_sentiment_analysis(content, theme, theme_description, client: Client, model) -> tuple[list[str], int, str]:
"""
Perform sentiment analysis using Ollama model.
@@ -24,79 +24,86 @@ def ollama_sentiment_analysis(content, theme, client: Client, model) -> tuple[li
- sentiment score and reason
"""
prompt = f"""
# Instructions
You are an expert in sentiment analysis and natural language processing. You are given a quote from an interview along with a theme tag. Your task is to analyze the sentiment expressed in the quote in relation to the provided theme, and provide a short explanation for your assessment (max 10 words).
You need to deliver three pieces of information:
1. A list of keywords from the quote that quantify or qualify the theme, and that influenced your sentiment analysis (if any).
2. A sentiment score: -1 for negative, 0 for neutral, and 1 for positive sentiment.
3. A brief reason (max 10 words) explaining your sentiment score.
# Role
You are an expert in sentiment analysis. Your task is to analyze the sentiment of a quote in relation to a specific theme.
# Guidelines
Keywords should be directly relevant to the theme.
The reason should be extremely concise and to the point:
- Does not need to be a full sentence.
- Sentiment itself does not need to be stated in the explanation.
- If keywords are present in the quote that directly capture the sentiment, give that as the reason.
# Input
Theme: `{theme}`
Theme Description: `{theme_description}`
Quote:
```
{content}
```
# Instructions
1. Analyze the sentiment of the quote specifically regarding the theme.
2. Extract relevant keywords or phrases from the quote. Prioritize specific descriptors found in the text that match or relate to the theme.
3. Assign a sentiment score:
- -1: Negative (complaint, dissatisfaction, criticism)
- 0: Neutral (factual, mixed, or no strong opinion)
- 1: Positive (praise, satisfaction, agreement)
4. Provide a concise reason (max 10 words).
# Constraints
- Return ONLY a valid JSON object.
- Do not use Markdown formatting (no ```json blocks).
- Do not write any Python code or explanations outside the JSON.
- If the quote is irrelevant to the theme, return sentiment 0.
# Response Format
Provide your response in the following JSON format:
{{
"keywords": ["<list_of_relevant_keywords_if_any>"],
"sentiment": <sentiment_score>,
"reason": "<brief_explanation_max_10_words>"
"keywords": ["<list_of_keywords>"],
"sentiment": <integer_score>,
"reason": "<string_reason>"
}}
# Examples
**Example 1**
- Theme: `Speed`
- Quote: `It just was a little toned down. It was almost like he was talking like this. You know? It almost kind of this was a little slow for me.`
- Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "States speed is slow, indicates dissatisfaction"}}
**Example 2**
- Theme: `Friendliness / Empathy`
- Quote: `Sound very welcoming`
- Response: {{ "keywords": ["welcoming"], "sentiment": 1, "reason": "Uses 'welcoming'" }}
Example 1:
Theme: `Speed`
Quote: `It was a little slow for me.`
Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "Dissatisfaction with speed"}}
Example 2:
Theme: `Price`
Quote: `It costs $50.`
Response: {{"keywords": [], "sentiment": 0, "reason": "Factual statement"}}
Example 3:
Theme: `Friendliness`
Quote: `Sound very welcoming.`
Response: {{"keywords": ["welcoming"], "sentiment": 1, "reason": "Positive descriptor used"}}
"""
resp = client.generate(
model=model,
prompt=prompt,
)
try:
response_text = resp.response.strip()
max_retries = 3
for attempt in range(max_retries):
try:
resp = client.generate(
model=model,
prompt=prompt,
)
response_text = resp.response.strip()
# Extract JSON from response
start_index = response_text.find('{')
end_index = response_text.rfind('}') + 1
json_str = response_text[start_index:end_index]
# Extract JSON from response
start_index = response_text.find('{')
end_index = response_text.rfind('}') + 1
if start_index == -1 or end_index == 0:
raise ValueError("No JSON found")
json_str = response_text[start_index:end_index]
response_json = json.loads(json_str)
keywords = response_json.get('keywords', [])
sentiment = response_json.get('sentiment', 'test')
reason = response_json.get('reason', 'no reason provided')
return keywords, sentiment, reason
response_json = json.loads(json_str)
keywords = response_json.get('keywords', [])
sentiment = response_json.get('sentiment', 'test')
reason = response_json.get('reason', 'no reason provided')
return keywords, sentiment, reason
except Exception as e:
print(f"Error parsing response: {e}")
return [], None, 'parsing error'
except Exception as e:
print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
if attempt == max_retries - 1:
return [], None, 'parsing error'
if __name__ == "__main__":