LLM processing of sentiment
This commit is contained in:
@@ -70,13 +70,13 @@ def csv_to_markdown(df):
|
|||||||
return "\n\n".join(lines)
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell(hide_code=True)
|
||||||
def _(file_dropdown, mo, pd):
|
def _(file_dropdown, mo, pd):
|
||||||
# Preview
|
# Preview
|
||||||
preview = mo.md("")
|
preview = mo.md("")
|
||||||
if file_dropdown.value:
|
if file_dropdown.value:
|
||||||
df = pd.read_csv(file_dropdown.value)
|
df = pd.read_csv(file_dropdown.value)
|
||||||
md_content = csv_to_markdown(df)
|
md_content = csv_to_markdown(df.head(10))
|
||||||
preview = mo.md(md_content)
|
preview = mo.md(md_content)
|
||||||
|
|
||||||
preview
|
preview
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ def _():
|
|||||||
|
|
||||||
client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False)
|
client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False)
|
||||||
|
|
||||||
TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results')
|
TAGUETTE_EXPORT_DIR = Path('./data/processing/02_taguette_export')
|
||||||
WORKING_DIR = Path('./data/processing/02_taguette_postprocess')
|
WORKING_DIR = Path('./data/processing/02_taguette_postprocess')
|
||||||
|
|
||||||
if not WORKING_DIR.exists():
|
if not WORKING_DIR.exists():
|
||||||
@@ -47,13 +47,18 @@ def _():
|
|||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(TAGUETTE_EXPORT_DIR, mo):
|
def _(TAGUETTE_EXPORT_DIR, mo):
|
||||||
mo.md(rf"""
|
mo.md(rf"""
|
||||||
# Step 1: Export All Highlights out of Taguette
|
# Step 1: Export Data out of Taguette
|
||||||
|
|
||||||
1. Go to: http://taguette.tail44fa00.ts.net/project/1
|
**Highlights**
|
||||||
2. Select 'Highlights' on left
|
1. Go to: https://taguette.qumo.io/project/1
|
||||||
3. Select 'See all hightlights'
|
2. Select 'Highlights' (left side) > 'See all highlights' > 'Export this view' (top right) > 'CSV'
|
||||||
4. Top right 'Export this view' > 'CSV'
|
3. Save to '{TAGUETTE_EXPORT_DIR}/all_tags.csv'
|
||||||
5. Save to '{TAGUETTE_EXPORT_DIR}/all_tags.csv'
|
|
||||||
|
**Tags Codebook**
|
||||||
|
1. Select 'Project Info' (left side) > 'Export codebook' > 'CSV'
|
||||||
|
2. Save to '{TAGUETTE_EXPORT_DIR}/codebook.csv'
|
||||||
|
|
||||||
|
_NOTE: Sometimes you need to explicitly allow 'Unsafe Download' in the browser's download manager_
|
||||||
""")
|
""")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -67,13 +72,21 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(pd):
|
def _(TAGUETTE_EXPORT_DIR, pd):
|
||||||
all_tags_df = pd.read_csv('data/transcripts/taguette_results/all_tags.csv')
|
all_tags_df = pd.read_csv(f'{TAGUETTE_EXPORT_DIR}/all_tags.csv')
|
||||||
all_tags_df['_seq_id'] = range(len(all_tags_df))
|
all_tags_df['_seq_id'] = range(len(all_tags_df))
|
||||||
all_tags_df.head(20)
|
all_tags_df
|
||||||
return (all_tags_df,)
|
return (all_tags_df,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(TAGUETTE_EXPORT_DIR, pd):
|
||||||
|
codebook_df = pd.read_csv(f'{TAGUETTE_EXPORT_DIR}/codebook.csv')
|
||||||
|
codebook_df.rename(columns={'description': 'theme_description'}, inplace=True)
|
||||||
|
codebook_df
|
||||||
|
return (codebook_df,)
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo):
|
def _(mo):
|
||||||
mo.md(r"""
|
mo.md(r"""
|
||||||
@@ -255,30 +268,51 @@ def _(mo):
|
|||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(client, model_select, pd, sentiment_df):
|
def _(mo):
|
||||||
# for now, create an empty sentiment column with randomized dummy values for testing
|
start_processing_btn = mo.ui.button(
|
||||||
# only for 'VT -' and 'CT -' tags
|
label="Start Sentiment Extraction",
|
||||||
|
kind="warn",
|
||||||
|
on_click=lambda val: True
|
||||||
|
)
|
||||||
|
start_processing_btn
|
||||||
|
return (start_processing_btn,)
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(
|
||||||
|
client,
|
||||||
|
codebook_df,
|
||||||
|
mo,
|
||||||
|
model_select,
|
||||||
|
pd,
|
||||||
|
sentiment_df,
|
||||||
|
start_processing_btn,
|
||||||
|
):
|
||||||
from utils import dummy_sentiment_analysis, ollama_sentiment_analysis
|
from utils import dummy_sentiment_analysis, ollama_sentiment_analysis
|
||||||
|
|
||||||
# Only run on rows without manual_analysis
|
# add theme_description to be used in LLM prompt
|
||||||
|
_df = sentiment_df.merge(codebook_df, on='tag', how='left', suffixes=('', '_codebook'))
|
||||||
|
|
||||||
# sentiment_df[['sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply(
|
# Wait for start processing button
|
||||||
# lambda row: pd.Series(dummy_sentiment_analysis(row['content'], row['tag'])),
|
mo.stop(not start_processing_btn.value, "Click button above to start processing")
|
||||||
# axis=1
|
|
||||||
# )
|
|
||||||
|
|
||||||
sentiment_df[['keywords', 'sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply(
|
|
||||||
lambda row: pd.Series(ollama_sentiment_analysis(row['content'], row['theme'], client=client, model=model_select.value)),
|
sentiment_df[['keywords', 'sentiment', 'reason']] = _df[~_df['manual_analysis']].apply(
|
||||||
|
lambda row: pd.Series(ollama_sentiment_analysis(
|
||||||
|
content=row['content'],
|
||||||
|
theme=row['theme'],
|
||||||
|
theme_description=row['theme_description'],
|
||||||
|
client=client,
|
||||||
|
model=model_select.value
|
||||||
|
)),
|
||||||
axis=1
|
axis=1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell
|
@app.cell
|
||||||
def _(sentiment_df):
|
def _(mo, sentiment_df):
|
||||||
|
mo.stop(('sentiment' not in sentiment_df.columns), "Run above cells to extract sentiment analysis")
|
||||||
sentiment_df.loc[~sentiment_df['manual_analysis'], ['theme', 'content', 'sentiment', 'reason', 'keywords']]
|
sentiment_df.loc[~sentiment_df['manual_analysis'], ['theme', 'content', 'sentiment', 'reason', 'keywords']]
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -318,6 +352,13 @@ def _(mo, sentiment_df):
|
|||||||
return rows_to_edit, split_rows_editor
|
return rows_to_edit, split_rows_editor
|
||||||
|
|
||||||
|
|
||||||
|
@app.cell
|
||||||
|
def _(split_rows_editor):
|
||||||
|
split_rows_editor
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.cell(hide_code=True)
|
@app.cell(hide_code=True)
|
||||||
def _(mo, rows_to_edit, split_rows_editor):
|
def _(mo, rows_to_edit, split_rows_editor):
|
||||||
if split_rows_editor is not None:
|
if split_rows_editor is not None:
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ def _(INPUT_DIR, mo):
|
|||||||
file_options = {f.stem: str(f) for f in voice_csv_files}
|
file_options = {f.stem: str(f) for f in voice_csv_files}
|
||||||
|
|
||||||
voice_multiselect = mo.ui.multiselect(options=file_options, label="Select Voice CSV Files for Aggregation")
|
voice_multiselect = mo.ui.multiselect(options=file_options, label="Select Voice CSV Files for Aggregation")
|
||||||
voice_multiselect
|
|
||||||
return (voice_multiselect,)
|
return (voice_multiselect,)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ services:
|
|||||||
# c) Explicitly override: docker compose run --gpus all ollama
|
# c) Explicitly override: docker compose run --gpus all ollama
|
||||||
# 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the
|
# 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the
|
||||||
# 'devices' section further down as a fallback (less portable).
|
# 'devices' section further down as a fallback (less portable).
|
||||||
|
|
||||||
|
## UNCOMMENT THE FOLLOWING BLOCK FOR NVIDIA GPU SUPPORT ###
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
@@ -29,6 +31,8 @@ services:
|
|||||||
# Visible devices / capabilities for the NVIDIA container runtime
|
# Visible devices / capabilities for the NVIDIA container runtime
|
||||||
- NVIDIA_VISIBLE_DEVICES=all
|
- NVIDIA_VISIBLE_DEVICES=all
|
||||||
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
|
## ---------- END GPU SUPPORT BLOCK ------------###
|
||||||
|
|
||||||
|
|
||||||
# Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors):
|
# Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors):
|
||||||
# devices:
|
# devices:
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ def dummy_sentiment_analysis(content, tag):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def ollama_sentiment_analysis(content, theme, client: Client, model) -> tuple[list[str], int, str]:
|
def ollama_sentiment_analysis(content, theme, theme_description, client: Client, model) -> tuple[list[str], int, str]:
|
||||||
"""
|
"""
|
||||||
Perform sentiment analysis using Ollama model.
|
Perform sentiment analysis using Ollama model.
|
||||||
|
|
||||||
@@ -24,79 +24,86 @@ def ollama_sentiment_analysis(content, theme, client: Client, model) -> tuple[li
|
|||||||
- sentiment score and reason
|
- sentiment score and reason
|
||||||
"""
|
"""
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
# Instructions
|
# Role
|
||||||
You are an expert in sentiment analysis and natural language processing. You are given a quote from an interview along with a theme tag. Your task is to analyze the sentiment expressed in the quote in relation to the provided theme, and provide a short explanation for your assessment (max 10 words).
|
You are an expert in sentiment analysis. Your task is to analyze the sentiment of a quote in relation to a specific theme.
|
||||||
|
|
||||||
You need to deliver three pieces of information:
|
|
||||||
1. A list of keywords from the quote quantify or qualify the theme, and that influenced your sentiment analysis (if any).
|
|
||||||
2. A sentiment score: -1 for negative, 0 for neutral, and 1 for positive sentiment.
|
|
||||||
3. A brief reason (max 10 words) explaining your sentiment score.
|
|
||||||
|
|
||||||
|
|
||||||
# Guidelines
|
|
||||||
Keywords should be directly relevant to the theme.
|
|
||||||
|
|
||||||
The reason should be extremely concise and to the point:
|
|
||||||
- Does not need to be a full sentence.
|
|
||||||
- Sentiment itself does not need to be stated in the explanation.
|
|
||||||
- If keywords are present in the quote that directly capture the sentiment, give that as the reason..
|
|
||||||
|
|
||||||
|
|
||||||
# Input
|
# Input
|
||||||
|
|
||||||
Theme: `{theme}`
|
Theme: `{theme}`
|
||||||
|
Theme Description: `{theme_description}`
|
||||||
Quote:
|
Quote:
|
||||||
```
|
```
|
||||||
{content}
|
{content}
|
||||||
```
|
```
|
||||||
|
|
||||||
# Response Format
|
# Instructions
|
||||||
Provide your response in the following JSON format:
|
1. Analyze the sentiment of the quote specifically regarding the theme.
|
||||||
{{
|
2. Extract relevant keywords or phrases from the quote. Prioritize specific descriptors found in the text that match or relate to the theme.
|
||||||
"keywords": ["<list_of_relevant_keywords_if_any>"],
|
3. Assign a sentiment score:
|
||||||
"sentiment": <sentiment_score>,
|
- -1: Negative (complaint, dissatisfaction, criticism)
|
||||||
"reason": "<brief_explanation_max_10_words>"
|
- 0: Neutral (factual, mixed, or no strong opinion)
|
||||||
}}
|
- 1: Positive (praise, satisfaction, agreement)
|
||||||
|
4. Provide a concise reason (max 10 words).
|
||||||
|
|
||||||
|
# Constraints
|
||||||
|
- Return ONLY a valid JSON object.
|
||||||
|
- Do not use Markdown formatting (no ```json blocks).
|
||||||
|
- Do not write any Python code or explanations outside the JSON.
|
||||||
|
- If the quote is irrelevant to the theme, return sentiment 0.
|
||||||
|
|
||||||
|
# Response Format
|
||||||
|
{{
|
||||||
|
"keywords": ["<list_of_keywords>"],
|
||||||
|
"sentiment": <integer_score>,
|
||||||
|
"reason": "<string_reason>"
|
||||||
|
}}
|
||||||
|
|
||||||
# Examples
|
# Examples
|
||||||
|
|
||||||
** Example 1**
|
Example 1:
|
||||||
- Theme: `Speed`
|
Theme: `Speed`
|
||||||
- Quote: `It just was a little toned down. It was almost like he was talking like this. You know? It almost kind of this was a little slow for me.`
|
Quote: `It was a little slow for me.`
|
||||||
|
Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "Dissatisfaction with speed"}}
|
||||||
|
|
||||||
- Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "States speed is slow, indicates dissatisfaction"}}
|
Example 2:
|
||||||
|
Theme: `Price`
|
||||||
** Example 2**
|
Quote: `It costs $50.`
|
||||||
- Theme: `Friendliness / Empathy`
|
Response: {{"keywords": [], "sentiment": 0, "reason": "Factual statement"}}
|
||||||
- Quote: `Sound very welcoming`
|
|
||||||
|
|
||||||
- Response: {{ "keywords": ["welcoming"], "sentiment": 1, "reason": "Uses 'welcoming'" }}
|
|
||||||
|
|
||||||
|
Example 3:
|
||||||
|
Theme: `Friendliness`
|
||||||
|
Quote: `Sound very welcoming.`
|
||||||
|
Response: {{"keywords": ["welcoming"], "sentiment": 1, "reason": "Positive descriptor used"}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
resp = client.generate(
|
max_retries = 3
|
||||||
model=model,
|
for attempt in range(max_retries):
|
||||||
prompt=prompt,
|
try:
|
||||||
)
|
resp = client.generate(
|
||||||
|
model=model,
|
||||||
|
prompt=prompt,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
response_text = resp.response.strip()
|
||||||
response_text = resp.response.strip()
|
|
||||||
|
|
||||||
# Extract JSON from response
|
# Extract JSON from response
|
||||||
start_index = response_text.find('{')
|
start_index = response_text.find('{')
|
||||||
end_index = response_text.rfind('}') + 1
|
end_index = response_text.rfind('}') + 1
|
||||||
json_str = response_text[start_index:end_index]
|
|
||||||
|
|
||||||
response_json = json.loads(json_str)
|
if start_index == -1 or end_index == 0:
|
||||||
keywords = response_json.get('keywords', [])
|
raise ValueError("No JSON found")
|
||||||
sentiment = response_json.get('sentiment', 'test')
|
|
||||||
reason = response_json.get('reason', 'no reason provided')
|
json_str = response_text[start_index:end_index]
|
||||||
return keywords, sentiment, reason
|
|
||||||
except Exception as e:
|
response_json = json.loads(json_str)
|
||||||
print(f"Error parsing response: {e}")
|
keywords = response_json.get('keywords', [])
|
||||||
return [], None, 'parsing error'
|
sentiment = response_json.get('sentiment', 'test')
|
||||||
|
reason = response_json.get('reason', 'no reason provided')
|
||||||
|
return keywords, sentiment, reason
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")
|
||||||
|
if attempt == max_retries - 1:
|
||||||
|
return [], None, 'parsing error'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user