diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..7774467 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/02_Taguette_Post-Process.py b/02_Taguette_Post-Process.py index d0170cf..10ea1b7 100644 --- a/02_Taguette_Post-Process.py +++ b/02_Taguette_Post-Process.py @@ -16,7 +16,7 @@ def _(): OLLAMA_LOCATION= 'localhost' # VM_NAME = 'ollama-lite' - client = connect_qumo_ollama(OLLAMA_LOCATION) + client, _models = connect_qumo_ollama(OLLAMA_LOCATION, print_models=False) TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results') WORKING_DIR = Path('./data/processing/02_taguette_postprocess') @@ -25,7 +25,23 @@ def _(): WORKING_DIR.mkdir(parents=True) if not TAGUETTE_EXPORT_DIR.exists(): TAGUETTE_EXPORT_DIR.mkdir(parents=True) - return TAGUETTE_EXPORT_DIR, WORKING_DIR, datetime, mo, pd + + model_select = mo.ui.dropdown( + options=_models, + value=_models[0], + label="Select Ollama Model to use", + searchable=True, + ) + model_select + return ( + TAGUETTE_EXPORT_DIR, + WORKING_DIR, + client, + datetime, + mo, + model_select, + pd, + ) @app.cell(hide_code=True) @@ -89,7 +105,7 @@ def _(all_tags_df, interview_select, mo): @app.cell(hide_code=True) def _(mo): mo.md(r""" - ### Add `_context` column to track Voice / Character is being referred to per highlight + ## Add `_context` column to track Voice / Character is being referred to per highlight Create a new column 'context', which is defined by the last '_V-' or '_C-' tag seen in the 'tags' column', when moving row by row from top to 
bottom. 1. Iterates through the dataframe in document order (row by row) @@ -102,12 +118,12 @@ def _(mo): Example of challenging case: - | id | document | tag | content | _seq_id | _context | - |-----|-------------|------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|----------------------| - | 88 | P2 - Done | _V-54 | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 117 | _V-54, _V-41 | - | 88 | P2 - Done | _V-41 | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 118 | _V-54, _V-41 | - | 88 | P2 - Done | VT - Human / Artificial | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 119 | _V-54, _V-41 | - | 88 | P2 - Done | VT - Friendliness / Empathy | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. 
| 120 | _V-54, _V-41 | + | tag | content | _seq_id | _context | + |------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|----------------------| + | _V-54 | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 117 | _V-54, _V-41 | + | _V-41 | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 118 | _V-54, _V-41 | + | VT - Human / Artificial | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 119 | _V-54, _V-41 | + | VT - Friendliness / Empathy | They I feel like they're like twins in that sense. Like, they both had this calming, like, calming voice that was smooth. It felt, like, but articulated and helpful, and, like, I felt reassured listening to them. | 120 | _V-54, _V-41 | """) return @@ -155,7 +171,7 @@ def _(df): @app.cell(hide_code=True) def _(mo): mo.md(r""" - ## Resolve multi-context rows (only VT- and CT- theme tags) + ## Split multi-context rows (only VT- and CT- theme tags) For rows that have multiple contexts (e.g., both _V-54 and _V-41) - split these into separate rows for each context. 
@@ -165,7 +181,7 @@ def _(mo): @app.cell -def _(df, mo, pd): +def _(df, pd): # Expand rows that contain multiple contexts (comma-separated) expanded_rows = [] @@ -201,71 +217,32 @@ def _(df, mo, pd): expanded_df_raw['tag'].str.startswith(('VT -', 'CT -'), na=False) ].copy() - manual_rows = sentiment_df[sentiment_df['manual_analysis']] - split_rows_editor = None + print(f"{len(sentiment_df[sentiment_df['manual_analysis']])} Rows with multiple contexts") - - if not manual_rows.empty: - print( - f"⚠️ {len(manual_rows)} rows were created from multi-context splits. " - "See next cell for manual review." - ) - - # Filter for rows that need review. Manual analysis and the tag starts with 'VT -' or 'CT -' - rows_to_edit = sentiment_df[ - (sentiment_df['manual_analysis']) - ] - - # Create data editor for split rows - split_rows_editor = mo.ui.data_editor( - rows_to_edit - ).form(label="Update Sentiment / Manual Flag") - - else: - print("✓ No multi-context rows found") - - return rows_to_edit, sentiment_df, split_rows_editor - - -@app.cell(hide_code=True) -def _(mo, rows_to_edit, split_rows_editor): - mo.vstack([ - mo.md(f""" - ### ⚠️ Manual Review Required - - **{len(rows_to_edit)} rows** were split from multi-context entries. - Please review them below: - 1. Update the `sentiment` column (-1, 0, 1) for each row based on the specific context. - 2. Click **Submit** to apply changes. 
- """), - split_rows_editor - ]) - return - - -@app.cell -def _(mo, split_rows_editor): - # Capture the edited manual-analysis rows for validation - mo.stop(split_rows_editor.value is None, mo.md("Submit your sentiment analysis changes before continuing.")) - reviewed_manual_rows = split_rows_editor.value - - # Ensure all manual-analysis rows include a sentiment of -1, 0, or 1 - if not reviewed_manual_rows.empty: - valid_sentiments = {-1, 0, 1} - needs_review = reviewed_manual_rows[ - reviewed_manual_rows['manual_analysis'] - & ~reviewed_manual_rows['sentiment'].isin(valid_sentiments) - ] - assert needs_review.empty, f"{len(needs_review)} manual-analysis rows missing sentiment -1/0/1" - - print("Verification: ✓ All Manual-analysis rows have valid sentiment values") - return (reviewed_manual_rows,) + sentiment_df[sentiment_df['manual_analysis']] + return (sentiment_df,) @app.cell(hide_code=True) def _(mo): mo.md(r""" - # Highlight Sentiment Analysis + ## Create 'theme' column + """) + return + + +@app.cell +def _(sentiment_df): + from utils import extract_theme + sentiment_df['theme'] = sentiment_df.apply(lambda row: extract_theme(row['tag']), axis=1) + sentiment_df + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Extract Sentiment + Reasoning For each row in the dataframe, analyze the sentiment of the 'content' regarding the respective tag. This should be done for all 'VT -' and 'CT -' tags, since these represent the 'VoiceThemes' and 'CharacterThemes' respectively. The results should be stored in a new 'sentiment' column. 
@@ -278,24 +255,106 @@ def _(mo): @app.cell -def _(sentiment_df): +def _(client, model_select, pd, sentiment_df): # for now, create an empty sentiment column with randomized dummy values for testing # only for 'VT -' and 'CT -' tags - import random - def dummy_sentiment_analysis(content, tag): - if tag.startswith('VT -') or tag.startswith('CT -'): - return random.choice([-1, 0, 1]) # Random sentiment for testing - return None + from utils import dummy_sentiment_analysis, ollama_sentiment_analysis # Only run on rows without manual_analysis - sentiment_df['sentiment'] = sentiment_df[~sentiment_df['manual_analysis']].apply(lambda row: dummy_sentiment_analysis(row['content'], row['tag']), axis=1) + # sentiment_df[['sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply( + # lambda row: pd.Series(dummy_sentiment_analysis(row['content'], row['tag'])), + # axis=1 + # ) + + sentiment_df[['keywords', 'sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply( + lambda row: pd.Series(ollama_sentiment_analysis(row['content'], row['theme'], client=client, model=model_select.value)), + axis=1 + ) + - sentiment_df[~sentiment_df['manual_analysis']] return +@app.cell +def _(sentiment_df): + sentiment_df.loc[~sentiment_df['manual_analysis'], ['theme', 'content', 'sentiment', 'reason', 'keywords']] + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Multi-context tags + """) + return + + +@app.cell +def _(mo, sentiment_df): + manual_rows = sentiment_df[sentiment_df['manual_analysis']] + split_rows_editor = None + rows_to_edit = [] + + if not manual_rows.empty: + print( + f"⚠️ {len(manual_rows)} rows were created from multi-context splits. " + "See next cell for manual review." + ) + + # Filter for rows that need review. 
@app.cell
def _(mo, split_rows_editor):
    # Validate the manually reviewed multi-context rows before they are
    # recombined with the automatically analysed rows.
    #
    # `split_rows_editor` is None when the upstream cell found no
    # multi-context rows. In that case the '' sentinel is exported, so the
    # downstream `isinstance(reviewed_manual_rows, pd.DataFrame)` check falls
    # back to the unmodified dataframe.
    #
    # The previous implementation evaluated `(reviewed_manual_rows != '') and ...`.
    # With a DataFrame on the left that comparison is element-wise, and taking
    # its truth value raises "ValueError: The truth value of a DataFrame is
    # ambiguous" - i.e. the cell crashed as soon as a form was submitted.
    if split_rows_editor is None:
        reviewed_manual_rows = ''
    else:
        reviewed_manual_rows = split_rows_editor.value

        # This cell treats a None form value as "not submitted yet": halt here
        # (and every dependent cell) until the user submits.
        mo.stop(reviewed_manual_rows is None, mo.md("Submit your sentiment analysis changes before continuing."))

        # Ensure all manual-analysis rows include a sentiment of -1, 0, or 1
        if not reviewed_manual_rows.empty:
            valid_sentiments = {-1, 0, 1}
            needs_review = reviewed_manual_rows[
                reviewed_manual_rows['manual_analysis']
                & ~reviewed_manual_rows['sentiment'].isin(valid_sentiments)
            ]
            assert needs_review.empty, f"{len(needs_review)} manual-analysis rows missing sentiment -1/0/1"

    print("Verification: ✓ All Manual-analysis rows have valid sentiment values")
    return (reviewed_manual_rows,)
pd.DataFrame): + recombined_df = pd.concat([_static_analysis_rows, reviewed_manual_rows]).sort_values(by='_seq_id').reset_index(drop=True) + else: + recombined_df = sentiment_df recombined_df return (recombined_df,) @@ -348,7 +410,7 @@ def _(mo): def _(WORKING_DIR, datetime, interview_select, recombined_df): # Save to CSV in working dir timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - filename = WORKING_DIR / f"{interview_select.value.split(' ')[0]}_sentiments_{timestamp}.csv" + filename = WORKING_DIR / f"{interview_select.value.split(' ')[0]}_sentiments.csv" recombined_df.to_csv(filename, index=False) print(f"✓ Saved processed data to '{filename}'") diff --git a/03_Sentiment_Analysis.py b/03_Sentiment_Analysis.py index 9427d62..871fc65 100644 --- a/03_Sentiment_Analysis.py +++ b/03_Sentiment_Analysis.py @@ -9,14 +9,14 @@ def _(): import marimo as mo import pandas as pd from pathlib import Path + from utils import create_sentiment_matrix INPUT_DIR = Path("./data/processing/02_taguette_postprocess") WORKING_DIR = Path('./data/processing/03_sentiment_analysis') if not WORKING_DIR.exists(): WORKING_DIR.mkdir(parents=True) - - return INPUT_DIR, Path, WORKING_DIR, mo, pd + return INPUT_DIR, Path, WORKING_DIR, create_sentiment_matrix, mo, pd @app.cell(hide_code=True) @@ -62,55 +62,6 @@ def _(mo): return -@app.cell -def _(document_name, pd): - import numpy as np - - def create_sentiment_matrix(doc_df, column_prefix='VT - |CT - ', row_prefix='_V-|_C-'): - """ - Create a sentiment matrix for a specific document. 
- - Parameters: - - df: DataFrame with columns ['document', 'tag', '_context', 'sentiment'] - - document_name: Name of the document to filter by - - Returns: - - DataFrame representing the sentiment matrix - """ - - # Filter for rows where the tag matches the sentiment prefixes (VT-/CT-) - sentiment_rows = doc_df[ - doc_df['tag'].str.contains(column_prefix, na=False) - ].copy() - - if sentiment_rows.empty: - print(f"No sentiment data found for document: {document_name}") - return pd.DataFrame() - - # Filter for rows with valid Voice/Character context - valid_rows = sentiment_rows[ - sentiment_rows['_context'].notna() & - (sentiment_rows['_context'].str.contains(row_prefix, na=False)) - ].copy() - - if valid_rows.empty: - print(f"No Voice/Character context found for document: {document_name}") - return pd.DataFrame() - - # Create aggregation: group by Voice/Character (_context) and Theme (tag) - # Sum sentiment scores for each combination - matrix_data = valid_rows.groupby(['_context', 'tag'])['sentiment'].sum().reset_index() - - # Pivot to create the matrix - matrix = matrix_data.pivot(index='_context', columns='tag', values='sentiment') - - # # Convert to integers for cleaner display - # matrix = matrix.astype(int) - - return matrix - return (create_sentiment_matrix,) - - @app.cell(hide_code=True) def _(mo): mo.md(r""" diff --git a/04_Sentiment_Aggregation.py b/04_Results_Aggregation.py similarity index 100% rename from 04_Sentiment_Aggregation.py rename to 04_Results_Aggregation.py diff --git a/ollama/docker-compose.yml b/ollama/docker-compose.yml index c5f903f..75647e7 100644 --- a/ollama/docker-compose.yml +++ b/ollama/docker-compose.yml @@ -17,18 +17,18 @@ services: # c) Explicitly override: docker compose run --gpus all ollama # 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the # 'devices' section further down as a fallback (less portable). 
- # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: all - # capabilities: [gpu] + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] - # environment: + environment: # Visible devices / capabilities for the NVIDIA container runtime - # - NVIDIA_VISIBLE_DEVICES=all - # - NVIDIA_DRIVER_CAPABILITIES=compute,utility + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility # Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors): # devices: diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..c5846e1 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,4 @@ +from .ollama_utils import connect_qumo_ollama +from .data_utils import create_sentiment_matrix, extract_theme +from .transcript_utils import load_srt +from .sentiment_analysis import dummy_sentiment_analysis, ollama_sentiment_analysis diff --git a/utils/data_utils.py b/utils/data_utils.py new file mode 100644 index 0000000..46452dc --- /dev/null +++ b/utils/data_utils.py @@ -0,0 +1,65 @@ +import pandas as pd + + +def create_sentiment_matrix(doc_df, column_prefix='VT - |CT - ', row_prefix='_V-|_C-'): + """ + Create a sentiment matrix for a specific document. 
+ + Parameters: + - df: DataFrame with columns ['document', 'tag', '_context', 'sentiment'] + - document_name: Name of the document to filter by + + Returns: + - DataFrame representing the sentiment matrix + """ + + # Filter for rows where the tag matches the sentiment prefixes (VT-/CT-) + sentiment_rows = doc_df[ + doc_df['tag'].str.contains(column_prefix, na=False) + ].copy() + + if sentiment_rows.empty: + print("No sentiment data found") + return pd.DataFrame() + + # Filter for rows with valid Voice/Character context + valid_rows = sentiment_rows[ + sentiment_rows['_context'].notna() & + (sentiment_rows['_context'].str.contains(row_prefix, na=False)) + ].copy() + + if valid_rows.empty: + print("No Voice/Character context found") + return pd.DataFrame() + + # Create aggregation: group by Voice/Character (_context) and Theme (tag) + # Sum sentiment scores for each combination + matrix_data = valid_rows.groupby(['_context', 'tag'])['sentiment'].sum().reset_index() + + # Pivot to create the matrix + matrix = matrix_data.pivot(index='_context', columns='tag', values='sentiment') + + # # Convert to integers for cleaner display + # matrix = matrix.astype(int) + + return matrix + + + +def extract_theme(tag: str, theme_prefixes='VT - |CT - ') -> str: + """ + Extract the theme from a tag string. 
+ + Parameters: + - tag: str, the tag string (e.g., 'VT - Personal Experience') + - theme_prefixes: str, prefixes to remove from the tag (e.g., 'VT - |CT - ') + + Returns: + - str, the extracted theme (e.g., 'Personal Experience') + - None if no theme found + """ + for prefix in theme_prefixes.split('|'): + if tag.startswith(prefix): + return tag.replace(prefix, '').strip() + return None + \ No newline at end of file diff --git a/utils/ollama_utils.py b/utils/ollama_utils.py new file mode 100644 index 0000000..a844fcd --- /dev/null +++ b/utils/ollama_utils.py @@ -0,0 +1,42 @@ + + + +import requests +from ollama import Client + + + + +def connect_qumo_ollama(vm_name: str ='ollama-lite', port='11434', print_models=True) -> Client: + """Establish connection to Qumo Ollama instance + + vm_name: str ('ollama-lite' or 'hiperf-gpu') + Name of the VM running the Ollama instance + + Returns: + tuple(Client): Ollama client connected to the specified VM + """ + QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:{port}' + + if vm_name in ['localhost', '0.0.0.0']: + QUMO_OLLAMA_URL = f"http://{vm_name}:{port}" + + try: + requests.get(QUMO_OLLAMA_URL, timeout=5) + client = Client( + host=QUMO_OLLAMA_URL + ) + + print(f"Connection succesful. WebUI available at: {QUMO_OLLAMA_URL.replace(port, '3000')}") + models = [m.model for m in client.list().models] + if print_models: + print("Available models:") + for m in models: + print(f" - '{m}' ") + return client, models + + except requests.ConnectionError: + pass + + print(f"Failed to reach {QUMO_OLLAMA_URL}. 
Check that the VM is running and Tailscale is up") + return None, None diff --git a/utils/sentiment_analysis.py b/utils/sentiment_analysis.py new file mode 100644 index 0000000..ed46edc --- /dev/null +++ b/utils/sentiment_analysis.py @@ -0,0 +1,128 @@ +import random +import pandas as pd + +from ollama import Client +import json + +def dummy_sentiment_analysis(content, tag): + if tag.startswith('VT -') or tag.startswith('CT -'): + return random.choice([-1, 0, 1]), 'random dummy sentiment' # Random sentiment for testing + + return 'test', 'not applicable' + + + +def ollama_sentiment_analysis(content, theme, client: Client, model) -> tuple[list[str], int, str]: + """ + Perform sentiment analysis using Ollama model. + + Parameters: + - content: Text content to analyze + - tag: Tag indicating the type of sentiment analysis (e.g., 'VT - Positive') + + Returns: + - sentiment score and reason + """ + prompt = f""" + # Instructions + You are an expert in sentiment analysis and natural language processing. You are given a quote from an interview along with a theme tag. Your task is to analyze the sentiment expressed in the quote in relation to the provided theme, and provide a short explanation for your assessment (max 10 words). + + You need to deliver three pieces of information: + 1. A list of keywords from the quote quantify or qualify the theme, and that influenced your sentiment analysis (if any). + 2. A sentiment score: -1 for negative, 0 for neutral, and 1 for positive sentiment. + 3. A brief reason (max 10 words) explaining your sentiment score. + + + # Guidelines + Keywords should be directly relevant to the theme. + + The reason should be extremely concise and to the point: + - Does not need to be a full sentence. + - Sentiment itself does not need to be stated in the explanation. + - If keywords are present in the quote that directly capture the sentiment, give that as the reason.. 
+ + + # Input + + Theme: `{theme}` + + Quote: + ``` + {content} + ``` + + # Response Format + Provide your response in the following JSON format: + {{ + "keywords": [""], + "sentiment": , + "reason": "" + }} + + + # Examples + + ** Example 1** + - Theme: `Speed` + - Quote: `It just was a little toned down. It was almost like he was talking like this. You know? It almost kind of this was a little slow for me.` + + - Response: {{"keywords": ["slow"], "sentiment": -1, "reason": "States speed is slow, indicates dissatisfaction"}} + + ** Example 2** + - Theme: `Friendliness / Empathy` + - Quote: `Sound very welcoming` + + - Response: {{ "keywords": ["welcoming"], "sentiment": 1, "reason": "Uses 'welcoming'" }} + + """ + + resp = client.generate( + model=model, + prompt=prompt, + ) + + try: + response_text = resp.response.strip() + + # Extract JSON from response + start_index = response_text.find('{') + end_index = response_text.rfind('}') + 1 + json_str = response_text[start_index:end_index] + + response_json = json.loads(json_str) + keywords = response_json.get('keywords', []) + sentiment = response_json.get('sentiment', 'test') + reason = response_json.get('reason', 'no reason provided') + return keywords, sentiment, reason + except Exception as e: + print(f"Error parsing response: {e}") + return [], None, 'parsing error' + + +if __name__ == "__main__": + + client = Client( + host="http://localhost:11434" + ) + + sentiment_df = pd.DataFrame({ + 'content': [ + "I love this product!", + "This is the worst service ever.", + "It's okay, not great but not terrible." 
+ ], + 'tag': [ + 'VT - Personal Experience', + 'VT - Personal Experience', + 'VT - Personal Experience' + ], + 'manual_analysis': [False, False, True] + }) + + sentiment_df[['sentiment', 'reason']] = sentiment_df[~sentiment_df['manual_analysis']].apply( + lambda row: pd.Series(ollama_sentiment_analysis(row['content'], row['tag'], client, model='llama3.2:latest')), + axis=1 + ) + + print(sentiment_df.head()) + diff --git a/utils.py b/utils/transcript_utils.py similarity index 59% rename from utils.py rename to utils/transcript_utils.py index c40aad1..0a118f9 100644 --- a/utils.py +++ b/utils/transcript_utils.py @@ -1,13 +1,6 @@ -""" -Standard utils for this repository -""" -import re from pathlib import Path - -import requests -from ollama import Client - +import re def load_srt(path: str | Path) -> str: """Load and parse an SRT file, returning clean transcript with speaker labels. @@ -58,37 +51,4 @@ def load_srt(path: str | Path) -> str: # Format as "SPEAKER_XX: text" transcript_lines = [f"{speaker}: {utterance}" for speaker, utterance in merged] - return '\n\n'.join(transcript_lines) - - -def connect_qumo_ollama(vm_name: str ='ollama-lite', port='11434') -> Client: - """Establish connection to Qumo Ollama instance - - vm_name: str ('ollama-lite' or 'hiperf-gpu') - Name of the VM running the Ollama instance - - Returns: - tuple(Client): Ollama client connected to the specified VM - """ - QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:{port}' - - if vm_name in ['localhost', '0.0.0.0']: - QUMO_OLLAMA_URL = f"http://{vm_name}:{port}" - - try: - requests.get(QUMO_OLLAMA_URL, timeout=5) - client = Client( - host=QUMO_OLLAMA_URL - ) - - print(f"Connection succesful. WebUI available at: {QUMO_OLLAMA_URL.replace(port, '3000')}\nAvailable models:") - for m in client.list().models: - print(f" - '{m.model}' ") - return client - - except requests.ConnectionError: - pass - - print(f"Failed to reach {QUMO_OLLAMA_URL}. 
Check that the VM is running and Tailscale is up") - return None - + return '\n\n'.join(transcript_lines) \ No newline at end of file