87 lines
2.6 KiB
Python
87 lines
2.6 KiB
Python
"""
|
|
Standard utils for this repository
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import requests
|
|
from ollama import Client
|
|
|
|
|
|
def load_srt(path: str | Path) -> str:
|
|
"""Load and parse an SRT file, returning clean transcript with speaker labels.
|
|
|
|
Args:
|
|
path: Path to the SRT file
|
|
|
|
Returns:
|
|
Clean transcript string with format "SPEAKER_XX: text" per line,
|
|
timestamps stripped, consecutive lines from same speaker merged.
|
|
"""
|
|
path = Path(path)
|
|
content = path.read_text(encoding='utf-8')
|
|
|
|
# Parse SRT blocks: sequence number, timestamp, speaker|text
|
|
# Pattern matches: number, timestamp line, content line(s)
|
|
blocks = re.split(r'\n\n+', content.strip())
|
|
|
|
turns = []
|
|
for block in blocks:
|
|
lines = block.strip().split('\n')
|
|
if len(lines) < 3:
|
|
continue
|
|
|
|
# Skip sequence number (line 0) and timestamp (line 1)
|
|
# Content is line 2 onwards
|
|
text_lines = lines[2:]
|
|
text = ' '.join(text_lines)
|
|
|
|
# Parse speaker|text format
|
|
if '|' in text:
|
|
speaker, utterance = text.split('|', 1)
|
|
speaker = speaker.strip()
|
|
utterance = utterance.strip()
|
|
else:
|
|
speaker = "UNKNOWN"
|
|
utterance = text.strip()
|
|
|
|
turns.append((speaker, utterance))
|
|
|
|
# Merge consecutive turns from same speaker
|
|
merged = []
|
|
for speaker, utterance in turns:
|
|
if merged and merged[-1][0] == speaker:
|
|
merged[-1] = (speaker, merged[-1][1] + ' ' + utterance)
|
|
else:
|
|
merged.append((speaker, utterance))
|
|
|
|
# Format as "SPEAKER_XX: text"
|
|
transcript_lines = [f"{speaker}: {utterance}" for speaker, utterance in merged]
|
|
return '\n\n'.join(transcript_lines)
|
|
|
|
|
|
def connect_qumo_ollama(vm_name: str = 'ollama-lite') -> Client:
    """Establish connection to Qumo Ollama instance.

    Probes the Ollama endpoint on the given VM with a short HTTP request,
    then creates a client for it, prints the models it reports, and returns
    the client.

    Args:
        vm_name: Name of the VM running the Ollama instance
            ('ollama-lite' or 'hiperf-gpu').

    Returns:
        Ollama client connected to the specified VM.

    Raises:
        requests.ConnectionError: If the endpoint cannot be reached.
        requests.Timeout: If the probe request does not complete within
            5 seconds.
    """
    ollama_url = f'http://{vm_name}.tail44fa00.ts.net:11434'

    # Keep the try body to the probe only. On failure, print the hint and
    # re-raise instead of falling through: previously execution continued to
    # the success message and crashed on the unassigned `client`.
    try:
        requests.get(ollama_url, timeout=5)
    except (requests.ConnectionError, requests.Timeout):
        print(f"Failed to reach {ollama_url}. Check that the VM is running and Tailscale is up")
        raise

    client = Client(host=ollama_url)

    print(f"Connection succesful. WebUI available at: http://{vm_name}.tail44fa00.ts.net:3000\nAvailable models:")
    for m in client.list().models:
        print(f" - '{m.model}' ")
    return client
|
|
|