43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
|
|
|
|
|
|
import requests
|
|
from ollama import Client
|
|
|
|
|
|
|
|
|
|
def connect_qumo_ollama(vm_name: str = 'ollama-lite', port: str = '11434',
                        print_models: bool = True) -> tuple:
    """Establish connection to a Qumo Ollama instance.

    Args:
        vm_name: Name of the VM running the Ollama instance
            ('ollama-lite' or 'hiperf-gpu'), or 'localhost'/'0.0.0.0'
            to target a locally running instance directly.
        port: Port the Ollama HTTP API listens on.
        print_models: If True, print the list of available models.

    Returns:
        tuple: ``(client, models)`` on success — an ``ollama.Client``
        bound to the instance and a list of model-name strings —
        or ``(None, None)`` if the instance is unreachable.
    """
    # Local addresses bypass the Tailscale MagicDNS hostname.
    if vm_name in ('localhost', '0.0.0.0'):
        QUMO_OLLAMA_URL = f"http://{vm_name}:{port}"
    else:
        QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:{port}'

    try:
        # Probe reachability first so failures surface here with a clear
        # message rather than on the client's first real call.
        requests.get(QUMO_OLLAMA_URL, timeout=5)
        client = Client(
            host=QUMO_OLLAMA_URL
        )

        # The WebUI runs on port 3000 of the same host; anchor the
        # replacement on ":port" so digits in the hostname can't match.
        print(f"Connection successful. WebUI available at: {QUMO_OLLAMA_URL.replace(f':{port}', ':3000')}")
        models = [m.model for m in client.list().models]
        if print_models:
            print("Available models:")
            for m in models:
                print(f" - '{m}' ")
        return client, models

    # RequestException covers ConnectionError AND Timeout — the probe
    # above uses timeout=5, so timeouts must also take the failure path.
    except requests.RequestException:
        print(f"Failed to reach {QUMO_OLLAMA_URL}. Check that the VM is running and Tailscale is up")
        return None, None
|