43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
|
|
|
|
|
|
import requests
|
|
from ollama import Client
|
|
|
|
|
|
|
|
|
|
def connect_qumo_ollama(vm_name: str = 'ollama-lite', port: str = '11434',
                        print_models: bool = True) -> tuple:
    """Establish connection to a Qumo Ollama instance.

    Args:
        vm_name: Name of the VM running the Ollama instance
            ('ollama-lite' or 'hiperf-gpu'), or 'localhost'/'0.0.0.0'
            to target a locally running instance directly.
        port: Port the Ollama HTTP API listens on.
        print_models: If True, print the list of available models.

    Returns:
        tuple: ``(client, models)`` on success — an ``ollama.Client``
        bound to the instance and a list of model-name strings —
        or ``(None, None)`` if the instance is unreachable.
    """
    # Local addresses bypass the Tailscale MagicDNS hostname.
    if vm_name in ('localhost', '0.0.0.0'):
        QUMO_OLLAMA_URL = f"http://{vm_name}:{port}"
    else:
        QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:{port}'

    try:
        # Probe reachability first so failures surface here with a clear
        # message rather than on the client's first real call.
        requests.get(QUMO_OLLAMA_URL, timeout=5)
        client = Client(
            host=QUMO_OLLAMA_URL
        )

        # The WebUI runs on port 3000 of the same host; anchor the
        # replacement on ":port" so digits in the hostname can't match.
        print(f"Connection successful. WebUI available at: {QUMO_OLLAMA_URL.replace(f':{port}', ':3000')}")
        models = [m.model for m in client.list().models]
        if print_models:
            print("Available models:")
            for m in models:
                print(f" - '{m}' ")
        return client, models

    # RequestException covers ConnectionError AND Timeout — the probe
    # above uses timeout=5, so timeouts must also take the failure path.
    except requests.RequestException:
        print(f"Failed to reach {QUMO_OLLAMA_URL}. Check that the VM is running and Tailscale is up")
        return None, None
|