From ad00860fa128a238959780c1f7f5f17870b3fc8d Mon Sep 17 00:00:00 2001 From: Luigi Maiorano Date: Wed, 10 Dec 2025 08:28:01 +0100 Subject: [PATCH] added local ollama support --- .gitignore | 4 ++- 02_Taguette_Post-Process.py | 7 +++++ ollama/docker-compose.yml | 56 +++++++++++++++++++++++++++++++++++++ utils.py | 10 +++++-- 4 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 ollama/docker-compose.yml diff --git a/.gitignore b/.gitignore index dceb869..3c1ca91 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,6 @@ __marimo__ __pycache__/ -data/ \ No newline at end of file +data/ +docker-volumes/ +logs/ \ No newline at end of file diff --git a/02_Taguette_Post-Process.py b/02_Taguette_Post-Process.py index f456a3a..8ea44cc 100644 --- a/02_Taguette_Post-Process.py +++ b/02_Taguette_Post-Process.py @@ -11,6 +11,13 @@ def _(): from pathlib import Path from datetime import datetime + from utils import connect_qumo_ollama + + OLLAMA_LOCATION= 'localhost' + # VM_NAME = 'ollama-lite' + + client = connect_qumo_ollama(OLLAMA_LOCATION) + TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results') WORKING_DIR = Path('./data/processing/02_taguette_postprocess') diff --git a/ollama/docker-compose.yml b/ollama/docker-compose.yml new file mode 100644 index 0000000..c5f903f --- /dev/null +++ b/ollama/docker-compose.yml @@ -0,0 +1,56 @@ +services: + ollama: + image: ollama/ollama:latest + ports: + - 11434:11434 + volumes: + - ./docker-volumes/ollama:/root/.ollama + container_name: ollama + tty: true + restart: unless-stopped + # GPU SUPPORT NOTES: + # 1. The "deploy" section is ignored by classic 'docker-compose'; it's honored in Swarm. + # 2. For local 'docker compose up' with NVIDIA GPUs you need the host configured with + # nvidia-container-toolkit. 
Then either: + # a) Leave the reservation block (Compose V2 now honors it) OR + # b) Start with: docker compose up --build (Compose will request GPUs) OR + # c) Explicitly override: docker compose run --gpus all ollama + # 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the + # 'devices' section further down as a fallback (less portable). + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] + + # environment: + # Visible devices / capabilities for the NVIDIA container runtime + # - NVIDIA_VISIBLE_DEVICES=all + # - NVIDIA_DRIVER_CAPABILITIES=compute,utility + + # Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors): + # devices: + # - /dev/nvidiactl:/dev/nvidiactl + # - /dev/nvidia-uvm:/dev/nvidia-uvm + # - /dev/nvidia-uvm-tools:/dev/nvidia-uvm-tools + # - /dev/nvidia0:/dev/nvidia0 + + open-webui: + image: ghcr.io/open-webui/open-webui:main + container_name: open-webui + volumes: + - ./docker-volumes/open-webui:/app/backend/data + depends_on: + - ollama + ports: + - 3000:8080 + environment: + - 'OLLAMA_BASE_URL=http://ollama:11434' + - 'ENABLE_OLLAMA_API=true' + - 'WEBUI_SECRET_KEY=' + + extra_hosts: + - host.docker.internal:host-gateway + restart: unless-stopped diff --git a/utils.py b/utils.py index 830a9ec..c40aad1 100644 --- a/utils.py +++ b/utils.py @@ -61,7 +61,7 @@ def load_srt(path: str | Path) -> str: return '\n\n'.join(transcript_lines) -def connect_qumo_ollama(vm_name: str ='ollama-lite') -> Client: +def connect_qumo_ollama(vm_name: str ='ollama-lite', port='11434') -> Client: """Establish connection to Qumo Ollama instance vm_name: str ('ollama-lite' or 'hiperf-gpu') @@ -70,14 +70,18 @@ def connect_qumo_ollama(vm_name: str ='ollama-lite') -> Client: Returns: tuple(Client): Ollama client connected to the specified VM """ - QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:11434' + QUMO_OLLAMA_URL = 
f'http://{vm_name}.tail44fa00.ts.net:{port}' + + if vm_name in ['localhost', '0.0.0.0']: + QUMO_OLLAMA_URL = f"http://{vm_name}:{port}" + try: requests.get(QUMO_OLLAMA_URL, timeout=5) client = Client( host=QUMO_OLLAMA_URL ) - print(f"Connection succesful. WebUI available at: http://{vm_name}.tail44fa00.ts.net:3000\nAvailable models:") + print(f"Connection successful. WebUI available at: {QUMO_OLLAMA_URL.replace(port, '3000')}\nAvailable models:") for m in client.list().models: print(f" - '{m.model}' ") return client