added local ollama support
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -12,3 +12,5 @@ __marimo__
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
||||||
data/
|
data/
|
||||||
|
docker-volumes/
|
||||||
|
logs/
|
||||||
@@ -11,6 +11,13 @@ def _():
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from utils import connect_qumo_ollama
|
||||||
|
|
||||||
|
OLLAMA_LOCATION= 'localhost'
|
||||||
|
# VM_NAME = 'ollama-lite'
|
||||||
|
|
||||||
|
client = connect_qumo_ollama(OLLAMA_LOCATION)
|
||||||
|
|
||||||
TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results')
|
TAGUETTE_EXPORT_DIR = Path('./data/transcripts/taguette_results')
|
||||||
WORKING_DIR = Path('./data/processing/02_taguette_postprocess')
|
WORKING_DIR = Path('./data/processing/02_taguette_postprocess')
|
||||||
|
|
||||||
|
|||||||
56
ollama/docker-compose.yml
Normal file
56
ollama/docker-compose.yml
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
services:
|
||||||
|
ollama:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
ports:
|
||||||
|
- 11434:11434
|
||||||
|
volumes:
|
||||||
|
- ./docker-volumes/ollama:/root/.ollama
|
||||||
|
container_name: ollama
|
||||||
|
tty: true
|
||||||
|
restart: unless-stopped
|
||||||
|
# GPU SUPPORT NOTES:
|
||||||
|
# 1. The "deploy" section is ignored by classic 'docker-compose'; it's honored in Swarm.
|
||||||
|
# 2. For local 'docker compose up' with NVIDIA GPUs you need the host configured with
|
||||||
|
# nvidia-container-toolkit. Then either:
|
||||||
|
# a) Leave the reservation block (Compose V2 now honors it) OR
|
||||||
|
# b) Start with: docker compose up --build (Compose will request GPUs) OR
|
||||||
|
# c) Explicitly override: docker compose run --gpus all ollama
|
||||||
|
# 3. If your Docker/Compose version does NOT honor the reservation below, uncomment the
|
||||||
|
# 'devices' section further down as a fallback (less portable).
|
||||||
|
# deploy:
|
||||||
|
# resources:
|
||||||
|
# reservations:
|
||||||
|
# devices:
|
||||||
|
# - driver: nvidia
|
||||||
|
# count: all
|
||||||
|
# capabilities: [gpu]
|
||||||
|
|
||||||
|
# environment:
|
||||||
|
# Visible devices / capabilities for the NVIDIA container runtime
|
||||||
|
# - NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
# - NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
|
|
||||||
|
# Fallback (UNCOMMENT ONLY if the reservation above is ignored and you still get errors):
|
||||||
|
# devices:
|
||||||
|
# - /dev/nvidiactl:/dev/nvidiactl
|
||||||
|
# - /dev/nvidia-uvm:/dev/nvidia-uvm
|
||||||
|
# - /dev/nvidia-uvm-tools:/dev/nvidia-uvm-tools
|
||||||
|
# - /dev/nvidia0:/dev/nvidia0
|
||||||
|
|
||||||
|
open-webui:
|
||||||
|
image: ghcr.io/open-webui/open-webui:main
|
||||||
|
container_name: open-webui
|
||||||
|
volumes:
|
||||||
|
- ./docker-volumes/open-webui:/app/backend/data
|
||||||
|
depends_on:
|
||||||
|
- ollama
|
||||||
|
ports:
|
||||||
|
- 3000:8080
|
||||||
|
environment:
|
||||||
|
- 'OLLAMA_BASE_URL=http://ollama:11434'
|
||||||
|
- 'ENABLE_OLLAMA_API=true'
|
||||||
|
- 'WEBUI_SECRET_KEY='
|
||||||
|
|
||||||
|
extra_hosts:
|
||||||
|
- host.docker.internal:host-gateway
|
||||||
|
restart: unless-stopped
|
||||||
10
utils.py
10
utils.py
@@ -61,7 +61,7 @@ def load_srt(path: str | Path) -> str:
|
|||||||
return '\n\n'.join(transcript_lines)
|
return '\n\n'.join(transcript_lines)
|
||||||
|
|
||||||
|
|
||||||
def connect_qumo_ollama(vm_name: str ='ollama-lite') -> Client:
|
def connect_qumo_ollama(vm_name: str ='ollama-lite', port='11434') -> Client:
|
||||||
"""Establish connection to Qumo Ollama instance
|
"""Establish connection to Qumo Ollama instance
|
||||||
|
|
||||||
vm_name: str ('ollama-lite' or 'hiperf-gpu')
|
vm_name: str ('ollama-lite' or 'hiperf-gpu')
|
||||||
@@ -70,14 +70,18 @@ def connect_qumo_ollama(vm_name: str ='ollama-lite') -> Client:
|
|||||||
Returns:
|
Returns:
|
||||||
Client: Ollama client connected to the specified VM
|
Client: Ollama client connected to the specified VM
|
||||||
"""
|
"""
|
||||||
QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:11434'
|
QUMO_OLLAMA_URL = f'http://{vm_name}.tail44fa00.ts.net:{port}'
|
||||||
|
|
||||||
|
if vm_name in ['localhost', '0.0.0.0']:
|
||||||
|
QUMO_OLLAMA_URL = f"http://{vm_name}:{port}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
requests.get(QUMO_OLLAMA_URL, timeout=5)
|
requests.get(QUMO_OLLAMA_URL, timeout=5)
|
||||||
client = Client(
|
client = Client(
|
||||||
host=QUMO_OLLAMA_URL
|
host=QUMO_OLLAMA_URL
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"Connection succesful. WebUI available at: http://{vm_name}.tail44fa00.ts.net:3000\nAvailable models:")
|
print(f"Connection succesful. WebUI available at: {QUMO_OLLAMA_URL.replace(port, '3000')}\nAvailable models:")
|
||||||
for m in client.list().models:
|
for m in client.list().models:
|
||||||
print(f" - '{m.model}' ")
|
print(f" - '{m.model}' ")
|
||||||
return client
|
return client
|
||||||
|
|||||||
Reference in New Issue
Block a user