Dynavera/compose/prod/docker-compose.inference.yml

39 lines
1 KiB
YAML
Raw Normal View History

services:
  # GPU-backed inference service, exposed through Traefik over the external
  # proxy-net network. Requires LLM_DOMAIN_NAME (and optionally LLM_PORT) to
  # be provided to Compose, e.g. via an .env file.
  fyp-inference-prod:
    container_name: fyp-inference-prod
    build:
      # Two directories above this compose file.
      context: ../../
      # NOTE(review): this prod stack builds from the dev Dockerfile —
      # confirm that is intentional.
      dockerfile: compose/dev/inference/Dockerfile
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    environment:
      - INFERENCE_HTTP_HOST=0.0.0.0
      # Must stay in sync with the Traefik load-balancer port label below.
      - INFERENCE_HTTP_PORT=8001
      - NVIDIA_VISIBLE_DEVICES=all
      # NOTE(review): looks like a dev hot-reload setting — confirm it is
      # wanted in prod.
      - WATCHFILES_FORCE_POLLING=true
      - PYTHONPATH=/app
      - HF_HOME=/root/.cache/huggingface
      # Presumably keeps Hugging Face libraries off the network, so models
      # must already be present in the mounted cache — TODO confirm.
      - HF_HUB_OFFLINE=1
    volumes:
      # NOTE(review): bind-mounts the whole build context over /app, which
      # shadows whatever the image placed there — confirm for prod.
      - ../../:/app
      - ../../models:/app/models
      # Host-side cache backing HF_HOME.
      - ../../hf_cache:/root/.cache/huggingface
    labels:
      - "traefik.enable=true"
      # LLM_DOMAIN_NAME must be set; an unset value yields an empty Host().
      - "traefik.http.routers.llm.rule=Host(`${LLM_DOMAIN_NAME}`)"
      - "traefik.http.routers.llm.entrypoints=web"
      # Falls back to 8001 (the INFERENCE_HTTP_PORT above) when LLM_PORT is
      # unset, instead of emitting a label with an empty port.
      - "traefik.http.services.llm.loadbalancer.server.port=${LLM_PORT:-8001}"
      - "traefik.docker.network=proxy-net"
    networks:
      - proxy-net
# proxy-net is declared external: Compose attaches to an already existing
# network of that name instead of creating one, so it must exist before `up`.
networks:
  proxy-net:
    external: true