---
# Compose definition for the GPU-backed inference service.
# All relative paths below are written relative to this compose file and
# point two directories up (../../).
services:
  fyp-inference-prod:
    container_name: fyp-inference-prod

    build:
      context: ../../
      # NOTE(review): this "prod" service builds from the compose/dev
      # Dockerfile - confirm that this is intentional.
      dockerfile: compose/dev/inference/Dockerfile

    # Reserve every available NVIDIA GPU for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    env_file:
      - ../../.env

    # List form keeps every value a plain string (no YAML type inference).
    environment:
      - INFERENCE_HTTP_HOST=0.0.0.0
      # Must match the container-side port published under `ports`.
      - INFERENCE_HTTP_PORT=8001
      - NVIDIA_VISIBLE_DEVICES=all
      # NOTE(review): file-watch polling is normally a development aid -
      # confirm it is wanted for this service.
      - WATCHFILES_FORCE_POLLING=true
      - PYTHONPATH=/app
      # Same path as the hf_cache bind mount under `volumes`.
      - HF_HOME=/root/.cache/huggingface
      # Presumably puts the Hugging Face hub client in offline mode, so the
      # mounted cache has to be populated beforehand - TODO confirm.
      - HF_HUB_OFFLINE=1

    ports:
      # Host port 58001 on all interfaces -> container port 8001.
      # Quoted so the mapping is always read as a string.
      - '0.0.0.0:58001:8001'

    volumes:
      # Bind-mount the build context over /app.
      - ../../:/app
      # NOTE(review): ../../models already lives under the ../../ mount
      # above; this explicit mount looks redundant - verify before removing.
      - ../../models:/app/models
      - ../../hf_cache:/root/.cache/huggingface