# Compose definition for the GPU-backed inference service.
services:
  fyp-inference-prod:
    container_name: fyp-inference-prod

    # Image is built from the repository root (two levels up from this file).
    # NOTE(review): the service is named "-prod" but the Dockerfile lives under
    # compose/dev/ -- confirm this is intentional.
    build:
      context: ../../
      dockerfile: compose/dev/inference/Dockerfile

    # Reserve every NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

    # Variables loaded from the repository-level .env file.
    env_file:
      - ../../.env

    # Variables set directly on the container. Number-like and boolean-like
    # values are quoted so they are passed through as strings.
    environment:
      # HTTP listener; the port here is the container side of the `ports` entry.
      INFERENCE_HTTP_HOST: '0.0.0.0'
      INFERENCE_HTTP_PORT: '8001'
      # GPU visibility.
      NVIDIA_VISIBLE_DEVICES: all
      # Python runtime; /app is where the repository is mounted below.
      PYTHONPATH: /app
      # NOTE(review): file-watch polling looks like a dev hot-reload setting --
      # confirm it is wanted on a "-prod" service.
      WATCHFILES_FORCE_POLLING: 'true'
      # Hugging Face cache location; matches the hf_cache mount target below.
      HF_HOME: /root/.cache/huggingface
      HF_HUB_OFFLINE: '1'

    # Host port 58001 on all interfaces -> container port 8001.
    ports:
      - '0.0.0.0:58001:8001'

    # Bind mounts (host path : container path).
    volumes:
      # Whole repository, mounted as the application directory.
      - ../../:/app
      # NOTE(review): ../../models already sits inside the mount above, so this
      # entry appears redundant -- confirm before removing.
      - ../../models:/app/models
      # Host-side Hugging Face cache, mounted at the HF_HOME path.
      - ../../hf_cache:/root/.cache/huggingface