35 lines
1.1 KiB
Text
35 lines
1.1 KiB
Text
|
|
# ---- Stage: builder --------------------------------------------------------
# Installs every Python dependency (including a CUDA-enabled source build of
# llama-cpp-python) on top of the CUDA "devel" image. Only the resulting
# /usr/local/lib/python3.10/dist-packages and /usr/local/bin trees are meant
# to be copied out of this stage; the stage itself is discarded.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS builder

WORKDIR /build

# Build-time OS packages. DEBIAN_FRONTEND is set inline (not via ENV) so that
# package configuration can never block the build on an interactive prompt
# and the setting does not persist in the image environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cmake \
        git \
        python3-dev \
        python3-pip \
        python3.10 && \
    rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# PyTorch wheels from the CUDA 12.4 index, matching the base image's CUDA.
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

# Settings for the llama-cpp-python source build below:
#   * LD_LIBRARY_PATH gains the CUDA stubs directory
#     (NOTE(review): presumably so libcuda resolves without a GPU driver
#     being present during `docker build` -- confirm).
#   * CMAKE_ARGS turns the CUDA backend on and the LLaVA build off.
#   * FORCE_CMAKE=1 forces the CMake build path.
# These live only in the builder stage and do not reach the final image.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH \
    CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off" \
    FORCE_CMAKE=1

RUN pip install --no-cache-dir llama-cpp-python

# The requirements file is copied as late as possible: none of the layers
# above read it, so editing it no longer invalidates the cached torch
# download or the slow llama-cpp-python CUDA compile.
COPY requirements/inference.txt .
RUN pip install --no-cache-dir -r inference.txt
|
||
|
|
|
||
|
|
# ---- Stage: runtime --------------------------------------------------------
# Final image: CUDA "runtime" base plus the Python packages installed in the
# builder stage and the application source. Starts a uvicorn server for
# gpu_server:app on port 8001 as a non-root user.
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS runtime

WORKDIR /app

# Runtime OS packages only. --no-install-recommends keeps python3-pip's
# recommended extras out of the shipped image.
# libgomp1 is listed explicitly because, without recommends, nothing else is
# guaranteed to install the OpenMP runtime (NOTE(review): assumed to be needed
# by the source-built llama-cpp-python -- confirm against the actual build).
# The apt lists are removed in the same layer, and `python` is symlinked to
# python3 because the CMD below invokes `python`.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libgomp1 \
        python3-pip \
        python3.10 && \
    rm -rf /var/lib/apt/lists/* && \
    ln -sf /usr/bin/python3 /usr/bin/python

# Dedicated unprivileged account with a fixed numeric UID/GID so orchestrators
# can verify it is non-root. It gets a home directory and ownership of /app so
# the application can still write there.
RUN groupadd --system --gid 10001 app && \
    useradd --system --uid 10001 --gid app --create-home app && \
    chown app:app /app

# Python packages and their console scripts produced by the builder stage.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Application source. This copies the whole build context, so keep a
# .dockerignore next to this file to exclude .git, caches, and secrets.
COPY --chown=app:app . .

# Unbuffered stdout/stderr, and /app on the module search path so that
# `gpu_server` can be imported by uvicorn.
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app

EXPOSE 8001

# Drop root for the running service; every step that needs root is above.
USER app

CMD ["python", "-m", "uvicorn", "gpu_server:app", "--host", "0.0.0.0", "--port", "8001"]
|