# Image for serving the application with uvicorn on port 7860.
# Base: NVIDIA CUDA 11.8.0 runtime on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04

# All following relative paths (COPY destinations, the CMD's working
# directory) resolve against /app.
WORKDIR /app

# Install system dependencies: the Python interpreter, pip, and git.
# DEBIAN_FRONTEND is set inline so package configuration never prompts during
# the build and the variable does not leak into the runtime environment.
# The apt package lists are removed in the same layer to keep the image small.
# NOTE(review): --no-install-recommends would shrink the image further, but it
# also drops python3-pip's recommended build tooling; confirm nothing in
# requirements.txt has to be compiled from source before adding it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies without keeping pip's download cache in the layer.
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the rest of the application source into /app.
COPY . .

# Make sure the directory referenced by MODEL_PATH exists even when the build
# context does not contain it.
RUN mkdir -p /app/model/medical_llama_3b

# Location of the model directory, exposed to the application at runtime.
ENV MODEL_PATH=/app/model/medical_llama_3b

# Documents the port the server listens on (does not publish it by itself).
EXPOSE 7860

# NOTE(review): the container runs as root because no USER is set. Consider
# adding a dedicated non-root user once the paths the app writes to are known.

# Start the ASGI app object `app` from module `app`, listening on all
# interfaces. Exec form keeps uvicorn as PID 1 so it receives stop signals.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]