# GPU image that serves the ASGI application `app:app` with uvicorn on port 7860.
# Base: NVIDIA CUDA 11.8 + cuDNN 8 runtime on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during package installation.
# Declared as ARG (not ENV) so it does not leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration.
#   TRANSFORMERS_CACHE        - points at the cache directory created below
#                               (presumably read by Hugging Face transformers;
#                               confirm against the application code).
#   PYTHONUNBUFFERED          - unbuffered stdout/stderr so logs appear immediately.
#   PORT                      - must stay in sync with EXPOSE and the --port in CMD.
#   NVIDIA_VISIBLE_DEVICES /
#   NVIDIA_DRIVER_CAPABILITIES - GPU exposure settings for the NVIDIA container runtime.
ENV TRANSFORMERS_CACHE=/app/cache \
    PYTHONUNBUFFERED=1 \
    PORT=7860 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

WORKDIR /app

# System dependencies. `update` and `install` share one layer so the package
# index is never stale, and the apt lists are removed in the same layer.
# NOTE(review): recommended packages are intentionally left enabled; on Ubuntu
# python3-pip's recommends provide the compiler toolchain that source-only
# packages in requirements.txt may need. Switch to --no-install-recommends once
# requirements.txt is confirmed to install from wheels only.
RUN apt-get update && apt-get install -y \
        git \
        python3-pip \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

# Dedicated non-root user that owns the application tree, replacing the former
# world-writable (777) cache directory. The cache and model directories are
# created here so they exist, and are writable, before the app starts.
RUN useradd --create-home --uid 1000 --user-group appuser \
    && mkdir -p /app/cache /app/model/medical_llama_3b \
    && chown -R appuser:appuser /app

# Copy only the dependency manifest first so the (slow) dependency layer is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt ./

# Upgrade pip, install the dependencies, then force numpy below 2.0 for
# compatibility. The numpy pin runs last, in the same layer, so a superseded
# numpy is not left behind in an earlier image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip \
    && python3 -m pip install --no-cache-dir -r requirements.txt \
    && python3 -m pip install --no-cache-dir "numpy<2.0.0"

# Application source, owned by the runtime user.
COPY --chown=appuser:appuser . .

# Documentation of the listening port (does not publish it).
EXPOSE 7860

# Drop privileges for runtime; every root-only step is above this line.
USER appuser

# Exec form so uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]