# docker-compose-prod.yaml

# Compose file format version; quoted so it is read as a string, not a number.
version: "3"
# Containers that make up this deployment, keyed by service name.
services:
  # llama.cpp server container, built from the CUDA Containerfile.
  llama-cpp:
    image: logdetective/runtime:latest-cuda
    build:
      context: .
      dockerfile: ./Containerfile.cuda
    # NOTE(review): no fallback here - LLAMA_CPP_HOST has to be provided
    # (e.g. via .env), otherwise this interpolates to an empty hostname.
    hostname: "${LLAMA_CPP_HOST}"
    # Uses the same 8000 fallback as the `ports` mapping below, so the server
    # listens on the published port even when LLAMA_CPP_SERVER_PORT is unset.
    command: "llama-server --host 0.0.0.0 --port ${LLAMA_CPP_SERVER_PORT:-8000}"
    stdin_open: true
    tty: true
    env_file: .env
    ports:
      - "${LLAMA_CPP_SERVER_PORT:-8000}:${LLAMA_CPP_SERVER_PORT:-8000}"
    volumes:
      # `:-` falls back to ./models when MODELS_PATH is unset *or* empty;
      # an empty source path would yield an invalid volume spec.
      - "${MODELS_PATH:-./models}:/models:Z"
    # these lines are needed for CUDA acceleration
    devices:
      - nvidia.com/gpu=all
  # Log Detective server container; the app is served by gunicorn.
  server:
    image: logdetective/runtime:latest
    hostname: logdetective-server
    build:
      dockerfile: ./Containerfile
      context: .
    env_file: .env
    tty: true
    stdin_open: true
    ports:
      - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
    volumes:
      # the compose project directory is mounted at /src
      - ".:/src/:Z"
    # gunicorn is started from /src so it can find the logdetective python module
    working_dir: /src
    # Alternative fastapi entry point, kept for reference.
    # NOTE(review): `--no-reload` appears to be there because reloading does not
    # work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
    # command: fastapi run /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
    command:
      - "gunicorn"
      - "-c"
      - "/src/server/gunicorn-prod.config.py"
      - "logdetective.server:app"