-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathdocker-compose-prod.yaml
37 lines (37 loc) · 1.23 KB
/
docker-compose-prod.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
---
# Production Compose stack with two services:
#   * llama-cpp - llama-server built from Containerfile.cuda
#   * server    - the logdetective application served by gunicorn
# Both services read their container environment from `.env`.
version: "3"
services:
  llama-cpp:
    image: logdetective/runtime:latest-cuda
    build:
      context: .
      dockerfile: ./Containerfile.cuda
    # No default is provided here: LLAMA_CPP_HOST must be set (e.g. in .env),
    # otherwise Compose substitutes an empty string.
    hostname: "${LLAMA_CPP_HOST}"
    # The port default must match the one used in `ports` below; without it an
    # unset LLAMA_CPP_SERVER_PORT would leave `--port` with no value while the
    # published port still defaulted to 8000.
    command: "llama-server --host 0.0.0.0 --port ${LLAMA_CPP_SERVER_PORT:-8000}"
    stdin_open: true
    tty: true
    env_file: .env
    ports:
      - "${LLAMA_CPP_SERVER_PORT:-8000}:${LLAMA_CPP_SERVER_PORT:-8000}"
    volumes:
      # `:-` (rather than `-`) also falls back to ./models when MODELS_PATH is
      # set but empty, which would otherwise produce an invalid volume spec.
      # `:Z` requests a private SELinux relabel of the mounted content.
      - "${MODELS_PATH:-./models}:/models:Z"
    # these lines are needed for CUDA acceleration
    devices:
      - nvidia.com/gpu=all
  server:
    image: logdetective/runtime:latest
    build:
      context: .
      dockerfile: ./Containerfile
    hostname: logdetective-server
    stdin_open: true
    tty: true
    volumes:
      # The project checkout is mounted at /src, so the gunicorn config and the
      # application code below are read from the host working tree.
      - .:/src/:Z
    ports:
      # NOTE(review): presumably gunicorn-prod.config.py binds to the same
      # LOGDETECTIVE_SERVER_PORT - confirm against that config file.
      - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
    env_file: .env
    # so gunicorn can find logdetective python module
    working_dir: /src
    # --no-reload: doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
    # command: fastapi run /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
    command: ["gunicorn", "-c", "/src/server/gunicorn-prod.config.py", "logdetective.server:app"]