Merge pull request #2 from the023/fixdocker
Fix docker PWD and network
jasonacox authored Feb 8, 2024
2 parents 542a0a8 + 938d96a · commit c0af4b0
Showing 17 changed files with 23 additions and 21 deletions.
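
The change is the same two-part fix applied to every docker run example in the repo: host paths passed to -v become absolute via $PWD (older Docker clients reject relative host paths like ./prompts.json, and a bare name is treated as a named volume rather than a bind mount), and the long-running scripts gain --network="host" so the container can reach services bound to the host's localhost. A minimal sketch of the corrected pattern, combining both halves of the fix with values taken from the repo's own chatbot example:

```bash
# Sketch of the pattern this commit applies (values from the repo's README).
# $PWD expands to an absolute path before docker parses the -v flag, and
# host networking lets the container reach the LLM server on localhost:8000.
docker run \
  -d \
  -e OPENAI_API_BASE="http://localhost:8000/v1" \
  -v $PWD/prompts.json:/app/prompts.json \
  --network="host" \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot
```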
4 changes: 2 additions & 2 deletions README.md
@@ -64,7 +64,7 @@ python3 -m llama_cpp.server \
  --model ./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf \
  --host localhost \
  --n_gpu_layers 99 \
- -n_ctx 2048 \
+ --n_ctx 2048 \
  --chat_format llama-2
  ```

@@ -111,7 +111,7 @@ docker run \
  -e OPENAI_API_BASE="http://localhost:8000/v1" \
  -e LLM_MODEL="tinyllm" \
  -e USE_SYSTEM="false" \
- -v ./prompts.json:/app/prompts.json \
+ -v $PWD/prompts.json:/app/prompts.json \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot
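
The first README hunk above is a separate typo fix: llama_cpp.server takes GNU-style long options, so the single-dash -n_ctx would not have been parsed as intended. The corrected invocation, as it now reads in the README (assuming the model file has already been downloaded to ./models/):

```bash
# Corrected llama-cpp-python server invocation from the README; the model
# path assumes mistral-7b-instruct was previously downloaded to ./models/.
python3 -m llama_cpp.server \
    --model ./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf \
    --host localhost \
    --n_gpu_layers 99 \
    --n_ctx 2048 \
    --chat_format llama-2
```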
3 changes: 2 additions & 1 deletion chatbot/Dockerfile
@@ -51,7 +51,8 @@ EXPOSE $PORT
  # -e DEVICE="cpu" \
  # -e RESULTS=1 \
  # -e USE_SYSTEM="false" \
- # -v ./prompts.json:/app/prompts.json \
+ # -v $PWD/prompts.json:/app/prompts.json \
+ # --network="host" \
  # --name chatbot \
  # --restart unless-stopped \
  # chatbot:0.11.0
2 changes: 1 addition & 1 deletion chatbot/Dockerfile.basic
@@ -51,7 +51,7 @@ EXPOSE $PORT
  # -e DEVICE="cpu" \
  # -e RESULTS=1 \
  # -e USE_SYSTEM="false" \
- # -v ./prompts.json:/app/prompts.json \
+ # -v $PWD/prompts.json:/app/prompts.json \
  # --name chatbot \
  # --restart unless-stopped \
  # chatbot:0.11.0
2 changes: 1 addition & 1 deletion chatbot/Dockerfile.rag
@@ -51,7 +51,7 @@ EXPOSE $PORT
  # -e DEVICE="cpu" \
  # -e RESULTS=1 \
  # -e USE_SYSTEM="false" \
- # -v ./prompts.json:/app/prompts.json \
+ # -v $PWD/prompts.json:/app/prompts.json \
  # --name chatbot \
  # --restart unless-stopped \
  # chatbot:0.11.0
2 changes: 1 addition & 1 deletion chatbot/README.md
@@ -20,7 +20,7 @@ docker run \
  -e OPENAI_API_BASE="http://localhost:8000/v1" \
  -e LLM_MODEL="tinyllm" \
  -e USE_SYSTEM="false" \
- -v ./prompts.json:/app/prompts.json \
+ -v $PWD/prompts.json:/app/prompts.json \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot
3 changes: 2 additions & 1 deletion chatbot/run.sh
@@ -51,7 +51,8 @@ docker run \
  -e MAXTOKENS=16384 \
  -e TEMPERATURE=0.0 \
  -e QDRANT_HOST="localhost" \
- -v ./prompts.json:/app/prompts.json \
+ -v $PWD/prompts.json:/app/prompts.json \
+ --network="host" \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot
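
Worth a gloss (mine, not the commit's): chatbot/run.sh points the container at services on the host via localhost (QDRANT_HOST="localhost" and the OpenAI-compatible API base). On Docker's default bridge network, localhost inside a container refers to the container itself, so those connections would fail; --network="host" shares the host's network namespace instead. A quick way to see the difference, assuming the LLM server from the README is listening on host port 8000:

```bash
# Default bridge network: localhost is the container itself, so this fails.
docker run --rm alpine wget -qO- http://localhost:8000/v1/models
# Host networking: localhost is the host, so the request reaches the server.
docker run --rm --network="host" alpine wget -qO- http://localhost:8000/v1/models
```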
2 changes: 1 addition & 1 deletion llmserver/Dockerfile
@@ -54,7 +54,7 @@ EXPOSE $PORT
  # --runtime=nvidia --gpus all \
  # -d \
  # -p 8000:8000 \
- # -v ./models:/app/models \
+ # -v $PWD/models:/app/models \
  # -e MODEL=models/llama-2-7b-chat.Q5_K_M.gguf \
  # -e N_GPU_LAYERS=32 \
  # -e HOST=0.0.0.0 \
4 changes: 2 additions & 2 deletions llmserver/README.md
@@ -35,7 +35,7 @@ python3 -m llama_cpp.server \
  --model ./models/mistral-7b-instruct-v0.1.Q5_K_M.gguf \
  --host localhost \
  --n_gpu_layers 99 \
- -n_ctx 2048 \
+ --n_ctx 2048 \
  --chat_format llama-2

  ```
@@ -119,7 +119,7 @@ docker run \
  --runtime=nvidia --gpus all \
  -d \
  -p 8000:8000 \
- -v ./models:/app/models \
+ -v $PWD/models:/app/models \
  -e MODEL=models/llama-2-7b-chat.Q5_K_M.gguf \
  -e N_GPU_LAYERS=32 \
  -e HOST=0.0.0.0 \
2 changes: 1 addition & 1 deletion llmserver/models/Dockerfile
@@ -56,7 +56,7 @@ EXPOSE $PORT
  # docker run -d \
  # --runtime=nvidia --gpus all \
  # -p $PORT:$PORT \
- # -v .:/app/models \
+ # -v $PWD:/app/models \
  # -e MODEL=$MODEL \
  # -e N_CTX=$CONTEXT_SIZE \
  # -e CHAT_FORMAT=$CHAT_FORMAT \
2 changes: 1 addition & 1 deletion llmserver/models/run.sh
@@ -80,7 +80,7 @@ INT_REQ=${INT_REQ:-$DEFAULT_INT_REQ}
  docker run -d \
  --runtime=nvidia --gpus all \
  -p $PORT:$PORT \
- -v .:/app/models \
+ -v $PWD:/app/models \
  -e MODEL=$MODEL \
  -e N_CTX=$CONTEXT_SIZE \
  -e CHAT_FORMAT=$CHAT_FORMAT \
2 changes: 1 addition & 1 deletion llmserver/setup-docker.sh
@@ -23,7 +23,7 @@ if [[ $start_server == "y" ]]; then
  --runtime=nvidia --gpus all \
  -d \
  -p 8000:8000 \
- -v ./models:/app/models \
+ -v $PWD/models:/app/models \
  -e MODEL=models/llama-2-7b-chat.Q5_K_M.gguf \
  -e N_GPU_LAYERS=32 \
  -e HOST=0.0.0.0 \
4 changes: 2 additions & 2 deletions rag/README.md
@@ -116,8 +116,8 @@ docker run \
  -e QDRANT_HOST="localhost" \
  -e RESULTS=1 \
  -e SENTENCE_TRANSFORMERS_HOME=/app/models \
- -v ./models:/app/models \
- -v ./prompts.json:/app/prompts.json \
+ -v $PWD/models:/app/models \
+ -v $PWD/prompts.json:/app/prompts.json \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot:latest-rag
4 changes: 2 additions & 2 deletions rag/run.sh
@@ -52,8 +52,8 @@ docker run \
  -e QDRANT_HOST="localhost" \
  -e RESULTS=1 \
  -e SENTENCE_TRANSFORMERS_HOME=/app/models \
- -v ./models:/app/models \
- -v ./prompts.json:/app/prompts.json \
+ -v $PWD/models:/app/models \
+ -v $PWD/prompts.json:/app/prompts.json \
  --name chatbot \
  --restart unless-stopped \
  jasonacox/chatbot:latest-rag
2 changes: 1 addition & 1 deletion vllm/README.md
@@ -141,7 +141,7 @@ nvidia-docker run -d -p 8000:8000 --gpus=all --shm-size=10.24gb \
  -e HF_HOME=/app/models \
  -e NUM_GPU=4 \
  -e EXTRA_ARGS="--dtype float --max-model-len 20000" \
- -v models:/app/models \
+ -v $PWD/models:/app/models \
  --name vllm \
  vllm
  # Print Running Logs - ^C to Stop Viewing Logs
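
One hunk differs subtly from the rest (my note, not the commit's): in vllm/README.md the old flag was -v models:/app/models with no leading path component. Docker treats a bare name after -v as a named volume, so this silently created a Docker-managed volume called "models" instead of bind-mounting the repo's local models/ directory; $PWD/models contains a slash and therefore forces a bind mount:

```bash
# The old flag would have created (or reused) a Docker-managed volume:
docker volume inspect models
# The fixed flag bind-mounts the local models/ directory instead
# (GPU and env flags from the README omitted for brevity):
docker run -d -v $PWD/models:/app/models --name vllm vllm
```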
2 changes: 1 addition & 1 deletion vllm/run-awq.sh
@@ -30,7 +30,7 @@ docker run -d \
  -e NUM_GPU=1 \
  -e SERVED_MODEL_NAME=tinyllm \
  -e HF_HOME=/app/models \
- -v ./models:/app/models \
+ -v $PWD/models:/app/models \
  --restart unless-stopped \
  --name $CONTAINER \
  vllm
2 changes: 1 addition & 1 deletion vllm/run-pascal.sh
@@ -28,7 +28,7 @@ docker run -d \
  -e NUM_GPU=1 \
  -e SERVED_MODEL_NAME=tinyllm \
  -e HF_HOME=/app/models \
- -v ./models:/app/models \
+ -v $PWD/models:/app/models \
  --restart unless-stopped \
  --name $CONTAINER \
  vllm
2 changes: 1 addition & 1 deletion vllm/run.sh
@@ -46,7 +46,7 @@ docker run -d \
  -e NUM_GPU=1 \
  -e SERVED_MODEL_NAME=tinyllm \
  -e HF_HOME=/app/models \
- -v ./models:/app/models \
+ -v $PWD/models:/app/models \
  --restart unless-stopped \
  --name $CONTAINER \
  vllm
