Skip to content

Commit

Permalink
Merge branch 'master' into issues/fix_benchmark_failure
Browse files Browse the repository at this point in the history
  • Loading branch information
chauhang authored Feb 27, 2024
2 parents 15cd35b + 85e4e0f commit f29b8d7
Show file tree
Hide file tree
Showing 45 changed files with 1,545 additions and 169 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci-cpu-cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macOS-latest]
os: [ubuntu-20.04, macOS-latest]
steps:
- name: Checkout TorchServe
uses: actions/checkout@v2
Expand All @@ -29,4 +29,4 @@ jobs:
python ts_scripts/install_dependencies.py --environment=dev --cpp
- name: Build
run: |
cd cpp && ./build.sh --install-dependencies
cd cpp && ./build.sh
12 changes: 12 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,15 @@
[submodule "cpp/third-party/llama2.so"]
path = cpp/third-party/llama2.so
url = https://github.com/mreso/llama2.so.git
# C++ backend third-party dependencies, vendored as git submodules
# (this diff replaces the ad-hoc `git clone` calls previously done in
# cpp/build.sh for folly, yaml-cpp, and kineto).
[submodule "cpp/third-party/folly"]
path = cpp/third-party/folly
url = https://github.com/facebook/folly.git
[submodule "cpp/third-party/yaml-cpp"]
path = cpp/third-party/yaml-cpp
url = https://github.com/jbeder/yaml-cpp.git
# tokenizers-cpp is used by the AOTInductor BERT example handler.
[submodule "cpp/third-party/tokenizers-cpp"]
path = cpp/third-party/tokenizers-cpp
url = https://github.com/mlc-ai/tokenizers-cpp.git
[submodule "cpp/third-party/kineto"]
path = cpp/third-party/kineto
url = https://github.com/pytorch/kineto.git
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
project(torchserve_cpp VERSION 0.1)

set(CMAKE_CXX_STANDARD 17)
Expand Down
1 change: 1 addition & 0 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
## Requirements
* C++17
* GCC version: gcc-9
* cmake version: 3.26.4+
## Installation and Running TorchServe CPP

### Install dependencies
Expand Down
30 changes: 12 additions & 18 deletions cpp/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,6 @@ function install_folly() {
FOLLY_SRC_DIR=$BASE_DIR/third-party/folly
FOLLY_BUILD_DIR=$DEPS_DIR/folly-build

if [ ! -d "$FOLLY_SRC_DIR" ] ; then
echo -e "${COLOR_GREEN}[ INFO ] Cloning folly repo ${COLOR_OFF}"
git clone https://github.com/facebook/folly.git "$FOLLY_SRC_DIR"
cd $FOLLY_SRC_DIR
git checkout tags/v2024.01.29.00
fi

if [ ! -d "$FOLLY_BUILD_DIR" ] ; then
echo -e "${COLOR_GREEN}[ INFO ] Building Folly ${COLOR_OFF}"
cd $FOLLY_SRC_DIR
Expand Down Expand Up @@ -60,9 +53,7 @@ function install_kineto() {
elif [ "$PLATFORM" = "Mac" ]; then
KINETO_SRC_DIR=$BASE_DIR/third-party/kineto

if [ ! -d "$KINETO_SRC_DIR" ] ; then
echo -e "${COLOR_GREEN}[ INFO ] Cloning kineto repo ${COLOR_OFF}"
git clone --recursive https://github.com/pytorch/kineto.git "$KINETO_SRC_DIR"
if [ ! -d "$KINETO_SRC_DIR/libkineto/build" ] ; then
cd $KINETO_SRC_DIR/libkineto
mkdir build && cd build
cmake ..
Expand Down Expand Up @@ -128,13 +119,6 @@ function install_yaml_cpp() {
YAML_CPP_SRC_DIR=$BASE_DIR/third-party/yaml-cpp
YAML_CPP_BUILD_DIR=$DEPS_DIR/yaml-cpp-build

if [ ! -d "$YAML_CPP_SRC_DIR" ] ; then
echo -e "${COLOR_GREEN}[ INFO ] Cloning yaml-cpp repo ${COLOR_OFF}"
git clone https://github.com/jbeder/yaml-cpp.git "$YAML_CPP_SRC_DIR"
cd $YAML_CPP_SRC_DIR
git checkout tags/0.8.0
fi

if [ ! -d "$YAML_CPP_BUILD_DIR" ] ; then
echo -e "${COLOR_GREEN}[ INFO ] Building yaml-cpp ${COLOR_OFF}"

Expand Down Expand Up @@ -187,6 +171,16 @@ function prepare_test_files() {
local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
fi
if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
pip install transformers
local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
export TOKENIZERS_PARALLELISM=false
cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
python aot_compile_export.py
mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
export TOKENIZERS_PARALLELISM=""
fi
if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
cd ${HANDLER_DIR}
Expand Down Expand Up @@ -376,7 +370,7 @@ cd $BASE_DIR
git submodule update --init --recursive

install_folly
install_kineto
#install_kineto
install_libtorch
install_yaml_cpp
build_llama_cpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/

add_subdirectory("../../../examples/cpp/mnist/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/mnist/mnist_handler/")


# PT2.2 torch.export does not support Mac
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")

add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")

add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
endif()
39 changes: 38 additions & 1 deletion cpp/src/utils/file_system.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#include "src/utils/file_system.hh"
#include "src/utils/logging.hh"

#include <folly/FileUtil.h>
#include <folly/json.h>

namespace torchserve {
std::unique_ptr<std::istream> FileSystem::GetStream(
Expand All @@ -10,4 +14,37 @@ std::unique_ptr<std::istream> FileSystem::GetStream(
}
return file_stream;
}
} // namespace torchserve

std::string FileSystem::LoadBytesFromFile(const std::string& path) {
std::ifstream fs(path, std::ios::in | std::ios::binary);
if (fs.fail()) {
TS_LOGF(ERROR, "Cannot open tokenizer file {}", path);
throw;
}
std::string data;
fs.seekg(0, std::ios::end);
size_t size = static_cast<size_t>(fs.tellg());
fs.seekg(0, std::ios::beg);
data.resize(size);
fs.read(data.data(), size);
return data;
}

// Loads and parses a JSON file into a heap-allocated folly::dynamic.
// Throws std::runtime_error if the file cannot be read; folly::parseJson
// throws on malformed JSON.
std::unique_ptr<folly::dynamic> FileSystem::LoadJsonFile(const std::string& file_path) {
  std::string content;
  if (!folly::readFile(file_path.c_str(), content)) {
    TS_LOGF(ERROR, "{} not found", file_path);
    // BUG FIX: a bare `throw;` outside a catch block calls std::terminate;
    // throw a catchable exception instead.
    throw std::runtime_error("JSON file not found: " + file_path);
  }
  return std::make_unique<folly::dynamic>(folly::parseJson(content));
}

// Returns a reference to the value stored under `key` in the given JSON
// object. The reference is valid only as long as `json` stays alive.
// Throws std::runtime_error when the key is absent.
const folly::dynamic& FileSystem::GetJsonValue(std::unique_ptr<folly::dynamic>& json, const std::string& key) {
  if (json->find(key) != json->items().end()) {
    return (*json)[key];
  }
  // CONSISTENCY FIX: use TS_LOGF like the sibling functions so the `{}`
  // placeholder is actually substituted with `key`.
  TS_LOGF(ERROR, "Required field {} not found in JSON.", key);
  // BUG FIX: a bare `throw;` with no active exception calls std::terminate.
  throw std::runtime_error("Required JSON field not found: " + key);
}
} // namespace torchserve
6 changes: 4 additions & 2 deletions cpp/src/utils/file_system.hh
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#ifndef TS_CPP_UTILS_FILE_SYSTEM_HH_
#define TS_CPP_UTILS_FILE_SYSTEM_HH_

#include <fmt/format.h>

#include <folly/dynamic.h>
#include <fstream>
#include <stdexcept>
#include <string>
Expand All @@ -11,6 +10,9 @@ namespace torchserve {
class FileSystem {
public:
static std::unique_ptr<std::istream> GetStream(const std::string& path);
static std::string LoadBytesFromFile(const std::string& path);
static std::unique_ptr<folly::dynamic> LoadJsonFile(const std::string& file_path);
static const folly::dynamic& GetJsonValue(std::unique_ptr<folly::dynamic>& json, const std::string& key);
};
} // namespace torchserve
#endif // TS_CPP_UTILS_FILE_SYSTEM_HH_
23 changes: 23 additions & 0 deletions cpp/test/examples/examples_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,29 @@ TEST_F(ModelPredictTest, TestLoadPredictLlamaCppHandler) {
base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
}

// Loads the AOTInductor-compiled BERT example handler and runs one
// prediction over sample_text.txt, expecting HTTP-style status 200.
// Skips (rather than fails) when the compiled artifacts are absent.
TEST_F(ModelPredictTest, TestLoadPredictAotInductorBertHandler) {
  const std::string base_dir = "_build/test/resources/examples/aot_inductor/";
  const std::string model_so = base_dir + "bert_handler/bert-seq.so";
  const std::string tokenizer_json = base_dir + "bert_handler/tokenizer.json";

  const bool artifacts_present =
      std::ifstream(model_so).good() && std::ifstream(tokenizer_json).good();
  if (!artifacts_present) {
    GTEST_SKIP() << "Skipping TestLoadPredictAotInductorBertHandler because "
                    "of missing files: "
                 << model_so << " or " << tokenizer_json;
  }

  // GPU 0 when CUDA is available, otherwise CPU (-1).
  const int device_id = torch::cuda::is_available() ? 0 : -1;
  this->LoadPredict(
      std::make_shared<torchserve::LoadModelRequest>(
          base_dir + "bert_handler", "bert_aot", device_id, "", "", 1, false),
      base_dir + "bert_handler",
      base_dir + "bert_handler/sample_text.txt",
      "bert_ts",
      200);
}

TEST_F(ModelPredictTest, TestLoadPredictAotInductorResnetHandler) {
std::string base_dir = "_build/test/resources/examples/aot_inductor/";
std::string file1 = base_dir + "resnet_handler/resnet50_pt2.so";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"createdOn": "12/02/2024 21:09:26",
"runtime": "LSP",
"model": {
"modelName": "bertcppaot",
"handler": "libbert_handler:BertCppHandler",
"modelVersion": "1.0",
"configFile": "model-config.yaml"
},
"archiverVersion": "0.9.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"0":"Not Accepted",
"1":"Accepted"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Worker-pool and batching configuration for the TorchServe
# AOTInductor BERT example.
minWorkers: 1
maxWorkers: 1
batchSize: 2

# Handler-specific options; presumably consumed by BertCppHandler
# (see examples/cpp/aot_inductor/bert) — TODO confirm against handler source.
# Paths below are relative to the model artifact directory.
handler:
model_so_path: "bert-seq.so"
tokenizer_path: "tokenizer.json"
mapping: "index_to_name.json"
model_name: "bert-base-uncased"
mode: "sequence_classification"
do_lower_case: true
num_labels: 2
max_length: 150
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Bloomberg has decided to publish a new report on the global economy.
1 change: 1 addition & 0 deletions cpp/third-party/folly
Submodule folly added at 323e46
1 change: 1 addition & 0 deletions cpp/third-party/kineto
Submodule kineto added at 594c63
1 change: 1 addition & 0 deletions cpp/third-party/tokenizers-cpp
Submodule tokenizers-cpp added at 27dbe1
1 change: 1 addition & 0 deletions cpp/third-party/yaml-cpp
Submodule yaml-cpp added at f73201
5 changes: 5 additions & 0 deletions examples/cpp/aot_inductor/bert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build the BERT example handler as a shared library loadable by the
# TorchServe C++ backend. tokenizers-cpp is vendored as a git submodule
# under cpp/third-party and built on demand (EXCLUDE_FROM_ALL).
# FIX: variable renamed TOKENZIER_CPP_PATH -> TOKENIZER_CPP_PATH (typo);
# the variable is local to this file, so the rename is safe.
set(TOKENIZER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
add_subdirectory(${TOKENIZER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)

add_library(bert_handler SHARED src/bert_handler.cc)
target_include_directories(bert_handler PRIVATE ${TOKENIZER_CPP_PATH}/include)
target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
61 changes: 61 additions & 0 deletions examples/cpp/aot_inductor/bert/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
This example uses AOTInductor to compile the [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) into an so file (see script [aot_compile_export.py](aot_compile_export.py)). In PyTorch 2.2, the supported `MAX_SEQ_LENGTH` in this script is 511.

Then, this example loads model and runs prediction using libtorch. The handler C++ source code for this examples can be found [here](src).

### Setup
1. Follow the instructions in [README.md](../../../../cpp/README.md) to build the TorchServe C++ backend.

```
cd serve/cpp
./build.sh
```

The build script will create the necessary artifact for this example.
To recreate these by hand you can follow the prepare_test_files function of the [build.sh](../../../../cpp/build.sh) script.
We will need the handler .so file as well as the bert-seq.so and tokenizer.json.

2. Create a [model-config.yaml](model-config.yaml)

```yaml
minWorkers: 1
maxWorkers: 1
batchSize: 2

handler:
model_so_path: "bert-seq.so"
tokenizer_path: "tokenizer.json"
mapping: "index_to_name.json"
model_name: "bert-base-uncased"
mode: "sequence_classification"
do_lower_case: true
num_labels: 2
max_length: 150
```
### Generate Model Artifact Folder
```bash
torch-model-archiver --model-name bertcppaot --version 1.0 --handler ../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/libbert_handler:BertCppHandler --runtime LSP --extra-files index_to_name.json,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/bert-seq.so,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/tokenizer.json --config-file model-config.yaml --archive-format no-archive
```

Create model store directory and move the folder `bertcppaot`

```
mkdir model_store
mv bertcppaot model_store/
```

### Inference

Start torchserve using the following command

```
torchserve --ncs --model-store model_store/ --models bertcppaot
```

Infer the model using the following command

```
curl http://localhost:8080/predictions/bertcppaot -T ../../../../cpp/test/resources/examples/aot_inductor/bert_handler/sample_text.txt
Not Accepted
```
Loading

0 comments on commit f29b8d7

Please sign in to comment.