Commit

Merge pull request #25 from ShihaoLuo/main
update: deployment readme
ShihaoLuo authored Nov 11, 2023
2 parents 75ffd09 + fcb62a7 commit 9fa704b
Showing 125 changed files with 34,411 additions and 14 deletions.
81 changes: 67 additions & 14 deletions deployment/README.md
@@ -19,18 +19,28 @@
>+ `cd path_to_rllte/deployment/c++`
>+ `mkdir build && cd build`
>+ `cmake .. && make`
>+ `./DeployerTest ../../model/test_model.onnx`
This demo deploys the test ONNX model with TensorRT and prints the inference result for a [1\*9\*84\*84] float16 input.
![Alt text](docs/c++_quick_start_run.png)

### python
>+ `git clone https://github.com/RLE-Foundation/rllte`
>+ `cd path_to_rllte/deployment/python`
>+ `python3 pth2onnx.py ../model/test_model.pth`
This Python script converts the .pth model to an ONNX model, which is saved in the current directory.
![Alt text](docs/python_pth_2_onnx.png)
>+ `./trtexec --onnx=test_model.onnx --saveEngine=test_model.trt --skipInference`
Use the trtexec tool to convert the ONNX model into a TensorRT engine.
![Alt text](docs/onnx_2_trt_py.png)
>+ `python3 infer.py test_model.trt`
This runs inference with the TensorRT engine and prints the result for a [1\*9\*84\*84] float16 input.
![Alt text](docs/py_infer.png)


## use in your c++ project
### basic API instruction
>+ `#include "RLLTEDeployer.h"`
Include the header file in your cpp file.
>+ `Options options;`
`options.deviceIndex = 0;`
`options.doesSupportDynamicBatchSize = false;`
@@ -50,13 +60,23 @@
Use the infer member function to run inference. The input is a tensor of the appropriate data type, and the output is a pointer to a buffer of matching size and type; the inference result is written to the output.
>+ For the complete code, please refer to DeployerTest.cpp; a minimal usage sketch is given below.
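The following is a minimal usage sketch assembled from the steps above. The `Options` fields come from this README; the constructor taking `Options`, the `build`/`infer` signatures, the float16 buffer type, and the output size of 50 are assumptions based on DeployerTest.cpp rather than a definitive API.

```cpp
// Minimal sketch only: RLLTEDeployer's constructor, build(), and infer()
// signatures are assumed from DeployerTest.cpp and may differ in the repo.
#include "RLLTEDeployer.h"
#include <cuda_fp16.h>   // __half (float16) host-side type
#include <vector>

int main() {
    Options options;
    options.deviceIndex = 0;                        // run on GPU 0
    options.doesSupportDynamicBatchSize = false;    // fixed batch size

    RLLTEDeployer deployer(options);                // assumed constructor taking Options
    deployer.build("../../model/test_model.onnx");  // assumed: parse ONNX and build the TensorRT engine

    std::vector<__half> input(1 * 9 * 84 * 84);     // [1*9*84*84] float16 input, as in the demo
    std::vector<__half> output(50);                 // assumed output size (the test model emits 1*50)
    deployer.infer(input.data(), output.data());    // result is written to the output buffer
    return 0;
}
```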
## c++ project with cmake
### build your c++ project with cmake
>+ `find_package(CUDA REQUIRED)`
Find the header and dynamic libraries of CUDA.
>+ `include_directories(${CUDA_INCLUDE_DIRS} ${Path_of_RLLTEDeployer_h})`
Set the path of include files required.
>+ `add_library(RLLTEDeployer SHARED ${Path_of_RLLTEDeployer.cpp} ${Path_of_common/logger.cpp})`
Build the RLLTEDeployer as a dynamic library.
>+ `target_link_libraries(RLLTEDeployer nvinfer nvonnxparser ${CUDA_LIBRARIES})`
Link the dependencies of libRLLTEDeployer.so.
>+ `add_executable(YourProjectExecutable ${Path_of_YourProjectExecutable.cpp})`
Build the executable file of your project.
>+ `target_link_libraries(YourProjectExecutable RLLTEDeployer)`
Link the RLLTEDeployer to your project.

## c++ deployment with Docker
Deploying the model with Docker is easier than using the host PC directly: the NVIDIA driver is the only dependency to install, and everything else is provided in the image.
### install Nvidia_Docker
>+ Make sure to install Nvidia Driver.
>+ `sudo apt-get install ca-certificates gnupg lsb-release`
@@ -73,15 +93,21 @@
>+ `sudo groupadd docker`
>+ `sudo gpasswd -a $USER docker`
>+ Log out and log back in so the new group membership takes effect.
>+ `sudo service docker restart`
>+ `docker run --gpus all nvidia/cuda:12.0.0-cudnn8-devel-ubuntu20.04 nvidia-smi`
If the GPU information is displayed, everything is working.
![Alt text](docs/gpus_docker.png)

### usage
>+ `docker pull jakeshihaoluo/rllte_deployment_env:0.0.1`
![Alt text](docs/pull.png)
>+ `docker run -it -v ${path_to_the_repo}:/rllte --gpus all jakeshihaoluo/rllte_deployment_env:0.0.1`
![Alt text](docs/docker_container.png)
>+ `cd /rllte/deployment/c++`
>+ `mkdir build && cd build`
>+ `cmake .. && make`
>+ `./DeployerTest ../../model/test_model.onnx`
![Alt text](docs/run_docker.png)

## deployment with Ascend

@@ -98,7 +124,7 @@
### c++ development
>+ include header file `#include "acl/acl.h"`
>+ The main workflow is shown below; a minimal AscendCL skeleton follows the figure. The main functions are implemented in *ascend/src/main.cpp*.
![Alt text](docs/ascendmain.png)

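As a reference, here is a minimal AscendCL skeleton of the init, load, execute, and release flow. It is a sketch only, assuming the standard `aclInit`/`aclmdlLoadFromFile` workflow; the `.om` model path is a placeholder, and *ascend/src/main.cpp* may organize these calls differently.

```cpp
// Minimal AscendCL sketch: init -> set device -> load offline model -> (execute) -> release.
// The model path is a placeholder; error checking is omitted for brevity.
#include "acl/acl.h"
#include <cstdio>

int main() {
    aclInit(nullptr);                                        // initialize AscendCL without a config file
    aclrtSetDevice(0);                                       // claim device 0

    uint32_t modelId = 0;
    aclmdlLoadFromFile("../model/test_model.om", &modelId);  // load the offline (.om) model

    // ... build aclmdlDataset inputs/outputs, then call aclmdlExecute(modelId, inputs, outputs) ...

    aclmdlUnload(modelId);                                   // release the model
    aclrtResetDevice(0);                                     // release the device
    aclFinalize();                                           // de-initialize AscendCL
    printf("Ascend inference skeleton finished.\n");
    return 0;
}
```
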
### build and run
@@ -109,4 +135,31 @@
>+ `chmod +x sample_build.sh`
>+ `./sample_build.sh`
>+ `chmod +x sample_run.sh`
>+ `./sample_run.sh`
## deployment with NCNN

### what is NCNN
>+ ncnn is a high-performance neural network inference computing framework optimized for mobile platforms. ncnn is deeply considerate about deployment and uses on mobile phones from the beginning of design. ncnn does not have third party dependencies. It is cross-platform, and runs faster than all known open source frameworks on mobile phone cpu. Developers can easily deploy deep learning algorithm models to the mobile platform by using efficient ncnn implementation, create intelligent APPs, and bring the artificial intelligence to your fingertips. ncnn is currently being used in many Tencent applications, such as QQ, Qzone, WeChat, Pitu and so on.
Ref: https://github.com/Tencent/ncnn
![Alt text](docs/ncnn.png)

### deployment on PC with NCNN
>+ `cd deployment/ncnn`
>+ install requirements of NCNN
`sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev`
>+ Your ONNX model may contain redundant operators such as Shape, Gather, and Unsqueeze that are not supported in ncnn. Use the handy tool developed by daquexian to eliminate them.
Ref: https://github.com/daquexian/onnx-simplifier
`python3 -m pip install onnxsim`
`python3 -m onnxsim ../model/test_model.onnx test_model-sim.onnx`
>+ convert the model to ncnn using tools/onnx2ncnn
`./tools/onnx2ncnn test_model-sim.onnx test_model-sim.param test_model-sim.bin`
>+ Now you should have test_model-sim.bin, test_model-sim.onnx, and test_model-sim.param in the ncnn directory.
![Alt text](docs/ncnn_1.png)
>+ Before compiling the executable, change the ncnn library directory in CMakeLists.txt to your own path, for example
![Alt text](docs/ncnn_2.png)
`mkdir build && cd build && cmake .. && make`
`./NCNNDeployTest ../test_model-sim.param ../test_model-sim.bin `
>+ After running it, the program outputs a 1*50 tensor.
![Alt text](docs/ncnn_3.png)

Binary file added deployment/docs/ascendmain.png
Binary file added deployment/docs/ascendworkflow.png
Binary file added deployment/docs/c++_quick_start_run.png
Binary file added deployment/docs/docker_container.png
Binary file added deployment/docs/gpus_docker.png
Binary file added deployment/docs/jetpackos.png
Binary file added deployment/docs/ncnn.png
Binary file added deployment/docs/ncnn_1.png
Binary file added deployment/docs/ncnn_2.png
Binary file added deployment/docs/ncnn_3.png
Binary file added deployment/docs/onnx_2_trt_py.png
Binary file added deployment/docs/pull.png
Binary file added deployment/docs/py_infer.png
Binary file added deployment/docs/python_pth_2_onnx.png
Binary file added deployment/docs/run_docker.png
Binary file added deployment/docs/sdk_ver.png
Binary file added deployment/docs/ssh_con.png
21 changes: 21 additions & 0 deletions deployment/ncnn/CMakeLists.txt
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.2)

project(NCNNDeploy_test)

if(CMAKE_BUILD_TYPE STREQUAL "")
set(CMAKE_BUILD_TYPE "Release")
endif()

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3")

include_directories("~/Documents/Hsuanwu/deployment/ncnn/include")

set(ncnn_DIR "~/Documents/Hsuanwu/deployment/ncnn/lib/cmake/ncnn" CACHE PATH "Directory that contains ncnnConfig.cmake")
find_package(ncnn REQUIRED)

add_executable(NCNNDeployTest NCNNDeployeTest.cpp)
target_link_libraries(NCNNDeployTest ncnn)

message("Build type: " ${CMAKE_BUILD_TYPE})

46 changes: 46 additions & 0 deletions deployment/ncnn/NCNNDeployeTest.cpp
@@ -0,0 +1,46 @@
#include "ncnn/net.h"

int main(int argc, char** argv)
{
ncnn::Net net; //net
net.load_param(argv[1]);//load the param file
net.load_model(argv[2]);//load the bin file


ncnn::Mat in;//data type Mat. Input/output data are stored in this structure.
in.create(84 ,84, 9);//create a 9*84*84 tensor as the test onnx model requires.
in.fill(3.0f);//fill the input data with 3.0.


ncnn::Extractor ex = net.create_extractor();//create an extractor from the net
ex.set_light_mode(true);
ex.set_num_threads(4);
ex.input("input", in);//feed the input data into the extractor; inference runs when the output is extracted.
ncnn::Mat out;
ex.extract("output", out);//get infer result and put it into the out variable.


for (int q=0; q<out.c; q++)//print the infer result
{
const float* ptr = out.channel(q);
for (int z=0; z<out.d; z++)
{
for (int y=0; y<out.h; y++)
{
for (int x=0; x<out.w; x++)
{
printf("%f ", ptr[x]);
}
ptr += out.w;
printf("\n");
}
printf("\n");
}
printf("------------------------\n");
}


ex.clear();//release the extractor
net.clear();//release the net
return 0;
}
