Skip to content

Commit

Permalink
Merge branch 'Tencent:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
inspireMeNow authored Jul 12, 2024
2 parents 4db4e41 + 1c40615 commit 6211838
Show file tree
Hide file tree
Showing 57 changed files with 3,714 additions and 130 deletions.
69 changes: 69 additions & 0 deletions .github/workflows/esp32.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
name: ESP32
on:
push:
branches: [master]
paths:
- '.github/workflows/esp32.yml'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'
pull_request:
branches: [master]
paths:
- '.github/workflows/esp32.yml'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'

concurrency:
group: esp32-${{ github.ref }}
cancel-in-progress: true

permissions:
contents: read

jobs:
build:
name: ESP32
runs-on: ubuntu-20.04

steps:
- uses: actions/checkout@v4
with:
submodules: true

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.8'

- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y cmake ninja-build ccache
- name: Checkout ESP-IDF
uses: actions/checkout@v4
with:
repository: espressif/esp-idf
path: esp-idf-install
ref: release/v5.3

- name: Install ESP-IDF
run: |
cd esp-idf-install
git submodule update --init --recursive
./install.sh
- name: Set environment and build NCNN for ESP32
run: |
source esp-idf-install/export.sh
echo "IDF_PATH=$IDF_PATH" >> $GITHUB_ENV
echo "${IDF_PATH}/tools" >> $GITHUB_PATH
echo "${IDF_PATH}/components" >> $GITHUB_PATH
mkdir -p build-esp32 && cd build-esp32
cmake -DCMAKE_TOOLCHAIN_FILE="../toolchains/esp32.toolchain.cmake" -DCMAKE_BUILD_TYPE=Release -DNCNN_BUILD_EXAMPLES=OFF ..
make -j 4
make install
187 changes: 150 additions & 37 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1961,10 +1961,11 @@ cooling_down = 1
vision_transformer min = 20594.51 max = 20601.53 avg = 20596.59
FastestDet min = 90.25 max = 91.00 avg = 90.64
```
### Raspberry Pi 5 Broadcom BCM2712, VideoCore VII Graphics Overclock to 1.1Ghz (Vulkan 1.2)

### Raspberry Pi 5 Broadcom BCM2712 Overclock to 2.9GHz, VideoCore VII Graphics Overclock to 1.1GHz (Vulkan 1.2)
```
pi@raspberrypi:~/ncnn/build/benchmark $ echo "arm_freq=2900" | sudo tee -a /boot/firmware/config.txt
pi@raspberrypi:~/ncnn/build/benchmark $ echo "gpu_freq=1100" | sudo tee -a /boot/firmware/config.txt
pi@raspberrypi:~/ncnn/build/benchmark $ echo "force_turbo=1" | sudo tee -a /boot/firmware/config.txt
pi@raspberrypi:~/ncnn/build/benchmark $ sudo reboot
pi@raspberrypi:~/ncnn/build/benchmark $ ./benchncnn 10 4 0 0
Expand All @@ -1983,41 +1984,83 @@ num_threads = 4
powersave = 0
gpu_device = 0
cooling_down = 1
squeezenet min = 106.91 max = 106.99 avg = 106.95
squeezenet_int8 min = 8.91 max = 9.49 avg = 9.11
mobilenet min = 147.60 max = 147.66 avg = 147.63
mobilenet_int8 min = 10.77 max = 36.34 avg = 14.67
mobilenet_v2 min = 109.97 max = 110.05 avg = 110.00
mobilenet_v3 min = 101.90 max = 102.00 avg = 101.95
shufflenet min = 59.73 max = 60.29 avg = 59.89
shufflenet_v2 min = 81.38 max = 81.60 avg = 81.48
mnasnet min = 105.78 max = 105.88 avg = 105.82
proxylessnasnet min = 108.78 max = 108.92 avg = 108.84
efficientnet_b0 min = 168.82 max = 169.02 avg = 168.90
efficientnetv2_b0 min = 232.37 max = 232.58 avg = 232.49
regnety_400m min = 130.27 max = 130.41 avg = 130.34
blazeface min = 22.14 max = 22.20 avg = 22.17
googlenet min = 299.08 max = 299.28 avg = 299.17
googlenet_int8 min = 29.24 max = 29.92 avg = 29.61
resnet18 min = 304.37 max = 304.55 avg = 304.48
resnet18_int8 min = 26.23 max = 53.80 avg = 35.61
alexnet min = 203.85 max = 217.97 avg = 209.30
vgg16 min = 1570.77 max = 1571.04 avg = 1570.94
vgg16_int8 min = 129.85 max = 145.79 avg = 132.82
resnet50 min = 753.93 max = 754.41 avg = 754.08
resnet50_int8 min = 49.41 max = 49.84 avg = 49.64
squeezenet_ssd min = 399.12 max = 399.55 avg = 399.30
squeezenet_ssd_int8 min = 34.22 max = 34.89 avg = 34.54
mobilenet_ssd min = 344.68 max = 344.90 avg = 344.79
mobilenet_ssd_int8 min = 27.42 max = 28.16 avg = 27.74
mobilenet_yolo min = 711.69 max = 711.76 avg = 711.72
mobilenetv2_yolov3 min = 361.99 max = 362.11 avg = 362.05
yolov4-tiny min = 589.25 max = 608.54 avg = 595.14
nanodet_m min = 178.85 max = 184.93 avg = 180.18
yolo-fastest-1.1 min = 92.28 max = 92.53 avg = 92.43
yolo-fastestv2 min = 70.79 max = 73.38 avg = 71.19
vision_transformer min = 18645.20 max = 18787.41 avg = 18667.17
FastestDet min = 74.67 max = 74.77 avg = 74.71
squeezenet min = 106.98 max = 107.05 avg = 107.02
squeezenet_int8 min = 8.51 max = 8.83 avg = 8.65
mobilenet min = 147.66 max = 147.71 avg = 147.68
mobilenet_int8 min = 10.21 max = 10.54 avg = 10.37
mobilenet_v2 min = 110.11 max = 110.23 avg = 110.18
mobilenet_v3 min = 101.84 max = 102.03 avg = 101.92
shufflenet min = 59.77 max = 59.84 avg = 59.80
shufflenet_v2 min = 81.46 max = 81.60 avg = 81.51
mnasnet min = 105.88 max = 105.98 avg = 105.94
proxylessnasnet min = 108.82 max = 108.89 avg = 108.86
efficientnet_b0 min = 168.79 max = 168.93 avg = 168.87
efficientnetv2_b0 min = 232.52 max = 232.80 avg = 232.65
regnety_400m min = 130.33 max = 130.49 avg = 130.36
blazeface min = 22.23 max = 22.49 avg = 22.39
googlenet min = 299.25 max = 299.37 avg = 299.31
googlenet_int8 min = 29.21 max = 29.97 avg = 29.58
resnet18 min = 304.47 max = 304.64 avg = 304.58
resnet18_int8 min = 19.31 max = 20.77 avg = 20.24
alexnet min = 203.68 max = 203.79 avg = 203.76
vgg16 min = 1571.91 max = 1572.22 avg = 1572.06
vgg16_int8 min = 128.46 max = 130.89 avg = 129.96
resnet50 min = 754.16 max = 754.33 avg = 754.26
resnet50_int8 min = 52.65 max = 53.48 avg = 53.09
squeezenet_ssd min = 398.22 max = 398.36 avg = 398.28
squeezenet_ssd_int8 min = 34.26 max = 34.67 avg = 34.51
mobilenet_ssd min = 344.81 max = 344.99 avg = 344.89
mobilenet_ssd_int8 min = 27.59 max = 28.01 avg = 27.77
mobilenet_yolo min = 712.53 max = 712.63 avg = 712.59
mobilenetv2_yolov3 min = 362.81 max = 363.11 avg = 362.90
yolov4-tiny min = 589.30 max = 589.51 avg = 589.39
nanodet_m min = 178.83 max = 178.97 avg = 178.88
yolo-fastest-1.1 min = 92.36 max = 92.58 avg = 92.45
yolo-fastestv2 min = 70.68 max = 70.84 avg = 70.74
vision_transformer min = 18615.94 max = 18648.17 avg = 18633.77
FastestDet min = 74.59 max = 74.68 avg = 74.63
pi@raspberrypi:~/ncnn/build/benchmark $ ./benchncnn 10 4 0 -1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
squeezenet min = 7.61 max = 7.76 avg = 7.70
squeezenet_int8 min = 7.97 max = 8.68 avg = 8.23
mobilenet min = 9.65 max = 9.91 avg = 9.80
mobilenet_int8 min = 10.60 max = 36.93 avg = 13.29
mobilenet_v2 min = 12.25 max = 12.64 avg = 12.40
mobilenet_v3 min = 8.14 max = 8.26 avg = 8.20
shufflenet min = 3.72 max = 3.82 avg = 3.77
shufflenet_v2 min = 2.99 max = 3.10 avg = 3.05
mnasnet min = 7.27 max = 7.46 avg = 7.37
proxylessnasnet min = 8.39 max = 8.55 avg = 8.48
efficientnet_b0 min = 13.15 max = 13.59 avg = 13.39
efficientnetv2_b0 min = 14.79 max = 15.30 avg = 14.91
regnety_400m min = 9.49 max = 9.71 avg = 9.57
blazeface min = 1.41 max = 1.46 avg = 1.43
googlenet min = 28.60 max = 28.87 avg = 28.73
googlenet_int8 min = 27.09 max = 27.77 avg = 27.47
resnet18 min = 21.47 max = 21.88 avg = 21.65
resnet18_int8 min = 20.07 max = 20.30 avg = 20.24
alexnet min = 22.75 max = 23.47 avg = 23.05
vgg16 min = 154.32 max = 158.51 avg = 157.40
vgg16_int8 min = 127.78 max = 162.60 avg = 133.21
resnet50 min = 49.36 max = 49.86 avg = 49.63
resnet50_int8 min = 46.44 max = 46.89 avg = 46.74
squeezenet_ssd min = 37.31 max = 74.95 avg = 41.30
squeezenet_ssd_int8 min = 32.62 max = 33.63 avg = 33.09
mobilenet_ssd min = 27.40 max = 27.99 avg = 27.68
mobilenet_ssd_int8 min = 26.70 max = 27.71 avg = 27.23
mobilenet_yolo min = 60.25 max = 61.10 avg = 60.67
mobilenetv2_yolov3 min = 43.51 max = 44.29 avg = 43.87
yolov4-tiny min = 51.63 max = 52.64 avg = 52.24
nanodet_m min = 11.89 max = 12.06 avg = 11.97
yolo-fastest-1.1 min = 5.63 max = 5.78 avg = 5.69
yolo-fastestv2 min = 5.34 max = 5.48 avg = 5.40
vision_transformer min = 481.78 max = 506.72 avg = 493.05
FastestDet min = 4.91 max = 5.14 avg = 5.01
```
### Raspberry Pi Zero 2 W Broadcom BCM2710A1, Cortex-A53 (ARMv8) (1.0GHz x 4)

Expand Down Expand Up @@ -8747,3 +8790,73 @@ cooling_down = 0
vision_transformer min = 153.75 max = 198.81 avg = 165.58
FastestDet min = 3.01 max = 5.01 avg = 3.29
```

### AXERA AX630C (Cortex-A53 1.2GHz * 2)

```
# ~/ncnn/build-aarch64-linux-gnu/benchmark # ./benchncnn 4 1 0 -1 0
loop_count = 4
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 0
squeezenet min = 129.78 max = 130.30 avg = 130.09
squeezenet_int8 min = 123.08 max = 123.48 avg = 123.22
mobilenet min = 211.46 max = 221.68 avg = 214.14
mobilenet_int8 min = 196.00 max = 212.73 avg = 200.23
mobilenet_v2 min = 149.15 max = 149.21 avg = 149.17
mobilenet_v3 min = 124.70 max = 125.54 avg = 125.08
shufflenet min = 80.75 max = 80.88 avg = 80.81
shufflenet_v2 min = 74.30 max = 74.50 avg = 74.37
mnasnet min = 148.87 max = 165.85 avg = 153.26
proxylessnasnet min = 203.05 max = 213.50 avg = 205.82
efficientnet_b0 min = 270.39 max = 280.59 avg = 273.13
efficientnetv2_b0 min = 302.93 max = 318.07 avg = 307.30
regnety_400m min = 187.47 max = 187.90 avg = 187.60
blazeface min = 22.64 max = 22.78 avg = 22.72
googlenet min = 487.36 max = 503.50 avg = 493.93
googlenet_int8 min = 418.16 max = 434.44 avg = 426.09
resnet18_int8 min = 290.39 max = 301.90 avg = 293.70
resnet50_int8 min = 888.81 max = 898.34 avg = 895.92
squeezenet_ssd min = 320.78 max = 330.33 avg = 323.54
squeezenet_ssd_int8 min = 281.52 max = 299.11 avg = 286.89
mobilenet_ssd min = 435.79 max = 452.66 avg = 444.19
mobilenet_ssd_int8 min = 394.38 max = 411.09 avg = 398.65
mobilenet_yolo min = 955.48 max = 972.38 avg = 967.52
mobilenetv2_yolov3 min = 519.47 max = 536.58 avg = 524.25
yolo-fastestv2 min = 73.94 max = 74.15 avg = 74.05
FastestDet min = 81.89 max = 82.07 avg = 81.98
# ~/ncnn/build-aarch64-linux-gnu/benchmark # ./benchncnn 4 2 0 -1 0
loop_count = 4
num_threads = 2
powersave = 0
gpu_device = -1
cooling_down = 0
squeezenet min = 75.14 max = 88.89 avg = 79.06
squeezenet_int8 min = 70.11 max = 85.48 avg = 74.32
mobilenet min = 112.72 max = 124.85 avg = 115.87
mobilenet_int8 min = 100.35 max = 100.58 avg = 100.49
mobilenet_v2 min = 85.92 max = 86.20 avg = 86.03
mobilenet_v3 min = 73.94 max = 74.34 avg = 74.20
shufflenet min = 53.99 max = 66.11 avg = 57.63
shufflenet_v2 min = 47.47 max = 47.72 avg = 47.59
mnasnet min = 85.96 max = 86.27 avg = 86.13
proxylessnasnet min = 111.15 max = 121.84 avg = 113.92
efficientnet_b0 min = 149.72 max = 150.00 avg = 149.85
efficientnetv2_b0 min = 168.84 max = 170.57 avg = 169.35
regnety_400m min = 120.42 max = 135.50 avg = 124.26
blazeface min = 14.27 max = 14.48 avg = 14.39
googlenet min = 263.82 max = 274.74 avg = 266.84
googlenet_int8 min = 226.91 max = 227.36 avg = 227.23
resnet18_int8 min = 157.66 max = 168.11 avg = 160.57
resnet50_int8 min = 469.84 max = 484.00 avg = 476.59
squeezenet_ssd min = 190.23 max = 204.41 avg = 193.99
squeezenet_ssd_int8 min = 162.73 max = 174.30 avg = 165.79
mobilenet_ssd min = 236.26 max = 251.16 avg = 240.34
mobilenet_ssd_int8 min = 203.22 max = 212.01 avg = 206.00
mobilenet_yolo min = 522.45 max = 537.99 avg = 529.95
mobilenetv2_yolov3 min = 300.33 max = 316.59 avg = 304.89
yolo-fastestv2 min = 50.27 max = 50.62 avg = 50.43
FastestDet min = 53.34 max = 53.64 avg = 53.51
```
29 changes: 29 additions & 0 deletions docs/how-to-build/how-to-build.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ git submodule update --init
- [Build for QNX](#build-for-qnx)
- [Build for Nintendo 3DS Homebrew Launcher](#build-for-nintendo-3ds-homebrew-launcher)
- [Build for HarmonyOS with cross-compiling](#build-for-harmonyos-with-cross-compiling)
- [Build for ESP32 with cross-compiling](#build-for-esp32-with-cross-compiling)

***

Expand Down Expand Up @@ -885,3 +886,31 @@ ${HM_SDK}/native/build-tools/cmake/bin/cmake -DOHOS_STL=c++_static -DOHOS_ARCH=a
make -j$(nproc)
make install
```

***

### Build for ESP32 with cross-compiling
Download the ESP-IDF SDK:
```shell
git clone https://github.com/espressif/esp-idf
cd esp-idf
git submodule update --init --recursive
```
Install the ESP-IDF SDK and set up the environment:
```shell
./install.sh
source export.sh
```
Note: ESP-IDF requires Python >= 3.8 and CMake >= 3.24.0.

Build the ncnn library (run these commands from the root of the ncnn source tree):
```shell
mkdir build-esp32
cd build-esp32
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/esp32.toolchain.cmake -DCMAKE_BUILD_TYPE=Release ..
make -j 4
make install
```
Note: Make sure to build in a shell where the ESP-IDF environment is active (i.e. after running `source export.sh`).

The compiled ncnn library and headers can then be copied into your ESP32 project for testing.
36 changes: 33 additions & 3 deletions src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,17 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
// clang-format off
// *INDENT-OFF*

#if NCNN_ARM82
if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
{
Mat bottom_blob_fp16;
cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
bottom_blob = bottom_blob_fp16;
}
else
#endif // NCNN_ARM82
#if NCNN_VFPV4
if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
{
Mat bottom_blob_fp16;
cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
Expand Down Expand Up @@ -740,8 +749,17 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
// clang-format off
// *INDENT-OFF*

#if NCNN_ARM82
if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
{
Mat bottom_blob_fp32;
cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
bottom_blob = bottom_blob_fp32;
}
else
#endif // NCNN_ARM82
#if NCNN_VFPV4
if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
if (opt.use_fp16_storage && !opt.use_bf16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
{
Mat bottom_blob_fp32;
cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
Expand Down Expand Up @@ -2719,8 +2737,20 @@ int Extractor::extract(int blob_index, Mat& feat, int type)

// clang-format off
// *INDENT-OFF*
#if NCNN_ARM82
if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
{
if (feat.elembits() == 16)
{
Mat feat_fp32;
cast_float16_to_float32(feat, feat_fp32, d->opt);
feat = feat_fp32;
}
}
else
#endif // NCNN_ARM82
#if NCNN_VFPV4
if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
if (d->opt.use_fp16_storage && !d->opt.use_bf16_storage && cpu_support_arm_vfpv4() && (type == 0))
{
if (feat.elembits() == 16)
{
Expand Down
Loading

0 comments on commit 6211838

Please sign in to comment.