From 65dbb4a70b55bfc649c870c5aae7e7510a003fbf Mon Sep 17 00:00:00 2001 From: nihui Date: Tue, 7 May 2024 11:31:08 +0800 Subject: [PATCH] update ncnnoptimize layers, lightmode=false keeps original weight (#5414) --- docs/developer-guide/operators.md | 75 +++++++++- src/layer/arm/convolution1d_arm.cpp | 8 +- src/layer/arm/convolution1d_arm_asimdhp.cpp | 3 +- src/layer/arm/convolution_arm.cpp | 24 ++-- src/layer/arm/convolution_arm_asimdhp.cpp | 9 +- src/layer/arm/convolutiondepthwise_arm.cpp | 15 +- .../arm/convolutiondepthwise_arm_asimdhp.cpp | 6 +- src/layer/arm/deconvolution_arm.cpp | 6 +- src/layer/arm/deconvolution_arm_asimdhp.cpp | 3 +- src/layer/arm/deconvolutiondepthwise_arm.cpp | 6 +- .../deconvolutiondepthwise_arm_asimdhp.cpp | 3 +- src/layer/arm/gemm_arm.cpp | 18 ++- src/layer/arm/gemm_arm_asimdhp.cpp | 9 +- src/layer/arm/gemm_arm_vfpv4.cpp | 9 +- src/layer/arm/gru_arm.cpp | 18 ++- src/layer/arm/gru_arm_asimdhp.cpp | 9 +- src/layer/arm/innerproduct_arm.cpp | 9 +- src/layer/arm/innerproduct_arm_vfpv4.cpp | 3 +- src/layer/arm/lstm_arm.cpp | 18 ++- src/layer/arm/lstm_arm_asimdhp.cpp | 9 +- src/layer/arm/multiheadattention_arm.cpp | 28 ++-- src/layer/arm/rnn_arm.cpp | 18 ++- src/layer/arm/rnn_arm_asimdhp.cpp | 9 +- .../loongarch/convolution1d_loongarch.cpp | 5 +- src/layer/loongarch/convolution_loongarch.cpp | 6 +- .../convolutiondepthwise_loongarch.cpp | 12 +- .../loongarch/deconvolution_loongarch.cpp | 3 +- .../deconvolutiondepthwise_loongarch.cpp | 6 +- .../loongarch/innerproduct_loongarch.cpp | 9 +- src/layer/mips/convolution1d_mips.cpp | 5 +- src/layer/mips/convolution_mips.cpp | 6 +- src/layer/mips/convolutiondepthwise_mips.cpp | 12 +- src/layer/mips/deconvolution_mips.cpp | 3 +- .../mips/deconvolutiondepthwise_mips.cpp | 6 +- src/layer/mips/innerproduct_mips.cpp | 9 +- src/layer/riscv/convolution1d_riscv.cpp | 8 +- src/layer/riscv/convolution_riscv.cpp | 6 +- .../riscv/convolutiondepthwise_riscv.cpp | 12 +- src/layer/riscv/deconvolution_riscv.cpp | 6 +- .../riscv/deconvolutiondepthwise_riscv.cpp | 12 +- src/layer/riscv/gemm_riscv.cpp | 9 +- src/layer/riscv/gru_riscv.cpp | 9 +- src/layer/riscv/innerproduct_riscv.cpp | 6 +- src/layer/vulkan/batchnorm_vulkan.cpp | 6 + src/layer/vulkan/convolution1d_vulkan.cpp | 7 +- src/layer/vulkan/convolution_vulkan.cpp | 7 +- .../vulkan/convolutiondepthwise_vulkan.cpp | 14 +- src/layer/vulkan/deconvolution_vulkan.cpp | 13 +- .../vulkan/deconvolutiondepthwise_vulkan.cpp | 14 +- src/layer/vulkan/gemm_vulkan.cpp | 9 +- src/layer/vulkan/innerproduct_vulkan.cpp | 21 ++- src/layer/vulkan/memorydata_vulkan.cpp | 5 + .../vulkan/multiheadattention_vulkan.cpp | 28 ++-- src/layer/vulkan/normalize_vulkan.cpp | 3 + src/layer/vulkan/padding_vulkan.cpp | 5 + src/layer/vulkan/prelu_vulkan.cpp | 5 + src/layer/vulkan/priorbox_vulkan.cpp | 19 ++- src/layer/vulkan/scale_vulkan.cpp | 6 + src/layer/x86/convolution1d_x86.cpp | 5 +- src/layer/x86/convolution_x86.cpp | 12 +- src/layer/x86/convolutiondepthwise_x86.cpp | 12 +- src/layer/x86/deconvolution_x86.cpp | 3 +- src/layer/x86/deconvolutiondepthwise_x86.cpp | 6 +- src/layer/x86/deformableconv2d_x86.cpp | 3 +- src/layer/x86/gemm_x86.cpp | 9 +- src/layer/x86/innerproduct_x86.cpp | 9 +- src/layer/x86/lstm_x86.cpp | 9 +- src/layer/x86/multiheadattention_x86.cpp | 28 ++-- tools/modelwriter.h | 132 ++++++++++++------ 69 files changed, 614 insertions(+), 241 deletions(-) diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md index c0255375b38..6056c277b1f 100644 --- a/docs/developer-guide/operators.md +++ b/docs/developer-guide/operators.md @@ -30,8 +30,10 @@ * [Dropout](#dropout) * [Eltwise](#eltwise) * [ELU](#elu) +* [Embed](#embed) * [Exp](#exp) * [Flatten](#flatten) +* [Fold](#fold) * [GELU](#gelu) * [GLU](#glu) * [Gemm](#gemm) @@ -84,6 +86,7 @@ * [Threshold](#threshold) * [Tile](#tile) * [UnaryOp](#unaryop) +* [Unfold](#unfold) # AbsVal ``` @@ -474,12 +477,15 @@ y = crop(x) | --------- | ------------- | ----- | --------- | ----------------- | | 0 | woffset | int | 0 | | | 1 | hoffset | int | 0 | | -| 2 | coffset | int | 1 | | -| 3 | outw | int | 1 | | +| 13 | doffset | int | 0 | | +| 2 | coffset | int | 0 | | +| 3 | outw | int | 0 | | | 4 | outh | int | 0 | | +| 14 | outd | int | 0 | | | 5 | outc | int | 0 | | | 6 | woffset2 | int | 0 | | -| 7 | hoffset2 | int | 1 | | +| 7 | hoffset2 | int | 0 | | +| 15 | doffset2 | int | 0 | | | 8 | coffset2 | int | 0 | | | 9 | starts | array | [ ] | | | 10 | ends | array | [ ] | | @@ -819,6 +825,23 @@ else y = x | --------- | ------------- | ----- | --------- | ----------------- | | 0 | alpha | float | 0.1f | | +# Embed +``` +y = embedding(x) +``` + +| param id | name | type | default | description | +| --------- | ------------- | ----- | --------- | ----------------- | +| 0 | num_output | int | 0 | | +| 1 | input_dim | int | 0 | | +| 2 | bias_term | int | 0 | | +| 3 | weight_data_size | int | 0 | | + +| weight | type | shape | +| ------------- | ----- | --------------------- | +| weight_data | float | [weight_data_size] | +| bias_term | float | [num_output] | + # Exp ``` if base == -1 y = exp(shift + x * scale) @@ -839,6 +862,29 @@ Reshape blob to 1 dimension * one_blob_only +# Fold +``` +y = fold(x) +``` + +* one_blob_only + +| param id | name | type | default | description | +| --------- | ------------- | ----- | --------- | ----------------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | +| 20 | output_w | int | 0 | | +| 21 | output_h | int | output_w | | + # GELU ``` if fast_gelu == 1 y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x))); @@ -1187,6 +1233,7 @@ y = data | 1 | h | int | 0 | | | 11 | d | int | 0 | | | 2 | c | int | 0 | | +| 21 | load_type | int | 1 | 1=fp32 | | weight | type | shape | | ------------- | ----- | --------------------- | @@ -1537,6 +1584,7 @@ y = reduce_op(x * coeff) | 2 | coeff | float | 1.f | | | 3 | axes | array | [ ] | | | 4 | keepdims | int | 0 | | +| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 | Operation type: - 0 = SUM @@ -1829,3 +1877,24 @@ Operation type: - 17 = LOG10 - 18 = ROUND - 19 = TRUNC + +# Unfold +``` +y = unfold(x) +``` + +* one_blob_only + +| param id | name | type | default | description | +| --------- | ------------- | ----- | --------- | ----------------- | +| 0 | num_output | int | 0 | | +| 1 | kernel_w | int | 0 | | +| 2 | dilation_w | int | 1 | | +| 3 | stride_w | int | 1 | | +| 4 | pad_left | int | 0 | | +| 11 | kernel_h | int | kernel_w | | +| 12 | dilation_h | int | dilation_w | | +| 13 | stride_h | int | stride_w | | +| 14 | pad_top | int | pad_left | | +| 15 | pad_right | int | pad_left | | +| 16 | pad_bottom | int | pad_top | | diff --git a/src/layer/arm/convolution1d_arm.cpp b/src/layer/arm/convolution1d_arm.cpp index 26389279b18..ec1df51469f 100644 --- a/src/layer/arm/convolution1d_arm.cpp +++ b/src/layer/arm/convolution1d_arm.cpp @@ -68,7 +68,8 @@ int Convolution1D_arm::create_pipeline(const Option& opt) convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -233,13 +234,14 @@ int Convolution1D_arm::forward(const std::vector& bottom_blobs, std::vector } #if NCNN_BF16 -int Convolution1D_arm::create_pipeline_bf16s(const Option& /*opt*/) +int Convolution1D_arm::create_pipeline_bf16s(const Option& opt) { const int num_input = weight_data_size / kernel_w / num_output; convolution1d_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/convolution1d_arm_asimdhp.cpp b/src/layer/arm/convolution1d_arm_asimdhp.cpp index 2e194eabf21..cb6336d019e 100644 --- a/src/layer/arm/convolution1d_arm_asimdhp.cpp +++ b/src/layer/arm/convolution1d_arm_asimdhp.cpp @@ -36,7 +36,8 @@ int Convolution1D_arm::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp index f7f04619e9e..4198eeeb7c6 100644 --- a/src/layer/arm/convolution_arm.cpp +++ b/src/layer/arm/convolution_arm.cpp @@ -194,7 +194,8 @@ int Convolution_arm::create_pipeline(const Option& opt) convolution_dilation1->create_pipeline(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -224,7 +225,8 @@ int Convolution_arm::create_pipeline(const Option& opt) else conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -270,7 +272,8 @@ int Convolution_arm::create_pipeline(const Option& opt) { convolution_im2col_gemm_transform_kernel(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -305,7 +308,8 @@ int Convolution_arm::create_pipeline(const Option& opt) convolution_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -904,7 +908,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) else conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -950,7 +955,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) { convolution_im2col_gemm_transform_kernel_bf16s(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -971,7 +977,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) convolution_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1284,7 +1291,8 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/convolution_arm_asimdhp.cpp b/src/layer/arm/convolution_arm_asimdhp.cpp index 51ec51675a8..b1a98ea22e5 100644 --- a/src/layer/arm/convolution_arm_asimdhp.cpp +++ b/src/layer/arm/convolution_arm_asimdhp.cpp @@ -108,7 +108,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) else conv3x3s1_winograd23_transform_kernel_fp16sa(weight_data, weight_winograd23_data, num_input, num_output, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); if (opt.use_fp16_arithmetic) { @@ -189,7 +190,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -219,7 +221,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp index f9f4a1fdc2d..4bf61c53efc 100644 --- a/src/layer/arm/convolutiondepthwise_arm.cpp +++ b/src/layer/arm/convolutiondepthwise_arm.cpp @@ -119,7 +119,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) ncnn::cast_float32_to_bfloat16(weight_data, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -161,7 +162,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -169,7 +171,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1022,7 +1025,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1030,7 +1034,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp b/src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp index 1d5f2782cc1..cfea9f2a003 100644 --- a/src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp +++ b/src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp @@ -76,7 +76,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -84,7 +85,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/deconvolution_arm.cpp b/src/layer/arm/deconvolution_arm.cpp index 24c825ae266..c06532a66ca 100644 --- a/src/layer/arm/deconvolution_arm.cpp +++ b/src/layer/arm/deconvolution_arm.cpp @@ -211,7 +211,8 @@ int Deconvolution_arm::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -954,7 +955,8 @@ int Deconvolution_arm::create_pipeline_bf16s(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/deconvolution_arm_asimdhp.cpp b/src/layer/arm/deconvolution_arm_asimdhp.cpp index b5498d815f3..9cb7df4630d 100644 --- a/src/layer/arm/deconvolution_arm_asimdhp.cpp +++ b/src/layer/arm/deconvolution_arm_asimdhp.cpp @@ -154,7 +154,8 @@ int Deconvolution_arm::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/deconvolutiondepthwise_arm.cpp b/src/layer/arm/deconvolutiondepthwise_arm.cpp index 4eac426d9de..133d5158fa6 100644 --- a/src/layer/arm/deconvolutiondepthwise_arm.cpp +++ b/src/layer/arm/deconvolutiondepthwise_arm.cpp @@ -104,7 +104,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt) ncnn::cast_float32_to_bfloat16(weight_data_transposed, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -190,7 +191,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp b/src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp index 5fa42d07490..73b428ebfef 100644 --- a/src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp +++ b/src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp @@ -145,7 +145,8 @@ int DeconvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/gemm_arm.cpp b/src/layer/arm/gemm_arm.cpp index 3463550d3d4..e88ca1cc68d 100644 --- a/src/layer/arm/gemm_arm.cpp +++ b/src/layer/arm/gemm_arm.cpp @@ -4201,7 +4201,8 @@ int Gemm_arm::create_pipeline(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -4241,7 +4242,8 @@ int Gemm_arm::create_pipeline(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -4271,7 +4273,8 @@ int Gemm_arm::create_pipeline(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) @@ -4889,7 +4892,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -4929,7 +4933,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -4959,7 +4964,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/arm/gemm_arm_asimdhp.cpp b/src/layer/arm/gemm_arm_asimdhp.cpp index cfe6ce8ce60..f3140cb04b9 100644 --- a/src/layer/arm/gemm_arm_asimdhp.cpp +++ b/src/layer/arm/gemm_arm_asimdhp.cpp @@ -2736,7 +2736,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -2776,7 +2777,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -2802,7 +2804,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) } } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/arm/gemm_arm_vfpv4.cpp b/src/layer/arm/gemm_arm_vfpv4.cpp index 5792e47e980..be0fe178730 100644 --- a/src/layer/arm/gemm_arm_vfpv4.cpp +++ b/src/layer/arm/gemm_arm_vfpv4.cpp @@ -427,7 +427,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -467,7 +468,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -497,7 +499,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/arm/gru_arm.cpp b/src/layer/arm/gru_arm.cpp index 58df8275ad5..80f8c80ad3c 100644 --- a/src/layer/arm/gru_arm.cpp +++ b/src/layer/arm/gru_arm.cpp @@ -250,9 +250,12 @@ int GRU_arm::create_pipeline(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } @@ -1372,9 +1375,12 @@ int GRU_arm::create_pipeline_bf16s(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/arm/gru_arm_asimdhp.cpp b/src/layer/arm/gru_arm_asimdhp.cpp index fcdce2d8e18..f3d38305a2e 100644 --- a/src/layer/arm/gru_arm_asimdhp.cpp +++ b/src/layer/arm/gru_arm_asimdhp.cpp @@ -914,9 +914,12 @@ int GRU_arm::create_pipeline_fp16s(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp index 0cbc78525eb..2d3bafab81b 100644 --- a/src/layer/arm/innerproduct_arm.cpp +++ b/src/layer/arm/innerproduct_arm.cpp @@ -122,7 +122,8 @@ int InnerProduct_arm::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -865,7 +866,8 @@ int InnerProduct_arm::create_pipeline_bf16s(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1258,7 +1260,8 @@ int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/innerproduct_arm_vfpv4.cpp b/src/layer/arm/innerproduct_arm_vfpv4.cpp index 6a6eab84fba..306d37ad7cf 100644 --- a/src/layer/arm/innerproduct_arm_vfpv4.cpp +++ b/src/layer/arm/innerproduct_arm_vfpv4.cpp @@ -41,7 +41,8 @@ int InnerProduct_arm::create_pipeline_fp16s(const Option& opt) } #endif - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/arm/lstm_arm.cpp b/src/layer/arm/lstm_arm.cpp index b8d5afe93dc..04d7277547e 100644 --- a/src/layer/arm/lstm_arm.cpp +++ b/src/layer/arm/lstm_arm.cpp @@ -124,9 +124,12 @@ int LSTM_arm::create_pipeline(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } @@ -928,9 +931,12 @@ int LSTM_arm::create_pipeline_bf16s(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/arm/lstm_arm_asimdhp.cpp b/src/layer/arm/lstm_arm_asimdhp.cpp index 593af33ccd4..1d3fc71cdfc 100644 --- a/src/layer/arm/lstm_arm_asimdhp.cpp +++ b/src/layer/arm/lstm_arm_asimdhp.cpp @@ -835,9 +835,12 @@ int LSTM_arm::create_pipeline_fp16s(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/arm/multiheadattention_arm.cpp b/src/layer/arm/multiheadattention_arm.cpp index b3f3d7aa8e7..37323a2255f 100644 --- a/src/layer/arm/multiheadattention_arm.cpp +++ b/src/layer/arm/multiheadattention_arm.cpp @@ -84,8 +84,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) q_gemm->load_model(ModelBinFromMatArray(weights)); q_gemm->create_pipeline(opt); - q_weight_data.release(); - q_bias_data.release(); + if (opt.lightmode) + { + q_weight_data.release(); + q_bias_data.release(); + } } { @@ -110,8 +113,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) k_gemm->load_model(ModelBinFromMatArray(weights)); k_gemm->create_pipeline(opt); - k_weight_data.release(); - k_bias_data.release(); + if (opt.lightmode) + { + k_weight_data.release(); + k_bias_data.release(); + } } { @@ -136,8 +142,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) v_gemm->load_model(ModelBinFromMatArray(weights)); v_gemm->create_pipeline(opt); - v_weight_data.release(); - v_bias_data.release(); + if (opt.lightmode) + { + v_weight_data.release(); + v_bias_data.release(); + } } { @@ -160,8 +169,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) o_gemm->load_model(ModelBinFromMatArray(weights)); o_gemm->create_pipeline(opt); - out_weight_data.release(); - out_bias_data.release(); + if (opt.lightmode) + { + out_weight_data.release(); + out_bias_data.release(); + } } { diff --git a/src/layer/arm/rnn_arm.cpp b/src/layer/arm/rnn_arm.cpp index 15b9f0b8a0d..293322b8488 100644 --- a/src/layer/arm/rnn_arm.cpp +++ b/src/layer/arm/rnn_arm.cpp @@ -139,9 +139,12 @@ int RNN_arm::create_pipeline(const Option& opt) bias_c_data_packed = bias_c_data; - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } @@ -736,9 +739,12 @@ int RNN_arm::create_pipeline_bf16s(const Option& opt) cast_float32_to_bfloat16(bias_c_data, bias_c_data_packed, opt); - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/arm/rnn_arm_asimdhp.cpp b/src/layer/arm/rnn_arm_asimdhp.cpp index 467dba614f8..93b009151c5 100644 --- a/src/layer/arm/rnn_arm_asimdhp.cpp +++ b/src/layer/arm/rnn_arm_asimdhp.cpp @@ -517,9 +517,12 @@ int RNN_arm::create_pipeline_fp16s(const Option& opt) cast_float32_to_float16(bias_c_data, bias_c_data_packed, opt); - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/loongarch/convolution1d_loongarch.cpp b/src/layer/loongarch/convolution1d_loongarch.cpp index 0917a79f62e..95590b3d725 100644 --- a/src/layer/loongarch/convolution1d_loongarch.cpp +++ b/src/layer/loongarch/convolution1d_loongarch.cpp @@ -78,6 +78,9 @@ int Convolution1D_loongarch::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -281,7 +284,7 @@ int Convolution1D_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { diff --git a/src/layer/loongarch/convolution_loongarch.cpp b/src/layer/loongarch/convolution_loongarch.cpp index 3c5d0c1a424..a1dfc64a95f 100644 --- a/src/layer/loongarch/convolution_loongarch.cpp +++ b/src/layer/loongarch/convolution_loongarch.cpp @@ -225,7 +225,8 @@ int Convolution_loongarch::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -789,7 +790,8 @@ int Convolution_loongarch::create_pipeline_int8_loongarch(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/convolutiondepthwise_loongarch.cpp b/src/layer/loongarch/convolutiondepthwise_loongarch.cpp index 0c5050dbce0..be0c52ea4b9 100644 --- a/src/layer/loongarch/convolutiondepthwise_loongarch.cpp +++ b/src/layer/loongarch/convolutiondepthwise_loongarch.cpp @@ -83,7 +83,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -91,7 +92,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -600,7 +602,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -608,7 +611,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/deconvolution_loongarch.cpp b/src/layer/loongarch/deconvolution_loongarch.cpp index 62b9d872b60..afdd77f47a9 100644 --- a/src/layer/loongarch/deconvolution_loongarch.cpp +++ b/src/layer/loongarch/deconvolution_loongarch.cpp @@ -126,7 +126,8 @@ int Deconvolution_loongarch::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp index 9495a99aae0..b31786bbbce 100644 --- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp +++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/innerproduct_loongarch.cpp b/src/layer/loongarch/innerproduct_loongarch.cpp index e6b8eb0936b..0f52c7352c3 100644 --- a/src/layer/loongarch/innerproduct_loongarch.cpp +++ b/src/layer/loongarch/innerproduct_loongarch.cpp @@ -99,7 +99,8 @@ int InnerProduct_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -652,7 +653,8 @@ int InnerProduct_loongarch::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1140,7 +1142,8 @@ int InnerProduct_loongarch::create_pipeline_int8_loongarch(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/convolution1d_mips.cpp b/src/layer/mips/convolution1d_mips.cpp index e9cf211e49b..02a008e70ab 100644 --- a/src/layer/mips/convolution1d_mips.cpp +++ b/src/layer/mips/convolution1d_mips.cpp @@ -78,6 +78,9 @@ int Convolution1D_mips::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -281,7 +284,7 @@ int Convolution1D_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { diff --git a/src/layer/mips/convolution_mips.cpp b/src/layer/mips/convolution_mips.cpp index af420e61a9a..23da8b838f2 100644 --- a/src/layer/mips/convolution_mips.cpp +++ b/src/layer/mips/convolution_mips.cpp @@ -225,7 +225,8 @@ int Convolution_mips::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -789,7 +790,8 @@ int Convolution_mips::create_pipeline_int8_mips(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/convolutiondepthwise_mips.cpp b/src/layer/mips/convolutiondepthwise_mips.cpp index 0c9bdca30ce..27799d9aca7 100644 --- a/src/layer/mips/convolutiondepthwise_mips.cpp +++ b/src/layer/mips/convolutiondepthwise_mips.cpp @@ -83,7 +83,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -91,7 +92,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -600,7 +602,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -608,7 +611,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/deconvolution_mips.cpp b/src/layer/mips/deconvolution_mips.cpp index 208400f532e..6efc3c5fefd 100644 --- a/src/layer/mips/deconvolution_mips.cpp +++ b/src/layer/mips/deconvolution_mips.cpp @@ -126,7 +126,8 @@ int Deconvolution_mips::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/deconvolutiondepthwise_mips.cpp b/src/layer/mips/deconvolutiondepthwise_mips.cpp index e6f5dd43478..456cc07780b 100644 --- a/src/layer/mips/deconvolutiondepthwise_mips.cpp +++ b/src/layer/mips/deconvolutiondepthwise_mips.cpp @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/innerproduct_mips.cpp b/src/layer/mips/innerproduct_mips.cpp index 9d926bfd08d..ad42accc16a 100644 --- a/src/layer/mips/innerproduct_mips.cpp +++ b/src/layer/mips/innerproduct_mips.cpp @@ -99,7 +99,8 @@ int InnerProduct_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -652,7 +653,8 @@ int InnerProduct_mips::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1140,7 +1142,8 @@ int InnerProduct_mips::create_pipeline_int8_mips(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolution1d_riscv.cpp b/src/layer/riscv/convolution1d_riscv.cpp index 6c581a0edeb..5671d4d4226 100644 --- a/src/layer/riscv/convolution1d_riscv.cpp +++ b/src/layer/riscv/convolution1d_riscv.cpp @@ -95,6 +95,9 @@ int Convolution1D_riscv::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -308,7 +311,7 @@ int Convolution1D_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { @@ -470,7 +473,8 @@ int Convolution1D_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolution_riscv.cpp b/src/layer/riscv/convolution_riscv.cpp index be413e5be25..fef27f21967 100644 --- a/src/layer/riscv/convolution_riscv.cpp +++ b/src/layer/riscv/convolution_riscv.cpp @@ -237,7 +237,8 @@ int Convolution_riscv::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -834,7 +835,8 @@ int Convolution_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolutiondepthwise_riscv.cpp b/src/layer/riscv/convolutiondepthwise_riscv.cpp index d913fe7e1d5..6a0eb04cda5 100644 --- a/src/layer/riscv/convolutiondepthwise_riscv.cpp +++ b/src/layer/riscv/convolutiondepthwise_riscv.cpp @@ -104,7 +104,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -112,7 +113,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -682,7 +684,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -690,7 +693,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/deconvolution_riscv.cpp b/src/layer/riscv/deconvolution_riscv.cpp index 6b395282908..3b6364e8020 100644 --- a/src/layer/riscv/deconvolution_riscv.cpp +++ b/src/layer/riscv/deconvolution_riscv.cpp @@ -148,7 +148,8 @@ int Deconvolution_riscv::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -530,7 +531,8 @@ int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp index 7b567cf63e0..6a311680f4f 100644 --- a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp +++ b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp @@ -97,7 +97,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -105,7 +106,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -619,7 +621,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -627,7 +630,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/gemm_riscv.cpp b/src/layer/riscv/gemm_riscv.cpp index 9b4b58ac651..fa25a058cb1 100644 --- a/src/layer/riscv/gemm_riscv.cpp +++ b/src/layer/riscv/gemm_riscv.cpp @@ -3984,7 +3984,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -4024,7 +4025,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -4054,7 +4056,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/riscv/gru_riscv.cpp b/src/layer/riscv/gru_riscv.cpp index c7e36c1c0fc..0869a455979 100644 --- a/src/layer/riscv/gru_riscv.cpp +++ b/src/layer/riscv/gru_riscv.cpp @@ -714,9 +714,12 @@ int GRU_riscv::create_pipeline_fp16sa(const Option& opt) cast_float32_to_float16(weight_hc_data, weight_hc_data_fp16sa, opt); cast_float32_to_float16(bias_c_data, bias_c_data_fp16sa, opt); - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/riscv/innerproduct_riscv.cpp b/src/layer/riscv/innerproduct_riscv.cpp index accfc683584..e71c09a157c 100644 --- a/src/layer/riscv/innerproduct_riscv.cpp +++ b/src/layer/riscv/innerproduct_riscv.cpp @@ -106,7 +106,8 @@ int InnerProduct_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -560,7 +561,8 @@ int InnerProduct_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/vulkan/batchnorm_vulkan.cpp b/src/layer/vulkan/batchnorm_vulkan.cpp index b770ab36bc7..2d0072a6cc1 100644 --- a/src/layer/vulkan/batchnorm_vulkan.cpp +++ b/src/layer/vulkan/batchnorm_vulkan.cpp @@ -156,6 +156,12 @@ int BatchNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) cmd.record_upload(b_data_packed, b_data_gpu, opt); } + if (opt.lightmode) + { + a_data.release(); + b_data.release(); + } + return 0; } diff --git a/src/layer/vulkan/convolution1d_vulkan.cpp b/src/layer/vulkan/convolution1d_vulkan.cpp index 2747012addc..0581f2d4c0a 100644 --- a/src/layer/vulkan/convolution1d_vulkan.cpp +++ b/src/layer/vulkan/convolution1d_vulkan.cpp @@ -133,8 +133,11 @@ int Convolution1D_vulkan::create_pipeline(const Option& _opt) pipeline_convolution1d->create(shader_type_index, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/convolution_vulkan.cpp b/src/layer/vulkan/convolution_vulkan.cpp index 302ab9085c5..6b07b6b73d4 100644 --- a/src/layer/vulkan/convolution_vulkan.cpp +++ b/src/layer/vulkan/convolution_vulkan.cpp @@ -1148,8 +1148,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt) pipeline_convolution->create(shader_type_index, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/convolutiondepthwise_vulkan.cpp b/src/layer/vulkan/convolutiondepthwise_vulkan.cpp index 59eca6a55c6..2cda228d976 100644 --- a/src/layer/vulkan/convolutiondepthwise_vulkan.cpp +++ b/src/layer/vulkan/convolutiondepthwise_vulkan.cpp @@ -271,8 +271,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) pipeline_convolutiondepthwise_pack8->create(LayerShaderType::convolutiondepthwise_pack8, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } @@ -413,8 +416,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) pipeline_convolutiondepthwise_group_pack8to1->create(LayerShaderType::convolutiondepthwise_group_pack8to1, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/deconvolution_vulkan.cpp b/src/layer/vulkan/deconvolution_vulkan.cpp index 66e57db57bf..04fab54cceb 100644 --- a/src/layer/vulkan/deconvolution_vulkan.cpp +++ b/src/layer/vulkan/deconvolution_vulkan.cpp @@ -366,6 +366,12 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt) pipeline_deconvolution_col2im->create(shader_type_index, opt, specializations); } + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } + return 0; } @@ -462,8 +468,11 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt) pipeline_deconvolution->set_optimal_local_size_xyz(local_size_xyz); pipeline_deconvolution->create(shader_type_index, opt, specializations); - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp index a715a4782f4..e6ab72b8a2a 100644 --- a/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp +++ b/src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp @@ -295,8 +295,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) pipeline_deconvolutiondepthwise_pack8->create(LayerShaderType::deconvolutiondepthwise_pack8, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } @@ -437,8 +440,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) pipeline_deconvolutiondepthwise_group_pack8to1->create(LayerShaderType::deconvolutiondepthwise_group_pack8to1, opt, specializations); } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/gemm_vulkan.cpp b/src/layer/vulkan/gemm_vulkan.cpp index f30fa552f11..0d403a5288b 100644 --- a/src/layer/vulkan/gemm_vulkan.cpp +++ b/src/layer/vulkan/gemm_vulkan.cpp @@ -100,9 +100,12 @@ int Gemm_vulkan::create_pipeline(const Option& opt) pipeline_gemm->create(LayerShaderType::gemm, opt, specializations); } - A_data.release(); - B_data.release(); - C_data.release(); + if (opt.lightmode) + { + A_data.release(); + B_data.release(); + C_data.release(); + } return 0; } diff --git a/src/layer/vulkan/innerproduct_vulkan.cpp b/src/layer/vulkan/innerproduct_vulkan.cpp index ee73d4bb4ac..7f9051abe1c 100644 --- a/src/layer/vulkan/innerproduct_vulkan.cpp +++ b/src/layer/vulkan/innerproduct_vulkan.cpp @@ -154,8 +154,11 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt) pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz); pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations); - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } @@ -364,14 +367,20 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt) pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz); pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations); - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } - weight_data.release(); - bias_data.release(); + if (opt.lightmode) + { + weight_data.release(); + bias_data.release(); + } return 0; } diff --git a/src/layer/vulkan/memorydata_vulkan.cpp b/src/layer/vulkan/memorydata_vulkan.cpp index d6a316c0a41..b4716ef5e23 100644 --- a/src/layer/vulkan/memorydata_vulkan.cpp +++ b/src/layer/vulkan/memorydata_vulkan.cpp @@ -82,6 +82,11 @@ int MemoryData_vulkan::upload_model(VkTransfer& cmd, const Option& opt) cmd.record_upload(data_packed, data_gpu, opt, /*bool flatten*/ false); } + if (opt.lightmode) + { + data.release(); + } + return 0; } diff --git a/src/layer/vulkan/multiheadattention_vulkan.cpp b/src/layer/vulkan/multiheadattention_vulkan.cpp index 411b81b05e9..48967de3697 100644 --- a/src/layer/vulkan/multiheadattention_vulkan.cpp +++ b/src/layer/vulkan/multiheadattention_vulkan.cpp @@ -73,8 +73,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) q_gemm->load_model(ModelBinFromMatArray(weights)); q_gemm->create_pipeline(opt); - q_weight_data.release(); - q_bias_data.release(); + if (opt.lightmode) + { + q_weight_data.release(); + q_bias_data.release(); + } } { @@ -100,8 +103,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) k_gemm->load_model(ModelBinFromMatArray(weights)); k_gemm->create_pipeline(opt); - k_weight_data.release(); - k_bias_data.release(); + if (opt.lightmode) + { + k_weight_data.release(); + k_bias_data.release(); + } } { @@ -127,8 +133,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) v_gemm->load_model(ModelBinFromMatArray(weights)); v_gemm->create_pipeline(opt); - v_weight_data.release(); - v_bias_data.release(); + if (opt.lightmode) + { + v_weight_data.release(); + v_bias_data.release(); + } } { @@ -222,8 +231,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) o_gemm->load_model(ModelBinFromMatArray(weights)); o_gemm->create_pipeline(opt); - out_weight_data.release(); - out_bias_data.release(); + if (opt.lightmode) + { + out_weight_data.release(); + out_bias_data.release(); + } } return 0; diff --git a/src/layer/vulkan/normalize_vulkan.cpp b/src/layer/vulkan/normalize_vulkan.cpp index 5e29cd9afee..dcadc9b0753 100644 --- a/src/layer/vulkan/normalize_vulkan.cpp +++ b/src/layer/vulkan/normalize_vulkan.cpp @@ -264,6 +264,9 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt) { cmd.record_upload(scale_data_packed, scale_data_gpu, opt); } + + if (opt.lightmode) + scale_data.release(); } return 0; diff --git a/src/layer/vulkan/padding_vulkan.cpp b/src/layer/vulkan/padding_vulkan.cpp index 4f97733c313..619f33311c6 100644 --- a/src/layer/vulkan/padding_vulkan.cpp +++ b/src/layer/vulkan/padding_vulkan.cpp @@ -348,6 +348,11 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt) cmd.record_upload(per_channel_pad_data_packed, per_channel_pad_data_gpu, opt); } + if (opt.lightmode) + { + per_channel_pad_data.release(); + } + return 0; } diff --git a/src/layer/vulkan/prelu_vulkan.cpp b/src/layer/vulkan/prelu_vulkan.cpp index d58184c0938..a3995e3d2b1 100644 --- a/src/layer/vulkan/prelu_vulkan.cpp +++ b/src/layer/vulkan/prelu_vulkan.cpp @@ -144,6 +144,11 @@ int PReLU_vulkan::upload_model(VkTransfer& cmd, const Option& opt) { cmd.record_upload(slope_data_packed, slope_data_gpu, opt); } + + if (opt.lightmode) + { + slope_data.release(); + } } return 0; diff --git a/src/layer/vulkan/priorbox_vulkan.cpp b/src/layer/vulkan/priorbox_vulkan.cpp index 5cfe341cd78..538ff37c137 100644 --- a/src/layer/vulkan/priorbox_vulkan.cpp +++ b/src/layer/vulkan/priorbox_vulkan.cpp @@ -129,6 +129,13 @@ int PriorBox_vulkan::upload_model(VkTransfer& cmd, const Option& opt) cmd.record_upload(aspect_ratios, aspect_ratios_gpu, opt); + if (opt.lightmode) + { + min_sizes.release(); + max_sizes.release(); + aspect_ratios.release(); + } + return 0; } @@ -137,7 +144,7 @@ int PriorBox_vulkan::forward(const std::vector& bottom_blobs, std::vector int w = bottom_blobs[0].w; int h = bottom_blobs[0].h; - if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty()) + if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes_gpu.empty()) { // mxnet style _contrib_MultiBoxPrior float step_w = step_width; @@ -147,8 +154,8 @@ int PriorBox_vulkan::forward(const std::vector& bottom_blobs, std::vector if (step_h == -233) step_h = 1.f / (float)h; - int num_sizes = min_sizes.w; - int num_ratios = aspect_ratios.w; + int num_sizes = min_sizes_gpu.w; + int num_ratios = aspect_ratios_gpu.w; int num_prior = num_sizes - 1 + num_ratios; @@ -200,9 +207,9 @@ int PriorBox_vulkan::forward(const std::vector& bottom_blobs, std::vector if (step_h == -233) step_h = (float)image_h / h; - int num_min_size = min_sizes.w; - int num_max_size = max_sizes.w; - int num_aspect_ratio = aspect_ratios.w; + int num_min_size = min_sizes_gpu.w; + int num_max_size = max_sizes_gpu.w; + int num_aspect_ratio = aspect_ratios_gpu.w; int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size; if (flip) diff --git a/src/layer/vulkan/scale_vulkan.cpp b/src/layer/vulkan/scale_vulkan.cpp index 135f04638bd..2398409fe41 100644 --- a/src/layer/vulkan/scale_vulkan.cpp +++ b/src/layer/vulkan/scale_vulkan.cpp @@ -218,6 +218,12 @@ int Scale_vulkan::upload_model(VkTransfer& cmd, const Option& opt) } } + if (opt.lightmode) + { + scale_data.release(); + bias_data.release(); + } + return 0; } diff --git a/src/layer/x86/convolution1d_x86.cpp b/src/layer/x86/convolution1d_x86.cpp index 905db18b728..e1686611d59 100644 --- a/src/layer/x86/convolution1d_x86.cpp +++ b/src/layer/x86/convolution1d_x86.cpp @@ -34,7 +34,7 @@ Convolution1D_x86::Convolution1D_x86() #endif // __SSE2__ } -int Convolution1D_x86::create_pipeline(const Option& /*opt*/) +int Convolution1D_x86::create_pipeline(const Option& opt) { if (dynamic_weight) return 0; @@ -43,7 +43,8 @@ int Convolution1D_x86::create_pipeline(const Option& /*opt*/) convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp index c1f354ea6de..4bd6a4ef2bf 100644 --- a/src/layer/x86/convolution_x86.cpp +++ b/src/layer/x86/convolution_x86.cpp @@ -334,7 +334,8 @@ int Convolution_x86::create_pipeline(const Option& opt) convolution_dilation1->create_pipeline(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -451,7 +452,8 @@ int Convolution_x86::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -542,7 +544,8 @@ int Convolution_x86::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1250,7 +1253,8 @@ int Convolution_x86::create_pipeline_int8_x86(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/convolutiondepthwise_x86.cpp b/src/layer/x86/convolutiondepthwise_x86.cpp index 6a9fb7fb4c2..4b20252ccdd 100644 --- a/src/layer/x86/convolutiondepthwise_x86.cpp +++ b/src/layer/x86/convolutiondepthwise_x86.cpp @@ -132,7 +132,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -140,7 +141,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -843,7 +845,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -851,7 +854,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/deconvolution_x86.cpp b/src/layer/x86/deconvolution_x86.cpp index 6a94104a43d..2b187a3e32a 100644 --- a/src/layer/x86/deconvolution_x86.cpp +++ b/src/layer/x86/deconvolution_x86.cpp @@ -193,7 +193,8 @@ int Deconvolution_x86::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/deconvolutiondepthwise_x86.cpp b/src/layer/x86/deconvolutiondepthwise_x86.cpp index 4a1e89d26a8..21b931abae2 100644 --- a/src/layer/x86/deconvolutiondepthwise_x86.cpp +++ b/src/layer/x86/deconvolutiondepthwise_x86.cpp @@ -109,7 +109,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -117,7 +118,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/deformableconv2d_x86.cpp b/src/layer/x86/deformableconv2d_x86.cpp index 8fc7bdf2855..b880adb1e4d 100644 --- a/src/layer/x86/deformableconv2d_x86.cpp +++ b/src/layer/x86/deformableconv2d_x86.cpp @@ -203,7 +203,8 @@ int DeformableConv2D_x86::create_pipeline(const Option& opt) deformableconv2d_transform_kernel_packed_sse(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h, elempack, out_elempack); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/gemm_x86.cpp b/src/layer/x86/gemm_x86.cpp index 4ab37836a43..16791394dba 100644 --- a/src/layer/x86/gemm_x86.cpp +++ b/src/layer/x86/gemm_x86.cpp @@ -7235,7 +7235,8 @@ int Gemm_x86::create_pipeline(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -7279,7 +7280,8 @@ int Gemm_x86::create_pipeline(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -7315,7 +7317,8 @@ int Gemm_x86::create_pipeline(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/x86/innerproduct_x86.cpp b/src/layer/x86/innerproduct_x86.cpp index dee07d1de64..9bef1436df9 100644 --- a/src/layer/x86/innerproduct_x86.cpp +++ b/src/layer/x86/innerproduct_x86.cpp @@ -80,7 +80,8 @@ int InnerProduct_x86::create_pipeline(const Option& opt) innerproduct_transform_kernel_sse(weight_data, weight_data_tm, num_input, num_output, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -175,7 +176,8 @@ int InnerProduct_x86::create_pipeline_fp16s(const Option& opt) innerproduct_transform_kernel_fp16s_sse(weight_data, weight_data_tm, num_input, num_output, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -281,7 +283,8 @@ int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/x86/lstm_x86.cpp b/src/layer/x86/lstm_x86.cpp index 5d693648f44..6ba218e53d3 100644 --- a/src/layer/x86/lstm_x86.cpp +++ b/src/layer/x86/lstm_x86.cpp @@ -182,9 +182,12 @@ int LSTM_x86::create_pipeline(const Option& opt) } } - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/x86/multiheadattention_x86.cpp b/src/layer/x86/multiheadattention_x86.cpp index 2bddad5582d..a7ff58288c2 100644 --- a/src/layer/x86/multiheadattention_x86.cpp +++ b/src/layer/x86/multiheadattention_x86.cpp @@ -65,8 +65,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) q_gemm->load_model(ModelBinFromMatArray(weights)); q_gemm->create_pipeline(opt); - q_weight_data.release(); - q_bias_data.release(); + if (opt.lightmode) + { + q_weight_data.release(); + q_bias_data.release(); + } } { @@ -91,8 +94,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) k_gemm->load_model(ModelBinFromMatArray(weights)); k_gemm->create_pipeline(opt); - k_weight_data.release(); - k_bias_data.release(); + if (opt.lightmode) + { + k_weight_data.release(); + k_bias_data.release(); + } } { @@ -117,8 +123,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) v_gemm->load_model(ModelBinFromMatArray(weights)); v_gemm->create_pipeline(opt); - v_weight_data.release(); - v_bias_data.release(); + if (opt.lightmode) + { + v_weight_data.release(); + v_bias_data.release(); + } } { @@ -193,8 +202,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) o_gemm->load_model(ModelBinFromMatArray(weights)); o_gemm->create_pipeline(opt); - out_weight_data.release(); - out_bias_data.release(); + if (opt.lightmode) + { + out_weight_data.release(); + out_bias_data.release(); + } } return 0; diff --git a/tools/modelwriter.h b/tools/modelwriter.h index fd5105e612f..9fc24369368 100644 --- a/tools/modelwriter.h +++ b/tools/modelwriter.h @@ -32,6 +32,7 @@ #include "layer/batchnorm.h" #include "layer/bias.h" #include "layer/binaryop.h" +#include "layer/celu.h" #include "layer/clip.h" #include "layer/concat.h" #include "layer/convolution.h" @@ -51,6 +52,7 @@ #include "layer/deconvolutiondepthwise3d.h" #include "layer/deformableconv2d.h" #include "layer/detectionoutput.h" +#include "layer/diag.h" #include "layer/dropout.h" #include "layer/eltwise.h" #include "layer/elu.h" @@ -835,6 +837,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) fprintf_param_value(" 1=%d", with_scalar) fprintf_param_value(" 2=%e", b) } + else if (layer->type == "CELU") + { + ncnn::CELU* op = (ncnn::CELU*)layer; + ncnn::CELU* op_default = (ncnn::CELU*)layer_default; + + fprintf_param_value(" 0=%e", alpha) + } else if (layer->type == "Clip") { ncnn::Clip* op = (ncnn::Clip*)layer; @@ -888,18 +897,21 @@ int ModelWriter::save(const char* parampath, const char* binpath) } fprintf_param_value(" 19=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); #if NCNN_INT8 - // write int8_scale data - if (op->int8_scale_term) - { - fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); - fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); - fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); - } + // write int8_scale data + if (op->int8_scale_term) + { + fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); + fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); + fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); + } #endif // NCNN_INT8 + } if (shape_ready) { @@ -931,9 +943,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 19=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1040,32 +1056,35 @@ int ModelWriter::save(const char* parampath, const char* binpath) } fprintf_param_value(" 19=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); #if NCNN_INT8 - // write int8_scale data - if (op->int8_scale_term == 1 || op->int8_scale_term == 101) - { - op->bottom_blob_int8_scales.w = 1; - } - if (op->int8_scale_term == 2 || op->int8_scale_term == 102) - { - op->weight_data_int8_scales.w = 1; - op->bottom_blob_int8_scales.w = 1; - } - if (op->int8_scale_term > 100) - { - op->top_blob_int8_scales.w = 1; - } + // write int8_scale data + if (op->int8_scale_term == 1 || op->int8_scale_term == 101) + { + op->bottom_blob_int8_scales.w = 1; + } + if (op->int8_scale_term == 2 || op->int8_scale_term == 102) + { + op->weight_data_int8_scales.w = 1; + op->bottom_blob_int8_scales.w = 1; + } + if (op->int8_scale_term > 100) + { + op->top_blob_int8_scales.w = 1; + } - if (op->int8_scale_term) - { - fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); - fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); - fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); - } + if (op->int8_scale_term) + { + fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); + fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); + fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); + } #endif // NCNN_INT8 + } if (shape_ready) { @@ -1098,9 +1117,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 19=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1261,9 +1284,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 28=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1296,9 +1323,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 28=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1418,9 +1449,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 28=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1454,9 +1489,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) { if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); } + fprintf_param_value(" 28=%d", dynamic_weight) - fwrite_weight_tag_data(op->weight_data, bp); - fwrite_weight_data(op->bias_data, bp); + if (op->dynamic_weight == 0) + { + fwrite_weight_tag_data(op->weight_data, bp); + fwrite_weight_data(op->bias_data, bp); + } if (shape_ready) { @@ -1597,6 +1636,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) fprintf_param_value(" 7=%e", variances[2]) fprintf_param_value(" 8=%e", variances[3]) } + else if (layer->type == "Diag") + { + ncnn::Diag* op = (ncnn::Diag*)layer; + ncnn::Diag* op_default = (ncnn::Diag*)layer_default; + + fprintf_param_value(" 0=%d", diagonal) + } else if (layer->type == "Dropout") { ncnn::Dropout* op = (ncnn::Dropout*)layer;