From ef8a99a4e1a0a2908f146a794e442ba4b08208bf Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 8 Apr 2024 19:29:59 +0800 Subject: [PATCH] wip --- src/layer/loongarch/convolution1d_loongarch.cpp | 5 ++++- src/layer/loongarch/convolution_loongarch.cpp | 6 ++++-- .../loongarch/convolutiondepthwise_loongarch.cpp | 12 ++++++++---- src/layer/loongarch/deconvolution_loongarch.cpp | 3 ++- .../loongarch/deconvolutiondepthwise_loongarch.cpp | 6 ++++-- src/layer/loongarch/innerproduct_loongarch.cpp | 9 ++++++--- src/layer/mips/convolution1d_mips.cpp | 5 ++++- src/layer/mips/convolution_mips.cpp | 6 ++++-- src/layer/mips/convolutiondepthwise_mips.cpp | 12 ++++++++---- src/layer/mips/deconvolution_mips.cpp | 3 ++- src/layer/mips/deconvolutiondepthwise_mips.cpp | 6 ++++-- src/layer/mips/innerproduct_mips.cpp | 9 ++++++--- src/layer/riscv/convolution1d_riscv.cpp | 8 ++++++-- src/layer/riscv/convolution_riscv.cpp | 6 ++++-- src/layer/riscv/convolutiondepthwise_riscv.cpp | 12 ++++++++---- src/layer/riscv/deconvolution_riscv.cpp | 6 ++++-- src/layer/riscv/deconvolutiondepthwise_riscv.cpp | 12 ++++++++---- src/layer/riscv/gemm_riscv.cpp | 9 ++++++--- src/layer/riscv/gru_riscv.cpp | 9 ++++++--- src/layer/riscv/innerproduct_riscv.cpp | 6 ++++-- 20 files changed, 102 insertions(+), 48 deletions(-) diff --git a/src/layer/loongarch/convolution1d_loongarch.cpp b/src/layer/loongarch/convolution1d_loongarch.cpp index 0917a79f62e..95590b3d725 100644 --- a/src/layer/loongarch/convolution1d_loongarch.cpp +++ b/src/layer/loongarch/convolution1d_loongarch.cpp @@ -78,6 +78,9 @@ int Convolution1D_loongarch::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -281,7 +284,7 @@ int Convolution1D_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { diff --git a/src/layer/loongarch/convolution_loongarch.cpp b/src/layer/loongarch/convolution_loongarch.cpp index 3c5d0c1a424..a1dfc64a95f 100644 --- a/src/layer/loongarch/convolution_loongarch.cpp +++ b/src/layer/loongarch/convolution_loongarch.cpp @@ -225,7 +225,8 @@ int Convolution_loongarch::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -789,7 +790,8 @@ int Convolution_loongarch::create_pipeline_int8_loongarch(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/convolutiondepthwise_loongarch.cpp b/src/layer/loongarch/convolutiondepthwise_loongarch.cpp index 0c5050dbce0..be0c52ea4b9 100644 --- a/src/layer/loongarch/convolutiondepthwise_loongarch.cpp +++ b/src/layer/loongarch/convolutiondepthwise_loongarch.cpp @@ -83,7 +83,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -91,7 +92,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -600,7 +602,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -608,7 +611,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/deconvolution_loongarch.cpp b/src/layer/loongarch/deconvolution_loongarch.cpp index 62b9d872b60..afdd77f47a9 100644 --- a/src/layer/loongarch/deconvolution_loongarch.cpp +++ b/src/layer/loongarch/deconvolution_loongarch.cpp @@ -126,7 +126,8 @@ int Deconvolution_loongarch::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp index 9495a99aae0..b31786bbbce 100644 --- a/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp +++ b/src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/loongarch/innerproduct_loongarch.cpp b/src/layer/loongarch/innerproduct_loongarch.cpp index e6b8eb0936b..0f52c7352c3 100644 --- a/src/layer/loongarch/innerproduct_loongarch.cpp +++ b/src/layer/loongarch/innerproduct_loongarch.cpp @@ -99,7 +99,8 @@ int InnerProduct_loongarch::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -652,7 +653,8 @@ int InnerProduct_loongarch::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1140,7 +1142,8 @@ int InnerProduct_loongarch::create_pipeline_int8_loongarch(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/convolution1d_mips.cpp b/src/layer/mips/convolution1d_mips.cpp index e9cf211e49b..02a008e70ab 100644 --- a/src/layer/mips/convolution1d_mips.cpp +++ b/src/layer/mips/convolution1d_mips.cpp @@ -78,6 +78,9 @@ int Convolution1D_mips::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -281,7 +284,7 @@ int Convolution1D_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { diff --git a/src/layer/mips/convolution_mips.cpp b/src/layer/mips/convolution_mips.cpp index af420e61a9a..23da8b838f2 100644 --- a/src/layer/mips/convolution_mips.cpp +++ b/src/layer/mips/convolution_mips.cpp @@ -225,7 +225,8 @@ int Convolution_mips::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -789,7 +790,8 @@ int Convolution_mips::create_pipeline_int8_mips(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/convolutiondepthwise_mips.cpp b/src/layer/mips/convolutiondepthwise_mips.cpp index 0c9bdca30ce..27799d9aca7 100644 --- a/src/layer/mips/convolutiondepthwise_mips.cpp +++ b/src/layer/mips/convolutiondepthwise_mips.cpp @@ -83,7 +83,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -91,7 +92,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -600,7 +602,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -608,7 +611,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/deconvolution_mips.cpp b/src/layer/mips/deconvolution_mips.cpp index 208400f532e..6efc3c5fefd 100644 --- a/src/layer/mips/deconvolution_mips.cpp +++ b/src/layer/mips/deconvolution_mips.cpp @@ -126,7 +126,8 @@ int Deconvolution_mips::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/deconvolutiondepthwise_mips.cpp b/src/layer/mips/deconvolutiondepthwise_mips.cpp index e6f5dd43478..456cc07780b 100644 --- a/src/layer/mips/deconvolutiondepthwise_mips.cpp +++ b/src/layer/mips/deconvolutiondepthwise_mips.cpp @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/mips/innerproduct_mips.cpp b/src/layer/mips/innerproduct_mips.cpp index 9d926bfd08d..ad42accc16a 100644 --- a/src/layer/mips/innerproduct_mips.cpp +++ b/src/layer/mips/innerproduct_mips.cpp @@ -99,7 +99,8 @@ int InnerProduct_mips::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -652,7 +653,8 @@ int InnerProduct_mips::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -1140,7 +1142,8 @@ int InnerProduct_mips::create_pipeline_int8_mips(const Option& opt) scale_in_data[p] = scale_in; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolution1d_riscv.cpp b/src/layer/riscv/convolution1d_riscv.cpp index 6c581a0edeb..5671d4d4226 100644 --- a/src/layer/riscv/convolution1d_riscv.cpp +++ b/src/layer/riscv/convolution1d_riscv.cpp @@ -95,6 +95,9 @@ int Convolution1D_riscv::create_pipeline(const Option& opt) } } + if (opt.lightmode) + weight_data.release(); + return 0; } @@ -308,7 +311,7 @@ int Convolution1D_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op sum = bias_data[p]; } - const float* kptr = (const float*)weight_data + kernel_w * h * p; + const float* kptr = weight_data_packed.channel(p); for (int q = 0; q < h; q++) { @@ -470,7 +473,8 @@ int Convolution1D_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolution_riscv.cpp b/src/layer/riscv/convolution_riscv.cpp index be413e5be25..fef27f21967 100644 --- a/src/layer/riscv/convolution_riscv.cpp +++ b/src/layer/riscv/convolution_riscv.cpp @@ -237,7 +237,8 @@ int Convolution_riscv::create_pipeline(const Option& opt) } } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -834,7 +835,8 @@ int Convolution_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/convolutiondepthwise_riscv.cpp b/src/layer/riscv/convolutiondepthwise_riscv.cpp index d913fe7e1d5..6a0eb04cda5 100644 --- a/src/layer/riscv/convolutiondepthwise_riscv.cpp +++ b/src/layer/riscv/convolutiondepthwise_riscv.cpp @@ -104,7 +104,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -112,7 +113,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -682,7 +684,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -690,7 +693,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/deconvolution_riscv.cpp b/src/layer/riscv/deconvolution_riscv.cpp index 6b395282908..3b6364e8020 100644 --- a/src/layer/riscv/deconvolution_riscv.cpp +++ b/src/layer/riscv/deconvolution_riscv.cpp @@ -148,7 +148,8 @@ int Deconvolution_riscv::create_pipeline(const Option& opt) { } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -530,7 +531,8 @@ int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp index 7b567cf63e0..6a311680f4f 100644 --- a/src/layer/riscv/deconvolutiondepthwise_riscv.cpp +++ b/src/layer/riscv/deconvolutiondepthwise_riscv.cpp @@ -97,7 +97,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data_transposed; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -105,7 +106,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -619,7 +621,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -627,7 +630,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) // group convolution create_group_ops(opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } diff --git a/src/layer/riscv/gemm_riscv.cpp b/src/layer/riscv/gemm_riscv.cpp index 9b4b58ac651..fa25a058cb1 100644 --- a/src/layer/riscv/gemm_riscv.cpp +++ b/src/layer/riscv/gemm_riscv.cpp @@ -3984,7 +3984,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) } } - A_data.release(); + if (opt.lightmode) + A_data.release(); } if (constantB) @@ -4024,7 +4025,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) } } - B_data.release(); + if (opt.lightmode) + B_data.release(); } if (constantC && constant_broadcast_type_C != -1) @@ -4054,7 +4056,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) CT_data = C2; } - C_data.release(); + if (opt.lightmode) + C_data.release(); } if (constantA || constantB || constantC) diff --git a/src/layer/riscv/gru_riscv.cpp b/src/layer/riscv/gru_riscv.cpp index c7e36c1c0fc..0869a455979 100644 --- a/src/layer/riscv/gru_riscv.cpp +++ b/src/layer/riscv/gru_riscv.cpp @@ -714,9 +714,12 @@ int GRU_riscv::create_pipeline_fp16sa(const Option& opt) cast_float32_to_float16(weight_hc_data, weight_hc_data_fp16sa, opt); cast_float32_to_float16(bias_c_data, bias_c_data_fp16sa, opt); - weight_xc_data.release(); - bias_c_data.release(); - weight_hc_data.release(); + if (opt.lightmode) + { + weight_xc_data.release(); + bias_c_data.release(); + weight_hc_data.release(); + } return 0; } diff --git a/src/layer/riscv/innerproduct_riscv.cpp b/src/layer/riscv/innerproduct_riscv.cpp index accfc683584..e71c09a157c 100644 --- a/src/layer/riscv/innerproduct_riscv.cpp +++ b/src/layer/riscv/innerproduct_riscv.cpp @@ -106,7 +106,8 @@ int InnerProduct_riscv::create_pipeline(const Option& opt) weight_data_tm = weight_data; } - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; } @@ -560,7 +561,8 @@ int InnerProduct_riscv::create_pipeline_fp16s(const Option& opt) ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); - weight_data.release(); + if (opt.lightmode) + weight_data.release(); return 0; }