Skip to content

Commit

Permalink
update ncnnoptimize layers, lightmode=false keeps original weight (Te…
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored and sen.li committed May 7, 2024
1 parent c5534ff commit 65dbb4a
Show file tree
Hide file tree
Showing 69 changed files with 614 additions and 241 deletions.
75 changes: 72 additions & 3 deletions docs/developer-guide/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
* [Dropout](#dropout)
* [Eltwise](#eltwise)
* [ELU](#elu)
* [Embed](#embed)
* [Exp](#exp)
* [Flatten](#flatten)
* [Fold](#fold)
* [GELU](#gelu)
* [GLU](#glu)
* [Gemm](#gemm)
Expand Down Expand Up @@ -84,6 +86,7 @@
* [Threshold](#threshold)
* [Tile](#tile)
* [UnaryOp](#unaryop)
* [Unfold](#unfold)

# AbsVal
```
Expand Down Expand Up @@ -474,12 +477,15 @@ y = crop(x)
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | woffset | int | 0 | |
| 1 | hoffset | int | 0 | |
| 13 | doffset | int | 0 | |
| 2 | coffset | int | 0 | |
| 3 | outw | int | 0 | |
| 4 | outh | int | 0 | |
| 14 | outd | int | 0 | |
| 5 | outc | int | 0 | |
| 6 | woffset2 | int | 0 | |
| 7 | hoffset2 | int | 0 | |
| 15 | doffset2 | int | 0 | |
| 8 | coffset2 | int | 0 | |
| 9 | starts | array | [ ] | |
| 10 | ends | array | [ ] | |
Expand Down Expand Up @@ -819,6 +825,23 @@ else y = x
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | alpha | float | 0.1f | |

# Embed
```
y = embedding(x)
```

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | input_dim | int | 0 | |
| 2 | bias_term | int | 0 | |
| 3 | weight_data_size | int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| weight_data | float | [weight_data_size] |
| bias_data | float | [num_output] |

# Exp
```
if base == -1 y = exp(shift + x * scale)
Expand All @@ -839,6 +862,29 @@ Reshape blob to 1 dimension

* one_blob_only

# Fold
```
y = fold(x)
```

* one_blob_only

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | kernel_w | int | 0 | |
| 2 | dilation_w | int | 1 | |
| 3 | stride_w | int | 1 | |
| 4 | pad_left | int | 0 | |
| 11 | kernel_h | int | kernel_w | |
| 12 | dilation_h | int | dilation_w | |
| 13 | stride_h | int | stride_w | |
| 14 | pad_top | int | pad_left | |
| 15 | pad_right | int | pad_left | |
| 16 | pad_bottom | int | pad_top | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |

# GELU
```
if fast_gelu == 1 y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x)));
Expand Down Expand Up @@ -1187,6 +1233,7 @@ y = data
| 1 | h | int | 0 | |
| 11 | d | int | 0 | |
| 2 | c | int | 0 | |
| 21 | load_type | int | 1 | 1=fp32 |

| weight | type | shape |
| ------------- | ----- | --------------------- |
Expand Down Expand Up @@ -1537,6 +1584,7 @@ y = reduce_op(x * coeff)
| 2 | coeff | float | 1.f | |
| 3 | axes | array | [ ] | |
| 4 | keepdims | int | 0 | |
| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 |

Operation type:
- 0 = SUM
Expand Down Expand Up @@ -1829,3 +1877,24 @@ Operation type:
- 17 = LOG10
- 18 = ROUND
- 19 = TRUNC

# Unfold
```
y = unfold(x)
```

* one_blob_only

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | kernel_w | int | 0 | |
| 2 | dilation_w | int | 1 | |
| 3 | stride_w | int | 1 | |
| 4 | pad_left | int | 0 | |
| 11 | kernel_h | int | kernel_w | |
| 12 | dilation_h | int | dilation_w | |
| 13 | stride_h | int | stride_w | |
| 14 | pad_top | int | pad_left | |
| 15 | pad_right | int | pad_left | |
| 16 | pad_bottom | int | pad_top | |
8 changes: 5 additions & 3 deletions src/layer/arm/convolution1d_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ int Convolution1D_arm::create_pipeline(const Option& opt)

convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -233,13 +234,14 @@ int Convolution1D_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector
}

#if NCNN_BF16
int Convolution1D_arm::create_pipeline_bf16s(const Option& /*opt*/)
int Convolution1D_arm::create_pipeline_bf16s(const Option& opt)
{
const int num_input = weight_data_size / kernel_w / num_output;

convolution1d_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
3 changes: 2 additions & 1 deletion src/layer/arm/convolution1d_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ int Convolution1D_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
24 changes: 16 additions & 8 deletions src/layer/arm/convolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ int Convolution_arm::create_pipeline(const Option& opt)

convolution_dilation1->create_pipeline(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -224,7 +225,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -270,7 +272,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
{
convolution_im2col_gemm_transform_kernel(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -305,7 +308,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
convolution_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -904,7 +908,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -950,7 +955,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
{
convolution_im2col_gemm_transform_kernel_bf16s(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand All @@ -971,7 +977,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
convolution_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -1284,7 +1291,8 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
9 changes: 6 additions & 3 deletions src/layer/arm/convolution_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel_fp16sa(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

if (opt.use_fp16_arithmetic)
{
Expand Down Expand Up @@ -189,7 +190,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -219,7 +221,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)
ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
15 changes: 10 additions & 5 deletions src/layer/arm/convolutiondepthwise_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
ncnn::cast_float32_to_bfloat16(weight_data, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -161,15 +162,17 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}

// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -1022,15 +1025,17 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}

// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
6 changes: 4 additions & 2 deletions src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,17 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}

// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
6 changes: 4 additions & 2 deletions src/layer/arm/deconvolution_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ int Deconvolution_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -954,7 +955,8 @@ int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
3 changes: 2 additions & 1 deletion src/layer/arm/deconvolution_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ int Deconvolution_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
6 changes: 4 additions & 2 deletions src/layer/arm/deconvolutiondepthwise_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
ncnn::cast_float32_to_bfloat16(weight_data_transposed, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down Expand Up @@ -190,7 +191,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
Expand Down
Loading

0 comments on commit 65dbb4a

Please sign in to comment.