w

Tencent · Jan 23, 2025 · dc3b4a2
1 parent 25874e4
commit dc3b4a2
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 8 deletions.
diff --git a/src/layer/arm/dequantize_arm.cpp b/src/layer/arm/dequantize_arm.cpp
@@ -283,13 +283,14 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O
     const int h = bottom_blob.h;
     const int channels = bottom_blob.c;
     const int elempack = bottom_blob.elempack;
-
-    top_blob.create_like(bottom_blob, opt.blob_allocator);
-    if (top_blob.empty())
-        return -100;
+    const size_t out_elemsize = elempack * 2u;
 
     if (dims == 1)
     {
+        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         const int wp = std::max(1, w / opt.num_threads);
         const int nn_w = (w + wp - 1) / wp;
 
@@ -312,6 +313,10 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O
 
     if (dims == 2)
     {
+        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int i = 0; i < h; i++)
         {
@@ -327,6 +332,10 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O
 
     if (dims == 3)
     {
+        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int q = 0; q < channels; q++)
         {

diff --git a/src/layer/arm/dequantize_arm_asimdhp.cpp b/src/layer/arm/dequantize_arm_asimdhp.cpp
@@ -129,13 +129,14 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O
     const int h = bottom_blob.h;
     const int channels = bottom_blob.c;
     const int elempack = bottom_blob.elempack;
-
-    top_blob.create_like(bottom_blob, opt.blob_allocator);
-    if (top_blob.empty())
-        return -100;
+    const size_t out_elemsize = elempack * 2u;
 
     if (dims == 1)
     {
+        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         const int wp = std::max(1, w / opt.num_threads);
         const int nn_w = (w + wp - 1) / wp;
 
@@ -158,6 +159,10 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O
 
     if (dims == 2)
     {
+        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int i = 0; i < h; i++)
         {
@@ -173,6 +178,10 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O
 
     if (dims == 3)
     {
+        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int q = 0; q < channels; q++)
         {