Skip to content

Commit

Permalink
w
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Jan 23, 2025
1 parent 25874e4 commit dc3b4a2
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
17 changes: 13 additions & 4 deletions src/layer/arm/dequantize_arm.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -283,13 +283,14 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O
const int h = bottom_blob.h;
const int channels = bottom_blob.c;
const int elempack = bottom_blob.elempack;

top_blob.create_like(bottom_blob, opt.blob_allocator);
if (top_blob.empty())
return -100;
const size_t out_elemsize = elempack * 2u;

if (dims == 1)
{
top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

const int wp = std::max(1, w / opt.num_threads);
const int nn_w = (w + wp - 1) / wp;

Expand All @@ -312,6 +313,10 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O

if (dims == 2)
{
top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

#pragma omp parallel for num_threads(opt.num_threads)
for (int i = 0; i < h; i++)
{
Expand All @@ -327,6 +332,10 @@ int Dequantize_arm::forward_bf16s(const Mat& bottom_blob, Mat& top_blob, const O

if (dims == 3)
{
top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
Expand Down
17 changes: 13 additions & 4 deletions src/layer/arm/dequantize_arm_asimdhp.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -129,13 +129,14 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O
const int h = bottom_blob.h;
const int channels = bottom_blob.c;
const int elempack = bottom_blob.elempack;

top_blob.create_like(bottom_blob, opt.blob_allocator);
if (top_blob.empty())
return -100;
const size_t out_elemsize = elempack * 2u;

if (dims == 1)
{
top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

const int wp = std::max(1, w / opt.num_threads);
const int nn_w = (w + wp - 1) / wp;

Expand All @@ -158,6 +159,10 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O

if (dims == 2)
{
top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

#pragma omp parallel for num_threads(opt.num_threads)
for (int i = 0; i < h; i++)
{
Expand All @@ -173,6 +178,10 @@ int Dequantize_arm::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const O

if (dims == 3)
{
top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
if (top_blob.empty())
return -100;

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
Expand Down

0 comments on commit dc3b4a2

Please sign in to comment.