Adding support for padding 5-d tensors.
liuliu committed Jan 21, 2025
1 parent d157996 · commit e27298e
Showing 7 changed files with 75 additions and 21 deletions.
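In summary: ccv_nnc_tensor_hw now takes the spatial rank (size_nd) as an explicit argument instead of assuming the compile-time CCV_NNC_MAX_DIM, callers pass either CCV_NNC_MAX_DIM (the old behavior) or a rank derived from the command, and the GPU pad backend gains 5-d zero- and replicate-padding kernels. As a plain-C sketch of what the new 5-d zero-pad kernel computes (illustrative only, not code from this commit; pad_zero_5d_ref and its argument layout are hypothetical):

/* Illustrative CPU reference for the new 5-d zero-pad kernel. Output element
 * (v, u, z, y, x) copies input (v - begin[0], ..., x - begin[4]) when that
 * index lands inside the input, and 0 otherwise. */
static void pad_zero_5d_ref(const float* a, float* b, const int adim[5], const int bdim[5], const int begin[5])
{
	int v, u, z, y, x;
	for (v = 0; v < bdim[0]; v++)
		for (u = 0; u < bdim[1]; u++)
			for (z = 0; z < bdim[2]; z++)
				for (y = 0; y < bdim[3]; y++)
					for (x = 0; x < bdim[4]; x++)
					{
						const int av = v - begin[0], au = u - begin[1], az = z - begin[2], ay = y - begin[3], ax = x - begin[4];
						const int inside = av >= 0 && av < adim[0] && au >= 0 && au < adim[1] && az >= 0 && az < adim[2] && ay >= 0 && ay < adim[3] && ax >= 0 && ax < adim[4];
						b[(((v * bdim[1] + u) * bdim[2] + z) * bdim[3] + y) * bdim[4] + x] = inside ? a[(((av * adim[1] + au) * adim[2] + az) * adim[3] + ay) * adim[4] + ax] : 0;
					}
}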
2 changes: 1 addition & 1 deletion lib/nnc/ccv_cnnp_dataframe_addons.c
@@ -731,7 +731,7 @@ static void _ccv_cnnp_combine_new(void* const* const input_data, const int input
memcpy(params.dim + 1, inputs[j]->info.dim, sizeof(int) * nd);
} else if (nd >= 3) {
memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
-const int hw = ccv_nnc_tensor_hw(inputs[j]->info, nd);
+const int hw = ccv_nnc_tensor_hw(inputs[j]->info, nd, CCV_NNC_MAX_DIM);
if (batch->format == CCV_TENSOR_FORMAT_NCHW)
{
params.dim[1] = ccv_nnc_tensor_get_c(inputs[j]->info);
14 changes: 9 additions & 5 deletions lib/nnc/ccv_cnnp_model_addons.c
@@ -976,7 +976,7 @@ static void _ccv_cnnp_batch_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_sy
ccv_array_push(self->retainables, &out_var);
const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_mean");
const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_inv_std");
-const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
+const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
ccv_nnc_cmd_param_t batch_norm = self->params;
batch_norm.bnorm.count = hw >= 0 ? CCV_NNC_MAX_DIM + 1 : 1;
int i;
@@ -1111,7 +1111,7 @@ static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_s
const int c = ccv_nnc_tensor_get_c(params);
assert(c % self->groups == 0);
ccv_nnc_tensor_set_c(&weights_params, nd, c / self->groups);
-const int hw = ccv_nnc_tensor_hw(weights_params, nd);
+int hw = -1;
+if (weights_params.format == CCV_TENSOR_FORMAT_NHWC || weights_params.format == CCV_TENSOR_FORMAT_CHWN)
+	hw = 1;
+else if (weights_params.format == CCV_TENSOR_FORMAT_NCHW)
+	hw = 2;
assert(hw >= 0);
for (i = 0; i < k_nd; i++)
weights_params.dim[i + hw] = self->kdim[i];
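Note the departure from the shared helper here: a convolution weight always carries both a filter-count axis and a channel axis, so its spatial offset is fixed by the format alone (1 for NHWC/CHWN, 2 for NCHW) rather than by tensor rank. A worked example, with hypothetical numbers, of the loop above for an NCHW 3-d-conv weight:

/* Hypothetical NCHW weight {filters, c / groups, kd, kh, kw}: hw == 2, so the
 * loop writes the kernel window into dim[2..4]. */
int dim[5] = { 64, 16, 0, 0, 0 };
const int kdim[3] = { 3, 3, 3 };
const int hw = 2; /* NCHW branch above */
int i;
for (i = 0; i < 3; i++)
	dim[i + hw] = kdim[i]; /* dim becomes { 64, 16, 3, 3, 3 } */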
@@ -1242,7 +1246,7 @@ static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super,
ccv_nnc_tensor_set_n(&weights_params, c);
assert(c % self->groups == 0);
ccv_nnc_tensor_set_c(&weights_params, nd, self->filters / self->groups);
-const int hw = ccv_nnc_tensor_hw(weights_params, nd);
+const int hw = ccv_nnc_tensor_hw(weights_params, nd, CCV_NNC_MAX_DIM);
assert(hw >= 0);
for (i = 0; i < CCV_NNC_MAX_DIM; i++)
weights_params.dim[i + hw] = self->kdim[i];
@@ -1464,7 +1468,7 @@ static void _ccv_cnnp_max_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symb
assert(input_size == 1);
assert(output_size == 1);
const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
-const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
+const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
ccv_nnc_cmd_t cmd;
if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
cmd = CMD_MAX_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
@@ -1511,7 +1515,7 @@ static void _ccv_cnnp_average_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_
assert(input_size == 1);
assert(output_size == 1);
const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
-const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
+const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
ccv_nnc_cmd_t cmd;
if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
cmd = CMD_AVERAGE_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
22 changes: 11 additions & 11 deletions lib/nnc/ccv_nnc_internal.h
@@ -41,15 +41,15 @@ typedef struct {
void* aux; /**< [aux] The additional information available for a particular command under a particular backend. */
} ccv_nnc_cmd_backend_registry_t;

-static inline int ccv_nnc_tensor_hw(const ccv_nnc_tensor_param_t a, const int nd)
+static inline int ccv_nnc_tensor_hw(const ccv_nnc_tensor_param_t a, const int nd, const int size_nd)
{
if ((a.format == CCV_TENSOR_FORMAT_CHWN) ||
-(a.format == CCV_TENSOR_FORMAT_NHWC && nd == CCV_NNC_MAX_DIM + 1))
+(a.format == CCV_TENSOR_FORMAT_NHWC && nd == size_nd + 1))
return 0;
-else if ((a.format == CCV_TENSOR_FORMAT_NHWC && nd == CCV_NNC_MAX_DIM + 2) ||
-(a.format == CCV_TENSOR_FORMAT_NCHW && nd == CCV_NNC_MAX_DIM + 1))
+else if ((a.format == CCV_TENSOR_FORMAT_NHWC && nd == size_nd + 2) ||
+(a.format == CCV_TENSOR_FORMAT_NCHW && nd == size_nd + 1))
return 1;
-else if (a.format == CCV_TENSOR_FORMAT_NCHW && nd == CCV_NNC_MAX_DIM + 2)
+else if (a.format == CCV_TENSOR_FORMAT_NCHW && nd == size_nd + 2)
return 2;
return -1;
}
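The returned value is the index of the first spatial axis, or -1 when the format/rank combination is unrecognized. For instance (values read directly off the branches above; the *_params arguments are hypothetical tensor params of the named formats):

ccv_nnc_tensor_hw(nchw_params, 5, 3); /* NCHW, nd == size_nd + 2 -> 2 */
ccv_nnc_tensor_hw(nhwc_params, 5, 3); /* NHWC, nd == size_nd + 2 -> 1 */
ccv_nnc_tensor_hw(nhwc_params, 4, 3); /* NHWC, nd == size_nd + 1 -> 0 */
ccv_nnc_tensor_hw(chwn_params, 5, 3); /* CHWN -> 0 */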
@@ -59,11 +59,11 @@ static inline void ccv_nnc_hint_tensor_forward(const ccv_nnc_cmd_param_t cmd, co
int i;
assert(a.format == b->format);
const int nd = ccv_nnc_tensor_nd(a.dim);
-assert(nd == CCV_NNC_MAX_DIM + 1 || nd == CCV_NNC_MAX_DIM + 2);
-int hw = ccv_nnc_tensor_hw(a, nd);
-assert(hw >= 0);
const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1;
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
+assert(nd == size_nd + 1 || nd == size_nd + 2);
+int hw = ccv_nnc_tensor_hw(a, nd, size_nd);
+assert(hw >= 0);
for (i = 0; i < size_nd; i++)
{
int stride = ccv_max(1, hint.stride.dim[i]);
@@ -76,11 +76,11 @@ static inline void ccv_nnc_hint_tensor_backward(const ccv_nnc_cmd_param_t cmd, c
int i;
assert(a.format == b->format);
const int nd = ccv_nnc_tensor_nd(a.dim);
-assert(nd == CCV_NNC_MAX_DIM + 1 || nd == CCV_NNC_MAX_DIM + 2);
-int hw = ccv_nnc_tensor_hw(a, nd);
-assert(hw >= 0);
const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1;
assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
+assert(nd == size_nd + 1 || nd == size_nd + 2);
+int hw = ccv_nnc_tensor_hw(a, nd, size_nd);
+assert(hw >= 0);
for (i = 0; i < size_nd; i++)
{
int stride = ccv_max(1, hint.stride.dim[i]);
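Both hint helpers now take the spatial rank from the command's window instead of the compile-time CCV_NNC_MAX_DIM, which is what lets a 5-d (batched, 3-d spatial) tensor pass the assertions. A self-contained sketch of the derivation, assuming the window's dim array is zero-terminated and ends with a channel axis (hence the "- 1"):

/* Stand-alone model of the size_nd computation above; tensor_nd counts
 * zero-terminated dims the way ccv_nnc_tensor_nd does. */
static int tensor_nd(const int dim[8])
{
	int i;
	for (i = 0; i < 8 && dim[i]; i++);
	return i;
}

int main(void)
{
	const int size_dim[8] = { 3, 3, 3, 1 }; /* hypothetical 3-d conv window */
	const int size_nd = tensor_nd(size_dim) - 1; /* == 3 spatial axes */
	const int nd = 5; /* batched NCHW input: {n, c, d, h, w} */
	return (nd == size_nd + 2) ? 0 : 1; /* passes the new assertion; hw == 2 */
}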
4 changes: 2 additions & 2 deletions lib/nnc/cmd/compression/ccv_nnc_compression.c
@@ -96,7 +96,7 @@ static void _ccv_nnc_lssc_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const
{
assert(inputs[i].datatype == CCV_16F);
const int nd = ccv_nnc_tensor_nd(inputs[i].dim);
-const int hw = ccv_nnc_tensor_hw(inputs[i], nd);
+const int hw = ccv_nnc_tensor_hw(inputs[i], nd, CCV_NNC_MAX_DIM);
outputs[i] = inputs[i];
for (j = 0; j < CCV_NNC_MAX_DIM - 1; j++)
outputs[i].dim[j + hw] = (inputs[i].dim[j + hw] + 3) / 4;
@@ -112,7 +112,7 @@ static void _ccv_nnc_lssc_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const
{
assert(inputs[i].datatype == CCV_16F);
const int nd = ccv_nnc_tensor_nd(inputs[i].dim);
-const int hw = ccv_nnc_tensor_hw(inputs[i], nd);
+const int hw = ccv_nnc_tensor_hw(inputs[i], nd, CCV_NNC_MAX_DIM);
outputs[i] = inputs[i];
for (j = 0; j < CCV_NNC_MAX_DIM - 1; j++)
outputs[i].dim[j + hw] = inputs[i].dim[j + hw] * 4;
2 changes: 1 addition & 1 deletion lib/nnc/cmd/convolution/ccv_nnc_convolution.c
@@ -96,7 +96,7 @@ static void _ccv_nnc_conv_transpose_tensor_auto_forw(const ccv_nnc_cmd_param_t c
assert(inputs[0].format == outputs[0].format);
const int nd = ccv_nnc_tensor_nd(inputs[0].dim);
assert(nd == size_nd + 1 || nd == size_nd + 2);
-int hw = ccv_nnc_tensor_hw(inputs[0], nd);
+int hw = ccv_nnc_tensor_hw(inputs[0], nd, size_nd);
assert(hw >= 0);
for (i = 0; i < size_nd; i++)
{
50 changes: 50 additions & 0 deletions lib/nnc/cmd/pad/gpu/ccv_nnc_pad_gpu_ref.cu
@@ -66,6 +66,25 @@ __global__ void _ccv_nnc_pad_zero_forw_4d(const NUM* const ap, const int begin3,
}
}

template<typename NUM>
__global__ void _ccv_nnc_pad_zero_forw_5d(const NUM* const ap, const int begin4, const int begin3, const int begin2, const int begin1, const int begin0, const int adim4, const int adim3, const int adim2, const int adim1, const int adim0, NUM* const bp, const int bdim43210, const int bdim3, const int bdim2, const int bdim1, const int bdim0)
{
CUDA_1D_KERNEL_LOOP(i, bdim43210) {
const int x = i % bdim0;
int y = i / bdim0;
int z = y / bdim1;
y = y % bdim1;
int u = z / bdim2;
z = z % bdim2;
const int v = u / bdim3;
u = u % bdim3;
if (x - begin0 >= 0 && x - begin0 < adim0 && y - begin1 >= 0 && y - begin1 < adim1 && z - begin2 >= 0 && z - begin2 < adim2 && u - begin3 >= 0 && u - begin3 < adim3 && v - begin4 >= 0 && v - begin4 < adim4)
bp[i] = ap[((((v - begin4) * adim3 + (u - begin3)) * adim2 + (z - begin2)) * adim1 + (y - begin1)) * adim0 + x - begin0];
else
bp[i] = 0;
}
}

template<typename NUM>
__global__ void _ccv_nnc_pad_replicate_forw_1d(const NUM* const ap, const int begin0, const int adim0, NUM* const bp, const int bdim0)
{
@@ -121,6 +140,27 @@ __global__ void _ccv_nnc_pad_replicate_forw_4d(const NUM* const ap, const int be
}
}

template<typename NUM>
__global__ void _ccv_nnc_pad_replicate_forw_5d(const NUM* const ap, const int begin4, const int begin3, const int begin2, const int begin1, const int begin0, const int adim4, const int adim3, const int adim2, const int adim1, const int adim0, NUM* const bp, const int bdim43210, const int bdim3, const int bdim2, const int bdim1, const int bdim0)
{
CUDA_1D_KERNEL_LOOP(i, bdim43210) {
const int x = i % bdim0;
int y = i / bdim0;
int z = y / bdim1;
y = y % bdim1;
int u = z / bdim2;
z = z % bdim2;
const int v = u / bdim3;
u = u % bdim3;
const int ax = min(max(x - begin0, 0), adim0 - 1);
const int ay = min(max(y - begin1, 0), adim1 - 1);
const int az = min(max(z - begin2, 0), adim2 - 1);
const int au = min(max(u - begin3, 0), adim3 - 1);
const int av = min(max(v - begin4, 0), adim4 - 1);
bp[i] = ap[(((av * adim3 + au) * adim2 + az) * adim1 + ay) * adim0 + ax];
}
}
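Both 5-d kernels linearize the output to one index and peel axes off with repeated div/mod, innermost first. A self-contained check (not from the commit) that the decomposition used above round-trips to the same linear index:

/* Verify that i -> (v, u, z, y, x), decomposed exactly as in the kernels,
 * recovers the row-major linear index. bdim[4] is the innermost (bdim0). */
#include <assert.h>

int main(void)
{
	const int bdim[5] = { 2, 3, 4, 5, 6 }; /* hypothetical output dims */
	const int total = bdim[0] * bdim[1] * bdim[2] * bdim[3] * bdim[4];
	int i;
	for (i = 0; i < total; i++)
	{
		const int x = i % bdim[4];
		int y = i / bdim[4];
		int z = y / bdim[3];
		y = y % bdim[3];
		int u = z / bdim[2];
		z = z % bdim[2];
		const int v = u / bdim[1];
		u = u % bdim[1];
		assert((((v * bdim[1] + u) * bdim[2] + z) * bdim[3] + y) * bdim[4] + x == i);
	}
	return 0;
}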

static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
assert(input_size == 1);
@@ -159,6 +199,11 @@ static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint,
_ccv_nnc_pad_zero_forw_4d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3]), CUDA_NUM_THREADS, 0, stream>>>(a->data.f32, begin[0], begin[1], begin[2], begin[3], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], b->data.f32, b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3]);
else
_ccv_nnc_pad_zero_forw_4d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3]), CUDA_NUM_THREADS, 0, stream>>>((__half*)a->data.f16, begin[0], begin[1], begin[2], begin[3], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], (__half*)b->data.f16, b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3]);
} else if (a_nd == 5) {
if (a->info.datatype == CCV_32F)
_ccv_nnc_pad_zero_forw_5d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3] * b->info.dim[4]), CUDA_NUM_THREADS, 0, stream>>>(a->data.f32, begin[0], begin[1], begin[2], begin[3], begin[4], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], a->info.dim[4], b->data.f32, b->info.dim[4] * b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3], b->info.dim[4]);
else
_ccv_nnc_pad_zero_forw_5d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3] * b->info.dim[4]), CUDA_NUM_THREADS, 0, stream>>>((__half*)a->data.f16, begin[0], begin[1], begin[2], begin[3], begin[4], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], a->info.dim[4], (__half*)b->data.f16, b->info.dim[4] * b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3], b->info.dim[4]);
} else {
assert(0);
}
@@ -185,6 +230,11 @@ static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint,
_ccv_nnc_pad_replicate_forw_4d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3]), CUDA_NUM_THREADS, 0, stream>>>(a->data.f32, begin[0], begin[1], begin[2], begin[3], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], b->data.f32, b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3]);
else
_ccv_nnc_pad_replicate_forw_4d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3]), CUDA_NUM_THREADS, 0, stream>>>((__half*)a->data.f16, begin[0], begin[1], begin[2], begin[3], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], (__half*)b->data.f16, b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3]);
} else if (a_nd == 5) {
if (a->info.datatype == CCV_32F)
_ccv_nnc_pad_replicate_forw_5d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3] * b->info.dim[4]), CUDA_NUM_THREADS, 0, stream>>>(a->data.f32, begin[0], begin[1], begin[2], begin[3], begin[4], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], a->info.dim[4], b->data.f32, b->info.dim[4] * b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3], b->info.dim[4]);
else
_ccv_nnc_pad_replicate_forw_5d<<<CUDA_GET_BLOCKS(b->info.dim[0] * b->info.dim[1] * b->info.dim[2] * b->info.dim[3] * b->info.dim[4]), CUDA_NUM_THREADS, 0, stream>>>((__half*)a->data.f16, begin[0], begin[1], begin[2], begin[3], begin[4], a->info.dim[0], a->info.dim[1], a->info.dim[2], a->info.dim[3], a->info.dim[4], (__half*)b->data.f16, b->info.dim[4] * b->info.dim[3] * b->info.dim[2] * b->info.dim[1] * b->info.dim[0], b->info.dim[1], b->info.dim[2], b->info.dim[3], b->info.dim[4]);
} else {
assert(0);
}
2 changes: 1 addition & 1 deletion lib/nnc/cmd/roi/ccv_nnc_roi_align.c
@@ -23,7 +23,7 @@ static void _ccv_nnc_roi_align_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, c
{
assert(output_size == 1);
outputs[0] = inputs[0];
-const int hw = ccv_nnc_tensor_hw(outputs[0], ccv_nnc_tensor_nd(outputs[0].dim));
+const int hw = ccv_nnc_tensor_hw(outputs[0], ccv_nnc_tensor_nd(outputs[0].dim), CCV_NNC_MAX_DIM);
assert(hw >= 0);
outputs[0].dim[hw] = ccv_max(cmd.size.dim[0], 1);
outputs[0].dim[hw + 1] = ccv_max(cmd.size.dim[1], 1);
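For a 4-d NCHW feature map the helper returns 2, so ROI align keeps {n, c} and overwrites the two spatial axes with the command's pooled size, e.g. (hypothetical numbers):

int dim[4] = { 1, 256, 64, 64 }; /* NCHW feature map */
const int hw = 2; /* NCHW, nd == CCV_NNC_MAX_DIM + 2 */
dim[hw] = 7; /* ccv_max(cmd.size.dim[0], 1) */
dim[hw + 1] = 7; /* dim becomes { 1, 256, 7, 7 } */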
