Skip to content

Commit

Permalink
dispatch for zfh
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 30, 2024
1 parent de0cf29 commit 1ba5670
Show file tree
Hide file tree
Showing 82 changed files with 1,364 additions and 602 deletions.
25 changes: 17 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m8_t _s, _w; float _v; size_t vl; _s = __riscv_vfmacc_vf_f32m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_V)

set(CMAKE_REQUIRED_FLAGS "-march=rv64gc_zfh -D__fp16=_Float16")
check_cxx_source_compiles("int main() { __fp16 s, v; s = v * v; return 0; }" NCNN_COMPILER_SUPPORT_RISCV_ZFH)

set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m8_t _s, _w; __fp16 _v; size_t vl; _s = __riscv_vfmacc_vf_f16m8(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RISCV_ZVFH)

Expand All @@ -432,8 +435,20 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")

if(NCNN_COMPILER_SUPPORT_RISCV_V OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_RVV "optimize risc-v platform with v extension" ON)
else()
message(WARNING "The compiler does not support risc-v v or xtheadvector extension. NCNN_RVV will be OFF.")
endif()

if(NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_XTHEADVECTOR "optimize risc-v platform with xtheadvector extension" ON)
else()
message(WARNING "The compiler does not support risc-v xtheadvector extension. NCNN_XTHEADVECTOR will be OFF.")
endif()

if(NCNN_COMPILER_SUPPORT_RISCV_ZFH)
option(NCNN_ZFH "optimize risc-v platform with zfh extension" ON)
if(NCNN_COMPILER_SUPPORT_RISCV_ZVFH OR NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
if(NCNN_RVV)
if(NCNN_RVV AND NCNN_ZFH)
option(NCNN_ZVFH "optimize risc-v platform with zvfh extension" ON)
endif()
else()
Expand All @@ -458,13 +473,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
# add_definitions(-D__rvv_tuple)
# endif()
else()
message(WARNING "The compiler does not support risc-v v or xtheadvector extension. NCNN_RVV will be OFF.")
endif()

if(NCNN_COMPILER_SUPPORT_RISCV_XTHEADVECTOR)
option(NCNN_XTHEADVECTOR "optimize risc-v platform with xtheadvector extension" ON)
else()
message(WARNING "The compiler does not support risc-v xtheadvector extension. NCNN_XTHEADVECTOR will be OFF.")
message(WARNING "The compiler does not support risc-v zfh extension. NCNN_ZFH will be OFF.")
endif()

endif()
Expand Down
32 changes: 29 additions & 3 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,28 @@ macro(ncnn_add_arch_opt_source class NCNN_TARGET_ARCH_OPT NCNN_TARGET_ARCH_OPT_C
endif()
endmacro()

macro(ncnn_add_arch_opt_layer_source class NCNN_TARGET_ARCH_OPT_BASE NCNN_TARGET_ARCH_OPT NCNN_TARGET_ARCH_OPT_CFLAGS)
set(NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT_BASE}.cpp)

if(WITH_LAYER_${name} AND EXISTS ${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_SOURCE})

set(NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}.cpp)

add_custom_command(
OUTPUT ${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE}
COMMAND ${CMAKE_COMMAND} -DSRC=${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_SOURCE} -DDST=${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE} -DCLASS=${class} -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_${NCNN_TARGET_ARCH_OPT}_source.cmake"
DEPENDS ${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_SOURCE}
COMMENT "Generating source ${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}.cpp"
VERBATIM
)
set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE} PROPERTIES GENERATED TRUE)

set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE} PROPERTIES COMPILE_FLAGS ${NCNN_TARGET_ARCH_OPT_CFLAGS})

list(APPEND ncnn_SRCS ${NCNN_${NCNN_TARGET_ARCH_OPT_BASE}_${NCNN_TARGET_ARCH_OPT}_SOURCE})
endif()
endmacro()

macro(ncnn_add_layer class)
string(TOLOWER ${class} name)

Expand Down Expand Up @@ -394,11 +416,15 @@ macro(ncnn_add_layer class)
if(NCNN_RUNTIME_CPU AND NCNN_RVV)
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
endif()
if(NCNN_ZFH)
ncnn_add_arch_opt_source(${class} zfh "-march=rv64gc_zfh -D__fp16=_Float16")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_XTHEADVECTOR)
ncnn_add_arch_opt_layer(${class} xtheadvector "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
ncnn_add_arch_opt_layer(${class} xtheadvector "-march=rv64gc_xtheadvector")
ncnn_add_arch_opt_layer_source(${class} zfh xtheadvector "-march=rv64gc_zfh_xtheadvector -D__fp16=_Float16")
endif()
if(NCNN_ZVFH)
ncnn_add_arch_opt_source(${class} zvfh "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
if(NCNN_RUNTIME_CPU AND NCNN_ZVFH)
ncnn_add_arch_opt_layer_source(${class} zfh rvv "-march=rv64gcv_zfh_zvfh -D__fp16=_Float16")
endif()
endif()

Expand Down
27 changes: 11 additions & 16 deletions src/layer/riscv/absval_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@
// specific language governing permissions and limitations under the License.

#include "absval_riscv.h"
#include "cpu.h"

#if __riscv_vector
#include <riscv_vector.h>
#endif // __riscv_vector

namespace ncnn {
#include "cpu.h"

#include "absval_fp16.h"
namespace ncnn {

AbsVal_riscv::AbsVal_riscv()
{
#if __riscv_vector
support_packing = true;
#if NCNN_ZVFH || NCNN_XTHEADVECTOR
support_fp16_storage = cpu_support_riscv_zvfh() || cpu_support_riscv_xtheadvector();
#endif
#endif // __riscv_vector
#if NCNN_ZFH
#if __riscv_vector
support_fp16_storage = cpu_support_riscv_zvfh();
#else
support_fp16_storage = cpu_support_riscv_zfh();
#endif
#endif
}

#if __riscv_vector
Expand All @@ -42,10 +45,10 @@ static inline vfloat32m8_t __riscv_vfabs_v_f32m8_absval(vfloat32m8_t op1, size_t

int AbsVal_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
#if __riscv_vector
#if NCNN_ZFH
int elembits = bottom_top_blob.elembits();

if (support_fp16_storage && opt.use_fp16_storage && elembits == 16)
if (opt.use_fp16_storage && elembits == 16)
{
return forward_inplace_fp16s(bottom_top_blob, opt);
}
Expand Down Expand Up @@ -89,12 +92,4 @@ int AbsVal_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
return 0;
}

#if __riscv_vector
int AbsVal_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
{
absval_fp16(bottom_top_blob, opt);
return 0;
}
#endif // __riscv_vector

} // namespace ncnn
2 changes: 1 addition & 1 deletion src/layer/riscv/absval_riscv.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class AbsVal_riscv : public AbsVal
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

protected:
#if __riscv_vector
#if NCNN_ZFH
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
#endif
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_zvfh
void absval_fp16_zvfh(Mat& bottom_top_blob, const Option& opt);
#endif
#include "absval_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#endif // __riscv_vector

namespace ncnn {

#if __riscv_zvfh
static inline vfloat16m8_t __riscv_vfabs_v_f16m8_absval(vfloat16m8_t op1, size_t vl)
Expand All @@ -23,17 +27,9 @@ static inline vfloat16m8_t __riscv_vfabs_v_f16m8_absval(vfloat16m8_t op1, size_t
}
#endif // __riscv_zvfh

static void absval_fp16(Mat& bottom_top_blob, const Option& opt)
#if NCNN_ZFH
int AbsVal_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
{
#if NCNN_RUNTIME_CPU && NCNN_ZVFH && __riscv_vector && !__riscv_xtheadvector && !__riscv_zvfh
if (ncnn::cpu_support_riscv_zvfh())
{
absval_fp16_zvfh(bottom_top_blob, opt);
return;
}
#endif

#if __riscv_zvfh
const int w = bottom_top_blob.w;
const int h = bottom_top_blob.h;
const int d = bottom_top_blob.d;
Expand All @@ -46,6 +42,7 @@ static void absval_fp16(Mat& bottom_top_blob, const Option& opt)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
Expand All @@ -58,9 +55,17 @@ static void absval_fp16(Mat& bottom_top_blob, const Option& opt)
ptr += vl;
n -= vl;
}
}
#else
(void)bottom_top_blob;
(void)opt;
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
*ptr = (*ptr > (__fp16)0.f) ? (*ptr) : (-*ptr);
ptr++;
}
#endif // __riscv_zvfh
}

return 0;
}
#endif // NCNN_ZFH

} // namespace ncnn
27 changes: 0 additions & 27 deletions src/layer/riscv/absval_riscv_zvfh.cpp

This file was deleted.

14 changes: 9 additions & 5 deletions src/layer/riscv/batchnorm_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,30 @@

#if __riscv_vector
#include <riscv_vector.h>
#include "riscv_usability.h"
#endif // __riscv_vector

#include "riscv_usability.h"
#include "cpu.h"

namespace ncnn {

BatchNorm_riscv::BatchNorm_riscv()
{
#if __riscv_vector
support_packing = true;
#if NCNN_ZVFH
#endif // __riscv_vector
#if NCNN_ZFH
#if __riscv_vector
support_fp16_storage = cpu_support_riscv_zvfh();
#else
support_fp16_storage = cpu_support_riscv_zfh();
#endif
#endif
#endif // __riscv_vector
}

int BatchNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
#if NCNN_ZVFH
#if NCNN_ZFH
int elembits = bottom_top_blob.elembits();

if (opt.use_fp16_storage && elembits == 16)
Expand Down Expand Up @@ -75,7 +80,6 @@ int BatchNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) co
}
#else
int w = bottom_top_blob.w;
#pragma omp parallel for num_threads(opt.num_threads)
for (int i = 0; i < w; i++)
{
ptr[i] = b_data[i] * ptr[i] + a_data[i];
Expand Down
2 changes: 1 addition & 1 deletion src/layer/riscv/batchnorm_riscv.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class BatchNorm_riscv : public BatchNorm
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

protected:
#if NCNN_ZVFH
#if NCNN_ZFH
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
#endif
Expand Down
Loading

0 comments on commit 1ba5670

Please sign in to comment.