Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix mingw64 avx crash and termux build issue #5464

Merged
7 commits were merged (source and target branch names were not captured in this page extract)
on Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ if(NCNN_VULKAN)
target_link_libraries(ncnn PRIVATE glslang SPIRV)
endif()

if(NCNN_PLATFORM_API AND ANDROID_NDK)
if(NCNN_PLATFORM_API AND ANDROID)
target_link_libraries(ncnn PUBLIC android jnigraphics log)
endif()

Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/avx_mathfun.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ _PS256_CONST(cephes_tanh_p8, 1.18534705686654e-04f);
_PS256_CONST(cephes_tanh_p9, 2.26843463243900e-03f);

// an approximation of tanh
static inline __m256 tanh256_ps(const __m256 x)
static NCNN_FORCEINLINE __m256 tanh256_ps(__m256 x)
{
__m256 value = x;
value = _mm256_max_ps(*(__m256*)_ps256_tanh_lo, value);
Expand Down
96 changes: 48 additions & 48 deletions src/layer/x86/binaryop_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,22 +479,22 @@ namespace BinaryOp_x86_functor {

struct binary_op_add
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x + y;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_add_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_add_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_add_ps(x, y);
}
Expand All @@ -505,22 +505,22 @@ struct binary_op_add

struct binary_op_sub
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x - y;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_sub_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_sub_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_sub_ps(x, y);
}
Expand All @@ -531,22 +531,22 @@ struct binary_op_sub

struct binary_op_mul
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x * y;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_mul_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_mul_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_mul_ps(x, y);
}
Expand All @@ -557,22 +557,22 @@ struct binary_op_mul

struct binary_op_div
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x / y;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_div_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_div_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_div_ps(x, y);
}
Expand All @@ -583,22 +583,22 @@ struct binary_op_div

struct binary_op_max
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return std::max(x, y);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_max_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_max_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_max_ps(x, y);
}
Expand All @@ -609,22 +609,22 @@ struct binary_op_max

struct binary_op_min
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return std::min(x, y);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_min_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_min_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_min_ps(x, y);
}
Expand All @@ -635,22 +635,22 @@ struct binary_op_min

struct binary_op_pow
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)powf(x, y);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return pow_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return pow256_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return pow512_ps(x, y);
}
Expand All @@ -661,22 +661,22 @@ struct binary_op_pow

struct binary_op_rsub
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return y - x;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_sub_ps(y, x);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_sub_ps(y, x);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_sub_ps(y, x);
}
Expand All @@ -687,22 +687,22 @@ struct binary_op_rsub

struct binary_op_rdiv
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return y / x;
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_div_ps(y, x);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_div_ps(y, x);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_div_ps(y, x);
}
Expand All @@ -713,22 +713,22 @@ struct binary_op_rdiv

struct binary_op_rpow
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)powf(y, x);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return pow_ps(y, x);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return pow256_ps(y, x);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return pow512_ps(y, x);
}
Expand All @@ -739,22 +739,22 @@ struct binary_op_rpow

struct binary_op_atan2
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)atan2f(x, y);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return atan2_ps(x, y);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return atan2256_ps(x, y);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return atan2512_ps(x, y);
}
Expand All @@ -765,22 +765,22 @@ struct binary_op_atan2

struct binary_op_ratan2
{
float func(const float& x, const float& y) const
NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)atan2f(y, x);
}
#if __SSE2__
__m128 func_pack4(const __m128& x, const __m128& y) const
NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return atan2_ps(y, x);
}
#if __AVX__
__m256 func_pack8(const __m256& x, const __m256& y) const
NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return atan2256_ps(y, x);
}
#if __AVX512F__
__m512 func_pack16(const __m512& x, const __m512& y) const
NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return atan2512_ps(y, x);
}
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack16.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

static void padding_constant_pack16_avx512(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m512 v)
static void padding_constant_pack16_avx512(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m512& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack4.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

static void padding_constant_pack4_sse(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m128 v)
static void padding_constant_pack4_sse(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m128& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack8.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

static void padding_constant_pack8_avx(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m256 v)
static void padding_constant_pack8_avx(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m256& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
Loading
Loading