diff --git a/.github/workflows/build_nightly.yaml b/.github/workflows/build_nightly.yaml new file mode 100644 index 00000000..14a1a0c4 --- /dev/null +++ b/.github/workflows/build_nightly.yaml @@ -0,0 +1,150 @@ +# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +# +# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +# Build and test Kokkos FFT using Docker and Singularity images. Pre-generated +# images are pulled from Github registry. + +name: Nightly tests + +on: + schedule: + - cron: "0 1 * * 1-5" # every weekday at 1am + +env: + # Force the use of BuildKit for Docker + DOCKER_BUILDKIT: 1 + +jobs: + # build project + build: + # only run on original repo + if: github.repository == 'kokkos/kokkos-fft' + + runs-on: ubuntu-latest + + strategy: + matrix: + backend: + - name: openmp + image: gcc + compiler: + c: gcc + cxx: g++ + cmake_flags: + cxx_standard: 17 + kokkos: -DKokkos_ENABLE_OPENMP=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON + - name: threads + image: gcc + compiler: + c: gcc + cxx: g++ + cmake_flags: + cxx_standard: 20 + kokkos: -DKokkos_ENABLE_THREADS=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON + - name: serial + image: gcc + compiler: + c: gcc + cxx: g++ + cmake_flags: + cxx_standard: 17 + kokkos: -DKokkos_ENABLE_SERIAL=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON + - name: cuda + image: nvcc + compiler: + c: gcc + cxx: g++ + cmake_flags: + cxx_standard: 17 + kokkos: -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" + - name: hip + image: rocm + compiler: + c: hipcc + cxx: hipcc + cmake_flags: + cxx_standard: 17 + kokkos: -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_VEGA90A=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" + - name: rocm + image: rocm + compiler: + c: hipcc + cxx: hipcc + cmake_flags: + cxx_standard: 20 + kokkos: -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_VEGA90A=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra -Werror" -DKokkosFFT_ENABLE_ROCFFT=ON + - name: sycl + image: intel + compiler: + c: icx + cxx: icpx + cmake_flags: + # building for Intel PVC was unsuccessful without the proper + # device, so for now, we simply generate generic Intel GPU code + cxx_standard: 17 + kokkos: -DKokkos_ENABLE_SYCL=ON -DKokkos_ARCH_INTEL_GEN=ON + kokkos_fft: -DCMAKE_CXX_FLAGS="-Wall -Wextra" + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@v1.3.1 + with: + tool-cache: true + large-packages: false + + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Checkout Kokkos devel branch + uses: actions/checkout@v4 + with: + repository: kokkos/kokkos + path: kokkos + + - name: Configure Kokkos + run: | + docker run -v ${{ github.workspace }}:/work ghcr.io/kokkos/kokkos-fft/base_${{ matrix.backend.image }}_main:latest \ + cmake -B build_kokkos \ + -DCMAKE_INSTALL_PREFIX=/work/install \ + -DCMAKE_C_COMPILER=${{ matrix.backend.compiler.c }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.backend.compiler.cxx }} \ + -DCMAKE_CXX_STANDARD=${{ matrix.backend.cmake_flags.cxx_standard }} \ + -DCMAKE_BUILD_TYPE=${{ matrix.backend.cmake_build_type }} \ + ${{ matrix.backend.cmake_flags.kokkos }} \ + kokkos + + - name: Build Kokkos + run: | + docker run -v ${{ github.workspace }}:/work ghcr.io/kokkos/kokkos-fft/base_${{ matrix.backend.image }}_main:latest \ + cmake --build build_kokkos -j $(( $(nproc) * 2 + 1 )) + + - name: Install Kokkos + run: | + docker run -v ${{ github.workspace }}:/work ghcr.io/kokkos/kokkos-fft/base_${{ matrix.backend.image }}_main:latest \ + cmake --install build_kokkos + + - name: Configure + run: | + docker run -v ${{ github.workspace }}:/work ghcr.io/kokkos/kokkos-fft/base_${{ matrix.backend.image }}_main:latest \ + cmake -B build \ + -DCMAKE_PREFIX_PATH=/work/install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=${{ matrix.backend.compiler.c }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.backend.compiler.cxx }} \ + -DCMAKE_CXX_STANDARD=${{ matrix.backend.cmake_flags.cxx_standard }} \ + -DKokkosFFT_ENABLE_EXAMPLES=ON \ + -DKokkosFFT_ENABLE_TESTS=ON \ + ${{ matrix.backend.cmake_flags.kokkos_fft }} + + - name: Build + run: | + docker run -v ${{ github.workspace }}:/work ghcr.io/kokkos/kokkos-fft/base_${{ matrix.backend.image }}_main:latest \ + cmake --build build -j $(( $(nproc) * 2 + 1 )) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index ff649d31..d3fbed1e 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -301,6 +301,7 @@ jobs: # test the project test: runs-on: ${{ matrix.backend.runner }} + continue-on-error: ${{ matrix.backend.unstable }} needs: - check_docker_files @@ -317,21 +318,25 @@ jobs: image: nvcc runner: [self-hosted, cuda] use_singularity: true + unstable: true # run OpenMP tests on Azure server - name: openmp image: gcc runner: ubuntu-latest use_singularity: false + unstable: false # run Threads tests on Azure server - name: threads image: gcc runner: ubuntu-latest use_singularity: false + unstable: false # run Serial tests on Azure server - name: serial image: gcc runner: ubuntu-latest use_singularity: false + unstable: false steps: - name: Get artifacts diff --git a/README.md b/README.md index 2a1c6ca7..2e63ea3e 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception # kokkos-fft [![CI](https://github.com/kokkos/kokkos-fft/actions/workflows/build_test.yaml/badge.svg)](https://github.com/kokkos/kokkos-fft/actions) +[![Nightly builds](https://github.com/kokkos/kokkos-fft/actions/workflows/build_nightly.yaml/badge.svg)](https://github.com/kokkos/kokkos-fft/actions/workflows/build_nightly.yaml) [![docs](https://readthedocs.org/projects/kokkosfft/badge/?version=latest)](https://kokkosfft.readthedocs.io/en/latest/?badge=latest) > [!WARNING] diff --git a/common/src/KokkosFFT_utils.hpp b/common/src/KokkosFFT_utils.hpp index 0e35e6a4..c518c67f 100644 --- a/common/src/KokkosFFT_utils.hpp +++ b/common/src/KokkosFFT_utils.hpp @@ -105,7 +105,7 @@ bool are_valid_axes(const ViewType& view, const ArrayType& axes) { "are_valid_axes: the Rank of FFT axes must be between 1 and View rank"); // Convert the input axes to be in the range of [0, rank-1] - // int type is choosen for consistency with the rest of the code + // int type is chosen for consistency with the rest of the code // the axes are defined with int type std::array non_negative_axes; diff --git a/common/unit_test/Test_Extents.cpp b/common/unit_test/Test_Extents.cpp index 312af11a..54acce6b 100644 --- a/common/unit_test/Test_Extents.cpp +++ b/common/unit_test/Test_Extents.cpp @@ -52,7 +52,7 @@ void test_extents_1d() { EXPECT_TRUE(fft_extents_r2c == ref_fft_extents_r2c); EXPECT_EQ(howmany_r2c, 1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr, xcout, axes_type({0})); }, std::runtime_error); @@ -70,7 +70,7 @@ void test_extents_1d() { EXPECT_TRUE(fft_extents_c2r == ref_fft_extents_c2r); EXPECT_EQ(howmany_c2r, 1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin, xr, axes_type({0})); }, std::runtime_error); @@ -119,7 +119,7 @@ void test_extents_1d_batched_FFT_2d() { EXPECT_TRUE(out_extents_r2c_axis0 == ref_out_extents_r2c_axis0); EXPECT_EQ(howmany_r2c_axis0, ref_howmany_r2c_axis0); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr2, xcout2, axes_type({0})); }, std::runtime_error); @@ -131,7 +131,7 @@ void test_extents_1d_batched_FFT_2d() { EXPECT_TRUE(out_extents_r2c_axis1 == ref_out_extents_r2c_axis1); EXPECT_EQ(howmany_r2c_axis1, ref_howmany_r2c_axis1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr2, xcout2, axes_type({1})); }, std::runtime_error); @@ -144,7 +144,7 @@ void test_extents_1d_batched_FFT_2d() { EXPECT_TRUE(out_extents_c2r_axis0 == ref_in_extents_r2c_axis0); EXPECT_EQ(howmany_c2r_axis0, ref_howmany_r2c_axis0); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin2, xr2, axes_type({0})); }, std::runtime_error); @@ -156,7 +156,7 @@ void test_extents_1d_batched_FFT_2d() { EXPECT_TRUE(out_extents_c2r_axis1 == ref_in_extents_r2c_axis1); EXPECT_EQ(howmany_c2r_axis1, ref_howmany_r2c_axis1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin2, xr2, axes_type({1})); }, std::runtime_error); @@ -177,7 +177,7 @@ void test_extents_1d_batched_FFT_2d() { EXPECT_TRUE(out_extents_c2c_axis1 == ref_in_extents_r2c_axis1); EXPECT_EQ(howmany_c2c_axis1, ref_howmany_r2c_axis1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents ComplexView2Dtype xcout2_wrong("xcout2_wrong", n0 + 3, n1); for (int i = 0; i < 2; i++) { EXPECT_THROW( @@ -225,7 +225,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis0 == ref_out_extents_r2c_axis0); EXPECT_EQ(howmany_r2c_axis0, ref_howmany_r2c_axis0); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({0})); }, std::runtime_error); @@ -237,7 +237,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis1 == ref_out_extents_r2c_axis1); EXPECT_EQ(howmany_r2c_axis1, ref_howmany_r2c_axis1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({1})); }, std::runtime_error); @@ -249,7 +249,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis2 == ref_out_extents_r2c_axis2); EXPECT_EQ(howmany_r2c_axis2, ref_howmany_r2c_axis2); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({2})); }, std::runtime_error); @@ -262,7 +262,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis0 == ref_in_extents_r2c_axis0); EXPECT_EQ(howmany_c2r_axis0, ref_howmany_r2c_axis0); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({0})); }, std::runtime_error); @@ -274,7 +274,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis1 == ref_in_extents_r2c_axis1); EXPECT_EQ(howmany_c2r_axis1, ref_howmany_r2c_axis1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({1})); }, std::runtime_error); @@ -286,7 +286,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis2 == ref_in_extents_r2c_axis2); EXPECT_EQ(howmany_c2r_axis2, ref_howmany_r2c_axis2); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW({ KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({2})); }, std::runtime_error); @@ -315,7 +315,7 @@ void test_extents_1d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2c_axis2 == ref_in_extents_r2c_axis2); EXPECT_EQ(howmany_c2c_axis2, ref_howmany_r2c_axis2); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents ComplexView3Dtype xcout3_wrong("xcout3_wrong", n0 + 3, n1, n2); for (int i = 0; i < 3; i++) { EXPECT_THROW( @@ -382,7 +382,7 @@ void test_extents_2d() { EXPECT_EQ(howmany_r2c_axis01, 1); EXPECT_EQ(howmany_r2c_axis10, 1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr2, xcout2, axes_type({0, 1})); @@ -414,7 +414,7 @@ void test_extents_2d() { EXPECT_EQ(howmany_c2r_axis01, 1); EXPECT_EQ(howmany_c2r_axis10, 1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin2, xr2, axes_type({0, 1})); @@ -446,7 +446,7 @@ void test_extents_2d() { EXPECT_EQ(howmany_c2c_axis01, 1); EXPECT_EQ(howmany_c2c_axis10, 1); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents ComplexView2Dtype xcout2_wrong("xcout2_wrong", n0 + 3, n1); for (int axis0 = 0; axis0 < 2; axis0++) { for (int axis1 = 0; axis1 < 2; axis1++) { @@ -518,7 +518,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_01 == ref_out_extents_r2c_axis_01); EXPECT_EQ(howmany_r2c_axis_01, ref_howmany_r2c_axis_01); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({0, 1})); @@ -533,7 +533,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_02 == ref_out_extents_r2c_axis_02); EXPECT_EQ(howmany_r2c_axis_02, ref_howmany_r2c_axis_02); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({0, 2})); @@ -548,7 +548,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_10 == ref_out_extents_r2c_axis_10); EXPECT_EQ(howmany_r2c_axis_10, ref_howmany_r2c_axis_10); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({1, 0})); @@ -563,7 +563,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_12 == ref_out_extents_r2c_axis_12); EXPECT_EQ(howmany_r2c_axis_12, ref_howmany_r2c_axis_12); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({1, 2})); @@ -578,7 +578,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_20 == ref_out_extents_r2c_axis_20); EXPECT_EQ(howmany_r2c_axis_20, ref_howmany_r2c_axis_20); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({2, 0})); @@ -593,7 +593,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_r2c_axis_21 == ref_out_extents_r2c_axis_21); EXPECT_EQ(howmany_r2c_axis_21, ref_howmany_r2c_axis_21); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xr3, xcout3, axes_type({2, 1})); @@ -609,7 +609,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_01 == ref_in_extents_r2c_axis_01); EXPECT_EQ(howmany_c2r_axis_01, ref_howmany_r2c_axis_01); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({0, 1})); @@ -624,7 +624,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_02 == ref_in_extents_r2c_axis_02); EXPECT_EQ(howmany_c2r_axis_02, ref_howmany_r2c_axis_02); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({0, 2})); @@ -639,7 +639,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_10 == ref_in_extents_r2c_axis_10); EXPECT_EQ(howmany_c2r_axis_10, ref_howmany_r2c_axis_10); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({1, 0})); @@ -654,7 +654,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_12 == ref_in_extents_r2c_axis_12); EXPECT_EQ(howmany_c2r_axis_12, ref_howmany_r2c_axis_12); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({1, 2})); @@ -669,7 +669,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_20 == ref_in_extents_r2c_axis_20); EXPECT_EQ(howmany_c2r_axis_20, ref_howmany_r2c_axis_20); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({2, 0})); @@ -684,7 +684,7 @@ void test_extents_2d_batched_FFT_3d() { EXPECT_TRUE(out_extents_c2r_axis_21 == ref_in_extents_r2c_axis_21); EXPECT_EQ(howmany_c2r_axis_21, ref_howmany_r2c_axis_21); - // Check if errors are correctly raised aginst invalid extents + // Check if errors are correctly raised against invalid extents EXPECT_THROW( { KokkosFFT::Impl::get_extents(xcin3, xr3, axes_type({2, 1})); diff --git a/common/unit_test/Test_Traits.cpp b/common/unit_test/Test_Traits.cpp index 3a80878b..3afae3d5 100644 --- a/common/unit_test/Test_Traits.cpp +++ b/common/unit_test/Test_Traits.cpp @@ -241,7 +241,7 @@ template void test_admissible_value_type() { using ViewType = Kokkos::View; using real_type = KokkosFFT::Impl::base_floating_point_type; - // Tests that a Value or View has a addmissible value type + // Tests that a Value or View has a admissible value type if constexpr (std::is_same_v || std::is_same_v) { // Base floating point type of a Value is float or double diff --git a/common/unit_test/Test_Utils.cpp b/common/unit_test/Test_Utils.cpp index 48dbcfff..829b5316 100644 --- a/common/unit_test/Test_Utils.cpp +++ b/common/unit_test/Test_Utils.cpp @@ -65,7 +65,7 @@ void test_convert_negative_axes_1d() { EXPECT_EQ(converted_axis_0, ref_converted_axis_0); EXPECT_EQ(converted_axis_minus1, ref_converted_axis_minus1); - // Check if errors are correctly raised aginst invalid axis + // Check if errors are correctly raised against invalid axis // axis must be in [-1, 1) EXPECT_THROW({ KokkosFFT::Impl::convert_negative_axis(x, /*axis=*/1); }, std::runtime_error); @@ -93,7 +93,7 @@ void test_convert_negative_axes_2d() { EXPECT_EQ(converted_axis_1, ref_converted_axis_1); EXPECT_EQ(converted_axis_minus1, ref_converted_axis_minus1); - // Check if errors are correctly raised aginst invalid axis + // Check if errors are correctly raised against invalid axis // axis must be in [-2, 2) EXPECT_THROW({ KokkosFFT::Impl::convert_negative_axis(x, /*axis=*/2); }, std::runtime_error); @@ -128,7 +128,7 @@ void test_convert_negative_axes_3d() { EXPECT_EQ(converted_axis_minus1, ref_converted_axis_minus1); EXPECT_EQ(converted_axis_minus2, ref_converted_axis_minus2); - // Check if errors are correctly raised aginst invalid axis + // Check if errors are correctly raised against invalid axis // axis must be in [-3, 3) EXPECT_THROW({ KokkosFFT::Impl::convert_negative_axis(x, /*axis=*/3); }, std::runtime_error); @@ -170,7 +170,7 @@ void test_convert_negative_axes_4d() { EXPECT_EQ(converted_axis_minus2, ref_converted_axis_minus2); EXPECT_EQ(converted_axis_minus3, ref_converted_axis_minus3); - // Check if errors are correctly raised aginst invalid axis + // Check if errors are correctly raised against invalid axis // axis must be in [-4, 4) EXPECT_THROW({ KokkosFFT::Impl::convert_negative_axis(x, /*axis=*/4); }, std::runtime_error); diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 6c7bbd26..b745bed3 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -16,15 +16,14 @@ namespace KokkosFFT { namespace Impl { // 1D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<1> axes, shape_type<1> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<1> axes, shape_type<1> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -37,14 +36,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); - - plan = std::make_unique(); - cufftResult cufft_rt = cufftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); - - cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = @@ -52,24 +43,22 @@ auto create_plan(const ExecutionSpace& exec_space, const int nx = fft_extents.at(0); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - cufft_rt = cufftPlan1d(&(*plan), nx, type, howmany); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan1d failed"); + plan = std::make_unique(nx, type, howmany); + plan->commit(exec_space); return fft_size; } // 2D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<2> axes, shape_type<2> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<2> axes, shape_type<2> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -82,14 +71,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); - - plan = std::make_unique(); - cufftResult cufft_rt = cufftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); - - cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -97,24 +78,22 @@ auto create_plan(const ExecutionSpace& exec_space, const int nx = fft_extents.at(0), ny = fft_extents.at(1); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan2d failed"); + plan = std::make_unique(nx, ny, type); + plan->commit(exec_space); return fft_size; } // 3D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<3> axes, shape_type<3> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<3> axes, shape_type<3> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -127,14 +106,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_cufft]"); - - plan = std::make_unique(); - cufftResult cufft_rt = cufftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); - - cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -144,24 +115,22 @@ auto create_plan(const ExecutionSpace& exec_space, nz = fft_extents.at(2); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan3d failed"); + plan = std::make_unique(nx, ny, nz, type); + plan->commit(exec_space); return fft_size; } // batched transform, over ND Views template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type axes, - shape_type s, bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type axes, shape_type s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -194,30 +163,14 @@ auto create_plan(const ExecutionSpace& exec_space, // For the moment, considering the contiguous layout only int istride = 1, ostride = 1; - - plan = std::make_unique(); - cufftResult cufft_rt = cufftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftCreate failed"); - - cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream((*plan), stream); - - cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), - in_extents.data(), istride, idist, - out_extents.data(), ostride, odist, type, howmany); - - KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlanMany failed"); + plan = std::make_unique(rank, fft_extents.data(), in_extents.data(), + istride, idist, out_extents.data(), ostride, + odist, type, howmany); + plan->commit(exec_space); return fft_size; } -template , - std::nullptr_t> = nullptr> -void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_cufft]"); - cufftDestroy(*plan); -} } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 5b7a02be..5cfb070e 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -8,54 +8,61 @@ #include #include #include "KokkosFFT_asserts.hpp" +#include "KokkosFFT_Cuda_types.hpp" namespace KokkosFFT { namespace Impl { -template -inline void exec_plan(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); + +inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftReal* idata, + cufftComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecR2C]"); + cufftResult cufft_rt = cufftExecR2C(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecR2C failed"); } -template -inline void exec_plan(cufftHandle& plan, cufftDoubleReal* idata, - cufftDoubleComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); +inline void exec_plan(const ScopedCufftPlan& scoped_plan, + cufftDoubleReal* idata, cufftDoubleComplex* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecD2Z]"); + cufftResult cufft_rt = cufftExecD2Z(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecD2Z failed"); } -template -inline void exec_plan(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); +inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, + cufftReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecC2R]"); + cufftResult cufft_rt = cufftExecC2R(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2R failed"); } -template -inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, - cufftDoubleReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); +inline void exec_plan(const ScopedCufftPlan& scoped_plan, + cufftDoubleComplex* idata, cufftDoubleReal* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecZ2D]"); + cufftResult cufft_rt = cufftExecZ2D(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2D failed"); } -template -inline void exec_plan(cufftHandle& plan, cufftComplex* idata, - cufftComplex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); +inline void exec_plan(const ScopedCufftPlan& scoped_plan, cufftComplex* idata, + cufftComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecC2C]"); + cufftResult cufft_rt = + cufftExecC2C(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecC2C failed"); } -template -inline void exec_plan(cufftHandle& plan, cufftDoubleComplex* idata, - cufftDoubleComplex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); +inline void exec_plan(const ScopedCufftPlan& scoped_plan, + cufftDoubleComplex* idata, cufftDoubleComplex* odata, + int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_cufftExecZ2Z]"); + cufftResult cufft_rt = + cufftExecZ2Z(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_Cuda_types.hpp b/fft/src/KokkosFFT_Cuda_types.hpp index 52b9d155..00e123a2 100644 --- a/fft/src/KokkosFFT_Cuda_types.hpp +++ b/fft/src/KokkosFFT_Cuda_types.hpp @@ -6,7 +6,14 @@ #define KOKKOSFFT_CUDA_TYPES_HPP #include +#include +#include #include "KokkosFFT_common_types.hpp" +#include "KokkosFFT_asserts.hpp" + +#if defined(ENABLE_HOST_AND_DEVICE) +#include "KokkosFFT_FFTW_Types.hpp" +#endif // Check the size of complex type static_assert(sizeof(cufftComplex) == sizeof(Kokkos::complex)); @@ -15,27 +22,61 @@ static_assert(alignof(cufftComplex) <= alignof(Kokkos::complex)); static_assert(sizeof(cufftDoubleComplex) == sizeof(Kokkos::complex)); static_assert(alignof(cufftDoubleComplex) <= alignof(Kokkos::complex)); -#ifdef ENABLE_HOST_AND_DEVICE -#include -#include "KokkosFFT_utils.hpp" -static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); - -static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); -#endif - namespace KokkosFFT { namespace Impl { using FFTDirectionType = int; -// Unused -template -using FFTInfoType = int; +/// \brief A class that wraps cufft for RAII +struct ScopedCufftPlan { + private: + cufftHandle m_plan; + + public: + ScopedCufftPlan(int nx, cufftType type, int batch) { + cufftResult cufft_rt = cufftPlan1d(&m_plan, nx, type, batch); + KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan1d failed"); + } + + ScopedCufftPlan(int nx, int ny, cufftType type) { + cufftResult cufft_rt = cufftPlan2d(&m_plan, nx, ny, type); + KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan2d failed"); + } -#ifdef ENABLE_HOST_AND_DEVICE -enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; + ScopedCufftPlan(int nx, int ny, int nz, cufftType type) { + cufftResult cufft_rt = cufftPlan3d(&m_plan, nx, ny, nz, type); + KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlan3d failed"); + } + + ScopedCufftPlan(int rank, int *n, int *inembed, int istride, int idist, + int *onembed, int ostride, int odist, cufftType type, + int batch) { + cufftResult cufft_rt = + cufftPlanMany(&m_plan, rank, n, inembed, istride, idist, onembed, + ostride, odist, type, batch); + KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftPlanMany failed"); + } + + ~ScopedCufftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_cufft]"); + cufftResult cufft_rt = cufftDestroy(m_plan); + if (cufft_rt != CUFFT_SUCCESS) Kokkos::abort("cufftDestroy failed"); + } + + ScopedCufftPlan() = delete; + ScopedCufftPlan(const ScopedCufftPlan &) = delete; + ScopedCufftPlan &operator=(const ScopedCufftPlan &) = delete; + ScopedCufftPlan &operator=(ScopedCufftPlan &&) = delete; + ScopedCufftPlan(ScopedCufftPlan &&) = delete; + + cufftHandle plan() const noexcept { return m_plan; } + void commit(const Kokkos::Cuda &exec_space) const { + cufftResult cufft_rt = cufftSetStream(m_plan, exec_space.cuda_stream()); + KOKKOSFFT_THROW_IF(cufft_rt != CUFFT_SUCCESS, "cufftSetStream failed"); + } +}; +#if defined(ENABLE_HOST_AND_DEVICE) template struct FFTDataType { using float32 = @@ -52,15 +93,6 @@ struct FFTDataType { cufftDoubleComplex, fftw_complex>; }; -template -struct FFTPlanType { - using fftwHandle = std::conditional_t< - std::is_same_v, float>, - fftwf_plan, fftw_plan>; - using type = std::conditional_t, - cufftHandle, fftwHandle>; -}; - template using TransformType = std::conditional_t, cufftType, @@ -136,6 +168,14 @@ struct transform_type, } }; +template +struct FFTPlanType { + using fftw_plan_type = ScopedFFTWPlan; + using cufft_plan_type = ScopedCufftPlan; + using type = std::conditional_t, + cufft_plan_type, fftw_plan_type>; +}; + template auto direction_type(Direction direction) { static constexpr FFTDirectionType FORWARD = @@ -155,11 +195,6 @@ struct FFTDataType { using complex128 = cufftDoubleComplex; }; -template -struct FFTPlanType { - using type = cufftHandle; -}; - template using TransformType = cufftType; @@ -197,6 +232,11 @@ struct transform_type, static constexpr cufftType type() { return m_type; }; }; +template +struct FFTPlanType { + using type = ScopedCufftPlan; +}; + template auto direction_type(Direction direction) { return direction == Direction::forward ? CUFFT_FORWARD : CUFFT_INVERSE; diff --git a/fft/src/KokkosFFT_FFTW_Types.hpp b/fft/src/KokkosFFT_FFTW_Types.hpp new file mode 100644 index 00000000..20aac85c --- /dev/null +++ b/fft/src/KokkosFFT_FFTW_Types.hpp @@ -0,0 +1,146 @@ +// SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +#ifndef KOKKOSFFT_FFTW_TYPES_HPP +#define KOKKOSFFT_FFTW_TYPES_HPP + +#include +#include +#include +#include "KokkosFFT_common_types.hpp" +#include "KokkosFFT_utils.hpp" + +// Check the size of complex type +static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); + +static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); + +namespace KokkosFFT { +namespace Impl { + +enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; + +// Define fft transform types +template +struct fftw_transform_type { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct fftw_transform_type> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + static constexpr FFTWTransformType m_type = std::is_same_v + ? FFTWTransformType::R2C + : FFTWTransformType::D2Z; + static constexpr FFTWTransformType type() { return m_type; }; +}; + +template +struct fftw_transform_type, T2> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + static constexpr FFTWTransformType m_type = std::is_same_v + ? FFTWTransformType::C2R + : FFTWTransformType::Z2D; + static constexpr FFTWTransformType type() { return m_type; }; +}; + +template +struct fftw_transform_type, Kokkos::complex> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + static constexpr FFTWTransformType m_type = std::is_same_v + ? FFTWTransformType::C2C + : FFTWTransformType::Z2Z; + static constexpr FFTWTransformType type() { return m_type; }; +}; + +/// \brief A class that wraps fftw_plan and fftwf_plan for RAII +template +struct ScopedFFTWPlan { + private: + using floating_point_type = KokkosFFT::Impl::base_floating_point_type; + using plan_type = + std::conditional_t, fftwf_plan, + fftw_plan>; + plan_type m_plan; + + public: + template + ScopedFFTWPlan(const ExecutionSpace &exec_space, int rank, const int *n, + int howmany, InScalarType *in, const int *inembed, int istride, + int idist, OutScalarType *out, const int *onembed, int ostride, + int odist, [[maybe_unused]] int sign, unsigned flags) { + init_threads(exec_space); + constexpr auto type = fftw_transform_type::type(); + if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { + m_plan = + fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, flags); + } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { + m_plan = + fftw_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, flags); + } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { + m_plan = + fftwf_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, flags); + } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { + m_plan = + fftw_plan_many_dft_c2r(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, flags); + } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { + m_plan = + fftwf_plan_many_dft(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, sign, flags); + } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { + m_plan = fftw_plan_many_dft(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, sign, flags); + } + } + + ~ScopedFFTWPlan() noexcept { + Kokkos::Profiling::ScopedRegion region("KokkosFFT::cleanup_plan[TPL_fftw]"); + if constexpr (std::is_same_v) { + fftwf_destroy_plan(m_plan); + } else { + fftw_destroy_plan(m_plan); + } + } + + ScopedFFTWPlan() = delete; + ScopedFFTWPlan(const ScopedFFTWPlan &) = delete; + ScopedFFTWPlan &operator=(const ScopedFFTWPlan &) = delete; + ScopedFFTWPlan &operator=(ScopedFFTWPlan &&) = delete; + ScopedFFTWPlan(ScopedFFTWPlan &&) = delete; + + plan_type plan() const noexcept { return m_plan; } + + private: + void init_threads([[maybe_unused]] const ExecutionSpace &exec_space) { +#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) + if constexpr (std::is_same_v) { + int nthreads = exec_space.concurrency(); + + if constexpr (std::is_same_v) { + fftwf_init_threads(); + fftwf_plan_with_nthreads(nthreads); + } else { + fftw_init_threads(); + fftw_plan_with_nthreads(nthreads); + } + } +#endif + } +}; + +} // namespace Impl +} // namespace KokkosFFT + +#endif diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 1dc3a331..fe617e6a 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -16,15 +16,14 @@ namespace KokkosFFT { namespace Impl { // 1D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<1> axes, shape_type<1> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<1> axes, shape_type<1> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -37,14 +36,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); - - plan = std::make_unique(); - hipfftResult hipfft_rt = hipfftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); - - hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = @@ -52,24 +43,22 @@ auto create_plan(const ExecutionSpace& exec_space, const int nx = fft_extents.at(0); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - hipfft_rt = hipfftPlan1d(&(*plan), nx, type, howmany); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan1d failed"); + plan = std::make_unique(nx, type, howmany); + plan->commit(exec_space); return fft_size; } // 2D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<2> axes, shape_type<2> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<2> axes, shape_type<2> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -82,14 +71,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); - - plan = std::make_unique(); - hipfftResult hipfft_rt = hipfftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); - - hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -97,24 +78,22 @@ auto create_plan(const ExecutionSpace& exec_space, const int nx = fft_extents.at(0), ny = fft_extents.at(1); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan2d failed"); + plan = std::make_unique(nx, ny, type); + plan->commit(exec_space); return fft_size; } // 3D transform template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type<3> axes, shape_type<3> s, - bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type<3> axes, shape_type<3> s, bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -127,14 +106,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); - - plan = std::make_unique(); - hipfftResult hipfft_rt = hipfftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); - - hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream((*plan), stream); - auto type = KokkosFFT::Impl::transform_type::type(); [[maybe_unused]] auto [in_extents, out_extents, fft_extents, howmany] = @@ -144,24 +115,22 @@ auto create_plan(const ExecutionSpace& exec_space, nz = fft_extents.at(2); int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - - hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan3d failed"); + plan = std::make_unique(nx, ny, nz, type); + plan->commit(exec_space); return fft_size; } // batched transform, over ND Views template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type axes, - shape_type s, bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type axes, shape_type s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -179,7 +148,6 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_hipfft]"); - const int rank = fft_rank; constexpr auto type = KokkosFFT::Impl::transform_type(); - hipfftResult hipfft_rt = hipfftCreate(&(*plan)); - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftCreate failed"); - - hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream((*plan), stream); - - hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), - in_extents.data(), istride, idist, - out_extents.data(), ostride, odist, type, howmany); - - KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlanMany failed"); + plan = std::make_unique(rank, fft_extents.data(), in_extents.data(), + istride, idist, out_extents.data(), ostride, + odist, type, howmany); + plan->commit(exec_space); return fft_size; } -template , - std::nullptr_t> = nullptr> -void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_hipfft]"); - hipfftDestroy(*plan); -} } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 07062f6b..9ac5c954 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -8,54 +8,61 @@ #include #include #include "KokkosFFT_asserts.hpp" +#include "KokkosFFT_HIP_types.hpp" namespace KokkosFFT { namespace Impl { -template -inline void exec_plan(hipfftHandle& plan, hipfftReal* idata, - hipfftComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_cufft]"); - hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); + +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftReal* idata, + hipfftComplex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecR2C]"); + hipfftResult hipfft_rt = hipfftExecR2C(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecR2C failed"); } -template -inline void exec_plan(hipfftHandle& plan, hipfftDoubleReal* idata, - hipfftDoubleComplex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); - hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, + hipfftDoubleReal* idata, hipfftDoubleComplex* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecD2Z]"); + hipfftResult hipfft_rt = hipfftExecD2Z(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecD2Z failed"); } -template -inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, - hipfftReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); - hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, + hipfftReal* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecC2R]"); + hipfftResult hipfft_rt = hipfftExecC2R(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2R failed"); } -template -inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleReal* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); - hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, + hipfftDoubleComplex* idata, hipfftDoubleReal* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecZ2D]"); + hipfftResult hipfft_rt = hipfftExecZ2D(scoped_plan.plan(), idata, odata); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2D failed"); } -template -inline void exec_plan(hipfftHandle& plan, hipfftComplex* idata, - hipfftComplex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); - hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, hipfftComplex* idata, + hipfftComplex* odata, int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecC2C]"); + hipfftResult hipfft_rt = + hipfftExecC2C(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecC2C failed"); } -template -inline void exec_plan(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleComplex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_hipfft]"); - hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); +inline void exec_plan(const ScopedHIPfftPlan& scoped_plan, + hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, + int direction) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_hipfftExecZ2Z]"); + hipfftResult hipfft_rt = + hipfftExecZ2Z(scoped_plan.plan(), idata, odata, direction); KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftExecZ2Z failed"); } } // namespace Impl diff --git a/fft/src/KokkosFFT_HIP_types.hpp b/fft/src/KokkosFFT_HIP_types.hpp index 38663905..16e5eea1 100644 --- a/fft/src/KokkosFFT_HIP_types.hpp +++ b/fft/src/KokkosFFT_HIP_types.hpp @@ -6,7 +6,14 @@ #define KOKKOSFFT_HIP_TYPES_HPP #include +#include +#include #include "KokkosFFT_common_types.hpp" +#include "KokkosFFT_asserts.hpp" + +#if defined(ENABLE_HOST_AND_DEVICE) +#include "KokkosFFT_FFTW_Types.hpp" +#endif // Check the size of complex type static_assert(sizeof(hipfftComplex) == sizeof(Kokkos::complex)); @@ -15,27 +22,61 @@ static_assert(alignof(hipfftComplex) <= alignof(Kokkos::complex)); static_assert(sizeof(hipfftDoubleComplex) == sizeof(Kokkos::complex)); static_assert(alignof(hipfftDoubleComplex) <= alignof(Kokkos::complex)); -#ifdef ENABLE_HOST_AND_DEVICE -#include -#include "KokkosFFT_utils.hpp" -static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); - -static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); -#endif - namespace KokkosFFT { namespace Impl { using FFTDirectionType = int; -// Unused -template -using FFTInfoType = int; +/// \brief A class that wraps hipfft for RAII +struct ScopedHIPfftPlan { + private: + hipfftHandle m_plan; + + public: + ScopedHIPfftPlan(int nx, hipfftType type, int batch) { + hipfftResult hipfft_rt = hipfftPlan1d(&m_plan, nx, type, batch); + KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan1d failed"); + } + + ScopedHIPfftPlan(int nx, int ny, hipfftType type) { + hipfftResult hipfft_rt = hipfftPlan2d(&m_plan, nx, ny, type); + KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan2d failed"); + } -#ifdef ENABLE_HOST_AND_DEVICE -enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; + ScopedHIPfftPlan(int nx, int ny, int nz, hipfftType type) { + hipfftResult hipfft_rt = hipfftPlan3d(&m_plan, nx, ny, nz, type); + KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlan3d failed"); + } + + ScopedHIPfftPlan(int rank, int *n, int *inembed, int istride, int idist, + int *onembed, int ostride, int odist, hipfftType type, + int batch) { + hipfftResult hipfft_rt = + hipfftPlanMany(&m_plan, rank, n, inembed, istride, idist, onembed, + ostride, odist, type, batch); + KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftPlanMany failed"); + } + + ~ScopedHIPfftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_hipfft]"); + hipfftResult hipfft_rt = hipfftDestroy(m_plan); + if (hipfft_rt != HIPFFT_SUCCESS) Kokkos::abort("hipfftDestroy failed"); + } + + ScopedHIPfftPlan() = delete; + ScopedHIPfftPlan(const ScopedHIPfftPlan &) = delete; + ScopedHIPfftPlan &operator=(const ScopedHIPfftPlan &) = delete; + ScopedHIPfftPlan &operator=(ScopedHIPfftPlan &&) = delete; + ScopedHIPfftPlan(ScopedHIPfftPlan &&) = delete; + + hipfftHandle plan() const noexcept { return m_plan; } + void commit(const Kokkos::HIP &exec_space) const { + hipfftResult hipfft_rt = hipfftSetStream(m_plan, exec_space.hip_stream()); + KOKKOSFFT_THROW_IF(hipfft_rt != HIPFFT_SUCCESS, "hipfftSetStream failed"); + } +}; +#if defined(ENABLE_HOST_AND_DEVICE) template struct FFTDataType { using float32 = @@ -52,15 +93,6 @@ struct FFTDataType { hipfftDoubleComplex, fftw_complex>; }; -template -struct FFTPlanType { - using fftwHandle = std::conditional_t< - std::is_same_v, float>, - fftwf_plan, fftw_plan>; - using type = std::conditional_t, - hipfftHandle, fftwHandle>; -}; - template using TransformType = std::conditional_t, hipfftType, @@ -136,6 +168,14 @@ struct transform_type, } }; +template +struct FFTPlanType { + using fftw_plan_type = ScopedFFTWPlan; + using hipfft_plan_type = ScopedHIPfftPlan; + using type = std::conditional_t, + hipfft_plan_type, fftw_plan_type>; +}; + template auto direction_type(Direction direction) { static constexpr FFTDirectionType FORWARD = @@ -155,11 +195,6 @@ struct FFTDataType { using complex128 = hipfftDoubleComplex; }; -template -struct FFTPlanType { - using type = hipfftHandle; -}; - template using TransformType = hipfftType; @@ -197,6 +232,11 @@ struct transform_type, static constexpr hipfftType type() { return m_type; }; }; +template +struct FFTPlanType { + using type = ScopedHIPfftPlan; +}; + template auto direction_type(Direction direction) { return direction == Direction::forward ? HIPFFT_FORWARD : HIPFFT_BACKWARD; diff --git a/fft/src/KokkosFFT_Host_plans.hpp b/fft/src/KokkosFFT_Host_plans.hpp index 7f24dde8..ad96c1ea 100644 --- a/fft/src/KokkosFFT_Host_plans.hpp +++ b/fft/src/KokkosFFT_Host_plans.hpp @@ -13,33 +13,16 @@ namespace KokkosFFT { namespace Impl { - -template -void init_threads([[maybe_unused]] const ExecutionSpace& exec_space) { -#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) - int nthreads = exec_space.concurrency(); - - if constexpr (std::is_same_v) { - fftwf_init_threads(); - fftwf_plan_with_nthreads(nthreads); - } else { - fftw_init_threads(); - fftw_plan_with_nthreads(nthreads); - } -#endif -} - // batched transform, over ND Views template , std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction direction, axis_type axes, - shape_type s, bool is_inplace) { + const OutViewType& out, Direction direction, + axis_type axes, shape_type s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -57,15 +40,7 @@ auto create_plan(const ExecutionSpace& exec_space, using out_value_type = typename OutViewType::non_const_value_type; Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_fftw]"); - const int rank = fft_rank; - init_threads>( - exec_space); - - constexpr auto type = - KokkosFFT::Impl::transform_type::type(); auto [in_extents, out_extents, fft_extents, howmany] = KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, @@ -85,47 +60,14 @@ auto create_plan(const ExecutionSpace& exec_space, [[maybe_unused]] auto sign = KokkosFFT::Impl::direction_type(direction); - plan = std::make_unique(); - if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { - *plan = fftwf_plan_many_dft_r2c( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); - } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { - *plan = fftw_plan_many_dft_r2c( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); - } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { - *plan = fftwf_plan_many_dft_c2r( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); - } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { - *plan = fftw_plan_many_dft_c2r( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); - } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { - *plan = fftwf_plan_many_dft( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); - } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { - *plan = fftw_plan_many_dft( - rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, - idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); - } + plan = std::make_unique(exec_space, rank, fft_extents.data(), + howmany, idata, in_extents.data(), istride, + idist, odata, out_extents.data(), ostride, + odist, sign, FFTW_ESTIMATE); return fft_size; } -template , std::nullptr_t> = - nullptr> -void destroy_plan_and_info(std::unique_ptr& plan, InfoType&) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_fftw]"); - if constexpr (std::is_same_v) { - fftwf_destroy_plan(*plan); - } else { - fftw_destroy_plan(*plan); - } -} } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Host_transform.hpp b/fft/src/KokkosFFT_Host_transform.hpp index 749e58b3..b4a54805 100644 --- a/fft/src/KokkosFFT_Host_transform.hpp +++ b/fft/src/KokkosFFT_Host_transform.hpp @@ -10,46 +10,53 @@ namespace KokkosFFT { namespace Impl { -template -void exec_plan(PlanType& plan, float* idata, fftwf_complex* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftwf_execute_dft_r2c(plan, idata, odata); + +template +void exec_plan(const ScopedPlanType& scoped_plan, float* idata, + fftwf_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecR2C]"); + fftwf_execute_dft_r2c(scoped_plan.plan(), idata, odata); } -template -void exec_plan(PlanType& plan, double* idata, fftw_complex* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftw_execute_dft_r2c(plan, idata, odata); +template +void exec_plan(const ScopedPlanType& scoped_plan, double* idata, + fftw_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecD2Z]"); + fftw_execute_dft_r2c(scoped_plan.plan(), idata, odata); } -template -void exec_plan(PlanType& plan, fftwf_complex* idata, float* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftwf_execute_dft_c2r(plan, idata, odata); +template +void exec_plan(const ScopedPlanType& scoped_plan, fftwf_complex* idata, + float* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecC2R]"); + fftwf_execute_dft_c2r(scoped_plan.plan(), idata, odata); } -template -void exec_plan(PlanType& plan, fftw_complex* idata, double* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftw_execute_dft_c2r(plan, idata, odata); +template +void exec_plan(const ScopedPlanType& scoped_plan, fftw_complex* idata, + double* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecZ2D]"); + fftw_execute_dft_c2r(scoped_plan.plan(), idata, odata); } -template -void exec_plan(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftwf_execute_dft(plan, idata, odata); +template +void exec_plan(const ScopedPlanType& scoped_plan, fftwf_complex* idata, + fftwf_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecC2C]"); + fftwf_execute_dft(scoped_plan.plan(), idata, odata); } -template -void exec_plan(PlanType plan, fftw_complex* idata, fftw_complex* odata, - int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_fftw]"); - fftw_execute_dft(plan, idata, odata); +template +void exec_plan(const ScopedPlanType& scoped_plan, fftw_complex* idata, + fftw_complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_fftwExecZ2Z]"); + fftw_execute_dft(scoped_plan.plan(), idata, odata); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Host_types.hpp b/fft/src/KokkosFFT_Host_types.hpp index a8a24d75..85e754b2 100644 --- a/fft/src/KokkosFFT_Host_types.hpp +++ b/fft/src/KokkosFFT_Host_types.hpp @@ -5,27 +5,12 @@ #ifndef KOKKOSFFT_HOST_TYPES_HPP #define KOKKOSFFT_HOST_TYPES_HPP -#include -#include "KokkosFFT_common_types.hpp" -#include "KokkosFFT_utils.hpp" - -// Check the size of complex type -static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); - -static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); +#include "KokkosFFT_FFTW_Types.hpp" namespace KokkosFFT { namespace Impl { using FFTDirectionType = int; -// Unused -template -using FFTInfoType = int; - -enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; - template struct FFTDataType { using float32 = float; @@ -34,52 +19,15 @@ struct FFTDataType { using complex128 = fftw_complex; }; -template -struct FFTPlanType { - using type = std::conditional_t< - std::is_same_v, float>, - fftwf_plan, fftw_plan>; -}; - template using TransformType = FFTWTransformType; -// Define fft transform types template -struct transform_type { - static_assert(std::is_same_v, - "Real to real transform is unavailable"); -}; - -template -struct transform_type> { - static_assert(std::is_same_v, - "T1 and T2 should have the same precision"); - static constexpr FFTWTransformType m_type = std::is_same_v - ? FFTWTransformType::R2C - : FFTWTransformType::D2Z; - static constexpr FFTWTransformType type() { return m_type; }; -}; +using transform_type = fftw_transform_type; template -struct transform_type, T2> { - static_assert(std::is_same_v, - "T1 and T2 should have the same precision"); - static constexpr FFTWTransformType m_type = std::is_same_v - ? FFTWTransformType::C2R - : FFTWTransformType::Z2D; - static constexpr FFTWTransformType type() { return m_type; }; -}; - -template -struct transform_type, - Kokkos::complex> { - static_assert(std::is_same_v, - "T1 and T2 should have the same precision"); - static constexpr FFTWTransformType m_type = std::is_same_v - ? FFTWTransformType::C2C - : FFTWTransformType::Z2Z; - static constexpr FFTWTransformType type() { return m_type; }; +struct FFTPlanType { + using type = ScopedFFTWPlan; }; template diff --git a/fft/src/KokkosFFT_Plans.hpp b/fft/src/KokkosFFT_Plans.hpp index 5ec1e744..7f6b98af 100644 --- a/fft/src/KokkosFFT_Plans.hpp +++ b/fft/src/KokkosFFT_Plans.hpp @@ -5,7 +5,7 @@ /// \file KokkosFFT_Plans.hpp /// \brief Wrapping fft plans of different fft libraries /// -/// This file provides KokkosFFT::Impl::Plan. +/// This file provides KokkosFFT::Plan. /// This implements a local (no MPI) interface for fft plans #ifndef KOKKOSFFT_PLANS_HPP @@ -22,7 +22,7 @@ #if defined(KOKKOS_ENABLE_CUDA) #include "KokkosFFT_Cuda_plans.hpp" #include "KokkosFFT_Cuda_transform.hpp" -#ifdef ENABLE_HOST_AND_DEVICE +#if defined(ENABLE_HOST_AND_DEVICE) #include "KokkosFFT_Host_plans.hpp" #include "KokkosFFT_Host_transform.hpp" #endif @@ -34,14 +34,14 @@ #include "KokkosFFT_HIP_plans.hpp" #include "KokkosFFT_HIP_transform.hpp" #endif -#ifdef ENABLE_HOST_AND_DEVICE +#if defined(ENABLE_HOST_AND_DEVICE) #include "KokkosFFT_Host_plans.hpp" #include "KokkosFFT_Host_transform.hpp" #endif #elif defined(KOKKOS_ENABLE_SYCL) #include "KokkosFFT_SYCL_plans.hpp" #include "KokkosFFT_SYCL_transform.hpp" -#ifdef ENABLE_HOST_AND_DEVICE +#if defined(ENABLE_HOST_AND_DEVICE) #include "KokkosFFT_Host_plans.hpp" #include "KokkosFFT_Host_transform.hpp" #endif @@ -88,19 +88,12 @@ class Plan { typename KokkosFFT::Impl::FFTPlanType::type; - //! The type of fft info (used for rocfft only) - using fft_info_type = typename KokkosFFT::Impl::FFTInfoType; - //! The type of fft size using fft_size_type = std::size_t; //! The type of map for transpose using map_type = axis_type; - //! Naive 1D View for work buffer - using BufferViewType = - Kokkos::View*, layout_type, execSpace>; - //! The type of extents of input/output views using extents_type = shape_type; @@ -111,9 +104,6 @@ class Plan { //! Dynamically allocatable fft plan. std::unique_ptr m_plan; - //! fft info - fft_info_type m_info; - //! fft size fft_size_type m_fft_size = 1; @@ -143,15 +133,12 @@ class Plan { extents_type m_in_extents, m_out_extents; ///@} - //! Internal work buffer (for rocfft) - BufferViewType m_buffer; - public: /// \brief Constructor /// /// \param exec_space [in] Kokkos execution device /// \param in [in] Input data - /// \param out [in] Ouput data + /// \param out [in] Output data /// \param direction [in] Direction of FFT (forward/backward) /// \param axis [in] Axis over which FFT is performed /// \param n [in] Length of the transformed axis of the output (default, @@ -209,16 +196,16 @@ class Plan { KOKKOSFFT_THROW_IF(m_is_inplace && m_is_crop_or_pad_needed, "In-place transform is not supported with reshape. " "Please use out-of-place transform."); - m_fft_size = KokkosFFT::Impl::create_plan(exec_space, m_plan, in, out, - m_buffer, m_info, direction, - m_axes, s, m_is_inplace); + + m_fft_size = KokkosFFT::Impl::create_plan( + exec_space, m_plan, in, out, direction, m_axes, s, m_is_inplace); } /// \brief Constructor for multidimensional FFT /// /// \param exec_space [in] Kokkos execution space for this plan /// \param in [in] Input data - /// \param out [in] Ouput data + /// \param out [in] Output data /// \param direction [in] Direction of FFT (forward/backward) /// \param axes [in] Axes over which FFT is performed /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -272,16 +259,13 @@ class Plan { KOKKOSFFT_THROW_IF(m_is_inplace && m_is_crop_or_pad_needed, "In-place transform is not supported with reshape. " "Please use out-of-place transform."); - m_fft_size = - KokkosFFT::Impl::create_plan(exec_space, m_plan, in, out, m_buffer, - m_info, direction, axes, s, m_is_inplace); - } - ~Plan() { - KokkosFFT::Impl::destroy_plan_and_info(m_plan, m_info); + m_fft_size = KokkosFFT::Impl::create_plan(exec_space, m_plan, in, out, + direction, axes, s, m_is_inplace); } + ~Plan() noexcept = default; + Plan() = delete; Plan(const Plan&) = delete; Plan& operator=(const Plan&) = delete; @@ -291,7 +275,7 @@ class Plan { /// \brief Execute FFT on input and output Views with normalization /// /// \param in [in] Input data - /// \param out [out] Ouput data + /// \param out [out] Output data /// \param norm [in] How the normalization is applied (default, backward) void execute(const InViewType& in, const OutViewType& out, KokkosFFT::Normalization norm = @@ -358,7 +342,7 @@ class Plan { auto const direction = KokkosFFT::Impl::direction_type(m_direction); - KokkosFFT::Impl::exec_plan(*m_plan, idata, odata, direction, m_info); + KokkosFFT::Impl::exec_plan(*m_plan, idata, odata, direction); if constexpr (KokkosFFT::Impl::is_complex_v && KokkosFFT::Impl::is_real_v) { @@ -379,10 +363,10 @@ class Plan { /// \brief Sanity check of the plan used to call FFT interface with /// pre-defined FFT plan. This raises an error if there is an - /// incosistency between FFT function and plan + /// inconsistency between FFT function and plan /// /// \param in [in] Input data - /// \param out [in] Ouput data + /// \param out [in] Output data void good(const InViewType& in, const OutViewType& out) const { auto in_extents = KokkosFFT::Impl::extract_extents(in); auto out_extents = KokkosFFT::Impl::extract_extents(out); diff --git a/fft/src/KokkosFFT_ROCM_plans.hpp b/fft/src/KokkosFFT_ROCM_plans.hpp index 41d221b8..25853b58 100644 --- a/fft/src/KokkosFFT_ROCM_plans.hpp +++ b/fft/src/KokkosFFT_ROCM_plans.hpp @@ -5,217 +5,84 @@ #ifndef KOKKOSFFT_ROCM_PLANS_HPP #define KOKKOSFFT_ROCM_PLANS_HPP +<<<<<<< HEAD #include #include #include + ======= +>>>>>>> main #include "KokkosFFT_ROCM_types.hpp" #include "KokkosFFT_Extents.hpp" #include "KokkosFFT_traits.hpp" #include "KokkosFFT_asserts.hpp" #include "KokkosFFT_utils.hpp" -namespace KokkosFFT { -namespace Impl { -// Helper to get input and output array type and direction from transform type -template -auto get_in_out_array_type(TransformType type, Direction direction) { - rocfft_array_type in_array_type, out_array_type; - rocfft_transform_type fft_direction; - - if (type == FFTWTransformType::C2C || type == FFTWTransformType::Z2Z) { - in_array_type = rocfft_array_type_complex_interleaved; - out_array_type = rocfft_array_type_complex_interleaved; - fft_direction = direction == Direction::forward - ? rocfft_transform_type_complex_forward - : rocfft_transform_type_complex_inverse; - } else if (type == FFTWTransformType::R2C || type == FFTWTransformType::D2Z) { - in_array_type = rocfft_array_type_real; - out_array_type = rocfft_array_type_hermitian_interleaved; - fft_direction = rocfft_transform_type_real_forward; - } else if (type == FFTWTransformType::C2R || type == FFTWTransformType::Z2D) { - in_array_type = rocfft_array_type_hermitian_interleaved; - out_array_type = rocfft_array_type_real; - fft_direction = rocfft_transform_type_real_inverse; + namespace KokkosFFT { + namespace Impl { + + // batched transform, over ND Views + template , + std::nullptr_t> = nullptr> + auto create_plan(const ExecutionSpace& exec_space, + std::unique_ptr& plan, const InViewType& in, + const OutViewType& out, Direction direction, + axis_type axes, shape_type s, + bool is_inplace) { + static_assert( + KokkosFFT::Impl::are_operatable_views_v, + "create_plan: InViewType and OutViewType must have the same base " + "floating point type (float/double), the same layout " + "(LayoutLeft/LayoutRight), " + "and the same rank. ExecutionSpace must be accessible to the data in " + "InViewType and OutViewType."); + + static_assert(InViewType::rank() >= fft_rank, + "KokkosFFT::create_plan: Rank of View must be larger than " + "Rank of FFT."); + + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; + + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::create_plan[TPL_rocfft]"); + + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents, howmany] = + KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); + + // Create a plan + plan = + std::make_unique(type, in_extents, out_extents, fft_extents, + howmany, direction, is_inplace); + plan->commit(exec_space); + + // Calculate the total size of the FFT + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); + + return fft_size; } - return std::tuple( - {in_array_type, out_array_type, fft_direction}); -}; - -template -rocfft_precision get_in_out_array_type() { - return std::is_same_v, - float> - ? rocfft_precision_single - : rocfft_precision_double; -} - -// Helper to convert the integer type of vectors -template -auto convert_int_type_and_reverse(std::vector& in) - -> std::vector { - std::vector out(in.size()); - std::transform( - in.begin(), in.end(), out.begin(), - [](const InType v) -> OutType { return static_cast(v); }); - - std::reverse(out.begin(), out.end()); - return out; -} - -// Helper to compute strides from extents -// (n0, n1, n2) -> (1, n0, n0*n1) -// (n0, n1) -> (1, n0) -// (n0) -> (1) -template -auto compute_strides(const std::vector& extents) - -> std::vector { - std::vector out = {1}; - auto reversed_extents = extents; - std::reverse(reversed_extents.begin(), reversed_extents.end()); - - for (std::size_t i = 1; i < reversed_extents.size(); i++) { - out.push_back(static_cast(reversed_extents.at(i - 1)) * - out.at(i - 1)); +<<<<<<< HEAD + template , + std::nullptr_t> = nullptr> + void destroy_plan_and_info(std::unique_ptr& plan, + InfoType& execution_info) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::destroy_plan[TPL_rocfft]"); + + rocfft_execution_info_destroy(execution_info); + rocfft_plan_destroy(*plan); } - - return out; -} - -// batched transform, over ND Views -template , - std::nullptr_t> = nullptr> -auto create_plan(const ExecutionSpace& exec_space, - std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType& buffer, - InfoType& execution_info, Direction direction, - axis_type axes, shape_type s, - bool is_inplace) { - static_assert( - KokkosFFT::Impl::are_operatable_views_v, - "create_plan: InViewType and OutViewType must have the same base " - "floating point type (float/double), the same layout " - "(LayoutLeft/LayoutRight), " - "and the same rank. ExecutionSpace must be accessible to the data in " - "InViewType and OutViewType."); - - static_assert( - InViewType::rank() >= fft_rank, - "KokkosFFT::create_plan: Rank of View must be larger than Rank of FFT."); - - using in_value_type = typename InViewType::non_const_value_type; - using out_value_type = typename OutViewType::non_const_value_type; - - Kokkos::Profiling::ScopedRegion region("KokkosFFT::create_plan[TPL_rocfft]"); - - constexpr auto type = - KokkosFFT::Impl::transform_type::type(); - auto [in_extents, out_extents, fft_extents, howmany] = - KokkosFFT::Impl::get_extents(in, out, axes, s, is_inplace); - int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, - std::multiplies<>()); - int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, - std::multiplies<>()); - int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, - std::multiplies<>()); - - // For the moment, considering the contiguous layout only - // Create plan - auto in_strides = compute_strides(in_extents); - auto out_strides = compute_strides(out_extents); - auto reversed_fft_extents = - convert_int_type_and_reverse(fft_extents); - - // Create the description - rocfft_plan_description description; - rocfft_status status = rocfft_plan_description_create(&description); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_plan_description_create failed"); - - auto [in_array_type, out_array_type, fft_direction] = - get_in_out_array_type(type, direction); - rocfft_precision precision = get_in_out_array_type(); - - status = rocfft_plan_description_set_data_layout( - description, // description handle - in_array_type, // input array type - out_array_type, // output array type - nullptr, // offsets to start of input data - nullptr, // offsets to start of output data - in_strides.size(), // input stride length - in_strides.data(), // input stride data - idist, // input batch distance - out_strides.size(), // output stride length - out_strides.data(), // output stride data - odist); // output batch distance - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_plan_description_set_data_layout failed"); - - // Out-of-place transform - const rocfft_result_placement place = - is_inplace ? rocfft_placement_inplace : rocfft_placement_notinplace; - - // Create a plan - plan = std::make_unique(); - status = rocfft_plan_create(&(*plan), place, fft_direction, precision, - reversed_fft_extents.size(), // Dimension - reversed_fft_extents.data(), // Lengths - howmany, // Number of transforms - description // Description - ); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_plan_create failed"); - - // Prepare workbuffer and set execution information - status = rocfft_execution_info_create(&execution_info); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_execution_info_create failed"); - - // set stream - // NOTE: The stream must be of type hipStream_t. - // It is an error to pass the address of a hipStream_t object. - hipStream_t stream = exec_space.hip_stream(); - status = rocfft_execution_info_set_stream(execution_info, stream); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_execution_info_set_stream failed"); - - std::size_t workbuffersize = 0; - status = rocfft_plan_get_work_buffer_size(*plan, &workbuffersize); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_plan_get_work_buffer_size failed"); - - if (workbuffersize > 0) { - buffer = BufferViewType("work_buffer", workbuffersize); - status = rocfft_execution_info_set_work_buffer( - execution_info, (void*)buffer.data(), workbuffersize); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_execution_info_set_work_buffer failed"); - } - - status = rocfft_plan_description_destroy(description); - KOKKOSFFT_THROW_IF(status != rocfft_status_success, - "rocfft_plan_description_destroy failed"); - - return fft_size; -} - -template , - std::nullptr_t> = nullptr> -void destroy_plan_and_info(std::unique_ptr& plan, - InfoType& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_rocfft]"); - - rocfft_execution_info_destroy(execution_info); - rocfft_plan_destroy(*plan); -} -} // namespace Impl +======= +>>>>>>> main + } // namespace Impl } // namespace KokkosFFT #endif diff --git a/fft/src/KokkosFFT_ROCM_transform.hpp b/fft/src/KokkosFFT_ROCM_transform.hpp index d6e4860d..bb89baff 100644 --- a/fft/src/KokkosFFT_ROCM_transform.hpp +++ b/fft/src/KokkosFFT_ROCM_transform.hpp @@ -9,69 +9,81 @@ #include #include #include "KokkosFFT_asserts.hpp" +#include "KokkosFFT_ROCM_types.hpp" namespace KokkosFFT { namespace Impl { -inline void exec_plan(rocfft_plan& plan, float* idata, - std::complex* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan(const ScopedRocfftPlan& scoped_plan, float* idata, + std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecR2C]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for R2C failed"); } -inline void exec_plan(rocfft_plan& plan, double* idata, - std::complex* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan(const ScopedRocfftPlan& scoped_plan, + double* idata, std::complex* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecD2Z]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for D2Z failed"); } -inline void exec_plan(rocfft_plan& plan, std::complex* idata, - float* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan( + const ScopedRocfftPlan>& scoped_plan, + std::complex* idata, float* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecC2R]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2R failed"); } -inline void exec_plan(rocfft_plan& plan, std::complex* idata, - double* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan( + const ScopedRocfftPlan>& scoped_plan, + std::complex* idata, double* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecZ2D]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2D failed"); } -inline void exec_plan(rocfft_plan& plan, std::complex* idata, - std::complex* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan( + const ScopedRocfftPlan>& scoped_plan, + std::complex* idata, std::complex* odata, int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecC2C]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for C2C failed"); } -inline void exec_plan(rocfft_plan& plan, std::complex* idata, - std::complex* odata, int /*direction*/, - const rocfft_execution_info& execution_info) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_rocfft]"); +inline void exec_plan( + const ScopedRocfftPlan>& scoped_plan, + std::complex* idata, std::complex* odata, + int /*direction*/) { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_rocfftExecZ2Z]"); rocfft_status status = - rocfft_execute(plan, (void**)&idata, (void**)&odata, execution_info); + rocfft_execute(scoped_plan.plan(), (void**)&idata, (void**)&odata, + scoped_plan.execution_info()); KOKKOSFFT_THROW_IF(status != rocfft_status_success, "rocfft_execute for Z2Z failed"); } - } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_ROCM_types.hpp b/fft/src/KokkosFFT_ROCM_types.hpp index 60af7e57..38c6682e 100644 --- a/fft/src/KokkosFFT_ROCM_types.hpp +++ b/fft/src/KokkosFFT_ROCM_types.hpp @@ -5,9 +5,18 @@ #ifndef KOKKOSFFT_ROCM_TYPES_HPP #define KOKKOSFFT_ROCM_TYPES_HPP +#include +#include #include #include +#include +#include #include "KokkosFFT_common_types.hpp" +#include "KokkosFFT_traits.hpp" +#include "KokkosFFT_asserts.hpp" +#if defined(ENABLE_HOST_AND_DEVICE) +#include "KokkosFFT_FFTW_Types.hpp" +#endif // Check the size of complex type static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); @@ -17,27 +26,257 @@ static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); static_assert(alignof(std::complex) <= alignof(Kokkos::complex)); -#ifdef ENABLE_HOST_AND_DEVICE -#include -#include "KokkosFFT_utils.hpp" -static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); - -static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); -#endif - namespace KokkosFFT { namespace Impl { using FFTDirectionType = int; constexpr FFTDirectionType ROCFFT_FORWARD = 1; constexpr FFTDirectionType ROCFFT_BACKWARD = -1; +#if !defined(ENABLE_HOST_AND_DEVICE) enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; +#endif template using TransformType = FFTWTransformType; +/// \brief A class that wraps rocfft_plan_description for RAII +struct ScopedRocfftPlanDescription { + private: + rocfft_plan_description m_description; + + public: + ScopedRocfftPlanDescription() { + rocfft_status status = rocfft_plan_description_create(&m_description); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_plan_description_create failed"); + } + ~ScopedRocfftPlanDescription() noexcept { + rocfft_status status = rocfft_plan_description_destroy(m_description); + if (status != rocfft_status_success) + Kokkos::abort("rocfft_plan_description_destroy failed"); + } + + ScopedRocfftPlanDescription(const ScopedRocfftPlanDescription &) = delete; + ScopedRocfftPlanDescription &operator=(const ScopedRocfftPlanDescription &) = + delete; + ScopedRocfftPlanDescription &operator=(ScopedRocfftPlanDescription &&) = + delete; + ScopedRocfftPlanDescription(ScopedRocfftPlanDescription &&) = delete; + + rocfft_plan_description description() const noexcept { return m_description; } +}; + +/// \brief A class that wraps rocfft_execution_info for RAII +struct ScopedRocfftExecutionInfo { + private: + rocfft_execution_info m_execution_info; + + //! Internal work buffer + void *m_workbuffer = nullptr; + + public: + ScopedRocfftExecutionInfo() { + // Prepare workbuffer and set execution information + rocfft_status status = rocfft_execution_info_create(&m_execution_info); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_execution_info_create failed"); + } + ~ScopedRocfftExecutionInfo() noexcept { + if (m_workbuffer != nullptr) { + Kokkos::kokkos_free(m_workbuffer); + } + rocfft_status status = rocfft_execution_info_destroy(m_execution_info); + if (status != rocfft_status_success) + Kokkos::abort("rocfft_execution_info_destroy failed"); + } + + ScopedRocfftExecutionInfo(const ScopedRocfftExecutionInfo &) = delete; + ScopedRocfftExecutionInfo &operator=(const ScopedRocfftExecutionInfo &) = + delete; + ScopedRocfftExecutionInfo &operator=(ScopedRocfftExecutionInfo &&) = delete; + ScopedRocfftExecutionInfo(ScopedRocfftExecutionInfo &&) = delete; + + rocfft_execution_info execution_info() const noexcept { + return m_execution_info; + } + + void setup(const Kokkos::HIP &exec_space, std::size_t workbuffersize) { + // set stream + // NOTE: The stream must be of type hipStream_t. + // It is an error to pass the address of a hipStream_t object. + hipStream_t stream = exec_space.hip_stream(); + rocfft_status status = + rocfft_execution_info_set_stream(m_execution_info, stream); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_execution_info_set_stream failed"); + + // Set work buffer + if (workbuffersize > 0) { + m_workbuffer = + Kokkos::kokkos_malloc("workbuffer", workbuffersize); + + status = rocfft_execution_info_set_work_buffer( + m_execution_info, m_workbuffer, workbuffersize); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_execution_info_set_work_buffer failed"); + } + } +}; + +/// \brief A class that wraps rocfft for RAII +template +struct ScopedRocfftPlan { + private: + using floating_point_type = KokkosFFT::Impl::base_floating_point_type; + rocfft_precision m_precision = std::is_same_v + ? rocfft_precision_single + : rocfft_precision_double; + rocfft_plan m_plan; + std::unique_ptr m_execution_info; + + public: + ScopedRocfftPlan(const FFTWTransformType transform_type, + const std::vector &in_extents, + const std::vector &out_extents, + const std::vector &fft_extents, int howmany, + Direction direction, bool is_inplace) { + auto [in_array_type, out_array_type, fft_direction] = + get_in_out_array_type(transform_type, direction); + + // Compute dist and strides from extents + int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, + std::multiplies<>()); + int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, + std::multiplies<>()); + + auto in_strides = compute_strides(in_extents); + auto out_strides = compute_strides(out_extents); + auto reversed_fft_extents = + convert_int_type_and_reverse(fft_extents); + + // Create a plan description + ScopedRocfftPlanDescription scoped_description; + rocfft_status status = rocfft_plan_description_set_data_layout( + scoped_description.description(), // description handle + in_array_type, // input array type + out_array_type, // output array type + nullptr, // offsets to start of input data + nullptr, // offsets to start of output data + in_strides.size(), // input stride length + in_strides.data(), // input stride data + idist, // input batch distance + out_strides.size(), // output stride length + out_strides.data(), // output stride data + odist); // output batch distance + + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_plan_description_set_data_layout failed"); + + // inplace or Out-of-place transform + const rocfft_result_placement place = + is_inplace ? rocfft_placement_inplace : rocfft_placement_notinplace; + + // Create a plan + status = rocfft_plan_create(&m_plan, place, fft_direction, m_precision, + reversed_fft_extents.size(), // Dimension + reversed_fft_extents.data(), // Lengths + howmany, // Number of transforms + scoped_description.description() // Description + ); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_plan_create failed"); + } + ~ScopedRocfftPlan() noexcept { + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::cleanup_plan[TPL_rocfft]"); + rocfft_status status = rocfft_plan_destroy(m_plan); + if (status != rocfft_status_success) + Kokkos::abort("rocfft_plan_destroy failed"); + } + + ScopedRocfftPlan() = delete; + ScopedRocfftPlan(const ScopedRocfftPlan &) = delete; + ScopedRocfftPlan &operator=(const ScopedRocfftPlan &) = delete; + ScopedRocfftPlan &operator=(ScopedRocfftPlan &&) = delete; + ScopedRocfftPlan(ScopedRocfftPlan &&) = delete; + + rocfft_plan plan() const noexcept { return m_plan; } + rocfft_execution_info execution_info() const noexcept { + return m_execution_info->execution_info(); + } + + void commit(const Kokkos::HIP &exec_space) { + std::size_t workbuffersize = 0; + rocfft_status status = + rocfft_plan_get_work_buffer_size(m_plan, &workbuffersize); + KOKKOSFFT_THROW_IF(status != rocfft_status_success, + "rocfft_plan_get_work_buffer_size failed"); + + m_execution_info = std::make_unique(); + m_execution_info->setup(exec_space, workbuffersize); + } + + // Helper to get input and output array type and direction from transform type + auto get_in_out_array_type(FFTWTransformType type, Direction direction) { + rocfft_array_type in_array_type, out_array_type; + rocfft_transform_type fft_direction; + + if (type == FFTWTransformType::C2C || type == FFTWTransformType::Z2Z) { + in_array_type = rocfft_array_type_complex_interleaved; + out_array_type = rocfft_array_type_complex_interleaved; + fft_direction = direction == Direction::forward + ? rocfft_transform_type_complex_forward + : rocfft_transform_type_complex_inverse; + } else if (type == FFTWTransformType::R2C || + type == FFTWTransformType::D2Z) { + in_array_type = rocfft_array_type_real; + out_array_type = rocfft_array_type_hermitian_interleaved; + fft_direction = rocfft_transform_type_real_forward; + } else if (type == FFTWTransformType::C2R || + type == FFTWTransformType::Z2D) { + in_array_type = rocfft_array_type_hermitian_interleaved; + out_array_type = rocfft_array_type_real; + fft_direction = rocfft_transform_type_real_inverse; + } + + return std::tuple( + {in_array_type, out_array_type, fft_direction}); + }; + + // Helper to convert the integer type of vectors + template + auto convert_int_type_and_reverse(const std::vector &in) + -> std::vector { + std::vector out(in.size()); + std::transform( + in.cbegin(), in.cend(), out.begin(), + [](const InType v) -> OutType { return static_cast(v); }); + + std::reverse(out.begin(), out.end()); + return out; + } + + // Helper to compute strides from extents + // (n0, n1, n2) -> (1, n0, n0*n1) + // (n0, n1) -> (1, n0) + // (n0) -> (1) + template + auto compute_strides(const std::vector &extents) + -> std::vector { + std::vector out = {1}; + auto reversed_extents = extents; + std::reverse(reversed_extents.begin(), reversed_extents.end()); + + for (std::size_t i = 1; i < reversed_extents.size(); i++) { + out.push_back(static_cast(reversed_extents.at(i - 1)) * + out.at(i - 1)); + } + + return out; + } +}; + // Define fft transform types template struct transform_type { @@ -76,7 +315,7 @@ struct transform_type, static constexpr FFTWTransformType type() { return m_type; }; }; -#ifdef ENABLE_HOST_AND_DEVICE +#if defined(ENABLE_HOST_AND_DEVICE) template struct FFTDataType { @@ -92,18 +331,12 @@ struct FFTDataType { template struct FFTPlanType { - using fftwHandle = std::conditional_t< - std::is_same_v, float>, - fftwf_plan, fftw_plan>; + using fftw_plan_type = ScopedFFTWPlan; + using rocfft_plan_type = ScopedRocfftPlan; using type = std::conditional_t, - rocfft_plan, fftwHandle>; + rocfft_plan_type, fftw_plan_type>; }; -template -using FFTInfoType = - std::conditional_t, - rocfft_execution_info, int>; - template auto direction_type(Direction direction) { static constexpr FFTDirectionType FORWARD = @@ -126,12 +359,9 @@ struct FFTDataType { template struct FFTPlanType { - using type = rocfft_plan; + using type = ScopedRocfftPlan; }; -template -using FFTInfoType = rocfft_execution_info; - template auto direction_type(Direction direction) { return direction == Direction::forward ? ROCFFT_FORWARD : ROCFFT_BACKWARD; diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp index e8da9fdc..2fed7d18 100644 --- a/fft/src/KokkosFFT_SYCL_plans.hpp +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -47,15 +47,14 @@ auto compute_strides(std::vector& extents) -> std::vector { // batched transform, over ND Views template < typename ExecutionSpace, typename PlanType, typename InViewType, - typename OutViewType, typename BufferViewType, typename InfoType, - std::size_t fft_rank = 1, + typename OutViewType, std::size_t fft_rank = 1, std::enable_if_t, std::nullptr_t> = nullptr> auto create_plan(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, - const OutViewType& out, BufferViewType&, InfoType&, - Direction /*direction*/, axis_type axes, - shape_type s, bool is_inplace) { + const OutViewType& out, Direction /*direction*/, + axis_type axes, shape_type s, + bool is_inplace) { static_assert( KokkosFFT::Impl::are_operatable_views_v, @@ -112,15 +111,6 @@ auto create_plan(const ExecutionSpace& exec_space, return fft_size; } - -template < - typename ExecutionSpace, typename PlanType, typename InfoType, - std::enable_if_t, - std::nullptr_t> = nullptr> -void destroy_plan_and_info(std::unique_ptr&, InfoType&) { - // In oneMKL, plans are destroybed by destructor - Kokkos::Profiling::ScopedRegion region("KokkosFFT::destroy_plan[TPL_oneMKL]"); -} } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp index 39c5d2b6..3c6e354b 100644 --- a/fft/src/KokkosFFT_SYCL_transform.hpp +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -14,7 +14,8 @@ namespace Impl { template void exec_plan(PlanType& plan, float* idata, std::complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecR2C]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); } @@ -22,7 +23,8 @@ void exec_plan(PlanType& plan, float* idata, std::complex* odata, template void exec_plan(PlanType& plan, double* idata, std::complex* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecD2Z]"); oneapi::mkl::dft::compute_forward(plan, idata, reinterpret_cast(odata)); } @@ -30,7 +32,8 @@ void exec_plan(PlanType& plan, double* idata, std::complex* odata, template void exec_plan(PlanType& plan, std::complex* idata, float* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecC2R]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); } @@ -38,7 +41,8 @@ void exec_plan(PlanType& plan, std::complex* idata, float* odata, template void exec_plan(PlanType& plan, std::complex* idata, double* odata, int /*direction*/, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecZ2D]"); oneapi::mkl::dft::compute_backward(plan, reinterpret_cast(idata), odata); } @@ -46,7 +50,8 @@ void exec_plan(PlanType& plan, std::complex* idata, double* odata, template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecC2C]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { @@ -57,7 +62,8 @@ void exec_plan(PlanType& plan, std::complex* idata, template void exec_plan(PlanType& plan, std::complex* idata, std::complex* odata, int direction, Args...) { - Kokkos::Profiling::ScopedRegion region("KokkosFFT::exec_plan[TPL_oneMKL]"); + Kokkos::Profiling::ScopedRegion region( + "KokkosFFT::exec_plan[TPL_oneMKLExecZ2Z]"); if (direction == 1) { oneapi::mkl::dft::compute_forward(plan, idata, odata); } else { diff --git a/fft/src/KokkosFFT_SYCL_types.hpp b/fft/src/KokkosFFT_SYCL_types.hpp index df359aca..9c6cb86b 100644 --- a/fft/src/KokkosFFT_SYCL_types.hpp +++ b/fft/src/KokkosFFT_SYCL_types.hpp @@ -12,6 +12,10 @@ #include "KokkosFFT_common_types.hpp" #include "KokkosFFT_utils.hpp" +#if defined(ENABLE_HOST_AND_DEVICE) +#include "KokkosFFT_FFTW_Types.hpp" +#endif + // Check the size of complex type // [TO DO] I guess this kind of test is already made by Kokkos itself static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); @@ -21,26 +25,15 @@ static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); static_assert(alignof(std::complex) <= alignof(Kokkos::complex)); -#ifdef ENABLE_HOST_AND_DEVICE -#include -static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); - -static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); -static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); -#endif - namespace KokkosFFT { namespace Impl { using FFTDirectionType = int; constexpr FFTDirectionType MKL_FFT_FORWARD = 1; constexpr FFTDirectionType MKL_FFT_BACKWARD = -1; -// Unused -template -using FFTInfoType = int; - +#if !defined(ENABLE_HOST_AND_DEVICE) enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; +#endif template using TransformType = FFTWTransformType; @@ -83,7 +76,7 @@ struct transform_type, static constexpr FFTWTransformType type() { return m_type; }; }; -#ifdef ENABLE_HOST_AND_DEVICE +#if defined(ENABLE_HOST_AND_DEVICE) template struct FFTDataType { @@ -115,11 +108,7 @@ struct FFTPlanType> { static constexpr oneapi::mkl::dft::domain dom = oneapi::mkl::dft::domain::REAL; - using fftwHandle = std::conditional_t< - std::is_same_v, - float>, - fftwf_plan, fftw_plan>; - + using fftwHandle = ScopedFFTWPlan>; using onemklHandle = oneapi::mkl::dft::descriptor; using type = std::conditional_t< std::is_same_v, onemklHandle, @@ -137,11 +126,7 @@ struct FFTPlanType, T2> { static constexpr oneapi::mkl::dft::domain dom = oneapi::mkl::dft::domain::REAL; - using fftwHandle = std::conditional_t< - std::is_same_v, - float>, - fftwf_plan, fftw_plan>; - + using fftwHandle = ScopedFFTWPlan, T2>; using onemklHandle = oneapi::mkl::dft::descriptor; using type = std::conditional_t< std::is_same_v, onemklHandle, @@ -159,11 +144,8 @@ struct FFTPlanType, Kokkos::complex> { static constexpr oneapi::mkl::dft::domain dom = oneapi::mkl::dft::domain::COMPLEX; - using fftwHandle = std::conditional_t< - std::is_same_v, - float>, - fftwf_plan, fftw_plan>; - + using fftwHandle = + ScopedFFTWPlan, Kokkos::complex>; using onemklHandle = oneapi::mkl::dft::descriptor; using type = std::conditional_t< std::is_same_v, onemklHandle, diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 374ff4dd..0dba4d6e 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -17,7 +17,7 @@ namespace KokkosFFT { /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -49,7 +49,7 @@ void fft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -81,7 +81,7 @@ void ifft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (real) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -119,7 +119,7 @@ void rfft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (real) +/// \param out [out] Output data (real) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -158,7 +158,7 @@ void irfft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (real) +/// \param out [out] Output data (real) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -204,7 +204,7 @@ void hfft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (real) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axis [in] Axis over which FFT is performed (default, -1) /// \param n [in] Length of the transformed axis of the output (default, @@ -248,7 +248,7 @@ void ihfft(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axes [in] Axes over which FFT is performed (default, {-2, -1}) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -279,7 +279,7 @@ void fft2(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axes [in] Axes over which FFT is performed (default, {-2, -1}) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -310,7 +310,7 @@ void ifft2(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (real) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param norm [in] How the normalization is applied (default, backward) /// \param axes [in] Axes over which FFT is performed (default, {-2, -1}) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -347,7 +347,7 @@ void rfft2(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (real) +/// \param out [out] Output data (real) /// \param norm [in] How the normalization is applied (default, backward) /// \param axes [in] Axes over which FFT is performed (default, {-2, -1}) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -386,7 +386,7 @@ void irfft2(const ExecutionSpace& exec_space, const InViewType& in, /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param axes [in] Axes over which FFT is performed (default, all axes) /// \param norm [in] How the normalization is applied (default, backward) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -429,7 +429,7 @@ void fftn( /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param axes [in] Axes over which FFT is performed (default, all axes) /// \param norm [in] How the normalization is applied (default, backward) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -474,7 +474,7 @@ void ifftn( /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (real) -/// \param out [out] Ouput data (complex) +/// \param out [out] Output data (complex) /// \param axes [in] Axes over which FFT is performed (default, all axes) /// \param norm [in] How the normalization is applied (default, backward) /// \param s [in] Shape of the transformed axis of the output (default, {}) @@ -525,7 +525,7 @@ void rfftn( /// /// \param exec_space [in] Kokkos execution space /// \param in [in] Input data (complex) -/// \param out [out] Ouput data (real) +/// \param out [out] Output data (real) /// \param axes [in] Axes over which FFT is performed (default, all axes) /// \param norm [in] How the normalization is applied (default, backward) /// \param s [in] Shape of the transformed axis of the output (default, {}) diff --git a/fft/unit_test/Test_Plans.cpp b/fft/unit_test/Test_Plans.cpp index 6d096f41..b71cb737 100644 --- a/fft/unit_test/Test_Plans.cpp +++ b/fft/unit_test/Test_Plans.cpp @@ -155,7 +155,7 @@ void test_plan_1dfft_1dview() { KokkosFFT::Direction::backward, /*axes=*/axes_type<1>({0})); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { @@ -230,7 +230,7 @@ void test_plan_1dfft_2dview() { KokkosFFT::Direction::forward, /*axis=*/1); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { @@ -333,7 +333,7 @@ void test_plan_1dfft_3dview() { KokkosFFT::Direction::backward, /*axis=*/2); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { @@ -443,7 +443,7 @@ void test_plan_2dfft_2dview() { KokkosFFT::Direction::forward, /*axes=*/axes_type<2>({1, 0})); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { @@ -550,7 +550,7 @@ void test_plan_2dfft_3dview() { KokkosFFT::Direction::forward, /*axes=*/axes_type<2>({2, 1})); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { @@ -730,7 +730,7 @@ void test_plan_3dfft_3dview() { KokkosFFT::Direction::forward, /*axes=*/axes_type<3>({2, 1, 0})); - // Check if errors are correctly raised aginst wrong dirction + // Check if errors are correctly raised against wrong direction // Input Real, Output Complex -> must be forward plan EXPECT_THROW( { diff --git a/fft/unit_test/Test_Transform.cpp b/fft/unit_test/Test_Transform.cpp index 60b36ba3..942f332d 100644 --- a/fft/unit_test/Test_Transform.cpp +++ b/fft/unit_test/Test_Transform.cpp @@ -230,7 +230,7 @@ void test_fft1_identity_reuse_plan(T atol = 1.0e-12) { KokkosFFT::Plan irfft_plan(execution_space(), ar_hat, inv_ar_hat, KokkosFFT::Direction::backward, axis); - // Check if errors are correctly raised aginst wrong extents + // Check if errors are correctly raised against wrong extents const int maxlen_wrong = 32 * 2; ComplexView1DType a_wrong("a_wrong", maxlen_wrong), inv_a_hat_wrong("inv_a_hat_wrong", maxlen_wrong); @@ -591,7 +591,7 @@ void test_fft1_1dfft_2dview(T atol = 1.e-12) { Kokkos::fence(); - // Along axis 0 (transpose neeed) + // Along axis 0 (transpose needed) // Perform batched 1D (along 0th axis) FFT sequentially for (int i1 = 0; i1 < n1; i1++) { auto sub_x = Kokkos::subview(x, Kokkos::ALL, i1); @@ -680,7 +680,7 @@ void test_fft1_1dfft_3dview(T atol = 1.e-12) { Kokkos::fence(); - // Along axis 0 (transpose neeed) + // Along axis 0 (transpose needed) // Perform batched 1D (along 0th axis) FFT sequentially for (int i2 = 0; i2 < n2; i2++) { for (int i1 = 0; i1 < n1; i1++) { @@ -710,7 +710,7 @@ void test_fft1_1dfft_3dview(T atol = 1.e-12) { Kokkos::deep_copy(x, ref_x); Kokkos::deep_copy(xr, ref_xr); - // Along axis 1 (transpose neeed) + // Along axis 1 (transpose needed) // Perform batched 1D (along 1st axis) FFT sequentially for (int i2 = 0; i2 < n2; i2++) { for (int i0 = 0; i0 < n0; i0++) { @@ -809,7 +809,7 @@ void test_fft1_1dfft_4dview(T atol = 1.e-12) { Kokkos::fence(); - // Along axis 0 (transpose neeed) + // Along axis 0 (transpose needed) // Perform batched 1D (along 0th axis) FFT sequentially for (int i3 = 0; i3 < n3; i3++) { for (int i2 = 0; i2 < n2; i2++) { @@ -841,7 +841,7 @@ void test_fft1_1dfft_4dview(T atol = 1.e-12) { Kokkos::deep_copy(x, ref_x); Kokkos::deep_copy(xr, ref_xr); - // Along axis 1 (transpose neeed) + // Along axis 1 (transpose needed) // Perform batched 1D (along 1st axis) FFT sequentially for (int i3 = 0; i3 < n3; i3++) { for (int i2 = 0; i2 < n2; i2++) { @@ -1231,7 +1231,7 @@ TYPED_TEST(FFT1D, FFT_1DView_shape) { test_fft1_shape(atol); } -// batced fft1 on 2D Views +// batched fft1 on 2D Views TYPED_TEST(FFT1D, FFT_batched_2DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1240,7 +1240,7 @@ TYPED_TEST(FFT1D, FFT_batched_2DView) { test_fft1_1dfft_2dview(atol); } -// batced fft1 on 3D Views +// batched fft1 on 3D Views TYPED_TEST(FFT1D, FFT_batched_3DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1249,7 +1249,7 @@ TYPED_TEST(FFT1D, FFT_batched_3DView) { test_fft1_1dfft_3dview(atol); } -// batced fft1 on 4D Views +// batched fft1 on 4D Views TYPED_TEST(FFT1D, FFT_batched_4DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1258,7 +1258,7 @@ TYPED_TEST(FFT1D, FFT_batched_4DView) { test_fft1_1dfft_4dview(atol); } -// batced fft1 on 5D Views +// batched fft1 on 5D Views TYPED_TEST(FFT1D, FFT_batched_5DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1267,7 +1267,7 @@ TYPED_TEST(FFT1D, FFT_batched_5DView) { test_fft1_1dfft_5dview(atol); } -// batced fft1 on 6D Views +// batched fft1 on 6D Views TYPED_TEST(FFT1D, FFT_batched_6DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1276,7 +1276,7 @@ TYPED_TEST(FFT1D, FFT_batched_6DView) { test_fft1_1dfft_6dview(atol); } -// batced fft1 on 7D Views +// batched fft1 on 7D Views TYPED_TEST(FFT1D, FFT_batched_7DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -1285,7 +1285,7 @@ TYPED_TEST(FFT1D, FFT_batched_7DView) { test_fft1_1dfft_7dview(atol); } -// batced fft1 on 8D Views +// batched fft1 on 8D Views TYPED_TEST(FFT1D, FFT_batched_8DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2354,7 +2354,7 @@ TYPED_TEST(FFT2D, 2DFFT_2DView_inplace) { test_fft2_2dfft_2dview_inplace(); } -// batced fft2 on 3D Views +// batched fft2 on 3D Views TYPED_TEST(FFT2D, FFT_batched_3DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2363,7 +2363,7 @@ TYPED_TEST(FFT2D, FFT_batched_3DView) { test_fft2_2dfft_3dview(atol); } -// batced fft2 on 4D Views +// batched fft2 on 4D Views TYPED_TEST(FFT2D, FFT_batched_4DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2372,7 +2372,7 @@ TYPED_TEST(FFT2D, FFT_batched_4DView) { test_fft2_2dfft_4dview(atol); } -// batced fft2 on 5D Views +// batched fft2 on 5D Views TYPED_TEST(FFT2D, FFT_batched_5DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2381,7 +2381,7 @@ TYPED_TEST(FFT2D, FFT_batched_5DView) { test_fft2_2dfft_5dview(atol); } -// batced fft2 on 6D Views +// batched fft2 on 6D Views TYPED_TEST(FFT2D, FFT_batched_6DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2390,7 +2390,7 @@ TYPED_TEST(FFT2D, FFT_batched_6DView) { test_fft2_2dfft_6dview(atol); } -// batced fft2 on 7D Views +// batched fft2 on 7D Views TYPED_TEST(FFT2D, FFT_batched_7DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -2399,7 +2399,7 @@ TYPED_TEST(FFT2D, FFT_batched_7DView) { test_fft2_2dfft_7dview(atol); } -// batced fft2 on 8D Views +// batched fft2 on 8D Views TYPED_TEST(FFT2D, FFT_batched_8DView) { using float_type = typename TestFixture::float_type; using layout_type = typename TestFixture::layout_type; @@ -3045,7 +3045,7 @@ void test_fftn_3dfft_3dview_shape(T atol = 1.0e-12) { for (auto&& shape2 : shapes2) { shape_type<3> new_shape = {shape0, shape1, shape2}; - // Real to comple + // Real to complex ComplexView3DType outr("outr", shape0, shape1, shape2 / 2 + 1), outr_b("outr_b", shape0, shape1, shape2 / 2 + 1), outr_o("outr_o", shape0, shape1, shape2 / 2 + 1),