From b8f621825dc16bc01d578ac3a13ddd8e6bfb4739 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 9 Nov 2023 13:02:38 -0800 Subject: [PATCH 1/4] Update nvcomp to 3.0.4 (includes API changes) (#314) Update the nvCOMP version used for compression/decompression to 3.0.4. See also: https://github.com/rapidsai/cudf/pull/13815 https://github.com/rapidsai/rapids-cmake/pull/451 Authors: - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - Mads R. B. Kristensen (https://github.com/madsbk) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/kvikio/pull/314 --- .github/workflows/pr.yaml | 18 +++---- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/kvikio/conda_build_config.yaml | 2 +- cpp/cmake/fetch_rapids.cmake | 2 + dependencies.yaml | 2 +- python/kvikio/_lib/libnvcomp.pyx | 30 ++++++----- python/kvikio/_lib/nvcomp_cxx_api.pxd | 46 +++++++++++++--- python/kvikio/nvcomp.py | 37 +++---------- python/tests/test_nvcomp.py | 54 ------------------- 10 files changed, 77 insertions(+), 118 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 487903763d..3f5e1157fd 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -18,7 +18,7 @@ jobs: - conda-python-build - conda-python-tests - docs-build - - devcontainer +# - devcontainer secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12 checks: @@ -58,11 +58,11 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - devcontainer: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12 - with: - build_command: | - sccache -z; - build-all; - sccache -s; +# devcontainer: +# secrets: inherit +# uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12 +# with: +# build_command: | +# sccache -z; +# build-all; +# sccache -s; diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 66be9679fd..92c1b177b2 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,7 +26,7 @@ dependencies: - numpy>=1.21 - numpydoc - nvcc_linux-64=11.8 -- nvcomp==2.6.1 +- nvcomp==3.0.4 - packaging - pre-commit - pytest diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index c5951e47d6..c09b551089 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -25,7 +25,7 @@ dependencies: - numcodecs <0.12.0 - numpy>=1.21 - numpydoc -- nvcomp==2.6.1 +- nvcomp==3.0.4 - packaging - pre-commit - pytest diff --git a/conda/recipes/kvikio/conda_build_config.yaml b/conda/recipes/kvikio/conda_build_config.yaml index 7bce9d8853..d86ab17880 100644 --- a/conda/recipes/kvikio/conda_build_config.yaml +++ b/conda/recipes/kvikio/conda_build_config.yaml @@ -17,4 +17,4 @@ cmake_version: - ">=3.26.4" nvcomp_version: - - "=2.6.1" + - "=3.0.4" diff --git a/cpp/cmake/fetch_rapids.cmake b/cpp/cmake/fetch_rapids.cmake index 2b6832a80c..26e915039e 100644 --- a/cpp/cmake/fetch_rapids.cmake +++ b/cpp/cmake/fetch_rapids.cmake @@ -11,6 +11,8 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= +set(rapids-cmake-repo vuule/rapids-cmake) +set(rapids-cmake-branch upgrade-nvcomp-3.0.0) if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index c91e5bb5a9..425077bd2c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -142,7 +142,7 @@ dependencies: common: - output_types: conda packages: - - nvcomp==2.6.1 + - nvcomp==3.0.4 specific: - output_types: conda matrices: diff --git a/python/kvikio/_lib/libnvcomp.pyx b/python/kvikio/_lib/libnvcomp.pyx index 19237d24ca..760a6b1254 100644 --- a/python/kvikio/_lib/libnvcomp.pyx +++ b/python/kvikio/_lib/libnvcomp.pyx @@ -39,8 +39,15 @@ from kvikio._lib.nvcomp_cxx_api cimport ( SnappyManager, create_manager, cudaStream_t, + nvcompBatchedANSDefaultOpts, + nvcompBatchedANSOpts_t, + nvcompBatchedBitcompFormatOpts, nvcompBatchedCascadedDefaultOpts, nvcompBatchedCascadedOpts_t, + nvcompBatchedGdeflateOpts_t, + nvcompBatchedLZ4Opts_t, + nvcompBatchedSnappyDefaultOpts, + nvcompBatchedSnappyOpts_t, nvcompManagerBase, nvcompType_t, ) @@ -134,14 +141,6 @@ cdef class _nvcompManager: self._decompression_config.get()[0] ) - def set_scratch_buffer(self, Array new_scratch_buffer): - return self._impl.set_scratch_buffer( - new_scratch_buffer.ptr - ) - - def get_required_scratch_buffer_size(self): - return self._impl.get_required_scratch_buffer_size() - def get_compressed_output_size(self, Array comp_buffer): return self._impl.get_compressed_output_size( comp_buffer.ptr @@ -157,6 +156,7 @@ cdef class _ANSManager(_nvcompManager): ): self._impl = new ANSManager( uncomp_chunk_size, + nvcompBatchedANSDefaultOpts, # TODO 0, # TODO device_id ) @@ -165,14 +165,16 @@ cdef class _ANSManager(_nvcompManager): cdef class _BitcompManager(_nvcompManager): def __cinit__( self, + size_t uncomp_chunk_size, nvcompType_t data_type, int bitcomp_algo, user_stream, const int device_id ): + cdef opts = nvcompBatchedBitcompFormatOpts(bitcomp_algo, data_type) self._impl = new BitcompManager( - data_type, - bitcomp_algo, + uncomp_chunk_size, + opts, 0, # TODO device_id ) @@ -186,6 +188,7 @@ cdef class _CascadedManager(_nvcompManager): const int device_id, ): self._impl = new CascadedManager( + _options["chunk_size"], nvcompBatchedCascadedDefaultOpts, # TODO 0, # TODO device_id, @@ -200,9 +203,10 @@ cdef class _GdeflateManager(_nvcompManager): user_stream, const int device_id ): + cdef opts = nvcompBatchedGdeflateOpts_t(algo) self._impl = new GdeflateManager( chunk_size, - algo, + opts, 0, # TODO device_id ) @@ -220,9 +224,10 @@ cdef class _LZ4Manager(_nvcompManager): # from anywhere up. I'm not going to rabbit hole on it until # everything else works. # cdef cudaStream_t stream = user_stream + cdef opts = nvcompBatchedLZ4Opts_t(data_type) self._impl = new LZ4Manager( uncomp_chunk_size, - data_type, + opts, 0, # TODO device_id ) @@ -240,6 +245,7 @@ cdef class _SnappyManager(_nvcompManager): # everything else works. self._impl = new SnappyManager( uncomp_chunk_size, + nvcompBatchedSnappyDefaultOpts, 0, # TODO device_id ) diff --git a/python/kvikio/_lib/nvcomp_cxx_api.pxd b/python/kvikio/_lib/nvcomp_cxx_api.pxd index e5b464d5c2..b6e90ed41a 100644 --- a/python/kvikio/_lib/nvcomp_cxx_api.pxd +++ b/python/kvikio/_lib/nvcomp_cxx_api.pxd @@ -106,8 +106,6 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp': uint8_t* decomp_buffer, const uint8_t* comp_buffer, const DecompressionConfig& decomp_config) - void set_scratch_buffer(uint8_t* new_scratch_buffer) except + - size_t get_required_scratch_buffer_size() except + size_t get_compressed_output_size(uint8_t* comp_buffer) except + cdef cppclass PimplManager "nvcomp::PimplManager": @@ -125,25 +123,38 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp': uint8_t* decomp_buffer, const uint8_t* comp_buffer, const DecompressionConfig& decomp_config) except + - void set_scratch_buffer(uint8_t* new_scratch_buffer) except + - size_t get_required_scratch_buffer_size() except + size_t get_compressed_output_size(uint8_t* comp_buffer) except + # C++ Concrete ANS Manager +cdef extern from "nvcomp/ans.h" nogil: + ctypedef enum nvcompANSType_t: + nvcomp_rANS = 0 + + ctypedef struct nvcompBatchedANSOpts_t: + nvcompANSType_t type + cdef nvcompBatchedANSOpts_t nvcompBatchedANSDefaultOpts + cdef extern from "nvcomp/ans.hpp": cdef cppclass ANSManager "nvcomp::ANSManager": ANSManager( size_t uncomp_chunk_size, + const nvcompBatchedANSOpts_t& format_opts, cudaStream_t user_stream, const int device_id ) except + # C++ Concrete Bitcomp Manager +cdef extern from "nvcomp/bitcomp.h" nogil: + ctypedef struct nvcompBatchedBitcompFormatOpts: + int algorithm_type + nvcompType_t data_type + cdef nvcompBatchedBitcompFormatOpts nvcompBatchedBitcompDefaultOpts + cdef extern from "nvcomp/bitcomp.hpp": cdef cppclass BitcompManager "nvcomp::BitcompManager": BitcompManager( - nvcompType_t data_type, - int bitcomp_algo, + size_t uncomp_chunk_size, + const nvcompBatchedBitcompFormatOpts& format_opts, cudaStream_t user_stream, const int device_id ) except + @@ -151,6 +162,8 @@ cdef extern from "nvcomp/bitcomp.hpp": # C++ Concrete Cascaded Manager cdef extern from "nvcomp/cascaded.h" nogil: ctypedef struct nvcompBatchedCascadedOpts_t: + size_t chunk_size + nvcompType_t type int num_RLEs int num_deltas int use_bp @@ -159,36 +172,53 @@ cdef extern from "nvcomp/cascaded.h" nogil: cdef extern from "nvcomp/cascaded.hpp" nogil: cdef cppclass CascadedManager "nvcomp::CascadedManager": CascadedManager( + size_t uncomp_chunk_size, const nvcompBatchedCascadedOpts_t& options, cudaStream_t user_stream, int device_id ) # C++ Concrete Gdeflate Manager +cdef extern from "nvcomp/gdeflate.h" nogil: + ctypedef struct nvcompBatchedGdeflateOpts_t: + int algo + cdef nvcompBatchedGdeflateOpts_t nvcompBatchedGdeflateDefaultOpts + cdef extern from "nvcomp/gdeflate.hpp": cdef cppclass GdeflateManager "nvcomp::GdeflateManager": GdeflateManager( int uncomp_chunk_size, - int algo, + const nvcompBatchedGdeflateOpts_t& format_opts, cudaStream_t user_stream, const int device_id ) except + # C++ Concrete LZ4 Manager +cdef extern from "nvcomp/gdeflate.h" nogil: + ctypedef struct nvcompBatchedLZ4Opts_t: + nvcompType_t data_type + cdef nvcompBatchedLZ4Opts_t nvcompBatchedLZ4DefaultOpts + cdef extern from "nvcomp/lz4.hpp": cdef cppclass LZ4Manager "nvcomp::LZ4Manager": LZ4Manager( size_t uncomp_chunk_size, - nvcompType_t data_type, + const nvcompBatchedLZ4Opts_t& format_opts, cudaStream_t user_stream, const int device_id ) except + # C++ Concrete Snappy Manager +cdef extern from "nvcomp/snappy.h" nogil: + ctypedef struct nvcompBatchedSnappyOpts_t: + int reserved + cdef nvcompBatchedSnappyOpts_t nvcompBatchedSnappyDefaultOpts + cdef extern from "nvcomp/snappy.hpp": cdef cppclass SnappyManager "nvcomp::SnappyManager": SnappyManager( size_t uncomp_chunk_size, + const nvcompBatchedSnappyOpts_t& format_opts, cudaStream_t user_stream, const int device_id ) except + diff --git a/python/kvikio/nvcomp.py b/python/kvikio/nvcomp.py index 38a326d410..482d7fbeb7 100644 --- a/python/kvikio/nvcomp.py +++ b/python/kvikio/nvcomp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. from enum import Enum @@ -192,35 +192,6 @@ def configure_decompression_with_compressed_buffer( asarray(data) ) - def get_required_scratch_buffer_size(self) -> int: - """Return the size of the optional scratch buffer. - - Returns - ------- - int - """ - return self._manager.get_required_scratch_buffer_size() - - def set_scratch_buffer(self, new_scratch_buffer: cp.ndarray) -> None: - """Use a pre-allocated buffer for compression. - - Use a GPU-allocated buffer that will be used for compression - temporary storage instead of allowing the library to create the - scratch buffer. - Can reduce memory usage. - - Parameters - ---------- - new_scratch_buffer : cp.ndarray - The buffer that you allocated on the GPU for compressor temporary - storage. - - Returns - ------- - cp.ndarray - """ - return self._manager.set_scratch_buffer(asarray(new_scratch_buffer)) - def get_compressed_output_size(self, comp_buffer: cp.ndarray) -> int: """Return the actual size of compression result. @@ -277,7 +248,11 @@ def __init__(self, **kwargs): super().__init__(kwargs) self._manager = _lib._BitcompManager( - self.data_type.value, self.bitcomp_algo, self.stream, self.device_id + self.chunk_size, + self.data_type.value, + self.bitcomp_algo, + self.stream, + self.device_id, ) diff --git a/python/tests/test_nvcomp.py b/python/tests/test_nvcomp.py index 59ba24869f..a2ea895dd4 100644 --- a/python/tests/test_nvcomp.py +++ b/python/tests/test_nvcomp.py @@ -406,60 +406,6 @@ def test_get_decompression_config_with_default_options(manager, expected): assert result == expected -@pytest.mark.parametrize("manager", managers()) -def test_set_scratch_buffer(manager): - length = 10000 - dtype = cupy.uint8 - data = cupy.array( - np.arange( - 0, - length // cupy.dtype(dtype).type(0).itemsize, - dtype=dtype, - ) - ) - compressor_instance = manager() - compressor_instance.configure_compression(len(data)) - buffer_size = compressor_instance.get_required_scratch_buffer_size() - buffer = cupy.zeros(buffer_size, dtype="int8") - compressor_instance.set_scratch_buffer(buffer) - compressor_instance.compress(data) - if isinstance(compressor_instance, libnvcomp.BitcompManager): - # Bitcomp does not use the scratch buffer - pytest.skip() - else: - assert (buffer[0:5] != cupy.array([0, 0, 0, 0, 0])).any() - - -@pytest.mark.parametrize( - "manager,expected", - zip( - managers(), - [ - 378355712, # ANS - 8, # Bitcomp - 1641608, # Cascaded - 393222400, # Gdeflate - 252334080, # LZ4 - 67311208, # Snappy - ], - ), -) -def test_get_required_scratch_buffer_size(manager, expected): - length = 10000 - dtype = cupy.uint8 - data = cupy.array( - np.arange( - 0, - length // cupy.dtype(dtype).type(0).itemsize, - dtype=dtype, - ) - ) - compressor_instance = manager() - compressor_instance.configure_compression(len(data)) - buffer_size = compressor_instance.get_required_scratch_buffer_size() - assert_compression_size(buffer_size, expected) - - @pytest.mark.parametrize( "manager, expected", zip(managers(), list(LEN.values())), From e1762f21d419cedca2dcf3207e41f1a711cb157d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 9 Nov 2023 15:13:18 -0600 Subject: [PATCH 2/4] Revert rapids-cmake branch. (#316) Accidentally didn't commit this change in #314. --- cpp/cmake/fetch_rapids.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/cmake/fetch_rapids.cmake b/cpp/cmake/fetch_rapids.cmake index 26e915039e..2b6832a80c 100644 --- a/cpp/cmake/fetch_rapids.cmake +++ b/cpp/cmake/fetch_rapids.cmake @@ -11,8 +11,6 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -set(rapids-cmake-repo vuule/rapids-cmake) -set(rapids-cmake-branch upgrade-nvcomp-3.0.0) if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake From 37667e7bc66da6a65722e9c0769f6f506e7ed6fa Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 15 Nov 2023 10:43:34 -0500 Subject: [PATCH 3/4] Update rapids-cmake functions to non-deprecated signatures (#301) Update to use non deprecated signatures for `rapids_export` functions Authors: - Robert Maynard (https://github.com/robertmaynard) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/301 --- cpp/CMakeLists.txt | 4 ++-- cpp/cmake/thirdparty/get_gtest.cmake | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9ce16f16f2..43031da923 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -125,10 +125,10 @@ install(FILES ${KvikIO_BINARY_DIR}/include/kvikio/version_config.hpp DESTINATION include("${rapids-cmake-dir}/export/find_package_file.cmake") rapids_export_find_package_file( - BUILD "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" kvikio-exports + BUILD "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET kvikio-exports ) rapids_export_find_package_file( - INSTALL "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" kvikio-exports + INSTALL "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET kvikio-exports ) set(doc_string diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake index c3210d842d..1133c846c3 100644 --- a/cpp/cmake/thirdparty/get_gtest.cmake +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -32,7 +32,7 @@ function(find_and_configure_gtest) include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root( - BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] kvikio-testing-exports + BUILD GTest [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET kvikio-testing-exports ) endif() From 613aed88e8ff1e257ab1961234dc02054e582832 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 15 Nov 2023 11:39:24 -0800 Subject: [PATCH 4/4] Also remove devcontainer job --- .github/workflows/pr.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index dfe243dd31..b4625fd411 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -58,11 +58,11 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - devcontainer: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 - with: - build_command: | - sccache -z; - build-all; - sccache -s; +# devcontainer: +# secrets: inherit +# uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 +# with: +# build_command: | +# sccache -z; +# build-all; +# sccache -s;