From 0311216a3fe8f53bc7e89cbfe147f6fde6715aad Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 3 Jan 2025 11:17:08 -0800 Subject: [PATCH 1/5] Use rapids-cmake for the logger (#17674) This PR switches cudf to use rapids-cmake to fetch rapids-logger so that it uses a consistent version with the rest of RAPIDS to avoid any cases where transitive CPM loads result in multiple packages being built from source that require a different version of rapids-logger. This PR also cherry-picks the Python docs changes from https://github.com/rapidsai/cudf/pull/17669 so that our Sphinx docs can build again without warnings. Depends on https://github.com/rapidsai/rapids-cmake/pull/737 and https://github.com/rapidsai/rmm/pull/1776. Contributes to rapidsai/build-planning#104. Authors: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17674 --- cpp/CMakeLists.txt | 9 ++--- .../user_guide/api_docs/general_functions.rst | 34 +++++++++---------- docs/cudf/source/user_guide/api_docs/io.rst | 8 ++--- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8c6cd922747..cb814aa8c0f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -274,11 +274,8 @@ endif() # add third party dependencies using CPM rapids_cpm_init() -# Not using rapids-cmake since we never want to find, always download. 
-CPMAddPackage( - NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG - c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 -) +include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) +rapids_cpm_rapids_logger() rapids_make_logger(cudf EXPORT_SET cudf-exports) # find jitify diff --git a/docs/cudf/source/user_guide/api_docs/general_functions.rst b/docs/cudf/source/user_guide/api_docs/general_functions.rst index 38e070b0d53..5c5b5cb3b04 100644 --- a/docs/cudf/source/user_guide/api_docs/general_functions.rst +++ b/docs/cudf/source/user_guide/api_docs/general_functions.rst @@ -9,26 +9,26 @@ Data manipulations .. autosummary:: :toctree: api/ - cudf.concat - cudf.crosstab - cudf.cut - cudf.factorize - cudf.get_dummies - cudf.melt - cudf.merge - cudf.pivot - cudf.pivot_table - cudf.unstack + concat + crosstab + cut + factorize + get_dummies + melt + merge + pivot + pivot_table + unstack Top-level conversions --------------------- .. autosummary:: :toctree: api/ - cudf.to_numeric - cudf.from_dataframe - cudf.from_dlpack - cudf.from_pandas + to_numeric + from_dataframe + from_dlpack + from_pandas Top-level dealing with datetimelike data ---------------------------------------- @@ -36,8 +36,8 @@ Top-level dealing with datetimelike data .. autosummary:: :toctree: api/ - cudf.to_datetime - cudf.date_range + to_datetime + date_range Top-level dealing with Interval data ------------------------------------ @@ -45,4 +45,4 @@ Top-level dealing with Interval data .. 
autosummary:: :toctree: api/ - cudf.interval_range + interval_range diff --git a/docs/cudf/source/user_guide/api_docs/io.rst b/docs/cudf/source/user_guide/api_docs/io.rst index 417970715f8..ad8ba8a9bdf 100644 --- a/docs/cudf/source/user_guide/api_docs/io.rst +++ b/docs/cudf/source/user_guide/api_docs/io.rst @@ -35,10 +35,10 @@ Parquet read_parquet DataFrame.to_parquet - cudf.io.parquet.read_parquet_metadata - cudf.io.parquet.ParquetDatasetWriter - cudf.io.parquet.ParquetDatasetWriter.close - cudf.io.parquet.ParquetDatasetWriter.write_table + io.parquet.read_parquet_metadata + io.parquet.ParquetDatasetWriter + io.parquet.ParquetDatasetWriter.close + io.parquet.ParquetDatasetWriter.write_table ORC From 1dece5e2f5cde6f60f70475ac345820673185f1d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 3 Jan 2025 16:41:11 -0500 Subject: [PATCH 2/5] Fix possible overflow in WriteCoalescingCallbackWrapper::TearDown (#17642) Fixes possible overflow in `WriteCoalescingCallbackWrapper::TearDown` function if the `tile_out_count` is sufficiently large. The `out_char += blockDim.x` could overflow when within block-size of the max of `tile_out_count`. Authors: - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17642 --- cpp/src/io/fst/agent_dfa.cuh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh index 2a75c034dc8..5685b50c322 100644 --- a/cpp/src/io/fst/agent_dfa.cuh +++ b/cpp/src/io/fst/agent_dfa.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -308,12 +308,14 @@ class WriteCoalescingCallbackWrapper { { __syncthreads(); if constexpr (!DiscardTranslatedOutput) { - for (uint32_t out_char = threadIdx.x; out_char < tile_out_count; out_char += blockDim.x) { + for (thread_index_type out_char = threadIdx.x; out_char < tile_out_count; + out_char += blockDim.x) { out_it[tile_out_offset + out_char] = temp_storage.compacted_symbols[out_char]; } } if constexpr (!DiscardIndexOutput) { - for (uint32_t out_char = threadIdx.x; out_char < tile_out_count; out_char += blockDim.x) { + for (thread_index_type out_char = threadIdx.x; out_char < tile_out_count; + out_char += blockDim.x) { out_idx_it[tile_out_offset + out_char] = temp_storage.compacted_offset[out_char] + tile_in_offset; } From 07406b353f71cd089e398960f491419dbce3b164 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 15:03:15 -0800 Subject: [PATCH 3/5] Fix groupby.len with null values in cudf.polars (#17671) closes https://github.com/rapidsai/cudf/issues/17667 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/17671 --- .../cudf_polars/dsl/expressions/aggregation.py | 8 ++++++-- python/cudf_polars/tests/test_groupby.py | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py index 2ba483c7b2d..b88b109a975 100644 --- a/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py +++ b/python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 # TODO: remove need for this # ruff: noqa: D101 @@ -69,7 +69,11 @@ def __init__( # TODO: handle nans req = plc.aggregation.variance(ddof=options) elif name == "count": - req = plc.aggregation.count(null_handling=plc.types.NullPolicy.EXCLUDE) + req = plc.aggregation.count( + null_handling=plc.types.NullPolicy.EXCLUDE + if not options + else plc.types.NullPolicy.INCLUDE + ) elif name == "quantile": _, quantile = self.children if not isinstance(quantile, Literal): diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 1e8246496cd..53b96ba574b 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -213,3 +213,9 @@ def test_groupby_maintain_order_random(nrows, nkeys, with_nulls): ) q = df.lazy().group_by(key_names, maintain_order=True).agg(pl.col("value").sum()) assert_gpu_result_equal(q) + + +def test_groupby_len_with_nulls(): + df = pl.DataFrame({"a": [1, 1, 1, 2], "b": [1, None, 2, 3]}) + q = df.lazy().group_by("a").agg(pl.col("b").len()) + assert_gpu_result_equal(q, check_row_order=False) From 756d66bd25da6ded550932611386fc2ca2063486 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:19:06 -0800 Subject: [PATCH 4/5] Avoid shallow copies in groupby methods (#17646) Noticed while working on https://github.com/rapidsai/cudf/pull/17644 that `diff` and `fillna` were making some unnecessary shallow copies of the `grouping.values` object.
Also noticed that `_cov_or_corr` just pulled the column names out of the `grouping.values` object, so added a separate API, `_values_column_names`, to create the column names without pulling out the actual columns. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17646 --- python/cudf/cudf/core/groupby/groupby.py | 32 +++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 4137109cc96..6ae524d6346 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations import copy @@ -49,7 +49,7 @@ from cudf.utils.utils import GetAttrGetItemMixin if TYPE_CHECKING: - from collections.abc import Generator, Iterable + from collections.abc import Generator, Hashable, Iterable from cudf._typing import ( AggType, @@ -2448,7 +2448,7 @@ def _cov_or_corr(self, func, method_name): # create expanded dataframe consisting all combinations of the # struct columns-pairs to be used in the correlation or covariance # i.e.
(('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2')) - column_names = self.grouping.values._column_names + column_names = self.grouping._values_column_names num_cols = len(column_names) column_pair_structs = {} @@ -2682,10 +2682,8 @@ def diff(self, periods=1, axis=0): if not axis == 0: raise NotImplementedError("Only axis=0 is supported.") - - values = self.obj.__class__._from_data( - self.grouping.values._data, self.obj.index - ) + values = self.grouping.values + values.index = self.obj.index return values - self.shift(periods=periods) def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries: @@ -2789,9 +2787,8 @@ def fillna( raise ValueError("Method can only be of 'ffill', 'bfill'.") return getattr(self, method, limit)() - values = self.obj.__class__._from_data( - self.grouping.values._data, self.obj.index - ) + values = self.grouping.values + values.index = self.obj.index return values.fillna( value=value, inplace=inplace, axis=axis, limit=limit ) @@ -3543,6 +3540,13 @@ def keys(self): self._key_columns[0], name=self.names[0] ) + @property + def _values_column_names(self) -> list[Hashable]: + # If the key columns are in `obj`, filter them out + return [ + x for x in self._obj._column_names if x not in self._named_columns + ] + @property def values(self) -> cudf.core.frame.Frame: """Return value columns as a frame. @@ -3553,11 +3557,9 @@ def values(self) -> cudf.core.frame.Frame: This is mainly used in transform-like operations. 
""" - # If the key columns are in `obj`, filter them out - value_column_names = [ - x for x in self._obj._column_names if x not in self._named_columns - ] - value_columns = self._obj._data.select_by_label(value_column_names) + value_columns = self._obj._data.select_by_label( + self._values_column_names + ) return self._obj.__class__._from_data(value_columns) def _handle_callable(self, by): From 62d72dff9363bf6a58154def9f99fdd4e8a9acc8 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 3 Jan 2025 21:05:50 -0800 Subject: [PATCH 5/5] Refactor distinct hash join to handle multiple probes with the same build table (#17609) This PR updates the distinct join implementation to allow the same build table to be reused for multiple probe operations. It also introduces several breaking changes, including removing the need for users to specify whether the input data contains nested columns. Additionally, the output order has been updated to align with the hash join behavior, with probe indices now appearing on the left and build indices on the right. The PR leverages the new conditional query API in the cuco hash set, enabling more efficient handling of nullable data. While this optimization improves performance, it is not currently reflected in benchmarks due to the absence of a dedicated test case for this scenario. 
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Jason Lowe (https://github.com/jlowe) - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17609 --- cpp/benchmarks/join/distinct_join.cu | 20 +- .../cudf/detail/distinct_hash_join.cuh | 112 ++++----- cpp/include/cudf/join.hpp | 35 +-- cpp/src/join/distinct_hash_join.cu | 238 ++++++++++-------- cpp/tests/join/distinct_join_tests.cpp | 59 +++-- java/src/main/native/src/TableJni.cpp | 32 +-- 6 files changed, 236 insertions(+), 260 deletions(-) diff --git a/cpp/benchmarks/join/distinct_join.cu b/cpp/benchmarks/join/distinct_join.cu index 3502cbcea2a..1085b03ac7b 100644 --- a/cpp/benchmarks/join/distinct_join.cu +++ b/cpp/benchmarks/join/distinct_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,13 +23,8 @@ void distinct_inner_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.inner_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.inner_join(probe_input); }; BM_join(state, join); @@ -42,13 +37,8 @@ void distinct_left_join(nvbench::state& state, auto join = [](cudf::table_view const& probe_input, cudf::table_view const& build_input, cudf::null_equality compare_nulls) { - auto const has_nulls = - cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input) - ? 
cudf::nullable_join::YES - : cudf::nullable_join::NO; - auto hj_obj = cudf::distinct_hash_join{ - build_input, probe_input, has_nulls, compare_nulls}; - return hj_obj.left_join(); + auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls}; + return hj_obj.left_join(probe_input); }; BM_join(state, join); diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index 2acc10105cf..9a10163eb15 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,19 +36,24 @@ using cudf::experimental::row::lhs_index_type; using cudf::experimental::row::rhs_index_type; /** - * @brief An comparator adapter wrapping both self comparator and two table comparator + * @brief A custom comparator used for the build table insertion */ -template -struct comparator_adapter { - comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} - - __device__ constexpr auto operator()( +struct always_not_equal { + __device__ constexpr bool operator()( cuco::pair const&, cuco::pair const&) const noexcept { // All build table keys are distinct thus `false` no matter what return false; } +}; + +/** + * @brief An comparator adapter wrapping the two table comparator + */ +template +struct comparator_adapter { + comparator_adapter(Equal const& d_equal) : _d_equal{d_equal} {} __device__ constexpr auto operator()( cuco::pair const& lhs, @@ -62,56 +67,14 @@ struct comparator_adapter { Equal _d_equal; }; -template -struct hasher_adapter { - hasher_adapter(Hasher const& d_hasher = {}) : _d_hasher{d_hasher} {} - - template - __device__ constexpr auto operator()(cuco::pair const& key) const noexcept - { - return _d_hasher(key.first); - } - - 
private: - Hasher _d_hasher; -}; - /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions. * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). */ -template -struct distinct_hash_join { - private: - /// Device row equal type - using d_equal_type = cudf::experimental::row::equality::strong_index_comparator_adapter< - cudf::experimental::row::equality::device_row_comparator>; - using hasher = hasher_adapter>; - using probing_scheme_type = cuco::linear_probing<1, hasher>; - using cuco_storage_type = cuco::storage<1>; - - /// Hash table type - using hash_table_type = cuco::static_set, - cuco::extent, - cuda::thread_scope_device, - comparator_adapter, - probing_scheme_type, - cudf::detail::cuco_allocator, - cuco_storage_type>; - - bool _has_nulls; ///< true if nulls are present in either build table or probe table - cudf::null_equality _nulls_equal; ///< whether to consider nulls as equal - cudf::table_view _build; ///< input table to build the hash map - cudf::table_view _probe; ///< input table to probe the hash map - std::shared_ptr - _preprocessed_build; ///< input table preprocssed for row operators - std::shared_ptr - _preprocessed_probe; ///< input table preprocssed for row operators - hash_table_type _hash_table; ///< hash table built on `_build` - +class distinct_hash_join { public: distinct_hash_join() = delete; ~distinct_hash_join() = default; @@ -120,21 +83,28 @@ struct distinct_hash_join { distinct_hash_join& operator=(distinct_hash_join const&) = delete; distinct_hash_join& operator=(distinct_hash_join&&) = delete; + /** + * @brief Hasher adapter used by distinct hash join + */ + struct hasher { + template + __device__ constexpr hash_value_type operator()( + cuco::pair const& key) const noexcept + { + 
return key.first; + } + }; + /** * @brief Constructor that internally builds the hash table based on the given `build` table. * * @throw cudf::logic_error if the number of columns in `build` table is 0. * * @param build The build table, from which the hash table is built - * @param probe The probe table - * @param has_nulls Flag to indicate if any nulls exist in the `build` table or - * any `probe` table that will be used later for join. * @param compare_nulls Controls whether null join-key values should match or not. * @param stream CUDA stream used for device memory operations and kernel launches. */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, cudf::null_equality compare_nulls, rmm::cuda_stream_view stream); @@ -143,12 +113,36 @@ struct distinct_hash_join { */ std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; /** * @copydoc cudf::distinct_hash_join::left_join */ std::unique_ptr> left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const; + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const; + + private: + using probing_scheme_type = cuco::linear_probing<1, hasher>; + using cuco_storage_type = cuco::storage<1>; + + /// Hash table type + using hash_table_type = cuco::static_set, + cuco::extent, + cuda::thread_scope_device, + always_not_equal, + probing_scheme_type, + cudf::detail::cuco_allocator, + cuco_storage_type>; + + bool _has_nested_columns; ///< True if nested columns are present in build and probe tables + cudf::null_equality _nulls_equal; ///< Whether to consider nulls as equal + cudf::table_view _build; ///< Input table to build the hash map + std::shared_ptr + _preprocessed_build; ///< Input table preprocssed for row operators + 
hash_table_type _hash_table; ///< Hash table built on `_build` }; } // namespace cudf::detail diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index afefd04d4fa..cc63565eee1 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,13 +34,6 @@ namespace CUDF_EXPORT cudf { -/** - * @brief Enum to indicate whether the distinct join table has nested columns or not - * - * @ingroup column_join - */ -enum class has_nested : bool { YES, NO }; - // forward declaration namespace hashing::detail { @@ -61,7 +54,6 @@ class hash_join; /** * @brief Forward declaration for our distinct hash join */ -template class distinct_hash_join; } // namespace detail @@ -469,20 +461,19 @@ class hash_join { rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - const std::unique_ptr _impl; + std::unique_ptr _impl; }; /** * @brief Distinct hash join that builds hash table in creation and probes results in subsequent * `*_join` member functions * + * This class enables the distinct hash join scheme that builds hash table once, and probes as many + * times as needed (possibly in parallel). + * * @note Behavior is undefined if the build table contains duplicates. 
* @note All NaNs are considered as equal - * - * @tparam HasNested Flag indicating whether there are nested columns in build/probe table */ -// TODO: `HasNested` to be removed via dispatching -template class distinct_hash_join { public: distinct_hash_join() = delete; @@ -496,15 +487,10 @@ class distinct_hash_join { * @brief Constructs a distinct hash join object for subsequent probe calls * * @param build The build table that contains distinct elements - * @param probe The probe table, from which the keys are probed - * @param has_nulls Flag to indicate if there exists any nulls in the `build` table or - * any `probe` table that will be used later for join * @param compare_nulls Controls whether null join-key values should match or not * @param stream CUDA stream used for device memory operations and kernel launches */ distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls = nullable_join::YES, null_equality compare_nulls = null_equality::EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream()); @@ -512,16 +498,18 @@ class distinct_hash_join { * @brief Returns the row indices that can be used to construct the result of performing * an inner join between two tables. @see cudf::inner_join(). * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned indices' device memory. * - * @return A pair of columns [`build_indices`, `probe_indices`] that can be used to + * @return A pair of columns [`probe_indices`, `build_indices`] that can be used to * construct the result of performing an inner join between two tables * with `build` and `probe` as the join keys. 
*/ [[nodiscard]] std::pair>, std::unique_ptr>> - inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), + inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** @@ -532,19 +520,22 @@ class distinct_hash_join { * the row index of the matched row from the build table if there is a match. Otherwise, contains * `JoinNoneValue`. * + * @param probe The probe table, from which the keys are probed * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device * memory. + * * @return A `build_indices` column that can be used to construct the result of * performing a left join between two tables with `build` and `probe` as the join * keys. */ [[nodiscard]] std::unique_ptr> left_join( + cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: - using impl_type = typename cudf::detail::distinct_hash_join; ///< Implementation type + using impl_type = cudf::detail::distinct_hash_join; ///< Implementation type std::unique_ptr _impl; ///< Distinct hash join implementation }; diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index ce4d2067b82..d1a01ee76e4 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,28 +47,19 @@ namespace cudf { namespace detail { namespace { -template -auto prepare_device_equal( - std::shared_ptr build, - std::shared_ptr probe, - bool has_nulls, - cudf::null_equality compare_nulls) -{ - auto const two_table_equal = - cudf::experimental::row::equality::two_table_comparator(probe, build); - return comparator_adapter{two_table_equal.equal_to( - nullate::DYNAMIC{has_nulls}, compare_nulls)}; -} +bool constexpr has_nulls = true; ///< Always has nulls /** * @brief Device functor to create a pair of {hash_value, row_index} for a given row. - * - * @tparam Hasher The type of internal hasher to compute row hash. */ -template +template class build_keys_fn { + using hasher = + cudf::experimental::row::hash::device_row_hasher; + public: - CUDF_HOST_DEVICE build_keys_fn(Hasher const& hash) : _hash{hash} {} + CUDF_HOST_DEVICE constexpr build_keys_fn(hasher const& hash) : _hash{hash} {} __device__ __forceinline__ auto operator()(size_type i) const noexcept { @@ -76,7 +67,7 @@ class build_keys_fn { } private: - Hasher _hash; + hasher _hash; }; /** @@ -92,26 +83,19 @@ struct output_fn { }; } // namespace -template -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - bool has_nulls, - cudf::null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _has_nulls{has_nulls}, +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _has_nested_columns{cudf::has_nested_columns(build)}, _nulls_equal{compare_nulls}, _build{build}, - _probe{probe}, _preprocessed_build{ cudf::experimental::row::equality::preprocessed_table::create(_build, stream)}, - _preprocessed_probe{ - cudf::experimental::row::equality::preprocessed_table::create(_probe, stream)}, _hash_table{build.num_rows(), CUCO_DESIRED_LOAD_FACTOR, cuco::empty_key{cuco::pair{std::numeric_limits::max(), rhs_index_type{JoinNoneValue}}}, - prepare_device_equal( - 
_preprocessed_build, _preprocessed_probe, has_nulls, compare_nulls), + always_not_equal{}, {}, cuco::thread_scope_device, cuco_storage_type{}, @@ -124,10 +108,10 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, if (this->_build.num_rows() == 0) { return; } auto const row_hasher = experimental::row::hash::row_hasher{this->_preprocessed_build}; - auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); + auto const d_hasher = row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_hasher}); + auto const iter = + cudf::detail::make_counting_transform_iterator(0, build_keys_fn{d_hasher}); size_type const build_table_num_rows{build.num_rows()}; if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(this->_build))) { @@ -146,15 +130,15 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, } } -template std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::inner_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return immediately if (probe_table_num_rows == 0) { @@ -162,25 +146,62 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, std::make_unique>(0, stream, mr)); } + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + auto build_indices = std::make_unique>(probe_table_num_rows, stream, mr); auto probe_indices = 
std::make_unique>(probe_table_num_rows, stream, mr); - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto found_indices = rmm::device_uvector(probe_table_num_rows, stream); auto const found_begin = thrust::make_transform_output_iterator(found_indices.begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not equal - // to `JoinNoneValue`, then `idx` has a match in the hash set. - this->_hash_table.find_async(iter, iter + probe_table_num_rows, found_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + // If `idx` is within the range `[0, probe_table_num_rows)` and `found_indices[idx]` is not + // equal to `JoinNoneValue`, then `idx` has a match in the hash set. 
+ if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + found_begin, + stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } auto const tuple_iter = cudf::detail::make_counting_transform_iterator( 0, @@ -203,16 +224,17 @@ distinct_hash_join::inner_join(rmm::cuda_stream_view stream, build_indices->resize(actual_size, stream); probe_indices->resize(actual_size, stream); - return {std::move(build_indices), std::move(probe_indices)}; + return {std::move(probe_indices), std::move(build_indices)}; } -template -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { cudf::scoped_range range{"distinct_hash_join::left_join"}; - size_type const probe_table_num_rows{this->_probe.num_rows()}; + size_type const probe_table_num_rows{probe.num_rows()}; // If output size is zero, return empty if (probe_table_num_rows == 0) { @@ -227,80 +249,82 @@ std::unique_ptr> 
distinct_hash_join::l thrust::fill( rmm::exec_policy_nosync(stream), build_indices->begin(), build_indices->end(), JoinNoneValue); } else { - auto const probe_row_hasher = - cudf::experimental::row::hash::row_hasher{this->_preprocessed_probe}; - auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{this->_has_nulls}); - auto const iter = cudf::detail::make_counting_transform_iterator( - 0, build_keys_fn{d_probe_hasher}); + auto preprocessed_probe = + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); + auto const two_table_equal = cudf::experimental::row::equality::two_table_comparator( + preprocessed_probe, _preprocessed_build); + + auto const probe_row_hasher = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; + auto const d_probe_hasher = probe_row_hasher.device_hasher(nullate::DYNAMIC{has_nulls}); + auto const iter = cudf::detail::make_counting_transform_iterator( + 0, build_keys_fn{d_probe_hasher}); auto const output_begin = thrust::make_transform_output_iterator(build_indices->begin(), output_fn{}); - // TODO conditional find for nulls once `cuco::static_set::find_if` is added - this->_hash_table.find_async(iter, iter + probe_table_num_rows, output_begin, stream.value()); + auto const comparator_helper = [&](auto device_comparator) { + if (this->_nulls_equal == cudf::null_equality::EQUAL or (not cudf::nullable(probe))) { + this->_hash_table.find_async(iter, + iter + probe_table_num_rows, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + stream.value()); + } else { + auto stencil = thrust::counting_iterator{0}; + auto const row_bitmask = + cudf::detail::bitmask_and(probe, stream, cudf::get_current_device_resource_ref()).first; + auto const pred = + cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; + + this->_hash_table.find_if_async(iter, + iter + probe_table_num_rows, + stencil, + pred, + comparator_adapter{device_comparator}, + hasher{}, + output_begin, + 
stream.value()); + } + }; + + if (_has_nested_columns) { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } else { + auto const device_comparator = + two_table_equal.equal_to(nullate::DYNAMIC{has_nulls}, _nulls_equal); + comparator_helper(device_comparator); + } } return build_indices; } } // namespace detail -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::~distinct_hash_join() = default; - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} - -template <> -distinct_hash_join::distinct_hash_join(cudf::table_view const& build, - cudf::table_view const& probe, - nullable_join has_nulls, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : _impl{std::make_unique( - build, probe, has_nulls == nullable_join::YES, compare_nulls, stream)} -{ -} +distinct_hash_join::~distinct_hash_join() = default; -template <> -std::pair>, - std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::distinct_hash_join(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _impl{std::make_unique(build, compare_nulls, stream)} { - return _impl->inner_join(stream, mr); } -template <> std::pair>, std::unique_ptr>> -distinct_hash_join::inner_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const -{ - return _impl->inner_join(stream, mr); -} - -template <> -std::unique_ptr> -distinct_hash_join::left_join(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const +distinct_hash_join::inner_join(cudf::table_view 
const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->inner_join(probe, stream, mr); } -template <> -std::unique_ptr> distinct_hash_join::left_join( - rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const +std::unique_ptr> distinct_hash_join::left_join( + cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - return _impl->left_join(stream, mr); + return _impl->left_join(probe, stream, mr); } } // namespace cudf diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 9070efa38fe..e1ec8cda3ac 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,7 +53,7 @@ struct DistinctJoinTest : public cudf::test::BaseFixture { cudf::table_view const& expected_table, cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK) { - auto const& [build_join_indices, probe_join_indices] = result; + auto const& [probe_join_indices, build_join_indices] = result; auto build_indices_span = cudf::device_span{*build_join_indices}; auto probe_indices_span = cudf::device_span{*probe_join_indices}; @@ -89,10 +89,9 @@ TEST_F(DistinctJoinTest, IntegerInnerJoin) auto build_table = cudf::table_view{{build->view()}}; auto probe_table = cudf::table_view{{probe->view()}}; - auto distinct_join = cudf::distinct_hash_join{ - build_table, probe_table, cudf::nullable_join::NO}; + auto distinct_join = cudf::distinct_hash_join{build_table}; - auto result = distinct_join.inner_join(); + auto result = distinct_join.inner_join(probe_table); auto constexpr gold_size = size / 2; auto gold = cudf::sequence(gold_size, init, cudf::numeric_scalar{2}); @@ -120,8 +119,8 @@ TEST_F(DistinctJoinTest, InnerJoinNoNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{1, 2}}; strcol_wrapper col_gold_1({"s0", "s0"}); @@ -162,8 +161,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithNulls) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -229,8 +228,8 @@ TEST_F(DistinctJoinTest, InnerJoinWithStructsAndNulls) Table 
probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); column_wrapper col_gold_0{{3, 2}}; strcol_wrapper col_gold_1({"s1", "s0"}, {true, true}); @@ -284,8 +283,8 @@ TEST_F(DistinctJoinTest, EmptyBuildTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, build.view()); } @@ -307,9 +306,9 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -332,8 +331,8 @@ TEST_F(DistinctJoinTest, EmptyProbeTableInnerJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.inner_join(); + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.inner_join(probe.view()); this->compare_to_reference(build.view(), probe.view(), result, probe.view()); } @@ -355,9 +354,9 @@ TEST_F(DistinctJoinTest, 
EmptyProbeTableLeftJoin) Table build(std::move(cols0)); Table probe(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -391,9 +390,9 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) cols_gold.push_back(col_gold_3.release()); Table gold(std::move(cols_gold)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; this->compare_to_reference( build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -416,9 +415,9 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true}); @@ -461,9 +460,9 @@ 
TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) Table probe(std::move(cols0)); Table build(std::move(cols1)); - auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; - auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; + auto distinct_join = cudf::distinct_hash_join{build.view()}; + auto result = distinct_join.left_join(probe.view()); + auto gather_map = std::pair{get_left_indices(result->size()), std::move(result)}; auto col0_gold_names_col = strcol_wrapper{ "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"}; diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 1f8b1ea207d..ed35f35794d 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2901,16 +2901,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_leftDistinctJoinGatherMap j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? 
cudf::nullable_join::YES - : cudf::nullable_join::NO; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - return hash.left_join(); - } + cudf::distinct_hash_join hash(right, nulleq); + return hash.left_join(left); }); } @@ -3119,22 +3111,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_innerDistinctJoinGatherMa j_right_keys, compare_nulls_equal, [](cudf::table_view const& left, cudf::table_view const& right, cudf::null_equality nulleq) { - auto has_nulls = cudf::has_nested_nulls(left) || cudf::has_nested_nulls(right) - ? cudf::nullable_join::YES - : cudf::nullable_join::NO; - std::pair>, - std::unique_ptr>> - maps; - if (cudf::has_nested_columns(right)) { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } else { - cudf::distinct_hash_join hash(right, left, has_nulls, nulleq); - maps = hash.inner_join(); - } - // Unique join returns {right map, left map} but all the other joins - // return {left map, right map}. Swap here to make it consistent. - return std::make_pair(std::move(maps.second), std::move(maps.first)); + cudf::distinct_hash_join hash(right, nulleq); + return hash.inner_join(left); }); }