Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port C++ CAGRA runtime from RAFT #8

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

# cuvs build scripts

Expand All @@ -24,7 +24,7 @@ HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<to
clean - remove all existing build artifacts and configuration (start over)
libcuvs - build the cuvs C++ code only. Also builds the C-wrapper library
around the C++ code.
cuvs - build the cuvs Python package
cuvs - build the cuvs Python package
docs - build the documentation
tests - build the tests
bench-prims - build micro-benchmarks for primitives
Expand Down Expand Up @@ -77,8 +77,8 @@ INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
BUILD_REPORT_INCL_CACHE_STATS=OFF

TEST_TARGETS="CLUSTER_TEST;DISTANCE_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_CAGRA_TEST;NEIGHBORS_ANN_NN_DESCENT_TEST;NEIGHBORS_ANN_IVF_TEST"
BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH"
TEST_TARGETS="NEIGHBORS_ANN_CAGRA_TEST"
BENCH_TARGETS="NEIGHBORS_BENCH"

CACHE_ARGS=""
NVTX=ON
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

./build.sh tests bench --allgpuarch --no-nvtx --build-metrics=tests_bench --incl-cache-stats
cmake --install cpp/build --component testing
32 changes: 24 additions & 8 deletions conda/recipes/libcuvs/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

# Usage:
# conda build . -c conda-forge -c nvidia -c rapidsai
Expand All @@ -20,7 +20,23 @@ outputs:
version: {{ version }}
script: build_libcuvs_static.sh
build:
script_env: *script_env
script_env: &script_env
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- AWS_SESSION_TOKEN
- CMAKE_C_COMPILER_LAUNCHER
- CMAKE_CUDA_COMPILER_LAUNCHER
- CMAKE_CXX_COMPILER_LAUNCHER
- CMAKE_GENERATOR
- PARALLEL_LEVEL
- RAPIDS_ARTIFACTS_DIR
- SCCACHE_BUCKET
- SCCACHE_IDLE_TIMEOUT
- SCCACHE_REGION
- SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64]
- SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
ignore_run_exports_from:
Expand All @@ -41,7 +57,7 @@ outputs:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
Expand All @@ -61,7 +77,7 @@ outputs:
- libcusparse-dev
{% endif %}
run:
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
about:
home: https://rapids.ai/
Expand Down Expand Up @@ -92,7 +108,7 @@ outputs:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
Expand All @@ -119,7 +135,7 @@ outputs:
{% if cuda_major == "11" %}
- cudatoolkit
{% endif %}
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
- gmock {{ gtest_version }}
- gtest {{ gtest_version }}
about:
Expand Down Expand Up @@ -151,7 +167,7 @@ outputs:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
Expand All @@ -166,7 +182,7 @@ outputs:
{% if cuda_major == "11" %}
- cudatoolkit
{% endif %}
- {{ pin_subpackage('libraft-headers', exact=True) }}
- {{ pin_compatible('libraft', exact=True) }}
about:
home: https://rapids.ai/
license: Apache-2.0
Expand Down
181 changes: 14 additions & 167 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -191,169 +191,16 @@ include(cmake/thirdparty/get_cutlass.cmake)

add_library(
cuvs SHARED
src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_rbf.cu
src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
src/distance/distance.cu
src/distance/fused_l2_nn.cu
src/matrix/detail/select_k_double_int64_t.cu
src/matrix/detail/select_k_double_uint32_t.cu
src/matrix/detail/select_k_float_int64_t.cu
src/matrix/detail/select_k_float_uint32_t.cu
src/matrix/detail/select_k_float_int32.cu
src/matrix/detail/select_k_half_int64_t.cu
src/matrix/detail/select_k_half_uint32_t.cu
src/neighbors/ball_cover.cu
src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
src/neighbors/brute_force_knn_int_float_int.cu
src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
src/neighbors/brute_force_knn_index_float.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu
src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu
src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu
src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu
src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu
src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu
src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu
src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu
src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu
src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
src/neighbors/detail/ivf_flat_search.cu
src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
src/neighbors/detail/refine_host_float_float.cpp
src/neighbors/detail/refine_host_int8_t_float.cpp
src/neighbors/detail/refine_host_uint8_t_float.cpp
src/neighbors/detail/selection_faiss_int32_t_float.cu
src/neighbors/detail/selection_faiss_int_double.cu
src/neighbors/detail/selection_faiss_long_float.cu
src/neighbors/detail/selection_faiss_size_t_double.cu
src/neighbors/detail/selection_faiss_size_t_float.cu
src/neighbors/detail/selection_faiss_uint32_t_float.cu
src/neighbors/detail/selection_faiss_int64_t_double.cu
src/neighbors/detail/selection_faiss_int64_t_half.cu
src/neighbors/detail/selection_faiss_uint32_t_double.cu
src/neighbors/detail/selection_faiss_uint32_t_half.cu
src/neighbors/ivf_flat_build_float_int64_t.cu
src/neighbors/ivf_flat_build_int8_t_int64_t.cu
src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
src/neighbors/ivf_flat_extend_float_int64_t.cu
src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
src/neighbors/ivf_flat_search_float_int64_t.cu
src/neighbors/ivf_flat_search_int8_t_int64_t.cu
src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
src/neighbors/ivfpq_build_float_int64_t.cu
src/neighbors/ivfpq_build_int8_t_int64_t.cu
src/neighbors/ivfpq_build_uint8_t_int64_t.cu
src/neighbors/ivfpq_extend_float_int64_t.cu
src/neighbors/ivfpq_extend_int8_t_int64_t.cu
src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
src/neighbors/ivfpq_search_float_int64_t.cu
src/neighbors/ivfpq_search_int8_t_int64_t.cu
src/neighbors/ivfpq_search_uint8_t_int64_t.cu
src/neighbors/refine_float_float.cu
src/neighbors/refine_int8_t_float.cu
src/neighbors/refine_uint8_t_float.cu
src/cuvs_runtime/cluster/cluster_cost.cuh
src/cuvs_runtime/cluster/cluster_cost_double.cu
src/cuvs_runtime/cluster/cluster_cost_float.cu
src/cuvs_runtime/cluster/kmeans_fit_double.cu
src/cuvs_runtime/cluster/kmeans_fit_float.cu
src/cuvs_runtime/cluster/kmeans_init_plus_plus_double.cu
src/cuvs_runtime/cluster/kmeans_init_plus_plus_float.cu
src/cuvs_runtime/cluster/update_centroids.cuh
src/cuvs_runtime/cluster/update_centroids_double.cu
src/cuvs_runtime/cluster/update_centroids_float.cu
src/cuvs_runtime/distance/fused_l2_min_arg.cu
src/cuvs_runtime/distance/pairwise_distance.cu
src/cuvs_runtime/matrix/select_k_float_int64_t.cu
src/cuvs_runtime/neighbors/brute_force_knn_int64_t_float.cu
src/cuvs_runtime/neighbors/cagra_build.cu
src/cuvs_runtime/neighbors/cagra_search.cu
src/cuvs_runtime/neighbors/cagra_serialize.cu
src/cuvs_runtime/neighbors/ivf_flat_build.cu
src/cuvs_runtime/neighbors/ivf_flat_search.cu
src/cuvs_runtime/neighbors/ivf_flat_serialize.cu
src/cuvs_runtime/neighbors/ivfpq_build.cu
src/cuvs_runtime/neighbors/ivfpq_deserialize.cu
src/cuvs_runtime/neighbors/ivfpq_search_float_int64_t.cu
src/cuvs_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
src/cuvs_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
src/cuvs_runtime/neighbors/ivfpq_serialize.cu
src/cuvs_runtime/neighbors/refine_d_int64_t_float.cu
src/cuvs_runtime/neighbors/refine_d_int64_t_int8_t.cu
src/cuvs_runtime/neighbors/refine_d_int64_t_uint8_t.cu
src/cuvs_runtime/neighbors/refine_h_int64_t_float.cu
src/cuvs_runtime/neighbors/refine_h_int64_t_int8_t.cu
src/cuvs_runtime/neighbors/refine_h_int64_t_uint8_t.cu
src/cuvs_runtime/random/rmat_rectangular_generator_int64_double.cu
src/cuvs_runtime/random/rmat_rectangular_generator_int64_float.cu
src/cuvs_runtime/random/rmat_rectangular_generator_int_double.cu
src/cuvs_runtime/random/rmat_rectangular_generator_int_float.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
src/neighbors/cagra_build_float.cpp
src/neighbors/cagra_build_int8.cpp
src/neighbors/cagra_build_uint8.cpp
src/neighbors/cagra_optimize.cpp
src/neighbors/cagra_search_float.cpp
src/neighbors/cagra_search_int8.cpp
src/neighbors/cagra_search_uint8.cpp
src/neighbors/cagra_serialize_float.cpp
src/neighbors/cagra_serialize_int8.cpp
src/neighbors/cagra_serialize_uint8.cpp
)

target_compile_options(
Expand All @@ -370,7 +217,7 @@ target_include_directories(

if(NOT BUILD_CPU_ONLY)
# Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
target_link_libraries(cuvs PUBLIC raft::raft nvidia::cutlass::cutlass)
target_link_libraries(cuvs PUBLIC raft::raft raft::compiled nvidia::cutlass::cutlass)
endif()

# Endian detection
Expand Down Expand Up @@ -398,7 +245,7 @@ SECTIONS

if(CUVS_NVTX)
# This enables NVTX within the project with no option to disable it downstream.
target_link_libraries(cuvs PUBLIC CUDA::nvToolsExt)
target_link_libraries(cuvs PUBLIC CUDA::nvtx3)
target_compile_definitions(cuvs PUBLIC NVTX_ENABLED)
else()
# Allow enable NVTX downstream if not set here. This creates a new option at build/install time,
Expand All @@ -416,7 +263,7 @@ else()
"\" OFF)"
[=[

target_link_libraries(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:CUDA::nvToolsExt>)
target_link_libraries(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:CUDA::nvtx3>)
target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENABLED>)

]=]
Expand Down
10 changes: 4 additions & 6 deletions cpp/bench/ann/src/raft/raft_cagra_wrapper.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,9 +17,7 @@

#include <cassert>
#include <cuvs/distance/distance_types.hpp>
#include <cuvs/neighbors/cagra.cuh>
#include <cuvs/neighbors/cagra_serialize.cuh>
#include <cuvs/neighbors/cagra_types.hpp>
#include <cuvs/neighbors/cagra.hpp>
#include <cuvs/neighbors/detail/cagra/cagra_build.cuh>
#include <cuvs/neighbors/ivf_pq_types.hpp>
#include <cuvs/neighbors/nn_descent_types.hpp>
Expand Down Expand Up @@ -57,7 +55,7 @@ class RaftCagra : public ANN<T> {
using typename ANN<T>::AnnSearchParam;

struct SearchParam : public AnnSearchParam {
cuvs::neighbors::experimental::cagra::search_params p;
cuvs::neighbors::cagra::search_params p;
AllocatorType graph_mem = AllocatorType::Device;
AllocatorType dataset_mem = AllocatorType::Device;
auto needs_dataset() const -> bool override { return true; }
Expand Down Expand Up @@ -209,7 +207,7 @@ void RaftCagra<T, IdxT>::set_search_param(const AnnSearchParam& param)
allocator_to_string(dataset_mem_).c_str());

auto mr = get_mr(dataset_mem_);
cuvs::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_v_, mr);
raft::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_v_, mr);

index_->update_dataset(handle_, make_const_mdspan(dataset_.view()));

Expand Down
4 changes: 2 additions & 2 deletions cpp/bench/micro/neighbors/cagra_bench.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,7 +17,7 @@
#pragma once

#include <common/benchmark.hpp>
#include <cuvs/neighbors/cagra.cuh>
#include <cuvs/neighbors/cagra.hpp>
#include <cuvs/neighbors/sample_filter.cuh>
#include <raft/random/rng.cuh>
#include <raft/util/itertools.hpp>
Expand Down
Loading
Loading