Skip to content

Commit

Permalink
Improve the performance of CAGRA new vector addition with the default…
Browse files Browse the repository at this point in the history
… params (#569)

This PR updates the default chunk size of the CAGRA graph extension and also adds a knob to control the batch size of the CAGRA searches that are run internally, for better throughput.

The default chunk size was set to 1 in the current implementation because of a potential low-recall problem when the chunk size is large: no edges are created between nodes in the same chunk. However, my investigation shows that the low-recall problem rarely occurs with large chunk sizes.

# Search performance

The performance was measured after applying bugfix #565.

## degree = 32


![extend-ir0 9-degree32](https://github.com/user-attachments/assets/a5bb2fb6-8c12-49ad-b96a-1b384d79a96b)


(I don't know why the performance is unstable on NYTimes.)

## degree = 64
![extend-ir0 9-degree64](https://github.com/user-attachments/assets/8e926e1c-d772-4682-9419-9cc027f09d3f)

So, in this PR, I increase the default chunk size to the size of the new dataset vectors for better throughput. I also expose a knob in the `extend` function to control the search batch size, which sets the balance between throughput and memory consumption.

Authors:
  - tsuki (https://github.com/enp1s0)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Tamas Bela Feher (https://github.com/tfeher)

URL: #569
  • Loading branch information
enp1s0 authored Jan 23, 2025
1 parent b62b11a commit 80370a1
Showing 1 changed file with 24 additions and 10 deletions.
34 changes: 24 additions & 10 deletions cpp/src/neighbors/detail/cagra/add_nodes.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ void add_node_core(
const cuvs::neighbors::cagra::index<T, IdxT>& idx,
raft::mdspan<const T, raft::matrix_extent<int64_t>, raft::layout_stride, Accessor>
additional_dataset_view,
raft::host_matrix_view<IdxT, std::int64_t> updated_graph)
raft::host_matrix_view<IdxT, std::int64_t> updated_graph,
const cuvs::neighbors::cagra::extend_params& extend_params)
{
using DistanceT = float;
const std::size_t degree = idx.graph_degree();
Expand Down Expand Up @@ -68,7 +69,19 @@ void add_node_core(
new_size,
raft::resource::get_cuda_stream(handle));

const std::size_t max_chunk_size = 1024;
std::size_t data_size_per_vector =
sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim;
cudaPointerAttributes attr;
RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, additional_dataset_view.data_handle()));
if (attr.devicePointer == nullptr) {
// for batch_load_iterator
data_size_per_vector += sizeof(T) * dim;
}

const std::size_t max_search_batch_size =
std::min(std::max(1lu, raft::resource::get_workspace_free_bytes(handle) / data_size_per_vector),
num_add);
RAFT_EXPECTS(max_search_batch_size > 0, "No enough working memory space is left.");

cuvs::neighbors::cagra::search_params params;
params.itopk_size = std::max(base_degree * 2lu, 256lu);
Expand All @@ -77,24 +90,24 @@ void add_node_core(
auto mr = raft::resource::get_workspace_resource(handle);

auto neighbor_indices = raft::make_device_mdarray<IdxT, std::int64_t>(
handle, mr, raft::make_extents<std::int64_t>(max_chunk_size, base_degree));
handle, mr, raft::make_extents<std::int64_t>(max_search_batch_size, base_degree));

auto neighbor_distances = raft::make_device_mdarray<DistanceT, std::int64_t>(
handle, mr, raft::make_extents<std::int64_t>(max_chunk_size, base_degree));
handle, mr, raft::make_extents<std::int64_t>(max_search_batch_size, base_degree));

auto queries = raft::make_device_mdarray<T, std::int64_t>(
handle, mr, raft::make_extents<std::int64_t>(max_chunk_size, dim));
handle, mr, raft::make_extents<std::int64_t>(max_search_batch_size, dim));

auto host_neighbor_indices =
raft::make_host_matrix<IdxT, std::int64_t>(max_chunk_size, base_degree);
raft::make_host_matrix<IdxT, std::int64_t>(max_search_batch_size, base_degree);

cuvs::spatial::knn::detail::utils::batch_load_iterator<T> additional_dataset_batch(
additional_dataset_view.data_handle(),
num_add,
additional_dataset_view.stride(0),
max_chunk_size,
max_search_batch_size,
raft::resource::get_cuda_stream(handle),
raft::resource::get_workspace_resource(handle));
mr);
for (const auto& batch : additional_dataset_batch) {
// Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search
// Create queries
Expand Down Expand Up @@ -254,7 +267,8 @@ void add_graph_nodes(
const std::size_t degree = index.graph_degree();
const std::size_t dim = index.dim();
const std::size_t stride = input_updated_dataset_view.stride(0);
const std::size_t max_chunk_size_ = params.max_chunk_size == 0 ? 1 : params.max_chunk_size;
const std::size_t max_chunk_size_ =
params.max_chunk_size == 0 ? new_dataset_size : params.max_chunk_size;

raft::copy(updated_graph_view.data_handle(),
index.graph().data_handle(),
Expand Down Expand Up @@ -298,7 +312,7 @@ void add_graph_nodes(
stride);

neighbors::cagra::add_node_core<T, IdxT>(
handle, internal_index, additional_dataset_view, updated_graph);
handle, internal_index, additional_dataset_view, updated_graph, params);
raft::resource::sync_stream(handle);
}
}
Expand Down

0 comments on commit 80370a1

Please sign in to comment.