Merge branch 'branch-25.02' into bug_packed_bool_cas
seunghwak authored Jan 14, 2025
2 parents 3bd7a29 + 6b5db94 commit 1b0cae2
Showing 14 changed files with 356 additions and 281 deletions.
4 changes: 2 additions & 2 deletions cpp/src/sampling/neighbor_sampling_impl.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -184,7 +184,7 @@ neighbor_sample_impl(raft::handle_t const& handle,

   std::vector<size_t> level_sizes{};
 
-  for (auto hop = 0; hop < num_hops; hop++) {
+  for (size_t hop = 0; hop < num_hops; ++hop) {
     rmm::device_uvector<vertex_t> level_result_src(0, handle.get_stream());
     rmm::device_uvector<vertex_t> level_result_dst(0, handle.get_stream());
8 changes: 3 additions & 5 deletions cpp/src/sampling/sampling_post_processing_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1391,12 +1391,10 @@ compute_vertex_renumber_map(
         [offsets = *vertex_type_offsets] __device__(auto lhs, auto rhs) {
           auto lhs_v_type = thrust::distance(
             offsets.begin() + 1,
-            thrust::upper_bound(
-              thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(lhs)));
+            thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), lhs));
           auto rhs_v_type = thrust::distance(
             offsets.begin() + 1,
-            thrust::upper_bound(
-              thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(rhs)));
+            thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), rhs));
           return lhs_v_type < rhs_v_type;
         });
   }
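In the hunk above, the comparator now receives vertex ids directly, so the thrust::get<0> tuple unpacking is dropped; the bucket lookup itself is unchanged. For orientation, here is a minimal Python sketch (illustrative only, not cugraph code) of the lookup that thrust::upper_bound performs against the type offsets:

from bisect import bisect_right

# offsets_tail plays the role of offsets.begin() + 1 .. offsets.end():
# the exclusive upper boundary of each vertex-type bucket.
def vertex_type(v, offsets_tail):
    # upper_bound semantics: index of the first offset strictly greater than v
    return bisect_right(offsets_tail, v)

# Hypothetical buckets: type 0 holds ids 0..2, type 1 holds ids 3..4.
assert vertex_type(0, [3, 5]) == 0
assert vertex_type(4, [3, 5]) == 1

Sorting with lhs_v_type < rhs_v_type then groups vertices by type bucket without comparing raw ids.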
1 change: 1 addition & 0 deletions dependencies.yaml
@@ -101,6 +101,7 @@ files:
       - depends_on_dask_cuda
       - depends_on_dask_cudf
       - depends_on_pylibcugraph
+      - depends_on_pylibraft
       - depends_on_raft_dask
       - depends_on_rmm
       - depends_on_ucx_py
79 changes: 49 additions & 30 deletions python/cugraph/cugraph/gnn/data_loading/dist_io/writer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -79,9 +79,15 @@ def get_reader(
         return DistSampleReader(self._directory, format=self._format, rank=rank)
 
     def __write_minibatches_coo(self, minibatch_dict):
-        has_edge_ids = minibatch_dict["edge_id"] is not None
-        has_edge_types = minibatch_dict["edge_type"] is not None
-        has_weights = minibatch_dict["weight"] is not None
+        has_edge_ids = (
+            "edge_id" in minibatch_dict and minibatch_dict["edge_id"] is not None
+        )
+        has_edge_types = (
+            "edge_type" in minibatch_dict and minibatch_dict["edge_type"] is not None
+        )
+        has_weights = (
+            "weight" in minibatch_dict and minibatch_dict["weight"] is not None
+        )
 
         if minibatch_dict["renumber_map"] is None:
             raise ValueError(
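As a side note, these guards could be written more compactly with dict.get, which returns None for a missing key; a sketch, not the committed code:

has_edge_ids = minibatch_dict.get("edge_id") is not None
has_edge_types = minibatch_dict.get("edge_type") is not None
has_weights = minibatch_dict.get("weight") is not None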
@@ -92,22 +98,22 @@ def __write_minibatches_coo(self, minibatch_dict):
         if len(minibatch_dict["batch_id"]) == 0:
             return
 
-        fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len(
-            minibatch_dict["batch_id"]
-        )
+        fanout_length = len(minibatch_dict["fanout"])
+        total_num_batches = (
+            len(minibatch_dict["label_hop_offsets"]) - 1
+        ) / fanout_length
 
-        for p in range(
-            0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition))
-        ):
+        for p in range(0, int(ceil(total_num_batches / self.__batches_per_partition))):
             partition_start = p * (self.__batches_per_partition)
             partition_end = (p + 1) * (self.__batches_per_partition)
 
             label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][
                 partition_start * fanout_length : partition_end * fanout_length + 1
             ]
 
-            batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end]
-            start_batch_id = batch_id_array_p[0]
+            num_batches_p = len(label_hop_offsets_array_p) - 1
+
+            start_batch_id = minibatch_dict["batch_start"]
 
             input_offsets_p = minibatch_dict["input_offsets"][
                 partition_start : (partition_end + 1)
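A toy walk-through of the revised arithmetic (all values hypothetical): the batch count is now derived from label_hop_offsets and the fanout list, so batch_id no longer has to carry one entry per batch.

from math import ceil

fanout = [10, 5]                       # two hops
fanout_length = len(fanout)            # 2
num_batches = 7
# one offset per (batch, hop) segment, plus one closing boundary
label_hop_offsets = list(range(num_batches * fanout_length + 1))
total_num_batches = (len(label_hop_offsets) - 1) / fanout_length   # 7.0

batches_per_partition = 3
for p in range(int(ceil(total_num_batches / batches_per_partition))):  # 3 partitions
    start = p * batches_per_partition
    end = (p + 1) * batches_per_partition
    # each partition consumes batches_per_partition * fanout_length segments
    offsets_p = label_hop_offsets[start * fanout_length : end * fanout_length + 1]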
@@ -171,7 +177,7 @@ def __write_minibatches_coo(self, minibatch_dict):
                 }
             )
 
-            end_batch_id = start_batch_id + len(batch_id_array_p) - 1
+            end_batch_id = start_batch_id + num_batches_p - 1
             rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0
 
             full_output_path = os.path.join(
@@ -188,9 +194,15 @@ def __write_minibatches_coo(self, minibatch_dict):
             )
 
     def __write_minibatches_csr(self, minibatch_dict):
-        has_edge_ids = minibatch_dict["edge_id"] is not None
-        has_edge_types = minibatch_dict["edge_type"] is not None
-        has_weights = minibatch_dict["weight"] is not None
+        has_edge_ids = (
+            "edge_id" in minibatch_dict and minibatch_dict["edge_id"] is not None
+        )
+        has_edge_types = (
+            "edge_type" in minibatch_dict and minibatch_dict["edge_type"] is not None
+        )
+        has_weights = (
+            "weight" in minibatch_dict and minibatch_dict["weight"] is not None
+        )
 
         if minibatch_dict["renumber_map"] is None:
             raise ValueError(
@@ -201,22 +213,22 @@ def __write_minibatches_csr(self, minibatch_dict):
         if len(minibatch_dict["batch_id"]) == 0:
             return
 
-        fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len(
-            minibatch_dict["batch_id"]
-        )
+        fanout_length = len(minibatch_dict["fanout"])
+        total_num_batches = (
+            len(minibatch_dict["label_hop_offsets"]) - 1
+        ) / fanout_length
 
-        for p in range(
-            0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition))
-        ):
+        for p in range(0, int(ceil(total_num_batches / self.__batches_per_partition))):
             partition_start = p * (self.__batches_per_partition)
             partition_end = (p + 1) * (self.__batches_per_partition)
 
             label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][
                 partition_start * fanout_length : partition_end * fanout_length + 1
             ]
 
-            batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end]
-            start_batch_id = batch_id_array_p[0]
+            num_batches_p = len(label_hop_offsets_array_p) - 1
+
+            start_batch_id = minibatch_dict["batch_start"]
 
             input_offsets_p = minibatch_dict["input_offsets"][
                 partition_start : (partition_end + 1)
@@ -292,7 +304,7 @@ def __write_minibatches_csr(self, minibatch_dict):
                 }
             )
 
-            end_batch_id = start_batch_id + len(batch_id_array_p) - 1
+            end_batch_id = start_batch_id + num_batches_p - 1
             rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0
 
             full_output_path = os.path.join(
@@ -309,12 +321,19 @@ def __write_minibatches_csr(self, minibatch_dict):
             )
 
     def write_minibatches(self, minibatch_dict):
-        if (minibatch_dict["majors"] is not None) and (
-            minibatch_dict["minors"] is not None
-        ):
+        if "minors" not in minibatch_dict:
+            raise ValueError("invalid columns")
+
+        # PLC API specifies this behavior for empty input
+        # This needs to be handled here to avoid causing a hang
+        if len(minibatch_dict["minors"]) == 0:
+            return
+
+        if "majors" in minibatch_dict and minibatch_dict["majors"] is not None:
             self.__write_minibatches_coo(minibatch_dict)
-        elif (minibatch_dict["major_offsets"] is not None) and (
-            minibatch_dict["minors"] is not None
+        elif (
+            "major_offsets" in minibatch_dict
+            and minibatch_dict["major_offsets"] is not None
         ):
             self.__write_minibatches_csr(minibatch_dict)
         else:
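Condensed, the new dispatch behaves roughly as sketched below. This is a paraphrase, not the committed code: write_coo and write_csr stand in for the private methods, and the else branch is truncated in this view, so its ValueError is an assumption.

def write_minibatches(minibatch_dict, write_coo, write_csr):
    if "minors" not in minibatch_dict:
        raise ValueError("invalid columns")
    # Empty input is specified PLC behavior; writing it here could hang.
    if len(minibatch_dict["minors"]) == 0:
        return
    if minibatch_dict.get("majors") is not None:
        write_coo(minibatch_dict)   # COO path: explicit majors array
    elif minibatch_dict.get("major_offsets") is not None:
        write_csr(minibatch_dict)   # CSR path: compressed major offsets
    else:
        raise ValueError("invalid columns")  # assumed; body truncated in this diff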
(10 more changed files not shown.)
