Skip to content

Commit

Permalink
MAINT: Quick fix for the one pool world (#11)
Browse files Browse the repository at this point in the history
This is just Wonchan's old PR with small comments added. I plan to open
an issue about actually adding at least some estimates about memory
usage. This may not matter for many simple pipelines, but also could
matter a lot for some.

(Memory use is not easy to bound, e.g. for joins or csv reading,
although even a very high overestimate may be useful if we read a small
csv for example.)

---------

Signed-off-by: Sebastian Berg <[email protected]>
Co-authored-by: Wonchan Lee <[email protected]>
  • Loading branch information
seberg and magnatelee authored Jan 10, 2025
1 parent c3581e0 commit 59b04a5
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 26 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cpp/build/
python/build/
ci/manual-ci-test.py
Docker
python/_skbuild
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ RUN mkdir -p /opt/legate-dataframe/conda-env-file
COPY ./conda/environments/*.yaml /opt/legate-dataframe/conda-env-file/

# To ensure we find the GPU version of legate in the docker build.
ARG CONDA_OVERRIDE_CUDA=$CUDA_VERSION
ARG CONDA_OVERRIDE_CUDA=12.4
RUN /bin/bash -c '/opt/conda/bin/mamba env create --name legate-dev --file \
/opt/legate-dataframe/conda-env-file/all_cuda-$(cut --output-delimiter="" -d "." -f 1,2 <<< ${CUDA_VERSION})_arch-x86_64.yaml'
/opt/legate-dataframe/conda-env-file/all_cuda-$(cut --output-delimiter="" -d "." -f 1,2 <<< ${CONDA_OVERRIDE_CUDA})_arch-x86_64.yaml'

# Build and install legate-dataframe
WORKDIR /opt/legate-dataframe
Expand Down
16 changes: 3 additions & 13 deletions ci/manual-ci-test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

# Copyright 2024 NVIDIA Corporation
# Copyright (c) 2024-2025, NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,25 +30,15 @@
"--no-capture-output",
"-n",
"legate-dev",
# TODO: Should not need legate startup,
# see https://github.com/nv-legate/legate.core.internal/issues/1304
"legate",
"--gpus=2",
"cpp/build/gtests/cpp_tests",
"ci/run_ctests.sh",
],
"py": [
"conda",
"run",
"--no-capture-output",
"-n",
"legate-dev",
# TODO: Should not need legate startup,
# see https://github.com/nv-legate/legate.core.internal/issues/1304
"legate",
"--gpus=2",
"--module",
"pytest",
"python/tests",
"ci/run_pytests.sh",
],
}

Expand Down
3 changes: 2 additions & 1 deletion ci/run_pytests.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

# [description]
#
Expand All @@ -26,6 +26,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/tests/
LEGATE_TEST=${LEGATE_TEST:-1} \
legate \
--gpus "$(nvidia-smi -L | wc -l)"\
--fbmem=4000 \
--module pytest \
. \
-sv \
Expand Down
53 changes: 50 additions & 3 deletions cpp/src/core/library.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -69,6 +69,46 @@ class Mapper : public legate::mapping::Mapper {
return mappings;
}

std::optional<std::size_t> allocation_pool_size(const legate::mapping::Task& task,
legate::mapping::StoreTarget memory_kind)
{
const auto task_id = static_cast<int>(task.task_id());

if (memory_kind == legate::mapping::StoreTarget::ZCMEM) {
switch (task_id) {
case legate::dataframe::task::OpCode::Join:
// TODO: Join is identical to GroupBy, but we would have to look at
// both input tables to ge the maximum column number.
return std::nullopt;
case legate::dataframe::task::OpCode::GroupByAggregation: {
// Aggregation use repartitioning which uses ZCMEM for NCCL.
// This depends on the number of columns (first scalar when storing
// the first table is the number of columns).
if (task.is_single_task()) {
// No need for repartitioning, so no need for ZCMEM
return 0;
}
auto num_cols = task.scalars().at(0).value<int32_t>();
auto nrank = task.get_launch_domain().get_volume();

// Space for the exchange buffers containing size chunks:
size_t size_exchange_nbytes = nrank * nrank * 2 * sizeof(std::size_t);
// Space for column packing metadata exchange
size_t sizeof_serialized_column = 6 * 8; // not public, rough upper bound
// num_cols * 2 for the string column child and + 1 for the number of cols
size_t metadata_nbytes = nrank * (num_cols * 2 + 1) * sizeof_serialized_column;

return size_exchange_nbytes + metadata_nbytes;
}
default: return 0;
}
}

// TODO: Returning nullopt prevents other parallel task launches so it would be
// good to provide estimated usage for most tasks here.
return std::nullopt;
}

legate::Scalar tunable_value(legate::TunableID tunable_id) override { return legate::Scalar{0}; }

private:
Expand All @@ -81,8 +121,15 @@ legate::Library create_and_registrate_library()
if (env == nullptr || std::string{env} == "0") {
GlobalMemoryResource::set_as_default_mmr_resource();
}
auto context = legate::Runtime::get_runtime()->find_or_create_library(
library_name, legate::ResourceConfig{}, std::make_unique<Mapper>());
// Set with_has_allocations globally since currently all tasks allocate (and libcudf may also)
auto options = legate::VariantOptions{}.with_has_allocations(true);
auto context =
legate::Runtime::get_runtime()->find_or_create_library(library_name,
legate::ResourceConfig{},
std::make_unique<Mapper>(),
{{legate::VariantCode::CPU, options},
{legate::VariantCode::GPU, options},
{legate::VariantCode::OMP, options}});
task::Registry::get_registrar().register_all_tasks(context);
return legate::Runtime::get_runtime()->find_library(legate::dataframe::library_name);
}
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/core/repartition_by_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class ExchangedSizes {
: _ctx(ctx), stream(ctx.tmp_stream())
{
assert(columns.size() == ctx.nranks - 1);
// Note: Size of this buffer is taken into account in the mapper:
_all_sizes =
legate::create_buffer<std::size_t>(ctx.nranks * ctx.nranks * 2, Memory::Kind::Z_COPY_MEM);

Expand Down Expand Up @@ -151,6 +152,7 @@ std::pair<std::vector<cudf::table_view>, std::map<int, rmm::device_buffer>> shuf
std::size_t nbytes = sizes.metadata(peer, ctx.rank);
if (nbytes > 0) {
assert(peer != ctx.rank);
// Note: Size of this buffer is taken into account in the mapper:
recv_metadata.insert(
{peer, legate::create_buffer<uint8_t>(nbytes, Memory::Kind::Z_COPY_MEM)});
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/tests/test_cudf_interop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,7 +39,8 @@ namespace {
static const char* library_name = "test.cudf_interop";

struct RoundTripTableTask : public legate::LegateTask<RoundTripTableTask> {
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto GPU_VARIANT_OPTIONS = legate::VariantOptions{}.with_has_allocations(true);

static void gpu_variant(legate::TaskContext context)
{
Expand Down
5 changes: 3 additions & 2 deletions cpp/tests/test_repartition_by_hash.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,7 +38,8 @@ namespace {
static const char* library_name = "test.repartition_by_hash";

struct CheckHash : public legate::LegateTask<CheckHash> {
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto GPU_VARIANT_OPTIONS = legate::VariantOptions{}.with_has_allocations(true);

static void gpu_variant(legate::TaskContext context)
{
Expand Down
8 changes: 5 additions & 3 deletions cpp/tests/test_task.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,7 +29,8 @@ namespace {
static const char* library_name = "test.global_row_offset";

struct GlobalRowOffsetTask : public legate::LegateTask<GlobalRowOffsetTask> {
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto TASK_ID = legate::LocalTaskID{0};
static constexpr auto GPU_VARIANT_OPTIONS = legate::VariantOptions{}.with_has_allocations(true);

static void gpu_variant(legate::TaskContext context)
{
Expand All @@ -54,7 +55,8 @@ struct GlobalRowOffsetTask : public legate::LegateTask<GlobalRowOffsetTask> {
};

struct TaskArgumentMix : public legate::LegateTask<TaskArgumentMix> {
static constexpr auto TASK_ID = legate::LocalTaskID{1};
static constexpr auto TASK_ID = legate::LocalTaskID{1};
static constexpr auto GPU_VARIANT_OPTIONS = legate::VariantOptions{}.with_has_allocations(true);

static void gpu_variant(legate::TaskContext context)
{
Expand Down

0 comments on commit 59b04a5

Please sign in to comment.