From ac3589644f52e56d127223ac7d6b8f92234e645f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 29 Jan 2025 16:19:48 +0100 Subject: [PATCH] ENH: Support more return columns (#17) Each column takes up some space in the return and this space is currently limited but settable. This bumps the limit for all tasks so that we should be able to use well above 2500 columns everywhere. However, using that many columns may require passing `--zcmem=100` or similar. (Clearly, it is very plausible that we might bump this further if we run into a use-case that needs that.) --- Tested just with csv, since that is where it came up (but if used, it tends to be the first call). Either way, it should affect everything, so set it globally. --------- Signed-off-by: Sebastian Berg --- cpp/src/core/library.cpp | 3 ++- python/tests/test_csv.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/cpp/src/core/library.cpp b/cpp/src/core/library.cpp index 326835a..4875e26 100644 --- a/cpp/src/core/library.cpp +++ b/cpp/src/core/library.cpp @@ -122,7 +122,8 @@ legate::Library create_and_registrate_library() GlobalMemoryResource::set_as_default_mmr_resource(); } // Set with_has_allocations globally since currently all tasks allocate (and libcudf may also) - auto options = legate::VariantOptions{}.with_has_allocations(true); + // Also ensure we can generally work with 2000+ return columns. 
+ auto options = legate::VariantOptions{}.with_has_allocations(true).with_return_size(131072); auto context = legate::Runtime::get_runtime()->find_or_create_library(library_name, legate::ResourceConfig{}, diff --git a/python/tests/test_csv.py b/python/tests/test_csv.py index 2e631c1..df882b7 100644 --- a/python/tests/test_csv.py +++ b/python/tests/test_csv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION +# Copyright (c) 2024-2025, NVIDIA CORPORATION # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,6 +58,19 @@ def test_read_single_rows(tmp_path): assert_frame_equal(tbl, df) +def test_read_single_many_columns(tmp_path): + # Legate has a limit on number of returns which limits the + # number of columns (currently). Make sure we support 1250. + # 2500+ are OK, but require a higher `--zcmem`. + file = tmp_path / "file.csv" + # Write many columns and a few rows at once (write_text() truncates). + ncols = 1250 + row = ",".join(str(i) for i in range(ncols)) + "\n" + file.write_text(row * 5) + + csv_read(file, dtypes=["str"] * ncols) + + def test_read_many_files_per_rank(tmp_path): # Use uneven number to test splitting filenames = str(tmp_path) + "/*.csv"