Skip to content

Commit

Permalink
Expose this to the sliced Python API
Browse files Browse the repository at this point in the history
  • Loading branch information
franzpoeschel committed Feb 13, 2024
1 parent ae644ef commit fefdf02
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 10 deletions.
1 change: 1 addition & 0 deletions docs/source/usage/workflow.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Storing and reading chunks
The chunk is then stored by specifying an empty offset vector ``{}``.
The chunk extent vector must be equivalent to the global extent in all non-joined dimensions (i.e. joined arrays allow no further sub-chunking other than concatenation along the joined dimension).
The joined dimension of the extent vector specifies the extent that this piece should have along the joined dimension.
In the Python API, the slice-based setter syntax can be used as an abbreviation since the necessary information is determined from the passed array, e.g. ``record_component[()] = local_data``.
The global extent of the dataset along the joined dimension will then be the sum of all local chunk extents along the joined dimension.

Since openPMD follows a struct-of-array layout of data, it is important not to lose correlation of data between components. E.g., joining an array must take care that ``particles/e/position/x`` and ``particles/e/position/y`` are joined in uniform way.
Expand Down
9 changes: 7 additions & 2 deletions examples/5_write_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@
version.parse(adios2.__version__) >= version.parse('2.9.0')
except ImportError:
USE_JOINED_DIMENSION = False

if __name__ == "__main__":
# also works with any other MPI communicator
comm = MPI.COMM_WORLD

# global data set to write: [MPI_Size * 10, 300]
# each rank writes a 10x300 slice with its MPI rank as values
local_value = comm.size
local_value = comm.rank
local_data = np.ones(10 * 300,
dtype=np.double).reshape(10, 300) * local_value
if 0 == comm.rank:
Expand Down Expand Up @@ -77,7 +78,11 @@
# example shows a 1D domain decomposition in first index

if USE_JOINED_DIMENSION:
mymesh.store_chunk(local_data, [], [10, 300])
# explicit API
# mymesh.store_chunk(local_data, [], [10, 300])
mymesh[:, :] = local_data
# or short:
# mymesh[()] = local_data
else:
mymesh[comm.rank*10:(comm.rank+1)*10, :] = local_data
if 0 == comm.rank:
Expand Down
119 changes: 111 additions & 8 deletions src/binding/python/RecordComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@
* and the GNU Lesser General Public License along with openPMD-api.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include <limits>
#include <pybind11/detail/common.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "openPMD/DatatypeHelpers.hpp"
#include "openPMD/Error.hpp"
#include "openPMD/RecordComponent.hpp"
#include "openPMD/Series.hpp"
#include "openPMD/backend/BaseRecordComponent.hpp"

Expand All @@ -40,6 +43,7 @@
#include <exception>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
Expand Down Expand Up @@ -111,14 +115,48 @@ inline std::tuple<Offset, Extent, std::vector<bool>> parseTupleSlices(
py::slice slice = py::cast<py::slice>(slices[i]);

size_t start, stop, step, slicelength;
auto mocked_extent = full_extent.at(curAxis);
// py::ssize_t is a signed type, so we will need to use another
// magic number for JOINED_DIMENSION in this computation, since the
// C++ API's JOINED_DIMENSION would be interpreted as a negative
// index
bool undo_mocked_extent = false;
constexpr auto PYTHON_JOINED_DIMENSION =
std::numeric_limits<py::ssize_t>::max() - 1;
if (mocked_extent == Dataset::JOINED_DIMENSION)
{
undo_mocked_extent = true;
mocked_extent = PYTHON_JOINED_DIMENSION;
}
if (!slice.compute(
full_extent.at(curAxis),
&start,
&stop,
&step,
&slicelength))
mocked_extent, &start, &stop, &step, &slicelength))
throw py::error_already_set();

if (undo_mocked_extent)
{
// do the same calculation again, but with another global extent
// (that is not smaller than the previous in order to avoid
// cutting off the range)
// this is to avoid the unlikely case
// that the mocked alternative value is actually the intended
// one
size_t start2, stop2, step2, slicelength2;
if (!slice.compute(
mocked_extent + 1,
&start2,
&stop2,
&step2,
&slicelength2))
throw py::error_already_set();
if (slicelength == slicelength2)
{
// slicelength was given as an absolute value and
// accidentally hit our mocked value
// --> keep that value
undo_mocked_extent = false;
}
}

// TODO PySlice_AdjustIndices: Python 3.6.1+
// Adjust start/end slice indices assuming a sequence of the
// specified length. Out of bounds indices are clipped in a
Expand All @@ -132,7 +170,10 @@ inline std::tuple<Offset, Extent, std::vector<bool>> parseTupleSlices(

// verified for size later in C++ API
offset.at(curAxis) = start;
extent.at(curAxis) = slicelength; // stop - start;
extent.at(curAxis) =
undo_mocked_extent && slicelength == PYTHON_JOINED_DIMENSION
? Dataset::JOINED_DIMENSION
: slicelength; // stop - start;

continue;
}
Expand Down Expand Up @@ -187,6 +228,59 @@ inline std::tuple<Offset, Extent, std::vector<bool>> parseTupleSlices(
return std::make_tuple(offset, extent, flatten);
}

inline std::tuple<Offset, Extent, std::vector<bool>> parseJoinedTupleSlices(
uint8_t const ndim,
Extent const &full_extent,
py::tuple const &slices,
size_t joined_dim,
py::array const &a)
{

std::vector<bool> flatten;
Offset offset;
Extent extent;
std::tie(offset, extent, flatten) =
parseTupleSlices(ndim, full_extent, slices);
for (size_t i = 0; i < ndim; ++i)
{
if (offset.at(i) != 0)
{
throw std::runtime_error(
"Joined array: Cannot use non-zero offset in store_chunk "
"(offset[" +
std::to_string(i) + "] = " + std::to_string(offset[i]) + ").");
}
if (flatten.at(i))
{
throw std::runtime_error(
"Flattened slices unimplemented for joined arrays.");
}

if (i == joined_dim)
{
if (extent.at(i) == 0 || extent.at(i) == Dataset::JOINED_DIMENSION)
{
extent[i] = a.shape()[i];
}
}
else
{
if (extent.at(i) != full_extent.at(i))
{
throw std::runtime_error(
"Joined array: Must use full extent in store_chunk for "
"non-joined dimension "
"(local_extent[" +
std::to_string(i) + "] = " + std::to_string(extent[i]) +
" != global_extent[" + std::to_string(i) +
"] = " + std::to_string(full_extent[i]) + ").");
}
}
}
offset.clear();
return std::make_tuple(offset, extent, flatten);
}

/** Check an array is a contiguous buffer
*
* Required are contiguous buffers for store and load
Expand Down Expand Up @@ -388,8 +482,17 @@ store_chunk(RecordComponent &r, py::array &a, py::tuple const &slices)
Offset offset;
Extent extent;
std::vector<bool> flatten;
std::tie(offset, extent, flatten) =
parseTupleSlices(ndim, full_extent, slices);
if (auto joined_dimension = r.joinedDimension();
joined_dimension.has_value())
{
std::tie(offset, extent, flatten) = parseJoinedTupleSlices(
ndim, full_extent, slices, *joined_dimension, a);
}
else
{
std::tie(offset, extent, flatten) =
parseTupleSlices(ndim, full_extent, slices);
}

store_chunk(r, a, offset, extent, flatten);
}
Expand Down

0 comments on commit fefdf02

Please sign in to comment.