Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get core Dask functionality from cfdm #836

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version NEXTVERSION
-------------------

**2024-??-??**
**2024-12-??**
davidhassell marked this conversation as resolved.
Show resolved Hide resolved

* Allow ``'nearest_dtos'`` 2-d regridding to work with discrete
sampling geometry source grids
Expand All @@ -23,6 +23,8 @@ version NEXTVERSION
* New class `cf.NetCDF4Array`
* New class `cf.CFAH5netcdfArray`
* New class `cf.CFANetCDF4Array`
* Replace core `dask` functionality with that imported from `cfdm`
(https://github.com/NCAS-CMS/cf-python/pull/836)
* Fix bug that sometimes puts an incorrect ``radian-1`` or
``radian-2`` in the returned units of the differential operator
methods and functions
Expand Down
2 changes: 1 addition & 1 deletion cf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
Find the total amount of physical memory (in bytes).

CHUNKSIZE: `int`
The chunk size (in bytes) for data storage and processing.
The Dask chunk size (in bytes). See `cf.chunksize`.

TEMPDIR: `str`
The location to store temporary files. By default it is the
Expand Down
2 changes: 1 addition & 1 deletion cf/data/array/fullarray.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import numpy as np
from cfdm.data.mixin import IndexMixin

from ...functions import indices_shape, parse_indices
from .abstract import Array
from .mixin import IndexMixin

_FULLARRAY_HANDLED_FUNCTIONS = {}

Expand Down
60 changes: 1 addition & 59 deletions cf/data/array/h5netcdfarray.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import cfdm

from ...mixin_container import Container
from .locks import netcdf_lock
from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin, IndexMixin
from .mixin import ActiveStorageMixin, ArrayMixin, FileArrayMixin


class H5netcdfArray(
ActiveStorageMixin,
IndexMixin,
FileArrayMixin,
ArrayMixin,
Container,
Expand All @@ -23,59 +21,3 @@ class H5netcdfArray(
.. versionadded:: NEXTVERSION

"""

def __dask_tokenize__(self):
"""Return a value fully representative of the object.

.. versionadded:: NEXTVERSION

"""
return super().__dask_tokenize__() + (self.get_mask(),)

@property
def _lock(self):
"""Set the lock for use in `dask.array.from_array`.

Returns a lock object because concurrent reads are not
currently supported by the HDF5 library. The lock object will
be the same for all `NetCDF4Array` and `H5netcdfArray`
instances, regardless of the dataset they access, which means
that access to all netCDF and HDF files coordinates around the
same lock.

.. versionadded:: NEXTVERSION

"""
return netcdf_lock

def _get_array(self, index=None):
"""Returns a subspace of the dataset variable.

.. versionadded:: NEXTVERSION

.. seealso:: `__array__`, `index`

:Parameters:

{{index: `tuple` or `None`, optional}}

:Returns:

`numpy.ndarray`
The subspace.

"""
if index is None:
index = self.index()

# We need to lock because the netCDF file is about to be accessed.
self._lock.acquire()

# It's cfdm.H5netcdfArray.__getitem__ that we want to
# call here, but we use 'Container' in super because
# that comes immediately before cfdm.H5netcdfArray in
# the method resolution order.
array = super(Container, self).__getitem__(index)

self._lock.release()
return array
4 changes: 0 additions & 4 deletions cf/data/array/locks.py

This file was deleted.

1 change: 0 additions & 1 deletion cf/data/array/mixin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@
from .cfamixin import CFAMixin
from .compressedarraymixin import CompressedArrayMixin
from .filearraymixin import FileArrayMixin
from .indexmixin import IndexMixin
3 changes: 1 addition & 2 deletions cf/data/array/mixin/cfamixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from itertools import accumulate, product

import numpy as np

from ...utils import chunk_locations, chunk_positions
from cfdm.data.utils import chunk_locations, chunk_positions


class CFAMixin:
Expand Down
3 changes: 1 addition & 2 deletions cf/data/array/mixin/compressedarraymixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,11 @@ def to_dask_array(self, chunks="auto"):
from functools import partial

import dask.array as da
from cfdm.data.utils import normalize_chunks
from dask import config
from dask.array.core import getter
from dask.base import tokenize

from ...utils import normalize_chunks

name = (f"{self.__class__.__name__}-{tokenize(self)}",)

dtype = self.dtype
Expand Down
Loading