opendatacube · robbibt · Jan 2, 2024 · Dec 28, 2023 · Dec 30, 2023 · Dec 30, 2023
diff --git a/docs/api.rst b/docs/api.rst
@@ -20,6 +20,8 @@ Interfacing with :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset` cla
    ODCExtension.spatial_dims
    ODCExtension.crs
    ODCExtension.map_bounds
+   ODCExtension.crop
+   ODCExtension.mask
 
    ODCExtensionDa
    ODCExtensionDa.assign_crs
@@ -50,6 +52,8 @@ Interfacing with :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset` cla
    to_cog
    write_cog
    compress
+   mask
+   crop
 
 
 odc.geo.geobox

diff --git a/odc/geo/_xr_interop.py b/odc/geo/_xr_interop.py
@@ -37,6 +37,7 @@
 from .types import Resolution, xy_
 
 # pylint: disable=import-outside-toplevel
+# pylint: disable=too-many-lines
 if have.rasterio:
     from ._cog import to_cog, write_cog
     from ._compress import compress
@@ -245,6 +246,96 @@ def assign_crs(
     return xx
 
 
+def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT:
+    """
+    Apply a polygon geometry as a mask, setting all xr.Dataset
+    or xr.DataArray pixels outside the rasterized polygon to ``NaN``.
+
+    :param xx:
+       :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
+
+    :param poly:
+       Geometry shape used to mask ``xx``.
+
+    :param all_touched:
+        If ``True``, all pixels touched by ``poly`` will remain unmasked.
+        If ``False``, only pixels whose center is within the polygon or
+        that are selected by Bresenham's line algorithm will remain unmasked.
+
+    :return:
+        A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`
+        masked by ``poly``.
+    """
+
+    # Reproject `poly` to match `xx`
+    poly = poly.to_crs(crs=xx.odc.crs)
+
+    # Rasterise `poly` into geobox of `xx`
+    rasterized = rasterize(
+        poly=poly,
+        how=xx.odc.geobox,
+        all_touched=all_touched,
+    )
+
+    # Mask data outside rasterized `poly`
+    xx_masked = xx.where(rasterized.data)
+
+    return xx_masked
+
+
+def crop(
+    xx: XrT, poly: Geometry, apply_mask: bool = True, all_touched: bool = True
+) -> XrT:
+    """
+    Crops and optionally mask an xr.Dataset or xr.DataArray (``xx``) to
+    the spatial extent of a geometry (``poly``).
+
+    :param xx:
+       :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`.
+
+    :param poly:
+       Geometry shape used to crop ``xx``.
+
+    :param apply_mask:
+       Whether to mask out pixels outside of the rasterized extent of
+       ``poly`` by setting them to ``NaN``.
+
+    :param all_touched:
+        If ``True``, all pixels touched by ``poly`` will remain unmasked.
+        If ``False``, only pixels whose center is within the polygon or
+        that are selected by Bresenham's line algorithm will remain unmasked.
+
+    :return:
+        A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`
+        cropped and optionally masked to the spatial extent of ``poly``.
+    """
+    # Reproject `poly` to match `xx`
+    poly = poly.to_crs(crs=xx.odc.crs)
+
+    # Verify that `poly` overlaps with `xx` extent
+    if not poly.intersects(xx.odc.geobox.extent):
+        raise ValueError(
+            "The supplied `poly` must overlap spatially with the extent of `xx`."
+        )
+
+    # Optionally mask data outside rasterized `poly`
+    if apply_mask:
+        xx = mask(xx, poly, all_touched=all_touched)
+
+    # Identify spatial dims and raise error if None
+    sdims = spatial_dims(xx)
+    if sdims is None:
+        raise ValueError("Can't locate spatial dimensions")
+
+    # Crop `xx` to the bounding box of `poly`. First create new geobox
+    # with the same pixel grid as `xx` but enclosing `poly`. Then
+    # calculate slices into `xx` for the intersection between both geoboxes.
+    roi = xx.odc.geobox.overlap_roi(xx.odc.geobox.enclosing(poly))
+    xx_cropped = xx.isel({sdims[0]: roi[0], sdims[1]: roi[1]})
+
+    return xx_cropped
+
+
 def xr_coords(
     gbox: SomeGeoBox, crs_coord_name: Optional[str] = _DEFAULT_CRS_COORD_NAME
 ) -> Dict[Hashable, xarray.DataArray]:
@@ -657,6 +748,13 @@ def geobox(self) -> Optional[SomeGeoBox]:
         """Query :py:class:`~odc.geo.geobox.GeoBox` or :py:class:`~odc.geo.gcp.GCPGeoBox`."""
         return self._state.geobox
 
+    @property
+    def aspect(self) -> float:
+        gbox = self._state.geobox
+        if gbox is None:
+            return 1
+        return gbox.aspect
+
     def output_geobox(self, crs: SomeCRS, **kw) -> GeoBox:
         """
         Compute geobox of this data in other projection.
@@ -677,6 +775,9 @@ def map_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
 
         return gbox.map_bounds()
 
+    mask = _wrap_op(mask)
+    crop = _wrap_op(crop)
+
 
 @xarray.register_dataarray_accessor("odc")
 class ODCExtensionDa(ODCExtension):

diff --git a/odc/geo/xr.py b/odc/geo/xr.py
@@ -18,6 +18,8 @@
     ODCExtensionDs,
     assign_crs,
     colorize,
+    crop,
+    mask,
     rasterize,
     register_geobox,
     spatial_dims,
@@ -45,6 +47,8 @@
     "xr_zeros",
     "colorize",
     "to_rgba",
+    "crop",
+    "mask",
 ]
 
 # pylint: disable=import-outside-toplevel,unused-import

diff --git a/tests/test_xr_interop.py b/tests/test_xr_interop.py
@@ -554,3 +554,133 @@ def test_geobox_0px(geobox: GeoBox, roi: ROI):
     assert xx.odc.geobox == geobox
 
     assert xx[roi].odc.geobox is None
+
+
+@pytest.mark.parametrize(
+    "poly, expected_fail",
+    [
+        (
+            geom.polygon(
+                ((-8, 8), (6, 1), (-4, -6)),
+                crs="EPSG:4326",
+            ),
+            False,  # Fully inside, matching CRS
+        ),
+        (
+            geom.polygon(
+                ((-24, 8), (-10, 1), (-20, -6)),
+                crs="EPSG:4326",
+            ),
+            False,  # Overlapping, matching CRS
+        ),
+        (
+            geom.polygon(
+                ((-40, 8), (-26, 1), (-36, -6)),
+                crs="EPSG:4326",
+            ),
+            True,  # Fully outside, matching CRS
+        ),
+        (
+            geom.polygon(
+                ((-890555, 893463), (667916, 111325), (-445277, -669141)),
+                crs="EPSG:3857",
+            ),
+            False,  # Fully inside, different CRS
+        ),
+        (
+            geom.polygon(
+                ((-2671667, 893463), (-1113194, 111325), (-2226389, -669141)),
+                crs="EPSG:3857",
+            ),
+            False,  # Overlapping, different CRS
+        ),
+        (
+            geom.polygon(
+                ((-4452779, 893463), (-2894306, 111325), (-4007501, -669141)),
+                crs="EPSG:3857",
+            ),
+            True,  # Fully outside, different CRS
+        ),
+    ],
+)
+def test_crop(xx_epsg4326, poly, expected_fail):
+    xx = xx_epsg4326
+
+    # If fail is expected, pass test
+    if expected_fail:
+        with pytest.raises(ValueError):
+            xx_cropped = xx.odc.crop(poly=poly)
+        return
+
+    # Crop with default settings
+    xx_cropped = xx.odc.crop(poly=poly)
+
+    # Verify that cropped data is smaller
+    assert xx_cropped.size <= xx.size
+
+    # Verify that resolution and alignment have not changed
+    np.testing.assert_array_almost_equal(
+        xx.odc.geobox.resolution.xy, xx_cropped.odc.geobox.resolution.xy
+    )
+    np.testing.assert_array_almost_equal(
+        xx.odc.geobox.alignment.xy, xx_cropped.odc.geobox.alignment.xy
+    )
+
+    # Verify that data contains NaN from default masking step
+    assert xx_cropped.isnull().any()
+
+    # Verify that no NaNs exist if masking not applied
+    xx_nomask = xx.odc.crop(poly=poly, apply_mask=False)
+    assert xx_nomask.notnull().all()
+
+    # Verify that cropping also works on datasets
+    xx_ds = xx.to_dataset(name="test")
+    xx_ds_cropped = xx_ds.odc.crop(poly=poly)
+    assert xx_ds_cropped.test.size <= xx_ds.test.size
+    np.testing.assert_array_almost_equal(
+        xx_ds.odc.geobox.resolution.xy, xx_ds_cropped.odc.geobox.resolution.xy
+    )
+    np.testing.assert_array_almost_equal(
+        xx_ds.odc.geobox.alignment.xy, xx_ds_cropped.odc.geobox.alignment.xy
+    )
+
+
+@pytest.mark.parametrize(
+    "poly",
+    [
+        geom.polygon(((-8, 8), (6, 1), (-4, -6)), crs="EPSG:4326"),
+        geom.polygon(
+            ((-890555, 893463), (667916, 111325), (-445277, -669141)), crs="EPSG:3857"
+        ),
+    ],
+)
+def test_mask(xx_epsg4326, poly):
+    # Create test data and replace values with random integers so we can
+    # reliably test that pixel values are the same before and after masking
+    xx = xx_epsg4326
+    xx.values[:] = np.random.randint(0, 10, size=xx.shape)
+
+    # Apply mask
+    xx_masked = xx.odc.mask(poly)
+
+    # Verify that geobox is the same
+    assert xx_masked.odc.geobox == xx.odc.geobox
+
+    # Verify that data contains NaN from default masking step
+    assert xx_masked.isnull().any()
+
+    # Verify that non-masked values are the same as in non-masked dataset
+    masked_pixels = np.isfinite(xx_masked.data)
+    np.testing.assert_array_almost_equal(
+        xx_masked.data[masked_pixels], xx.data[masked_pixels]
+    )
+
+    # Verify that `all_touched=False` produces fewer unmasked pixels
+    xx_notouched = xx.odc.mask(poly, all_touched=False)
+    assert xx_notouched.notnull().sum() < xx_masked.notnull().sum()
+
+    # Verify that masking also works on datasets
+    xx_ds = xx.to_dataset(name="test")
+    xx_ds_masked = xx_ds.odc.mask(poly=poly)
+    assert xx_ds_masked.odc.geobox == xx_ds.odc.geobox
+    assert xx_ds_masked.test.isnull().any()