From bf2a1c9352a3d63c9768c5b7926043f212228462 Mon Sep 17 00:00:00 2001 From: Robbi Bishop-Taylor Date: Thu, 28 Dec 2023 09:36:43 +0000 Subject: [PATCH 1/3] Add new mask and crop methods --- docs/api.rst | 4 ++ odc/geo/_xr_interop.py | 101 ++++++++++++++++++++++++++++++ odc/geo/xr.py | 4 ++ tests/test_xr_interop.py | 130 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 239 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index 95d382a8..dd71e478 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -20,6 +20,8 @@ Interfacing with :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset` cla ODCExtension.spatial_dims ODCExtension.crs ODCExtension.map_bounds + ODCExtension.crop + ODCExtension.mask ODCExtensionDa ODCExtensionDa.assign_crs @@ -50,6 +52,8 @@ Interfacing with :py:class:`xarray.DataArray` and :py:class:`xarray.Dataset` cla to_cog write_cog compress + mask + crop odc.geo.geobox diff --git a/odc/geo/_xr_interop.py b/odc/geo/_xr_interop.py index a5af6f0d..5bb2d8f0 100644 --- a/odc/geo/_xr_interop.py +++ b/odc/geo/_xr_interop.py @@ -37,6 +37,7 @@ from .types import Resolution, xy_ # pylint: disable=import-outside-toplevel +# pylint: disable=too-many-lines if have.rasterio: from ._cog import to_cog, write_cog from ._compress import compress @@ -245,6 +246,96 @@ def assign_crs( return xx +def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT: + """ + Apply a polygon geometry as a mask, setting all xr.Dataset + or xr.DataArray pixels outside the rasterized polygon to ``NaN``. + + :param xx: + :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`. + + :param poly: + Geometry shape used to mask ``xx``. + + :param all_touched: + If ``True``, all pixels touched by ``poly`` will remain unmasked. + If ``False``, only pixels whose center is within the polygon or + that are selected by Bresenham's line algorithm will remain unmasked. + + :return: + A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` + masked by ``poly``. + """ + + # Reproject `poly` to match `xx` + poly = poly.to_crs(crs=xx.odc.crs) + + # Rasterise `poly` into geobox of `xx` + rasterized = rasterize( + poly=poly, + how=xx.odc.geobox, + all_touched=all_touched, + ) + + # Mask data outside rasterized `poly` + xx_masked = xx.where(rasterized.data) + + return xx_masked + + +def crop( + xx: XrT, poly: Geometry, apply_mask: bool = True, all_touched: bool = True +) -> XrT: + """ + Crops and optionally mask an xr.Dataset or xr.DataArray (``xx``) to + the spatial extent of a geometry (``poly``). + + :param xx: + :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`. + + :param poly: + Geometry shape used to crop ``xx``. + + :param apply_mask: + Whether to mask out pixels outside of the rasterized extent of + ``poly`` by setting them to ``NaN``. + + :param all_touched: + If ``True``, all pixels touched by ``poly`` will remain unmasked. + If ``False``, only pixels whose center is within the polygon or + that are selected by Bresenham's line algorithm will remain unmasked. + + :return: + A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` + cropped and optionally masked to the spatial extent of ``poly``. + """ + # Reproject `poly` to match `xx` + poly = poly.to_crs(crs=xx.odc.crs) + + # Verify that `poly` overlaps with `xx` extent + if not poly.intersects(xx.odc.geobox.extent): + raise ValueError( + "The supplied `poly` must overlap spatially with the extent of `xx`." + ) + + # Optionally mask data outside rasterized `poly` + if apply_mask: + xx = mask(xx, poly, all_touched=all_touched) + + # Identify spatial dims and raise error if None + sdims = spatial_dims(xx) + if sdims is None: + raise ValueError("Can't locate spatial dimensions") + + # Crop `xx` to the bounding box of `poly`. First create new geobox + # with the same pixel grid as `xx` but enclosing `poly`. Then + # calculate slices into `xx` for the intersection between both geoboxes. + roi = xx.odc.geobox.overlap_roi(xx.odc.geobox.enclosing(poly)) + xx_cropped = xx.isel({sdims[0]: roi[0], sdims[1]: roi[1]}) + + return xx_cropped + + def xr_coords( gbox: SomeGeoBox, crs_coord_name: Optional[str] = _DEFAULT_CRS_COORD_NAME ) -> Dict[Hashable, xarray.DataArray]: @@ -657,6 +748,13 @@ def geobox(self) -> Optional[SomeGeoBox]: """Query :py:class:`~odc.geo.geobox.GeoBox` or :py:class:`~odc.geo.gcp.GCPGeoBox`.""" return self._state.geobox + @property + def aspect(self) -> float: + gbox = self._state.geobox + if gbox is None: + return 1 + return gbox.aspect + def output_geobox(self, crs: SomeCRS, **kw) -> GeoBox: """ Compute geobox of this data in other projection. @@ -677,6 +775,9 @@ def map_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]: return gbox.map_bounds() + mask = _wrap_op(mask) + crop = _wrap_op(crop) + @xarray.register_dataarray_accessor("odc") class ODCExtensionDa(ODCExtension): diff --git a/odc/geo/xr.py b/odc/geo/xr.py index ac5ae13e..ff509a5a 100644 --- a/odc/geo/xr.py +++ b/odc/geo/xr.py @@ -18,6 +18,8 @@ ODCExtensionDs, assign_crs, colorize, + crop, + mask, rasterize, register_geobox, spatial_dims, @@ -45,6 +47,8 @@ "xr_zeros", "colorize", "to_rgba", + "crop", + "mask", ] # pylint: disable=import-outside-toplevel,unused-import diff --git a/tests/test_xr_interop.py b/tests/test_xr_interop.py index 63fa59f6..36689555 100644 --- a/tests/test_xr_interop.py +++ b/tests/test_xr_interop.py @@ -554,3 +554,133 @@ def test_geobox_0px(geobox: GeoBox, roi: ROI): assert xx.odc.geobox == geobox assert xx[roi].odc.geobox is None + + +@pytest.mark.parametrize( + "poly, expected_fail", + [ + ( + geom.polygon( + ((-8, 8), (6, 1), (-4, -6)), + crs="EPSG:4326", + ), + False, # Fully inside, matching CRS + ), + ( + geom.polygon( + ((-24, 8), (-10, 1), (-20, -6)), + crs="EPSG:4326", + ), + False, # Overlapping, matching CRS + ), + ( + geom.polygon( + ((-40, 8), (-26, 1), (-36, -6)), + crs="EPSG:4326", + ), + True, # Fully outside, matching CRS + ), + ( + geom.polygon( + ((-890555, 893463), (667916, 111325), (-445277, -669141)), + crs="EPSG:3857", + ), + False, # Fully inside, different CRS + ), + ( + geom.polygon( + ((-2671667, 893463), (-1113194, 111325), (-2226389, -669141)), + crs="EPSG:3857", + ), + False, # Overlapping, different CRS + ), + ( + geom.polygon( + ((-4452779, 893463), (-2894306, 111325), (-4007501, -669141)), + crs="EPSG:3857", + ), + True, # Fully outside, different CRS + ), + ], +) +def test_crop(xx_epsg4326, poly, expected_fail): + xx = xx_epsg4326 + + # If fail is expected, pass test + if expected_fail: + with pytest.raises(ValueError): + xx_cropped = xx.odc.crop(poly=poly) + return + + # Crop with default settings + xx_cropped = xx.odc.crop(poly=poly) + + # Verify that cropped data is smaller + assert xx_cropped.size <= xx.size + + # Verify that resolution and alignment have not changed + np.testing.assert_array_almost_equal( + xx.odc.geobox.resolution.xy, xx_cropped.odc.geobox.resolution.xy + ) + np.testing.assert_array_almost_equal( + xx.odc.geobox.alignment.xy, xx_cropped.odc.geobox.alignment.xy + ) + + # Verify that data contains NaN from default masking step + assert xx_cropped.isnull().any() + + # Verify that no NaNs exist if masking not applied + xx_nomask = xx.odc.crop(poly=poly, apply_mask=False) + assert xx_nomask.notnull().all() + + # Verify that cropping also works on datasets + xx_ds = xx.to_dataset(name="test") + xx_ds_cropped = xx_ds.odc.crop(poly=poly) + assert xx_ds_cropped.test.size <= xx_ds.test.size + np.testing.assert_array_almost_equal( + xx_ds.odc.geobox.resolution.xy, xx_ds_cropped.odc.geobox.resolution.xy + ) + np.testing.assert_array_almost_equal( + xx_ds.odc.geobox.alignment.xy, xx_ds_cropped.odc.geobox.alignment.xy + ) + + +@pytest.mark.parametrize( + "poly", + [ + geom.polygon(((-8, 8), (6, 1), (-4, -6)), crs="EPSG:4326"), + geom.polygon( + ((-890555, 893463), (667916, 111325), (-445277, -669141)), crs="EPSG:3857" + ), + ], +) +def test_mask(xx_epsg4326, poly): + # Create test data and replace values with random integers so we can + # reliably test that pixel values are the same before and after masking + xx = xx_epsg4326 + xx.values[:] = np.random.randint(0, 10, size=xx.shape) + + # Apply mask + xx_masked = xx.odc.mask(poly) + + # Verify that geobox is the same + assert xx_masked.odc.geobox == xx.odc.geobox + + # Verify that data contains NaN from default masking step + assert xx_masked.isnull().any() + + # Verify that non-masked values are the same as in non-masked dataset + masked_pixels = np.isfinite(xx_masked.data) + np.testing.assert_array_almost_equal( + xx_masked.data[masked_pixels], xx.data[masked_pixels] + ) + + # Verify that `all_touched=False` produces fewer unmasked pixels + xx_notouched = xx.odc.mask(poly, all_touched=False) + assert xx_notouched.notnull().sum() < xx_masked.notnull().sum() + + # Verify that masking also works on datasets + xx_ds = xx.to_dataset(name="test") + xx_ds_masked = xx_ds.odc.mask(poly=poly) + assert xx_ds_masked.odc.geobox == xx_ds.odc.geobox + assert xx_ds_masked.test.isnull().any() From ffd4cb3cbca2b91fb428ed3d8871f19e05723f0d Mon Sep 17 00:00:00 2001 From: Robbi Bishop-Taylor Date: Sat, 30 Dec 2023 03:00:04 +0000 Subject: [PATCH 2/3] Address review feedback --- odc/geo/_xr_interop.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/odc/geo/_xr_interop.py b/odc/geo/_xr_interop.py index 5bb2d8f0..bd483467 100644 --- a/odc/geo/_xr_interop.py +++ b/odc/geo/_xr_interop.py @@ -34,8 +34,10 @@ from .geom import Geometry from .math import affine_from_axis, maybe_int, resolution_from_affine from .overlap import compute_output_geobox +from .roi import roi_is_empty from .types import Resolution, xy_ + # pylint: disable=import-outside-toplevel # pylint: disable=too-many-lines if have.rasterio: @@ -266,10 +268,6 @@ def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT: A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` masked by ``poly``. """ - - # Reproject `poly` to match `xx` - poly = poly.to_crs(crs=xx.odc.crs) - # Rasterise `poly` into geobox of `xx` rasterized = rasterize( poly=poly, @@ -280,6 +278,13 @@ def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT: # Mask data outside rasterized `poly` xx_masked = xx.where(rasterized.data) + # Remove nodata attribute from arrays + if isinstance(xx_masked, xarray.Dataset): + for var in xx_masked.data_vars: + xx_masked[var].attrs.pop("nodata", None) + else: + xx_masked.attrs.pop("nodata", None) + return xx_masked @@ -309,30 +314,29 @@ def crop( A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` cropped and optionally masked to the spatial extent of ``poly``. """ - # Reproject `poly` to match `xx` - poly = poly.to_crs(crs=xx.odc.crs) + # Create new geobox with pixel grid of `xx` but enclosing `poly`. + poly_geobox = xx.odc.geobox.enclosing(poly) + + # Calculate ROI slices into `xx` for intersection between both geoboxes. + roi = xx.odc.geobox.overlap_roi(poly_geobox) - # Verify that `poly` overlaps with `xx` extent - if not poly.intersects(xx.odc.geobox.extent): + # Verify that `poly` overlaps with `xx` by checking if the returned + # ROI is empty + if roi_is_empty(roi): raise ValueError( "The supplied `poly` must overlap spatially with the extent of `xx`." ) - # Optionally mask data outside rasterized `poly` - if apply_mask: - xx = mask(xx, poly, all_touched=all_touched) - - # Identify spatial dims and raise error if None + # Crop spatial dims of `xx` using ROI sdims = spatial_dims(xx) if sdims is None: raise ValueError("Can't locate spatial dimensions") - - # Crop `xx` to the bounding box of `poly`. First create new geobox - # with the same pixel grid as `xx` but enclosing `poly`. Then - # calculate slices into `xx` for the intersection between both geoboxes. - roi = xx.odc.geobox.overlap_roi(xx.odc.geobox.enclosing(poly)) xx_cropped = xx.isel({sdims[0]: roi[0], sdims[1]: roi[1]}) + # Optionally mask data outside rasterized `poly` + if apply_mask: + xx_cropped = mask(xx_cropped, poly, all_touched=all_touched) + return xx_cropped From 40330a76ed6bdccec46cc093a920d4bb8542b519 Mon Sep 17 00:00:00 2001 From: Robbi Bishop-Taylor Date: Sat, 30 Dec 2023 04:28:34 +0000 Subject: [PATCH 3/3] Add invert mask param, update doco --- odc/geo/_xr_interop.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/odc/geo/_xr_interop.py b/odc/geo/_xr_interop.py index bd483467..8970eefe 100644 --- a/odc/geo/_xr_interop.py +++ b/odc/geo/_xr_interop.py @@ -248,7 +248,9 @@ def assign_crs( return xx -def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT: +def mask( + xx: XrT, poly: Geometry, invert: bool = False, all_touched: bool = True +) -> XrT: """ Apply a polygon geometry as a mask, setting all xr.Dataset or xr.DataArray pixels outside the rasterized polygon to ``NaN``. @@ -257,22 +259,30 @@ def mask(xx: XrT, poly: Geometry, all_touched: bool = True) -> XrT: :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`. :param poly: - Geometry shape used to mask ``xx``. + A :py:class:`odc.geo.geom.Geometry` polygon used to mask ``xx``. + + :param invert: + Whether to invert the mask before applying it to ``xx``. If + ``True``, only pixels inside of ``poly`` will be masked. :param all_touched: - If ``True``, all pixels touched by ``poly`` will remain unmasked. - If ``False``, only pixels whose center is within the polygon or - that are selected by Bresenham's line algorithm will remain unmasked. + If ``True``, the rasterize step will burn in all pixels touched + by ``poly``. If ``False``, only pixels whose centers are within + the polygon or that are selected by Bresenham's line algorithm + will be burned in. :return: A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` masked by ``poly``. + + ..seealso:: :py:meth:`odc.geo.xr.rasterize` """ # Rasterise `poly` into geobox of `xx` rasterized = rasterize( poly=poly, how=xx.odc.geobox, all_touched=all_touched, + value_inside=not invert, ) # Mask data outside rasterized `poly` @@ -292,27 +302,30 @@ def crop( xx: XrT, poly: Geometry, apply_mask: bool = True, all_touched: bool = True ) -> XrT: """ - Crops and optionally mask an xr.Dataset or xr.DataArray (``xx``) to - the spatial extent of a geometry (``poly``). + Crops and optionally mask an xr.Dataset or xr.DataArray to the + spatial extent of a geometry. :param xx: :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray`. :param poly: - Geometry shape used to crop ``xx``. + A :py:class:`odc.geo.geom.Geometry` polygon used to crop ``xx``. :param apply_mask: Whether to mask out pixels outside of the rasterized extent of ``poly`` by setting them to ``NaN``. :param all_touched: - If ``True``, all pixels touched by ``poly`` will remain unmasked. - If ``False``, only pixels whose center is within the polygon or - that are selected by Bresenham's line algorithm will remain unmasked. + If ``True`` and ``apply_mask=True``, the rasterize step will + burn in all pixels touched by ``poly``. If ``False``, only + pixels whose centers are within the polygon or that are selected + by Bresenham's line algorithm will be burned in. :return: A :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` cropped and optionally masked to the spatial extent of ``poly``. + + ..seealso:: :py:meth:`odc.geo.xr.mask` """ # Create new geobox with pixel grid of `xx` but enclosing `poly`. poly_geobox = xx.odc.geobox.enclosing(poly)