From c30e5530a879a60af659affa4b0df17f3ea722ef Mon Sep 17 00:00:00 2001 From: Yun-Wu Date: Thu, 23 Jan 2025 18:53:39 +0800 Subject: [PATCH] Fix Dimension Mismatch in ETD Calculation Error (#393) --- .../Elliptical Time Density (ETD).ipynb | 10 +- .../04. EcoMap & EcoPlot/EcoMap.ipynb | 3 +- ecoscope/analysis/UD/etd_range.py | 47 ++++++-- ecoscope/analysis/percentile.py | 108 ++++++------------ ecoscope/io/earthranger.py | 2 +- ecoscope/io/raster.py | 26 +++++ ecoscope/plotting/plot.py | 2 +- tests/test_ud.py | 38 +++++- 8 files changed, 143 insertions(+), 93 deletions(-) diff --git a/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb b/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb index b3643f5d..05a79e25 100644 --- a/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb +++ b/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb @@ -49,6 +49,7 @@ "import ecoscope\n", "from ecoscope.analysis.UD import calculate_etd_range\n", "from ecoscope.analysis.percentile import get_percentile_area\n", + "from ecoscope.io.raster import RasterData\n", "\n", "ecoscope.init()" ] @@ -255,7 +256,12 @@ "outputs": [], "source": [ "percentiles = pd.concat(\n", - " [get_percentile_area(percentile_levels=[50, 99.9, 100.0], raster_path=v, subject_id=k) for k, v in etd.items()]\n", + " [\n", + " get_percentile_area(\n", + " percentile_levels=[50, 99.9, 100.0], raster_data=RasterData.from_raster_file(v), subject_id=k\n", + " )\n", + " for k, v in etd.items()\n", + " ]\n", ").reset_index(drop=True)\n", "\n", "percentiles" @@ -301,7 +307,7 @@ "source": [ "salif = get_percentile_area(\n", " percentile_levels=[50, 60, 70, 80, 90, 99.9],\n", - " raster_path=etd.at[\"Salif Keita\"],\n", + " raster_data=RasterData.from_raster_file(etd.at[\"Salif Keita\"]),\n", " subject_id=\"Salif Keita\",\n", ")" ] diff --git a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb index ffe4a9ad..243120e1 100644 --- a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb +++ b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb @@ -496,9 +496,10 @@ "metadata": {}, "outputs": [], "source": [ + "raster_data = ecoscope.io.raster.RasterData.from_raster_file(etd.at[\"Salif Keita\"])\n", "percentile_areas = get_percentile_area(\n", " percentile_levels=[50, 60, 70, 80, 90, 99.9],\n", - " raster_path=etd.at[\"Salif Keita\"],\n", + " raster_data=raster_data,\n", " subject_id=\"Salif Keita\",\n", ").to_crs(4326)\n", "\n", diff --git a/ecoscope/analysis/UD/etd_range.py b/ecoscope/analysis/UD/etd_range.py index ca276bbd..094e1ffb 100644 --- a/ecoscope/analysis/UD/etd_range.py +++ b/ecoscope/analysis/UD/etd_range.py @@ -1,24 +1,28 @@ +import logging import math import os import typing from dataclasses import dataclass import numpy as np + from ecoscope.base import Trajectory from ecoscope.io import raster try: import numba as nb import scipy - from sklearn import neighbors from scipy.optimize import minimize from scipy.stats import weibull_min + from sklearn import neighbors except ModuleNotFoundError: raise ModuleNotFoundError( 'Missing optional dependencies required by this module. \ Please run pip install ecoscope["analysis"]' ) +logger = logging.getLogger(__name__) + @nb.cfunc("double(intc, CPointer(double))") def __etd__(_, a): @@ -84,13 +88,14 @@ class Weibull3Parameter(WeibullPDF): def calculate_etd_range( trajectory_gdf: Trajectory, - output_path: typing.Union[str, bytes, os.PathLike], + output_path: typing.Union[str, bytes, os.PathLike, None] = None, max_speed_kmhr: float = 0.0, max_speed_percentage: float = 0.9999, raster_profile: raster.RasterProfile = None, expansion_factor: float = 1.3, weibull_pdf: typing.Union[Weibull2Parameter, Weibull3Parameter] = Weibull2Parameter(), -) -> None: + grid_threshold: int = 100, +) -> raster.RasterData: """ The ETDRange class provides a trajectory-based, nonparametric approach to estimate the utilization distribution (UD) of an animal, using model parameters derived directly from the movement behaviour of the species. @@ -100,12 +105,13 @@ def calculate_etd_range( Parameters ---------- trajectory_gdf : geopandas.GeoDataFrame - output_path : str or PathLike + output_path : str or PathLike or None max_speed_kmhr : float max_speed_percentage : 0.999 raster_profile : raster.RasterProfile expansion_factor : float weibull_pdf : Weibull2Parameter or Weibull3Parameter + grid_threshold: int Returns ------- @@ -169,8 +175,20 @@ def calculate_etd_range( grid_centroids[1, 0] = y_max - raster_profile.pixel_size * 0.5 centroids_coords = np.dot(grid_centroids, np.mgrid[1:2, :num_columns, :num_rows].T.reshape(-1, 3, 1)) + centroids_coords = centroids_coords.squeeze().T + + if centroids_coords.size < grid_threshold: + logger.warning( + f"Centroid size {centroids_coords.size} is too small to calculate density. " + f"The threshold value is {grid_threshold}. " + "Check if there’s a data issue or decrease pixel size" + ) + return raster.RasterData(data=np.array([]), crs=raster_profile.crs, transform=raster_profile.transform) + + if centroids_coords.ndim != 2: + centroids_coords = centroids_coords.reshape(1, -1) - tr = neighbors.KDTree(centroids_coords.squeeze().T) + tr = neighbors.KDTree(centroids_coords) del centroids_coords @@ -206,12 +224,17 @@ def calculate_etd_range( # Normalize the grid values raster_ndarray = raster_ndarray / raster_ndarray.sum() - # Set the null data values - raster_ndarray[raster_ndarray == 0] = raster_profile.nodata_value + ndarray = raster_ndarray.reshape(num_rows, num_columns) # write raster_ndarray to GeoTIFF file. - raster.RasterPy.write( - ndarray=raster_ndarray.reshape(num_rows, num_columns), - fp=output_path, - **raster_profile, - ) + if output_path: + # Set the null data values + raster_ndarray[np.isnan(raster_ndarray) | (raster_ndarray == 0)] = raster_profile.nodata_value + + raster.RasterPy.write( + ndarray, + fp=output_path, + **raster_profile, + ) + + return raster.RasterData(data=ndarray.astype("float32"), crs=raster_profile.crs, transform=raster_profile.transform) diff --git a/ecoscope/analysis/percentile.py b/ecoscope/analysis/percentile.py index 41bfb3a3..244ad4ee 100644 --- a/ecoscope/analysis/percentile.py +++ b/ecoscope/analysis/percentile.py @@ -1,6 +1,4 @@ -import os import typing -from dataclasses import dataclass, field import geopandas as gpd import numpy as np @@ -9,83 +7,25 @@ from shapely.geometry import shape from shapely.geometry.multipolygon import MultiPolygon +from ecoscope.io import raster -@dataclass -class PercentileAreaProfile: - input_raster: typing.Union[str, bytes, os.PathLike] - percentile_levels: typing.List = field(default_factory=[50.0]) - subject_id: str = "" - -class PercentileArea: - @staticmethod - def _multipolygon(shapes, percentile): - return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)]) - - @classmethod - def calculate_percentile_area(cls, profile: PercentileAreaProfile): - """ - Parameters - ---------- - profile: PercentileAreaProfile - dataclass object with information for percentile-area calculation - - Returns - ------- - GeodataFrame - """ - - assert type(profile) is PercentileAreaProfile - shapes = [] - - # open raster - with rasterio.open(profile.input_raster) as src: - crs = src.crs.to_wkt() - - for percentile in profile.percentile_levels: - data_array = src.read(1).astype(np.float32) - - # Mask no-data values - data_array[data_array == src.nodata] = np.nan - - # calculate percentile value - # percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile) - values = np.sort(data_array[~np.isnan(data_array)]).flatten() - csum = np.cumsum(values) - percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))] - - # TODO: make a more explicit comparison for less than and greater than - - # Set any vals less than the cutoff to be nan - data_array[data_array < percentile_val] = np.nan - - # Mask any vals that are less than the cutoff percentile - data_array[data_array >= percentile_val] = percentile - - shapes.extend(rasterio.features.shapes(data_array, transform=src.transform)) - - return gpd.GeoDataFrame( - [ - [profile.subject_id, percentile, cls._multipolygon(shapes, percentile)] - for percentile in sorted(profile.percentile_levels, reverse=True) - ], - columns=["subject_id", "percentile", "geometry"], - crs=crs, - ) +def _multipolygon(shapes, percentile): + return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)]) def get_percentile_area( percentile_levels: typing.List, - raster_path: typing.Union[str, bytes, os.PathLike], + raster_data: raster.RasterData, subject_id: str = "", -): +) -> gpd.GeoDataFrame: """ Parameters ---------- percentile_levels: Typing.List[Int] list of k-th percentile scores. - raster_path: str or os.PathLike - file path to where the raster is stored. + raster_data: raster.RasterData + array of raster values subject_id: str unique identifier for the subject @@ -94,9 +34,33 @@ def get_percentile_area( GeoDataFrame """ - percentile_profile = PercentileAreaProfile( - percentile_levels=percentile_levels, - input_raster=raster_path, - subject_id=subject_id, + shapes = [] + for percentile in percentile_levels: + data_array = raster_data.data.copy() + + # calculate percentile value + values = np.sort(data_array[~np.isnan(data_array)]).flatten() + if len(values) == 0: + continue + + csum = np.cumsum(values) + percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))] + + # TODO: make a more explicit comparison for less than and greater than + + # Set any vals less than the cutoff to be nan + data_array[data_array < percentile_val] = np.nan + + # Mask any vals that are less than the cutoff percentile + data_array[data_array >= percentile_val] = percentile + + shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform)) + + return gpd.GeoDataFrame( + [ + [subject_id, percentile, _multipolygon(shapes, percentile)] + for percentile in sorted(percentile_levels, reverse=True) + ], + columns=["subject_id", "percentile", "geometry"], + crs=raster_data.crs, ) - return PercentileArea.calculate_percentile_area(profile=percentile_profile) diff --git a/ecoscope/io/earthranger.py b/ecoscope/io/earthranger.py index b6936bf4..e6159e8e 100644 --- a/ecoscope/io/earthranger.py +++ b/ecoscope/io/earthranger.py @@ -1186,7 +1186,7 @@ def upload(obs): "records" ) del obs["geometry"] - obs = pack_columns(obs, columns=["source", "recorded_at", "location"]) + obs = pack_columns(obs, columns=["source", "recorded_at", "location", "exclusion_flags", "additional"]) post_data = obs.to_dict("records") results = super(EarthRangerIO, self).post_observation(post_data) except ERClientException as exc: diff --git a/ecoscope/io/raster.py b/ecoscope/io/raster.py index 4a0ff064..3470b2e1 100644 --- a/ecoscope/io/raster.py +++ b/ecoscope/io/raster.py @@ -2,6 +2,7 @@ import math import os from collections import UserDict +from dataclasses import dataclass import geopandas as gpd import numpy as np @@ -102,6 +103,20 @@ def __setitem__(self, key, item): self._recompute_transform_(key) +@dataclass +class RasterData: + data: np.ndarray + crs: str + transform: rio.Affine + + @classmethod + def from_raster_file(cls, raster_path): + with rasterio.open(raster_path) as src: + data_array = src.read(1).astype(np.float32) + data_array[data_array == src.nodata] = np.nan + return cls(data_array, src.crs.to_wkt(), src.transform) + + class RasterPy: @classmethod def write( @@ -249,3 +264,14 @@ def grid_to_raster(grid=None, val_column="", out_dir="", raster_name=None, xlen= ).write(vals, 1) return memfile + + +def raster_to_grid(raster_path): + with rasterio.open(raster_path) as src: + data_array = src.read(1).astype(np.float32) + data_array[data_array == src.nodata] = np.nan + + +def get_crs(raster_path): + with rasterio.open(raster_path) as src: + return src.crs.to_wkt() diff --git a/ecoscope/plotting/plot.py b/ecoscope/plotting/plot.py index 1f530b65..9c62ac18 100644 --- a/ecoscope/plotting/plot.py +++ b/ecoscope/plotting/plot.py @@ -422,7 +422,7 @@ def draw_historic_timeseries( The title shown in the plot legend for historic band historic_mean_column: str The name of the dataframe column to pull historic mean values from - current_value_title: str + historic_mean_title: str The title shown in the plot legend for historic mean values layout_kwargs: dict Additional kwargs passed to plotly.go.Figure(layout) diff --git a/tests/test_ud.py b/tests/test_ud.py index eb94826f..c1c4997c 100644 --- a/tests/test_ud.py +++ b/tests/test_ud.py @@ -4,13 +4,15 @@ import geopandas as gpd import geopandas.testing import numpy as np +import pytest import ecoscope from ecoscope.analysis import UD from ecoscope.analysis.percentile import get_percentile_area -def test_etd_range(movebank_relocations): +@pytest.fixture +def movebank_trajectory_gdf(movebank_relocations): # apply relocation coordinate filter to movebank data pnts_filter = ecoscope.base.RelocsCoordinateFilter( min_x=-5, @@ -23,15 +25,20 @@ def test_etd_range(movebank_relocations): movebank_relocations.remove_filtered(inplace=True) # Create Trajectory - movebank_trajectory_gdf = ecoscope.base.Trajectory.from_relocations(movebank_relocations) + return ecoscope.base.Trajectory.from_relocations(movebank_relocations) - raster_profile = ecoscope.io.raster.RasterProfile( + +@pytest.fixture +def raster_profile(): + return ecoscope.io.raster.RasterProfile( pixel_size=250.0, crs="ESRI:102022", nodata_value=np.nan, band_count=1, # Albers Africa Equal Area Conic ) + +def test_etd_range_with_tif_file(movebank_trajectory_gdf, raster_profile): file = NamedTemporaryFile(delete=False) try: UD.calculate_etd_range( @@ -42,8 +49,31 @@ def test_etd_range(movebank_relocations): expansion_factor=1.3, ) + raster_data = ecoscope.io.raster.RasterData.from_raster_file(file.name) + + percentile_area = get_percentile_area( + percentile_levels=[99.9], raster_data=raster_data, subject_id="Salif_Keita" + ).to_crs(4326) + finally: + file.close() + os.unlink(file.name) + + expected_percentile_area = gpd.read_feather("tests/test_output/etd_percentile_area.feather") + gpd.testing.geom_almost_equals(percentile_area, expected_percentile_area) + + +def test_etd_range_without_tif_file(movebank_trajectory_gdf, raster_profile): + file = NamedTemporaryFile(delete=False) + try: + raster_data = UD.calculate_etd_range( + trajectory_gdf=movebank_trajectory_gdf, + max_speed_kmhr=1.05 * movebank_trajectory_gdf.speed_kmhr.max(), + raster_profile=raster_profile, + expansion_factor=1.3, + ) + percentile_area = get_percentile_area( - percentile_levels=[99.9], raster_path=file.name, subject_id="Salif_Keita" + percentile_levels=[99.9], raster_data=raster_data, subject_id="Salif_Keita" ).to_crs(4326) finally: file.close()