Fix Dimension Mismatch in ETD Calculation Error (#393)

wildlife-dynamics · Jan 23, 2025 · c30e553 · c30e553
1 parent 1f52997
commit c30e553
Show file tree

Hide file tree

Showing 8 changed files with 143 additions and 93 deletions.
diff --git a/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb b/doc/source/notebooks/03. Home Range & Movescape/Elliptical Time Density (ETD).ipynb
@@ -49,6 +49,7 @@
     "import ecoscope\n",
     "from ecoscope.analysis.UD import calculate_etd_range\n",
     "from ecoscope.analysis.percentile import get_percentile_area\n",
+    "from ecoscope.io.raster import RasterData\n",
     "\n",
     "ecoscope.init()"
    ]
@@ -255,7 +256,12 @@
    "outputs": [],
    "source": [
     "percentiles = pd.concat(\n",
-    "    [get_percentile_area(percentile_levels=[50, 99.9, 100.0], raster_path=v, subject_id=k) for k, v in etd.items()]\n",
+    "    [\n",
+    "        get_percentile_area(\n",
+    "            percentile_levels=[50, 99.9, 100.0], raster_data=RasterData.from_raster_file(v), subject_id=k\n",
+    "        )\n",
+    "        for k, v in etd.items()\n",
+    "    ]\n",
     ").reset_index(drop=True)\n",
     "\n",
     "percentiles"
@@ -301,7 +307,7 @@
    "source": [
     "salif = get_percentile_area(\n",
     "    percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
-    "    raster_path=etd.at[\"Salif Keita\"],\n",
+    "    raster_data=RasterData.from_raster_file(etd.at[\"Salif Keita\"]),\n",
     "    subject_id=\"Salif Keita\",\n",
     ")"
    ]

diff --git a/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb b/doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
@@ -496,9 +496,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "raster_data = ecoscope.io.raster.RasterData.from_raster_file(etd.at[\"Salif Keita\"])\n",
     "percentile_areas = get_percentile_area(\n",
     "    percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
-    "    raster_path=etd.at[\"Salif Keita\"],\n",
+    "    raster_data=raster_data,\n",
     "    subject_id=\"Salif Keita\",\n",
     ").to_crs(4326)\n",
     "\n",

diff --git a/ecoscope/analysis/UD/etd_range.py b/ecoscope/analysis/UD/etd_range.py
@@ -1,24 +1,28 @@
+import logging
 import math
 import os
 import typing
 from dataclasses import dataclass
 
 import numpy as np
+
 from ecoscope.base import Trajectory
 from ecoscope.io import raster
 
 try:
     import numba as nb
     import scipy
-    from sklearn import neighbors
     from scipy.optimize import minimize
     from scipy.stats import weibull_min
+    from sklearn import neighbors
 except ModuleNotFoundError:
     raise ModuleNotFoundError(
         'Missing optional dependencies required by this module. \
          Please run pip install ecoscope["analysis"]'
     )
 
+logger = logging.getLogger(__name__)
+
 
 @nb.cfunc("double(intc, CPointer(double))")
 def __etd__(_, a):
@@ -84,13 +88,14 @@ class Weibull3Parameter(WeibullPDF):
 
 def calculate_etd_range(
     trajectory_gdf: Trajectory,
-    output_path: typing.Union[str, bytes, os.PathLike],
+    output_path: typing.Union[str, bytes, os.PathLike, None] = None,
     max_speed_kmhr: float = 0.0,
     max_speed_percentage: float = 0.9999,
     raster_profile: raster.RasterProfile = None,
     expansion_factor: float = 1.3,
     weibull_pdf: typing.Union[Weibull2Parameter, Weibull3Parameter] = Weibull2Parameter(),
-) -> None:
+    grid_threshold: int = 100,
+) -> raster.RasterData:
     """
     The ETDRange class provides a trajectory-based, nonparametric approach to estimate the utilization distribution (UD)
     of an animal, using model parameters derived directly from the movement behaviour of the species.
@@ -100,12 +105,13 @@ def calculate_etd_range(
     Parameters
     ----------
     trajectory_gdf : geopandas.GeoDataFrame
-    output_path : str or PathLike
+    output_path : str or PathLike or None
     max_speed_kmhr : float
     max_speed_percentage : 0.999
     raster_profile : raster.RasterProfile
     expansion_factor : float
     weibull_pdf : Weibull2Parameter or Weibull3Parameter
+    grid_threshold: int
 
     Returns
     -------
@@ -169,8 +175,20 @@ def calculate_etd_range(
     grid_centroids[1, 0] = y_max - raster_profile.pixel_size * 0.5
 
     centroids_coords = np.dot(grid_centroids, np.mgrid[1:2, :num_columns, :num_rows].T.reshape(-1, 3, 1))
+    centroids_coords = centroids_coords.squeeze().T
+
+    if centroids_coords.size < grid_threshold:
+        logger.warning(
+            f"Centroid size {centroids_coords.size} is too small to calculate density. "
+            f"The threshold value is {grid_threshold}. "
+            "Check if there’s a data issue or decrease pixel size"
+        )
+        return raster.RasterData(data=np.array([]), crs=raster_profile.crs, transform=raster_profile.transform)
+
+    if centroids_coords.ndim != 2:
+        centroids_coords = centroids_coords.reshape(1, -1)
 
-    tr = neighbors.KDTree(centroids_coords.squeeze().T)
+    tr = neighbors.KDTree(centroids_coords)
 
     del centroids_coords
 
@@ -206,12 +224,17 @@ def calculate_etd_range(
     # Normalize the grid values
     raster_ndarray = raster_ndarray / raster_ndarray.sum()
 
-    # Set the null data values
-    raster_ndarray[raster_ndarray == 0] = raster_profile.nodata_value
+    ndarray = raster_ndarray.reshape(num_rows, num_columns)
 
     # write raster_ndarray to GeoTIFF file.
-    raster.RasterPy.write(
-        ndarray=raster_ndarray.reshape(num_rows, num_columns),
-        fp=output_path,
-        **raster_profile,
-    )
+    if output_path:
+        # Set the null data values
+        raster_ndarray[np.isnan(raster_ndarray) | (raster_ndarray == 0)] = raster_profile.nodata_value
+
+        raster.RasterPy.write(
+            ndarray,
+            fp=output_path,
+            **raster_profile,
+        )
+
+    return raster.RasterData(data=ndarray.astype("float32"), crs=raster_profile.crs, transform=raster_profile.transform)
diff --git a/ecoscope/analysis/percentile.py b/ecoscope/analysis/percentile.py
@@ -1,6 +1,4 @@
-import os
 import typing
-from dataclasses import dataclass, field
 
 import geopandas as gpd
 import numpy as np
@@ -9,83 +7,25 @@
 from shapely.geometry import shape
 from shapely.geometry.multipolygon import MultiPolygon
 
+from ecoscope.io import raster
 
-@dataclass
-class PercentileAreaProfile:
-    input_raster: typing.Union[str, bytes, os.PathLike]
-    percentile_levels: typing.List = field(default_factory=[50.0])
-    subject_id: str = ""
 
-
-class PercentileArea:
-    @staticmethod
-    def _multipolygon(shapes, percentile):
-        return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])
-
-    @classmethod
-    def calculate_percentile_area(cls, profile: PercentileAreaProfile):
-        """
-        Parameters
-        ----------
-        profile:  PercentileAreaProfile
-            dataclass object with information for percentile-area calculation
-
-        Returns
-        -------
-        GeodataFrame
-        """
-
-        assert type(profile) is PercentileAreaProfile
-        shapes = []
-
-        # open raster
-        with rasterio.open(profile.input_raster) as src:
-            crs = src.crs.to_wkt()
-
-            for percentile in profile.percentile_levels:
-                data_array = src.read(1).astype(np.float32)
-
-                # Mask no-data values
-                data_array[data_array == src.nodata] = np.nan
-
-                # calculate percentile value
-                # percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
-                values = np.sort(data_array[~np.isnan(data_array)]).flatten()
-                csum = np.cumsum(values)
-                percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]
-
-                # TODO: make a more explicit comparison for less than and greater than
-
-                # Set any vals less than the cutoff to be nan
-                data_array[data_array < percentile_val] = np.nan
-
-                # Mask any vals that are less than the cutoff percentile
-                data_array[data_array >= percentile_val] = percentile
-
-                shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))
-
-        return gpd.GeoDataFrame(
-            [
-                [profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
-                for percentile in sorted(profile.percentile_levels, reverse=True)
-            ],
-            columns=["subject_id", "percentile", "geometry"],
-            crs=crs,
-        )
+def _multipolygon(shapes, percentile):
+    return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])
 
 
 def get_percentile_area(
     percentile_levels: typing.List,
-    raster_path: typing.Union[str, bytes, os.PathLike],
+    raster_data: raster.RasterData,
     subject_id: str = "",
-):
+) -> gpd.GeoDataFrame:
     """
     Parameters
     ----------
     percentile_levels: Typing.List[Int]
         list of k-th percentile scores.
-    raster_path: str or os.PathLike
-        file path to where the raster is stored.
+    raster_data: raster.RasterData
+        array of raster values
     subject_id: str
         unique identifier for the subject
 
@@ -94,9 +34,33 @@ def get_percentile_area(
     GeoDataFrame
 
     """
-    percentile_profile = PercentileAreaProfile(
-        percentile_levels=percentile_levels,
-        input_raster=raster_path,
-        subject_id=subject_id,
+    shapes = []
+    for percentile in percentile_levels:
+        data_array = raster_data.data.copy()
+
+        # calculate percentile value
+        values = np.sort(data_array[~np.isnan(data_array)]).flatten()
+        if len(values) == 0:
+            continue
+
+        csum = np.cumsum(values)
+        percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]
+
+        # TODO: make a more explicit comparison for less than and greater than
+
+        # Set any vals less than the cutoff to be nan
+        data_array[data_array < percentile_val] = np.nan
+
+        # Mask any vals that are less than the cutoff percentile
+        data_array[data_array >= percentile_val] = percentile
+
+        shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform))
+
+    return gpd.GeoDataFrame(
+        [
+            [subject_id, percentile, _multipolygon(shapes, percentile)]
+            for percentile in sorted(percentile_levels, reverse=True)
+        ],
+        columns=["subject_id", "percentile", "geometry"],
+        crs=raster_data.crs,
     )
-    return PercentileArea.calculate_percentile_area(profile=percentile_profile)
diff --git a/ecoscope/io/earthranger.py b/ecoscope/io/earthranger.py
@@ -1186,7 +1186,7 @@ def upload(obs):
                     "records"
                 )
                 del obs["geometry"]
-                obs = pack_columns(obs, columns=["source", "recorded_at", "location"])
+                obs = pack_columns(obs, columns=["source", "recorded_at", "location", "exclusion_flags", "additional"])
                 post_data = obs.to_dict("records")
                 results = super(EarthRangerIO, self).post_observation(post_data)
             except ERClientException as exc:

diff --git a/ecoscope/io/raster.py b/ecoscope/io/raster.py
@@ -2,6 +2,7 @@
 import math
 import os
 from collections import UserDict
+from dataclasses import dataclass
 
 import geopandas as gpd
 import numpy as np
@@ -102,6 +103,20 @@ def __setitem__(self, key, item):
         self._recompute_transform_(key)
 
 
+@dataclass
+class RasterData:
+    data: np.ndarray
+    crs: str
+    transform: rio.Affine
+
+    @classmethod
+    def from_raster_file(cls, raster_path):
+        with rasterio.open(raster_path) as src:
+            data_array = src.read(1).astype(np.float32)
+            data_array[data_array == src.nodata] = np.nan
+            return cls(data_array, src.crs.to_wkt(), src.transform)
+
+
 class RasterPy:
     @classmethod
     def write(
@@ -249,3 +264,14 @@ def grid_to_raster(grid=None, val_column="", out_dir="", raster_name=None, xlen=
         ).write(vals, 1)
 
         return memfile
+
+
+def raster_to_grid(raster_path):
+    with rasterio.open(raster_path) as src:
+        data_array = src.read(1).astype(np.float32)
+        data_array[data_array == src.nodata] = np.nan
+
+
+def get_crs(raster_path):
+    with rasterio.open(raster_path) as src:
+        return src.crs.to_wkt()
diff --git a/ecoscope/plotting/plot.py b/ecoscope/plotting/plot.py
@@ -422,7 +422,7 @@ def draw_historic_timeseries(
         The title shown in the plot legend for historic band
     historic_mean_column: str
         The name of the dataframe column to pull historic mean values from
-    current_value_title: str
+    historic_mean_title: str
         The title shown in the plot legend for historic mean values
     layout_kwargs: dict
         Additional kwargs passed to plotly.go.Figure(layout)