Skip to content

Commit

Permalink
Fix Dimension Mismatch in ETD Calculation Error (#393)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yun-Wu authored Jan 23, 2025
1 parent 1f52997 commit c30e553
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"import ecoscope\n",
"from ecoscope.analysis.UD import calculate_etd_range\n",
"from ecoscope.analysis.percentile import get_percentile_area\n",
"from ecoscope.io.raster import RasterData\n",
"\n",
"ecoscope.init()"
]
Expand Down Expand Up @@ -255,7 +256,12 @@
"outputs": [],
"source": [
"percentiles = pd.concat(\n",
" [get_percentile_area(percentile_levels=[50, 99.9, 100.0], raster_path=v, subject_id=k) for k, v in etd.items()]\n",
" [\n",
" get_percentile_area(\n",
" percentile_levels=[50, 99.9, 100.0], raster_data=RasterData.from_raster_file(v), subject_id=k\n",
" )\n",
" for k, v in etd.items()\n",
" ]\n",
").reset_index(drop=True)\n",
"\n",
"percentiles"
Expand Down Expand Up @@ -301,7 +307,7 @@
"source": [
"salif = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=RasterData.from_raster_file(etd.at[\"Salif Keita\"]),\n",
" subject_id=\"Salif Keita\",\n",
")"
]
Expand Down
3 changes: 2 additions & 1 deletion doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,10 @@
"metadata": {},
"outputs": [],
"source": [
"raster_data = ecoscope.io.raster.RasterData.from_raster_file(etd.at[\"Salif Keita\"])\n",
"percentile_areas = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=raster_data,\n",
" subject_id=\"Salif Keita\",\n",
").to_crs(4326)\n",
"\n",
Expand Down
47 changes: 35 additions & 12 deletions ecoscope/analysis/UD/etd_range.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
import logging
import math
import os
import typing
from dataclasses import dataclass

import numpy as np

from ecoscope.base import Trajectory
from ecoscope.io import raster

try:
import numba as nb
import scipy
from sklearn import neighbors
from scipy.optimize import minimize
from scipy.stats import weibull_min
from sklearn import neighbors
except ModuleNotFoundError:
raise ModuleNotFoundError(
'Missing optional dependencies required by this module. \
Please run pip install ecoscope["analysis"]'
)

logger = logging.getLogger(__name__)


@nb.cfunc("double(intc, CPointer(double))")
def __etd__(_, a):
Expand Down Expand Up @@ -84,13 +88,14 @@ class Weibull3Parameter(WeibullPDF):

def calculate_etd_range(
trajectory_gdf: Trajectory,
output_path: typing.Union[str, bytes, os.PathLike],
output_path: typing.Union[str, bytes, os.PathLike, None] = None,
max_speed_kmhr: float = 0.0,
max_speed_percentage: float = 0.9999,
raster_profile: raster.RasterProfile = None,
expansion_factor: float = 1.3,
weibull_pdf: typing.Union[Weibull2Parameter, Weibull3Parameter] = Weibull2Parameter(),
) -> None:
grid_threshold: int = 100,
) -> raster.RasterData:
"""
The ETDRange class provides a trajectory-based, nonparametric approach to estimate the utilization distribution (UD)
of an animal, using model parameters derived directly from the movement behaviour of the species.
Expand All @@ -100,12 +105,13 @@ def calculate_etd_range(
Parameters
----------
trajectory_gdf : geopandas.GeoDataFrame
output_path : str or PathLike
output_path : str or PathLike or None
max_speed_kmhr : float
max_speed_percentage : 0.999
raster_profile : raster.RasterProfile
expansion_factor : float
weibull_pdf : Weibull2Parameter or Weibull3Parameter
grid_threshold: int
Returns
-------
Expand Down Expand Up @@ -169,8 +175,20 @@ def calculate_etd_range(
grid_centroids[1, 0] = y_max - raster_profile.pixel_size * 0.5

centroids_coords = np.dot(grid_centroids, np.mgrid[1:2, :num_columns, :num_rows].T.reshape(-1, 3, 1))
centroids_coords = centroids_coords.squeeze().T

if centroids_coords.size < grid_threshold:
logger.warning(
f"Centroid size {centroids_coords.size} is too small to calculate density. "
f"The threshold value is {grid_threshold}. "
"Check if there’s a data issue or decrease pixel size"
)
return raster.RasterData(data=np.array([]), crs=raster_profile.crs, transform=raster_profile.transform)

if centroids_coords.ndim != 2:
centroids_coords = centroids_coords.reshape(1, -1)

tr = neighbors.KDTree(centroids_coords.squeeze().T)
tr = neighbors.KDTree(centroids_coords)

del centroids_coords

Expand Down Expand Up @@ -206,12 +224,17 @@ def calculate_etd_range(
# Normalize the grid values
raster_ndarray = raster_ndarray / raster_ndarray.sum()

# Set the null data values
raster_ndarray[raster_ndarray == 0] = raster_profile.nodata_value
ndarray = raster_ndarray.reshape(num_rows, num_columns)

# write raster_ndarray to GeoTIFF file.
raster.RasterPy.write(
ndarray=raster_ndarray.reshape(num_rows, num_columns),
fp=output_path,
**raster_profile,
)
if output_path:
# Set the null data values
raster_ndarray[np.isnan(raster_ndarray) | (raster_ndarray == 0)] = raster_profile.nodata_value

raster.RasterPy.write(
ndarray,
fp=output_path,
**raster_profile,
)

return raster.RasterData(data=ndarray.astype("float32"), crs=raster_profile.crs, transform=raster_profile.transform)
108 changes: 36 additions & 72 deletions ecoscope/analysis/percentile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import typing
from dataclasses import dataclass, field

import geopandas as gpd
import numpy as np
Expand All @@ -9,83 +7,25 @@
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon

from ecoscope.io import raster

@dataclass
class PercentileAreaProfile:
input_raster: typing.Union[str, bytes, os.PathLike]
percentile_levels: typing.List = field(default_factory=[50.0])
subject_id: str = ""


class PercentileArea:
@staticmethod
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])

@classmethod
def calculate_percentile_area(cls, profile: PercentileAreaProfile):
"""
Parameters
----------
profile: PercentileAreaProfile
dataclass object with information for percentile-area calculation
Returns
-------
GeodataFrame
"""

assert type(profile) is PercentileAreaProfile
shapes = []

# open raster
with rasterio.open(profile.input_raster) as src:
crs = src.crs.to_wkt()

for percentile in profile.percentile_levels:
data_array = src.read(1).astype(np.float32)

# Mask no-data values
data_array[data_array == src.nodata] = np.nan

# calculate percentile value
# percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))

return gpd.GeoDataFrame(
[
[profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
for percentile in sorted(profile.percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=crs,
)
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])


def get_percentile_area(
percentile_levels: typing.List,
raster_path: typing.Union[str, bytes, os.PathLike],
raster_data: raster.RasterData,
subject_id: str = "",
):
) -> gpd.GeoDataFrame:
"""
Parameters
----------
percentile_levels: Typing.List[Int]
list of k-th percentile scores.
raster_path: str or os.PathLike
file path to where the raster is stored.
raster_data: raster.RasterData
array of raster values
subject_id: str
unique identifier for the subject
Expand All @@ -94,9 +34,33 @@ def get_percentile_area(
GeoDataFrame
"""
percentile_profile = PercentileAreaProfile(
percentile_levels=percentile_levels,
input_raster=raster_path,
subject_id=subject_id,
shapes = []
for percentile in percentile_levels:
data_array = raster_data.data.copy()

# calculate percentile value
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
if len(values) == 0:
continue

csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform))

return gpd.GeoDataFrame(
[
[subject_id, percentile, _multipolygon(shapes, percentile)]
for percentile in sorted(percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=raster_data.crs,
)
return PercentileArea.calculate_percentile_area(profile=percentile_profile)
2 changes: 1 addition & 1 deletion ecoscope/io/earthranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ def upload(obs):
"records"
)
del obs["geometry"]
obs = pack_columns(obs, columns=["source", "recorded_at", "location"])
obs = pack_columns(obs, columns=["source", "recorded_at", "location", "exclusion_flags", "additional"])
post_data = obs.to_dict("records")
results = super(EarthRangerIO, self).post_observation(post_data)
except ERClientException as exc:
Expand Down
26 changes: 26 additions & 0 deletions ecoscope/io/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import math
import os
from collections import UserDict
from dataclasses import dataclass

import geopandas as gpd
import numpy as np
Expand Down Expand Up @@ -102,6 +103,20 @@ def __setitem__(self, key, item):
self._recompute_transform_(key)


@dataclass
class RasterData:
data: np.ndarray
crs: str
transform: rio.Affine

@classmethod
def from_raster_file(cls, raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan
return cls(data_array, src.crs.to_wkt(), src.transform)


class RasterPy:
@classmethod
def write(
Expand Down Expand Up @@ -249,3 +264,14 @@ def grid_to_raster(grid=None, val_column="", out_dir="", raster_name=None, xlen=
).write(vals, 1)

return memfile


def raster_to_grid(raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan


def get_crs(raster_path):
with rasterio.open(raster_path) as src:
return src.crs.to_wkt()
2 changes: 1 addition & 1 deletion ecoscope/plotting/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def draw_historic_timeseries(
The title shown in the plot legend for historic band
historic_mean_column: str
The name of the dataframe column to pull historic mean values from
current_value_title: str
historic_mean_title: str
The title shown in the plot legend for historic mean values
layout_kwargs: dict
Additional kwargs passed to plotly.go.Figure(layout)
Expand Down
Loading

0 comments on commit c30e553

Please sign in to comment.