Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Dimension Mismatch in ETD Calculation Error #393

Merged
merged 6 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"import ecoscope\n",
"from ecoscope.analysis.UD import calculate_etd_range\n",
"from ecoscope.analysis.percentile import get_percentile_area\n",
"from ecoscope.io.raster import RasterData\n",
"\n",
"ecoscope.init()"
]
Expand Down Expand Up @@ -255,7 +256,12 @@
"outputs": [],
"source": [
"percentiles = pd.concat(\n",
" [get_percentile_area(percentile_levels=[50, 99.9, 100.0], raster_path=v, subject_id=k) for k, v in etd.items()]\n",
" [\n",
" get_percentile_area(\n",
" percentile_levels=[50, 99.9, 100.0], raster_data=RasterData.from_raster_file(v), subject_id=k\n",
" )\n",
" for k, v in etd.items()\n",
" ]\n",
").reset_index(drop=True)\n",
"\n",
"percentiles"
Expand Down Expand Up @@ -301,7 +307,7 @@
"source": [
"salif = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=RasterData.from_raster_file(etd.at[\"Salif Keita\"]),\n",
" subject_id=\"Salif Keita\",\n",
")"
]
Expand Down
3 changes: 2 additions & 1 deletion doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,10 @@
"metadata": {},
"outputs": [],
"source": [
"raster_data = ecoscope.io.raster.RasterData.from_raster_file(etd.at[\"Salif Keita\"])\n",
"percentile_areas = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=raster_data,\n",
" subject_id=\"Salif Keita\",\n",
").to_crs(4326)\n",
"\n",
Expand Down
34 changes: 22 additions & 12 deletions ecoscope/analysis/UD/etd_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
from dataclasses import dataclass

import numpy as np

from ecoscope.base import Trajectory
from ecoscope.io import raster

try:
import numba as nb
import scipy
from sklearn import neighbors
from scipy.optimize import minimize
from scipy.stats import weibull_min
from sklearn import neighbors
except ModuleNotFoundError:
raise ModuleNotFoundError(
'Missing optional dependencies required by this module. \
Expand Down Expand Up @@ -84,13 +85,13 @@ class Weibull3Parameter(WeibullPDF):

def calculate_etd_range(
trajectory_gdf: Trajectory,
output_path: typing.Union[str, bytes, os.PathLike],
output_path: typing.Union[str, bytes, os.PathLike, None] = None,
max_speed_kmhr: float = 0.0,
max_speed_percentage: float = 0.9999,
raster_profile: raster.RasterProfile = None,
expansion_factor: float = 1.3,
weibull_pdf: typing.Union[Weibull2Parameter, Weibull3Parameter] = Weibull2Parameter(),
) -> None:
) -> raster.RasterData:
"""
The ETDRange class provides a trajectory-based, nonparametric approach to estimate the utilization distribution (UD)
of an animal, using model parameters derived directly from the movement behaviour of the species.
Expand All @@ -100,7 +101,7 @@ def calculate_etd_range(
Parameters
----------
trajectory_gdf : geopandas.GeoDataFrame
output_path : str or PathLike
output_path : str or PathLike or None
max_speed_kmhr : float
max_speed_percentage : 0.999
raster_profile : raster.RasterProfile
Expand Down Expand Up @@ -169,8 +170,12 @@ def calculate_etd_range(
grid_centroids[1, 0] = y_max - raster_profile.pixel_size * 0.5

centroids_coords = np.dot(grid_centroids, np.mgrid[1:2, :num_columns, :num_rows].T.reshape(-1, 3, 1))
centroids_coords = centroids_coords.squeeze().T

if centroids_coords.ndim != 2:
centroids_coords = centroids_coords.reshape(1, -1)

tr = neighbors.KDTree(centroids_coords.squeeze().T)
tr = neighbors.KDTree(centroids_coords)
Yun-Wu marked this conversation as resolved.
Show resolved Hide resolved

del centroids_coords

Expand Down Expand Up @@ -206,12 +211,17 @@ def calculate_etd_range(
# Normalize the grid values
raster_ndarray = raster_ndarray / raster_ndarray.sum()

# Set the null data values
raster_ndarray[raster_ndarray == 0] = raster_profile.nodata_value
ndarray = raster_ndarray.reshape(num_rows, num_columns)

# write raster_ndarray to GeoTIFF file.
raster.RasterPy.write(
ndarray=raster_ndarray.reshape(num_rows, num_columns),
fp=output_path,
**raster_profile,
)
if output_path:
# Set the null data values
raster_ndarray[np.isnan(raster_ndarray) | (raster_ndarray == 0)] = raster_profile.nodata_value

raster.RasterPy.write(
ndarray,
fp=output_path,
**raster_profile,
)

return raster.RasterData(data=ndarray.astype("float32"), crs=raster_profile.crs, transform=raster_profile.transform)
Yun-Wu marked this conversation as resolved.
Show resolved Hide resolved
108 changes: 36 additions & 72 deletions ecoscope/analysis/percentile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import typing
from dataclasses import dataclass, field

import geopandas as gpd
import numpy as np
Expand All @@ -9,83 +7,25 @@
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon

from ecoscope.io import raster

@dataclass
class PercentileAreaProfile:
input_raster: typing.Union[str, bytes, os.PathLike]
percentile_levels: typing.List = field(default_factory=[50.0])
subject_id: str = ""


class PercentileArea:
@staticmethod
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])

@classmethod
def calculate_percentile_area(cls, profile: PercentileAreaProfile):
"""
Parameters
----------
profile: PercentileAreaProfile
dataclass object with information for percentile-area calculation

Returns
-------
GeodataFrame
"""

assert type(profile) is PercentileAreaProfile
shapes = []

# open raster
with rasterio.open(profile.input_raster) as src:
crs = src.crs.to_wkt()

for percentile in profile.percentile_levels:
data_array = src.read(1).astype(np.float32)

# Mask no-data values
data_array[data_array == src.nodata] = np.nan

# calculate percentile value
# percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))

return gpd.GeoDataFrame(
[
[profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
for percentile in sorted(profile.percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=crs,
)
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])


def get_percentile_area(
percentile_levels: typing.List,
raster_path: typing.Union[str, bytes, os.PathLike],
raster_data: raster.RasterData,
subject_id: str = "",
):
) -> gpd.GeoDataFrame:
"""
Parameters
----------
percentile_levels: Typing.List[Int]
list of k-th percentile scores.
raster_path: str or os.PathLike
file path to where the raster is stored.
raster_data: raster.RasterData
array of raster values
subject_id: str
unique identifier for the subject

Expand All @@ -94,9 +34,33 @@ def get_percentile_area(
GeoDataFrame

"""
percentile_profile = PercentileAreaProfile(
percentile_levels=percentile_levels,
input_raster=raster_path,
subject_id=subject_id,
shapes = []
for percentile in percentile_levels:
data_array = raster_data.data.copy()

# calculate percentile value
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
if len(values) == 0:
continue

csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform))

return gpd.GeoDataFrame(
[
[subject_id, percentile, _multipolygon(shapes, percentile)]
for percentile in sorted(percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=raster_data.crs,
)
return PercentileArea.calculate_percentile_area(profile=percentile_profile)
2 changes: 1 addition & 1 deletion ecoscope/io/earthranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ def upload(obs):
"records"
)
del obs["geometry"]
obs = pack_columns(obs, columns=["source", "recorded_at", "location"])
obs = pack_columns(obs, columns=["source", "recorded_at", "location", "exclusion_flags", "additional"])
post_data = obs.to_dict("records")
results = super(EarthRangerIO, self).post_observation(post_data)
except ERClientException as exc:
Expand Down
26 changes: 26 additions & 0 deletions ecoscope/io/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import math
import os
from collections import UserDict
from dataclasses import dataclass

import geopandas as gpd
import numpy as np
Expand Down Expand Up @@ -102,6 +103,20 @@ def __setitem__(self, key, item):
self._recompute_transform_(key)


@dataclass
class RasterData:
data: np.ndarray
crs: str
transform: rio.Affine

@classmethod
def from_raster_file(cls, raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan
return cls(data_array, src.crs.to_wkt(), src.transform)


class RasterPy:
@classmethod
def write(
Expand Down Expand Up @@ -249,3 +264,14 @@ def grid_to_raster(grid=None, val_column="", out_dir="", raster_name=None, xlen=
).write(vals, 1)

return memfile


def raster_to_grid(raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan


def get_crs(raster_path):
with rasterio.open(raster_path) as src:
return src.crs.to_wkt()
2 changes: 1 addition & 1 deletion ecoscope/plotting/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def draw_historic_timeseries(
The title shown in the plot legend for historic band
historic_mean_column: str
The name of the dataframe column to pull historic mean values from
current_value_title: str
historic_mean_title: str
The title shown in the plot legend for historic mean values
layout_kwargs: dict
Additional kwargs passed to plotly.go.Figure(layout)
Expand Down
39 changes: 35 additions & 4 deletions tests/test_ud.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import geopandas as gpd
import geopandas.testing
import numpy as np
import pytest

import ecoscope
from ecoscope.analysis import UD
from ecoscope.analysis.percentile import get_percentile_area


def test_etd_range(movebank_relocations):
@pytest.fixture
def movebank_trajectory_gdf(movebank_relocations):
# apply relocation coordinate filter to movebank data
pnts_filter = ecoscope.base.RelocsCoordinateFilter(
min_x=-5,
Expand All @@ -23,15 +25,20 @@ def test_etd_range(movebank_relocations):
movebank_relocations.remove_filtered(inplace=True)

# Create Trajectory
movebank_trajectory_gdf = ecoscope.base.Trajectory.from_relocations(movebank_relocations)
return ecoscope.base.Trajectory.from_relocations(movebank_relocations)

raster_profile = ecoscope.io.raster.RasterProfile(

@pytest.fixture
def raster_profile():
return ecoscope.io.raster.RasterProfile(
pixel_size=250.0,
crs="ESRI:102022",
nodata_value=np.nan,
band_count=1, # Albers Africa Equal Area Conic
)


def test_etd_range_with_tif_file(movebank_trajectory_gdf, raster_profile):
file = NamedTemporaryFile(delete=False)
try:
UD.calculate_etd_range(
Expand All @@ -42,8 +49,32 @@ def test_etd_range(movebank_relocations):
expansion_factor=1.3,
)

raster_data = ecoscope.io.raster.RasterData.from_raster_file(file.name)

percentile_area = get_percentile_area(
percentile_levels=[99.9], raster_data=raster_data, subject_id="Salif_Keita"
).to_crs(4326)
finally:
file.close()
os.unlink(file.name)

expected_percentile_area = gpd.read_feather("tests/test_output/etd_percentile_area.feather")
gpd.testing.geom_almost_equals(percentile_area, expected_percentile_area)


def test_etd_range_without_tif_file(movebank_trajectory_gdf, raster_profile):
file = NamedTemporaryFile(delete=False)
try:
raster_data = UD.calculate_etd_range(
trajectory_gdf=movebank_trajectory_gdf,
output_path=file.name,
max_speed_kmhr=1.05 * movebank_trajectory_gdf.speed_kmhr.max(),
raster_profile=raster_profile,
expansion_factor=1.3,
)

percentile_area = get_percentile_area(
percentile_levels=[99.9], raster_path=file.name, subject_id="Salif_Keita"
percentile_levels=[99.9], raster_data=raster_data, subject_id="Salif_Keita"
).to_crs(4326)
finally:
file.close()
Expand Down
Loading