From e3a1f199ff27ddd94509953751c3b3cdc2b1f050 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Mon, 4 Mar 2024 16:03:08 +0100 Subject: [PATCH 01/13] Initial band metadata added to load_stac #527 --- openeo/rest/connection.py | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 8737855df..127c5a102 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -7,6 +7,7 @@ import json import logging import os +import pystac import shlex import sys import warnings @@ -27,7 +28,7 @@ from openeo.internal.jupyter import VisualDict, VisualList from openeo.internal.processes.builder import ProcessBuilderBase from openeo.internal.warnings import deprecated, legacy_alias -from openeo.metadata import Band, BandDimension, CollectionMetadata, SpatialDimension, TemporalDimension +from openeo.metadata import Band, BandDimension, CollectionMetadata, CubeMetadata, SpatialDimension, TemporalDimension from openeo.rest import ( CapabilitiesException, OpenEoApiError, @@ -1149,6 +1150,39 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = """ return self.datacube_from_flat_graph(load_json_resource(src), parameters=parameters) + def metadata_from_stac(self, url: str) -> CubeMetadata: + """ + Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection + :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. + """ + collection = pystac.read_file(href=url) + + def get_band_names(itm: pystac.Item, asst: pystac.Asset) -> List[Band]: + return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] + + def is_band_asset(asset: pystac.Asset) -> bool: + return "eo:bands" in asset.extra_fields + + band_names = [] + for itm in collection.get_items(): + band_assets = { + asset_id: asset + for asset_id, asset in dict(sorted(itm.get_assets().items())).items() + if is_band_asset(asset) + } + + for asset_id, asset in band_assets.items(): + asset_band_names = get_band_names(itm, asset) + for asset_band_name in asset_band_names: + if asset_band_name not in band_names: + band_names.append(asset_band_name) + + band_dimension = BandDimension(name="bands", bands=band_names) + metadata = CubeMetadata(dimensions=[band_dimension]) + return metadata + @openeo_process def load_collection( self, @@ -1247,6 +1281,7 @@ def load_stac( temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Optional[List[str]] = None, properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, + get_metadata: Optional[bool] = False, ) -> DataCube: """ Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`. @@ -1340,6 +1375,9 @@ def load_stac( The value must be a condition (user-defined process) to be evaluated against a STAC API. This parameter is not supported for static STAC. + :param get_metadata: + Specify whether to also load the band name metadata from the URL. + .. versionadded:: 0.17.0 .. versionchanged:: 0.23.0 @@ -1361,6 +1399,8 @@ def load_stac( prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items() } cube = self.datacube_from_process(process_id="load_stac", **arguments) + if get_metadata: + cube.metadata = self.metadata_from_stac(url) return cube def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel: From 257d6871ce76227a2e54bba883525d01dd9e92ed Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 7 Mar 2024 16:04:25 +0100 Subject: [PATCH 02/13] added options for STAC items and catalogs as well in metadata_from_stac --- openeo/rest/connection.py | 58 ++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 127c5a102..22545ec47 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1157,27 +1157,51 @@ def metadata_from_stac(self, url: str) -> CubeMetadata: :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. """ - collection = pystac.read_file(href=url) - def get_band_names(itm: pystac.Item, asst: pystac.Asset) -> List[Band]: + def get_band_names(asst: pystac.Asset) -> List[Band]: return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] def is_band_asset(asset: pystac.Asset) -> bool: return "eo:bands" in asset.extra_fields + stac_object = pystac.read_file(href=url) + band_names = [] - for itm in collection.get_items(): - band_assets = { - asset_id: asset - for asset_id, asset in dict(sorted(itm.get_assets().items())).items() - if is_band_asset(asset) - } + collection = None + + if isinstance(stac_object, pystac.Item): + item = stac_object + if "eo:bands" in item.properties: + eo_bands_location = item.properties + elif item.get_collection() is not None: + collection = item.get_collection() + eo_bands_location = item.get_collection().summaries.lists + else: + eo_bands_location = {} + band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] + + elif isinstance(stac_object, pystac.Collection): + collection = stac_object + band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] + + # Summaries is not a required field in a STAC collection, so also check the assets + for itm in collection.get_items(): + band_assets = { + asset_id: asset + for asset_id, asset in dict(sorted(itm.get_assets().items())).items() + if is_band_asset(asset) + } + + for asset in band_assets.values(): + asset_band_names = get_band_names(asset) + for asset_band_name in asset_band_names: + if asset_band_name not in band_names: + band_names.append(asset_band_name) - for asset_id, asset in band_assets.items(): - asset_band_names = get_band_names(itm, asset) - for asset_band_name in asset_band_names: - if asset_band_name not in band_names: - band_names.append(asset_band_name) + else: + assert isinstance(stac_object, pystac.Catalog) + catalog = stac_object + band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] band_dimension = BandDimension(name="bands", bands=band_names) metadata = CubeMetadata(dimensions=[band_dimension]) @@ -1281,7 +1305,6 @@ def load_stac( temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Optional[List[str]] = None, properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, - get_metadata: Optional[bool] = False, ) -> DataCube: """ Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`. @@ -1375,9 +1398,6 @@ def load_stac( The value must be a condition (user-defined process) to be evaluated against a STAC API. This parameter is not supported for static STAC. - :param get_metadata: - Specify whether to also load the band name metadata from the URL. - .. versionadded:: 0.17.0 .. versionchanged:: 0.23.0 @@ -1399,8 +1419,8 @@ def load_stac( prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items() } cube = self.datacube_from_process(process_id="load_stac", **arguments) - if get_metadata: - cube.metadata = self.metadata_from_stac(url) + + cube.metadata = self.metadata_from_stac(url) return cube def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel: From 3a3cfe5c0deea34620a053433ae656fda745d8ea Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 7 Mar 2024 17:07:48 +0100 Subject: [PATCH 03/13] added exception to metadata in load_stac in case the python client cannot read the STAC url --- openeo/rest/connection.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 22545ec47..35e382c02 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1419,8 +1419,10 @@ def load_stac( prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items() } cube = self.datacube_from_process(process_id="load_stac", **arguments) - - cube.metadata = self.metadata_from_stac(url) + try: + cube.metadata = self.metadata_from_stac(url) + except: + print("Python client could not read band metadata.") return cube def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel: From 43cb52b5a4c3c66e38aa1e68a6ee73f6919e2f60 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 7 Mar 2024 17:14:13 +0100 Subject: [PATCH 04/13] Changed could not read from URL exception --- openeo/rest/connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 35e382c02..7d3e02eab 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1421,8 +1421,8 @@ def load_stac( cube = self.datacube_from_process(process_id="load_stac", **arguments) try: cube.metadata = self.metadata_from_stac(url) - except: - print("Python client could not read band metadata.") + except Exception: + _log.warning("Python client could not read band metadata from URL.") return cube def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel: From a34abd1468dd154a184325b317182517118e8c86 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Tue, 12 Mar 2024 13:49:19 +0100 Subject: [PATCH 05/13] added pystac dependency in setup.py #527 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 73f7ef8c8..1e1a6e2d0 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ "flake8>=5.0.0", "time_machine", "pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency + "pystac" ] docs_require = [ From f6e8e4a10feb53c225d412db77733407f7665e46 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Tue, 12 Mar 2024 16:37:03 +0100 Subject: [PATCH 06/13] moved metadata_from_stac function from connection to metadata module #527 --- openeo/metadata.py | 59 +++++++++++++++++++++++++++++++++ openeo/rest/connection.py | 69 +++++---------------------------------- 2 files changed, 68 insertions(+), 60 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 31d97513f..cf59aa211 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import pystac import warnings from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union @@ -522,3 +523,61 @@ def _repr_html_(self): def __str__(self) -> str: bands = self.band_names if self.has_band_dimension() else "no bands dimension" return f"CollectionMetadata({self.extent} - {bands} - {self.dimension_names()})" + + +def metadata_from_stac(url: str) -> CubeMetadata: + """ + Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection + :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. + """ + + def get_band_names(asst: pystac.Asset) -> List[Band]: + return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] + + def is_band_asset(asset: pystac.Asset) -> bool: + return "eo:bands" in asset.extra_fields + + stac_object = pystac.read_file(href=url) + + band_names = [] + collection = None + + if isinstance(stac_object, pystac.Item): + item = stac_object + if "eo:bands" in item.properties: + eo_bands_location = item.properties + elif item.get_collection() is not None: + collection = item.get_collection() + eo_bands_location = item.get_collection().summaries.lists + else: + eo_bands_location = {} + band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] + + elif isinstance(stac_object, pystac.Collection): + collection = stac_object + band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] + + # Summaries is not a required field in a STAC collection, so also check the assets + for itm in collection.get_items(): + band_assets = { + asset_id: asset + for asset_id, asset in dict(sorted(itm.get_assets().items())).items() + if is_band_asset(asset) + } + + for asset in band_assets.values(): + asset_band_names = get_band_names(asset) + for asset_band_name in asset_band_names: + if asset_band_name not in band_names: + band_names.append(asset_band_name) + + else: + assert isinstance(stac_object, pystac.Catalog) + catalog = stac_object + band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] + + band_dimension = BandDimension(name="bands", bands=band_names) + metadata = CubeMetadata(dimensions=[band_dimension]) + return metadata diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 7d3e02eab..8aa95210c 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -7,7 +7,6 @@ import json import logging import os -import pystac import shlex import sys import warnings @@ -28,7 +27,14 @@ from openeo.internal.jupyter import VisualDict, VisualList from openeo.internal.processes.builder import ProcessBuilderBase from openeo.internal.warnings import deprecated, legacy_alias -from openeo.metadata import Band, BandDimension, CollectionMetadata, CubeMetadata, SpatialDimension, TemporalDimension +from openeo.metadata import ( + Band, + BandDimension, + CollectionMetadata, + SpatialDimension, + TemporalDimension, + metadata_from_stac, +) from openeo.rest import ( CapabilitiesException, OpenEoApiError, @@ -1150,63 +1156,6 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = """ return self.datacube_from_flat_graph(load_json_resource(src), parameters=parameters) - def metadata_from_stac(self, url: str) -> CubeMetadata: - """ - Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` - - :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection - :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. - """ - - def get_band_names(asst: pystac.Asset) -> List[Band]: - return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] - - def is_band_asset(asset: pystac.Asset) -> bool: - return "eo:bands" in asset.extra_fields - - stac_object = pystac.read_file(href=url) - - band_names = [] - collection = None - - if isinstance(stac_object, pystac.Item): - item = stac_object - if "eo:bands" in item.properties: - eo_bands_location = item.properties - elif item.get_collection() is not None: - collection = item.get_collection() - eo_bands_location = item.get_collection().summaries.lists - else: - eo_bands_location = {} - band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] - - elif isinstance(stac_object, pystac.Collection): - collection = stac_object - band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] - - # Summaries is not a required field in a STAC collection, so also check the assets - for itm in collection.get_items(): - band_assets = { - asset_id: asset - for asset_id, asset in dict(sorted(itm.get_assets().items())).items() - if is_band_asset(asset) - } - - for asset in band_assets.values(): - asset_band_names = get_band_names(asset) - for asset_band_name in asset_band_names: - if asset_band_name not in band_names: - band_names.append(asset_band_name) - - else: - assert isinstance(stac_object, pystac.Catalog) - catalog = stac_object - band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] - - band_dimension = BandDimension(name="bands", bands=band_names) - metadata = CubeMetadata(dimensions=[band_dimension]) - return metadata - @openeo_process def load_collection( self, @@ -1420,7 +1369,7 @@ def load_stac( } cube = self.datacube_from_process(process_id="load_stac", **arguments) try: - cube.metadata = self.metadata_from_stac(url) + cube.metadata = metadata_from_stac(url) except Exception: _log.warning("Python client could not read band metadata from URL.") return cube From ea41eaf91a13e4c81a22c040c717a3b1c42ef84e Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Tue, 12 Mar 2024 16:42:00 +0100 Subject: [PATCH 07/13] First unit test on metadata_from_stac #527 --- tests/test_metadata.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 28ac81dc7..ef144dc68 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -2,6 +2,7 @@ from typing import List +import json import pytest from openeo.metadata import ( @@ -14,6 +15,7 @@ MetadataException, SpatialDimension, TemporalDimension, + metadata_from_stac, ) @@ -782,3 +784,25 @@ def filter_bbox(self, bbox): assert isinstance(new, MyCubeMetadata) assert orig.bbox is None assert new.bbox == (1, 2, 3, 4) + + +def test_metadata_from_stac(tmp_path): + collection_json = { + "type": "Collection", + "id": "test-collection", + "stac_version": "1.0.0", + "description": "Test collection", + "links": [], + "title": "Test Collection", + "extent": { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, + }, + "license": "proprietary", + "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, + } + + path = tmp_path / "collection.json" + path.write_text(json.dumps(collection_json)) + metadata = metadata_from_stac(path) + assert metadata.band_names == ["B01", "B02"] From 822ec334d28cab1ac7146ef4d13ab89454c96651 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Tue, 12 Mar 2024 17:07:19 +0100 Subject: [PATCH 08/13] Added more unit tests for STAC catalogs and items for metadata_from_stac #527 --- tests/test_metadata.py | 61 +++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index ef144dc68..04db4fd3b 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -786,23 +786,48 @@ def filter_bbox(self, bbox): assert new.bbox == (1, 2, 3, 4) -def test_metadata_from_stac(tmp_path): - collection_json = { - "type": "Collection", - "id": "test-collection", - "stac_version": "1.0.0", - "description": "Test collection", - "links": [], - "title": "Test Collection", - "extent": { - "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, - }, - "license": "proprietary", - "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, - } +collection_json = { + "type": "Collection", + "id": "test-collection", + "stac_version": "1.0.0", + "description": "Test collection", + "links": [], + "title": "Test Collection", + "extent": { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, + }, + "license": "proprietary", + "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, +} + +catalog_json = { + "type": "Catalog", + "id": "test-catalog", + "stac_version": "1.0.0", + "description": "Test Catalog", + "links": [], +} + +item_json = { + "type": "Feature", + "stac_version": "1.0.0", + "id": "test-item", + "properties": {"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]}, + "geometry": {"coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]], "type": "Polygon"}, + "links": [], + "assets": {}, + "bbox": [0, 1, 0, 1], + "stac_extensions": [], +} + + +@pytest.mark.parametrize( + "test_stac, expected", [(collection_json, ["B01", "B02"]), (catalog_json, []), (item_json, ["SCL", "B08"])] +) +def test_metadata_from_stac(tmp_path, test_stac, expected): - path = tmp_path / "collection.json" - path.write_text(json.dumps(collection_json)) + path = tmp_path / "stac.json" + path.write_text(json.dumps(test_stac)) metadata = metadata_from_stac(path) - assert metadata.band_names == ["B01", "B02"] + assert metadata.band_names == expected From e29b2a43bb982a5f60a474e2c58e263cc7401da6 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 14 Mar 2024 15:24:26 +0100 Subject: [PATCH 09/13] moved pystac dependency from test to general #527 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1e1a6e2d0..b1810709f 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,6 @@ "flake8>=5.0.0", "time_machine", "pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency - "pystac" ] docs_require = [ @@ -74,6 +73,7 @@ "numpy>=1.17.0", "xarray>=0.12.3", "pandas>0.20.0", + "pystac", "deprecated>=1.2.12", 'oschmod>=0.3.12; sys_platform == "win32"', "importlib_resources; python_version<'3.9'", From 3f50dbc3fec26c5a1eabcb6513759ac105cd73db Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 14 Mar 2024 16:50:22 +0100 Subject: [PATCH 10/13] rewritten metadata_from_stac to also read the band wavelength and common names #527 --- openeo/metadata.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index cf59aa211..43df17c01 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -533,15 +533,21 @@ def metadata_from_stac(url: str) -> CubeMetadata: :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. """ - def get_band_names(asst: pystac.Asset) -> List[Band]: - return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] + def get_band_metadata(eo_bands_location: dict) -> List[Band]: + return [ + Band(name=band["name"], common_name=band.get("common_name"), wavelength_um=band.get("center_wavelength")) + for band in eo_bands_location.get("eo:bands", []) + ] + + def get_band_names(bands: List[Band]) -> List[str]: + return [band.name for band in bands] def is_band_asset(asset: pystac.Asset) -> bool: return "eo:bands" in asset.extra_fields stac_object = pystac.read_file(href=url) - band_names = [] + bands = [] collection = None if isinstance(stac_object, pystac.Item): @@ -553,11 +559,11 @@ def is_band_asset(asset: pystac.Asset) -> bool: eo_bands_location = item.get_collection().summaries.lists else: eo_bands_location = {} - band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] + bands = get_band_metadata(eo_bands_location) elif isinstance(stac_object, pystac.Collection): collection = stac_object - band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] + bands = get_band_metadata(collection.summaries.lists) # Summaries is not a required field in a STAC collection, so also check the assets for itm in collection.get_items(): @@ -568,16 +574,16 @@ def is_band_asset(asset: pystac.Asset) -> bool: } for asset in band_assets.values(): - asset_band_names = get_band_names(asset) - for asset_band_name in asset_band_names: - if asset_band_name not in band_names: - band_names.append(asset_band_name) + asset_bands = get_band_metadata(asset.extra_fields) + for asset_band in asset_bands: + if asset_band.name not in get_band_names(bands): + bands.append(asset_band) else: assert isinstance(stac_object, pystac.Catalog) catalog = stac_object - band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] + bands = get_band_metadata(catalog.extra_fields.get("summaries", {})) - band_dimension = BandDimension(name="bands", bands=band_names) + band_dimension = BandDimension(name="bands", bands=bands) metadata = CubeMetadata(dimensions=[band_dimension]) return metadata From 7062ba11ce1f44ea1ca10d8198f8fc5c57f28de0 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 14 Mar 2024 16:56:55 +0100 Subject: [PATCH 11/13] Included more info in the thrown exception in load_stac #527 --- openeo/rest/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 8aa95210c..7840e1f66 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1371,7 +1371,7 @@ def load_stac( try: cube.metadata = metadata_from_stac(url) except Exception: - _log.warning("Python client could not read band metadata from URL.") + _log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True) return cube def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel: From c4f2d6c30642ca29a6c54cc96e3c058f4adfc960 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 14 Mar 2024 18:26:26 +0100 Subject: [PATCH 12/13] removed sorting in asset dictionary in metadata_from_stac #527 --- openeo/metadata.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 43df17c01..af15a913d 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -567,11 +567,7 @@ def is_band_asset(asset: pystac.Asset) -> bool: # Summaries is not a required field in a STAC collection, so also check the assets for itm in collection.get_items(): - band_assets = { - asset_id: asset - for asset_id, asset in dict(sorted(itm.get_assets().items())).items() - if is_band_asset(asset) - } + band_assets = {asset_id: asset for asset_id, asset in itm.get_assets().items() if is_band_asset(asset)} for asset in band_assets.values(): asset_bands = get_band_metadata(asset.extra_fields) From 85fe5a61ea9262277e9a5ec1f3562f7ee909c37c Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Thu, 14 Mar 2024 18:32:44 +0100 Subject: [PATCH 13/13] removed top level variables from test_metadata_from_stac #527 --- tests/test_metadata.py | 81 +++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 04db4fd3b..9a23fd9f0 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -786,44 +786,51 @@ def filter_bbox(self, bbox): assert new.bbox == (1, 2, 3, 4) -collection_json = { - "type": "Collection", - "id": "test-collection", - "stac_version": "1.0.0", - "description": "Test collection", - "links": [], - "title": "Test Collection", - "extent": { - "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, - }, - "license": "proprietary", - "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, -} - -catalog_json = { - "type": "Catalog", - "id": "test-catalog", - "stac_version": "1.0.0", - "description": "Test Catalog", - "links": [], -} - -item_json = { - "type": "Feature", - "stac_version": "1.0.0", - "id": "test-item", - "properties": {"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]}, - "geometry": {"coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]], "type": "Polygon"}, - "links": [], - "assets": {}, - "bbox": [0, 1, 0, 1], - "stac_extensions": [], -} - - @pytest.mark.parametrize( - "test_stac, expected", [(collection_json, ["B01", "B02"]), (catalog_json, []), (item_json, ["SCL", "B08"])] + "test_stac, expected", + [ + ( + { + "type": "Collection", + "id": "test-collection", + "stac_version": "1.0.0", + "description": "Test collection", + "links": [], + "title": "Test Collection", + "extent": { + "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, + "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, + }, + "license": "proprietary", + "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, + }, + ["B01", "B02"], + ), + ( + { + "type": "Catalog", + "id": "test-catalog", + "stac_version": "1.0.0", + "description": "Test Catalog", + "links": [], + }, + [], + ), + ( + { + "type": "Feature", + "stac_version": "1.0.0", + "id": "test-item", + "properties": {"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]}, + "geometry": {"coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]], "type": "Polygon"}, + "links": [], + "assets": {}, + "bbox": [0, 1, 0, 1], + "stac_extensions": [], + }, + ["SCL", "B08"], + ), + ], ) def test_metadata_from_stac(tmp_path, test_stac, expected):