From d84e60f8985aad5572a5268ca78e9d557b86b5ce Mon Sep 17 00:00:00 2001 From: Lorenzo Zampieri Date: Fri, 6 Jan 2023 14:14:26 -0700 Subject: [PATCH 1/4] Added meta.yaml and recipe.py for OSI-SAF-450-430-a_rg025 recipe --- .../osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml | 45 +++++++++++++++++++ .../osisaf_ice_conc_cdr_v3p0_rg025/recipe.py | 34 ++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml create mode 100644 recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py diff --git a/recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml b/recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml new file mode 100644 index 0000000000..2fcaa6bb99 --- /dev/null +++ b/recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml @@ -0,0 +1,45 @@ +# Name for dataset. User chosen. +title: "OSI-SAF Global Sea Ice Concentration Climate Data Record (1990-2021 Daily) Release 3" +# Description of dataset. User chosen, roughly 1 sentence in length. +description: "Daily global sea ice concentration climate data record (product ID: OSI-450-a and OSI-430-a) for the preiod 1990-2021 interpolated onto a global regular grid with a resolution of 0.25 degrees" +# Version of pangeo_forge_recipes library that was used +pangeo_forge_version: "0.9.2" +# The recipes section tells Pangeo Cloud where to find the recipes within your PR. +# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below. +# If your PR contains multiple recipes, you may add additional elements to the list below. +recipes: + # User chosen name for recipe. Likely similiar to dataset name, ~25 characters in length + - id: OSI-SAF-450-430-a_rg025 + # The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format : + # is name of .py file where the Python recipe object is defined. + # For example, if is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR. 
+ # is the name of the recipe object (i.e. Python class instance) _within_ the specified file. + # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"` + object: "recipe:recipe" +provenance: + # Data provider object. Follow STAC spec. + # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object + providers: + - name: "EUMETSAT OSI SAF" + description: "EUMETSAT Ocean and Sea Ice Satellite Application Facility High Latitude Processing Center" + roles: + - producer + - licensor + url: https://osi-saf.eumetsat.int/products/osi-450-a + - name: "NCAR" + description: "Regridding and merging of the observations performed by Lorenzo Zampieri at the National Center for Atmospheric Research, where observations are stored" + roles: + - processor + - host + # This is a required field for provider. Follow STAC spec + # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license + license: "EUMETSAT Essential (free and unrestricted)" +maintainers: + # Information about recipe creator. name and github are required + - name: "Lorenzo Zampieri" + orcid: "0000-0003-1703-4162" + github: lzampier +# The specific bakery (i.e. cloud infrastructure) that your recipe will run on. 
+# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries +bakery: + id: "pangeo-ldeo-nsf-earthcube" diff --git a/recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py b/recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py new file mode 100644 index 0000000000..8b858d7614 --- /dev/null +++ b/recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py @@ -0,0 +1,34 @@ +import pandas as pd + +from pangeo_forge_recipes.patterns import ConcatDim, FilePattern +from pangeo_forge_recipes.recipes import XarrayZarrRecipe + +dates = pd.date_range('1989-01-01', '2021-12-31', freq='D') + +missing_dates = [ + pd.Timestamp(1990,8,13), pd.Timestamp(1990,8,25), pd.Timestamp(1990,8,26), + pd.Timestamp(1990,10,21), pd.Timestamp(1990,10,22), pd.Timestamp(1990,10,26), + pd.Timestamp(1990,10,27), pd.Timestamp(1990,10,28), pd.Timestamp(1990,12,21), + pd.Timestamp(1990,12,22), pd.Timestamp(1990,12,23), pd.Timestamp(1990,12,24), + pd.Timestamp(1990,12,25), pd.Timestamp(1990,12,26), pd.Timestamp(2000,12,1), + pd.Timestamp(2021, 2, 20) + ] + +# Drop missing dates +dates = dates.drop(missing_dates) + +URL_FORMAT = ( + "https://g-08c618.7a577b.6fbd.data.globus.org/" + "ice_conc_r1440x720_{interim}cdr-v3p0_{time:%Y%m%d}1200.nc" +) + +def make_url(time): + if time.year <= 2020: + return URL_FORMAT.format(time=time, interim='') + if time.year > 2020: + return URL_FORMAT.format(time=time, interim='i') + +time_concat_dim = ConcatDim("time", dates, nitems_per_file=1) +pattern = FilePattern(make_url, time_concat_dim) + +recipe = XarrayZarrRecipe(pattern, inputs_per_chunk=5) From 03bc38aa9b2cd9d409f1229212cbcc0974a56bff Mon Sep 17 00:00:00 2001 From: Lorenzo Zampieri Date: Fri, 6 Jan 2023 14:15:59 -0700 Subject: [PATCH 2/4] Example folder noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic has been removed --- .../meta.yaml | 40 ------------------- .../recipe.py | 21 ---------- 2 files changed, 61 deletions(-) delete mode 100644 
recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/meta.yaml delete mode 100644 recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/recipe.py diff --git a/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/meta.yaml b/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/meta.yaml deleted file mode 100644 index fb4bbf0ffe..0000000000 --- a/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/meta.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Name for dataset. User chosen. -title: 'NOAA Atmospheric Climate Data Records: Cloud Properties ISCCP' -# Description of dataset. User chosen, roughly 1 sentence in length. -description: 'A collection of global, 3 hour, data describing distribution and variation of cloud radiative properties to improve understanding and modeling of the way clouds affect climate. Variables derived from operational weather satellites, polar orbiting, and geostationary satellites.' -# Version of pangeo_forge_recipes library that was used -pangeo_forge_version: '0.9.2' -# The recipes section tells Pangeo Cloud where to find the recipes within your PR. -# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below. -# If your PR contains multiple recipes, you may add additional elements to the list below. -recipes: - # User chosen name for recipe. Likely similiar to dataset name, ~25 characters in length - - id: noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic - # The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format : - # is name of .py file where the Python recipe object is defined. - # For example, if is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR. - # is the name of the recipe object (i.e. Python class instance) _within_ the specified file. 
- # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"` - object: 'recipe:recipe' -provenance: - # Data provider object. Follow STAC spec. - # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object - providers: - - name: 'NOAA NCEI' - description: 'National Oceanographic & Atmospheric Administration National Centers for Environmental Information' - roles: - - producer - - licensor - url: https://www.ncei.noaa.gov/products/climate-data-records/cloud-properties-isccp - # This is a required field for provider. Follow STAC spec - # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license - license: 'Open Data' -maintainers: - # Information about recipe creator. name and github are required - - name: 'Ryan Avery' - orcid: '0000-0001-7392-1474' - github: rbavery -# The specific bakery (i.e. cloud infrastructure) that your recipe will run on. 
-# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries -bakery: - id: 'pangeo-ldeo-nsf-earthcube' diff --git a/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/recipe.py b/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/recipe.py deleted file mode 100644 index 3992f041d0..0000000000 --- a/recipes/noaa-atmosphere-climate-cloud-properties-isccp-hgg-basic/recipe.py +++ /dev/null @@ -1,21 +0,0 @@ -from os.path import basename, join - -import s3fs - -from pangeo_forge_recipes.patterns import pattern_from_file_sequence -from pangeo_forge_recipes.recipes.reference_hdf_zarr import HDFReferenceRecipe - -url_base = 's3://noaa-cdr-cloud-properties-isccp-pds/data/isccp-basic/hgg' -fs = s3fs.S3FileSystem(anon=True) -yearmonth_folders = fs.ls(join(url_base)) -yearmonths = list(map(lambda x: basename(x), yearmonth_folders)) -file_list = [] -for yearmonth in yearmonths: - file_list += sorted( - filter( - lambda x: x.endswith('.nc'), - map(lambda x: 's3://' + x, fs.ls(join(url_base, str(yearmonth)), detail=False)), - ) - ) -pattern = pattern_from_file_sequence(file_list, 'time', nitems_per_file=1) -recipe = HDFReferenceRecipe(pattern, netcdf_storage_options={'anon': True}) From 77504d55c03992a63a86aa711db13f671b00e54d Mon Sep 17 00:00:00 2001 From: Lorenzo Zampieri Date: Fri, 6 Jan 2023 14:40:55 -0700 Subject: [PATCH 3/4] Dataset name has been changed --- .../meta.yaml | 0 .../recipe.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename recipes/{osisaf_ice_conc_cdr_v3p0_rg025 => OSI-SAF-450-430-a_rg025}/meta.yaml (100%) rename recipes/{osisaf_ice_conc_cdr_v3p0_rg025 => OSI-SAF-450-430-a_rg025}/recipe.py (100%) diff --git a/recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml b/recipes/OSI-SAF-450-430-a_rg025/meta.yaml similarity index 100% rename from recipes/osisaf_ice_conc_cdr_v3p0_rg025/meta.yaml rename to recipes/OSI-SAF-450-430-a_rg025/meta.yaml diff --git 
a/recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py b/recipes/OSI-SAF-450-430-a_rg025/recipe.py similarity index 100% rename from recipes/osisaf_ice_conc_cdr_v3p0_rg025/recipe.py rename to recipes/OSI-SAF-450-430-a_rg025/recipe.py From 4b3060fe207986abcef1a00dc6920e5ab4178035 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Jan 2023 22:40:47 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- recipes/OSI-SAF-450-430-a_rg025/meta.yaml | 26 ++++++++-------- recipes/OSI-SAF-450-430-a_rg025/recipe.py | 36 +++++++++++++++-------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/recipes/OSI-SAF-450-430-a_rg025/meta.yaml b/recipes/OSI-SAF-450-430-a_rg025/meta.yaml index 2fcaa6bb99..66e2b6c325 100644 --- a/recipes/OSI-SAF-450-430-a_rg025/meta.yaml +++ b/recipes/OSI-SAF-450-430-a_rg025/meta.yaml @@ -1,9 +1,9 @@ # Name for dataset. User chosen. -title: "OSI-SAF Global Sea Ice Concentration Climate Data Record (1990-2021 Daily) Release 3" +title: 'OSI-SAF Global Sea Ice Concentration Climate Data Record (1990-2021 Daily) Release 3' # Description of dataset. User chosen, roughly 1 sentence in length. -description: "Daily global sea ice concentration climate data record (product ID: OSI-450-a and OSI-430-a) for the preiod 1990-2021 interpolated onto a global regular grid with a resolution of 0.25 degrees" -# Version of pangeo_forge_recipes library that was used -pangeo_forge_version: "0.9.2" +description: 'Daily global sea ice concentration climate data record (product ID: OSI-450-a and OSI-430-a) for the period 1990-2021 interpolated onto a global regular grid with a resolution of 0.25 degrees' +# Version of pangeo_forge_recipes library that was used +pangeo_forge_version: '0.9.2' # The recipes section tells Pangeo Cloud where to find the recipes within your PR.
# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below. # If your PR contains multiple recipes, you may add additional elements to the list below. @@ -15,31 +15,31 @@ recipes: # For example, if is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR. # is the name of the recipe object (i.e. Python class instance) _within_ the specified file. # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"` - object: "recipe:recipe" + object: 'recipe:recipe' provenance: # Data provider object. Follow STAC spec. # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object providers: - - name: "EUMETSAT OSI SAF" - description: "EUMETSAT Ocean and Sea Ice Satellite Application Facility High Latitude Processing Center" + - name: 'EUMETSAT OSI SAF' + description: 'EUMETSAT Ocean and Sea Ice Satellite Application Facility High Latitude Processing Center' roles: - producer - licensor url: https://osi-saf.eumetsat.int/products/osi-450-a - - name: "NCAR" - description: "Regridding and merging of the observations performed by Lorenzo Zampieri at the National Center for Atmospheric Research, where observations are stored" + - name: 'NCAR' + description: 'Regridding and merging of the observations performed by Lorenzo Zampieri at the National Center for Atmospheric Research, where observations are stored' roles: - processor - host # This is a required field for provider. Follow STAC spec # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license - license: "EUMETSAT Essential (free and unrestricted)" + license: 'EUMETSAT Essential (free and unrestricted)' maintainers: # Information about recipe creator. 
name and github are required - - name: "Lorenzo Zampieri" - orcid: "0000-0003-1703-4162" + - name: 'Lorenzo Zampieri' + orcid: '0000-0003-1703-4162' github: lzampier # The specific bakery (i.e. cloud infrastructure) that your recipe will run on. # Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries bakery: - id: "pangeo-ldeo-nsf-earthcube" + id: 'pangeo-ldeo-nsf-earthcube' diff --git a/recipes/OSI-SAF-450-430-a_rg025/recipe.py b/recipes/OSI-SAF-450-430-a_rg025/recipe.py index 8b858d7614..2b8d129188 100644 --- a/recipes/OSI-SAF-450-430-a_rg025/recipe.py +++ b/recipes/OSI-SAF-450-430-a_rg025/recipe.py @@ -6,29 +6,41 @@ dates = pd.date_range('1989-01-01', '2021-12-31', freq='D') missing_dates = [ - pd.Timestamp(1990,8,13), pd.Timestamp(1990,8,25), pd.Timestamp(1990,8,26), - pd.Timestamp(1990,10,21), pd.Timestamp(1990,10,22), pd.Timestamp(1990,10,26), - pd.Timestamp(1990,10,27), pd.Timestamp(1990,10,28), pd.Timestamp(1990,12,21), - pd.Timestamp(1990,12,22), pd.Timestamp(1990,12,23), pd.Timestamp(1990,12,24), - pd.Timestamp(1990,12,25), pd.Timestamp(1990,12,26), pd.Timestamp(2000,12,1), - pd.Timestamp(2021, 2, 20) - ] + pd.Timestamp(1990, 8, 13), + pd.Timestamp(1990, 8, 25), + pd.Timestamp(1990, 8, 26), + pd.Timestamp(1990, 10, 21), + pd.Timestamp(1990, 10, 22), + pd.Timestamp(1990, 10, 26), + pd.Timestamp(1990, 10, 27), + pd.Timestamp(1990, 10, 28), + pd.Timestamp(1990, 12, 21), + pd.Timestamp(1990, 12, 22), + pd.Timestamp(1990, 12, 23), + pd.Timestamp(1990, 12, 24), + pd.Timestamp(1990, 12, 25), + pd.Timestamp(1990, 12, 26), + pd.Timestamp(2000, 12, 1), + pd.Timestamp(2021, 2, 20), +] # Drop missing dates dates = dates.drop(missing_dates) URL_FORMAT = ( - "https://g-08c618.7a577b.6fbd.data.globus.org/" - "ice_conc_r1440x720_{interim}cdr-v3p0_{time:%Y%m%d}1200.nc" + 'https://g-08c618.7a577b.6fbd.data.globus.org/' + 'ice_conc_r1440x720_{interim}cdr-v3p0_{time:%Y%m%d}1200.nc' ) + def make_url(time): - if time.year <= 
2020: + if time.year <= 2020: return URL_FORMAT.format(time=time, interim='') - if time.year > 2020: + if time.year > 2020: return URL_FORMAT.format(time=time, interim='i') -time_concat_dim = ConcatDim("time", dates, nitems_per_file=1) + +time_concat_dim = ConcatDim('time', dates, nitems_per_file=1) pattern = FilePattern(make_url, time_concat_dim) recipe = XarrayZarrRecipe(pattern, inputs_per_chunk=5)