From 55bf675e8e555b845cc6dbe703f9488739dba4af Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Tue, 1 Nov 2022 11:39:22 -0600 Subject: [PATCH 1/4] NWM pangeo-forge recipe --- recipes/NWM/meta.yaml | 23 +++++++++++++++++++++++ recipes/NWM/recipe.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 recipes/NWM/meta.yaml create mode 100644 recipes/NWM/recipe.py diff --git a/recipes/NWM/meta.yaml b/recipes/NWM/meta.yaml new file mode 100644 index 0000000000..d5308acb32 --- /dev/null +++ b/recipes/NWM/meta.yaml @@ -0,0 +1,23 @@ +title: 'E-OBS dataset' +description: The National Water Model dataset is a produced by the National Oceanic and Atmospheric Administations (NOAA) Office of Water Prediction. It is a forecast model of water resources, providing multiple variables across the Continental United States (CONUS). This dataset is available via the Registry of Open Data on AWS as a collection of netCDF files that do not require any login authentication. This Pangeo-Forge recipe uses the HDFReferenceRecipe, which leverages kerchunk and fsspec_reference_maker to build an index so that this dataset can be read as if it were a ARCO dataset. +pangeo_forge_version: '0.9.0' +pangeo_notebook_version: '2022.06.02' +recipes: + - id: NWM + object: 'recipe:recipe' +provenance: + providers: + - name: 'National Oceanic and Atmospheric Administration (NOAA) Office of Water Prediction' + description: 'NOAA National Water Model CONUS Retrospective Dataset was accessed on 11-01-2022 from https://registry.opendata.aws/nwm-archive.' 
+ roles: + - producer + - licensor + url: https://water.noaa.gov/about/nwm + license: Open + +maintainers: + - name: 'Raphael Hagen' + orcid: '0000-0003-1994-1153' + github: norlandrhagen +bakery: + id: 'pangeo-ldeo-nsf-earthcube' diff --git a/recipes/NWM/recipe.py b/recipes/NWM/recipe.py new file mode 100644 index 0000000000..2936ea83fc --- /dev/null +++ b/recipes/NWM/recipe.py @@ -0,0 +1,37 @@ +# Author: Norland Raphael Hagen @norlandrhagen 11-01-2022 +# Pangeo-Forge recipe for National Water Model - Short Range Forecast # noqa: E501 +# Heavily adapted from Kerchunk example written by Rich Signell (USGS) @rsignell-usgs. https://gist.github.com/rsignell-usgs/ef435a53ac530a2843ce7e1d59f96e22 # noqa: E501 + +import os + +import fsspec + +from pangeo_forge_recipes.patterns import pattern_from_file_sequence +from pangeo_forge_recipes.recipes.reference_hdf_zarr import HDFReferenceRecipe + +# Create fsspec aws filesystem +fs = fsspec.filesystem('s3', anon=True, skip_instance_cache=True) +flist = fs.glob('noaa-nwm-pds/nwm.*/short_range/nwm.*.short_range.channel_rt.f001.conus.nc') + +# Join the "best time series" from past forecasts with the latest forecast +# NOTE(review): this is a rolling collection, so the oldest files will soon be removed. +# The intent is to drop the first day of data, but flist is never trimmed - confirm. 
+# Also use all the files from the last forecast cycle + +last_dir = f'{os.path.dirname(flist[-1])}' +last_file = os.path.basename(flist[-1]).split('.') +last_files = fs.glob( + f'{last_dir}/{last_file[0]}.{last_file[1]}.{last_file[2]}.channel_rt.*.conus.nc' +) + +# Skip the first of the last_files (f001) since it is already the last entry of flist +flist.extend(last_files[1:]) + +# Append s3 prefix +urls = ['s3://' + f for f in flist] + +# Create filepattern from urls +pattern = pattern_from_file_sequence(urls, 'time') + +# Create HDFReference recipe from pattern +recipe = HDFReferenceRecipe(pattern, netcdf_storage_options={'anon': True}) From 907f845cd5b083b1c459f135732510ebfad0daa3 Mon Sep 17 00:00:00 2001 From: Raphael Hagen Date: Tue, 1 Nov 2022 12:48:20 -0600 Subject: [PATCH 2/4] title update --- recipes/NWM/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/NWM/meta.yaml b/recipes/NWM/meta.yaml index d5308acb32..b47e532a0b 100644 --- a/recipes/NWM/meta.yaml +++ b/recipes/NWM/meta.yaml @@ -1,4 +1,4 @@ -title: 'E-OBS dataset' +title: 'National Water Model - Short Range Forecast' description: The National Water Model dataset is a produced by the National Oceanic and Atmospheric Administations (NOAA) Office of Water Prediction. It is a forecast model of water resources, providing multiple variables across the Continental United States (CONUS). This dataset is available via the Registry of Open Data on AWS as a collection of netCDF files that do not require any login authentication. This Pangeo-Forge recipe uses the HDFReferenceRecipe, which leverages kerchunk and fsspec_reference_maker to build an index so that this dataset can be read as if it were a ARCO dataset. 
pangeo_forge_version: '0.9.0' pangeo_notebook_version: '2022.06.02' From 8429ba26275636bc1bb54ee8dd43020507866829 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 1 Nov 2022 14:22:39 -0600 Subject: [PATCH 3/4] Update license info --- recipes/NWM/meta.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/recipes/NWM/meta.yaml b/recipes/NWM/meta.yaml index b47e532a0b..9c635d3485 100644 --- a/recipes/NWM/meta.yaml +++ b/recipes/NWM/meta.yaml @@ -13,7 +13,10 @@ provenance: - producer - licensor url: https://water.noaa.gov/about/nwm - license: Open + license: proprietary + license_link: + url: 'https://water.noaa.gov/about/nwm' + title: 'Open Data. There are no restrictions on the use of this data.' maintainers: - name: 'Raphael Hagen' From 8308f82cbdede7d8039a72e4137e5d16c800eb89 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Nov 2022 20:24:18 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- recipes/NWM/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/NWM/meta.yaml b/recipes/NWM/meta.yaml index 9c635d3485..78fd8a1cba 100644 --- a/recipes/NWM/meta.yaml +++ b/recipes/NWM/meta.yaml @@ -15,8 +15,8 @@ provenance: url: https://water.noaa.gov/about/nwm license: proprietary license_link: - url: 'https://water.noaa.gov/about/nwm' - title: 'Open Data. There are no restrictions on the use of this data.' + url: 'https://water.noaa.gov/about/nwm' + title: 'Open Data. There are no restrictions on the use of this data.' maintainers: - name: 'Raphael Hagen'