diff --git a/recipes/NWM/meta.yaml b/recipes/NWM/meta.yaml
new file mode 100644
index 0000000000..78fd8a1cba
--- /dev/null
+++ b/recipes/NWM/meta.yaml
@@ -0,0 +1,26 @@
+title: 'National Water Model - Short Range Forecast'
+description: The National Water Model dataset is produced by the National Oceanic and Atmospheric Administration (NOAA) Office of Water Prediction. It is a water-resources forecast model providing multiple variables across the Continental United States (CONUS). The dataset is available via the Registry of Open Data on AWS as a collection of netCDF files that require no login or authentication. This Pangeo-Forge recipe uses the HDFReferenceRecipe, which leverages kerchunk and fsspec_reference_maker to build a reference index so that the dataset can be read as if it were an analysis-ready, cloud-optimized (ARCO) dataset.
+pangeo_forge_version: '0.9.0'
+pangeo_notebook_version: '2022.06.02'
+recipes:
+  - id: NWM
+    object: 'recipe:recipe'
+provenance:
+  providers:
+    - name: 'National Oceanic and Atmospheric Administration (NOAA) Office of Water Prediction'
+      description: 'NOAA National Water Model CONUS Retrospective Dataset was accessed on 11-01-2022 from https://registry.opendata.aws/nwm-archive.'
+      roles:
+        - producer
+        - licensor
+      url: https://water.noaa.gov/about/nwm
+  license: proprietary
+  license_link:
+    url: 'https://water.noaa.gov/about/nwm'
+    title: 'Open Data. There are no restrictions on the use of this data.'
+
+maintainers:
+  - name: 'Raphael Hagen'
+    orcid: '0000-0003-1994-1153'
+    github: norlandrhagen
+bakery:
+  id: 'pangeo-ldeo-nsf-earthcube'
diff --git a/recipes/NWM/recipe.py b/recipes/NWM/recipe.py
new file mode 100644
index 0000000000..2936ea83fc
--- /dev/null
+++ b/recipes/NWM/recipe.py
@@ -0,0 +1,37 @@
+# Author: Norland Raphael Hagen @norlandrhagen 11-01-2022
+# Pangeo-Forge recipe for National Water Model - Short Range Forecast  # noqa: E501
+# Heavily adapted from a Kerchunk example written by Rich Signell (USGS) @rsignell-usgs: https://gist.github.com/rsignell-usgs/ef435a53ac530a2843ce7e1d59f96e22  # noqa: E501
+
+import os
+
+import fsspec
+
+from pangeo_forge_recipes.patterns import pattern_from_file_sequence
+from pangeo_forge_recipes.recipes.reference_hdf_zarr import HDFReferenceRecipe
+
+# Create an anonymous fsspec filesystem for the public S3 bucket
+fs = fsspec.filesystem('s3', anon=True, skip_instance_cache=True)
+flist = fs.glob('noaa-nwm-pds/nwm.*/short_range/nwm.*.short_range.channel_rt.f001.conus.nc')
+
+# Join the "best time series" from past forecasts with the latest forecast.
+# Remove the first day of data, since this is a rolling collection and
+# we don't want to try to access files that will soon be removed,
+# and use all of the files from the last forecast cycle.
+
+last_dir = os.path.dirname(flist[-1])
+last_file = os.path.basename(flist[-1]).split('.')
+last_files = fs.glob(
+    f'{last_dir}/{last_file[0]}.{last_file[1]}.{last_file[2]}.channel_rt.*.conus.nc'
+)
+
+# Skip the first of last_files, since it duplicates the f001 file already in flist
+flist.extend(last_files[1:])
+
+# Prepend the s3:// protocol prefix
+urls = ['s3://' + f for f in flist]
+
+# Create a FilePattern from the URLs, concatenated along 'time'
+pattern = pattern_from_file_sequence(urls, 'time')
+
+# Create an HDFReferenceRecipe from the pattern; anonymous access is needed to read the source netCDF files
+recipe = HDFReferenceRecipe(pattern, netcdf_storage_options={'anon': True})
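+
+# Usage sketch (commented out; not executed as part of the recipe): once the
+# recipe has been run and its kerchunk reference file has been written, the
+# dataset can be opened lazily with xarray via the zarr engine. The path
+# 'reference.json' below is a placeholder for wherever the bakery stores the
+# generated references.
+#
+# import xarray as xr
+#
+# ds = xr.open_dataset(
+#     'reference://',
+#     engine='zarr',
+#     backend_kwargs={
+#         'consolidated': False,
+#         'storage_options': {
+#             'fo': 'reference.json',
+#             'remote_protocol': 's3',
+#             'remote_options': {'anon': True},
+#         },
+#     },
+# )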