Skip to content

Commit

Permalink
allow Variables to read s3urls
Browse files Browse the repository at this point in the history
  • Loading branch information
rwegener2 committed Oct 31, 2023
1 parent 4bf2ba8 commit 11625ec
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 28 deletions.
81 changes: 64 additions & 17 deletions icepyx/core/is2ref.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import h5py
import json
import numpy as np
import requests
import warnings
from xml.etree import ElementTree as ET

import h5py
import earthaccess
import numpy as np

import icepyx

Expand Down Expand Up @@ -340,25 +341,71 @@ def latest_version(product):
[entry["version_id"] for entry in _about_product["feed"]["entry"]]
)

def extract_product(filepath, auth=None):
    """
    Read the product type from the metadata of the file.

    Valid for local or s3 files, but an auth object must be provided when
    reading from s3.

    Parameters
    ----------
    filepath : string
        Local or remote location of a file. Any path starting with ``s3`` is
        treated as an s3 location; everything else is opened as a local file.
    auth : earthaccess.auth.Auth, default None
        An earthaccess authentication object. Optional, but necessary if
        accessing data in an s3 bucket.

    Returns
    -------
    string
        The product short name, as returned by ``_validate_product``.

    Raises
    ------
    AttributeError
        If ``filepath`` is an s3 location and no ``auth`` was given.
    KeyError
        If the ``short_name`` attribute cannot be read from the file metadata.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    is_s3 = filepath.startswith('s3')
    if is_s3 and not auth:
        raise AttributeError('Must provide credentials to `auth` if accessing s3 data')

    # The ExitStack guarantees that every opened handle is closed, including
    # when reading or validating the metadata raises.
    with ExitStack() as stack:
        if is_s3:
            # NOTE(review): `provider` receives the auth object, as in the
            # original code -- confirm against the earthaccess API.
            s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth)
            s3_file = s3.open(filepath, 'rb')
            # Registered first so it is closed last, after the h5py wrapper.
            stack.callback(s3_file.close)
            f = stack.enter_context(h5py.File(s3_file))
        else:
            # Otherwise assume a local filepath. Read with h5py.
            f = stack.enter_context(h5py.File(filepath, 'r'))

        try:
            product = f.attrs['short_name']
            # h5py can hand back either bytes or str for string attributes;
            # only bytes need decoding.
            if isinstance(product, bytes):
                product = product.decode()
            # Kept inside the try, as in the original, so a KeyError raised by
            # validation is reported the same way.
            product = _validate_product(product)
        except KeyError as err:
            # Raising a bare string is a TypeError in Python 3; raise a real
            # exception and keep the original cause attached.
            raise KeyError(
                'Unable to parse the product name from file metadata'
            ) from err

    return product

def extract_version(filepath, auth=None):
    """
    Read the version from the metadata of the file.

    Valid for local or s3 files, but an auth object must be provided when
    reading from s3.

    Parameters
    ----------
    filepath : string
        Local or remote location of a file. Any path starting with ``s3`` is
        treated as an s3 location; everything else is opened as a local file.
    auth : earthaccess.auth.Auth, default None
        An earthaccess authentication object. Optional, but necessary if
        accessing data in an s3 bucket.

    Returns
    -------
    string
        The value of the ``VersionID`` attribute stored under
        ``METADATA/DatasetIdentification``.

    Raises
    ------
    AttributeError
        If ``filepath`` is an s3 location and no ``auth`` was given.
    KeyError
        If the version cannot be read from the file metadata.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    is_s3 = filepath.startswith('s3')
    if is_s3 and not auth:
        raise AttributeError('Must provide credentials to `auth` if accessing s3 data')

    # The ExitStack guarantees that every opened handle is closed, including
    # when reading the metadata raises.
    with ExitStack() as stack:
        if is_s3:
            # NOTE(review): `provider` receives the auth object, as in the
            # original code -- confirm against the earthaccess API.
            s3 = earthaccess.get_s3fs_session(daac="NSIDC", provider=auth)
            s3_file = s3.open(filepath, 'rb')
            # Registered first so it is closed last, after the h5py wrapper.
            stack.callback(s3_file.close)
            f = stack.enter_context(h5py.File(s3_file))
        else:
            # Otherwise assume a local filepath. Read with h5py.
            f = stack.enter_context(h5py.File(filepath, 'r'))

        try:
            version = f['METADATA']['DatasetIdentification'].attrs['VersionID']
        except KeyError as err:
            # Raising a bare string is a TypeError in Python 3; raise a real
            # exception and keep the original cause attached.
            raise KeyError(
                'Unable to parse the version from file metadata'
            ) from err

        # h5py can hand back either bytes or str for string attributes;
        # only bytes need decoding.
        if isinstance(version, bytes):
            version = version.decode()

    return version
20 changes: 9 additions & 11 deletions icepyx/core/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,10 @@ class Variables(EarthdataAuthMixin):
Dictionary (key:values) of available variable names (keys) and paths (values).
wanted : dictionary, default None
As avail, but for the desired list of variables
session : requests.session object
A session object authenticating the user to download data using their Earthdata login information.
The session object will automatically be passed from the query object if you
have successfully logged in there.
auth : earthaccess.auth.Auth, default None
An earthaccess authentication object. Available as an argument so an existing
earthaccess.auth.Auth object can be used for authentication. If not given, a new auth
object will be created whenever authentication is needed.
"""

def __init__(
Expand All @@ -79,12 +78,14 @@ def __init__(
'variables will be read from the file. If a product is provided all available ',
'variables for that product will be returned.'
)
# initialize authentication properties
EarthdataAuthMixin.__init__(self, auth=auth)

# Set the product and version from either the input args or the file
if path:
self._path = path
self._product = is2ref.extract_product(self._path)
self._version = is2ref.extract_version(self._path)
self._product = is2ref.extract_product(self._path, auth=self.auth)
self._version = is2ref.extract_version(self._path, auth=self.auth)
elif product:
# Check for valid product string
self._product = is2ref._validate_product(product)
Expand All @@ -93,9 +94,6 @@ def __init__(
self._version = val.prod_version(is2ref.latest_version(self._product), version)
else:
raise TypeError('Either a filepath or a product need to be given as input arguments.')

# initialize authentication properties
EarthdataAuthMixin.__init__(self, auth=auth)

self._avail = avail
self.wanted = wanted
Expand Down Expand Up @@ -138,7 +136,7 @@ def avail(self, options=False, internal=False):
"""

if not hasattr(self, "_avail") or self._avail == None:
if not hasattr(self, 'path'):
if not hasattr(self, 'path') or self.path.startswith('s3'):
self._avail = is2ref._get_custom_options(
self.session, self.product, self.version
)["variables"]
Expand Down

0 comments on commit 11625ec

Please sign in to comment.