From 81cfcdec9ee4b08e8bbdb5dee74b5c7cec65ba12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20R=C3=B6sner?= Date: Tue, 14 Nov 2023 09:53:08 +0100 Subject: [PATCH 1/6] feat: try anon=True if no credentials are supplied or found. --- s3fs/core.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 5d0247f4..5d397a85 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -30,6 +30,7 @@ from aiobotocore.config import AioConfig from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError from botocore.parsers import ResponseParserError +from botocore.credentials import create_credential_resolver from s3fs.errors import translate_boto_error from s3fs.utils import S3BucketRegionCache, ParamKwargsHelper, _get_brange, FileExpired @@ -137,6 +138,12 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): except Exception as e: err = e err = translate_boto_error(err) + + s3 = func.__self__ + if isinstance(err, PermissionError) and s3._client_config.signature_version == botocore.UNSIGNED: + print("""No credentials were given and thus anonymous access was tried and failed. Please provide + credentials.""") + raise err @@ -464,6 +471,19 @@ async def set_session(self, refresh=False, kwargs={}): return self._s3 logger.debug("Setting up s3fs instance") + if self.session is None: + self.session = aiobotocore.session.AioSession(**self.kwargs) + + cred_resolver = create_credential_resolver(self.session, region_name=self.session._last_client_region_used) + credentials = cred_resolver.load_credentials() + + if credentials is None and self.key is None and self.secret is None and self.token is None and not self.anon: + self.anon = True + else: + self.key = credentials.access_key + self.secret = credentials.secret_key + self.token = credentials.token + client_kwargs = self.client_kwargs.copy() init_kwargs = dict( aws_access_key_id=self.key, @@ -479,6 +499,7 @@ async def set_session(self, refresh=False, kwargs={}): if "use_ssl" not in client_kwargs.keys(): init_kwargs["use_ssl"] = self.use_ssl config_kwargs = self._prepare_config_kwargs() + if self.anon: from botocore import UNSIGNED @@ -498,8 +519,6 @@ async def set_session(self, refresh=False, kwargs={}): config_kwargs["signature_version"] = UNSIGNED conf = AioConfig(**config_kwargs) - if self.session is None: - self.session = aiobotocore.session.AioSession(**self.kwargs) for parameters in (config_kwargs, self.kwargs, init_kwargs, client_kwargs): for option in ("region_name", "endpoint_url"): From 2b09181e2b242401cae65460939b8e38b4927911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20R=C3=B6sner?= Date: Mon, 20 Nov 2023 14:47:43 +0100 Subject: [PATCH 2/6] refactor: add logging statement and comment for credentials resolver. --- s3fs/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/s3fs/core.py b/s3fs/core.py index 5d397a85..d0ed2cb2 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -474,10 +474,13 @@ async def set_session(self, refresh=False, kwargs={}): if self.session is None: self.session = aiobotocore.session.AioSession(**self.kwargs) + # creating credentials resolver which enables loading credentials from configs/environment variables see + # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043 cred_resolver = create_credential_resolver(self.session, region_name=self.session._last_client_region_used) credentials = cred_resolver.load_credentials() if credentials is None and self.key is None and self.secret is None and self.token is None and not self.anon: + logger.debug("No credentials given/found, setting `anon` to True.") self.anon = True else: self.key = credentials.access_key From 1e8db83f0268bd695b035a00737708d0718fdc73 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 21 Nov 2023 10:06:34 -0500 Subject: [PATCH 3/6] Make creds lookup conditional --- s3fs/core.py | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index d0ed2cb2..583ed63a 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -140,9 +140,14 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): err = translate_boto_error(err) s3 = func.__self__ - if isinstance(err, PermissionError) and s3._client_config.signature_version == botocore.UNSIGNED: - print("""No credentials were given and thus anonymous access was tried and failed. Please provide - credentials.""") + if ( + isinstance(err, PermissionError) + and s3._client_config.signature_version == botocore.UNSIGNED + ): + print( + """No credentials were given and thus anonymous access was tried and failed. Please provide + credentials.""" + ) raise err @@ -474,18 +479,28 @@ async def set_session(self, refresh=False, kwargs={}): if self.session is None: self.session = aiobotocore.session.AioSession(**self.kwargs) - # creating credentials resolver which enables loading credentials from configs/environment variables see - # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043 - cred_resolver = create_credential_resolver(self.session, region_name=self.session._last_client_region_used) - credentials = cred_resolver.load_credentials() + if ( + self.key is None + and self.secret is None + and self.token is None + and not self.anon + ): + # creating credentials resolver which enables loading credentials from configs/environment variables see + # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043 + # tests whether any creds are available at all; if not, default to anonymous + cred_resolver = create_credential_resolver( + self.session, region_name=self.session._last_client_region_used + ) + credentials = cred_resolver.load_credentials() - if credentials is None and self.key is None and self.secret is None and self.token is None and not self.anon: - logger.debug("No credentials given/found, setting `anon` to True.") - self.anon = True - else: - self.key = credentials.access_key - self.secret = credentials.secret_key - self.token = credentials.token + if credentials is None: + logger.debug("No credentials given/found, setting `anon` to True.") + self.anon = True + else: + # by stashing these, we avoid doing the lookup again + self.key = credentials.access_key + self.secret = credentials.secret_key + self.token = credentials.token client_kwargs = self.client_kwargs.copy() init_kwargs = dict( From 6804fde2db8da696dfad08012c02083d60cd89dc Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 21 Nov 2023 10:37:23 -0500 Subject: [PATCH 4/6] fix --- s3fs/core.py | 37 +++++++++++++++++++------------------ s3fs/tests/test_mapping.py | 16 ---------------- s3fs/utils.py | 36 ++++++++++++++++-------------------- 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 583ed63a..44174f92 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import asyncio import errno +import inspect import logging import mimetypes import os @@ -139,15 +140,16 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): err = e err = translate_boto_error(err) - s3 = func.__self__ - if ( - isinstance(err, PermissionError) - and s3._client_config.signature_version == botocore.UNSIGNED - ): - print( - """No credentials were given and thus anonymous access was tried and failed. Please provide - credentials.""" - ) + if inspect.ismethod(func): + s3 = func.__self__ + try: + is_anon = s3._client_config.signature_version == botocore.UNSIGNED + except AttributeError: + is_anon = False + if isinstance(err, PermissionError) and is_anon: + raise PermissionError( + "Access failed in anonymous mode. You may need to provide credentials." + ) from err raise err @@ -479,11 +481,15 @@ async def set_session(self, refresh=False, kwargs={}): if self.session is None: self.session = aiobotocore.session.AioSession(**self.kwargs) + drop_keys = { + "aws_access_key_id", + "aws_secret_access_key", + "aws_session_token", + } if ( - self.key is None - and self.secret is None - and self.token is None - and not self.anon + not self.anon + and (self.key or self.secret or self.token) is None + and not drop_keys.intersection(set(self.client_kwargs)) ): # creating credentials resolver which enables loading credentials from configs/environment variables see # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043 @@ -521,11 +527,6 @@ async def set_session(self, refresh=False, kwargs={}): if self.anon: from botocore import UNSIGNED - drop_keys = { - "aws_access_key_id", - "aws_secret_access_key", - "aws_session_token", - } init_kwargs = { key: value for key, value in init_kwargs.items() if key not in drop_keys } diff --git a/s3fs/tests/test_mapping.py b/s3fs/tests/test_mapping.py index e67c7e73..af4ed4d0 100644 --- a/s3fs/tests/test_mapping.py +++ b/s3fs/tests/test_mapping.py @@ -52,22 +52,6 @@ def test_with_data(s3): assert list(d) == [] -def test_complex_keys(s3): - d = s3.get_mapper(root) - d[1] = b"hello" - assert d[1] == b"hello" - del d[1] - - d[1, 2] = b"world" - assert d[1, 2] == b"world" - del d[1, 2] - - d["x", 1, 2] = b"hello world" - assert d["x", 1, 2] == b"hello world" - - assert ("x", 1, 2) in d - - def test_clear_empty(s3): d = s3.get_mapper(root) d.clear() diff --git a/s3fs/utils.py b/s3fs/utils.py index f9d6589e..12370134 100644 --- a/s3fs/utils.py +++ b/s3fs/utils.py @@ -1,7 +1,6 @@ import errno import logging from contextlib import contextmanager, AsyncExitStack -from botocore.exceptions import ClientError logger = logging.getLogger("s3fs") @@ -31,26 +30,13 @@ async def get_bucket_client(self, bucket_name=None): if bucket_name in self._buckets: return self._buckets[bucket_name] - general_client = await self.get_client() if bucket_name is None: - return general_client - - try: - response = await general_client.head_bucket(Bucket=bucket_name) - except ClientError as e: - region = ( - e.response["ResponseMetadata"] - .get("HTTPHeaders", {}) - .get("x-amz-bucket-region") - ) - if not region: - logger.debug( - "RC: HEAD_BUCKET call for %r has failed, returning the general client", - bucket_name, - ) - return general_client - else: - region = response["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"] + # general client + return await self.get_client() + + region = get_bucket_region( + bucket_name + ) # this is sync - matters? can reuse some aiohttp session? if region not in self._regions: logger.debug( @@ -68,6 +54,7 @@ async def get_bucket_client(self, bucket_name=None): return client async def get_client(self): + # general, non-regional client if not self._client: self._client = await self._stack.enter_async_context( self._session.create_client("s3", **self._client_kwargs) @@ -88,6 +75,15 @@ async def __aexit__(self, *exc_args): await self.clear() +def get_bucket_region(bucket): + """Simple way to locate bucket""" + import requests + + return requests.head(f"https://s3.amazonaws.com/{bucket}").headers[ + "x-amz-bucket-region" + ] + + class FileExpired(IOError): """ Is raised, when the file content has been changed from a different process after From 396ffee81cc5de501262d4bd31ce17e1a4d22265 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sat, 2 Dec 2023 20:51:05 -0500 Subject: [PATCH 5/6] Add simple test --- s3fs/tests/test_s3fs.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index b243ff1f..e59512ce 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -166,6 +166,16 @@ def test_simple(s3): assert out == data +def test_auto_anon(s3, monkeypatch): + monkeypatch.delenv("AWS_ACCESS_KEY_ID") + monkeypatch.delenv("AWS_SECRET_ACCESS_KEY") + monkeypatch.delenv("AWS_SESSION_TOKEN") + + fs = S3FileSystem(skip_instance_cache=True, endpoint_url=endpoint_uri) + fs.s3 + assert fs.anon + + def test_with_size(s3): data = b"a" * (10 * 2**20) From 2988be82fcf00e39f4e26c54ef54ba783189d291 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sat, 2 Dec 2023 20:55:11 -0500 Subject: [PATCH 6/6] fix --- s3fs/tests/test_s3fs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index e59512ce..e0e19fbb 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -167,9 +167,9 @@ def test_simple(s3): def test_auto_anon(s3, monkeypatch): - monkeypatch.delenv("AWS_ACCESS_KEY_ID") - monkeypatch.delenv("AWS_SECRET_ACCESS_KEY") - monkeypatch.delenv("AWS_SESSION_TOKEN") + monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False) + monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) + monkeypatch.delenv("AWS_SESSION_TOKEN", raising=False) fs = S3FileSystem(skip_instance_cache=True, endpoint_url=endpoint_uri) fs.s3