diff --git a/s3fs/core.py b/s3fs/core.py
index 5d0247f4..44174f92 100644
--- a/s3fs/core.py
+++ b/s3fs/core.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import asyncio
 import errno
+import inspect
 import logging
 import mimetypes
 import os
@@ -30,6 +31,7 @@
 from aiobotocore.config import AioConfig
 from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError
 from botocore.parsers import ResponseParserError
+from botocore.credentials import create_credential_resolver
 
 from s3fs.errors import translate_boto_error
 from s3fs.utils import S3BucketRegionCache, ParamKwargsHelper, _get_brange, FileExpired
@@ -137,6 +139,18 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries):
         except Exception as e:
             err = e
     err = translate_boto_error(err)
+
+    if inspect.ismethod(func):
+        s3 = func.__self__
+        try:
+            is_anon = s3._client_config.signature_version == botocore.UNSIGNED
+        except AttributeError:
+            is_anon = False
+        if isinstance(err, PermissionError) and is_anon:
+            raise PermissionError(
+                "Access failed in anonymous mode. You may need to provide credentials."
+            ) from err
+
     raise err
 
 
@@ -464,6 +478,36 @@ async def set_session(self, refresh=False, kwargs={}):
             return self._s3
         logger.debug("Setting up s3fs instance")
 
+        if self.session is None:
+            self.session = aiobotocore.session.AioSession(**self.kwargs)
+
+        drop_keys = {
+            "aws_access_key_id",
+            "aws_secret_access_key",
+            "aws_session_token",
+        }
+        if (
+            not self.anon
+            and (self.key or self.secret or self.token) is None
+            and not drop_keys.intersection(set(self.client_kwargs))
+        ):
+            # creating credentials resolver which enables loading credentials from configs/environment variables see
+            # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043
+            # tests whether any creds are available at all; if not, default to anonymous
+            cred_resolver = create_credential_resolver(
+                self.session, region_name=self.session._last_client_region_used
+            )
+            credentials = cred_resolver.load_credentials()
+
+            if credentials is None:
+                logger.debug("No credentials given/found, setting `anon` to True.")
+                self.anon = True
+            else:
+                # by stashing these, we avoid doing the lookup again
+                self.key = credentials.access_key
+                self.secret = credentials.secret_key
+                self.token = credentials.token
+
         client_kwargs = self.client_kwargs.copy()
         init_kwargs = dict(
             aws_access_key_id=self.key,
@@ -479,14 +523,10 @@
         if "use_ssl" not in client_kwargs.keys():
             init_kwargs["use_ssl"] = self.use_ssl
         config_kwargs = self._prepare_config_kwargs()
+
         if self.anon:
             from botocore import UNSIGNED
 
-            drop_keys = {
-                "aws_access_key_id",
-                "aws_secret_access_key",
-                "aws_session_token",
-            }
             init_kwargs = {
                 key: value for key, value in init_kwargs.items() if key not in drop_keys
             }
@@ -498,8 +538,6 @@
             config_kwargs["signature_version"] = UNSIGNED
 
         conf = AioConfig(**config_kwargs)
-        if self.session is None:
-            self.session = aiobotocore.session.AioSession(**self.kwargs)
 
         for parameters in (config_kwargs, self.kwargs, init_kwargs, client_kwargs):
             for option in ("region_name", "endpoint_url"):
diff --git a/s3fs/tests/test_mapping.py b/s3fs/tests/test_mapping.py
index e67c7e73..af4ed4d0 100644
--- a/s3fs/tests/test_mapping.py
+++ b/s3fs/tests/test_mapping.py
@@ -52,22 +52,6 @@ def test_with_data(s3):
     assert list(d) == []
 
 
-def test_complex_keys(s3):
-    d = s3.get_mapper(root)
-    d[1] = b"hello"
-    assert d[1] == b"hello"
-    del d[1]
-
-    d[1, 2] = b"world"
-    assert d[1, 2] == b"world"
-    del d[1, 2]
-
-    d["x", 1, 2] = b"hello world"
-    assert d["x", 1, 2] == b"hello world"
-
-    assert ("x", 1, 2) in d
-
-
 def test_clear_empty(s3):
     d = s3.get_mapper(root)
     d.clear()
diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py
index b243ff1f..e0e19fbb 100644
--- a/s3fs/tests/test_s3fs.py
+++ b/s3fs/tests/test_s3fs.py
@@ -166,6 +166,16 @@ def test_simple(s3):
     assert out == data
 
 
+def test_auto_anon(s3, monkeypatch):
+    monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False)
+    monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
+    monkeypatch.delenv("AWS_SESSION_TOKEN", raising=False)
+
+    fs = S3FileSystem(skip_instance_cache=True, endpoint_url=endpoint_uri)
+    fs.s3
+    assert fs.anon
+
+
 def test_with_size(s3):
     data = b"a" * (10 * 2**20)
 
diff --git a/s3fs/utils.py b/s3fs/utils.py
index f9d6589e..12370134 100644
--- a/s3fs/utils.py
+++ b/s3fs/utils.py
@@ -1,7 +1,6 @@
 import errno
 import logging
 from contextlib import contextmanager, AsyncExitStack
-from botocore.exceptions import ClientError
 
 logger = logging.getLogger("s3fs")
 
@@ -31,26 +30,13 @@ async def get_bucket_client(self, bucket_name=None):
         if bucket_name in self._buckets:
             return self._buckets[bucket_name]
 
-        general_client = await self.get_client()
         if bucket_name is None:
-            return general_client
-
-        try:
-            response = await general_client.head_bucket(Bucket=bucket_name)
-        except ClientError as e:
-            region = (
-                e.response["ResponseMetadata"]
-                .get("HTTPHeaders", {})
-                .get("x-amz-bucket-region")
-            )
-            if not region:
-                logger.debug(
-                    "RC: HEAD_BUCKET call for %r has failed, returning the general client",
-                    bucket_name,
-                )
-                return general_client
-        else:
-            region = response["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"]
+            # general client
+            return await self.get_client()
+
+        region = get_bucket_region(
+            bucket_name
+        )  # this is sync - matters? can reuse some aiohttp session?
 
         if region not in self._regions:
             logger.debug(
@@ -68,6 +54,7 @@
         return client
     async def get_client(self):
+        # general, non-regional client
         if not self._client:
             self._client = await self._stack.enter_async_context(
                 self._session.create_client("s3", **self._client_kwargs)
             )
@@ -88,6 +75,15 @@
         await self.clear()
 
 
+def get_bucket_region(bucket):
+    """Simple way to locate bucket"""
+    import requests
+
+    return requests.head(f"https://s3.amazonaws.com/{bucket}").headers[
+        "x-amz-bucket-region"
+    ]
+
+
 class FileExpired(IOError):
     """
     Is raised, when the file content has been changed from a different process after
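A minimal usage sketch of the fallback this patch introduces, mirroring the new test_auto_anon test above (the public bucket name in the trailing comment is hypothetical): when no key/secret/token is passed in and botocore's credential resolver finds nothing in the environment or config files, the filesystem now switches itself to anonymous access instead of failing later at request time.

    from s3fs import S3FileSystem

    # No credentials supplied and none discoverable by the resolver:
    fs = S3FileSystem()
    fs.s3           # first client access runs set_session() and the credential lookup
    assert fs.anon  # resolver returned None, so `anon` was flipped to True

    # Anonymous reads against a public bucket still work, e.g.
    # fs.ls("hypothetical-public-bucket")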