From 3f5a8d799fced7732d733775903e2b0d876df1a9 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 5 Dec 2023 09:27:37 -0500 Subject: [PATCH] Revert anon fallback (#835) * Revert "Don't assume attributes after connect (#832)" This reverts commit cc6663f3bf935dbca2c25ce95c6c7e3da7c6fecc. * Revert "Try anon=True if no credentials are supplied or found (#823)" This reverts commit 8a87309bc8846b043ab8f5576251f6b90a6bdcae. --- s3fs/core.py | 52 +++++--------------------------------- s3fs/tests/test_mapping.py | 16 ++++++++++++ s3fs/tests/test_s3fs.py | 10 -------- s3fs/utils.py | 36 ++++++++++++++------------ 4 files changed, 43 insertions(+), 71 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index f21d51ac..e9e8d8f8 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import asyncio import errno -import inspect import logging import mimetypes import os @@ -31,7 +30,6 @@ from aiobotocore.config import AioConfig from botocore.exceptions import ClientError, HTTPClientError, ParamValidationError from botocore.parsers import ResponseParserError -from botocore.credentials import create_credential_resolver from s3fs.errors import translate_boto_error from s3fs.utils import S3BucketRegionCache, ParamKwargsHelper, _get_brange, FileExpired @@ -139,18 +137,6 @@ async def _error_wrapper(func, *, args=(), kwargs=None, retries): except Exception as e: err = e err = translate_boto_error(err) - - if inspect.ismethod(func): - s3 = func.__self__ - try: - is_anon = s3._client_config.signature_version == botocore.UNSIGNED - except AttributeError: - is_anon = False - if isinstance(err, PermissionError) and is_anon: - raise PermissionError( - "Access failed in anonymous mode. You may need to provide credentials." - ) from err - raise err @@ -478,36 +464,6 @@ async def set_session(self, refresh=False, kwargs={}): return self._s3 logger.debug("Setting up s3fs instance") - if self.session is None: - self.session = aiobotocore.session.AioSession(**self.kwargs) - - drop_keys = { - "aws_access_key_id", - "aws_secret_access_key", - "aws_session_token", - } - if ( - not self.anon - and (self.key or self.secret or self.token) is None - and not drop_keys.intersection(set(self.client_kwargs)) - ): - # creating credentials resolver which enables loading credentials from configs/environment variables see - # https://github.com/boto/botocore/blob/develop/botocore/credentials.py#L2043 - # tests whether any creds are available at all; if not, default to anonymous - cred_resolver = create_credential_resolver( - self.session, region_name=self.session._last_client_region_used - ) - credentials = cred_resolver.load_credentials() - - if credentials is None: - logger.debug("No credentials given/found, setting `anon` to True.") - self.anon = True - else: - # by stashing these, we avoid doing the lookup again - self.key = getattr(credentials, "access_key", None) - self.secret = getattr(credentials, "secret_key", None) - self.token = getattr(credentials, "token", None) - client_kwargs = self.client_kwargs.copy() init_kwargs = dict( aws_access_key_id=self.key, @@ -523,10 +479,14 @@ async def set_session(self, refresh=False, kwargs={}): if "use_ssl" not in client_kwargs.keys(): init_kwargs["use_ssl"] = self.use_ssl config_kwargs = self._prepare_config_kwargs() - if self.anon: from botocore import UNSIGNED + drop_keys = { + "aws_access_key_id", + "aws_secret_access_key", + "aws_session_token", + } init_kwargs = { key: value for key, value in init_kwargs.items() if key not in drop_keys } @@ -538,6 +498,8 @@ async def set_session(self, refresh=False, kwargs={}): config_kwargs["signature_version"] = UNSIGNED conf = AioConfig(**config_kwargs) + if self.session is None: + self.session = aiobotocore.session.AioSession(**self.kwargs) for parameters in (config_kwargs, self.kwargs, init_kwargs, client_kwargs): for option in ("region_name", "endpoint_url"): diff --git a/s3fs/tests/test_mapping.py b/s3fs/tests/test_mapping.py index af4ed4d0..e67c7e73 100644 --- a/s3fs/tests/test_mapping.py +++ b/s3fs/tests/test_mapping.py @@ -52,6 +52,22 @@ def test_with_data(s3): assert list(d) == [] +def test_complex_keys(s3): + d = s3.get_mapper(root) + d[1] = b"hello" + assert d[1] == b"hello" + del d[1] + + d[1, 2] = b"world" + assert d[1, 2] == b"world" + del d[1, 2] + + d["x", 1, 2] = b"hello world" + assert d["x", 1, 2] == b"hello world" + + assert ("x", 1, 2) in d + + def test_clear_empty(s3): d = s3.get_mapper(root) d.clear() diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 711781cd..d2c12570 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -166,16 +166,6 @@ def test_simple(s3): assert out == data -def test_auto_anon(s3, monkeypatch): - monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False) - monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) - monkeypatch.delenv("AWS_SESSION_TOKEN", raising=False) - - fs = S3FileSystem(skip_instance_cache=True, endpoint_url=endpoint_uri) - fs.s3 - assert fs.anon - - def test_with_size(s3): data = b"a" * (10 * 2**20) diff --git a/s3fs/utils.py b/s3fs/utils.py index 12370134..f9d6589e 100644 --- a/s3fs/utils.py +++ b/s3fs/utils.py @@ -1,6 +1,7 @@ import errno import logging from contextlib import contextmanager, AsyncExitStack +from botocore.exceptions import ClientError logger = logging.getLogger("s3fs") @@ -30,13 +31,26 @@ async def get_bucket_client(self, bucket_name=None): if bucket_name in self._buckets: return self._buckets[bucket_name] + general_client = await self.get_client() if bucket_name is None: - # general client - return await self.get_client() - - region = get_bucket_region( - bucket_name - ) # this is sync - matters? can reuse some aiohttp session? + return general_client + + try: + response = await general_client.head_bucket(Bucket=bucket_name) + except ClientError as e: + region = ( + e.response["ResponseMetadata"] + .get("HTTPHeaders", {}) + .get("x-amz-bucket-region") + ) + if not region: + logger.debug( + "RC: HEAD_BUCKET call for %r has failed, returning the general client", + bucket_name, + ) + return general_client + else: + region = response["ResponseMetadata"]["HTTPHeaders"]["x-amz-bucket-region"] if region not in self._regions: logger.debug( @@ -54,7 +68,6 @@ async def get_bucket_client(self, bucket_name=None): return client async def get_client(self): - # general, non-regional client if not self._client: self._client = await self._stack.enter_async_context( self._session.create_client("s3", **self._client_kwargs) @@ -75,15 +88,6 @@ async def __aexit__(self, *exc_args): await self.clear() -def get_bucket_region(bucket): - """Simple way to locate bucket""" - import requests - - return requests.head(f"https://s3.amazonaws.com/{bucket}").headers[ - "x-amz-bucket-region" - ] - - class FileExpired(IOError): """ Is raised, when the file content has been changed from a different process after