Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For exists(), more precise error handling. #757

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
9 changes: 7 additions & 2 deletions s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,12 +1019,17 @@ async def _exists(self, path):
"list_objects_v2", MaxKeys=1, Bucket=bucket, **self.req_kw
)
return True
except Exception:
except (FileNotFoundError, PermissionError):
pass
try:
await self._call_s3("get_bucket_location", Bucket=bucket, **self.req_kw)
return True
except Exception:
except FileNotFoundError:
return False
except PermissionError:
logger.warning(
"Bucket %s doesn't exist or you don't have access to it.", bucket
)
return False

exists = sync_wrapper(_exists)
Expand Down
35 changes: 35 additions & 0 deletions s3fs/tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from s3fs.core import S3FileSystem
from s3fs.utils import ignoring, SSEParams
from botocore.exceptions import NoCredentialsError
from botocore.exceptions import EndpointConnectionError
from fsspec.asyn import sync
from fsspec.callbacks import Callback
from packaging import version
Expand Down Expand Up @@ -2464,6 +2465,40 @@ def test_exists_isdir(s3):
assert not s3.isdir(bad_path)


def test_exists_raises_on_connection_error():
# Ensure that we raise a ConnectionError instead of returning False if we
# are not actually able to connect to storage, by setting a fake proxy and
# then re-creating the S3FileSystem instance.
S3FileSystem.clear_instance_cache()
s3 = S3FileSystem(
anon=False,
skip_instance_cache=True,
endpoint_url="https://fakeproxy.127.0.0.1:8080/",
)
s3.invalidate_cache()
with pytest.raises(EndpointConnectionError):
s3.exists("this-bucket-does-not-exist/")


def test_exists_bucket_nonexistent_or_no_access(caplog):
# Ensure that a warning is raised and False is returned if querying a
# bucket that might either not exist or be private.
# This tests against real AWS, might fail due to network issues.
caplog.clear()
fs = s3fs.S3FileSystem(key="wrongkey", secret="wrongsecret")
assert not fs.exists("s3://this-bucket-might-not-exist/")
assert caplog.records[0].levelname == "WARNING"
assert "doesn't exist or you don't have access" in caplog.text


def test_exists_bucket_nonexistent(s3, caplog):
# Ensure that NO warning is raised and False is returned if checking bucket
# existance when we have full access.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the bucket might still exist, but it's not one we can see. We happen to know for the sake of the test that it doesn't exist.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure whether this is true for all S3 systems, but at least for our internal systems, I get a "PermissionError" when trying to access a bucket I cannot see (because I don't have permission to see it) but a FileNotFoundError like in this test when I would have permission to see, but it simply doesn't exist. This is why in the PR change, the warning would only be raised in case of PermissionError but not in case of FileNotFoundError. Should I change it to return the warning in both cases?

caplog.clear()
assert not s3.exists("non_existent_bucket/")
assert len(caplog.records) == 0


def test_list_del_multipart(s3):
path = test_bucket_name + "/afile"
f = s3.open(path, "wb")
Expand Down