From 778b2e7954b92b62d9498eded1634322b6ea3d97 Mon Sep 17 00:00:00 2001 From: Yohannes Berhane Date: Sun, 7 Jul 2024 22:11:31 -0400 Subject: [PATCH 01/16] Authentication docs update --- docs/backends/gcloud.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 962298eb0..8c6bcca7f 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -59,6 +59,23 @@ For development use cases, or other instances outside Google infrastructure: #. Ensure the key is mounted/available to your running Django app. #. Set an environment variable of GOOGLE_APPLICATION_CREDENTIALS to the path of the json file. +**Note Regarding Authentication** + +There is currently a limitation in the GCS client for Python which by default requires a service account private key file to be +present when generating signed urls. **This is important to realize**. The service account private key is unavailable when running on a compute service. +Compute Services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch access tokens from a metadata service. +Those services do not have access to the service account private key. That means you must use one of the IAM sign functions (SignBlob, SignJwt) +to have Google sign using their managed private key. This means, you can't directly sign data. You must use the Cloud IAM API to do signing. + +Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. +When both of those args are provided, generate_signed_url will use the IAM service SignBlob API to sign the url and no private key file is needed. + +Google also now recommends avoiding service account json key files as they are insecure, risky and hard to manage. This avoids the need for that +when developing locally. + +`GS_SA_EMAIL` will be what is what provided to generate_signed_url param: service_account_email. 
Note, this service account will need credentials to +sign and download/upload files as necessary. Read more `here `__ + Alternatively, you can use the setting ``credentials`` or ``GS_CREDENTIALS`` as described below. @@ -219,3 +236,14 @@ Settings It supports `timedelta`, `datetime`, or `integer` seconds since epoch time. Note: The maximum value for this option is 7 days (604800 seconds) in version `v4` (See this `Github issue `_) + +``sa_email`` or ``GS_SA_EMAIL`` + + default: ``''`` + + This is the service account email to be used for signing the url. Signing urls either requires a service account key file to be present in the env or IAM API call. + Compute services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) for example don't have access to the key file in the env. Providing, sa_email, will use + the IAM API in order to sign the URL thus avoiding the need for a private service account json key file. + + As above please note that, Default Google Compute Engine (GCE) Service accounts are + `unable to sign urls `_. 
From 3d806a55897a31621fb7e9deb683904df72e7cd8 Mon Sep 17 00:00:00 2001 From: Yohannes Berhane Date: Sun, 7 Jul 2024 22:51:05 -0400 Subject: [PATCH 02/16] Need to upgrade this version because v4 signing requires private key files before this version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9e29b4f83..daae7f77a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ dropbox = [ "dropbox>=7.2.1", ] google = [ - "google-cloud-storage>=1.27", + "google-cloud-storage>=1.36.1", ] libcloud = [ "apache-libcloud", From 485c639151c62ec19b089aa462065ffe0e98e089 Mon Sep 17 00:00:00 2001 From: Yohannes Berhane Date: Mon, 8 Jul 2024 00:34:21 -0400 Subject: [PATCH 03/16] doc update --- docs/backends/gcloud.rst | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 8c6bcca7f..b6e397175 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -68,7 +68,7 @@ Those services do not have access to the service account private key. That means to have Google sign using their managed private key. This means, you can't directly sign data. You must use the Cloud IAM API to do signing. Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. -When both of those args are provided, generate_signed_url will use the IAM service SignBlob API to sign the url and no private key file is needed. +When both of those args are provided, generate_signed_url will use the IAM SignBlob API to sign the url and no private key file is needed. Google also now recommends avoiding service account json key files as they are insecure, risky and hard to manage. This avoids the need for that when developing locally. @@ -239,11 +239,9 @@ Settings ``sa_email`` or ``GS_SA_EMAIL`` - default: ``''`` - - This is the service account email to be used for signing the url. 
Signing urls either requires a service account key file to be present in the env or IAM API call. - Compute services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) for example don't have access to the key file in the env. Providing, sa_email, will use - the IAM API in order to sign the URL thus avoiding the need for a private service account json key file. + default: ``None`` - As above please note that, Default Google Compute Engine (GCE) Service accounts are - `unable to sign urls `_. + This is the service account email to be used for signing the url. Signing urls requires a service account key file to be present in the env or IAM SignBlob/JWT API call + through a provided service account email. Compute services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) for example don't have access to the key file in the env. + Providing, sa_email, will use the IAM API in order to sign the URL thus avoiding the need for a private service account json key file. If using `v4` of generate_signed_url, + `google-cloud-storage>=v1.36.1 `_ is required . From a9503afc8d1cd709cbc4f0fd2f7526f0feda88d1 Mon Sep 17 00:00:00 2001 From: Yohannes Berhane Date: Mon, 8 Jul 2024 01:17:11 -0400 Subject: [PATCH 04/16] implementation of service account email argument sa_email --- storages/backends/gcloud.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 5ae74a1a8..3fe9420d0 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -19,6 +19,9 @@ from storages.utils import to_bytes try: + from google import auth + from google.auth.transport import requests + from google.auth.credentials import TokenState from google.cloud.exceptions import NotFound from google.cloud.storage import Blob from google.cloud.storage import Client @@ -141,11 +144,20 @@ def get_default_settings(self): # roll over. 
"max_memory_size": setting("GS_MAX_MEMORY_SIZE", 0), "blob_chunk_size": setting("GS_BLOB_CHUNK_SIZE"), + "sa_email": setting("GS_SA_EMAIL") } @property def client(self): if self._client is None: + if self.project_id is None or self.credentials is None: + self.credentials, self.project_id = auth.default( + scopes=['https://www.googleapis.com/auth/cloud-platform'] + ) + if not self.credentials.token_state == TokenState.FRESH: + self.credentials.refresh(requests.Request()) + if not hasattr(self.credentials, "service_account_email") and self.sa_email: + self.credentials.service_account_email = self.sa_email self._client = Client(project=self.project_id, credentials=self.credentials) return self._client @@ -323,12 +335,15 @@ def url(self, name, parameters=None): quoted_name=_quote(name, safe=b"/~"), ) else: + params = parameters or {} default_params = { "bucket_bound_hostname": self.custom_endpoint, "expiration": self.expiration, - "version": "v4", + "version": "v4" } - params = parameters or {} + if hasattr(self.credentials, "service_account_email"): + default_params["access_token"] = self.credentials.token + default_params["service_account_email"] = self.credentials.service_account_email for key, value in default_params.items(): if value and key not in params: From dd1b819a5e72409efc81f6c8b69b650083415817 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Tue, 9 Jul 2024 01:12:18 -0400 Subject: [PATCH 05/16] Storage client mock. 
adjust for default adc missing test errors --- storages/backends/gcloud.py | 7 ++++--- tests/test_gcloud.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 3fe9420d0..020f5c1ba 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -154,11 +154,12 @@ def client(self): self.credentials, self.project_id = auth.default( scopes=['https://www.googleapis.com/auth/cloud-platform'] ) - if not self.credentials.token_state == TokenState.FRESH: - self.credentials.refresh(requests.Request()) if not hasattr(self.credentials, "service_account_email") and self.sa_email: self.credentials.service_account_email = self.sa_email self._client = Client(project=self.project_id, credentials=self.credentials) + + if self.credentials and self.credentials.token_state != TokenState.FRESH: + self.credentials.refresh(requests.Request()) return self._client @property @@ -339,7 +340,7 @@ def url(self, name, parameters=None): default_params = { "bucket_bound_hostname": self.custom_endpoint, "expiration": self.expiration, - "version": "v4" + "version": "v4", } if hasattr(self.credentials, "service_account_email"): default_params["access_token"] = self.credentials.token diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index cacb44d3d..fb2ce071e 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -36,6 +36,7 @@ def test_open_read(self): """ data = b"This is some test read data." + self.storage._client = mock.MagicMock() with self.storage.open(self.filename) as f: self.storage._client.bucket.assert_called_with(self.bucket_name) self.storage._bucket.get_blob.assert_called_with( @@ -49,6 +50,7 @@ def test_open_read_num_bytes(self): data = b"This is some test read data." 
num_bytes = 10 + self.storage._client = mock.MagicMock() with self.storage.open(self.filename) as f: self.storage._client.bucket.assert_called_with(self.bucket_name) self.storage._bucket.get_blob.assert_called_with( @@ -107,6 +109,7 @@ def test_save(self): data = "This is some test content." content = ContentFile(data) + self.storage._client = mock.MagicMock() self.storage.save(self.filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -124,6 +127,7 @@ def test_save2(self): filename = "ủⓝï℅ⅆℇ.txt" content = ContentFile(data) + self.storage._client = mock.MagicMock() self.storage.save(filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -145,7 +149,7 @@ def test_save_with_default_acl(self): # 'projectPrivate', 'bucketOwnerRead', 'bucketOwnerFullControl', # 'private', 'authenticatedRead', 'publicRead', 'publicReadWrite' self.storage.default_acl = "publicRead" - + self.storage._client = mock.MagicMock() self.storage.save(filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -159,6 +163,7 @@ def test_save_with_default_acl(self): ) def test_delete(self): + self.storage._client = mock.MagicMock() self.storage.delete(self.filename) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -185,6 +190,7 @@ def test_exists_no_bucket(self): def test_exists_bucket(self): # exists('') should return True if the bucket exists + self.storage._client = mock.MagicMock() self.assertTrue(self.storage.exists("")) def test_exists_file_overwrite(self): @@ -371,6 +377,7 @@ def test_custom_endpoint_with_parameters(self): self.storage.default_acl = "publicRead" url = "{}/{}".format(self.storage.custom_endpoint, self.filename) + self.storage._client = mock.MagicMock() self.assertEqual(self.storage.url(self.filename), url) bucket_name = "hyacinth" @@ -414,6 +421,7 @@ def test_cache_control(self): GS_OBJECT_PARAMETERS={"cache_control": "public, max-age=604800"} ): self.storage = 
gcloud.GoogleCloudStorage(bucket_name=self.bucket_name) + self.storage._client = mock.MagicMock() self.storage.save(filename, content) bucket = self.storage.client.bucket(self.bucket_name) blob = bucket.get_blob(filename) @@ -427,6 +435,7 @@ def test_storage_save_gzip_twice(self): content = ContentFile("I should be gzip'd") # When + self.storage._client = mock.MagicMock() self.storage.save(name, content) self.storage.save("test_storage_save_2.css", content) @@ -528,6 +537,7 @@ def test_storage_save_gzipped(self, *args): patcher = mock.patch("google.cloud.storage.Bucket.get_blob", return_value=blob) try: patcher.start() + self.storage._client = mock.MagicMock() self.storage.save(name, content) obj = self.storage._bucket.get_blob() obj.upload_from_file.assert_called_with( @@ -555,6 +565,7 @@ def test_storage_save_gzip(self, *args): try: patcher.start() + self.storage._client = mock.MagicMock() self.storage.save(name, content) obj = self.storage._bucket.get_blob() obj.upload_from_file.assert_called_with( From ad30e53785dfe091698f1c4b7c6b6d0e6a8bb6b6 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Tue, 9 Jul 2024 02:43:34 -0400 Subject: [PATCH 06/16] Potential better solution where we only use the sign iam api call if only explicilty passed in setting --- docs/backends/gcloud.rst | 43 +++++++++++++++++++------------------ storages/backends/gcloud.py | 22 +++++++++++++------ 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index b6e397175..ea7c03360 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -53,28 +53,20 @@ In most cases, the default service accounts are not sufficient to read/write and #. Make sure your service account has access to the bucket and appropriate permissions. (`Using IAM Permissions `__) #. 
Ensure this service account is associated to the type of compute being used (Google Compute Engine (GCE), Google Kubernetes Engine (GKE), Google Cloud Run (GCR), etc) -For development use cases, or other instances outside Google infrastructure: - -#. Create the key and download ``your-project-XXXXX.json`` file. -#. Ensure the key is mounted/available to your running Django app. -#. Set an environment variable of GOOGLE_APPLICATION_CREDENTIALS to the path of the json file. - -**Note Regarding Authentication** - -There is currently a limitation in the GCS client for Python which by default requires a service account private key file to be -present when generating signed urls. **This is important to realize**. The service account private key is unavailable when running on a compute service. -Compute Services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch access tokens from a metadata service. -Those services do not have access to the service account private key. That means you must use one of the IAM sign functions (SignBlob, SignJwt) -to have Google sign using their managed private key. This means, you can't directly sign data. You must use the Cloud IAM API to do signing. +**Note:** There is currently a limitation in the GCS client for Python which by default requires a service account private key file to be +present when generating signed urls. The service account private key is unavailable when running on a compute service. +Compute Services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch `access tokens from the metadata server `__ . +These services do not have access to the service account private key. This means that when trying to sign data in these services, +you **MUST** use one of the Cloud IAM sign functions (SignBlob, SignJwt) to sign data and directly signing data isn't possible by any means. Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. 
When both of those args are provided, generate_signed_url will use the IAM SignBlob API to sign the url and no private key file is needed. -Google also now recommends avoiding service account json key files as they are insecure, risky and hard to manage. This avoids the need for that -when developing locally. +Last resort you can still use the service account key file for authentication (not recommended by Google): -`GS_SA_EMAIL` will be what is what provided to generate_signed_url param: service_account_email. Note, this service account will need credentials to -sign and download/upload files as necessary. Read more `here `__ +#. Create the key and download ``your-project-XXXXX.json`` file. +#. Ensure the key is mounted/available to your running Django app. +#. Set an environment variable of GOOGLE_APPLICATION_CREDENTIALS to the path of the json file. Alternatively, you can use the setting ``credentials`` or ``GS_CREDENTIALS`` as described below. @@ -237,11 +229,20 @@ Settings Note: The maximum value for this option is 7 days (604800 seconds) in version `v4` (See this `Github issue `_) +``iam_sign_blob`` or ``GS_IAM_SIGN_BLOB`` + + default: ``False`` + + Signing urls requires a service account key file to be present in the env or IAM SignBlob/JWT API call + through a service account email and access_token. Certain GCP services (ex: Compute services) don't have access to the key file in the env. + This setting needs to be `True` when running on such services as they fetch access tokens from metadata server instead of having key files + If using `v4` of generate_signed_url, `google-cloud-storage>=v1.36.1 `_ is required . + ``sa_email`` or ``GS_SA_EMAIL`` default: ``None`` - This is the service account email to be used for signing the url. Signing urls requires a service account key file to be present in the env or IAM SignBlob/JWT API call - through a provided service account email. Compute services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) 
for example don't have access to the key file in the env. - Providing, sa_email, will use the IAM API in order to sign the URL thus avoiding the need for a private service account json key file. If using `v4` of generate_signed_url, - `google-cloud-storage>=v1.36.1 `_ is required . + The service account email to use for signing url. If a service account is being used for authentication (attached to your service), + this setting doesn't need to be provided unless you want to use another service account than the one attached to your service for signing urls. + Can be used in local development env as well to sign using sa_email instead of the user credentials or keeping a insecure service account key file + If using `v4` of generate_signed_url, `google-cloud-storage>=v1.36.1 `_ is required . diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 020f5c1ba..4525b0df4 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -144,7 +144,10 @@ def get_default_settings(self): # roll over. 
"max_memory_size": setting("GS_MAX_MEMORY_SIZE", 0), "blob_chunk_size": setting("GS_BLOB_CHUNK_SIZE"), - "sa_email": setting("GS_SA_EMAIL") + # use in cases where service account key isn't available in env + # in such cases, sign blob api is REQUIRED for signing data + "iam_sign_blob": setting("GS_IAM_SIGN_BLOB", False), + "sa_email": setting("GS_SA_EMAIL"), } @property @@ -154,10 +157,7 @@ def client(self): self.credentials, self.project_id = auth.default( scopes=['https://www.googleapis.com/auth/cloud-platform'] ) - if not hasattr(self.credentials, "service_account_email") and self.sa_email: - self.credentials.service_account_email = self.sa_email self._client = Client(project=self.project_id, credentials=self.credentials) - if self.credentials and self.credentials.token_state != TokenState.FRESH: self.credentials.refresh(requests.Request()) return self._client @@ -342,9 +342,19 @@ def url(self, name, parameters=None): "expiration": self.expiration, "version": "v4", } - if hasattr(self.credentials, "service_account_email"): + + if self.iam_sign_blob: + if not hasattr(self.credentials, "service_account_email") and not self.sa_email: + raise AttributeError( + "Sign Blob API requires service_account_email to be available " + "through ADC or setting `sa_email`" + ) + if hasattr(self.credentials, "service_account_email"): + default_params["service_account_email"] = self.credentials.service_account_email + # sa_email has the final say of which service_account_email to be used for signing if provided + if self.sa_email: + default_params["service_account_email"] = self.sa_email default_params["access_token"] = self.credentials.token - default_params["service_account_email"] = self.credentials.service_account_email for key, value in default_params.items(): if value and key not in params: From 55e7a789c05aee2bee3d80630a780ab0c4b0f1d5 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Tue, 9 Jul 2024 22:22:23 -0400 Subject: [PATCH 07/16] small change --- 
docs/backends/gcloud.rst | 4 ++-- storages/backends/gcloud.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index ea7c03360..83ce9cf2e 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -57,7 +57,7 @@ In most cases, the default service accounts are not sufficient to read/write and present when generating signed urls. The service account private key is unavailable when running on a compute service. Compute Services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch `access tokens from the metadata server `__ . These services do not have access to the service account private key. This means that when trying to sign data in these services, -you **MUST** use one of the Cloud IAM sign functions (SignBlob, SignJwt) to sign data and directly signing data isn't possible by any means. +you **MUST** use Cloud IAM sign function (SignBlob) to sign data and directly signing data isn't possible by any means. Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. When both of those args are provided, generate_signed_url will use the IAM SignBlob API to sign the url and no private key file is needed. @@ -233,7 +233,7 @@ Settings default: ``False`` - Signing urls requires a service account key file to be present in the env or IAM SignBlob/JWT API call + Signing urls requires a service account key file to be present in the env or IAM SignBlob API call through a service account email and access_token. Certain GCP services (ex: Compute services) don't have access to the key file in the env. This setting needs to be `True` when running on such services as they fetch access tokens from metadata server instead of having key files If using `v4` of generate_signed_url, `google-cloud-storage>=v1.36.1 `_ is required . 
diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 4525b0df4..3e4efdf36 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -336,13 +336,13 @@ def url(self, name, parameters=None): quoted_name=_quote(name, safe=b"/~"), ) else: - params = parameters or {} default_params = { "bucket_bound_hostname": self.custom_endpoint, "expiration": self.expiration, "version": "v4", } - + params = parameters or {} + if self.iam_sign_blob: if not hasattr(self.credentials, "service_account_email") and not self.sa_email: raise AttributeError( From ac0d0698b9487e1c5c659efe0ad1f6b4e7ca8fb9 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sat, 20 Jul 2024 15:00:53 -0400 Subject: [PATCH 08/16] Additional clean up and doc update --- docs/backends/gcloud.rst | 2 ++ storages/backends/gcloud.py | 44 +++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 83ce9cf2e..328d3b9d6 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -61,6 +61,8 @@ you **MUST** use Cloud IAM sign function (SignBlob) to sign data and directly si Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. When both of those args are provided, generate_signed_url will use the IAM SignBlob API to sign the url and no private key file is needed. +In order to enable this, use setting `iam_sign_blob` and the optional `sa_email` (if providing a service account email different than the one attached +to GCP Environment). 
Last resort you can still use the service account key file for authentication (not recommended by Google): diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 3e4efdf36..b00a3cbb5 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -153,13 +153,7 @@ def get_default_settings(self): @property def client(self): if self._client is None: - if self.project_id is None or self.credentials is None: - self.credentials, self.project_id = auth.default( - scopes=['https://www.googleapis.com/auth/cloud-platform'] - ) self._client = Client(project=self.project_id, credentials=self.credentials) - if self.credentials and self.credentials.token_state != TokenState.FRESH: - self.credentials.refresh(requests.Request()) return self._client @property @@ -344,20 +338,36 @@ def url(self, name, parameters=None): params = parameters or {} if self.iam_sign_blob: - if not hasattr(self.credentials, "service_account_email") and not self.sa_email: - raise AttributeError( - "Sign Blob API requires service_account_email to be available " - "through ADC or setting `sa_email`" - ) - if hasattr(self.credentials, "service_account_email"): - default_params["service_account_email"] = self.credentials.service_account_email - # sa_email has the final say of which service_account_email to be used for signing if provided - if self.sa_email: - default_params["service_account_email"] = self.sa_email - default_params["access_token"] = self.credentials.token + service_account_email, access_token = self._get_iam_sign_blob_params() + default_params["service_account_email"] = service_account_email + default_params["access_token"] = access_token for key, value in default_params.items(): if value and key not in params: params[key] = value return blob.generate_signed_url(**params) + + def _get_iam_sign_blob_params(self): + credentials, _ = auth.default( + scopes=['https://www.googleapis.com/auth/cloud-platform'] + ) + if credentials and credentials.token_state != 
TokenState.FRESH: + credentials.refresh(requests.Request()) + + try: + service_account_email = credentials.service_account_email + except AttributeError: + service_account_email = None + + # sa_email has the final say of which service_account_email to be used for signing if provided + if self.sa_email: + service_account_email = self.sa_email + + if not service_account_email: + raise AttributeError( + "Sign Blob API requires service_account_email to be available " + "through ADC or setting `sa_email`" + ) + + return service_account_email, credentials.token From 869ad033d536d627bb23b005087a8b58a7ef28c5 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sat, 20 Jul 2024 15:05:16 -0400 Subject: [PATCH 09/16] Revert tests --- tests/test_gcloud.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index fb2ce071e..cacb44d3d 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -36,7 +36,6 @@ def test_open_read(self): """ data = b"This is some test read data." - self.storage._client = mock.MagicMock() with self.storage.open(self.filename) as f: self.storage._client.bucket.assert_called_with(self.bucket_name) self.storage._bucket.get_blob.assert_called_with( @@ -50,7 +49,6 @@ def test_open_read_num_bytes(self): data = b"This is some test read data." num_bytes = 10 - self.storage._client = mock.MagicMock() with self.storage.open(self.filename) as f: self.storage._client.bucket.assert_called_with(self.bucket_name) self.storage._bucket.get_blob.assert_called_with( @@ -109,7 +107,6 @@ def test_save(self): data = "This is some test content." 
content = ContentFile(data) - self.storage._client = mock.MagicMock() self.storage.save(self.filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -127,7 +124,6 @@ def test_save2(self): filename = "ủⓝï℅ⅆℇ.txt" content = ContentFile(data) - self.storage._client = mock.MagicMock() self.storage.save(filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -149,7 +145,7 @@ def test_save_with_default_acl(self): # 'projectPrivate', 'bucketOwnerRead', 'bucketOwnerFullControl', # 'private', 'authenticatedRead', 'publicRead', 'publicReadWrite' self.storage.default_acl = "publicRead" - self.storage._client = mock.MagicMock() + self.storage.save(filename, content) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -163,7 +159,6 @@ def test_save_with_default_acl(self): ) def test_delete(self): - self.storage._client = mock.MagicMock() self.storage.delete(self.filename) self.storage._client.bucket.assert_called_with(self.bucket_name) @@ -190,7 +185,6 @@ def test_exists_no_bucket(self): def test_exists_bucket(self): # exists('') should return True if the bucket exists - self.storage._client = mock.MagicMock() self.assertTrue(self.storage.exists("")) def test_exists_file_overwrite(self): @@ -377,7 +371,6 @@ def test_custom_endpoint_with_parameters(self): self.storage.default_acl = "publicRead" url = "{}/{}".format(self.storage.custom_endpoint, self.filename) - self.storage._client = mock.MagicMock() self.assertEqual(self.storage.url(self.filename), url) bucket_name = "hyacinth" @@ -421,7 +414,6 @@ def test_cache_control(self): GS_OBJECT_PARAMETERS={"cache_control": "public, max-age=604800"} ): self.storage = gcloud.GoogleCloudStorage(bucket_name=self.bucket_name) - self.storage._client = mock.MagicMock() self.storage.save(filename, content) bucket = self.storage.client.bucket(self.bucket_name) blob = bucket.get_blob(filename) @@ -435,7 +427,6 @@ def test_storage_save_gzip_twice(self): content = 
ContentFile("I should be gzip'd") # When - self.storage._client = mock.MagicMock() self.storage.save(name, content) self.storage.save("test_storage_save_2.css", content) @@ -537,7 +528,6 @@ def test_storage_save_gzipped(self, *args): patcher = mock.patch("google.cloud.storage.Bucket.get_blob", return_value=blob) try: patcher.start() - self.storage._client = mock.MagicMock() self.storage.save(name, content) obj = self.storage._bucket.get_blob() obj.upload_from_file.assert_called_with( @@ -565,7 +555,6 @@ def test_storage_save_gzip(self, *args): try: patcher.start() - self.storage._client = mock.MagicMock() self.storage.save(name, content) obj = self.storage._bucket.get_blob() obj.upload_from_file.assert_called_with( From 886c6f279659f3e2538f4b90b0af15c199cae4b8 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sat, 20 Jul 2024 15:26:32 -0400 Subject: [PATCH 10/16] Adjust auth default use --- storages/backends/gcloud.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index b00a3cbb5..ea1d046e1 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -153,7 +153,12 @@ def get_default_settings(self): @property def client(self): if self._client is None: + if self.iam_sign_blob and not self.credentials: + self.credentials, self.project_id = auth.default( + scopes=['https://www.googleapis.com/auth/cloud-platform'] + ) self._client = Client(project=self.project_id, credentials=self.credentials) + return self._client @property @@ -349,14 +354,11 @@ def url(self, name, parameters=None): return blob.generate_signed_url(**params) def _get_iam_sign_blob_params(self): - credentials, _ = auth.default( - scopes=['https://www.googleapis.com/auth/cloud-platform'] - ) - if credentials and credentials.token_state != TokenState.FRESH: - credentials.refresh(requests.Request()) + if self.credentials.token_state != TokenState.FRESH: + self.credentials.refresh(requests.Request()) 
try: - service_account_email = credentials.service_account_email + service_account_email = self.credentials.service_account_email except AttributeError: service_account_email = None @@ -370,4 +372,4 @@ def _get_iam_sign_blob_params(self): "through ADC or setting `sa_email`" ) - return service_account_email, credentials.token + return service_account_email, self.credentials.token From cf029acaff2edaf1756faae20a5c4db860a7c452 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sat, 20 Jul 2024 15:29:42 -0400 Subject: [PATCH 11/16] remove empty line --- storages/backends/gcloud.py | 1 - 1 file changed, 1 deletion(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index ea1d046e1..94e47476f 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -158,7 +158,6 @@ def client(self): scopes=['https://www.googleapis.com/auth/cloud-platform'] ) self._client = Client(project=self.project_id, credentials=self.credentials) - return self._client @property From b275944278fc191c0089c0b4b596753522d3e0c9 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Mon, 22 Jul 2024 13:31:01 -0400 Subject: [PATCH 12/16] remove version notes --- docs/backends/gcloud.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 328d3b9d6..81f1fe1e3 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -237,8 +237,7 @@ Settings Signing urls requires a service account key file to be present in the env or IAM SignBlob API call through a service account email and access_token. Certain GCP services (ex: Compute services) don't have access to the key file in the env. - This setting needs to be `True` when running on such services as they fetch access tokens from metadata server instead of having key files - If using `v4` of generate_signed_url, `google-cloud-storage>=v1.36.1 `_ is required . 
+ This setting needs to be `True` when running on such services as they fetch access tokens from metadata server instead of having key files. ``sa_email`` or ``GS_SA_EMAIL`` @@ -246,5 +245,4 @@ Settings The service account email to use for signing url. If a service account is being used for authentication (attached to your service), this setting doesn't need to be provided unless you want to use another service account than the one attached to your service for signing urls. - Can be used in local development env as well to sign using sa_email instead of the user credentials or keeping a insecure service account key file - If using `v4` of generate_signed_url, `google-cloud-storage>=v1.36.1 `_ is required . + Can be used in local development env as well to sign using sa_email instead of the user credentials or keeping a insecure service account key file. From 25c3e86ca07ca3e746e1831341a41a20835ed0fa Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Mon, 22 Jul 2024 15:08:31 -0400 Subject: [PATCH 13/16] Add Tests around new settings and generate signed url call with new settings --- tests/test_gcloud.py | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index cacb44d3d..fac760bd7 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -509,6 +509,85 @@ def test_dupe_file_chunk_size(self): self.filename, chunk_size=chunk_size ) + def test_iam_sign_blob_setting(self): + self.assertEqual(self.storage.iam_sign_blob, False) + with override_settings( + GS_IAM_SIGN_BLOB=True + ): + storage = gcloud.GoogleCloudStorage() + self.assertEqual(storage.iam_sign_blob, True) + + def test_sa_email_setting(self): + self.assertEqual(self.storage.sa_email, None) + with override_settings( + GS_SA_EMAIL="service_account_email@gmail.com" + ): + storage = gcloud.GoogleCloudStorage() + self.assertEqual(storage.sa_email, "service_account_email@gmail.com") + + def 
test_iam_sign_blob_no_service_account_email_raises_attribute_error(self): + with override_settings( + GS_IAM_SIGN_BLOB=True + ): + storage = gcloud.GoogleCloudStorage() + storage._bucket = mock.MagicMock() + storage.credentials = mock.MagicMock() + # deleting mocked attribute to simulate no service_account_email + del storage.credentials.service_account_email + # simulating access token + storage.credentials.token = "1234" + # no sa_email or adc service_account_email found + with self.assertRaises(AttributeError, msg=( + "Sign Blob API requires service_account_email to be available " + "through ADC or setting `sa_email`" + )): + storage.url(self.filename) + + def test_iam_sign_blob_with_adc_service_account_email(self): + with override_settings( + GS_IAM_SIGN_BLOB=True + ): + storage = gcloud.GoogleCloudStorage() + storage._bucket = mock.MagicMock() + storage.credentials = mock.MagicMock() + # simulating adc service account email + storage.credentials.service_account_email = "adc_service_account_email@gmail.com" + # simulating access token + storage.credentials.token = "1234" + blob = mock.MagicMock() + storage._bucket.blob.return_value = blob + storage.url(self.filename) + # called with adc service account email and access token + blob.generate_signed_url.assert_called_with( + expiration=timedelta(seconds=86400), + version="v4", + service_account_email=storage.credentials.service_account_email, + access_token=storage.credentials.token + ) + + def test_iam_sign_blob_with_sa_email_setting(self): + with override_settings( + GS_IAM_SIGN_BLOB=True, + GS_SA_EMAIL="service_account_email@gmail.com" + ): + storage = gcloud.GoogleCloudStorage() + storage._bucket = mock.MagicMock() + storage.credentials = mock.MagicMock() + # simulating adc service account email + storage.credentials.service_account_email = "adc_service_account_email@gmail.com" + # simulating access token + storage.credentials.token = "1234" + blob = mock.MagicMock() + storage._bucket.blob.return_value = blob 
+ storage.url(self.filename) + # called with sa_email as it has final say + blob.generate_signed_url.assert_called_with( + expiration=timedelta(seconds=86400), + version="v4", + service_account_email=storage.sa_email, + access_token=storage.credentials.token + ) + class GoogleCloudGzipClientTests(GCloudTestCase): def setUp(self): From db37dbcf9817b235a00022a326a23ff1bc746f99 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sun, 4 Aug 2024 01:55:13 -0400 Subject: [PATCH 14/16] documentation update --- docs/backends/gcloud.rst | 47 +++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/docs/backends/gcloud.rst b/docs/backends/gcloud.rst index 81f1fe1e3..25d85dd22 100644 --- a/docs/backends/gcloud.rst +++ b/docs/backends/gcloud.rst @@ -52,17 +52,8 @@ In most cases, the default service accounts are not sufficient to read/write and #. Create a service account. (`Google Getting Started Guide `__) #. Make sure your service account has access to the bucket and appropriate permissions. (`Using IAM Permissions `__) #. Ensure this service account is associated to the type of compute being used (Google Compute Engine (GCE), Google Kubernetes Engine (GKE), Google Cloud Run (GCR), etc) - -**Note:** There is currently a limitation in the GCS client for Python which by default requires a service account private key file to be -present when generating signed urls. The service account private key is unavailable when running on a compute service. -Compute Services (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch `access tokens from the metadata server `__ . -These services do not have access to the service account private key. This means that when trying to sign data in these services, -you **MUST** use Cloud IAM sign function (SignBlob) to sign data and directly signing data isn't possible by any means. 
- -Luckily this can be worked around by passing `service_account_email` and `access_token` to the generate_signed_url function. -When both of those args are provided, generate_signed_url will use the IAM SignBlob API to sign the url and no private key file is needed. -In order to enable this, use setting `iam_blob_sign` and the optional `sa_email` (if providing a service account email different than the one attached -to GCP Environment). +#. If your django app only handles ``publicRead`` storage objects, then the above steps are all that is required +#. If your django app handles signed (expiring) urls, then read through the options in the ``Settings for Signed Urls`` section Last resort you can still use the service account key file for authentication (not recommended by Google): @@ -72,6 +63,29 @@ Last resort you can still use the service account key file for authentication (n Alternatively, you can use the setting ``credentials`` or ``GS_CREDENTIALS`` as described below. +Settings for Signed Urls +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + There is currently a limitation in the GCS client for Python which by default requires a + service account private key file to be present when generating signed urls. The service + account private key file is unavailable when running on compute services. Compute Services + (App Engine, Cloud Run, Cloud Functions, Compute Engine...) fetch `access tokens from the metadata server + `__ + +Due to the above limitation, currently the only way to generate a signed url without having the private key file mounted +in the env is through the IAM Sign Blob API. + +IAM Sign Blob API doesn't require a private key file to be present in the env, but it does have +`quota limits `__ which could be a deal-breaker. In order to enable this, +the setting ``GS_IAM_SIGN_BLOB`` (default=`False`) needs to be `True`.
When this setting is enabled, +signed urls are generated through the IAM SignBlob API using the attached service account email and access_token instead +of the credentials in the key file. + +The ``GS_IAM_SIGN_BLOB`` setting is also complemented by the optional setting ``GS_SA_EMAIL``. This setting allows +you to override the service account to be used to generate the signed url if it is different from the one attached +to your env. It is also useful for local/development use cases where the metadata server isn't available and storing private key +files is dangerous. Settings ~~~~~~~~ @@ -235,14 +249,13 @@ Settings default: ``False`` - Signing urls requires a service account key file to be present in the env or IAM SignBlob API call - through a service account email and access_token. Certain GCP services (ex: Compute services) don't have access to the key file in the env. - This setting needs to be `True` when running on such services as they fetch access tokens from metadata server instead of having key files. + Generate signed urls using the IAM Sign Blob API which doesn't require a service account private key file to be present in the env. + Set this setting to ``True`` if storing a private key file isn't viable and you would rather generate signed urls using an API. ``sa_email`` or ``GS_SA_EMAIL`` default: ``None`` - The service account email to use for signing url. If a service account is being used for authentication (attached to your service), - this setting doesn't need to be provided unless you want to use another service account than the one attached to your service for signing urls. - Can be used in local development env as well to sign using sa_email instead of the user credentials or keeping a insecure service account key file. + Allows override of the service account to be used for generating signed urls using the IAM Sign Blob API.
+ This setting is completely optional and should be used if the service account associated with your service/app isn't + the one with the permissions to SignBlob. Also helpful for development use cases where private key file is not recommended. \ No newline at end of file From 77c1ea0b1585f4f5a2cb767ca0378cf0538e57d6 Mon Sep 17 00:00:00 2001 From: Yohannes-B Date: Sun, 4 Aug 2024 02:01:59 -0400 Subject: [PATCH 15/16] fix linter errors --- storages/backends/gcloud.py | 4 ++-- tests/test_gcloud.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 94e47476f..519bfb790 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -20,8 +20,8 @@ try: from google import auth - from google.auth.transport import requests from google.auth.credentials import TokenState + from google.auth.transport import requests from google.cloud.exceptions import NotFound from google.cloud.storage import Blob from google.cloud.storage import Client @@ -361,7 +361,7 @@ def _get_iam_sign_blob_params(self): except AttributeError: service_account_email = None - # sa_email has the final say of which service_account_email to be used for signing if provided + # sa_email has final say of service_account used to sign url if provided if self.sa_email: service_account_email = self.sa_email diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index fac760bd7..7ec4fa0ff 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -551,7 +551,7 @@ def test_iam_sign_blob_with_adc_service_account_email(self): storage._bucket = mock.MagicMock() storage.credentials = mock.MagicMock() # simulating adc service account email - storage.credentials.service_account_email = "adc_service_account_email@gmail.com" + storage.credentials.service_account_email = "service@gmail.com" # simulating access token storage.credentials.token = "1234" blob = mock.MagicMock() @@ -574,7 +574,7 @@ def 
test_iam_sign_blob_with_sa_email_setting(self): storage._bucket = mock.MagicMock() storage.credentials = mock.MagicMock() # simulating adc service account email - storage.credentials.service_account_email = "adc_service_account_email@gmail.com" + storage.credentials.service_account_email = "service@gmail.com" # simulating access token storage.credentials.token = "1234" blob = mock.MagicMock() From e4529153ffcfd35617cd8aee5b6b894974703081 Mon Sep 17 00:00:00 2001 From: Joonhyung Shin Date: Thu, 8 Aug 2024 01:31:20 -0500 Subject: [PATCH 16/16] Make black happy --- storages/backends/gcloud.py | 2 +- tests/test_gcloud.py | 34 ++++++++++++++-------------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/storages/backends/gcloud.py b/storages/backends/gcloud.py index 519bfb790..8732e75f5 100644 --- a/storages/backends/gcloud.py +++ b/storages/backends/gcloud.py @@ -155,7 +155,7 @@ def client(self): if self._client is None: if self.iam_sign_blob and not self.credentials: self.credentials, self.project_id = auth.default( - scopes=['https://www.googleapis.com/auth/cloud-platform'] + scopes=["https://www.googleapis.com/auth/cloud-platform"] ) self._client = Client(project=self.project_id, credentials=self.credentials) return self._client diff --git a/tests/test_gcloud.py b/tests/test_gcloud.py index 7ec4fa0ff..e7e44c50c 100644 --- a/tests/test_gcloud.py +++ b/tests/test_gcloud.py @@ -511,24 +511,18 @@ def test_dupe_file_chunk_size(self): def test_iam_sign_blob_setting(self): self.assertEqual(self.storage.iam_sign_blob, False) - with override_settings( - GS_IAM_SIGN_BLOB=True - ): + with override_settings(GS_IAM_SIGN_BLOB=True): storage = gcloud.GoogleCloudStorage() self.assertEqual(storage.iam_sign_blob, True) def test_sa_email_setting(self): self.assertEqual(self.storage.sa_email, None) - with override_settings( - GS_SA_EMAIL="service_account_email@gmail.com" - ): + with override_settings(GS_SA_EMAIL="service_account_email@gmail.com"): storage = 
gcloud.GoogleCloudStorage() self.assertEqual(storage.sa_email, "service_account_email@gmail.com") def test_iam_sign_blob_no_service_account_email_raises_attribute_error(self): - with override_settings( - GS_IAM_SIGN_BLOB=True - ): + with override_settings(GS_IAM_SIGN_BLOB=True): storage = gcloud.GoogleCloudStorage() storage._bucket = mock.MagicMock() storage.credentials = mock.MagicMock() @@ -537,16 +531,17 @@ def test_iam_sign_blob_no_service_account_email_raises_attribute_error(self): # simulating access token storage.credentials.token = "1234" # no sa_email or adc service_account_email found - with self.assertRaises(AttributeError, msg=( - "Sign Blob API requires service_account_email to be available " - "through ADC or setting `sa_email`" - )): + with self.assertRaises( + AttributeError, + msg=( + "Sign Blob API requires service_account_email to be available " + "through ADC or setting `sa_email`" + ), + ): storage.url(self.filename) def test_iam_sign_blob_with_adc_service_account_email(self): - with override_settings( - GS_IAM_SIGN_BLOB=True - ): + with override_settings(GS_IAM_SIGN_BLOB=True): storage = gcloud.GoogleCloudStorage() storage._bucket = mock.MagicMock() storage.credentials = mock.MagicMock() @@ -562,13 +557,12 @@ def test_iam_sign_blob_with_adc_service_account_email(self): expiration=timedelta(seconds=86400), version="v4", service_account_email=storage.credentials.service_account_email, - access_token=storage.credentials.token + access_token=storage.credentials.token, ) def test_iam_sign_blob_with_sa_email_setting(self): with override_settings( - GS_IAM_SIGN_BLOB=True, - GS_SA_EMAIL="service_account_email@gmail.com" + GS_IAM_SIGN_BLOB=True, GS_SA_EMAIL="service_account_email@gmail.com" ): storage = gcloud.GoogleCloudStorage() storage._bucket = mock.MagicMock() @@ -585,7 +579,7 @@ def test_iam_sign_blob_with_sa_email_setting(self): expiration=timedelta(seconds=86400), version="v4", service_account_email=storage.sa_email, - 
access_token=storage.credentials.token + access_token=storage.credentials.token, )