Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #3094] Use CDN URLs for attachments instead of S3 URLs #3659

Merged
merged 10 commits into from
Feb 5, 2025
17 changes: 15 additions & 2 deletions api/src/services/opportunities_v1/get_opportunity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from src.api.route_utils import raise_flask_error
from src.db.models.agency_models import Agency
from src.db.models.opportunity_models import Opportunity, OpportunityAttachment, OpportunitySummary
from src.util.file_util import pre_sign_file_location
from src.util.env_config import PydanticBaseEnvConfig
from src.util.file_util import convert_s3_to_cdn_url, pre_sign_file_location


class AttachmentConfig(PydanticBaseEnvConfig):
# If the CDN URL is set, we'll use it instead of pre-signing the file locations
cdn_url: str | None = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't need to be in capslock?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pydantic will look for CDN_URL as an env var. We sometimes also give values an alias if we want a cleaner python name, but doesn't matter too much here.



def _fetch_opportunity(
Expand Down Expand Up @@ -50,7 +56,14 @@ def get_opportunity(db_session: db.Session, opportunity_id: int) -> Opportunity:
db_session, opportunity_id, load_all_opportunity_summaries=False
)

pre_sign_opportunity_file_location(opportunity.opportunity_attachments)
attachment_config = AttachmentConfig()
if attachment_config.cdn_url is not None:
for opp_att in opportunity.opportunity_attachments:
opp_att.download_path = convert_s3_to_cdn_url( # type: ignore
opp_att.file_location, attachment_config.cdn_url
)
else:
pre_sign_opportunity_file_location(opportunity.opportunity_attachments)
Comment on lines +67 to +68
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure there will be a case where it's not set once we merge this. (NOTE - we should first verify with @coilysiren that the CDN is fully working before we merge this - I think it's still in progress)


return opportunity

Expand Down
14 changes: 14 additions & 0 deletions api/src/util/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,17 @@ def read_file(path: str | Path, mode: str = "r", encoding: str | None = None) ->
"""Simple function for just getting all of the contents of a file"""
with open_stream(path, mode, encoding) as input_file:
return input_file.read()


def convert_s3_to_cdn_url(file_path: str, cdn_url: str) -> str:
"""
Convert an S3 URL to a CDN URL

Example:
s3://bucket-name/path/to/file.txt -> https://cdn.example.com/path/to/file.txt
"""
if not is_s3_path(file_path):
raise ValueError(f"Expected s3:// path, got: {file_path}")

_, key = split_s3_url(file_path)
mikehgrantsgov marked this conversation as resolved.
Show resolved Hide resolved
return f"{cdn_url.rstrip('/')}/{key}"
mikehgrantsgov marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,34 @@ def test_get_opportunity_404_not_found_is_draft(client, api_auth_token, enable_f
resp.get_json()["message"]
== f"Could not find Opportunity with ID {opportunity.opportunity_id}"
)


def test_get_opportunity_returns_cdn_urls(
client, api_auth_token, monkeypatch_session, enable_factory_create, db_session, mock_s3_bucket
):
monkeypatch_session.setenv("CDN_URL", "https://cdn.example.com")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we set this in some autouse conftest value? Otherwise we'll need to set it for every API test with attachments. Fine to override it here.

Also we should set CDN_URL in local.env to be localhost:4566 and test that this works locally as well?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried this locally and receive:
Screenshot 2025-01-28 at 2 48 17 PM

This would be the correct path is <s3_path> was replaced with <cdn_path>

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you go to that path, does the file download work? I think for local, it also needs the bucket name (ie. CDN_URL should include the bucket for our local dev purposes).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the bucket name to CDN_URL in local.env and it works. Checking that in shortly.

"""Test that S3 file locations are converted to CDN URLs in the response"""
# Create an opportunity with a specific attachment
opportunity = OpportunityFactory.create(opportunity_attachments=[])

object_name = "test_file_1.txt"
file_loc = f"s3://{mock_s3_bucket}/{object_name}"
OpportunityAttachmentFactory.create(
file_location=file_loc, opportunity=opportunity, file_contents="Hello, world"
)

# Make the GET request
resp = client.get(
f"/v1/opportunities/{opportunity.opportunity_id}", headers={"X-Auth": api_auth_token}
)

# Check the response
assert resp.status_code == 200
response_data = resp.get_json()["data"]

# Verify attachment URL is a CDN URL
assert len(response_data["attachments"]) == 1
attachment = response_data["attachments"][0]

assert attachment["download_path"].startswith("https://cdn.")
assert "s3://" not in attachment["download_path"]
36 changes: 36 additions & 0 deletions api/tests/src/util/test_file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,39 @@ def test_move_file_local_disk(tmp_path):
assert file_util.file_exists(other_file_path) is True

assert file_util.read_file(other_file_path) == contents


@pytest.mark.parametrize(
"s3_path,cdn_url,expected",
[
(
"s3://my-bucket/path/to/file.pdf",
"cdn.example.com",
"https://cdn.example.com/path/to/file.pdf",
),
(
"s3://local-mock-public-bucket/opportunities/9/attachments/79853231/manager.webm",
"cdn.example.com",
"https://cdn.example.com/opportunities/9/attachments/79853231/manager.webm",
),
# Test with trailing slash in CDN URL
(
"s3://my-bucket/file.txt",
"cdn.example.com/",
"https://cdn.example.com/file.txt",
),
# Test with subdirectory in CDN URL
(
"s3://my-bucket/file.txt",
"cdn.example.com/assets",
"https://cdn.example.com/assets/file.txt",
),
],
)
def test_convert_s3_to_cdn_url(s3_path, cdn_url, expected):
assert file_util.convert_s3_to_cdn_url(s3_path, cdn_url) == expected


def test_convert_s3_to_cdn_url_invalid_path():
with pytest.raises(ValueError, match="Expected s3:// path"):
file_util.convert_s3_to_cdn_url("http://not-s3/file.txt", "cdn.example.com")
Loading