Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AG-1591, AG-1592: don't deploy unused containers, create DocumentDB cluster, allow connection from API container #17

Open
wants to merge 7 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* @Sage-Bionetworks-IT/sagebio-it @Sage-Bionetworks-IT/infra-oversight-committee @Sage-Bionetworks-IT/agora-admin
* @Sage-Bionetworks-IT/infra-oversight-committee @Sage-Bionetworks-IT/agora-admin
108 changes: 34 additions & 74 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from os import environ

import aws_cdk as cdk
from aws_cdk import aws_ec2 as ec2

from src.ecs_stack import EcsStack
from src.load_balancer_stack import LoadBalancerStack
from src.network_stack import NetworkStack
from src.service_props import ServiceProps, ContainerVolume
from src.service_props import ServiceProps, ServiceSecret
from src.service_stack import LoadBalancedServiceStack, ServiceStack
from src.docdb_props import DocdbProps
from src.docdb_stack import DocdbStack

# get the environment and set environment specific variables
VALID_ENVIRONMENTS = ["dev", "stage", "prod"]
Expand Down Expand Up @@ -43,6 +46,7 @@
fully_qualified_domain_name = environment_variables["FQDN"]
environment_tags = environment_variables["TAGS"]
agora_version = "4.0.0-rc1"
docdb_master_username = "master"

# Define stacks
cdk_app = cdk.App()
Expand All @@ -58,6 +62,19 @@
vpc_cidr=environment_variables["VPC_CIDR"],
)

docdb_props = DocdbProps(
instance_type=ec2.InstanceType.of(
ec2.InstanceClass.MEMORY5, ec2.InstanceSize.LARGE
),
zaro0508 marked this conversation as resolved.
Show resolved Hide resolved
master_username=docdb_master_username,
)
docdb_stack = DocdbStack(
scope=cdk_app,
construct_id=f"{stack_name_prefix}-docdb",
vpc=network_stack.vpc,
props=docdb_props,
)

ecs_stack = EcsStack(
scope=cdk_app,
construct_id=f"{stack_name_prefix}-ecs",
Expand All @@ -75,82 +92,25 @@
vpc=network_stack.vpc,
)

api_docs_props = ServiceProps(
container_name="agora-api-docs",
container_location=f"ghcr.io/sage-bionetworks/agora-api-docs:{agora_version}",
container_port=8010,
container_memory=200,
container_env_vars={"PORT": "8010"},
)
api_docs_stack = ServiceStack(
scope=cdk_app,
construct_id=f"{stack_name_prefix}-api-docs",
vpc=network_stack.vpc,
cluster=ecs_stack.cluster,
props=api_docs_props,
)

mongo_props = ServiceProps(
container_name="agora-mongo",
container_location=f"ghcr.io/sage-bionetworks/agora-mongo:{agora_version}",
container_port=27017,
container_memory=500,
container_env_vars={
"MONGO_INITDB_ROOT_USERNAME": "root",
"MONGO_INITDB_ROOT_PASSWORD": "changeme",
"MONGO_INITDB_DATABASE": "agora",
},
container_volumes=[
ContainerVolume(
path="/data/db",
size=30,
)
],
)
mongo_stack = ServiceStack(
scope=cdk_app,
construct_id=f"{stack_name_prefix}-mongo",
vpc=network_stack.vpc,
cluster=ecs_stack.cluster,
props=mongo_props,
)

# It is probably not appropriate host this container in ECS
# data_props = ServiceProps(
# container_name="agora-data",
# container_location=f"ghcr.io/sage-bionetworks/agora-data:{agora_version}",
# container_port=9999, # Not used
# container_memory=2048,
# )
# data_stack = ServiceStack(
# scope=cdk_app,
# construct_id=f"{stack_name_prefix}-data",
# vpc=network_stack.vpc,
# cluster=ecs_stack.cluster,
# props=data_props,
# container_env_vars={
# "DB_USER": "root",
# "DB_PASS": "changeme",
# "DB_NAME": "agora",
# "DB_PORT": "27017",
# "DB_HOST": "agora-mongo",
# "DATA_FILE": "syn13363290",
# "DATA_VERSION": "68",
# "TEAM_IMAGES_ID": "syn12861877",
# "SYNAPSE_AUTH_TOKEN": "agora-service-user-pat-here",
# },
# )
# data_stack.add_dependency(mongo_stack)

api_props = ServiceProps(
container_name="agora-api",
container_location=f"ghcr.io/sage-bionetworks/agora-api:{agora_version}",
container_port=3333,
container_memory=1024,
container_env_vars={
"MONGODB_URI": "mongodb://root:changeme@agora-mongo:27017/agora?authSource=admin",
"NODE_ENV": "development",
"MONGODB_PORT": "27017",
"MONGODB_NAME": "agora",
"MONDODB_USER": docdb_master_username,
"MONGODB_HOST": docdb_stack.cluster.cluster_endpoint.hostname,
},
container_secrets=[
ServiceSecret(
secret_name=docdb_stack.master_password_secret.secret_name,
environment_key="MONGODB_PASS",
)
],
container_security_groups=[docdb_stack.access_docdb_security_group],
)
api_stack = ServiceStack(
scope=cdk_app,
Expand All @@ -159,18 +119,19 @@
cluster=ecs_stack.cluster,
props=api_props,
)
api_stack.add_dependency(mongo_stack)
api_stack.add_dependency(docdb_stack)

app_props = ServiceProps(
container_name="agora-app",
container_location=f"ghcr.io/sage-bionetworks/agora-app:{agora_version}",
container_port=4200,
container_memory=200,
container_env_vars={
"API_DOCS_URL": f"http://{fully_qualified_domain_name}/api-docs",
"API_DOCS_URL": f"https://{fully_qualified_domain_name}/api-docs",
"APP_VERSION": f"{agora_version}",
"CSR_API_URL": f"http://{fully_qualified_domain_name}/api/v1",
"SSR_API_URL": "http://agora-api:3333/v1",
"CSR_API_URL": f"https://{fully_qualified_domain_name}/api/v1",
"SSR_API_URL": "https://agora-api:3333/v1",
"TAG_NAME": f"agora/v${agora_version}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't this a dup of APP_VERSION?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're very similar, but not quite the same (e.g. 4.0.0 for APP_VERSION vs agora/v4.0.0.0 for TAG_NAME). @sagely1 would know better, but I believe separate variables were created because the GitHub tag format was in flux. However, with that decision made and the format so similar, they should probably be collapsed to just one environment variable, but that will require work in the sage-monorepo first!

Copy link

@sagely1 sagely1 Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have settled on semantic versioning after some discussion. TAG_NAME will be a string of a format like so:
agora/vX.X.X with a potential suffix such as -rc.X so agora/vX.X.X-rc.X. I think we can just change this to get a tag_name variable instead. https://semver.org/

Copy link
Contributor Author

@hallieswan hallieswan Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! I've opened a ticket to track the work, since we'll need to update the sage-monorepo code first.

},
)
app_stack = ServiceStack(
Expand Down Expand Up @@ -207,7 +168,6 @@
health_check_path="/health",
)
apex_stack.add_dependency(app_stack)
apex_stack.add_dependency(api_docs_stack)
apex_stack.add_dependency(api_stack)

cdk_app.synth()
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
aws-cdk-lib~=2.139
aws-cdk-lib~=2.176
constructs~=10.0
zaro0508 marked this conversation as resolved.
Show resolved Hide resolved
boto3~=1.34
14 changes: 14 additions & 0 deletions src/docdb_props.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from aws_cdk import aws_ec2 as ec2


class DocdbProps:
zaro0508 marked this conversation as resolved.
Show resolved Hide resolved
"""
DocumentDB properties
instance_type: What type of instance to start for the replicas
master_username: The database admin account username
"""

def __init__(self, instance_type: ec2.InstanceType, master_username: str) -> None:
self.instance_type = instance_type
self.master_username = master_username
94 changes: 94 additions & 0 deletions src/docdb_stack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import aws_cdk as cdk
from aws_cdk import (
aws_docdb as docdb,
aws_ec2 as ec2,
aws_secretsmanager as sm,
)
from src.docdb_props import DocdbProps

from constructs import Construct

MONGODB_PORT = 27017


class DocdbStack(cdk.Stack):
"""
DocumentDB cluster
"""

def __init__(
self,
scope: Construct,
construct_id: str,
vpc: ec2.Vpc,
props: DocdbProps,
**kwargs,
) -> None:
super().__init__(scope, construct_id, **kwargs)

self.master_password_secret = sm.Secret(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense! Will the password environment variable in the API container be automatically updated when the secret is rotated?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i believe that's the best feature of secrets manager. here's a blog on how to setup https://aws.amazon.com/blogs/security/how-to-rotate-amazon-documentdb-and-amazon-redshift-credentials-in-aws-secrets-manager/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the article you linked, the application retrieves the password directly from Secrets Manager at runtime. However, the Agora API container currently retrieves the password from an environment variable, which is set by the ServiceStack during deployment. If the secret is retrieved at deployment time, then would the API container environment variable only be updated when the stack is redeployed (and not automatically be updated when the secret is rotated)?

If that's right, then to allow us to use automatic password rotation, I think we'd need to update the API to retrieve the password directly from Secrets Manager (and refetch the credentials without restarting the container, as described here) at runtime, rather than getting the password from an environment variable. What do you think?

Copy link
Contributor

@zaro0508 zaro0508 Jan 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, yes you are correct. getting the secret from an env var will not work with secrets rotation. refactoring the API to get secrets directly from the secrets manager would be a better approach because is more secure and works with secrets rotation. I guess we can leave out secrets rotation at this time. I'll let you guys decide whether you want to refactor Agora to support this use case. I would highly recommend it though.

self,
"DocDbMasterPassword",
generate_secret_string=sm.SecretStringGenerator(
password_length=32, exclude_punctuation=True
),
)

self.access_docdb_security_group = ec2.SecurityGroup(
self,
"DocDbAccessSecurityGroup",
vpc=vpc,
description="Instances with access to document DB servers",
)
self.docdb_security_group = ec2.SecurityGroup(
self,
"DocDbSecurityGroup",
vpc=vpc,
description="Document DB server management and access ports",
)
self.docdb_security_group.add_ingress_rule(
peer=self.access_docdb_security_group,
connection=ec2.Port.tcp_range(MONGODB_PORT, 27030),
)
self.docdb_security_group.add_ingress_rule(
peer=self.access_docdb_security_group, connection=ec2.Port.tcp(28017)
)

cluster_parameter_group = docdb.ClusterParameterGroup(
self,
"DocDbClusterParameterGroup",
family="docdb5.0",
parameters={
"audit_logs": "disabled",
"profiler": "enabled",
"profiler_sampling_rate": "1.0",
"profiler_threshold_ms": "50",
"change_stream_log_retention_duration": "10800",
"tls": "disabled",
"ttl_monitor": "disabled",
},
)

# https://docs.aws.amazon.com/cdk/api/v2/python/aws_cdk.aws_docdb/DatabaseCluster.html
self.cluster = docdb.DatabaseCluster(
self,
"DocDbCluster",
master_user=docdb.Login(
username=props.master_username,
password=self.master_password_secret.secret_value,
),
instance_type=props.instance_type,
vpc=vpc,
vpc_subnets=ec2.SubnetSelection(
subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS
),
parameter_group=cluster_parameter_group,
removal_policy=cdk.RemovalPolicy.DESTROY,
storage_encrypted=True,
preferred_maintenance_window="sat:06:54-sat:07:24",
port=MONGODB_PORT,
export_profiler_logs_to_cloud_watch=True,
security_group=self.docdb_security_group,
)

self.cluster.add_security_groups(self.access_docdb_security_group)
9 changes: 8 additions & 1 deletion src/service_props.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass
from typing import List, Optional, Sequence

from aws_cdk import aws_ecs as ecs
from aws_cdk import aws_ecs as ecs, aws_ec2 as ec2

CONTAINER_LOCATION_PATH_ID = "path://"

Expand Down Expand Up @@ -62,6 +62,7 @@ class ServiceProps:
auto_scale_max_capacity: the fargate auto scaling maximum capacity
container_command: Optional commands to run during the container startup
container_healthcheck: Optional health check configuration for the container
container_security_groups: Optional List of security groups for the container
"""

def __init__(
Expand All @@ -77,6 +78,7 @@ def __init__(
auto_scale_max_capacity: int = 1,
container_command: Optional[Sequence[str]] = None,
container_healthcheck: Optional[ecs.HealthCheck] = None,
container_security_groups: Optional[List[ec2.SecurityGroup]] = None,
) -> None:
self.container_name = container_name
self.container_port = container_port
Expand Down Expand Up @@ -106,3 +108,8 @@ def __init__(
self.auto_scale_max_capacity = auto_scale_max_capacity
self.container_command = container_command
self.container_healthcheck = container_healthcheck

if container_security_groups is None:
self.container_security_groups = []
else:
self.container_security_groups = container_security_groups
4 changes: 3 additions & 1 deletion src/service_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ def _get_secret(scope: Construct, id: str, name: str) -> sm.Secret:
peer=ec2.Peer.ipv4("0.0.0.0/0"),
connection=ec2.Port.tcp(props.container_port),
)
self.security_groups = props.container_security_groups.copy()
self.security_groups.append(self.security_group)
Comment on lines +135 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, it looks like we add the security_group defined here to every container so how about we move the security_group to the ServiceProps class? It can just be added to the container_security_groups object as a default SG?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we create the SecurityGroup in ServiceProps, then I believe ServiceProps would need to inherit from Construct, which would result in duplicate code between the props and the stack class. What do you think about continuing to create security_group in ServiceStack, so that the resource creation only happens in one class?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about continuing to create security_group in ServiceStack, so that the resource creation only happens in one class?

Not sure exactly what you mean but if you mean define a list of service groups in app.py then pass it to ServiceProps object then I think that will work as well.


# attach ECS task to ECS cluster
self.service = ecs.FargateService(
Expand All @@ -141,7 +143,7 @@ def _get_secret(scope: Construct, id: str, name: str) -> sm.Secret:
task_definition=self.task_definition,
enable_execute_command=True,
circuit_breaker=ecs.DeploymentCircuitBreaker(enable=True, rollback=True),
security_groups=([self.security_group]),
security_groups=self.security_groups,
service_connect_configuration=ecs.ServiceConnectProps(
log_driver=ecs.LogDrivers.aws_logs(stream_prefix=f"{construct_id}"),
services=[
Expand Down
Loading
Loading