diff --git a/indexer/backup_full_node_ap_northeast_1.tf b/indexer/backup_full_node_ap_northeast_1.tf
index d95b16fa..d248edf8 100644
--- a/indexer/backup_full_node_ap_northeast_1.tf
+++ b/indexer/backup_full_node_ap_northeast_1.tf
@@ -1,5 +1,6 @@
 module "backup_full_node_ap_northeast_1" {
   source = "../modules/validator"
+  count  = var.create_backup_full_node ? 1 : 0 # zero instances when the backup full node is disabled
 
   environment = var.environment
 
@@ -37,7 +38,16 @@ module "backup_full_node_ap_northeast_1" {
 
   use_persistent_docker_volume = var.full_node_use_persistent_docker_volume
 
+  root_block_device_size                  = var.full_node_root_block_device_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
+
   providers = {
     aws = aws.ap_northeast_1
   }
 }
+
+moved { # re-addresses the pre-existing (un-indexed) module to instance [0] now that it uses count
+  from = module.backup_full_node_ap_northeast_1
+  to   = module.backup_full_node_ap_northeast_1[0]
+}
diff --git a/indexer/ecs.tf b/indexer/ecs.tf
index b7a172c9..0743def9 100644
--- a/indexer/ecs.tf
+++ b/indexer/ecs.tf
@@ -50,7 +50,7 @@ resource "aws_ecs_service" "main" {
       aws_subnet.private_subnets[subnet_name].id
     ] : [for subnet in aws_subnet.private_subnets : subnet.id]
     security_groups  = [aws_security_group.services[each.key].id]
-    assign_public_ip = true
+    assign_public_ip = false
   }
 
   dynamic "load_balancer" {
@@ -162,6 +162,7 @@ resource "aws_ecs_task_definition" "main" {
 
   runtime_platform {
     operating_system_family = "LINUX"
+    cpu_architecture        = var.indexer_ecs_task_cpu_architecture
   }
 
   tags = {
diff --git a/indexer/full_node_ap_northeast_1.tf b/indexer/full_node_ap_northeast_1.tf
index 18e32008..84c04301 100644
--- a/indexer/full_node_ap_northeast_1.tf
+++ b/indexer/full_node_ap_northeast_1.tf
@@ -36,9 +36,11 @@ module "full_node_ap_northeast_1" {
 
   use_persistent_docker_volume = var.full_node_use_persistent_docker_volume
 
+  root_block_device_size                  = var.full_node_root_block_device_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
+
   providers = {
     aws = aws.ap_northeast_1
   }
-
-  root_block_device_size = var.full_node_root_block_device_size
 }
diff --git a/indexer/lambda.tf b/indexer/lambda.tf
index f7f4a576..42faf841 100644
--- a/indexer/lambda.tf
+++ b/indexer/lambda.tf
@@ -12,7 +12,7 @@ resource "aws_lambda_function" "main" {
   package_type  = "Image"
   function_name = "${each.key}_lambda_function"
   role          = aws_iam_role.lambda_services[each.key].arn
-  architectures = ["x86_64"]
+  architectures = [lower(var.lambda_cpu_architecture)] # variable is validated as upper-case X86_64 / ARM64; lower-cased here
   timeout       = 300
 
   environment {
diff --git a/indexer/locals.tf b/indexer/locals.tf
index 346d5ca2..e398b6b9 100644
--- a/indexer/locals.tf
+++ b/indexer/locals.tf
@@ -102,7 +102,7 @@ locals {
     },
     "${local.service_names["socks"]}" : {
       ecs_desired_count : var.socks_ecs_desired_count,
-      task_definition_memory : 20480,
+      task_definition_memory : 8192,
       task_definition_cpu : 4096,
       is_public_facing : true,
       ports : [8080, 8000],
diff --git a/indexer/msk.tf b/indexer/msk.tf
index 892b395c..55c05d95 100644
--- a/indexer/msk.tf
+++ b/indexer/msk.tf
@@ -38,7 +38,7 @@ resource "aws_msk_cluster" "main" {
     instance_type = var.msk_instance_type
     storage_info {
       ebs_storage_info {
-        volume_size = var.environment == "mainnet" ? 4000 : 1000 # in GB
+        volume_size = var.msk_storage_size # in GB
       }
     }
     client_subnets = [
diff --git a/indexer/rds.tf b/indexer/rds.tf
index 29b374be..7fd48308 100644
--- a/indexer/rds.tf
+++ b/indexer/rds.tf
@@ -215,7 +215,7 @@ resource "aws_db_instance" "main" {
   performance_insights_enabled          = true
   performance_insights_retention_period = 31
   auto_minor_version_upgrade            = false
-  multi_az                              = true
+  multi_az                              = var.enable_rds_main_multiaz
 
   tags = {
     Name = local.aws_db_instance_main_name
@@ -251,6 +251,7 @@ resource "aws_db_instance" "read_replica" {
 
 # Read replica 2
 resource "aws_db_instance" "read_replica_2" {
+  count          = var.create_read_replica_2 ? 1 : 0 # zero instances when read replica 2 is disabled
   identifier     = "${local.aws_db_instance_main_name}-read-replica-2"
   instance_class = var.rds_db_instance_class
   # engine, engine_version, name, username, db_subnet_group_name, allocated_storage do not have to
diff --git a/indexer/route53.tf b/indexer/route53.tf
index 60b2ad26..6adac3f0 100644
--- a/indexer/route53.tf
+++ b/indexer/route53.tf
@@ -19,11 +19,12 @@ resource "aws_route53_record" "read_replica_1" {
 }
 
 resource "aws_route53_record" "read_replica_2" {
+  count   = var.create_read_replica_2 ? 1 : 0 # same toggle as aws_db_instance.read_replica_2
   zone_id = aws_route53_zone.main.zone_id
   name    = "postgres-main-rr.dydx-indexer.private"
   type    = "CNAME"
   ttl     = "30"
-  records = ["${aws_db_instance.read_replica_2.address}"]
+  records = [aws_db_instance.read_replica_2[count.index].address]
   weighted_routing_policy {
     weight = 1
   }
diff --git a/indexer/route_table.tf b/indexer/route_table.tf
index b19491e2..ce37872d 100644
--- a/indexer/route_table.tf
+++ b/indexer/route_table.tf
@@ -68,9 +68,10 @@ resource "aws_route" "full_node_route_to_indexer" {
 # NOTE: This is not an individual AWS resource, but rather an attachment to the route table, and so
 # no tags are added.
 resource "aws_route" "backup_full_node_route_to_indexer" {
-  route_table_id            = module.backup_full_node_ap_northeast_1.route_table_id
+  count                     = var.create_backup_full_node ? 1 : 0
+  route_table_id            = module.backup_full_node_ap_northeast_1[0].route_table_id
   destination_cidr_block    = var.indexers[var.region].vpc_cidr_block
-  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer.id
+  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer[0].id
 }
 
 # Route from the Indexer's private subnets to the full node's VPC. Needed so that the full node can
@@ -88,9 +89,9 @@ resource "aws_route" "indexer_route_to_full_node" {
 }
 
 resource "aws_route" "indexer_route_to_backup_full_node" {
-  for_each = aws_route_table.private
+  for_each = var.create_backup_full_node ? aws_route_table.private : {}
 
   route_table_id            = each.value.id
   destination_cidr_block    = var.backup_full_node_cidr_vpc
-  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer.id
+  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer[0].id
 }
diff --git a/indexer/s3_bucket.tf b/indexer/s3_bucket.tf
index e0b9636b..7c2516cf 100644
--- a/indexer/s3_bucket.tf
+++ b/indexer/s3_bucket.tf
@@ -9,6 +9,20 @@ resource "aws_s3_bucket" "load_balancer" {
   }
 }
 
+resource "aws_s3_bucket_lifecycle_configuration" "load_balancer" {
+  count  = var.enable_s3_load_balancer_logs_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.load_balancer.id
+
+  rule {
+    id     = "expire-old-logs"
+    status = "Enabled"
+
+    expiration {
+      days = var.s3_load_balancer_logs_expiration_days
+    }
+  }
+}
+
 # TODO: refactor snapshotting full node into a separate module
 # AWS S3 bucket to store all Indexer full node snapshots
 resource "aws_s3_bucket" "indexer_full_node_snapshots" {
@@ -22,6 +36,21 @@ resource "aws_s3_bucket" "indexer_full_node_snapshots" {
   }
 }
 
+resource "aws_s3_bucket_lifecycle_configuration" "indexer_full_node_snapshots" {
+  count  = var.enable_s3_snapshot_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.indexer_full_node_snapshots.id
+
+  rule {
+    id     = "expire-old-snapshots"
+    status = "Enabled"
+
+    expiration {
+      days = var.s3_snapshot_expiration_days
+    }
+  }
+}
+
+
 # Enable S3 bucket metrics to be sent to Datadog for monitoring
 resource "aws_s3_bucket_metric" "indexer_full_node_snapshots" {
   bucket = aws_s3_bucket.indexer_full_node_snapshots.id
@@ -64,3 +93,17 @@ resource "aws_s3_bucket" "athena_rds_snapshots" {
     Environment = var.environment
   }
 }
+
+resource "aws_s3_bucket_lifecycle_configuration" "athena_rds_snapshots" {
+  count  = var.enable_s3_rds_snapshot_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.athena_rds_snapshots.id
+
+  rule {
+    id     = "expire-old-snapshots"
+    status = "Enabled"
+
+    expiration {
+      days = var.s3_rds_snapshot_expiration_days
+    }
+  }
+}
diff --git a/indexer/security_group.tf b/indexer/security_group.tf
index 08d8cd59..c2e1a79c 100644
--- a/indexer/security_group.tf
+++ b/indexer/security_group.tf
@@ -62,7 +62,7 @@ resource "aws_security_group" "msk" {
     security_groups = flatten([
       aws_security_group.devbox.id,
       module.full_node_ap_northeast_1.aws_security_group_id,
-      module.backup_full_node_ap_northeast_1.aws_security_group_id,
+      var.create_backup_full_node ? [module.backup_full_node_ap_northeast_1[0].aws_security_group_id] : [],
       # Lambda Services
       [
         for service in keys(local.lambda_services) :
diff --git a/indexer/snapshot_full_node_ap_northeast_1.tf b/indexer/snapshot_full_node_ap_northeast_1.tf
index 832c905c..92c1af4d 100644
--- a/indexer/snapshot_full_node_ap_northeast_1.tf
+++ b/indexer/snapshot_full_node_ap_northeast_1.tf
@@ -42,7 +42,9 @@ module "full_node_snapshot_ap_northeast_1" {
 
   datadog_env = "snapshot-${var.environment}"
 
-  root_block_device_size = var.full_node_snapshot_ebs_volume_size
+  root_block_device_size                  = var.full_node_snapshot_ebs_volume_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
 
   entry_point = [
     "sh",
diff --git a/indexer/variables.tf b/indexer/variables.tf
index b4911b56..424cea89 100644
--- a/indexer/variables.tf
+++ b/indexer/variables.tf
@@ -105,6 +105,12 @@ variable "msk_instance_type" {
   description = "Instance type for MSK brokers"
 }
 
+variable "msk_storage_size" {
+  type        = number # numeric like the other size variables; consumed by ebs_storage_info.volume_size
+  description = "Storage size (in GB) of MSK nodes. Suggested value: 2000 for mainnet, 1000 for staging and testnet and 500 for dev."
+  default     = 500 # NOTE(review): previously 4000 (mainnet) / 1000 (others) were hard-coded; confirm each environment overrides this
+}
+
 variable "rds_db_instance_class" {
   type        = string
   description = "Instance class for the Postgres RDS DB"
@@ -163,6 +169,12 @@ variable "full_node_container_chain_home" {
   description = "Full-node's home directory for the chain. Used to boot up the chain, and configure the `cmd` in ECS"
 }
 
+variable "full_node_root_block_device_size" {
+  type        = number
+  description = "Size of root block device in gigabytes"
+  default     = 1000
+}
+
 variable "snapshot_full_node_container_chain_home" {
   type        = string
   description = "Snapshot full-node's home directory for the chain. Used to boot up the chain, and configure the `cmd` in ECS"
@@ -202,7 +214,7 @@ variable "full_node_snapshot_upload_period" {
 variable "full_node_snapshot_ebs_volume_size" {
   type        = number
   description = "Size (in GiB) of the EBS volume used for the fast sync full node"
-  default     = 3000
+  default     = 1000
 }
 
 variable "full_node_ec2_instance_type" {
@@ -474,6 +486,99 @@ variable "image_count" {
   default     = 100
 }
 
+variable "enable_s3_snapshot_lifecycle" {
+  type        = bool
+  description = "Enables S3 lifecycle on snapshot bucket. Default is true"
+  default     = true
+}
+
+variable "s3_snapshot_expiration_days" {
+  type        = number
+  description = "Number of days to store fullnode snapshot on S3, defaults to 7."
+  default     = 7
+}
+
+variable "enable_s3_rds_snapshot_lifecycle" {
+  type        = bool
+  description = "Enables S3 lifecycle on rds snapshot bucket. Default is true"
+  default     = true
+}
+
+variable "s3_rds_snapshot_expiration_days" {
+  type        = number
+  description = "Number of days to store rds snapshot on S3, defaults to 14."
+  default     = 14
+}
+
+variable "enable_s3_load_balancer_logs_lifecycle" {
+  type        = bool
+  description = "Enables S3 lifecycle on load balancer logs bucket. Default is true"
+  default     = true
+}
+
+variable "s3_load_balancer_logs_expiration_days" {
+  type        = number
+  description = "Number of days to store load balancer logs on S3, defaults to 14."
+  default     = 14
+}
+
+variable "create_read_replica_2" {
+  description = "Create read replica 2 or not. Default: true"
+  type        = bool
+  default     = true
+}
+
+variable "enable_rds_main_multiaz" {
+  description = "Enable Multi-AZ for the main RDS instance. Default: true"
+  type        = bool
+  default     = true
+}
+
+variable "indexer_ecs_task_cpu_architecture" {
+  type        = string
+  description = "Type of ecs cpu architecture. Accept: X86_64 or ARM64"
+  default     = "X86_64"
+  validation {
+    condition = contains(
+      ["X86_64", "ARM64"],
+      var.indexer_ecs_task_cpu_architecture
+    )
+    error_message = "Err: invalid CPU architecture. Must be one of {X86_64 | ARM64}."
+  }
+}
+
+variable "fullnode_ecs_task_cpu_architecture" {
+  type        = string
+  description = "Type of ecs cpu architecture. Accept: X86_64 or ARM64"
+  default     = "X86_64"
+  validation {
+    condition = contains(
+      ["X86_64", "ARM64"],
+      var.fullnode_ecs_task_cpu_architecture
+    )
+    error_message = "Err: invalid CPU architecture. Must be one of {X86_64 | ARM64}."
+  }
+}
+
+variable "lambda_cpu_architecture" {
+  type        = string
+  description = "Type of lambda cpu architecture. Accept: X86_64 or ARM64"
+  default     = "X86_64"
+  validation {
+    condition = contains(
+      ["X86_64", "ARM64"],
+      var.lambda_cpu_architecture
+    )
+    error_message = "Err: invalid CPU architecture. Must be one of {X86_64 | ARM64}."
+  }
+}
+
+variable "create_backup_full_node" {
+  description = "Create the backup full node. Default: false (intended for dev and test environments). Mainnet and Testnet should enable it."
+  type        = bool
+  default     = false
+}
+
 variable "vulcan_ecs_desired_count" {
   type        = number
   description = "Number of desired vulcan instances."
@@ -491,9 +596,3 @@ variable "socks_ecs_desired_count" {
   description = "Number of desired socks instances."
   default     = 5
 }
-
-variable "full_node_root_block_device_size" {
-  type        = number
-  description = "Size of Size of root block device in gigabytes."
-  default     = 4000
-}
diff --git a/indexer/vpc.tf b/indexer/vpc.tf
index dbb30339..6ae30168 100644
--- a/indexer/vpc.tf
+++ b/indexer/vpc.tf
@@ -84,8 +84,9 @@ resource "aws_vpc_peering_connection" "full_node_peer" {
 }
 
 resource "aws_vpc_peering_connection" "backup_full_node_peer" {
+  count       = var.create_backup_full_node ? 1 : 0 # peering only exists when the backup full node module does
   peer_vpc_id = aws_vpc.main.id
-  vpc_id      = module.backup_full_node_ap_northeast_1.aws_vpc_id
+  vpc_id      = module.backup_full_node_ap_northeast_1[0].aws_vpc_id
   # Auto-accept allows the VPC peering connection to be made programmatically with no manual steps
   # to accept the VPC peering connection in the console
   # This can only be done if both VPCs are in the same region and AWS account (which they are)
diff --git a/modules/validator/ec2.tf b/modules/validator/ec2.tf
index a4894320..cc3279eb 100644
--- a/modules/validator/ec2.tf
+++ b/modules/validator/ec2.tf
@@ -27,7 +27,7 @@ data "aws_ami" "amazon_linux_ecs_ami" {
 
   filter {
     name   = "name"
-    values = ["amzn2-ami-ecs-inf-hvm-*-x86_64-ebs"]
+    values = [var.ecs_task_cpu_architecture == "X86_64" ? "amzn2-ami-ecs-inf-hvm-*-x86_64-ebs" : "amzn2-ami-ecs-hvm-*-arm64-ebs"]
   }
 }
 
diff --git a/modules/validator/ecs.tf b/modules/validator/ecs.tf
index 1cd2810f..f62c9b45 100644
--- a/modules/validator/ecs.tf
+++ b/modules/validator/ecs.tf
@@ -230,6 +230,11 @@ resource "aws_ecs_task_definition" "main" {
     }
   }
 
+  runtime_platform {
+    operating_system_family = "LINUX"
+    cpu_architecture        = var.ecs_task_cpu_architecture
+  }
+
   tags = {
     Name        = "${var.environment}-${var.name}-task"
     Environment = var.environment
diff --git a/modules/validator/variables.tf b/modules/validator/variables.tf
index 1c02d851..e07c25de 100644
--- a/modules/validator/variables.tf
+++ b/modules/validator/variables.tf
@@ -266,3 +266,16 @@ variable "dd_site" {
   default     = "datadoghq.com"
   description = "The site that the datadog agent will send data to"
 }
+
+variable "ecs_task_cpu_architecture" {
+  type        = string
+  description = "Type of ecs cpu architecture. Accept: X86_64 or ARM64"
+  default     = "X86_64"
+  validation {
+    condition = contains(
+      ["X86_64", "ARM64"],
+      var.ecs_task_cpu_architecture
+    )
+    error_message = "Err: invalid CPU architecture. Must be one of {X86_64 | ARM64}."
+  }
+}