From bdd581dbcca11ab3e70fd6fc416346b48b3ea801 Mon Sep 17 00:00:00 2001 From: Jake Heath <76011913+jakeyheath@users.noreply.github.com> Date: Fri, 19 Jan 2024 16:10:02 -0800 Subject: [PATCH] feat: allow for progress_deadline_seconds to be set (#2963) --- terraform/modules/happy-service-eks/README.md | 1 + terraform/modules/happy-service-eks/main.tf | 4 ++-- terraform/modules/happy-service-eks/variables.tf | 6 ++++++ terraform/modules/happy-stack-eks/README.md | 2 +- terraform/modules/happy-stack-eks/main.tf | 1 + terraform/modules/happy-stack-eks/variables.tf | 1 + 6 files changed, 12 insertions(+), 3 deletions(-) diff --git a/terraform/modules/happy-service-eks/README.md b/terraform/modules/happy-service-eks/README.md index 9511504197..fdd963afa8 100644 --- a/terraform/modules/happy-service-eks/README.md +++ b/terraform/modules/happy-service-eks/README.md @@ -82,6 +82,7 @@ | [memory\_requests](#input\_memory\_requests) | Memory requests per pod | `string` | `"10Mi"` | no | | [period\_seconds](#input\_period\_seconds) | The period in seconds used for the liveness and readiness probes. | `number` | `3` | no | | [platform\_architecture](#input\_platform\_architecture) | The platform to deploy to (valid values: `amd64`, `arm64`). Defaults to `amd64`. | `string` | `"amd64"` | no | +| [progress\_deadline\_seconds](#input\_progress\_deadline\_seconds) | The maximum time in seconds for a deployment to make progress before it is considered to be failed. Defaults to 600 seconds. | `number` | `600` | no | | [readiness\_timeout\_seconds](#input\_readiness\_timeout\_seconds) | Readiness probe timeout seconds | `number` | `30` | no | | [regional\_wafv2\_arn](#input\_regional\_wafv2\_arn) | A WAF to protect the EKS Ingress if needed | `string` | `null` | no | | [routing](#input\_routing) | Routing configuration for the ingress |
object({| n/a | yes | diff --git a/terraform/modules/happy-service-eks/main.tf b/terraform/modules/happy-service-eks/main.tf index 0f8b80ab22..99524d4501 100644 --- a/terraform/modules/happy-service-eks/main.tf +++ b/terraform/modules/happy-service-eks/main.tf @@ -69,8 +69,8 @@ resource "kubernetes_deployment_v1" "deployment" { wait_for_rollout = var.wait_for_steady_state spec { - replicas = var.desired_count - + replicas = var.desired_count + progress_deadline_seconds = var.progress_deadline_seconds strategy { type = "RollingUpdate" rolling_update { diff --git a/terraform/modules/happy-service-eks/variables.tf b/terraform/modules/happy-service-eks/variables.tf index 9fc199eda9..51b08759ab 100644 --- a/terraform/modules/happy-service-eks/variables.tf +++ b/terraform/modules/happy-service-eks/variables.tf @@ -252,6 +252,12 @@ variable "additional_volumes_from_config_maps" { description = "Additional volumes to add to the container from the following config maps" } +variable "progress_deadline_seconds" { + type = number + description = "The maximum time in seconds for a deployment to make progress before it is considered to be failed. Defaults to 600 seconds." + default = 600 +} + variable "routing" { type = object({ method : optional(string, "DOMAIN") diff --git a/terraform/modules/happy-stack-eks/README.md b/terraform/modules/happy-stack-eks/README.md index 19b3c1ef02..be3bd4f392 100644 --- a/terraform/modules/happy-stack-eks/README.md +++ b/terraform/modules/happy-stack-eks/README.md @@ -60,7 +60,7 @@ | [image\_tags](#input\_image\_tags) | Override image tag for each docker image | `map(string)` | `{}` | no | | [k8s\_namespace](#input\_k8s\_namespace) | K8S namespace for this stack | `string` | n/a | yes | | [routing\_method](#input\_routing\_method) | Traffic routing method for this stack. Valid options are 'DOMAIN', when every service gets a unique domain name, or a 'CONTEXT' when all services share the same domain name, and routing is done by request path. | `string` | `"DOMAIN"` | no | -| [services](#input\_services) | The services you want to deploy as part of this stack. |
method : optional(string, "DOMAIN")
host_match : string
additional_hostnames : optional(set(string), [])
group_name : string
alb : optional(object({
name : string,
listener_port : number,
}), null)
priority : number
path : optional(string, "/*")
service_name : string
port : number
service_port : number
alb_idle_timeout : optional(number, 60) // in seconds
service_scheme : optional(string, "HTTP")
scheme : optional(string, "HTTP")
success_codes : optional(string, "200-499")
service_type : string
service_mesh : bool
allow_mesh_services : optional(list(object({
service : optional(string, null),
stack : optional(string, null),
service_account_name : optional(string, null),
})), null)
oidc_config : optional(object({
issuer : string
authorizationEndpoint : string
tokenEndpoint : string
userInfoEndpoint : string
secretName : string
}), {
issuer = ""
authorizationEndpoint = ""
tokenEndpoint = ""
userInfoEndpoint = ""
secretName = ""
})
bypasses : optional(map(object({
paths = optional(set(string), [])
methods = optional(set(string), [])
})))
sticky_sessions = optional(object({
enabled = optional(bool, false),
duration_seconds = optional(number, 600),
cookie_name = optional(string, "happy_sticky_session"),
}), {})
})
map(object({| n/a | yes | +| [services](#input\_services) | The services you want to deploy as part of this stack. |
name = string,
service_type = optional(string, "INTERNAL"),
allow_mesh_services = optional(list(object({
service = optional(string, null),
stack = optional(string, null),
service_account_name = optional(string, null)
})), null),
ingress_security_groups = optional(list(string), []), // Only used for VPC service_type
alb = optional(object({
name = string,
listener_port = number,
}), null), // Only used for TARGET_GROUP_ONLY
desired_count = optional(number, 2),
max_count = optional(number, 5),
max_unavailable_count = optional(string, "1"),
scaling_cpu_threshold_percentage = optional(number, 80),
port = optional(number, 80),
scheme = optional(string, "HTTP"),
cmd = optional(list(string), []),
args = optional(list(string), []),
image_pull_policy = optional(string, "IfNotPresent"), // Supported values= IfNotPresent, Always, Never
tag_mutability = optional(bool, true),
scan_on_push = optional(bool, false),
service_port = optional(number, null),
service_scheme = optional(string, "HTTP"),
linkerd_additional_skip_ports = optional(set(number), []),
memory = optional(string, "500Mi"),
memory_requests = optional(string, "200Mi"),
cpu = optional(string, "1"),
cpu_requests = optional(string, "500m"),
gpu = optional(number, null), // Whole number of GPUs to request, 0 will schedule all available GPUs. Requires GPU-enabled nodes in the cluster, `k8s-device-plugin` installed, platform_architecture = "amd64", and additional_node_selectors = { "nvidia.com/gpu.present" = "true" } present.
health_check_path = optional(string, "/"),
aws_iam = optional(object({
policy_json = optional(string, ""),
service_account_name = optional(string, null),
}), {}),
path = optional(string, "/*"), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
priority = optional(number, 0), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
success_codes = optional(string, "200-499"),
synthetics = optional(bool, false),
initial_delay_seconds = optional(number, 30),
alb_idle_timeout = optional(number, 60) // in seconds
period_seconds = optional(number, 3),
liveness_timeout_seconds = optional(number, 30),
readiness_timeout_seconds = optional(number, 30),
platform_architecture = optional(string, "amd64"), // Supported values= amd64, arm64; GPU nodes are amd64 only.
additional_node_selectors = optional(map(string), {}), // For GPU use= { "nvidia.com/gpu.present" = "true" }
bypasses = optional(map(object({ // Only used for INTERNAL service_type
paths = optional(set(string), [])
methods = optional(set(string), [])
})), {})
sticky_sessions = optional(object({
enabled = optional(bool, false),
duration_seconds = optional(number, 600),
cookie_name = optional(string, "happy_sticky_session"),
}), {})
sidecars = optional(map(object({
image = string
tag = string
cmd = optional(list(string), [])
args = optional(list(string), [])
port = optional(number, 80)
scheme = optional(string, "HTTP")
memory = optional(string, "200Mi")
cpu = optional(string, "500m")
image_pull_policy = optional(string, "IfNotPresent") // Supported values= IfNotPresent, Always, Never
health_check_path = optional(string, "/")
initial_delay_seconds = optional(number, 30)
period_seconds = optional(number, 3)
liveness_timeout_seconds = optional(number, 30)
readiness_timeout_seconds = optional(number, 30)
})), {})
init_containers = optional(map(object({
image = string
tag = string
cmd = optional(list(string), []),
})), {}),
additional_env_vars = optional(map(string), {}),
}))
map(object({| n/a | yes | | [skip\_config\_injection](#input\_skip\_config\_injection) | Skip injecting app configs into the services / tasks | `bool` | `false` | no | | [stack\_name](#input\_stack\_name) | Happy Path stack name | `string` | n/a | yes | | [stack\_prefix](#input\_stack\_prefix) | Do bucket storage paths and db schemas need to be prefixed with the stack name? (Usually '/{stack\_name}' for dev stacks, and '' for staging/prod stacks) | `string` | `""` | no | diff --git a/terraform/modules/happy-stack-eks/main.tf b/terraform/modules/happy-stack-eks/main.tf index 54b7e0904b..1a74b54b18 100644 --- a/terraform/modules/happy-stack-eks/main.tf +++ b/terraform/modules/happy-stack-eks/main.tf @@ -194,6 +194,7 @@ module "services" { init_containers = each.value.init_containers ingress_security_groups = each.value.ingress_security_groups linkerd_additional_skip_ports = each.value.linkerd_additional_skip_ports + progress_deadline_seconds = each.value.progress_deadline_seconds routing = { method = var.routing_method diff --git a/terraform/modules/happy-stack-eks/variables.tf b/terraform/modules/happy-stack-eks/variables.tf index bad1509318..cfc096c556 100644 --- a/terraform/modules/happy-stack-eks/variables.tf +++ b/terraform/modules/happy-stack-eks/variables.tf @@ -89,6 +89,7 @@ variable "services" { period_seconds = optional(number, 3), liveness_timeout_seconds = optional(number, 30), readiness_timeout_seconds = optional(number, 30), + progress_deadline_seconds = optional(number, 600), platform_architecture = optional(string, "amd64"), // Supported values= amd64, arm64; GPU nodes are amd64 only. additional_node_selectors = optional(map(string), {}), // For GPU use= { "nvidia.com/gpu.present" = "true" } bypasses = optional(map(object({ // Only used for INTERNAL service_type
name = string,
service_type = optional(string, "INTERNAL"),
allow_mesh_services = optional(list(object({
service = optional(string, null),
stack = optional(string, null),
service_account_name = optional(string, null)
})), null),
ingress_security_groups = optional(list(string), []), // Only used for VPC service_type
alb = optional(object({
name = string,
listener_port = number,
}), null), // Only used for TARGET_GROUP_ONLY
desired_count = optional(number, 2),
max_count = optional(number, 5),
max_unavailable_count = optional(string, "1"),
scaling_cpu_threshold_percentage = optional(number, 80),
port = optional(number, 80),
scheme = optional(string, "HTTP"),
cmd = optional(list(string), []),
args = optional(list(string), []),
image_pull_policy = optional(string, "IfNotPresent"), // Supported values= IfNotPresent, Always, Never
tag_mutability = optional(bool, true),
scan_on_push = optional(bool, false),
service_port = optional(number, null),
service_scheme = optional(string, "HTTP"),
linkerd_additional_skip_ports = optional(set(number), []),
memory = optional(string, "500Mi"),
memory_requests = optional(string, "200Mi"),
cpu = optional(string, "1"),
cpu_requests = optional(string, "500m"),
gpu = optional(number, null), // Whole number of GPUs to request, 0 will schedule all available GPUs. Requires GPU-enabled nodes in the cluster, `k8s-device-plugin` installed, platform_architecture = "amd64", and additional_node_selectors = { "nvidia.com/gpu.present" = "true" } present.
health_check_path = optional(string, "/"),
aws_iam = optional(object({
policy_json = optional(string, ""),
service_account_name = optional(string, null),
}), {}),
path = optional(string, "/*"), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
priority = optional(number, 0), // Only used for CONTEXT and TARGET_GROUP_ONLY routing
success_codes = optional(string, "200-499"),
synthetics = optional(bool, false),
initial_delay_seconds = optional(number, 30),
alb_idle_timeout = optional(number, 60) // in seconds
period_seconds = optional(number, 3),
liveness_timeout_seconds = optional(number, 30),
readiness_timeout_seconds = optional(number, 30),
progress_deadline_seconds = optional(number, 600),
platform_architecture = optional(string, "amd64"), // Supported values= amd64, arm64; GPU nodes are amd64 only.
additional_node_selectors = optional(map(string), {}), // For GPU use= { "nvidia.com/gpu.present" = "true" }
bypasses = optional(map(object({ // Only used for INTERNAL service_type
paths = optional(set(string), [])
methods = optional(set(string), [])
})), {})
sticky_sessions = optional(object({
enabled = optional(bool, false),
duration_seconds = optional(number, 600),
cookie_name = optional(string, "happy_sticky_session"),
}), {})
sidecars = optional(map(object({
image = string
tag = string
cmd = optional(list(string), [])
args = optional(list(string), [])
port = optional(number, 80)
scheme = optional(string, "HTTP")
memory = optional(string, "200Mi")
cpu = optional(string, "500m")
image_pull_policy = optional(string, "IfNotPresent") // Supported values= IfNotPresent, Always, Never
health_check_path = optional(string, "/")
initial_delay_seconds = optional(number, 30)
period_seconds = optional(number, 3)
liveness_timeout_seconds = optional(number, 30)
readiness_timeout_seconds = optional(number, 30)
})), {})
init_containers = optional(map(object({
image = string
tag = string
cmd = optional(list(string), []),
})), {}),
additional_env_vars = optional(map(string), {}),
}))