Skip to content

Commit

Permalink
correct linting
Browse files (browse the repository at this point in the history)
  • Loading branch information
aidanrussell committed Jan 21, 2025
1 parent 0726e75 commit 0b2293e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 29 deletions.
16 changes: 8 additions & 8 deletions infra/modules/sagemaker_deployment/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,12 @@ resource "aws_cloudwatch_metric_alarm" "cloudwatch_alarm" {
statistic = var.alarms[count.index].statistic
alarm_actions = concat(var.alarms[count.index].alarm_actions, [aws_sns_topic.sns_topic_alarmstate[count.index].arn])
ok_actions = concat(var.alarms[count.index].ok_actions, [aws_sns_topic.sns_topic_okstate[count.index].arn])
dimensions = count.index == 0 ? { # TODO: this logic is brittle as it assumes "backlog" has index 0; it would be better to have a logic that rests on the specific name of that metric
EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name # Only EndpointName is used in this case
} : {
EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name, # Both EndpointName and VariantName are used in all other cases
VariantName = aws_sagemaker_endpoint_configuration.endpoint_config.production_variants[0].variant_name # Note this logic would not work if there were ever more than one production variant deployed for an LLM
}
dimensions = count.index == 0 ? { # TODO: this logic is brittle as it assumes "backlog" has index 0; it would be better to have a logic that rests on the specific name of that metric
EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name # Only EndpointName is used in this case
} : {
EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name, # Both EndpointName and VariantName are used in all other cases
VariantName = aws_sagemaker_endpoint_configuration.endpoint_config.production_variants[0].variant_name # Note this logic would not work if there were ever more than one production variant deployed for an LLM
}


depends_on = [aws_sagemaker_endpoint.sagemaker_endpoint, aws_sns_topic.sns_topic_alarmstate, aws_sns_topic.sns_topic_okstate]
Expand All @@ -176,7 +176,7 @@ resource "aws_cloudwatch_metric_alarm" "cloudwatch_alarm" {
resource "aws_sns_topic" "sns_topic_alarmstate" {
count = length(var.alarms)

name = "alarm-alarmstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.sagemaker_endpoint.name}"
name = "alarm-alarmstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.sagemaker_endpoint.name}"
policy = jsonencode({
Version = "2012-10-17",
Statement = [
Expand Down Expand Up @@ -213,7 +213,7 @@ resource "aws_sns_topic_subscription" "sns_lambda_subscription_alarmstate" {
resource "aws_sns_topic" "sns_topic_okstate" {
count = length(var.alarms)

name = "alarm-okstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.sagemaker_endpoint.name}"
name = "alarm-okstate-${var.alarms[count.index].alarm_name_prefix}-${aws_sagemaker_endpoint.sagemaker_endpoint.name}"

policy = jsonencode({
Version = "2012-10-17",
Expand Down
42 changes: 21 additions & 21 deletions infra/sagemaker_llm_resources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ module "gpt_neo_125m_deployment" {

alarms = [
{
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_description = "Scale based on existence of backlog or not"
metric_name = "ApproximateBacklogSize"
namespace = "AWS/SageMaker"
Expand All @@ -57,7 +57,7 @@ module "gpt_neo_125m_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 80 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -72,7 +72,7 @@ module "gpt_neo_125m_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 20 * 8 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -87,7 +87,7 @@ module "gpt_neo_125m_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -102,7 +102,7 @@ module "gpt_neo_125m_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand Down Expand Up @@ -239,7 +239,7 @@ module "phi_2_3b_deployment" {

alarms = [
{
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_description = "Scale based on existence of backlog or not"
metric_name = "ApproximateBacklogSize"
namespace = "AWS/SageMaker"
Expand All @@ -259,7 +259,7 @@ module "phi_2_3b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 80 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -274,7 +274,7 @@ module "phi_2_3b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 20 * 4 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -289,7 +289,7 @@ module "phi_2_3b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 80 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -304,7 +304,7 @@ module "phi_2_3b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 20 * 1 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand Down Expand Up @@ -443,7 +443,7 @@ module "mistral_7b_deployment" {

alarms = [
{
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_description = "Scale based on existence of backlog or not"
metric_name = "ApproximateBacklogSize"
namespace = "AWS/SageMaker"
Expand All @@ -463,7 +463,7 @@ module "mistral_7b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 48 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 80 * 48 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -478,7 +478,7 @@ module "mistral_7b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 48 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 20 * 48 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -493,7 +493,7 @@ module "mistral_7b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 80 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -508,7 +508,7 @@ module "mistral_7b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 20 * 4 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand Down Expand Up @@ -646,7 +646,7 @@ module "gemma_2_27b_deployment" {

alarms = [
{
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_description = "Scale based on existence of backlog or not"
metric_name = "ApproximateBacklogSize"
namespace = "AWS/SageMaker"
Expand All @@ -666,7 +666,7 @@ module "gemma_2_27b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 192 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 80 * 192 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -681,7 +681,7 @@ module "gemma_2_27b_deployment" {
metric_name = "CPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 192 # TODO: we must manually multiply by vCPU count as Normalized metric not available
threshold = 20 * 192 # TODO: we must manually multiply by vCPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -696,7 +696,7 @@ module "gemma_2_27b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "GreaterThanOrEqualToThreshold"
threshold = 80 * 8 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 80 * 8 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand All @@ -711,7 +711,7 @@ module "gemma_2_27b_deployment" {
metric_name = "GPUUtilization"
namespace = "/aws/sagemaker/Endpoints"
comparison_operator = "LessThanOrEqualToThreshold"
threshold = 20 * 8 # TODO: we must manually multiply by GPU count as Normalized metric not available
threshold = 20 * 8 # TODO: we must manually multiply by GPU count as Normalized metric not available
evaluation_periods = 1
datapoints_to_alarm = 1
period = 60
Expand Down Expand Up @@ -856,7 +856,7 @@ module "llama_3_70b_deployment" {

alarms = [
{
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_name_prefix = "backlog" # TODO: backlog is currently required to have index 0, which is brittle
alarm_description = "Scale based on existence of backlog or not"
metric_name = "ApproximateBacklogSize"
namespace = "AWS/SageMaker"
Expand Down

0 comments on commit 0b2293e

Please sign in to comment.