diff --git a/indexer/indexer_monitors.tf b/indexer/indexer_monitors.tf
index fbe98946..ebbceb7b 100644
--- a/indexer/indexer_monitors.tf
+++ b/indexer/indexer_monitors.tf
@@ -1,13 +1,14 @@
 module "indexer_monitors" {
   count = var.enable_monitoring ? 1 : 0
 
-  source           = "../modules/indexer_monitors"
-  env_tag          = "v4-${var.environment}"
-  environment      = var.environment
-  slack_channel    = var.monitoring_slack_channel
-  pagerduty_tag    = var.monitoring_pagerduty_tag
-  ecs_cluster_name = var.full_node_name
-  msk_cluster_name = aws_msk_cluster.main.cluster_name
-  team             = var.monitoring_team
-  url              = var.indexer_url
+  source                        = "../modules/indexer_monitors"
+  env_tag                       = "v4-${var.environment}"
+  environment                   = var.environment
+  slack_channel                 = var.monitoring_slack_channel
+  pagerduty_tag                 = var.monitoring_pagerduty_tag
+  ecs_cluster_name              = var.full_node_name
+  msk_cluster_name              = aws_msk_cluster.main.cluster_name
+  team                          = var.monitoring_team
+  url                           = var.indexer_url
+  enable_precautionary_monitors = var.enable_precautionary_monitors
 }
diff --git a/indexer/variables.tf b/indexer/variables.tf
index 8c61709a..2f17e22f 100644
--- a/indexer/variables.tf
+++ b/indexer/variables.tf
@@ -332,6 +332,12 @@ variable "monitoring_team" {
   default     = "v4-indexer"
 }
 
+variable "enable_precautionary_monitors" {
+  type        = bool
+  description = "Whether to enable precautionary monitors"
+  default     = true
+}
+
 variable "indexer_url" {
   type        = string
   description = "indexer URL to monitor, should not include https:// or www. Should be something like `indexer.dydx.exchange`"
diff --git a/modules/indexer_monitors/monitors.tf b/modules/indexer_monitors/monitors.tf
index d543d1ac..e4b78e60 100644
--- a/modules/indexer_monitors/monitors.tf
+++ b/modules/indexer_monitors/monitors.tf
@@ -1,4 +1,6 @@
 resource "datadog_monitor_json" "socks_kafka_offset" {
+  count = var.enable_precautionary_monitors ? 1 : 0
+
   monitor = < 0.5",
+  "message": "This is not an actionable alert. 
When this alert fires, that means that the Indexer is processing blocks slow and more time should be invested in improving Ender latency. Please notify Trading if this alert fires.\n\n${local.monitor_suffix_literal}",
+  "tags": [
+    "team:${var.team}",
+    "env:${var.env_tag}"
+  ],
+  "options": {
+    "thresholds": {
+      "critical": 0.5
+    },
+    "notify_audit": false,
+    "require_full_window": false,
+    "notify_no_data": true,
+    "renotify_interval": 0,
+    "include_tags": false,
+    "no_data_timeframe": 60,
+    "new_host_delay": 300,
+    "silenced": {}
+  },
+  "priority": null,
+  "restricted_roles": null
+}
+EOF
+}
+
+resource "datadog_monitor_json" "p95_block_processing_rate" {
+  monitor = < 0.75",
+  "message": "This is not an actionable alert. When this alert fires, that means that the Indexer is processing blocks slow and more time should be invested in improving Ender latency. Please notify Trading if this alert fires.\n\n${local.monitor_suffix_literal}",
+  "tags": [
+    "team:${var.team}",
+    "env:${var.env_tag}"
+  ],
+  "options": {
+    "thresholds": {
+      "critical": 0.75
+    },
+    "notify_audit": false,
+    "require_full_window": false,
+    "notify_no_data": true,
+    "renotify_interval": 0,
+    "include_tags": false,
+    "no_data_timeframe": 60,
+    "new_host_delay": 300,
+    "silenced": {}
+  },
+  "priority": null,
+  "restricted_roles": null
+}
+EOF
+}
+
diff --git a/modules/indexer_monitors/variables.tf b/modules/indexer_monitors/variables.tf
index 6cdc94ba..e7d3b0e2 100644
--- a/modules/indexer_monitors/variables.tf
+++ b/modules/indexer_monitors/variables.tf
@@ -37,3 +37,8 @@ variable "url" {
   type        = string
   description = "Indexer URL to monitor, should not include https:// or www. Should be something like `indexer.dydx.exchange`"
 }
+
+variable "enable_precautionary_monitors" {
+  type        = bool
+  description = "Whether to enable precautionary monitors"
+}