Skip to content

Commit

Permalink
[IND-429]: Create precautionary monitors for tracking block processing speed (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
Christopher-Li authored Oct 27, 2023
1 parent a102353 commit f285ffb
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 9 deletions.
19 changes: 10 additions & 9 deletions indexer/indexer_monitors.tf
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Instantiates the shared `indexer_monitors` module. The module is created only
# when `var.enable_monitoring` is true (count is 1), otherwise it is skipped.
module "indexer_monitors" {
count = var.enable_monitoring ? 1 : 0

# Diff view: the next 9 assignments are the pre-change (removed) side of this
# hunk. They carry the same values as the added side below; presumably they
# were only re-aligned (whitespace is not preserved in this rendering).
source = "../modules/indexer_monitors"
env_tag = "v4-${var.environment}"
environment = var.environment
slack_channel = var.monitoring_slack_channel
pagerduty_tag = var.monitoring_pagerduty_tag
ecs_cluster_name = var.full_node_name
msk_cluster_name = aws_msk_cluster.main.cluster_name
team = var.monitoring_team
url = var.indexer_url
# Diff view: the next 10 assignments are the post-change (added) side of this
# hunk. `enable_precautionary_monitors` is the only new argument; it forwards
# the root-level variable of the same name into the module.
source = "../modules/indexer_monitors"
env_tag = "v4-${var.environment}"
environment = var.environment
slack_channel = var.monitoring_slack_channel
pagerduty_tag = var.monitoring_pagerduty_tag
ecs_cluster_name = var.full_node_name
msk_cluster_name = aws_msk_cluster.main.cluster_name
team = var.monitoring_team
url = var.indexer_url
enable_precautionary_monitors = var.enable_precautionary_monitors
}
6 changes: 6 additions & 0 deletions indexer/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,12 @@ variable "monitoring_team" {
default = "v4-indexer"
}

# Root-level feature flag; it is forwarded unchanged to the `indexer_monitors`
# module argument of the same name. Defaults to enabled.
variable "enable_precautionary_monitors" {
  description = "Whether to enable precautionary monitors"
  type        = bool
  default     = true
}

variable "indexer_url" {
type = string
description = "indexer URL to monitor, should not include https:// or www. Should be something like `indexer.dydx.exchange`"
Expand Down
4 changes: 4 additions & 0 deletions modules/indexer_monitors/monitors.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
resource "datadog_monitor_json" "socks_kafka_offset" {
count = var.enable_precautionary_monitors ? 1 : 0

monitor = <<EOF
{
"id": 117804982,
Expand Down Expand Up @@ -33,6 +35,8 @@ EOF
}

resource "datadog_monitor_json" "orderbook_crossed" {
count = var.enable_precautionary_monitors ? 1 : 0

monitor = <<EOF
{
"id": 120397508,
Expand Down
62 changes: 62 additions & 0 deletions modules/indexer_monitors/precautionary_monitors.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Precautionary Datadog monitor on average block-processing speed.
#
# Fires when, averaged over the last 5 minutes, the ratio
#   ender.processed_block.timing.avg (service:indexer, success:true)
#   / dydxprotocol.blocktime_block_time_ms
# exceeds 0.5 for this environment.
#
# Changes relative to the original block:
#   * Gated by `var.enable_precautionary_monitors`, like the other monitors in
#     this module that already use that flag in `count`.
#   * The metric scope uses `env:${var.environment}` instead of the hard-coded
#     `env:testnet`, so the query matches the environment named in the monitor
#     title. For environment = "testnet" the rendered query is unchanged.
#     NOTE(review): assumes metrics are tagged `env:<var.environment>` (not
#     `var.env_tag`) in every environment -- confirm.
#
# NOTE(review): the JSON "id" below is the same value used by
# `socks_kafka_offset` in monitors.tf and by `p95_block_processing_rate` in this
# file; it looks copy-pasted. Left unchanged here -- confirm whether the
# provider ignores it or whether it should be removed.
resource "datadog_monitor_json" "average_block_processing_rate" {
  count = var.enable_precautionary_monitors ? 1 : 0

  monitor = <<EOF
{
  "id": 117804982,
  "name": "[${var.environment}] Average Indexer block processing is slow",
  "type": "query alert",
  "query": "avg(last_5m):avg:ender.processed_block.timing.avg{env:${var.environment}, service:indexer, success:true} / avg:dydxprotocol.blocktime_block_time_ms{env:${var.environment}} > 0.5",
  "message": "This is not an actionable alert. When this alert fires, that means that the Indexer is processing blocks slow and more time should be invested in improving Ender latency. Please notify Trading if this alert fires.\n\n${local.monitor_suffix_literal}",
  "tags": [
    "team:${var.team}",
    "env:${var.env_tag}"
  ],
  "options": {
    "thresholds": {
      "critical": 0.5
    },
    "notify_audit": false,
    "require_full_window": false,
    "notify_no_data": true,
    "renotify_interval": 0,
    "include_tags": false,
    "no_data_timeframe": 60,
    "new_host_delay": 300,
    "silenced": {}
  },
  "priority": null,
  "restricted_roles": null
}
EOF
}

# Precautionary Datadog monitor on p95 block-processing speed.
#
# Fires when, averaged over the last 5 minutes, the ratio
#   ender.processed_block.timing.95percentile (service:indexer, success:true)
#   / dydxprotocol.blocktime_block_time_ms
# exceeds 0.75 for this environment.
#
# Changes relative to the original block:
#   * Gated by `var.enable_precautionary_monitors`, like the other monitors in
#     this module that already use that flag in `count`.
#   * The metric scope uses `env:${var.environment}` instead of the hard-coded
#     `env:testnet`, so the query matches the environment named in the monitor
#     title. For environment = "testnet" the rendered query is unchanged.
#     NOTE(review): assumes metrics are tagged `env:<var.environment>` (not
#     `var.env_tag`) in every environment -- confirm.
#
# NOTE(review): the JSON "id" below is the same value used by
# `socks_kafka_offset` in monitors.tf and by `average_block_processing_rate` in
# this file; it looks copy-pasted. Left unchanged here -- confirm whether the
# provider ignores it or whether it should be removed.
resource "datadog_monitor_json" "p95_block_processing_rate" {
  count = var.enable_precautionary_monitors ? 1 : 0

  monitor = <<EOF
{
  "id": 117804982,
  "name": "[${var.environment}] p95 Indexer block processing is slow",
  "type": "query alert",
  "query": "avg(last_5m):avg:ender.processed_block.timing.95percentile{env:${var.environment}, service:indexer, success:true} / avg:dydxprotocol.blocktime_block_time_ms{env:${var.environment}} > 0.75",
  "message": "This is not an actionable alert. When this alert fires, that means that the Indexer is processing blocks slow and more time should be invested in improving Ender latency. Please notify Trading if this alert fires.\n\n${local.monitor_suffix_literal}",
  "tags": [
    "team:${var.team}",
    "env:${var.env_tag}"
  ],
  "options": {
    "thresholds": {
      "critical": 0.75
    },
    "notify_audit": false,
    "require_full_window": false,
    "notify_no_data": true,
    "renotify_interval": 0,
    "include_tags": false,
    "no_data_timeframe": 60,
    "new_host_delay": 300,
    "silenced": {}
  },
  "priority": null,
  "restricted_roles": null
}
EOF
}

5 changes: 5 additions & 0 deletions modules/indexer_monitors/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,8 @@ variable "url" {
type = string
description = "Indexer URL to monitor, should not include https:// or www. Should be something like `indexer.dydx.exchange`"
}

# Module input used in `count` expressions to create monitors conditionally.
# It declares no default, so every caller of this module must pass a value.
variable "enable_precautionary_monitors" {
  description = "Whether to enable precautionary monitors"
  type        = bool
}

0 comments on commit f285ffb

Please sign in to comment.