Skip to content

Commit

Permalink
[OTE-821] Add roundtable monitors for update affiliate info and updat…
Browse files Browse the repository at this point in the history
…e wallet total volume (#134)

* upgrade kafka version and reduce session timeout

* Add partition level logging for mainnet MSK

* Fix terraform apply resource conflict

* Add partition offset plot to vulcan dashboard

* dummy

* Add stale compliance data monitor for mainnet

* add roundtable monitors for update affiliate info and update wallet total volume
  • Loading branch information
jerryfan01234 authored and owl-king committed Oct 16, 2024
1 parent bd21af5 commit 30ef8a6
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 1 deletion.
2 changes: 1 addition & 1 deletion indexer/msk.tf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ resource "aws_msk_configuration" "main" {
message.max.bytes=4194304
unclean.leader.election.enable=true
zookeeper.session.timeout.ms=6000
replica.selector.class = org.apache.kafka.common.replica.RackAwareReplicaSelector
PROPERTIES

lifecycle {
Expand Down Expand Up @@ -57,4 +58,3 @@ resource "aws_msk_cluster" "main" {
revision = aws_msk_configuration.main.latest_revision
}
}

51 changes: 51 additions & 0 deletions modules/indexer_monitors/roundtable_monitors.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Datadog monitor for staleness of the update-affiliate-info roundtable task.
#
# Triggers when the maximum, over the last 5 minutes, of the averaged
# persistent_cache_affiliateInfoUpdateTime_lag_seconds metric exceeds 600
# (10 minutes, per the alert message below).
#
# Inputs:
#   var.environment - interpolated into the monitor name and the query's env filter.
#   var.team        - interpolated into the "team:" tag.
#   var.env_tag     - interpolated into the "env:" tag (separate from var.environment).
#
# The 600 in the query and in options.thresholds.critical express the same
# limit; change both together.
#
# NOTE(review): notify_no_data is false, so a metric that stops being reported
# (or a misspelled metric name) presumably produces no notification - confirm
# whether no-data alerting is wanted for a staleness monitor.
resource "datadog_monitor_json" "roundtable_update_affiliate_info_persistent_cache_stale" {
  # The body is passed as raw JSON; JSON has no comment syntax, so all notes
  # live outside the heredoc. The closing EOF marker stays at column 0.
  # The metric prefix is "roundtable." (single "r"), matching the sibling
  # wallet-total-volume monitor's metric namespace.
  monitor = <<EOF
{
"name": "[${var.environment}] Update affiliate info roundtable is not running successfully.",
"type": "query alert",
"query": "max(last_5m):avg:roundtable.persistent_cache_affiliateInfoUpdateTime_lag_seconds{env:${var.environment}} > 600",
"message": "persistentCache.affiliateInfoUpdateTime is more than 10 minutes in the past. This indicates that update-affiliate-info roundtable has not run successfully in past 10 min -> affiliate_info table is stale.",
"tags": [
"team:${var.team}",
"env:${var.env_tag}"
],
"options": {
"thresholds": {
"critical": 600
},
"notify_audit": false,
"include_tags": false,
"notify_no_data": false,
"silenced": {}
},
"priority": null,
"restricted_roles": null
}
EOF
}

# Datadog monitor for staleness of the update-wallet-total-volume roundtable task.
#
# Triggers when the maximum, over the last 5 minutes, of the averaged
# roundtable.persistent_cache_totalVolumeUpdateTime_lag_seconds metric exceeds
# 600 (10 minutes, per the alert message below).
#
# Inputs:
#   var.environment - interpolated into the monitor name and the query's env filter.
#   var.team        - interpolated into the "team:" tag.
#   var.env_tag     - interpolated into the "env:" tag (separate from var.environment).
#
# The 600 in the query and in options.thresholds.critical express the same
# limit; change both together.
#
# NOTE(review): notify_no_data is false, so a metric that stops being reported
# presumably produces no notification - confirm whether that is intended for a
# staleness monitor.
resource "datadog_monitor_json" "roundtable_update_wallet_total_volume_persistent_cache_stale" {
# The body is passed as raw JSON; JSON has no comment syntax, so all notes
# live outside the heredoc.
monitor = <<EOF
{
"name": "[${var.environment}] Update wallet total volume roundtable is not running successfully.",
"type": "query alert",
"query": "max(last_5m):avg:roundtable.persistent_cache_totalVolumeUpdateTime_lag_seconds{env:${var.environment}} > 600",
"message": "persistentCache.totalVolumeUpdateTime is more than 10 minutes in the past. This indicates that update-wallet-total-volume roundtable has not run successfully in past 10 min -> totalVolume column of wallets table is stale.",
"tags": [
"team:${var.team}",
"env:${var.env_tag}"
],
"options": {
"thresholds": {
"critical": 600
},
"notify_audit": false,
"include_tags": false,
"notify_no_data": false,
"silenced": {}
},
"priority": null,
"restricted_roles": null
}
EOF
}

0 comments on commit 30ef8a6

Please sign in to comment.