Skip to content

Commit

Permalink
(PoC) Alertmanager: Strict initialization mode for the Alertmanager
Browse files Browse the repository at this point in the history
  • Loading branch information
santihernandezc committed Jan 24, 2025
1 parent 8f44959 commit de252bb
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2377,6 +2377,11 @@ sharding_ring:
# CLI flag: -alertmanager.grafana-alertmanager-conditionally-skip-tenant-suffix
[grafana_alertmanager_conditionally_skip_tenant_suffix: <string> | default = ""]
# (experimental) Skip starting the Alertmanager for tenants without a
# non-default, non-empty configuration.
# CLI flag: -alertmanager.strict-initialization-mode
[strict_initialization_mode: <boolean> | default = false]
# (advanced) Maximum number of concurrent GET requests allowed per tenant. The
# zero value (and negative values) result in a limit of GOMAXPROCS or 8,
# whichever is larger. Status code 503 is served for GET requests that would
Expand Down
15 changes: 12 additions & 3 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type MultitenantAlertmanagerConfig struct {

GrafanaAlertmanagerCompatibilityEnabled bool `yaml:"grafana_alertmanager_compatibility_enabled" category:"experimental"`
GrafanaAlertmanagerTenantSuffix string `yaml:"grafana_alertmanager_conditionally_skip_tenant_suffix" category:"experimental"`
StrictInitializationMode bool `yaml:"strict_initialization_mode" category:"experimental"`

MaxConcurrentGetRequestsPerTenant int `yaml:"max_concurrent_get_requests_per_tenant" category:"advanced"`

Expand Down Expand Up @@ -129,6 +130,7 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet, logger
f.BoolVar(&cfg.EnableAPI, "alertmanager.enable-api", true, "Enable the alertmanager config API.")
f.BoolVar(&cfg.GrafanaAlertmanagerCompatibilityEnabled, "alertmanager.grafana-alertmanager-compatibility-enabled", false, "Enable routes to support the migration and operation of the Grafana Alertmanager.")
f.StringVar(&cfg.GrafanaAlertmanagerTenantSuffix, "alertmanager.grafana-alertmanager-conditionally-skip-tenant-suffix", "", "Skip starting the Alertmanager for tenants matching this suffix unless they have a promoted, non-default Grafana Alertmanager configuration.")
f.BoolVar(&cfg.StrictInitializationMode, "alertmanager.strict-initialization-mode", false, "Skip starting the Alertmanager for tenants without a non-default, non-empty configuration.")
f.IntVar(&cfg.MaxConcurrentGetRequestsPerTenant, "alertmanager.max-concurrent-get-requests-per-tenant", 0, "Maximum number of concurrent GET requests allowed per tenant. The zero value (and negative values) result in a limit of GOMAXPROCS or 8, whichever is larger. Status code 503 is served for GET requests that would exceed the concurrency limit.")

f.BoolVar(&cfg.EnableStateCleanup, "alertmanager.enable-state-cleanup", true, "Enables periodic cleanup of alertmanager stateful data (notification logs and silences) from object storage. When enabled, data is removed for any tenant that does not have a configuration.")
Expand Down Expand Up @@ -677,7 +679,7 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s
}

if !startAM {
level.Debug(am.logger).Log("msg", "not initializing alertmanager for grafana tenant without a promoted, non-default configuration", "user", user)
level.Debug(am.logger).Log("msg", "not initializing alertmanager for tenant", "user", user)
amInitSkipped[user] = struct{}{}
continue
}
Expand Down Expand Up @@ -723,20 +725,27 @@ func (am *MultitenantAlertmanager) syncConfigs(ctx context.Context, cfgMap map[s
// computeConfig takes an AlertConfigDescs struct containing Mimir and Grafana configurations.
// It returns the final configuration and a bool indicating whether the Alertmanager should be started for the tenant.
func (am *MultitenantAlertmanager) computeConfig(cfgs alertspb.AlertConfigDescs) (amConfig, bool, error) {
isGrafanaCfgUsable := cfgs.Grafana.Promoted && !cfgs.Grafana.Default
isMimirCfgUsable := cfgs.Mimir.RawConfig != "" && cfgs.Mimir.RawConfig != am.fallbackConfig
if am.cfg.StrictInitializationMode && !isGrafanaCfgUsable && !isMimirCfgUsable {
// Skip starting the Alertmanager if we have no usable configurations.
return amConfig{}, false, nil
}

cfg := amConfig{
AlertConfigDesc: cfgs.Mimir,
tmplExternalURL: am.cfg.ExternalURL.URL,
}

// If the Grafana configuration is either default, not promoted, or empty, use the Mimir configuration.
if !cfgs.Grafana.Promoted || cfgs.Grafana.Default || cfgs.Grafana.RawConfig == "" {
if !isGrafanaCfgUsable || cfgs.Grafana.RawConfig == "" {
level.Debug(am.logger).Log("msg", "using mimir config", "user", cfgs.Mimir.User)
isGrafanaTenant := am.cfg.GrafanaAlertmanagerTenantSuffix != "" && strings.HasSuffix(cfgs.Mimir.User, am.cfg.GrafanaAlertmanagerTenantSuffix)
return cfg, !isGrafanaTenant, nil
}

// If the Mimir configuration is either default or empty, use the Grafana configuration.
if cfgs.Mimir.RawConfig == am.fallbackConfig || cfgs.Mimir.RawConfig == "" {
if !isMimirCfgUsable {
level.Debug(am.logger).Log("msg", "using grafana config with the default globals", "user", cfgs.Mimir.User)
cfg, err := createUsableGrafanaConfig(cfgs.Grafana, am.fallbackConfig)
return cfg, true, err
Expand Down

0 comments on commit de252bb

Please sign in to comment.