
Commit

Only scale up zone after all leader zone replicas are ready (#164)
* Only scale up zone after leader zone replicas are ready

* Update CHANGELOG

* Change to only scaling once all replicas are ready

* Rename config annotation

* Add log line

* remove redundant test

* Update changelog
jhesketh authored Aug 13, 2024
1 parent 53c59f6 commit fa52527
Showing 5 changed files with 89 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@

## main / unreleased

* [FEATURE] Optionally only scale up a `StatefulSet` once all of the leader `StatefulSet` replicas are ready. Enable with the `grafana.com/rollout-upscale-only-when-leader-ready` annotation set to `true`. #164

## v0.17.1

* [ENHANCEMENT] prepare-downscale admission webhook: undo prepare-shutdown calls if adding the `last-downscale` annotation fails. #151
10 changes: 8 additions & 2 deletions README.md
@@ -33,7 +33,11 @@ For each **rollout group**, the operator **guarantees**:

The operator can also optionally coordinate scaling up and down of `StatefulSets` that are part of the same `rollout-group` based on the `grafana.com/rollout-downscale-leader` annotation. When using this feature, the `grafana.com/min-time-between-zones-downscale` label must also be set on each `StatefulSet`.

This can be useful for automating the tedious scaling of stateful services like Mimir ingesters. Making use of this feature requires adding a few annotations and labels to configure how it works.

If the `grafana.com/rollout-upscale-only-when-leader-ready` annotation is set to `true` on a follower `StatefulSet`, the operator will only scale up the follower once all replicas in the leader `StatefulSet` are `ready`. This ensures that the follower zone does not scale up until the leader zone is completely stable.
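For illustration, the decision described above can be sketched as a small pure function (a hypothetical helper, not part of the operator; the real logic lives in `pkg/controller/replicas.go`, shown later in this commit):

```go
package example

// shouldUpscaleFollower sketches the scale-up gate: the follower is only raised
// to the leader's desired replica count if either the annotation is unset, or
// every leader replica reports ready.
func shouldUpscaleFollower(leaderSpec, leaderReady, followerSpec int32, onlyWhenLeaderReady bool) bool {
	if leaderSpec <= followerSpec {
		return false // not a scale-up; downscales are handled separately
	}
	return !onlyWhenLeaderReady || leaderReady == leaderSpec
}
```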

Example usage for a multi-AZ ingester group:

- For `ingester-zone-a`, add the following:
- Labels:
@@ -47,7 +51,8 @@ This can be useful for automating the tedious scaling of stateful services like
- `grafana.com/min-time-between-zones-downscale=12h` (change the value here to an appropriate duration)
- `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down)
- Annotations:
- `grafana.com/rollout-downscale-leader=ingester-zone-a` (zone `b` will follow zone `a`, after a delay)
- `grafana.com/rollout-upscale-only-when-leader-ready=true` (zone `b` will only scale up once all replicas in zone `a` are ready)
- `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service)
- `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service)
- For `ingester-zone-c`, add the following:
@@ -56,6 +61,7 @@ This can be useful for automating the tedious scaling of stateful services like
- `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down)
- Annotations:
- `grafana.com/rollout-downscale-leader=ingester-zone-b` (zone `c` will follow zone `b`, after a delay)
- `grafana.com/rollout-upscale-only-when-leader-ready=true` (zone `c` will only scale up once all replicas in zone `b` are ready)
- `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service)
- `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service)

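For reference, the `ingester-zone-b` example above expressed as Kubernetes API objects (a sketch only: in practice these labels and annotations live in the manifests or jsonnet, and the `rollout-group` label value shown here is an assumption):

```go
package example

import (
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ingesterZoneB mirrors the ingester-zone-b labels and annotations listed above.
var ingesterZoneB = appsv1.StatefulSet{
	ObjectMeta: metav1.ObjectMeta{
		Name: "ingester-zone-b",
		Labels: map[string]string{
			"rollout-group":                                "ingester", // assumed group label
			"grafana.com/min-time-between-zones-downscale": "12h",
			"grafana.com/prepare-downscale":                "true",
		},
		Annotations: map[string]string{
			"grafana.com/rollout-downscale-leader":               "ingester-zone-a",
			"grafana.com/rollout-upscale-only-when-leader-ready": "true",
			"grafana.com/prepare-downscale-http-path":            "ingester/prepare-shutdown",
			"grafana.com/prepare-downscale-http-port":            "80",
		},
	},
}
```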
3 changes: 3 additions & 0 deletions pkg/config/config.go
@@ -28,6 +28,9 @@ const (
// RolloutDownscaleLeaderAnnotationKey is the name of the leader statefulset that should be used to determine
// the number of replicas in a follower statefulset.
RolloutDownscaleLeaderAnnotationKey = "grafana.com/rollout-downscale-leader"
// RolloutLeaderReadyAnnotationKey controls whether a follower is only scaled up once the leader's `ready` replicas match its desired replicas.
RolloutLeaderReadyAnnotationKey = "grafana.com/rollout-upscale-only-when-leader-ready"
RolloutLeaderReadyAnnotationValue = "true"

rolloutMirrorReplicasFromResourceAnnotationKeyPrefix = "grafana.com/rollout-mirror-replicas-from-resource"
// RolloutMirrorReplicasFromResourceNameAnnotationKey -- when set (together with "kind" and optionally "api-version" annotations), rollout-operator sets number of
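A minimal sketch (not part of this commit) of how these constants can be read off a follower `StatefulSet`, mirroring the check performed in `pkg/controller/replicas.go` below; the import paths are assumed from the repository layout:

```go
package controller

import (
	v1 "k8s.io/api/apps/v1"

	"github.com/grafana/rollout-operator/pkg/config"
)

// upscaleOnlyWhenLeaderReady reports whether the follower StatefulSet opts in
// to the leader-ready gate via the new annotation.
func upscaleOnlyWhenLeaderReady(sts *v1.StatefulSet) bool {
	v, ok := sts.GetAnnotations()[config.RolloutLeaderReadyAnnotationKey]
	return ok && v == config.RolloutLeaderReadyAnnotationValue
}
```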
32 changes: 22 additions & 10 deletions pkg/controller/replicas.go
@@ -20,23 +20,35 @@ func desiredStsReplicas(group string, sts *v1.StatefulSet, all []*v1.StatefulSet
}

leaderReplicas := *leader.Spec.Replicas
leaderReadyReplicas := leader.Status.ReadyReplicas

logger = log.With(
logger,
"follower", sts.GetName(),
"follower_replicas", followerReplicas,
"leader", leader.GetName(),
"leader_replicas", leaderReplicas,
"leader_ready_replicas", leaderReadyReplicas,
"group", group,
)

if leaderReplicas > followerReplicas {
// Handle scale-up scenarios
annotations := sts.GetAnnotations()
onlyWhenReady, ok := annotations[config.RolloutLeaderReadyAnnotationKey]
if ok && onlyWhenReady == config.RolloutLeaderReadyAnnotationValue {
// We only scale up once all of the leader pods are ready. Otherwise we do nothing.
if leaderReplicas != leaderReadyReplicas {
level.Debug(logger).Log("msg", "not all leader replicas are ready. Follower replicas not changed.")
return followerReplicas, nil
}
}
return leaderReplicas, nil
}

if leaderReplicas < followerReplicas {

minTimeElapsed, err := minimumTimeHasElapsed(sts, all, logger)
if err != nil {
return followerReplicas, err
54 changes: 54 additions & 0 deletions pkg/controller/replicas_test.go
@@ -211,6 +211,60 @@ func TestReconcileStsReplicas(t *testing.T) {
require.Equal(t, int32(4), replicas)
})

t.Run("scale up only when all leader replicas are ready", func(t *testing.T) {
sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 3))
sts2 := mockStatefulSet("test-zone-b", withReplicas(2, 2), withAnnotations(map[string]string{
config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue,
}))
sts3 := mockStatefulSet("test-zone-c", withReplicas(1, 1), withAnnotations(map[string]string{
config.RolloutDownscaleLeaderAnnotationKey: "test-zone-b",
config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue,
}))

replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(2), replicas, "no change in replicas because leader isn't ready yet")

sts1 = mockStatefulSet("test-zone-a", withReplicas(4, 4))
replicasB, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(4), replicasB, "ready to scale zone-b")

sts2 = mockStatefulSet("test-zone-b", withReplicas(replicasB, 2))
replicasC, err := desiredStsReplicas("test", sts3, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(1), replicasC, "no change in replicas because zone-b isn't ready yet")

sts2 = mockStatefulSet("test-zone-b", withReplicas(replicasB, replicasB))
replicasC, err = desiredStsReplicas("test", sts3, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(4), replicasC, "ready to scale zone-c")
})

t.Run("scale up ignoring ready replicas", func(t *testing.T) {
sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 2))
sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{
config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
}))

replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(4), replicas)
})

t.Run("scaling down always occurs to desired replicas", func(t *testing.T) {
sts1 := mockStatefulSet("test-zone-a", withReplicas(2, 1))
sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{
config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue,
}))

replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, int32(2), replicas)
})

t.Run("scale down min time error", func(t *testing.T) {
downscale1 := time.Now().UTC().Round(time.Second).Add(-72 * time.Hour)
downscale2 := time.Now().UTC().Round(time.Second).Add(-60 * time.Hour)
