Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a secondary grouping key for rollout operation #190

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pkg/admission/prep_downscale.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,9 @@ func prepareDownscale(ctx context.Context, l log.Logger, ar v1.AdmissionReview,
}

rolloutGroup := lbls[config.RolloutGroupLabelKey]
rolloutSecondaryGroup := lbls[config.RolloutSecondaryGroupLabelKey]
if rolloutGroup != "" {
stsList, err := findStatefulSetsForRolloutGroup(ctx, api, ar.Request.Namespace, rolloutGroup)
stsList, err := findStatefulSetsForRolloutGroup(ctx, api, ar.Request.Namespace, rolloutGroup, rolloutSecondaryGroup)
if err != nil {
level.Warn(logger).Log("msg", "downscale not allowed due to error while finding other statefulsets", "err", err)
return deny(
Expand Down Expand Up @@ -354,7 +355,7 @@ func findPodsForStatefulSet(ctx context.Context, api kubernetes.Interface, names
})
}

func findStatefulSetsForRolloutGroup(ctx context.Context, api kubernetes.Interface, namespace, rolloutGroup string) (*appsv1.StatefulSetList, error) {
func findStatefulSetsForRolloutGroup(ctx context.Context, api kubernetes.Interface, namespace, rolloutGroup string, rolloutSecondaryGroup string) (*appsv1.StatefulSetList, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "admission.findStatefulSetsForRolloutGroup()")
defer span.Finish()

Expand All @@ -366,6 +367,15 @@ func findStatefulSetsForRolloutGroup(ctx context.Context, api kubernetes.Interfa
return nil, err
}
sel := labels.NewSelector().Add(*groupReq)

if rolloutSecondaryGroup != "" {
secGroupReq, err := labels.NewRequirement(config.RolloutSecondaryGroupLabelKey, selection.Equals, []string{rolloutSecondaryGroup})
if err != nil {
return nil, err
}
sel = sel.Add(*secGroupReq)
}

return api.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{
LabelSelector: sel.String(),
})
Expand Down
102 changes: 100 additions & 2 deletions pkg/admission/prep_downscale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ func TestUndoPrepareShutdown(t *testing.T) {
func TestFindStatefulSetWithNonUpdatedReplicas(t *testing.T) {
namespace := "test"
rolloutGroup := "ingester"
rolloutSecondaryGroup := ""
labels := map[string]string{config.RolloutGroupLabelKey: rolloutGroup, "name": "zone-a"}
stsMeta := metav1.ObjectMeta{
Name: "zone-a",
Expand Down Expand Up @@ -525,7 +526,7 @@ func TestFindStatefulSetWithNonUpdatedReplicas(t *testing.T) {
}
api := fake.NewSimpleClientset(objects...)

stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup)
stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup, rolloutSecondaryGroup)
require.NoError(t, err)

sts, err := findStatefulSetWithNonUpdatedReplicas(context.Background(), api, namespace, stsList, stsMeta.Name)
Expand All @@ -537,6 +538,7 @@ func TestFindStatefulSetWithNonUpdatedReplicas(t *testing.T) {
func TestFindStatefulSetWithNonUpdatedReplicas_UnavailableReplicasSameZone(t *testing.T) {
namespace := "test"
rolloutGroup := "ingester"
rolloutSecondaryGroup := ""
labels := map[string]string{config.RolloutGroupLabelKey: rolloutGroup, "name": "zone-a"}
stsMeta := metav1.ObjectMeta{
Name: "zone-a",
Expand All @@ -559,7 +561,103 @@ func TestFindStatefulSetWithNonUpdatedReplicas_UnavailableReplicasSameZone(t *te
}
api := fake.NewSimpleClientset(objects...)

stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup)
stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup, rolloutSecondaryGroup)
require.NoError(t, err)

sts, err := findStatefulSetWithNonUpdatedReplicas(context.Background(), api, namespace, stsList, stsMeta.Name)
require.NoError(t, err)
require.Nil(t, sts)
}

// TestFindStatefulSetWithNonUpdatedReplicasWithSecondaryGroup exercises the
// StatefulSet lookup when both the rollout group label and the secondary
// rollout group label are supplied.
//
// Fixture: two StatefulSets ("zone-a" and "zone-b") in the same namespace that
// carry an identical label set (rollout group "ingester", secondary group
// "mimir"), plus one running, ready pod with those same labels. The test lists
// the StatefulSets through findStatefulSetsForRolloutGroup using both group
// values, then calls findStatefulSetWithNonUpdatedReplicas passing zone-a's
// name as the final argument, and expects the returned entry to be "zone-b".
func TestFindStatefulSetWithNonUpdatedReplicasWithSecondaryGroup(t *testing.T) {
	namespace := "test"
	rolloutGroup := "ingester"
	rolloutSecondaryGroup := "mimir"
	// The same label map is shared by both StatefulSets and the pod below, so
	// the selector built from the two group labels matches zone-a and zone-b.
	labels := map[string]string{config.RolloutGroupLabelKey: rolloutGroup, config.RolloutSecondaryGroupLabelKey: rolloutSecondaryGroup, "name": "zone-a"}
	stsMeta := metav1.ObjectMeta{
		Name:      "zone-a",
		Namespace: namespace,
		Labels:    labels,
	}
	objects := []runtime.Object{
		// zone-a: its metadata is reused for the pod template.
		&apps.StatefulSet{
			ObjectMeta: stsMeta,
			Spec: apps.StatefulSetSpec{
				Template: corev1.PodTemplateSpec{
					ObjectMeta: stsMeta,
				},
			},
			Status: apps.StatefulSetStatus{
				Replicas:        1,
				UpdatedReplicas: 1,
			},
		},
		// zone-b: same labels as zone-a, no pod template metadata.
		&apps.StatefulSet{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "zone-b",
				Namespace: namespace,
				Labels:    labels,
			},
			Status: apps.StatefulSetStatus{
				Replicas:        1,
				UpdatedReplicas: 1,
			},
		},
		// A single running pod whose only container is ready.
		&corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "pod1",
				Namespace: namespace,
				Labels:    labels,
			},
			Status: corev1.PodStatus{
				Phase: corev1.PodRunning,
				ContainerStatuses: []corev1.ContainerStatus{{
					Ready: true,
					State: corev1.ContainerState{
						Running: &corev1.ContainerStateRunning{},
					},
				}},
			},
		},
	}
	api := fake.NewSimpleClientset(objects...)

	stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup, rolloutSecondaryGroup)
	require.NoError(t, err)

	sts, err := findStatefulSetWithNonUpdatedReplicas(context.Background(), api, namespace, stsList, stsMeta.Name)
	require.NoError(t, err)
	require.NotNil(t, sts)
	// testify expects (expected, actual); keeping that order makes a failure
	// message label the two values correctly.
	assert.Equal(t, "zone-b", sts.name)
}

func TestFindStatefulSetWithNonUpdatedReplicasWithSecondaryGroup_UnavailableReplicasSameZone(t *testing.T) {
namespace := "test"
rolloutGroup := "ingester"
rolloutSecondaryGroup := "mimir"
labels := map[string]string{config.RolloutGroupLabelKey: rolloutGroup, config.RolloutSecondaryGroupLabelKey: rolloutSecondaryGroup, "name": "zone-a"}
stsMeta := metav1.ObjectMeta{
Name: "zone-a",
Namespace: namespace,
Labels: labels,
}
objects := []runtime.Object{
&apps.StatefulSet{
ObjectMeta: stsMeta,
Spec: apps.StatefulSetSpec{
Template: corev1.PodTemplateSpec{
ObjectMeta: stsMeta,
},
},
Status: apps.StatefulSetStatus{
Replicas: 1,
UpdatedReplicas: 0,
},
},
}
api := fake.NewSimpleClientset(objects...)

stsList, err := findStatefulSetsForRolloutGroup(context.Background(), api, namespace, rolloutGroup, rolloutSecondaryGroup)
require.NoError(t, err)

sts, err := findStatefulSetWithNonUpdatedReplicas(context.Background(), api, namespace, stsList, stsMeta.Name)
Expand Down
3 changes: 2 additions & 1 deletion pkg/admission/zone_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,9 @@ func (zt *zoneTracker) prepareDownscale(ctx context.Context, l log.Logger, ar v1
}

rolloutGroup := lbls[config.RolloutGroupLabelKey]
rolloutSecondaryGroup := lbls[config.RolloutSecondaryGroupLabelKey]
if rolloutGroup != "" {
stsList, err := findStatefulSetsForRolloutGroup(ctx, api, ar.Request.Namespace, rolloutGroup)
stsList, err := findStatefulSetsForRolloutGroup(ctx, api, ar.Request.Namespace, rolloutGroup, rolloutSecondaryGroup)
if err != nil {
level.Warn(logger).Log("msg", "downscale not allowed due to error while finding other statefulsets", "err", err)
return deny(
Expand Down
3 changes: 2 additions & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ const (
PrepareDownscaleLabelValue = "true"

// RolloutGroupLabelKey is the group to which multiple statefulsets belong and must be operated on together.
RolloutGroupLabelKey = "rollout-group"
RolloutGroupLabelKey = "rollout-group"
RolloutSecondaryGroupLabelKey = "rollout-secondary-group"
// RolloutMaxUnavailableAnnotationKey is the max number of pods in each statefulset that may be stopped at
// one time.
RolloutMaxUnavailableAnnotationKey = "rollout-max-unavailable"
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func (c *RolloutController) reconcile(ctx context.Context) error {
}

// Group statefulsets by the rollout group label. Each group will be reconciled independently.
groups := util.GroupStatefulSetsByLabel(sets, config.RolloutGroupLabelKey)
groups := util.GroupStatefulSetsByLabel(sets, config.RolloutGroupLabelKey, config.RolloutSecondaryGroupLabelKey)
var reconcileErrs error
for groupName, groupSets := range groups {
if err := c.reconcileStatefulSetsGroup(ctx, groupName, groupSets); err != nil {
Expand Down
147 changes: 146 additions & 1 deletion pkg/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ func TestRolloutController_Reconcile(t *testing.T) {
expectedPatchedSets map[string][]string
expectedPatchedResources map[string][]string
expectedErr string
additionalGroups string
additionalGroupFailures string
}{
"should return error if some StatefulSet don't have OnDelete update strategy": {
statefulSets: []runtime.Object{
Expand All @@ -83,6 +85,41 @@ func TestRolloutController_Reconcile(t *testing.T) {
mockStatefulSetPod("ingester-zone-b-2", testPrevRevisionHash),
},
},
"should do nothing if multiple StatefulSets have not-Ready pods reported by the StatefulSet -- add secondary group need update": {
statefulSets: []runtime.Object{
mockStatefulSet("mimir-ingester-zone-a", withPrevRevision(), withReplicas(3, 2)),
mockStatefulSet("mimir-ingester-zone-b", withPrevRevision(), withReplicas(3, 1)),

mockStatefulSet("loki-ingester-zone-a", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
mockStatefulSet("loki-ingester-zone-b", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
},
pods: []runtime.Object{
mockStatefulSetPod("mimir-ingester-zone-a-0", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-1", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-2", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-0", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-1", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-2", testPrevRevisionHash),

mockStatefulSetPod("loki-ingester-zone-a-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-2", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-2", testPrevRevisionHash),
},
expectedDeletedPods: []string{"loki-ingester-zone-a-0", "loki-ingester-zone-a-1"},
additionalGroups: `rollout_operator_group_reconciles_total{rollout_group="ingester-loki"} 1`,
additionalGroupFailures: `rollout_operator_group_reconciles_failed_total{rollout_group="ingester-loki"} 0`,
},
"should do nothing if multiple StatefulSets have not-Ready pods but NOT reported by the StatefulSet status yet": {
statefulSets: []runtime.Object{
mockStatefulSet("ingester-zone-a", withPrevRevision(), withReplicas(3, 3)),
Expand All @@ -101,6 +138,45 @@ func TestRolloutController_Reconcile(t *testing.T) {
}),
},
},
"should do nothing if multiple StatefulSets have not-Ready pods but NOT reported by the StatefulSet status yet -- add secondary group need update": {
statefulSets: []runtime.Object{
mockStatefulSet("mimir-ingester-zone-a", withPrevRevision(), withReplicas(3, 3)),
mockStatefulSet("mimir-ingester-zone-b", withPrevRevision(), withReplicas(3, 3)),

mockStatefulSet("loki-ingester-zone-a", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
mockStatefulSet("loki-ingester-zone-b", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
},
pods: []runtime.Object{
mockStatefulSetPod("mimir-ingester-zone-a-0", testPrevRevisionHash, func(pod *corev1.Pod) {
pod.DeletionTimestamp = util.Now()
}),
mockStatefulSetPod("mimir-ingester-zone-a-1", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-2", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-0", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-1", testPrevRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-2", testPrevRevisionHash, func(pod *corev1.Pod) {
pod.DeletionTimestamp = util.Now()
}),

mockStatefulSetPod("loki-ingester-zone-a-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-2", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-2", testPrevRevisionHash),
},
expectedDeletedPods: []string{"loki-ingester-zone-a-0", "loki-ingester-zone-a-1"},
additionalGroups: `rollout_operator_group_reconciles_total{rollout_group="ingester-loki"} 1`,
additionalGroupFailures: `rollout_operator_group_reconciles_failed_total{rollout_group="ingester-loki"} 0`,
},
"should do nothing if multiple StatefulSets have not-Ready pods but ONLY reported by 1 StatefulSet status": {
statefulSets: []runtime.Object{
mockStatefulSet("ingester-zone-a", withPrevRevision(), withReplicas(3, 2)),
Expand Down Expand Up @@ -131,6 +207,67 @@ func TestRolloutController_Reconcile(t *testing.T) {
mockStatefulSetPod("ingester-zone-b-2", testLastRevisionHash),
},
},
"should do nothing if all pods are updated -- add secondary group": {
statefulSets: []runtime.Object{
mockStatefulSet("mimir-ingester-zone-a"),
mockStatefulSet("loki-ingester-zone-a",
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
mockStatefulSet("mimir-ingester-zone-b"),
mockStatefulSet("loki-ingester-zone-b",
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
},
pods: []runtime.Object{
mockStatefulSetPod("mimir-ingester-zone-a-0", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-1", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-2", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-0", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-1", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-2", testLastRevisionHash),
},
additionalGroups: `rollout_operator_group_reconciles_total{rollout_group="ingester-loki"} 1`,
additionalGroupFailures: `rollout_operator_group_reconciles_failed_total{rollout_group="ingester-loki"} 0`,
},
"should do nothing if all pods are updated -- add secondary group need update": {
statefulSets: []runtime.Object{
mockStatefulSet("mimir-ingester-zone-a"),
mockStatefulSet("mimir-ingester-zone-b"),

mockStatefulSet("loki-ingester-zone-a", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
mockStatefulSet("loki-ingester-zone-b", withReplicas(3, 3),
withLabels(map[string]string{
config.RolloutSecondaryGroupLabelKey: "loki",
}),
),
},
pods: []runtime.Object{
mockStatefulSetPod("mimir-ingester-zone-a-0", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-1", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-a-2", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-0", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-1", testLastRevisionHash),
mockStatefulSetPod("mimir-ingester-zone-b-2", testLastRevisionHash),

mockStatefulSetPod("loki-ingester-zone-a-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-a-2", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-0", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-1", testPrevRevisionHash),
mockStatefulSetPod("loki-ingester-zone-b-2", testPrevRevisionHash),
},
expectedDeletedPods: []string{"loki-ingester-zone-a-0", "loki-ingester-zone-a-1"},
additionalGroups: `rollout_operator_group_reconciles_total{rollout_group="ingester-loki"} 1`,
additionalGroupFailures: `rollout_operator_group_reconciles_failed_total{rollout_group="ingester-loki"} 0`,
},
"should delete pods that needs to be updated, honoring the configured max unavailable": {
statefulSets: []runtime.Object{
mockStatefulSet("ingester-zone-a"),
Expand Down Expand Up @@ -650,16 +787,24 @@ func TestRolloutController_Reconcile(t *testing.T) {
if testData.expectedErr != "" {
expectedFailures = 1
}
addlGroup := ""
addlGroupFailures := ""
if testData.additionalGroups != "" {
addlGroup = testData.additionalGroups
addlGroupFailures = testData.additionalGroupFailures
}

assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(fmt.Sprintf(`
# HELP rollout_operator_group_reconciles_total Total number of reconciles started for a specific rollout group.
# TYPE rollout_operator_group_reconciles_total counter
rollout_operator_group_reconciles_total{rollout_group="ingester"} 1
%s

# HELP rollout_operator_group_reconciles_failed_total Total number of reconciles failed for a specific rollout group.
# TYPE rollout_operator_group_reconciles_failed_total counter
rollout_operator_group_reconciles_failed_total{rollout_group="ingester"} %d
`, expectedFailures)),
%s
`, addlGroup, expectedFailures, addlGroupFailures)),
"rollout_operator_group_reconciles_total",
"rollout_operator_group_reconciles_failed_total"))
})
Expand Down
Loading
Loading