Clean up Endpoints object #16

Open · wants to merge 2 commits into base: main
81 changes: 78 additions & 3 deletions pkg/ha/ha_service.go
@@ -12,7 +12,8 @@ import (

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
ctlmgr "sigs.k8s.io/controller-runtime/pkg/manager"

@@ -68,7 +69,7 @@ func (ha *HAService) setEndpoints(ctx context.Context) error {
// Bypass client cache to avoid triggering a cluster wide list-watch for Endpoints - our RBAC does not allow it
err := ha.manager.GetAPIReader().Get(ctx, client.ObjectKey{Namespace: ha.namespace, Name: app.Name}, &endpoints)
if err != nil {
- if !errors.IsNotFound(err) {
+ if !apierrors.IsNotFound(err) {
return fmt.Errorf("updating the service endpoint to point to the new leader: retrieving endpoints: %w", err)
}

@@ -98,6 +99,7 @@ func (ha *HAService) Start(ctx context.Context) error {

select {
case <-ctx.Done():
+ _ = ha.cleanUpServiceEndpoints()
return fmt.Errorf("starting HA service: %w", ctx.Err())
case <-ha.testIsolation.TimeAfter(retryPeriod):
}
@@ -108,5 +110,78 @@ func (ha *HAService) Start(ctx context.Context) error {
}
}

- return nil
+ <-ctx.Done()
+ err := ha.cleanUpServiceEndpoints()
+ if err == nil {
+ err = ctx.Err()
+ }
+ return err
}

// cleanUpServiceEndpoints is executed upon ending leadership. Its purpose is to remove the Endpoints object created upon acquiring
// leadership.
func (ha *HAService) cleanUpServiceEndpoints() error {
// Use our own context. This function executes when the main application context is closed.
// Also, try to finish before a potential 15 seconds termination grace timeout.
Collaborator

Where do these 15s of potential termination grace timeout come from?
According to https://github.com/gardener/gardener-custom-metrics/blob/c43b2064794e5534f2a0d7a831285210620f9ed8/example/custom-metrics-deployment.yaml#L72 we should have 30s from the SIGTERM signal until the SIGKILL.

Contributor Author (@andrerun, Apr 4, 2024)

15 is a nice, round number, and also half of the default 30. I'm speculating that upon a hypothetical future shortening of the grace period, 15 will be a likely choice (the other obvious choice being 10, of course).

This is not a critical choice. I'm simply picking a value which is likely to work slightly better with potential future changes.

Collaborator

There is an option for the manager which allows specifying the termination grace period for all runnables: GracefulShutdownTimeout, and it defaults to 30s: https://github.com/kubernetes-sigs/controller-runtime/blob/76d3d0826fa9dca267c70c68c706f6de40084043/pkg/manager/internal.go#L55
Not sure if it makes sense to use a (doubly) short time for this function.

Either way, if you have a strong reason not to use the default or not to make it configurable, and to keep it at 15s, can you please add the reason as a comment? Otherwise people will wonder where the magic number comes from.
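
For reference, a minimal sketch of what the reviewer describes (an assumption for illustration, not code from this PR): the manager-wide shutdown budget is the single GracefulShutdownTimeout field on controller-runtime's manager options. The newManager helper and the 15s value here are hypothetical.

```go
package ha

import (
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// newManager is a hypothetical constructor showing how an explicit shutdown
// budget could be wired in; GracefulShutdownTimeout is part of
// controller-runtime's manager.Options and defaults to 30s when unset.
func newManager() (ctrl.Manager, error) {
	gracefulShutdownTimeout := 15 * time.Second // hypothetical value, half the 30s default
	return ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
		GracefulShutdownTimeout: &gracefulShutdownTimeout,
	})
}
```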

ctx, cancel := context.WithTimeout(context.Background(), 14*time.Second)
defer cancel()
seedClient := ha.manager.GetClient()
Collaborator

Nit: I wouldn't call it a seedClient. In all other places we call it client. Tomorrow, we might need to support the runtime cluster to scale gardener-apiserver or virtual-kube-apiserver.

Suggested change
seedClient := ha.manager.GetClient()
client := ha.manager.GetClient()

Contributor Author

Agreed, but since this program is talking to multiple clusters, I don't want to use "client". I agree with your point that the name will need to change in the future, but at that time I'll also have the context which will allow me to come up with the right generalisation, without resorting to the excessively general (IMO) "client".


attempt := 0
var err error
for {
Contributor Author (@andrerun, Apr 4, 2024)

I admit that it may have been better to use a poll function, but I don't think it's worth refactoring. Replacing a "for", with which everybody is familiar, with a callback-based function in a domain-specific library is a matter of preference.

Collaborator

I'd personally prefer to use Poll<...> here unless there is a strong argument for the max number of attempts. Then I would stick with the for loop.
Generally, one benefit is that Poll<...> should already be tested. Another is that when someone tries to do a similar wait and sees the for{...} loop he might decide to copy it and change it a bit, instead of simply reusing the Poll<...> function. As for domain-specificity - I think both GCMx and the functions in the https://github.com/kubernetes/kubernetes/blob/v1.28.0/staging/src/k8s.io/apimachinery/pkg/util/wait/poll.go share the same domain.

Collaborator

Imagine that we have (or introduce) a bug in the func by forgetting to break/exit in one case. This for { would then result in an endless loop. Why should we have custom retry logic when there is a package that already does this?

Contributor Author

@ialidzhikov @plkokanov
Poll() is a better fit. No good argument against it. It was just that I didn't know if refactoring is worth it.

But the fact is that I haven't used Poll() before, so I can't really judge. I expect you're probably right and the improved readability justifies the refactoring.

I'll use Poll().
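
For illustration, a rough sketch of that direction (an assumption, not the final PR code): the hand-rolled attempt loop below re-expressed with the apimachinery wait helpers. tryDeleteEndpoints is a hypothetical helper that would wrap the Get + precondition-Delete logic from the loop body and report done=true once the Endpoints object is deleted, missing, or abandoned.

```go
import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// cleanUpServiceEndpointsWithPoll is a sketch only: retry roughly once per
// second, but never longer than the 14s cleanup budget carried by ctx.
func (ha *HAService) cleanUpServiceEndpointsWithPoll(ctx context.Context) error {
	return wait.PollUntilContextTimeout(ctx, 1*time.Second, 14*time.Second, true,
		func(ctx context.Context) (bool, error) {
			// hypothetical helper wrapping the Get + precondition Delete below
			return ha.tryDeleteEndpoints(ctx)
		})
}
```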

endpoints := corev1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: app.Name,
Namespace: ha.namespace,
},
}
err = seedClient.Get(ctx, client.ObjectKeyFromObject(&endpoints), &endpoints)
if err != nil {
if apierrors.IsNotFound(err) {
ha.log.V(app.VerbosityVerbose).Info("The endpoints object cleanup succeeded: the object was missing")
return nil
}

ha.log.V(app.VerbosityInfo).Info("Failed to retrieve the endpoints object", "error", err.Error())
} else {
// Avoid data race. We don't want to delete the endpoint if it is sending traffic to a replica other than this one.
if !ha.isEndpointStillPointingToOurReplica(&endpoints) {
// Someone else is using the endpoint. We can't perform safe cleanup. Abandon the object.
ha.log.V(app.VerbosityWarning).Info(
"Abandoning endpoints object because it was modified by an external actor")
return nil
}

// Only delete the endpoint if it is the resource version for which we confirmed that it points to us.
deletionPrecondition := client.Preconditions{UID: &endpoints.UID, ResourceVersion: &endpoints.ResourceVersion}
err = seedClient.Delete(ctx, &endpoints, deletionPrecondition)
if client.IgnoreNotFound(err) == nil {
Comment on lines +158 to +159
Collaborator

Suggested change
err = seedClient.Delete(ctx, &endpoints, deletionPrecondition)
if client.IgnoreNotFound(err) == nil {
deletionPrecondition := client.Preconditions{UID: &endpoints.UID, ResourceVersion: &endpoints.ResourceVersion}
if err = seedClient.Delete(ctx, endpoints, deletionPrecondition); client.IgnoreNotFound(err) == nil {

Contributor Author

I'm intentionally keeping "delete with precondition" on a separate line here. It's an uncommon construct, which is likely to give the reader pause, and I don't want to force other logic onto the same line.

// The endpoint was deleted (even if not by us). We call that successful cleanup.
ha.log.V(app.VerbosityVerbose).Info("The endpoints object cleanup succeeded")
return nil
}
ha.log.V(app.VerbosityInfo).Info("Failed to delete the endpoints object", "error", err.Error())
}

// Deletion request failed, possibly because of a midair collision. Wait a bit and retry.
attempt++
if attempt >= 10 {
break
}
time.Sleep(1 * time.Second)

Collaborator

What about using a time.NewTicker, or better yet clock.RealClock's NewTicker, which allows you to use a Clock interface that can be mocked for tests?
Tickers take into account the time that was actually spent while executing the Get/Update calls. Additionally, since you will have to add a select statement for it, you can use the select to check whether the context has expired in the meantime.
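
As a rough sketch of that suggestion (an assumption, not part of this PR), the fixed time.Sleep could be replaced with a mockable ticker plus a context check between attempts; waitBeforeRetry is a hypothetical helper name, and production code would pass clock.RealClock{}.

```go
import (
	"context"
	"time"

	"k8s.io/utils/clock"
)

// waitBeforeRetry waits for the next retry slot using a mockable clock and
// aborts early if the cleanup context is cancelled.
func waitBeforeRetry(ctx context.Context, clk clock.Clock) error {
	ticker := clk.NewTicker(1 * time.Second)
	defer ticker.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err() // cleanup budget exhausted; stop retrying
	case <-ticker.C():
		return nil // proceed with the next delete attempt
	}
}
```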

}

ha.log.V(app.VerbosityError).Error(err, "All retries to delete the endpoints object failed. Abandoning object.")
return fmt.Errorf("HAService cleanup: deleting endponts object: retrying failed, last error: %w", err)
}

// Does the endpoints object hold the same values as the ones we previously set to it?
func (ha *HAService) isEndpointStillPointingToOurReplica(endpoints *corev1.Endpoints) bool {
return len(endpoints.Subsets) == 1 &&
len(endpoints.Subsets[0].Addresses) == 1 &&
endpoints.Subsets[0].Addresses[0].IP == ha.servingIPAddress &&
len(endpoints.Subsets[0].Ports) == 1 &&
endpoints.Subsets[0].Ports[0].Port == int32(ha.servingPort) &&
endpoints.Subsets[0].Ports[0].Protocol == corev1.ProtocolTCP
}