Skip to content

Commit

Permalink
Cleanup Endpoints object
Browse files Browse the repository at this point in the history
  • Loading branch information
andrerun committed Mar 28, 2024
1 parent c43b206 commit 8326c95
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 74 deletions.
4 changes: 2 additions & 2 deletions cmd/gardener-custom-metrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"go.uber.org/zap/zapcore"
genericapiserver "k8s.io/apiserver/pkg/server"
"k8s.io/component-base/logs"
"k8s.io/component-base/version"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
kmgr "sigs.k8s.io/controller-runtime/pkg/manager"
Expand All @@ -21,7 +22,6 @@ import (
"github.com/gardener/gardener-custom-metrics/pkg/metrics_provider"
gutil "github.com/gardener/gardener-custom-metrics/pkg/util/gardener"
k8sclient "github.com/gardener/gardener-custom-metrics/pkg/util/k8s/client"
"github.com/gardener/gardener-custom-metrics/pkg/version"
)

func main() {
Expand Down Expand Up @@ -79,7 +79,7 @@ func completeAppCLIOptions(

// Create log
log := initLogs(ctx, appOptions.Completed().LogLevel)
log.V(app.VerbosityInfo).Info("Initializing", "version", version.Version)
log.V(app.VerbosityInfo).Info("Initializing", "version", version.Get().GitVersion)

// Create manager
log.V(app.VerbosityInfo).Info("Creating client set")
Expand Down
69 changes: 68 additions & 1 deletion pkg/ha/ha_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
ctlmgr "sigs.k8s.io/controller-runtime/pkg/manager"

Expand Down Expand Up @@ -93,6 +94,7 @@ func (ha *HAService) Start(ctx context.Context) error {

select {
case <-ctx.Done():
_ = ha.cleanUp()
return fmt.Errorf("starting HA service: %w", ctx.Err())
case <-ha.testIsolation.TimeAfter(retryPeriod):
}
Expand All @@ -103,5 +105,70 @@ func (ha *HAService) Start(ctx context.Context) error {
}
}

return nil
<-ctx.Done()
err := ha.cleanUp()
if err == nil {
err = ctx.Err()
}
return err
}

// cleanUp is executed upon ending leadership. Its purpose is to remove the Endpoints object created upon acquiring
// leadership.
//
// The object is deleted only if it still routes traffic exclusively to this replica (single subset, single address
// equal to ha.servingIPAddress, single TCP port equal to ha.servingPort). If it was modified to point elsewhere, it is
// left in place and nil is returned. A missing object is treated as a successful cleanup.
//
// Failed attempts are retried up to 10 times, one second apart, but never past the overall 14 second cleanup timeout.
// If no attempt succeeds, the last encountered error is returned, wrapped.
func (ha *HAService) cleanUp() error {
	const (
		maxAttempts = 10
		retryDelay  = 1 * time.Second
	)

	// Use our own context. This function executes when the main application context is closed.
	// Also, try to finish before a potential 15 seconds termination grace timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 14*time.Second)
	defer cancel()
	seedClient := ha.manager.GetClient()

	var err error
retryLoop:
	for attempt := 1; ; attempt++ {
		endpoints := corev1.Endpoints{}
		err = seedClient.Get(ctx, client.ObjectKey{Namespace: ha.namespace, Name: app.Name}, &endpoints)
		switch {
		case apierrors.IsNotFound(err):
			ha.log.V(app.VerbosityVerbose).Info("The endpoints object cleanup succeeded: the object was missing")
			return nil
		case err != nil:
			ha.log.V(app.VerbosityInfo).Info("Failed to retrieve the endpoints object", "error", err.Error())
		default:
			// Avoid data race. We don't want to delete the endpoint if it is sending traffic to a replica other than
			// this one.
			isEndpointStillPointingToOurReplica :=
				len(endpoints.Subsets) == 1 &&
					len(endpoints.Subsets[0].Addresses) == 1 &&
					endpoints.Subsets[0].Addresses[0].IP == ha.servingIPAddress &&
					len(endpoints.Subsets[0].Ports) == 1 &&
					endpoints.Subsets[0].Ports[0].Port == int32(ha.servingPort) &&
					endpoints.Subsets[0].Ports[0].Protocol == corev1.ProtocolTCP
			if !isEndpointStillPointingToOurReplica {
				// Someone else is using the endpoint. We can't perform safe cleanup. Abandon the object.
				ha.log.V(app.VerbosityWarning).Info(
					"Abandoning endpoints object because it was modified by an external actor")
				return nil
			}

			// Only delete the endpoint if it is the resource version for which we confirmed that it points to us.
			deletionPrecondition := client.Preconditions{UID: &endpoints.UID, ResourceVersion: &endpoints.ResourceVersion}
			err = seedClient.Delete(ctx, &endpoints, deletionPrecondition)
			if client.IgnoreNotFound(err) == nil {
				// The endpoint was deleted (even if not by us). We call that successful cleanup.
				ha.log.V(app.VerbosityVerbose).Info("The endpoints object cleanup succeeded")
				return nil
			}
			ha.log.V(app.VerbosityInfo).Info("Failed to delete the endpoints object", "error", err.Error())
		}

		// The attempt failed, possibly because of a midair collision. Wait a bit and retry.
		if attempt >= maxAttempts {
			break
		}

		// Wait for the retry delay, but give up as soon as the cleanup timeout expires: once ctx is done, every further
		// API call would fail immediately, and sleeping through the remaining attempts would only push us past the
		// termination grace period.
		retryTimer := time.NewTimer(retryDelay)
		select {
		case <-ctx.Done():
			retryTimer.Stop()
			break retryLoop
		case <-retryTimer.C:
		}
	}

	// Logged here as well as returned, so the failure is recorded even if a caller discards the returned error.
	ha.log.V(app.VerbosityError).Error(err, "All retries to delete the endpoints object failed. Abandoning object.")
	return fmt.Errorf("HAService cleanup: deleting endpoints object: retrying failed, last error: %w", err)
}
Loading

0 comments on commit 8326c95

Please sign in to comment.