From bbb1c438efa58d8e99e42562f8b92c1001635bfd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 12 Jan 2024 13:35:37 +1100 Subject: [PATCH] Improve tracing (#115) * Upgrade dskit to bring in the changes from https://github.com/grafana/dskit/pull/467 * Use `SetSpanAndLogTag`. * Add tracing to `zoneTracker`. * Add changelog entry. --- CHANGELOG.md | 2 +- go.mod | 2 +- go.sum | 4 +- pkg/admission/no_downscale.go | 23 ++++----- pkg/admission/prep_downscale.go | 30 +++++------ pkg/admission/zone_tracker.go | 51 +++++++++++++++---- .../grafana/dskit/spanlogger/spanlogger.go | 14 +++++ vendor/modules.txt | 2 +- 8 files changed, 83 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 309673fd3..ae3eadf21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ * [FEATURE] Coordinate downscaling between zones with a ConfigMap instead of annotation, optionally, via the new zoneTracker for the prepare-downscale admission webhook. #107 * [ENHANCEMENT] Expose pprof endpoint for profiling. #109 * [ENHANCEMENT] Change Docker build image to `golang:1.21-bookworm` and update base image to `alpine:3.19`. #97 -* [ENHANCEMENT] Add basic tracing support. #101 #114 +* [ENHANCEMENT] Add basic tracing support. #101 #114 #115 * [ENHANCEMENT] Updated dependencies, including: #111 * `github.com/gorilla/mux` from `v1.8.0` to `v1.8.1` * `github.com/prometheus/client_golang` from `v1.17.0` to `v1.18.0` diff --git a/go.mod b/go.mod index 03c10ad2d..0718328cb 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/go-kit/log v0.2.1 github.com/gorilla/mux v1.8.1 - github.com/grafana/dskit v0.0.0-20240108174153-a150e79e4581 + github.com/grafana/dskit v0.0.0-20240112005349-f30e65d9dee2 github.com/hashicorp/go-multierror v1.1.1 github.com/k3d-io/k3d/v5 v5.6.0 github.com/opentracing-contrib/go-stdlib v1.0.0 diff --git a/go.sum b/go.sum index 7e5d374ac..6a9fe7c85 100644 --- a/go.sum +++ b/go.sum @@ -260,8 +260,8 @@ github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8 github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= -github.com/grafana/dskit v0.0.0-20240108174153-a150e79e4581 h1:m8XnBonHhjenIYLI4cJmTxTAuVlmt0xeiKz6p39sl6Q= -github.com/grafana/dskit v0.0.0-20240108174153-a150e79e4581/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c= +github.com/grafana/dskit v0.0.0-20240112005349-f30e65d9dee2 h1:ozlFUa4X7SzBwvQqnUXIqF991P2afWR2G221y75vJyc= +github.com/grafana/dskit v0.0.0-20240112005349-f30e65d9dee2/go.mod h1:kkWM4WUV230bNG3urVRWPBnSJHs64y/0RmWjftnnn0c= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= diff --git a/pkg/admission/no_downscale.go b/pkg/admission/no_downscale.go index 3b75b985e..9f3890c2e 100644 --- a/pkg/admission/no_downscale.go +++ b/pkg/admission/no_downscale.go @@ -24,29 +24,25 @@ const ( NoDownscaleWebhookPath = "/admission/no-downscale" ) -func NoDownscale(ctx context.Context, logger log.Logger, ar v1.AdmissionReview, api *kubernetes.Clientset) *v1.AdmissionResponse { - logger = log.With(logger, "name", ar.Request.Name, "resource", ar.Request.Resource.Resource, "namespace", ar.Request.Namespace) - spanLogger, ctx := spanlogger.New(ctx, logger, "admission.NoDownscale()", tenantResolver) - defer spanLogger.Span.Finish() - logger = spanLogger +func NoDownscale(ctx context.Context, l log.Logger, ar v1.AdmissionReview, api *kubernetes.Clientset) *v1.AdmissionResponse { + logger, ctx := spanlogger.New(ctx, l, "admission.NoDownscale()", tenantResolver) + defer logger.Span.Finish() - spanLogger.SetTag("object.name", ar.Request.Name) - spanLogger.SetTag("object.resource", ar.Request.Resource.Resource) - spanLogger.SetTag("object.namespace", ar.Request.Namespace) + logger.SetSpanAndLogTag("object.name", ar.Request.Name) + logger.SetSpanAndLogTag("object.resource", ar.Request.Resource.Resource) + logger.SetSpanAndLogTag("object.namespace", ar.Request.Namespace) oldObj, oldGVK, err := codecs.UniversalDeserializer().Decode(ar.Request.OldObject.Raw, nil, nil) if err != nil { return allowErr(logger, "can't decode old object, allowing the change", err) } - logger = log.With(logger, "request_gvk", oldGVK) - spanLogger.SetTag("request.gvk", oldGVK) + logger.SetSpanAndLogTag("request.gvk", oldGVK) oldReplicas, err := replicas(oldObj, oldGVK) if err != nil { return allowErr(logger, "can't get old replicas, allowing the change", err) } - logger = log.With(logger, "old_replicas", int32PtrStr(oldReplicas)) - spanLogger.SetTag("object.old_replicas", int32PtrStr(oldReplicas)) + logger.SetSpanAndLogTag("object.old_replicas", int32PtrStr(oldReplicas)) newObj, newGVK, err := codecs.UniversalDeserializer().Decode(ar.Request.Object.Raw, nil, nil) if err != nil { @@ -57,8 +53,7 @@ func NoDownscale(ctx context.Context, logger log.Logger, ar v1.AdmissionReview, if err != nil { return allowErr(logger, "can't get new replicas, allowing the change", err) } - logger = log.With(logger, "new_replicas", int32PtrStr(newReplicas)) - spanLogger.SetTag("object.new_replicas", int32PtrStr(newReplicas)) + logger.SetSpanAndLogTag("object.new_replicas", int32PtrStr(newReplicas)) // Both replicas are nil, nothing to warn about. if oldReplicas == nil && newReplicas == nil { diff --git a/pkg/admission/prep_downscale.go b/pkg/admission/prep_downscale.go index c91b19ab2..2318436ff 100644 --- a/pkg/admission/prep_downscale.go +++ b/pkg/admission/prep_downscale.go @@ -52,16 +52,14 @@ type httpClient interface { Do(req *http.Request) (*http.Response, error) } -func prepareDownscale(ctx context.Context, logger log.Logger, ar v1.AdmissionReview, api kubernetes.Interface, client httpClient) *v1.AdmissionResponse { - logger = log.With(logger, "name", ar.Request.Name, "resource", ar.Request.Resource.Resource, "namespace", ar.Request.Namespace) - spanLogger, ctx := spanlogger.New(ctx, logger, "admission.prepareDownscale()", tenantResolver) - defer spanLogger.Span.Finish() - logger = spanLogger +func prepareDownscale(ctx context.Context, l log.Logger, ar v1.AdmissionReview, api kubernetes.Interface, client httpClient) *v1.AdmissionResponse { + logger, ctx := spanlogger.New(ctx, l, "admission.prepareDownscale()", tenantResolver) + defer logger.Span.Finish() - spanLogger.SetTag("object.name", ar.Request.Name) - spanLogger.SetTag("object.resource", ar.Request.Resource.Resource) - spanLogger.SetTag("object.namespace", ar.Request.Namespace) - spanLogger.SetTag("request.dry_run", *ar.Request.DryRun) + logger.SetSpanAndLogTag("object.name", ar.Request.Name) + logger.SetSpanAndLogTag("object.resource", ar.Request.Resource.Resource) + logger.SetSpanAndLogTag("object.namespace", ar.Request.Namespace) + logger.SetSpanAndLogTag("request.dry_run", *ar.Request.DryRun) if *ar.Request.DryRun { return &v1.AdmissionResponse{Allowed: true} @@ -71,16 +69,14 @@ func prepareDownscale(ctx context.Context, logger log.Logger, ar v1.AdmissionRev if err != nil { return allowErr(logger, "can't decode old object, allowing the change", err) } - logger = log.With(logger, "request_gvk", oldInfo.gvk, "old_replicas", int32PtrStr(oldInfo.replicas)) - spanLogger.SetTag("request.gvk", oldInfo.gvk) - spanLogger.SetTag("object.old_replicas", int32PtrStr(oldInfo.replicas)) + logger.SetSpanAndLogTag("request.gvk", oldInfo.gvk) + logger.SetSpanAndLogTag("object.old_replicas", int32PtrStr(oldInfo.replicas)) newInfo, err := decodeAndReplicas(ar.Request.Object.Raw) if err != nil { return allowErr(logger, "can't decode new object, allowing the change", err) } - logger = log.With(logger, "new_replicas", int32PtrStr(newInfo.replicas)) - spanLogger.SetTag("object.new_replicas", int32PtrStr(newInfo.replicas)) + logger.SetSpanAndLogTag("object.new_replicas", int32PtrStr(newInfo.replicas)) // Continue if it's a downscale response := checkReplicasChange(logger, oldInfo, newInfo) @@ -476,11 +472,11 @@ func sendPrepareShutdownRequests(ctx context.Context, logger log.Logger, client for _, ep := range eps { ep := ep // https://golang.org/doc/faq#closures_and_goroutines g.Go(func() error { - logger, ctx := spanlogger.New(ctx, log.With(logger, "url", ep.url, "index", ep.index), "admission.PreparePodForShutdown", tenantResolver) + logger, ctx := spanlogger.New(ctx, logger, "admission.PreparePodForShutdown", tenantResolver) defer logger.Span.Finish() - logger.SetTag("url", ep.url) - logger.SetTag("index", ep.index) + logger.SetSpanAndLogTag("url", ep.url) + logger.SetSpanAndLogTag("index", ep.index) req, err := http.NewRequestWithContext(ctx, http.MethodPost, "http://"+ep.url, nil) if err != nil { diff --git a/pkg/admission/zone_tracker.go b/pkg/admission/zone_tracker.go index 3bd29fcf1..9b530046b 100644 --- a/pkg/admission/zone_tracker.go +++ b/pkg/admission/zone_tracker.go @@ -9,6 +9,8 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/spanlogger" + "github.com/opentracing/opentracing-go" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/client-go/kubernetes" @@ -32,8 +34,14 @@ type zoneInfo struct { } // Use a ConfigMap instead of an annotation to track the last time zones were downscaled -func (zt *zoneTracker) prepareDownscale(ctx context.Context, logger log.Logger, ar v1.AdmissionReview, api kubernetes.Interface, client httpClient) *v1.AdmissionResponse { - logger = log.With(logger, "name", ar.Request.Name, "resource", ar.Request.Resource.Resource, "namespace", ar.Request.Namespace) +func (zt *zoneTracker) prepareDownscale(ctx context.Context, l log.Logger, ar v1.AdmissionReview, api kubernetes.Interface, client httpClient) *v1.AdmissionResponse { + logger, ctx := spanlogger.New(ctx, l, "zoneTracker.prepareDownscale()", tenantResolver) + defer logger.Span.Finish() + + logger.SetSpanAndLogTag("object.name", ar.Request.Name) + logger.SetSpanAndLogTag("object.resource", ar.Request.Resource.Resource) + logger.SetSpanAndLogTag("object.namespace", ar.Request.Namespace) + logger.SetSpanAndLogTag("request.dry_run", *ar.Request.DryRun) if *ar.Request.DryRun { return &v1.AdmissionResponse{Allowed: true} @@ -43,13 +51,14 @@ func (zt *zoneTracker) prepareDownscale(ctx context.Context, logger log.Logger, if err != nil { return allowErr(logger, "can't decode old object, allowing the change", err) } - logger = log.With(logger, "request_gvk", oldInfo.gvk, "old_replicas", int32PtrStr(oldInfo.replicas)) + logger.SetSpanAndLogTag("request.gvk", oldInfo.gvk) + logger.SetSpanAndLogTag("object.old_replicas", int32PtrStr(oldInfo.replicas)) newInfo, err := decodeAndReplicas(ar.Request.Object.Raw) if err != nil { return allowErr(logger, "can't decode new object, allowing the change", err) } - logger = log.With(logger, "new_replicas", int32PtrStr(newInfo.replicas)) + logger.SetSpanAndLogTag("object.new_replicas", int32PtrStr(newInfo.replicas)) // Continue if it's a downscale response := checkReplicasChange(logger, oldInfo, newInfo) @@ -154,6 +163,9 @@ func (zt *zoneTracker) prepareDownscale(ctx context.Context, logger log.Logger, // Create the ConfigMap and populate each zone with the current time as a starting point func (zt *zoneTracker) createConfigMap(ctx context.Context, stsList *appsv1.StatefulSetList) (*corev1.ConfigMap, error) { + span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.createConfigMap()") + defer span.Finish() + defaultInfo := &zoneInfo{LastDownscaled: time.Now().UTC().Format(time.RFC3339)} zones := make(map[string]zoneInfo, len(stsList.Items)) for _, sts := range stsList.Items { @@ -182,14 +194,32 @@ func (zt *zoneTracker) createConfigMap(ctx context.Context, stsList *appsv1.Stat return zt.client.CoreV1().ConfigMaps(zt.namespace).Create(ctx, cm, metav1.CreateOptions{}) } -// Get the zoneTracker ConfigMap if it exists, otherwise create it -func (zt *zoneTracker) getOrCreateConfigMap(ctx context.Context, stsList *appsv1.StatefulSetList) (*corev1.ConfigMap, error) { +// Get the zoneTracker ConfigMap if it exists, or nil if it does not exist +func (zt *zoneTracker) getConfigMap(ctx context.Context) (*corev1.ConfigMap, error) { + span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.getConfigMap()") + defer span.Finish() + cm, err := zt.client.CoreV1().ConfigMaps(zt.namespace).Get(ctx, zt.configMapName, metav1.GetOptions{}) - if err == nil { - return cm, nil + if err != nil { + if k8serrors.IsNotFound(err) { + return nil, nil + } + + return nil, err } - if !k8serrors.IsNotFound(err) { + + return cm, nil +} + +// Get the zoneTracker ConfigMap if it exists, otherwise create it +func (zt *zoneTracker) getOrCreateConfigMap(ctx context.Context, stsList *appsv1.StatefulSetList) (*corev1.ConfigMap, error) { + span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.getOrCreateConfigMap()") + defer span.Finish() + + if cm, err := zt.getConfigMap(ctx); err != nil { return nil, err + } else if cm != nil { + return cm, nil } return zt.createConfigMap(ctx, stsList) @@ -217,6 +247,9 @@ func (zt *zoneTracker) loadZones(ctx context.Context, stsList *appsv1.StatefulSe // Save the zones map to the zoneTracker ConfigMap func (zt *zoneTracker) saveZones(ctx context.Context) error { + span, ctx := opentracing.StartSpanFromContext(ctx, "zoneTracker.saveZones()") + defer span.Finish() + // Convert the zones map to ConfigMap data data := make(map[string]string) for zone, zi := range zt.zones { diff --git a/vendor/github.com/grafana/dskit/spanlogger/spanlogger.go b/vendor/github.com/grafana/dskit/spanlogger/spanlogger.go index 08653eda3..c6a872501 100644 --- a/vendor/github.com/grafana/dskit/spanlogger/spanlogger.go +++ b/vendor/github.com/grafana/dskit/spanlogger/spanlogger.go @@ -167,3 +167,17 @@ func (s *SpanLogger) getLogger() log.Logger { } return logger } + +// SetSpanAndLogTag sets a tag on the span used by this SpanLogger, and appends a key/value pair to the logger used for +// future log lines emitted by this SpanLogger. +// +// It is not safe to call this method from multiple goroutines simultaneously. +// It is safe to call this method at the same time as calling other SpanLogger methods, however, this may produce +// inconsistent results (eg. some log lines may be emitted with the provided key/value pair, and others may not). +func (s *SpanLogger) SetSpanAndLogTag(key string, value interface{}) { + s.Span.SetTag(key, value) + + logger := s.getLogger() + wrappedLogger := log.With(logger, key, value) + s.logger.Store(&wrappedLogger) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index f111b4631..d90dd26af 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -243,7 +243,7 @@ github.com/google/uuid # github.com/gorilla/mux v1.8.1 ## explicit; go 1.20 github.com/gorilla/mux -# github.com/grafana/dskit v0.0.0-20240108174153-a150e79e4581 +# github.com/grafana/dskit v0.0.0-20240112005349-f30e65d9dee2 ## explicit; go 1.20 github.com/grafana/dskit/grpcutil github.com/grafana/dskit/httpgrpc