Skip to content

Commit

Permalink
Add profiler address parameter on node-agent
Browse files Browse the repository at this point in the history
This allows us to enable the profiler endpoints on both the
server and the node agent.
This helps me troubleshoot the high memory usage seen when
restoring lots of small files.

Refs: #8582

Signed-off-by: Rob Kenis <[email protected]>
  • Loading branch information
Rob Kenis authored and RobKenis committed Jan 15, 2025
1 parent 0543750 commit 62620ba
Showing 1 changed file with 26 additions and 1 deletion.
27 changes: 26 additions & 1 deletion pkg/cmd/cli/nodeagent/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"math"
"net/http"
"net/http/pprof"
"os"
"strings"
"time"
Expand Down Expand Up @@ -86,6 +87,7 @@ const (

type nodeAgentServerConfig struct {
metricsAddress string
profilerAddress string
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
Expand Down Expand Up @@ -124,7 +126,8 @@ func NewServerCommand(f client.Factory) *cobra.Command {
command.Flags().Var(formatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(formatFlag.AllowedValues(), ", ")))
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics.")
command.Flags().StringVar(&config.profilerAddress, "profiler-address", config.profilerAddress, "The address to expose the pprof profiler.")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.")

return command
Expand Down Expand Up @@ -263,6 +266,10 @@ func newNodeAgentServer(logger logrus.FieldLogger, factory client.Factory, confi
func (s *nodeAgentServer) run() {
signals.CancelOnShutdown(s.cancelFunc, s.logger)

if s.config.profilerAddress != "" {
go s.runProfiler()
}

go func() {
metricsMux := http.NewServeMux()
metricsMux.Handle("/metrics", promhttp.Handler())
Expand Down Expand Up @@ -386,6 +393,24 @@ func (s *nodeAgentServer) run() {
}
}

// runProfiler exposes the net/http/pprof handlers on the address given by
// s.config.profilerAddress. It blocks inside ListenAndServe, which is why
// run() launches it in its own goroutine; any error returned by the HTTP
// server is logged rather than propagated.
func (s *nodeAgentServer) runProfiler() {
	// Route table for the pprof endpoints. pprof.Index is registered on the
	// subtree pattern (trailing slash), the rest on exact paths.
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/debug/pprof/", pprof.Index},
		{"/debug/pprof/cmdline", pprof.Cmdline},
		{"/debug/pprof/profile", pprof.Profile},
		{"/debug/pprof/symbol", pprof.Symbol},
		{"/debug/pprof/trace", pprof.Trace},
	}

	// Use a dedicated mux so the profiler endpoints are only reachable on
	// the profiler address.
	profilerMux := http.NewServeMux()
	for _, route := range routes {
		profilerMux.HandleFunc(route.pattern, route.handler)
	}

	srv := &http.Server{
		Addr:              s.config.profilerAddress,
		Handler:           profilerMux,
		ReadHeaderTimeout: 3 * time.Second,
	}

	err := srv.ListenAndServe()
	if err == nil {
		return
	}
	s.logger.WithError(errors.WithStack(err)).Error("error running profiler http server")
}

func (s *nodeAgentServer) waitCacheForResume() error {
podInformer, err := s.mgr.GetCache().GetInformer(s.ctx, &v1.Pod{})
if err != nil {
Expand Down

0 comments on commit 62620ba

Please sign in to comment.