Skip to content

Commit

Permalink
*: add maxprocs metrics for mcs (#7604)
Browse files Browse the repository at this point in the history
ref #5839

Signed-off-by: Cabinfever_B <[email protected]>
  • Loading branch information
CabinfeverB authored Dec 25, 2023
1 parent b36b725 commit 8950c3a
Show file tree
Hide file tree
Showing 11 changed files with 31 additions and 55 deletions.
27 changes: 16 additions & 11 deletions pkg/mcs/scheduling/server/metrics.go → pkg/basicserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,27 @@ package server

import "github.com/prometheus/client_golang/prometheus"

const (
namespace = "scheduling"
serverSubsystem = "server"
)

var (
// Meta & Server info.
serverInfo = prometheus.NewGaugeVec(
// ServerMaxProcsGauge records the value of GOMAXPROCS.
ServerMaxProcsGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "service",
Name: "maxprocs",
Help: "The value of GOMAXPROCS.",
})

// ServerInfoGauge indicates the pd server info including version and git hash.
ServerInfoGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: serverSubsystem,
Namespace: "pd",
Subsystem: "server",
Name: "info",
Help: "Indicate the scheduling server info, and the value is the start timestamp (s).",
Help: "Indicate the pd server info, and the value is the start timestamp (s).",
}, []string{"version", "hash"})
)

func init() {
prometheus.MustRegister(serverInfo)
prometheus.MustRegister(ServerMaxProcsGauge)
prometheus.MustRegister(ServerInfoGauge)
}
9 changes: 0 additions & 9 deletions pkg/mcs/resourcemanager/server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ const (
)

var (
// Meta & Server info.
serverInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: serverSubsystem,
Name: "info",
Help: "Indicate the resource manager server info, and the value is the start timestamp (s).",
}, []string{"version", "hash"})
// RU cost metrics.
// `sum` is added to the name to maintain compatibility with the previous use of histogram.
readRequestUnitCost = prometheus.NewCounterVec(
Expand Down Expand Up @@ -111,7 +103,6 @@ var (
)

func init() {
prometheus.MustRegister(serverInfo)
prometheus.MustRegister(readRequestUnitCost)
prometheus.MustRegister(writeRequestUnitCost)
prometheus.MustRegister(sqlLayerRequestUnitCost)
Expand Down
4 changes: 3 additions & 1 deletion pkg/mcs/resourcemanager/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"net/http"
"os"
"os/signal"
"runtime"
"strconv"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -294,7 +295,8 @@ func (s *Server) startServer() (err error) {
log.Info("init cluster id", zap.Uint64("cluster-id", s.clusterID))
// The independent Resource Manager service still reuses PD version info since PD and Resource Manager are just
// different service modes provided by the same pd-server binary
serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0)))

uniqueName := s.cfg.GetAdvertiseListenAddr()
uniqueID := memberutil.GenerateUniqueID(uniqueName)
Expand Down
5 changes: 3 additions & 2 deletions pkg/mcs/scheduling/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"net/http"
"os"
"os/signal"
"runtime"
"strconv"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -409,8 +410,8 @@ func (s *Server) startServer() (err error) {
log.Info("init cluster id", zap.Uint64("cluster-id", s.clusterID))
// The independent Scheduling service still reuses PD version info since PD and Scheduling are just
// different service modes provided by the same pd-server binary
serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))

bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0)))
s.serviceID = &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr}
uniqueName := s.cfg.GetAdvertiseListenAddr()
uniqueID := memberutil.GenerateUniqueID(uniqueName)
Expand Down
9 changes: 0 additions & 9 deletions pkg/mcs/tso/server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,6 @@ var (
Help: "Record critical metadata.",
}, []string{"type"})

serverInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "server",
Name: "info",
Help: "Indicate the tso server info, and the value is the start timestamp (s).",
}, []string{"version", "hash"})

tsoHandleDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Expand All @@ -56,6 +48,5 @@ var (
func init() {
prometheus.MustRegister(timeJumpBackCounter)
prometheus.MustRegister(metaDataGauge)
prometheus.MustRegister(serverInfo)
prometheus.MustRegister(tsoHandleDuration)
}
4 changes: 3 additions & 1 deletion pkg/mcs/tso/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"net/http"
"os"
"os/signal"
"runtime"
"strconv"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -359,7 +360,8 @@ func (s *Server) startServer() (err error) {
metaDataGauge.WithLabelValues(fmt.Sprintf("cluster%d", s.clusterID)).Set(0)
// The independent TSO service still reuses PD version info since PD and TSO are just
// different service modes provided by the same pd-server binary
serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0)))

// Initialize the TSO service.
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.Context())
Expand Down
17 changes: 0 additions & 17 deletions server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,6 @@ var (
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 29), // 0.1ms ~ 7hours
}, []string{"address", "store"})

serverInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "server",
Name: "info",
Help: "Indicate the pd server info, and the value is the start timestamp (s).",
}, []string{"version", "hash"})

serviceAuditHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "pd",
Expand All @@ -152,13 +144,6 @@ var (
Help: "PD server service handling audit",
Buckets: prometheus.DefBuckets,
}, []string{"service", "method", "caller_id", "ip"})
serverMaxProcs = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "pd",
Subsystem: "service",
Name: "maxprocs",
Help: "The value of GOMAXPROCS.",
})

forwardFailCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Expand All @@ -181,11 +166,9 @@ func init() {
prometheus.MustRegister(tsoHandleDuration)
prometheus.MustRegister(regionHeartbeatHandleDuration)
prometheus.MustRegister(storeHeartbeatHandleDuration)
prometheus.MustRegister(serverInfo)
prometheus.MustRegister(bucketReportCounter)
prometheus.MustRegister(bucketReportLatency)
prometheus.MustRegister(serviceAuditHistogram)
prometheus.MustRegister(bucketReportInterval)
prometheus.MustRegister(serverMaxProcs)
prometheus.MustRegister(forwardFailCounter)
}
5 changes: 3 additions & 2 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/pingcap/log"
"github.com/pingcap/sysutil"
"github.com/tikv/pd/pkg/audit"
bs "github.com/tikv/pd/pkg/basicserver"
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/encryption"
"github.com/tikv/pd/pkg/errs"
Expand Down Expand Up @@ -428,7 +429,7 @@ func (s *Server) startServer(ctx context.Context) error {
log.Info("init cluster id", zap.Uint64("cluster-id", s.clusterID))
// Storing a uint64 in a float64 may lose precision, so we store the cluster ID in a label instead.
metadataGauge.WithLabelValues(fmt.Sprintf("cluster%d", s.clusterID)).Set(0)
serverInfo.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))
bs.ServerInfoGauge.WithLabelValues(versioninfo.PDReleaseVersion, versioninfo.PDGitHash).Set(float64(time.Now().Unix()))

s.rootPath = endpoint.PDRootPath(s.clusterID)
s.member.InitMemberInfo(s.cfg.AdvertiseClientUrls, s.cfg.AdvertisePeerUrls, s.Name(), s.rootPath)
Expand Down Expand Up @@ -504,7 +505,7 @@ func (s *Server) startServer(ctx context.Context) error {

// Server has started.
atomic.StoreInt64(&s.isRunning, 1)
serverMaxProcs.Set(float64(runtime.GOMAXPROCS(0)))
bs.ServerMaxProcsGauge.Set(float64(runtime.GOMAXPROCS(0)))
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/mcs/resourcemanager/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestResourceManagerServer(t *testing.T) {
re.Equal(http.StatusOK, resp.StatusCode)
respBytes, err := io.ReadAll(resp.Body)
re.NoError(err)
re.Contains(string(respBytes), "resource_manager_server_info")
re.Contains(string(respBytes), "pd_server_info")
}

// Test status handler
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/mcs/scheduling/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ func (suite *apiTestSuite) checkMetrics(cluster *tests.TestCluster) {
re.Equal(http.StatusOK, resp.StatusCode)
respBytes, err := io.ReadAll(resp.Body)
re.NoError(err)
re.Contains(string(respBytes), "scheduling_server_info")
re.Contains(string(respBytes), "pd_server_info")
}

func (suite *apiTestSuite) TestStatus() {
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/mcs/tso/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func (suite *tsoAPITestSuite) TestMetrics() {
re.Equal(http.StatusOK, resp.StatusCode)
respBytes, err := io.ReadAll(resp.Body)
re.NoError(err)
re.Contains(string(respBytes), "tso_server_info")
re.Contains(string(respBytes), "pd_server_info")
}

func (suite *tsoAPITestSuite) TestStatus() {
Expand Down

0 comments on commit 8950c3a

Please sign in to comment.