
metrics: remove the Heartbeat distribution #9010

Merged · 2 commits · Jan 17, 2025
472 changes: 0 additions & 472 deletions metrics/grafana/pd.json

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions pkg/mcs/scheduling/server/cluster.go
@@ -414,9 +414,6 @@ func (c *Cluster) HandleStoreHeartbeat(heartbeat *schedulingpb.StoreHeartbeatReq
 	nowTime := time.Now()
 	newStore := store.Clone(core.SetStoreStats(stats), core.SetLastHeartbeatTS(nowTime))
 
-	if store := c.GetStore(storeID); store != nil {
-		statistics.UpdateStoreHeartbeatMetrics(store)
-	}
 	c.PutStore(newStore)
 	c.hotStat.Observe(storeID, newStore.GetStoreStats())
 	c.hotStat.FilterUnhealthyStore(c)
7 changes: 0 additions & 7 deletions pkg/statistics/buckets/bucket_stat_informer.go
@@ -208,10 +208,3 @@ func (b *BucketTreeItem) calculateHotDegree() {
 		}
 	}
 }
-
-// collectBucketsMetrics collects the metrics of the hot stats.
-func (b *BucketTreeItem) collectBucketsMetrics() {
-	for _, bucket := range b.stats {
-		bucketsHotDegreeHist.Observe(float64(bucket.HotDegree))
-	}
-}
1 change: 0 additions & 1 deletion pkg/statistics/buckets/hot_bucket_cache.go
@@ -201,7 +201,6 @@ func (h *HotBucketCache) checkBucketsFlow(buckets *metapb.Buckets) (newItem *Buc
 	}
 	newItem.inherit(overlaps)
 	newItem.calculateHotDegree()
-	newItem.collectBucketsMetrics()
 	return newItem, overlaps
 }
 
10 changes: 0 additions & 10 deletions pkg/statistics/buckets/metric.go
@@ -19,15 +19,6 @@ import (
 )
 
 var (
-	bucketsHotDegreeHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "buckets_hot_degree_hist",
-			Help: "Bucketed histogram of bucket hot degree",
-			Buckets: prometheus.LinearBuckets(-20, 2, 20), // [-20 20]
-		})
-
 	bucketsTaskDuration = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: "pd",
@@ -39,6 +30,5 @@ var (
 )
 
 func init() {
-	prometheus.MustRegister(bucketsHotDegreeHist)
 	prometheus.MustRegister(bucketsTaskDuration)
 }
25 changes: 0 additions & 25 deletions pkg/statistics/hot_peer_cache.go
@@ -135,30 +135,6 @@ func (f *HotPeerCache) incMetrics(action utils.ActionType, storeID uint64) {
 	f.metrics[storeID][action].Inc()
 }
 
-func (f *HotPeerCache) collectPeerMetrics(loads []float64, interval uint64) {
-	regionHeartbeatIntervalHist.Observe(float64(interval))
-	if interval == 0 {
-		return
-	}
-	// TODO: use unified metrics. (keep backward compatibility at the same time)
-	for _, k := range f.kind.RegionStats() {
-		switch k {
-		case utils.RegionReadBytes:
-			readByteHist.Observe(loads[int(k)])
-		case utils.RegionReadKeys:
-			readKeyHist.Observe(loads[int(k)])
-		case utils.RegionWriteBytes:
-			writeByteHist.Observe(loads[int(k)])
-		case utils.RegionWriteKeys:
-			writeKeyHist.Observe(loads[int(k)])
-		case utils.RegionWriteQueryNum:
-			writeQueryHist.Observe(loads[int(k)])
-		case utils.RegionReadQueryNum:
-			readQueryHist.Observe(loads[int(k)])
-		}
-	}
-}
-
 // CollectExpiredItems collects expired items, mark them as needDelete and puts them into inherit items
 func (f *HotPeerCache) CollectExpiredItems(region *core.RegionInfo) []*HotPeerStat {
 	regionID := region.GetID()
@@ -185,7 +161,6 @@ func (f *HotPeerCache) CheckPeerFlow(region *core.RegionInfo, peers []*metapb.Pe
 		return nil
 	}
 
-	f.collectPeerMetrics(deltaLoads, interval) // update metrics
 	regionID := region.GetID()
 
 	regionPeers := region.GetPeers()
70 changes: 0 additions & 70 deletions pkg/statistics/metrics.go
@@ -81,70 +81,6 @@
 			Name: "label_level",
 			Help: "Number of regions in the different label level.",
 		}, []string{"type"})
-	readByteHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "read_byte_hist",
-			Help: "The distribution of region read bytes",
-			Buckets: prometheus.ExponentialBuckets(1, 8, 12),
-		})
-	writeByteHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "write_byte_hist",
-			Help: "The distribution of region write bytes",
-			Buckets: prometheus.ExponentialBuckets(1, 8, 12),
-		})
-	readKeyHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "read_key_hist",
-			Help: "The distribution of region read keys",
-			Buckets: prometheus.ExponentialBuckets(1, 2, 18),
-		})
-	writeKeyHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "write_key_hist",
-			Help: "The distribution of region write keys",
-			Buckets: prometheus.ExponentialBuckets(1, 2, 18),
-		})
-	readQueryHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "read_query_hist",
-			Help: "The distribution of region read query",
-			Buckets: prometheus.ExponentialBuckets(1, 2, 12),
-		})
-	writeQueryHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "write_query_hist",
-			Help: "The distribution of region write query",
-			Buckets: prometheus.ExponentialBuckets(1, 2, 12),
-		})
-	regionHeartbeatIntervalHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "region_heartbeat_interval_hist",
-			Help: "Bucketed histogram of the batch size of handled requests.",
-			Buckets: prometheus.LinearBuckets(0, 30, 20),
-		})
-	storeHeartbeatIntervalHist = prometheus.NewHistogram(
-		prometheus.HistogramOpts{
-			Namespace: "pd",
-			Subsystem: "scheduler",
-			Name: "store_heartbeat_interval_hist",
-			Help: "Bucketed histogram of the batch size of handled requests.",
-			Buckets: prometheus.LinearBuckets(0, 5, 12),
-		})
 
 	regionAbnormalPeerDuration = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
@@ -187,12 +123,6 @@ func init() {
 	prometheus.MustRegister(configStatusGauge)
 	prometheus.MustRegister(StoreLimitGauge)
 	prometheus.MustRegister(regionLabelLevelGauge)
-	prometheus.MustRegister(readByteHist)
-	prometheus.MustRegister(readKeyHist)
-	prometheus.MustRegister(writeKeyHist)
-	prometheus.MustRegister(writeByteHist)
-	prometheus.MustRegister(regionHeartbeatIntervalHist)
-	prometheus.MustRegister(storeHeartbeatIntervalHist)
 	prometheus.MustRegister(regionAbnormalPeerDuration)
 	prometheus.MustRegister(hotCacheFlowQueueStatusGauge)
 	prometheus.MustRegister(hotPeerSummary)
5 changes: 0 additions & 5 deletions pkg/statistics/store.go
@@ -129,11 +129,6 @@ func (s *StoresStats) FilterUnhealthyStore(cluster core.StoreSetInformer) {
 	}
 }
 
-// UpdateStoreHeartbeatMetrics is used to update store heartbeat interval metrics
-func UpdateStoreHeartbeatMetrics(store *core.StoreInfo) {
-	storeHeartbeatIntervalHist.Observe(time.Since(store.GetLastHeartbeatTS()).Seconds())
-}
-
 // RollingStoreStats are multiple sets of recent historical records with specified windows size.
 type RollingStoreStats struct {
 	syncutil.RWMutex
1 change: 0 additions & 1 deletion server/cluster/cluster.go
@@ -1040,7 +1040,6 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest
 			newStore = newStore.Clone(core.SetLastPersistTime(nowTime))
 		}
 	}
-	statistics.UpdateStoreHeartbeatMetrics(store)
 	// Supply NodeState in the response to help the store handle special cases
 	// more conveniently, such as avoiding calling `remove_peer` redundantly under
 	// NodeState_Removing.