Skip to content

Commit

Permalink
make it simple
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Leung <[email protected]>
  • Loading branch information
rleungx committed Jul 12, 2023
1 parent 779a3f9 commit ac93492
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 422 deletions.
2 changes: 2 additions & 0 deletions pkg/mcs/scheduling/server/apis/v1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/gin-contrib/cors"
"github.com/gin-contrib/gzip"
"github.com/gin-contrib/pprof"
"github.com/gin-gonic/gin"
"github.com/joho/godotenv"
scheserver "github.com/tikv/pd/pkg/mcs/scheduling/server"
Expand Down Expand Up @@ -81,6 +82,7 @@ func NewService(srv *scheserver.Service) *Service {
})
apiHandlerEngine.Use(multiservicesapi.ServiceRedirector())
apiHandlerEngine.GET("metrics", utils.PromHandler())
pprof.Register(apiHandlerEngine)
root := apiHandlerEngine.Group(APIPathPrefix)
s := &Service{
srv: srv,
Expand Down
230 changes: 5 additions & 225 deletions pkg/mcs/scheduling/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"os"
"path/filepath"
"strings"
"time"

"github.com/BurntSushi/toml"
"github.com/pingcap/errors"
Expand All @@ -29,7 +28,6 @@ import (
"github.com/tikv/pd/pkg/utils/configutil"
"github.com/tikv/pd/pkg/utils/grpcutil"
"github.com/tikv/pd/pkg/utils/metricutil"
"github.com/tikv/pd/pkg/utils/typeutil"
"go.uber.org/zap"
)

Expand All @@ -39,54 +37,7 @@ const (
defaultListenAddr = "http://127.0.0.1:3379"
)

type (
	// SchedulerConfigs is a list of customized scheduler configurations.
	SchedulerConfigs []SchedulerConfig

	// SchedulerConfig describes one customized scheduler. Every field is
	// loaded from TOML and serialized to JSON under the key in its tag.
	SchedulerConfig struct {
		// Type names the scheduler, e.g. "balance-region".
		Type string `toml:"type" json:"type"`
		// Args holds the string arguments configured for the scheduler.
		Args []string `toml:"args" json:"args"`
		// Disable is the "disable" flag of this scheduler entry.
		Disable bool `toml:"disable" json:"disable"`
		// ArgsPayload is an opaque string payload stored with the entry.
		ArgsPayload string `toml:"args-payload" json:"args-payload"`
	}
)

// DefaultSchedulers lists the schedulers that are created by default.
// Any of them that is missing from the persistent configuration is
// created automatically when reloading.
var DefaultSchedulers = SchedulerConfigs{
	SchedulerConfig{Type: "balance-region"},
	SchedulerConfig{Type: "balance-leader"},
	SchedulerConfig{Type: "hot-region"},
}

// Default values for the scheduling configuration.
const (
	// Replica, snapshot and pending-peer counts.
	defaultMaxReplicas         = 3
	defaultMaxSnapshotCount    = 64
	defaultMaxPendingPeerCount = 64

	// Store liveness.
	defaultMaxStoreDownTime = time.Minute * 30

	// Per-kind schedule limits and the waiting-operator cap.
	defaultLeaderScheduleLimit         = 4
	defaultRegionScheduleLimit         = 2048
	defaultHotRegionScheduleLimit      = 4
	defaultSchedulerMaxWaitingOperator = 5

	// Space ratios and region scoring.
	defaultTolerantSizeRatio         = 0
	defaultLowSpaceRatio             = 0.8
	defaultHighSpaceRatio            = 0.7
	defaultRegionScoreFormulaVersion = "v2"

	// defaultHotRegionCacheHitsThreshold is the low hit-count threshold
	// of the hot region.
	defaultHotRegionCacheHitsThreshold = 3

	// Policies, versions and feature switches.
	defaultLeaderSchedulePolicy  = "count"
	defaultStoreLimitVersion     = "v1"
	defaultEnableDiagnostic      = true
	defaultEnableJointConsensus  = true
	defaultEnableCrossTableMerge = true
	defaultHaltScheduling        = false

	// Evicting is triggered once a slow store affects more than 30% of
	// all stores.
	defaultSlowStoreEvictingAffectedStoreRatioThreshold = 0.3
)

// Config is the configuration for the resource manager.
// Config is the configuration for the scheduling.
type Config struct {
BackendEndpoints string `toml:"backend-endpoints" json:"backend-endpoints"`
ListenAddr string `toml:"listen-addr" json:"listen-addr"`
Expand All @@ -103,100 +54,15 @@ type Config struct {
LogProps *log.ZapProperties

Security configutil.SecurityConfig `toml:"security" json:"security"`

// WarningMsgs contains all warnings during parsing.
WarningMsgs []string

// LeaderLease defines the time within which a Resource Manager primary/leader must
// LeaderLease defines the time within which a Scheduling primary/leader must
// update its TTL in etcd, otherwise etcd will expire the leader key and other servers
// can campaign the primary/leader again. Etcd only supports seconds TTL, so here is
// second too.
LeaderLease int64 `toml:"lease" json:"lease"`

// If the snapshot count of one store is greater than this value,
// it will never be used as a source or target store.
MaxSnapshotCount uint64 `toml:"max-snapshot-count" json:"max-snapshot-count"`
MaxPendingPeerCount uint64 `toml:"max-pending-peer-count" json:"max-pending-peer-count"`
// EnableOneWayMerge is the option to enable one way merge. This means a Region can only be merged into the next region of it.
EnableOneWayMerge bool `toml:"enable-one-way-merge" json:"enable-one-way-merge,string"`
// MaxStoreDownTime is the max duration after which
// a store will be considered to be down if it hasn't reported heartbeats.
MaxStoreDownTime typeutil.Duration `toml:"max-store-down-time" json:"max-store-down-time"`
// LeaderScheduleLimit is the max coexist leader schedules.
LeaderScheduleLimit uint64 `toml:"leader-schedule-limit" json:"leader-schedule-limit"`
// LeaderSchedulePolicy is the option to balance leader, there are some policies supported: ["count", "size"], default: "count"
LeaderSchedulePolicy string `toml:"leader-schedule-policy" json:"leader-schedule-policy"`
// RegionScheduleLimit is the max coexist region schedules.
RegionScheduleLimit uint64 `toml:"region-schedule-limit" json:"region-schedule-limit"`
// HotRegionScheduleLimit is the max coexist hot region schedules.
HotRegionScheduleLimit uint64 `toml:"hot-region-schedule-limit" json:"hot-region-schedule-limit"`
// HotRegionCacheHitsThreshold is the cache hits threshold of the hot region.
// If the number of times a region hits the hot cache is greater than this
// threshold, it is considered a hot region.
HotRegionCacheHitsThreshold uint64 `toml:"hot-region-cache-hits-threshold" json:"hot-region-cache-hits-threshold"`

// StoreLimit is the limit of scheduling for stores.
StoreLimit map[uint64]StoreLimitConfig `toml:"store-limit" json:"store-limit"`
// TolerantSizeRatio is the ratio of buffer size for balance scheduler.
TolerantSizeRatio float64 `toml:"tolerant-size-ratio" json:"tolerant-size-ratio"`
//
// high space stage transition stage low space stage
// |--------------------|-----------------------------|-------------------------|
// ^ ^ ^ ^
// 0 HighSpaceRatio * capacity LowSpaceRatio * capacity capacity
//
// LowSpaceRatio is the lowest usage ratio of a store that is regarded as low space.
// When in low space, store region score increases to very large and varies inversely with available size.
LowSpaceRatio float64 `toml:"low-space-ratio" json:"low-space-ratio"`
// HighSpaceRatio is the highest usage ratio of a store that is regarded as high space.
// High space means there is a lot of spare capacity, and store region score varies directly with used size.
HighSpaceRatio float64 `toml:"high-space-ratio" json:"high-space-ratio"`
// RegionScoreFormulaVersion is used to control the formula used to calculate region score.
RegionScoreFormulaVersion string `toml:"region-score-formula-version" json:"region-score-formula-version"`
// SchedulerMaxWaitingOperator is the max coexist operators for each scheduler.
SchedulerMaxWaitingOperator uint64 `toml:"scheduler-max-waiting-operator" json:"scheduler-max-waiting-operator"`

// EnableDebugMetrics is the option to enable debug metrics.
EnableDebugMetrics bool `toml:"enable-debug-metrics" json:"enable-debug-metrics,string"`
// EnableJointConsensus is the option to enable using joint consensus as an operator step.
EnableJointConsensus bool `toml:"enable-joint-consensus" json:"enable-joint-consensus,string"`

// Schedulers support for loading customized schedulers
Schedulers SchedulerConfigs `toml:"schedulers" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade

// MaxMovableHotPeerSize is the threshold of region size for balance hot region and split bucket scheduler.
// Hot region must be split before moved if it's region size is greater than MaxMovableHotPeerSize.
MaxMovableHotPeerSize int64 `toml:"max-movable-hot-peer-size" json:"max-movable-hot-peer-size,omitempty"`

// EnableDiagnostic is the option to enable using diagnostic.
EnableDiagnostic bool `toml:"enable-diagnostic" json:"enable-diagnostic,string"`

// SlowStoreEvictingAffectedStoreRatioThreshold is the affected ratio threshold when judging whether a store is slow.
// A store's slowness must affect more than `store-count * SlowStoreEvictingAffectedStoreRatioThreshold` stores to trigger evicting.
SlowStoreEvictingAffectedStoreRatioThreshold float64 `toml:"slow-store-evicting-affected-store-ratio-threshold" json:"slow-store-evicting-affected-store-ratio-threshold,omitempty"`

// StoreLimitVersion is the version of store limit.
// v1: which is based on the region count by rate limit.
// v2: which is based on region size by window size.
StoreLimitVersion string `toml:"store-limit-version" json:"store-limit-version,omitempty"`

// HaltScheduling is the option to halt the scheduling. Once it's on, PD will halt the scheduling,
// and any other scheduling configs will be ignored.
HaltScheduling bool `toml:"halt-scheduling" json:"halt-scheduling,string,omitempty"`
}

// StoreLimitConfig is a config about scheduling rate limit of different types for a store.
// It is used as the per-store value type of a map keyed by uint64
// (presumably the store ID; verify against callers).
type StoreLimitConfig struct {
// AddPeer is the limit applied to the "add-peer" type.
AddPeer float64 `toml:"add-peer" json:"add-peer"`
// RemovePeer is the limit applied to the "remove-peer" type.
RemovePeer float64 `toml:"remove-peer" json:"remove-peer"`
}

// adjustSchedulers fills *v with a copy of defValue when *v is empty and
// leaves a non-empty *v untouched.
func adjustSchedulers(v *SchedulerConfigs, defValue SchedulerConfigs) {
	if len(*v) != 0 {
		return
	}
	// Clone instead of aliasing defValue: when reloading from storage the
	// config is handed to json.Unmarshal, which would otherwise be able to
	// overwrite the shared defaults (e.g. DefaultSchedulers).
	cloned := append(defValue[:0:0], defValue...)
	*v = cloned
}

// NewConfig creates a new config.
Expand Down Expand Up @@ -232,7 +98,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error {
return c.Adjust(meta, false)
}

// Adjust is used to adjust the resource manager configurations.
// Adjust is used to adjust the scheduling configurations.
func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error {
configMetaData := configutil.NewConfigMetadata(meta)
if err := configMetaData.CheckUndecoded(); err != nil {
Expand Down Expand Up @@ -270,70 +136,7 @@ func (c *Config) Adjust(meta *toml.MetaData, reloading bool) error {

configutil.AdjustInt64(&c.LeaderLease, utils.DefaultLeaderLease)

// adjust scheduling config
if !meta.IsDefined("max-snapshot-count") {
configutil.AdjustUint64(&c.MaxSnapshotCount, defaultMaxSnapshotCount)
}
if !meta.IsDefined("max-pending-peer-count") {
configutil.AdjustUint64(&c.MaxPendingPeerCount, defaultMaxPendingPeerCount)
}
configutil.AdjustDuration(&c.MaxStoreDownTime, defaultMaxStoreDownTime)
if !meta.IsDefined("leader-schedule-limit") {
configutil.AdjustUint64(&c.LeaderScheduleLimit, defaultLeaderScheduleLimit)
}
if !meta.IsDefined("region-schedule-limit") {
configutil.AdjustUint64(&c.RegionScheduleLimit, defaultRegionScheduleLimit)
}
if !meta.IsDefined("hot-region-schedule-limit") {
configutil.AdjustUint64(&c.HotRegionScheduleLimit, defaultHotRegionScheduleLimit)
}
if !meta.IsDefined("hot-region-cache-hits-threshold") {
configutil.AdjustUint64(&c.HotRegionCacheHitsThreshold, defaultHotRegionCacheHitsThreshold)
}
if !meta.IsDefined("tolerant-size-ratio") {
configutil.AdjustFloat64(&c.TolerantSizeRatio, defaultTolerantSizeRatio)
}
if !meta.IsDefined("scheduler-max-waiting-operator") {
configutil.AdjustUint64(&c.SchedulerMaxWaitingOperator, defaultSchedulerMaxWaitingOperator)
}
if !meta.IsDefined("leader-schedule-policy") {
configutil.AdjustString(&c.LeaderSchedulePolicy, defaultLeaderSchedulePolicy)
}

if !meta.IsDefined("store-limit-version") {
configutil.AdjustString(&c.StoreLimitVersion, defaultStoreLimitVersion)
}

if !meta.IsDefined("enable-joint-consensus") {
c.EnableJointConsensus = defaultEnableJointConsensus
}

configutil.AdjustFloat64(&c.LowSpaceRatio, defaultLowSpaceRatio)
configutil.AdjustFloat64(&c.HighSpaceRatio, defaultHighSpaceRatio)
if !meta.IsDefined("enable-diagnostic") {
c.EnableDiagnostic = defaultEnableDiagnostic
}

// new cluster:v2, old cluster:v1
if !meta.IsDefined("region-score-formula-version") && !reloading {
configutil.AdjustString(&c.RegionScoreFormulaVersion, defaultRegionScoreFormulaVersion)
}

if !meta.IsDefined("halt-scheduling") {
c.HaltScheduling = defaultHaltScheduling
}

adjustSchedulers(&c.Schedulers, DefaultSchedulers)

if c.StoreLimit == nil {
c.StoreLimit = make(map[uint64]StoreLimitConfig)
}

if !meta.IsDefined("slow-store-evicting-affected-store-ratio-threshold") {
configutil.AdjustFloat64(&c.SlowStoreEvictingAffectedStoreRatioThreshold, defaultSlowStoreEvictingAffectedStoreRatioThreshold)
}

return c.validateScheduleConfig()
return nil
}

func (c *Config) adjustLog(meta *configutil.ConfigMetaData) {
Expand Down Expand Up @@ -367,26 +170,3 @@ func (c *Config) Validate() error {

return nil
}

// validateScheduleConfig checks the scheduling-related settings and returns
// an error describing the first violated rule, or nil when all of them hold.
// The rules are evaluated in order:
//   - tolerant-size-ratio must not be negative;
//   - low-space-ratio and high-space-ratio must each lie within [0, 1];
//   - low-space-ratio must be strictly larger than high-space-ratio;
//   - leader-schedule-policy must be "count" or "size";
//   - slow-store-evicting-affected-store-ratio-threshold must be non-zero.
func (c *Config) validateScheduleConfig() error {
	switch {
	case c.TolerantSizeRatio < 0:
		return errors.New("tolerant-size-ratio should be non-negative")
	case c.LowSpaceRatio < 0 || c.LowSpaceRatio > 1:
		return errors.New("low-space-ratio should between 0 and 1")
	case c.HighSpaceRatio < 0 || c.HighSpaceRatio > 1:
		return errors.New("high-space-ratio should between 0 and 1")
	case c.LowSpaceRatio <= c.HighSpaceRatio:
		return errors.New("low-space-ratio should be larger than high-space-ratio")
	case c.LeaderSchedulePolicy != "count" && c.LeaderSchedulePolicy != "size":
		return errors.Errorf("leader-schedule-policy %v is invalid", c.LeaderSchedulePolicy)
	case c.SlowStoreEvictingAffectedStoreRatioThreshold == 0:
		return errors.Errorf("slow-store-evicting-affected-store-ratio-threshold is not set")
	}
	return nil
}
67 changes: 0 additions & 67 deletions pkg/mcs/scheduling/server/config_test.go

This file was deleted.

Loading

0 comments on commit ac93492

Please sign in to comment.