From b0b1e38d798f073a8fb4d2bf5ccf7e33cc8673f0 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 19 Dec 2023 17:21:22 +0800 Subject: [PATCH] schedule: fix location label in placement rule not work well with rule checker (#6660) (#6956) close tikv/pd#6637 Signed-off-by: ti-chi-bot Signed-off-by: Ryan Leung Co-authored-by: Ryan Leung Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- server/schedule/checker/replica_strategy.go | 6 ++ server/schedule/checker/rule_checker.go | 12 ++- server/schedule/checker/rule_checker_test.go | 91 ++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/server/schedule/checker/replica_strategy.go b/server/schedule/checker/replica_strategy.go index 92f35595f73..f293283fb57 100644 --- a/server/schedule/checker/replica_strategy.go +++ b/server/schedule/checker/replica_strategy.go @@ -87,6 +87,9 @@ func (s *ReplicaStrategy) SelectStoreToAdd(coLocationStores []*core.StoreInfo, e // SelectStoreToFix returns a store to replace down/offline old peer. The location // placement after scheduling is allowed to be worse than original. func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, old uint64) (uint64, bool) { + if len(coLocationStores) == 0 { + return 0, false + } // trick to avoid creating a slice with `old` removed. s.swapStoreToFirst(coLocationStores, old) return s.SelectStoreToAdd(coLocationStores[1:]) @@ -95,6 +98,9 @@ func (s *ReplicaStrategy) SelectStoreToFix(coLocationStores []*core.StoreInfo, o // SelectStoreToImprove returns a store to replace oldStore. The location // placement after scheduling should be better than original. func (s *ReplicaStrategy) SelectStoreToImprove(coLocationStores []*core.StoreInfo, old uint64) (uint64, bool) { + if len(coLocationStores) == 0 { + return 0, false + } // trick to avoid creating a slice with `old` removed. s.swapStoreToFirst(coLocationStores, old) oldStore := s.cluster.GetStore(old) diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 9298061d5c7..f9602ebc892 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -366,7 +366,7 @@ func (c *RuleChecker) allowLeader(fit *placement.RegionFit, peer *metapb.Peer) b } func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, rf *placement.RuleFit) (*operator.Operator, error) { - if len(rf.Rule.LocationLabels) == 0 || rf.Rule.Count <= 1 { + if len(rf.Rule.LocationLabels) == 0 { return nil, nil } @@ -376,7 +376,15 @@ func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, rf *placement.R if oldStore == 0 { return nil, nil } - newStore, filterByTempState := strategy.SelectStoreToImprove(ruleStores, oldStore) + var coLocationStores []*core.StoreInfo + regionStores := c.cluster.GetRegionStores(region) + for _, s := range regionStores { + if placement.MatchLabelConstraints(s, rf.Rule.LabelConstraints) { + coLocationStores = append(coLocationStores, s) + } + } + + newStore, filterByTempState := strategy.SelectStoreToImprove(coLocationStores, oldStore) if newStore == 0 { log.Debug("no replacement store", zap.Uint64("region-id", region.GetID())) c.handleFilterState(region, filterByTempState) diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index eeb1e7c8865..902e972e50f 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -1792,3 +1792,94 @@ func (suite *ruleCheckerTestSuite) TestPendingList() { _, exist = suite.rc.pendingList.Get(1) suite.False(exist) } + +func (suite *ruleCheckerTestSuite) TestLocationLabels() { + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z1", "rack": "r2", "host": "h1"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z1", "rack": "r2", "host": "h1"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) + suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) + suite.cluster.AddLeaderRegionWithRange(1, "", "", 1, 2, 5) + rule1 := &placement.Rule{ + GroupID: "pd", + ID: "test1", + Role: placement.Leader, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "zone", + Op: placement.In, + Values: []string{"z1"}, + }, + }, + LocationLabels: []string{"rack"}, + } + rule2 := &placement.Rule{ + GroupID: "pd", + ID: "test2", + Role: placement.Voter, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "zone", + Op: placement.In, + Values: []string{"z1"}, + }, + }, + LocationLabels: []string{"rack"}, + } + rule3 := &placement.Rule{ + GroupID: "pd", + ID: "test3", + Role: placement.Voter, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "zone", + Op: placement.In, + Values: []string{"z2"}, + }, + }, + LocationLabels: []string{"rack"}, + } + suite.ruleManager.SetRule(rule1) + suite.ruleManager.SetRule(rule2) + suite.ruleManager.SetRule(rule3) + suite.ruleManager.DeleteRule("pd", "default") + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("move-to-better-location", op.Desc()) +} + +func (suite *ruleCheckerTestSuite) TestTiFlashLocationLabels() { + suite.cluster.SetEnableUseJointConsensus(true) + suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z1", "rack": "r2", "host": "h1"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z1", "rack": "r2", "host": "h1"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) + suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "rack": "r3", "host": "h2"}) + suite.cluster.AddLabelsStore(7, 1, map[string]string{"engine": "tiflash"}) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{3, 5}, []uint64{7}) + + rule1 := &placement.Rule{ + GroupID: "tiflash", + ID: "test1", + Role: placement.Learner, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "engine", + Op: placement.In, + Values: []string{"tiflash"}, + }, + }, + } + suite.ruleManager.SetRule(rule1) + rule := suite.ruleManager.GetRule("pd", "default") + rule.LocationLabels = []string{"zone", "rack", "host"} + suite.ruleManager.SetRule(rule) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) +}