From 7fa19d3572b83505cf931fd3be7755ef513c51a4 Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 9 May 2024 11:03:16 +0800 Subject: [PATCH 01/24] check primary Signed-off-by: husharp --- pkg/election/leadership.go | 17 +++++++ pkg/mcs/discovery/discover.go | 62 ++++++++++++++++++++++++-- pkg/mcs/discovery/registry_entry.go | 2 + pkg/mcs/scheduling/server/server.go | 41 +++++++++++++++++ pkg/mcs/tso/server/server.go | 4 ++ pkg/mcs/utils/util.go | 49 ++++++++++++++++++++ pkg/member/member.go | 22 ++++----- pkg/member/participant.go | 18 ++++---- pkg/tso/allocator_manager.go | 4 ++ pkg/tso/global_allocator.go | 39 +++++++++++++++- server/apiv2/handlers/micro_service.go | 43 ++++++++++++++++++ 11 files changed, 277 insertions(+), 24 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 02f519dbc75..755a7d6d331 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -64,6 +64,8 @@ type Leadership struct { leaderKey string leaderValue string + LeaderWatch bool + keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc keepAliveCancelFuncLock syncutil.Mutex @@ -72,6 +74,14 @@ type Leadership struct { campaignTimes []time.Time } +func (ls *Leadership) SetLeaderWatch(val bool) { + ls.LeaderWatch = val +} + +func (ls *Leadership) GetLeaderValue() string { + return ls.leaderValue +} + // NewLeadership creates a new Leadership. func NewLeadership(client *clientv3.Client, leaderKey, purpose string) *Leadership { leadership := &Leadership{ @@ -375,6 +385,12 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } + // only API update the leader key to transfer the leader will meet + if ev.Type == mvccpb.PUT && ls.LeaderWatch { + log.Info("[LeaderWatch] current leadership is updated", zap.Int64("watchRevision", revision), + zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) + return + } } revision = wresp.Header.Revision + 1 } @@ -393,4 +409,5 @@ func (ls *Leadership) Reset() { } ls.keepAliveCancelFuncLock.Unlock() ls.getLease().Close() + ls.LeaderWatch = false } diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 1ce5ecda51d..55d5732ec34 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -15,12 +15,15 @@ package discovery import ( + "math/rand" "strconv" + "time" "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/utils" + "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/etcdutil" "go.etcd.io/etcd/clientv3" @@ -45,14 +48,14 @@ func Discover(cli *clientv3.Client, clusterID, serviceName string) ([]string, er } // GetMSMembers returns all the members of the specified service name. -func GetMSMembers(name string, client *clientv3.Client) ([]ServiceRegistryEntry, error) { - switch name { +func GetMSMembers(serviceName string, client *clientv3.Client) ([]ServiceRegistryEntry, error) { + switch serviceName { case utils.TSOServiceName, utils.SchedulingServiceName, utils.ResourceManagerServiceName: clusterID, err := etcdutil.GetClusterID(client, utils.ClusterIDPath) if err != nil { return nil, err } - servicePath := ServicePath(strconv.FormatUint(clusterID, 10), name) + servicePath := ServicePath(strconv.FormatUint(clusterID, 10), serviceName) resps, err := kv.NewSlowLogTxn(client).Then(clientv3.OpGet(servicePath, clientv3.WithPrefix())).Commit() if err != nil { return nil, errs.ErrEtcdKVGet.Wrap(err).GenWithStackByCause() @@ -75,5 +78,56 @@ func GetMSMembers(name string, client *clientv3.Client) ([]ServiceRegistryEntry, return entries, nil } - return nil, errors.Errorf("unknown service name %s", name) + return nil, errors.Errorf("unknown service name %s", serviceName) +} + +func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimary string) error { + log.Info("transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimary), zap.String("to", newPrimary)) + entries, err := GetMSMembers(serviceName, client) + if err != nil { + return err + } + + // Do nothing when I am the only member of cluster. + if len(entries) == 1 && newPrimary == "" { + return errors.New("no valid follower to transfer primary") + } + + var primaryIDs []string + var memberValues []string + for _, member := range entries { + if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.ServiceAddr == newPrimary) { + primaryIDs = append(primaryIDs, member.ServiceAddr) + memberValues = append(memberValues, string(member.MemberValue)) + } + } + if len(primaryIDs) == 0 { + return errors.New("no valid follower to transfer primary") + } + + r := rand.New(rand.NewSource(time.Now().UnixNano())) + nextPrimaryID := r.Intn(len(primaryIDs)) + + clusterID, err := etcdutil.GetClusterID(client, utils.ClusterIDPath) + if err != nil { + return errors.Errorf("failed to get cluster ID: %v", err) + } + + var primaryKey string + switch serviceName { + case utils.SchedulingServiceName: + primaryKey = endpoint.SchedulingPrimaryPath(clusterID) + case utils.TSOServiceName: + tsoRootPath := endpoint.TSOSvcRootPath(clusterID) + primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, utils.DefaultKeyspaceGroupID) + } + + // update primary key to notify old primary server. + putResp, err := kv.NewSlowLogTxn(client). + Then(clientv3.OpPut(primaryKey, memberValues[nextPrimaryID])). + Commit() + if err != nil || !putResp.Succeeded { + return errors.Errorf("failed to write primary flag for %s", serviceName) + } + return nil } diff --git a/pkg/mcs/discovery/registry_entry.go b/pkg/mcs/discovery/registry_entry.go index bf11ae5c8a4..ede9f12172e 100644 --- a/pkg/mcs/discovery/registry_entry.go +++ b/pkg/mcs/discovery/registry_entry.go @@ -23,11 +23,13 @@ import ( // ServiceRegistryEntry is the registry entry of a service type ServiceRegistryEntry struct { + Name string `json:"name"` ServiceAddr string `json:"service-addr"` Version string `json:"version"` GitHash string `json:"git-hash"` DeployPath string `json:"deploy-path"` StartTimestamp int64 `json:"start-timestamp"` + MemberValue []byte `json:"member-value"` } // Serialize this service registry entry diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 47a7cf9962b..adb72fd52c8 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -128,6 +128,10 @@ func (s *Server) GetBackendEndpoints() string { return s.cfg.BackendEndpoints } +func (s *Server) GetParticipant() *member.Participant { + return s.participant +} + // SetLogLevel sets log level. func (s *Server) SetLogLevel(level string) error { if !logutil.IsLevelLegal(level) { @@ -243,6 +247,17 @@ func (s *Server) primaryElectionLoop() { log.Info("the scheduling primary has changed, try to re-campaign a primary") } + // To make sure the expected leader(if exist) and primary are on the same server. + expectedPrimary := utils.GetExpectedPrimary(s.participant.GetLeaderPath(), s.GetClient()) + if expectedPrimary != "" && expectedPrimary != s.participant.GetLeadership().GetLeaderValue() { + log.Info("skip campaigning of scheduling primary and check later", + zap.String("server-name", s.Name()), + zap.String("target-primary-id", expectedPrimary), + zap.Uint64("member-id", s.participant.ID())) + time.Sleep(200 * time.Millisecond) + continue + } + s.campaignLeader() } } @@ -290,6 +305,9 @@ func (s *Server) campaignLeader() { member.ServiceMemberGauge.WithLabelValues(serviceName).Set(1) log.Info("scheduling primary is ready to serve", zap.String("scheduling-primary-name", s.participant.Name())) + exitPrimary := make(chan struct{}) + go s.primaryWatch(exitPrimary) + leaderTicker := time.NewTicker(utils.LeaderTickInterval) defer leaderTicker.Stop() @@ -304,10 +322,31 @@ func (s *Server) campaignLeader() { // Server is closed and it should return nil. log.Info("server is closed") return + case <-exitPrimary: + log.Info("no longer a primary/leader because primary have been updated, the scheduling primary/leader will step down") + return } } } +func (s *Server) primaryWatch(exitPrimary chan struct{}) { + _, revision, err := s.participant.GetPersistentLeader() + if err != nil { + log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + return + } + log.Info("[primary] start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) + // Watch will keep looping and never return unless the primary has changed. + s.participant.GetLeadership().SetLeaderWatch(true) + s.participant.GetLeadership().Watch(s.serverLoopCtx, revision+1) + s.participant.GetLeadership().SetLeaderWatch(false) + + utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) + + s.participant.UnsetLeader() + exitPrimary <- struct{}{} +} + // Close closes the server. func (s *Server) Close() { if !atomic.CompareAndSwapInt64(&s.isRunning, 1, 0) { @@ -425,6 +464,7 @@ func (s *Server) startServer() (err error) { GitHash: versioninfo.PDGitHash, DeployPath: deployPath, StartTimestamp: s.StartTimestamp(), + Name: s.Name(), } uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) @@ -436,6 +476,7 @@ func (s *Server) startServer() (err error) { ListenUrls: []string{s.cfg.GetAdvertiseListenAddr()}, } s.participant.InitInfo(p, endpoint.SchedulingSvcRootPath(s.clusterID), utils.PrimaryKey, "primary election") + s.serviceID.MemberValue = []byte(s.participant.MemberValue()) s.service = &Service{Server: s} s.AddServiceReadyCallback(s.startCluster) diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index c38c7142730..17f7312a8b7 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -380,6 +380,7 @@ func (s *Server) startServer() (err error) { GitHash: versioninfo.PDGitHash, DeployPath: deployPath, StartTimestamp: s.StartTimestamp(), + Name: s.Name(), } s.keyspaceGroupManager = tso.NewKeyspaceGroupManager( s.serverLoopCtx, s.serviceID, s.GetClient(), s.GetHTTPClient(), s.cfg.AdvertiseListenAddr, @@ -387,6 +388,9 @@ func (s *Server) startServer() (err error) { if err := s.keyspaceGroupManager.Initialize(); err != nil { return err } + // Initialize the service ID with the member value of the primary of the default keyspace group. + memberValue, err := s.GetMember(utils.DefaultKeyspaceID, utils.DefaultKeyspaceGroupID) + s.serviceID.MemberValue = []byte(memberValue.MemberValue()) s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} s.service = &Service{Server: s} diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index b6ac2eb37e5..7bbebef5693 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -16,6 +16,7 @@ package utils import ( "context" + "github.com/tikv/pd/pkg/storage/kv" "net" "net/http" "os" @@ -51,6 +52,9 @@ const ( ClusterIDPath = "/pd/cluster_id" // retryInterval is the interval to retry. retryInterval = time.Second + // ExpectedPrimary is the path to store the expected primary + // ONLY SET VALUE BY API + ExpectedPrimary = "expected_primary" ) // InitClusterID initializes the cluster ID. @@ -70,6 +74,51 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes) } +// GetExpectedPrimary indicates API has changed the primary, ONLY SET VALUE BY API. +func GetExpectedPrimary(keyPath string, client *clientv3.Client) string { + leader, err := etcdutil.GetValue(client, strings.Join([]string{keyPath, ExpectedPrimary}, "/")) + if err != nil { + log.Error("get expected primary key error", errs.ZapError(err)) + return "" + } + + return string(leader) +} + +// RemoveExpectedPrimary removes the expected primary key. +func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { + // remove expected leader key + resp, err := kv.NewSlowLogTxn(client). + Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). + Commit() + if err != nil && !resp.Succeeded { + log.Error("change primary error", errs.ZapError(err)) + return + } +} + +// SetExpectedPrimary sets the expected primary key when the current primary has exited. +func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { + // write a flag to indicate the current primary has exited + leaderRaw, err := etcdutil.GetValue(client, leaderPath) + if err != nil { + log.Error("[primary] get primary key error", zap.Error(err)) + return + } + + // write a flag to indicate the current primary has exited + resp, err := kv.NewSlowLogTxn(client). + Then( + clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"), string(leaderRaw)), + // indicate the current primary has exited + clientv3.OpDelete(leaderPath)). + Commit() + if err != nil && !resp.Succeeded { + log.Error("change primary error", errs.ZapError(err)) + return + } +} + // PromHandler is a handler to get prometheus metrics. func PromHandler() gin.HandlerFunc { return func(c *gin.Context) { diff --git a/pkg/member/member.go b/pkg/member/member.go index af504d83963..4522eb7ae33 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -148,8 +148,8 @@ func (m *EmbeddedEtcdMember) setLeader(member *pdpb.Member) { m.lastLeaderUpdatedTime.Store(time.Now()) } -// unsetLeader unsets the member's PD leader. -func (m *EmbeddedEtcdMember) unsetLeader() { +// UnsetLeader unsets the member's PD leader. +func (m *EmbeddedEtcdMember) UnsetLeader() { m.leader.Store(&pdpb.Member{}) m.lastLeaderUpdatedTime.Store(time.Now()) } @@ -210,8 +210,8 @@ func (m *EmbeddedEtcdMember) PreCheckLeader() error { return nil } -// getPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). -func (m *EmbeddedEtcdMember) getPersistentLeader() (*pdpb.Member, int64, error) { +// GetPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). +func (m *EmbeddedEtcdMember) GetPersistentLeader() (any, int64, error) { leader := &pdpb.Member{} ok, rev, err := etcdutil.GetProtoMsgWithModRev(m.client, m.GetLeaderPath(), leader) if err != nil { @@ -233,17 +233,17 @@ func (m *EmbeddedEtcdMember) CheckLeader() (ElectionLeader, bool) { return nil, true } - leader, revision, err := m.getPersistentLeader() + leaderRaw, revision, err := m.GetPersistentLeader() if err != nil { log.Error("getting pd leader meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, true } - if leader == nil { + if leaderRaw == nil { // no leader yet return nil, false } - + leader := leaderRaw.(*pdpb.Member) if m.IsSameLeader(leader) { // oh, we are already a PD leader, which indicates we may meet something wrong // in previous CampaignLeader. We should delete the leadership and campaign again. @@ -269,14 +269,14 @@ func (m *EmbeddedEtcdMember) CheckLeader() (ElectionLeader, bool) { func (m *EmbeddedEtcdMember) WatchLeader(ctx context.Context, leader *pdpb.Member, revision int64) { m.setLeader(leader) m.leadership.Watch(ctx, revision) - m.unsetLeader() + m.UnsetLeader() } // ResetLeader is used to reset the PD member's current leadership. // Basically it will reset the leader lease and unset leader info. func (m *EmbeddedEtcdMember) ResetLeader() { m.leadership.Reset() - m.unsetLeader() + m.UnsetLeader() } // CheckPriority checks whether the etcd leader should be moved according to the priority. @@ -324,8 +324,8 @@ func (m *EmbeddedEtcdMember) GetEtcdLeader() uint64 { } // IsSameLeader checks whether a server is the leader itself. -func (m *EmbeddedEtcdMember) IsSameLeader(leader *pdpb.Member) bool { - return leader.GetMemberId() == m.ID() +func (m *EmbeddedEtcdMember) IsSameLeader(leader any) bool { + return leader.(*pdpb.Member).GetMemberId() == m.ID() } // InitMemberInfo initializes the member info. diff --git a/pkg/member/participant.go b/pkg/member/participant.go index 8a0ffadd31e..f74d17aee22 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -154,8 +154,8 @@ func (m *Participant) setLeader(member participant) { m.lastLeaderUpdatedTime.Store(time.Now()) } -// unsetLeader unsets the member's leader. -func (m *Participant) unsetLeader() { +// UnsetLeader unsets the member's leader. +func (m *Participant) UnsetLeader() { leader := NewParticipantByService(m.serviceName) m.leader.Store(leader) m.lastLeaderUpdatedTime.Store(time.Now()) @@ -164,6 +164,7 @@ func (m *Participant) unsetLeader() { // EnableLeader declares the member itself to be the leader. func (m *Participant) EnableLeader() { m.setLeader(m.member) + utils.RemoveExpectedPrimary(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the leader. @@ -205,8 +206,8 @@ func (*Participant) PreCheckLeader() error { return nil } -// getPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). -func (m *Participant) getPersistentLeader() (participant, int64, error) { +// GetPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). +func (m *Participant) GetPersistentLeader() (any, int64, error) { leader := NewParticipantByService(m.serviceName) ok, rev, err := etcdutil.GetProtoMsgWithModRev(m.client, m.GetLeaderPath(), leader) if err != nil { @@ -228,17 +229,18 @@ func (m *Participant) CheckLeader() (ElectionLeader, bool) { return nil, true } - leader, revision, err := m.getPersistentLeader() + leaderRaw, revision, err := m.GetPersistentLeader() if err != nil { log.Error("getting the leader meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, true } - if leader == nil { + if leaderRaw == nil { // no leader yet return nil, false } + leader := leaderRaw.(participant) if m.IsSameLeader(leader) { // oh, we are already the leader, which indicates we may meet something wrong // in previous CampaignLeader. We should delete the leadership and campaign again. @@ -264,14 +266,14 @@ func (m *Participant) CheckLeader() (ElectionLeader, bool) { func (m *Participant) WatchLeader(ctx context.Context, leader participant, revision int64) { m.setLeader(leader) m.leadership.Watch(ctx, revision) - m.unsetLeader() + m.UnsetLeader() } // ResetLeader is used to reset the member's current leadership. // Basically it will reset the leader lease and unset leader info. func (m *Participant) ResetLeader() { m.leadership.Reset() - m.unsetLeader() + m.UnsetLeader() } // IsSameLeader checks whether a server is the leader itself. diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index f1683de1352..e3a0a8302f3 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -145,6 +145,10 @@ type ElectionMember interface { GetDCLocationPath(id uint64) string // PreCheckLeader does some pre-check before checking whether it's the leader. PreCheckLeader() error + // GetPersistentLeader returns the persistent leader. + GetPersistentLeader() (any, int64, error) + // UnsetLeader unsets the member's leader. + UnsetLeader() } // AllocatorManager is used to manage the TSO Allocators a PD server holds. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index f90dc5f26fe..1ca854dfed9 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -560,6 +560,17 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0)) } + // To make sure the expected leader(if exist) and primary are on the same server. + targetPrimary := mcsutils.GetExpectedPrimary(gta.member.GetLeaderPath(), gta.member.Client()) + if targetPrimary != "" && targetPrimary != gta.member.GetLeadership().GetLeaderValue() { + log.Info("skip campaigning of scheduling primary and check later", + zap.String("server-name", gta.member.Name()), + zap.String("target-primary-id", targetPrimary), + zap.Uint64("member-id", gta.member.ID())) + time.Sleep(200 * time.Millisecond) + continue + } + gta.campaignLeader() } } @@ -596,7 +607,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { gta.member.ResetLeader() }) - // maintain the the leadership, after this, TSO can be service. + // maintain the leadership, after this, TSO can be service. gta.member.KeepLeader(ctx) log.Info("campaign tso primary ok", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), @@ -635,6 +646,9 @@ func (gta *GlobalTSOAllocator) campaignLeader() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("tso-primary-name", gta.member.Name())) + exitPrimary := make(chan struct{}) + go gta.primaryWatch(exitPrimary) + leaderTicker := time.NewTicker(mcsutils.LeaderTickInterval) defer leaderTicker.Stop() @@ -651,10 +665,33 @@ func (gta *GlobalTSOAllocator) campaignLeader() { log.Info("exit leader campaign", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0)) return + case <-exitPrimary: + log.Info("no longer a primary because primary have been updated, the TSO primary/leader will step down") + return } } } +func (gta *GlobalTSOAllocator) primaryWatch(exitPrimary chan struct{}) { + _, revision, err := gta.member.GetPersistentLeader() + if err != nil { + log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + return + } + log.Info("[primary] start to watch the primary", + logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), + zap.String("campaign-tso-primary-name", gta.member.Name())) + // Watch will keep looping and never return unless the primary has changed. + gta.member.GetLeadership().SetLeaderWatch(true) + gta.member.GetLeadership().Watch(gta.ctx, revision+1) + gta.member.GetLeadership().SetLeaderWatch(false) + + mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) + + gta.member.UnsetLeader() + exitPrimary <- struct{}{} +} + func (gta *GlobalTSOAllocator) getMetrics() *tsoMetrics { return gta.timestampOracle.metrics } diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index fd44665530f..07fa53224d2 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -28,6 +28,7 @@ func RegisterMicroService(r *gin.RouterGroup) { router := r.Group("ms") router.GET("members/:service", GetMembers) router.GET("primary/:service", GetPrimary) + router.POST("primary/transfer/:service", TransferPrimary) } // GetMembers gets all members of the cluster for the specified service. @@ -77,3 +78,45 @@ func GetPrimary(c *gin.Context) { c.AbortWithStatusJSON(http.StatusInternalServerError, "please specify service") } + +// TransferPrimary transfers the primary member of the specified service. +// @Tags primary +// @Summary Transfer the primary member of the specified service. +// @Produce json +// @Param service path string true "service name" +// @Param new_primary query string false "new primary address" +// @Success 200 {object} string +// @Router /ms/primary/transfer/{service} [post] +func TransferPrimary(c *gin.Context) { + svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) + if !svr.IsAPIServiceMode() { + c.AbortWithStatusJSON(http.StatusNotFound, "not support micro service") + return + } + + if service := c.Param("service"); len(service) > 0 { + var input map[string]string + if err := c.Bind(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + newPrimary := "" + if v, ok := input["new_primary"]; ok { + newPrimary = v + } + oldPrimary, _ := svr.GetServicePrimaryAddr(c.Request.Context(), service) + if oldPrimary == newPrimary { + c.AbortWithStatusJSON(http.StatusInternalServerError, "new primary is the same as the old one") + return + } + if err := discovery.TransferPrimary(svr.GetClient(), service, oldPrimary, newPrimary); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, "success") + return + } + + c.AbortWithStatusJSON(http.StatusInternalServerError, "please specify service") +} From 1f13fa2ce85e2bae7d83591d39607dea4aae155c Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 9 May 2024 11:55:35 +0800 Subject: [PATCH 02/24] make test happy Signed-off-by: husharp --- pkg/election/leadership.go | 29 +++++++++++++++++++++-------- pkg/mcs/scheduling/server/server.go | 27 ++++++++++++++++++++++++--- pkg/mcs/utils/util.go | 8 +++----- pkg/tso/global_allocator.go | 27 ++++++++++++++++++++++++--- 4 files changed, 72 insertions(+), 19 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 755a7d6d331..5d0c5655a33 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -64,7 +64,10 @@ type Leadership struct { leaderKey string leaderValue string - LeaderWatch bool + leaderWatch struct { + syncutil.RWMutex + val bool + } keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc @@ -74,10 +77,6 @@ type Leadership struct { campaignTimes []time.Time } -func (ls *Leadership) SetLeaderWatch(val bool) { - ls.LeaderWatch = val -} - func (ls *Leadership) GetLeaderValue() string { return ls.leaderValue } @@ -123,6 +122,20 @@ func (ls *Leadership) GetLeaderKey() string { return ls.leaderKey } +// SetLeaderWatch sets the leader watch flag. +func (ls *Leadership) SetLeaderWatch(val bool) { + ls.leaderWatch.Lock() + ls.leaderWatch.val = val + ls.leaderWatch.Unlock() +} + +// GetLeaderWatch gets the leader watch flag. +func (ls *Leadership) GetLeaderWatch() bool { + ls.leaderWatch.RLock() + defer ls.leaderWatch.RUnlock() + return ls.leaderWatch.val +} + // GetCampaignTimesNum is used to get the campaign times of the leader within `campaignTimesRecordTimeout`. func (ls *Leadership) GetCampaignTimesNum() int { if ls == nil { @@ -386,8 +399,8 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { return } // only API update the leader key to transfer the leader will meet - if ev.Type == mvccpb.PUT && ls.LeaderWatch { - log.Info("[LeaderWatch] current leadership is updated", zap.Int64("watchRevision", revision), + if ev.Type == mvccpb.PUT && ls.GetLeaderWatch() { + log.Info("[LeaderWatch] current leadership is updated", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } @@ -409,5 +422,5 @@ func (ls *Leadership) Reset() { } ls.keepAliveCancelFuncLock.Unlock() ls.getLease().Close() - ls.LeaderWatch = false + ls.SetLeaderWatch(false) } diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index adb72fd52c8..0593058e3cd 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -17,6 +17,7 @@ package server import ( "context" "fmt" + "github.com/tikv/pd/pkg/utils/etcdutil" "net/http" "os" "os/signal" @@ -306,7 +307,7 @@ func (s *Server) campaignLeader() { log.Info("scheduling primary is ready to serve", zap.String("scheduling-primary-name", s.participant.Name())) exitPrimary := make(chan struct{}) - go s.primaryWatch(exitPrimary) + go s.primaryWatch(ctx, exitPrimary) leaderTicker := time.NewTicker(utils.LeaderTickInterval) defer leaderTicker.Stop() @@ -329,7 +330,7 @@ func (s *Server) campaignLeader() { } } -func (s *Server) primaryWatch(exitPrimary chan struct{}) { +func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { _, revision, err := s.participant.GetPersistentLeader() if err != nil { log.Error("[primary] getting the leader meets error", errs.ZapError(err)) @@ -341,10 +342,30 @@ func (s *Server) primaryWatch(exitPrimary chan struct{}) { s.participant.GetLeadership().Watch(s.serverLoopCtx, revision+1) s.participant.GetLeadership().SetLeaderWatch(false) + // only API update primary will set the expected leader + // check leader key whether deleted + leaderRaw, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) + if err != nil { + log.Error("[primary] get primary key error", zap.Error(err)) + return + } + if leaderRaw == nil { + log.Info("[primary] leader key is deleted, the primary will step down") + return + } + utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) s.participant.UnsetLeader() - exitPrimary <- struct{}{} + for { + select { + case <-ctx.Done(): + log.Info("[primary] exit the primary watch loop") + return + case exitPrimary <- struct{}{}: + return + } + } } // Close closes the server. diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 7bbebef5693..b4d737c99e3 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -16,7 +16,6 @@ package utils import ( "context" - "github.com/tikv/pd/pkg/storage/kv" "net" "net/http" "os" @@ -33,6 +32,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/soheilhy/cmux" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/apiutil/multiservicesapi" "github.com/tikv/pd/pkg/utils/etcdutil" @@ -91,7 +91,7 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { resp, err := kv.NewSlowLogTxn(client). Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). Commit() - if err != nil && !resp.Succeeded { + if err != nil || !resp.Succeeded { log.Error("change primary error", errs.ZapError(err)) return } @@ -99,13 +99,11 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { // SetExpectedPrimary sets the expected primary key when the current primary has exited. func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { - // write a flag to indicate the current primary has exited leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { log.Error("[primary] get primary key error", zap.Error(err)) return } - // write a flag to indicate the current primary has exited resp, err := kv.NewSlowLogTxn(client). Then( @@ -113,7 +111,7 @@ func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { // indicate the current primary has exited clientv3.OpDelete(leaderPath)). Commit() - if err != nil && !resp.Succeeded { + if err != nil || !resp.Succeeded { log.Error("change primary error", errs.ZapError(err)) return } diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 1ca854dfed9..b2e02ef09cb 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -18,6 +18,7 @@ import ( "context" "errors" "fmt" + "github.com/tikv/pd/pkg/utils/etcdutil" "runtime/trace" "sync" "sync/atomic" @@ -647,7 +648,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { zap.String("tso-primary-name", gta.member.Name())) exitPrimary := make(chan struct{}) - go gta.primaryWatch(exitPrimary) + go gta.primaryWatch(ctx, exitPrimary) leaderTicker := time.NewTicker(mcsutils.LeaderTickInterval) defer leaderTicker.Stop() @@ -672,7 +673,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { } } -func (gta *GlobalTSOAllocator) primaryWatch(exitPrimary chan struct{}) { +func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { _, revision, err := gta.member.GetPersistentLeader() if err != nil { log.Error("[primary] getting the leader meets error", errs.ZapError(err)) @@ -686,10 +687,30 @@ func (gta *GlobalTSOAllocator) primaryWatch(exitPrimary chan struct{}) { gta.member.GetLeadership().Watch(gta.ctx, revision+1) gta.member.GetLeadership().SetLeaderWatch(false) + // only API update primary will set the expected leader + // check leader key whether deleted + leaderRaw, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) + if err != nil { + log.Error("[primary] get primary key error", zap.Error(err)) + return + } + if leaderRaw == nil { + log.Info("[primary] leader key is deleted, the primary will step down") + return + } + mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) gta.member.UnsetLeader() - exitPrimary <- struct{}{} + for { + select { + case <-ctx.Done(): + log.Info("[primary] exit the primary watch loop") + return + case exitPrimary <- struct{}{}: + return + } + } } func (gta *GlobalTSOAllocator) getMetrics() *tsoMetrics { From af995cc466b5c3899b94b6828f24f09d075d0e8d Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 9 May 2024 14:50:34 +0800 Subject: [PATCH 03/24] address comment and add test Signed-off-by: husharp --- client/http/api.go | 5 ++ client/http/interface.go | 17 +++++ client/http/request_info.go | 1 + pkg/election/leadership.go | 13 +--- pkg/mcs/discovery/discover.go | 3 + pkg/mcs/scheduling/server/server.go | 9 +-- pkg/mcs/tso/server/server.go | 3 - pkg/tso/global_allocator.go | 5 +- pkg/tso/keyspace_group_manager.go | 1 + server/apiv2/handlers/micro_service.go | 2 +- tests/integrations/mcs/members/member_test.go | 66 +++++++++++++++++++ 11 files changed, 105 insertions(+), 20 deletions(-) diff --git a/client/http/api.go b/client/http/api.go index a1ca96b38f1..757ff3642fe 100644 --- a/client/http/api.go +++ b/client/http/api.go @@ -205,6 +205,11 @@ func MicroServicePrimary(service string) string { return fmt.Sprintf("%s/primary/%s", microServicePrefix, service) } +// MicroServicePrimaryTransfer returns the path of PD HTTP API to transfer the primary of microservice. +func MicroServicePrimaryTransfer(service string) string { + return fmt.Sprintf("%s/primary/transfer/%s", microServicePrefix, service) +} + // GetUpdateKeyspaceConfigURL returns the path of PD HTTP API to update keyspace config. func GetUpdateKeyspaceConfigURL(keyspaceName string) string { return fmt.Sprintf(KeyspaceConfig, keyspaceName) diff --git a/client/http/interface.go b/client/http/interface.go index 7b15291d9e7..2424598856f 100644 --- a/client/http/interface.go +++ b/client/http/interface.go @@ -95,6 +95,7 @@ type Client interface { /* Micro Service interfaces */ GetMicroServiceMembers(context.Context, string) ([]MicroServiceMember, error) GetMicroServicePrimary(context.Context, string) (string, error) + TransferMicroServicePrimary(context.Context, string, string) error DeleteOperators(context.Context) error /* Keyspace interface */ @@ -908,6 +909,22 @@ func (c *client) GetMicroServicePrimary(ctx context.Context, service string) (st return primary, err } +func (c *client) TransferMicroServicePrimary(ctx context.Context, service, newPrimary string) error { + reqData, err := json.Marshal(struct { + NewPrimary string `json:"new_primary"` + }{ + NewPrimary: newPrimary, + }) + if err != nil { + return errors.Trace(err) + } + return c.request(ctx, newRequestInfo(). + WithName(transferMicroServicePrimaryName). + WithURI(MicroServicePrimaryTransfer(service)). + WithMethod(http.MethodPost). + WithBody(reqData)) +} + // GetPDVersion gets the release version of the PD binary. func (c *client) GetPDVersion(ctx context.Context) (string, error) { var ver struct { diff --git a/client/http/request_info.go b/client/http/request_info.go index 0ce7072d1ba..6dfcfe0e905 100644 --- a/client/http/request_info.go +++ b/client/http/request_info.go @@ -72,6 +72,7 @@ const ( getMinResolvedTSByStoresIDsName = "GetMinResolvedTSByStoresIDs" getMicroServiceMembersName = "GetMicroServiceMembers" getMicroServicePrimaryName = "GetMicroServicePrimary" + transferMicroServicePrimaryName = "TransferMicroServicePrimary" getPDVersionName = "GetPDVersion" resetTSName = "ResetTS" resetBaseAllocIDName = "ResetBaseAllocID" diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 5d0c5655a33..63f8e8ea54f 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -64,10 +64,7 @@ type Leadership struct { leaderKey string leaderValue string - leaderWatch struct { - syncutil.RWMutex - val bool - } + leaderWatch atomic.Bool keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc @@ -124,16 +121,12 @@ func (ls *Leadership) GetLeaderKey() string { // SetLeaderWatch sets the leader watch flag. func (ls *Leadership) SetLeaderWatch(val bool) { - ls.leaderWatch.Lock() - ls.leaderWatch.val = val - ls.leaderWatch.Unlock() + ls.leaderWatch.Store(val) } // GetLeaderWatch gets the leader watch flag. func (ls *Leadership) GetLeaderWatch() bool { - ls.leaderWatch.RLock() - defer ls.leaderWatch.RUnlock() - return ls.leaderWatch.val + return ls.leaderWatch.Load() } // GetCampaignTimesNum is used to get the campaign times of the leader within `campaignTimesRecordTimeout`. diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 55d5732ec34..4635ec373ba 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -98,6 +98,9 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar for _, member := range entries { if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.ServiceAddr == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) + if string(member.MemberValue) == "" { + return errors.New("member value is empty") + } memberValues = append(memberValues, string(member.MemberValue)) } } diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 0593058e3cd..a6180ca4c3a 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -250,11 +250,12 @@ func (s *Server) primaryElectionLoop() { // To make sure the expected leader(if exist) and primary are on the same server. expectedPrimary := utils.GetExpectedPrimary(s.participant.GetLeaderPath(), s.GetClient()) - if expectedPrimary != "" && expectedPrimary != s.participant.GetLeadership().GetLeaderValue() { + if expectedPrimary != "" && expectedPrimary != s.participant.MemberValue() { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), zap.String("target-primary-id", expectedPrimary), - zap.Uint64("member-id", s.participant.ID())) + zap.Uint64("member-id", s.participant.ID()), + zap.String("cur-memberValue", s.participant.MemberValue())) time.Sleep(200 * time.Millisecond) continue } @@ -324,7 +325,7 @@ func (s *Server) campaignLeader() { log.Info("server is closed") return case <-exitPrimary: - log.Info("no longer a primary/leader because primary have been updated, the scheduling primary/leader will step down") + log.Info("no longer a primary because primary have been updated, the scheduling primary will step down") return } } @@ -357,10 +358,10 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) s.participant.UnsetLeader() + defer log.Info("[primary] exit the primary watch loop") for { select { case <-ctx.Done(): - log.Info("[primary] exit the primary watch loop") return case exitPrimary <- struct{}{}: return diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 17f7312a8b7..2a59183ad02 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -388,9 +388,6 @@ func (s *Server) startServer() (err error) { if err := s.keyspaceGroupManager.Initialize(); err != nil { return err } - // Initialize the service ID with the member value of the primary of the default keyspace group. - memberValue, err := s.GetMember(utils.DefaultKeyspaceID, utils.DefaultKeyspaceGroupID) - s.serviceID.MemberValue = []byte(memberValue.MemberValue()) s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} s.service = &Service{Server: s} diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index b2e02ef09cb..495c5ae6744 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -563,11 +563,12 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { // To make sure the expected leader(if exist) and primary are on the same server. targetPrimary := mcsutils.GetExpectedPrimary(gta.member.GetLeaderPath(), gta.member.Client()) - if targetPrimary != "" && targetPrimary != gta.member.GetLeadership().GetLeaderValue() { + if targetPrimary != "" && targetPrimary != gta.member.MemberValue() { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", gta.member.Name()), zap.String("target-primary-id", targetPrimary), - zap.Uint64("member-id", gta.member.ID())) + zap.Uint64("member-id", gta.member.ID()), + zap.String("cur-memberValue", gta.member.MemberValue())) time.Sleep(200 * time.Millisecond) continue } diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index 2930357e2b4..ec1a3fff6a2 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -737,6 +737,7 @@ func (kgm *KeyspaceGroupManager) updateKeyspaceGroup(group *endpoint.KeyspaceGro ListenUrls: []string{kgm.cfg.GetAdvertiseListenAddr()}, } participant.InitInfo(p, endpoint.KeyspaceGroupsElectionPath(kgm.tsoSvcRootPath, group.ID), mcsutils.PrimaryKey, "keyspace group primary election") + kgm.tsoServiceID.MemberValue = []byte(participant.MemberValue()) // If the keyspace group is in split, we should ensure that the primary elected by the new keyspace group // is always on the same TSO Server node as the primary of the old keyspace group, and this constraint cannot // be broken until the entire split process is completed. diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index 07fa53224d2..96179c41b0a 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -96,7 +96,7 @@ func TransferPrimary(c *gin.Context) { if service := c.Param("service"); len(service) > 0 { var input map[string]string - if err := c.Bind(&input); err != nil { + if err := c.BindJSON(&input); err != nil { c.String(http.StatusBadRequest, err.Error()) return } diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index 87a667e5344..4864fb3f099 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -17,6 +17,7 @@ package members_test import ( "context" "testing" + "time" "github.com/stretchr/testify/suite" pdClient "github.com/tikv/pd/client/http" @@ -35,6 +36,9 @@ type memberTestSuite struct { server *tests.TestServer backendEndpoints string dialClient pdClient.Client + + tsoNodes map[string]bs.Server + schedulingNodes map[string]bs.Server } func TestMemberTestSuite(t *testing.T) { @@ -65,6 +69,7 @@ func (suite *memberTestSuite) SetupTest() { }) } tests.WaitForPrimaryServing(re, nodes) + suite.tsoNodes = nodes // Scheduling nodes = make(map[string]bs.Server) @@ -76,6 +81,7 @@ func (suite *memberTestSuite) SetupTest() { }) } tests.WaitForPrimaryServing(re, nodes) + suite.schedulingNodes = nodes suite.cleanupFunc = append(suite.cleanupFunc, func() { cancel() @@ -113,3 +119,63 @@ func (suite *memberTestSuite) TestPrimary() { re.NoError(err) re.NotEmpty(primary) } + +func (suite *memberTestSuite) TestTransferPrimary() { + re := suite.Require() + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + + supportedServices := []string{"tso", "scheduling"} + for _, service := range supportedServices { + var nodes map[string]bs.Server + switch service { + case "tso": + nodes = suite.tsoNodes + case "scheduling": + nodes = suite.schedulingNodes + } + + // Test resign primary by random + primary, err = suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + err = suite.dialClient.TransferMicroServicePrimary(suite.ctx, service, "") + re.NoError(err) + + testutil.Eventually(re, func() bool { + for _, member := range nodes { + if member.GetAddr() != primary && member.IsServing() { + return true + } + } + return false + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + + // Test transfer primary to a specific node + var newPrimary string + for _, member := range nodes { + if member.GetAddr() != primary { + newPrimary = member.GetAddr() + break + } + } + err = suite.dialClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + re.NoError(err) + + testutil.Eventually(re, func() bool { + return nodes[newPrimary].IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + primary, err = suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + re.Equal(primary, newPrimary) + + // Test transfer primary to a non-exist node + newPrimary = "http://" + err = suite.dialClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + re.Error(err) + } +} From 8d36be5914a30eb8126d3a9ee54678befa5b72c8 Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 9 May 2024 21:19:08 +0800 Subject: [PATCH 04/24] only trigger by updating Signed-off-by: husharp --- pkg/election/leadership.go | 6 ++-- pkg/mcs/scheduling/server/server.go | 39 ++++++++++++-------------- pkg/mcs/utils/util.go | 8 ++++-- pkg/member/member.go | 10 +++---- pkg/member/participant.go | 9 +++--- pkg/tso/allocator_manager.go | 2 -- pkg/tso/global_allocator.go | 39 ++++++++++++-------------- server/apiv2/handlers/micro_service.go | 4 +-- 8 files changed, 56 insertions(+), 61 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 63f8e8ea54f..b9a5d8ac1c2 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -124,8 +124,8 @@ func (ls *Leadership) SetLeaderWatch(val bool) { ls.leaderWatch.Store(val) } -// GetLeaderWatch gets the leader watch flag. -func (ls *Leadership) GetLeaderWatch() bool { +// IsLeader gets the leader watch flag. +func (ls *Leadership) IsLeader() bool { return ls.leaderWatch.Load() } @@ -392,7 +392,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { return } // only API update the leader key to transfer the leader will meet - if ev.Type == mvccpb.PUT && ls.GetLeaderWatch() { + if ev.Type == mvccpb.PUT && ls.IsLeader() { log.Info("[LeaderWatch] current leadership is updated", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index a6180ca4c3a..bf0077395bf 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -249,7 +249,7 @@ func (s *Server) primaryElectionLoop() { } // To make sure the expected leader(if exist) and primary are on the same server. - expectedPrimary := utils.GetExpectedPrimary(s.participant.GetLeaderPath(), s.GetClient()) + expectedPrimary := utils.GetExpectedPrimary(s.GetClient(), s.participant.GetLeaderPath()) if expectedPrimary != "" && expectedPrimary != s.participant.MemberValue() { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), @@ -332,39 +332,36 @@ func (s *Server) campaignLeader() { } func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { - _, revision, err := s.participant.GetPersistentLeader() - if err != nil { + resp, err := etcdutil.EtcdKVGet(s.participant.GetLeadership().GetClient(), s.participant.GetLeaderPath()) + if err != nil || resp == nil || len(resp.Kvs) == 0 { log.Error("[primary] getting the leader meets error", errs.ZapError(err)) return } log.Info("[primary] start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) // Watch will keep looping and never return unless the primary has changed. s.participant.GetLeadership().SetLeaderWatch(true) - s.participant.GetLeadership().Watch(s.serverLoopCtx, revision+1) + s.participant.GetLeadership().Watch(s.serverLoopCtx, resp.Kvs[0].ModRevision+1) s.participant.GetLeadership().SetLeaderWatch(false) // only API update primary will set the expected leader - // check leader key whether deleted - leaderRaw, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) + curPrimary, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) if err != nil { - log.Error("[primary] get primary key error", zap.Error(err)) - return - } - if leaderRaw == nil { - log.Info("[primary] leader key is deleted, the primary will step down") + log.Error("[primary] getting the leader meets error", errs.ZapError(err)) return } + // only trigger by updating primary + if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) - utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) - - s.participant.UnsetLeader() - defer log.Info("[primary] exit the primary watch loop") - for { - select { - case <-ctx.Done(): - return - case exitPrimary <- struct{}{}: - return + s.participant.UnsetLeader() + defer log.Info("[primary] exit the primary watch loop") + for { + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: + return + } } } } diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index b4d737c99e3..62bf8b320ad 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -75,8 +75,8 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err } // GetExpectedPrimary indicates API has changed the primary, ONLY SET VALUE BY API. -func GetExpectedPrimary(keyPath string, client *clientv3.Client) string { - leader, err := etcdutil.GetValue(client, strings.Join([]string{keyPath, ExpectedPrimary}, "/")) +func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { + leader, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimary}, "/")) if err != nil { log.Error("get expected primary key error", errs.ZapError(err)) return "" @@ -86,7 +86,10 @@ func GetExpectedPrimary(keyPath string, client *clientv3.Client) string { } // RemoveExpectedPrimary removes the expected primary key. +// - removed when campaign success +// - removed when server is closed func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { + log.Info("remove expected primary key", zap.String("leaderPath", leaderPath)) // remove expected leader key resp, err := kv.NewSlowLogTxn(client). Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). @@ -99,6 +102,7 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { // SetExpectedPrimary sets the expected primary key when the current primary has exited. func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { + log.Info("set expected primary key", zap.String("leaderPath", leaderPath)) leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { log.Error("[primary] get primary key error", zap.Error(err)) diff --git a/pkg/member/member.go b/pkg/member/member.go index 4522eb7ae33..e1c8994cb9f 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -210,8 +210,8 @@ func (m *EmbeddedEtcdMember) PreCheckLeader() error { return nil } -// GetPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). -func (m *EmbeddedEtcdMember) GetPersistentLeader() (any, int64, error) { +// getPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). +func (m *EmbeddedEtcdMember) getPersistentLeader() (*pdpb.Member, int64, error) { leader := &pdpb.Member{} ok, rev, err := etcdutil.GetProtoMsgWithModRev(m.client, m.GetLeaderPath(), leader) if err != nil { @@ -233,17 +233,17 @@ func (m *EmbeddedEtcdMember) CheckLeader() (ElectionLeader, bool) { return nil, true } - leaderRaw, revision, err := m.GetPersistentLeader() + leader, revision, err := m.getPersistentLeader() if err != nil { log.Error("getting pd leader meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, true } - if leaderRaw == nil { + if leader == nil { // no leader yet return nil, false } - leader := leaderRaw.(*pdpb.Member) + if m.IsSameLeader(leader) { // oh, we are already a PD leader, which indicates we may meet something wrong // in previous CampaignLeader. We should delete the leadership and campaign again. diff --git a/pkg/member/participant.go b/pkg/member/participant.go index f74d17aee22..bd2ec4d0cf6 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -206,8 +206,8 @@ func (*Participant) PreCheckLeader() error { return nil } -// GetPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). -func (m *Participant) GetPersistentLeader() (any, int64, error) { +// getPersistentLeader gets the corresponding leader from etcd by given leaderPath (as the key). +func (m *Participant) getPersistentLeader() (participant, int64, error) { leader := NewParticipantByService(m.serviceName) ok, rev, err := etcdutil.GetProtoMsgWithModRev(m.client, m.GetLeaderPath(), leader) if err != nil { @@ -229,18 +229,17 @@ func (m *Participant) CheckLeader() (ElectionLeader, bool) { return nil, true } - leaderRaw, revision, err := m.GetPersistentLeader() + leader, revision, err := m.getPersistentLeader() if err != nil { log.Error("getting the leader meets error", errs.ZapError(err)) time.Sleep(200 * time.Millisecond) return nil, true } - if leaderRaw == nil { + if leader == nil { // no leader yet return nil, false } - leader := leaderRaw.(participant) if m.IsSameLeader(leader) { // oh, we are already the leader, which indicates we may meet something wrong // in previous CampaignLeader. We should delete the leadership and campaign again. diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index e3a0a8302f3..1e67646dd17 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -145,8 +145,6 @@ type ElectionMember interface { GetDCLocationPath(id uint64) string // PreCheckLeader does some pre-check before checking whether it's the leader. PreCheckLeader() error - // GetPersistentLeader returns the persistent leader. - GetPersistentLeader() (any, int64, error) // UnsetLeader unsets the member's leader. UnsetLeader() } diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 495c5ae6744..e17ec3f74ec 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -18,7 +18,6 @@ import ( "context" "errors" "fmt" - "github.com/tikv/pd/pkg/utils/etcdutil" "runtime/trace" "sync" "sync/atomic" @@ -33,6 +32,7 @@ import ( "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/tsoutil" "github.com/tikv/pd/pkg/utils/typeutil" @@ -562,7 +562,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { } // To make sure the expected leader(if exist) and primary are on the same server. - targetPrimary := mcsutils.GetExpectedPrimary(gta.member.GetLeaderPath(), gta.member.Client()) + targetPrimary := mcsutils.GetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) if targetPrimary != "" && targetPrimary != gta.member.MemberValue() { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", gta.member.Name()), @@ -675,8 +675,8 @@ func (gta *GlobalTSOAllocator) campaignLeader() { } func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { - _, revision, err := gta.member.GetPersistentLeader() - if err != nil { + resp, err := etcdutil.EtcdKVGet(gta.member.GetLeadership().GetClient(), gta.member.GetLeaderPath()) + if err != nil || resp == nil || len(resp.Kvs) == 0 { log.Error("[primary] getting the leader meets error", errs.ZapError(err)) return } @@ -685,31 +685,28 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha zap.String("campaign-tso-primary-name", gta.member.Name())) // Watch will keep looping and never return unless the primary has changed. gta.member.GetLeadership().SetLeaderWatch(true) - gta.member.GetLeadership().Watch(gta.ctx, revision+1) + gta.member.GetLeadership().Watch(gta.ctx, resp.Kvs[0].ModRevision+1) gta.member.GetLeadership().SetLeaderWatch(false) // only API update primary will set the expected leader - // check leader key whether deleted - leaderRaw, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) + curPrimary, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) if err != nil { - log.Error("[primary] get primary key error", zap.Error(err)) - return - } - if leaderRaw == nil { - log.Info("[primary] leader key is deleted, the primary will step down") + log.Error("[primary] getting the leader meets error", errs.ZapError(err)) return } - mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) + if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) - gta.member.UnsetLeader() - for { - select { - case <-ctx.Done(): - log.Info("[primary] exit the primary watch loop") - return - case exitPrimary <- struct{}{}: - return + gta.member.UnsetLeader() + for { + select { + case <-ctx.Done(): + log.Info("[primary] exit the primary watch loop") + return + case exitPrimary <- struct{}{}: + return + } } } } diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index 96179c41b0a..954313dbd75 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -84,8 +84,8 @@ func GetPrimary(c *gin.Context) { // @Summary Transfer the primary member of the specified service. // @Produce json // @Param service path string true "service name" -// @Param new_primary query string false "new primary address" -// @Success 200 {object} string +// @Param new_primary body string false "new primary address" +// @Success 200 string string // @Router /ms/primary/transfer/{service} [post] func TransferPrimary(c *gin.Context) { svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) From 2433f0c757a0afea34b42030c01f79f81d539018 Mon Sep 17 00:00:00 2001 From: husharp Date: Fri, 10 May 2024 14:49:38 +0800 Subject: [PATCH 05/24] change log Signed-off-by: husharp --- pkg/mcs/scheduling/server/server.go | 10 +++++----- pkg/mcs/utils/util.go | 6 +++--- pkg/tso/global_allocator.go | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index bf0077395bf..c17c1e5fe17 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -17,7 +17,6 @@ package server import ( "context" "fmt" - "github.com/tikv/pd/pkg/utils/etcdutil" "net/http" "os" "os/signal" @@ -56,6 +55,7 @@ import ( "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/apiutil" + "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/grpcutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/memberutil" @@ -334,10 +334,10 @@ func (s *Server) campaignLeader() { func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { resp, err := etcdutil.EtcdKVGet(s.participant.GetLeadership().GetClient(), s.participant.GetLeaderPath()) if err != nil || resp == nil || len(resp.Kvs) == 0 { - log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + log.Error("scheduling primary getting the primary meets error", errs.ZapError(err)) return } - log.Info("[primary] start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) + log.Info("scheduling primary start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) // Watch will keep looping and never return unless the primary has changed. s.participant.GetLeadership().SetLeaderWatch(true) s.participant.GetLeadership().Watch(s.serverLoopCtx, resp.Kvs[0].ModRevision+1) @@ -346,7 +346,7 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { // only API update primary will set the expected leader curPrimary, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) if err != nil { - log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + log.Error("scheduling primary getting the leader meets error", errs.ZapError(err)) return } // only trigger by updating primary @@ -354,7 +354,7 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) s.participant.UnsetLeader() - defer log.Info("[primary] exit the primary watch loop") + defer log.Info("scheduling primary exit the primary watch loop") for { select { case <-ctx.Done(): diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 62bf8b320ad..ccdf926a793 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -86,8 +86,8 @@ func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { } // RemoveExpectedPrimary removes the expected primary key. -// - removed when campaign success -// - removed when server is closed +// - removed when campaign new primary success +// - removed when old primary server is closed func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Info("remove expected primary key", zap.String("leaderPath", leaderPath)) // remove expected leader key @@ -105,7 +105,7 @@ func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Info("set expected primary key", zap.String("leaderPath", leaderPath)) leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { - log.Error("[primary] get primary key error", zap.Error(err)) + log.Error("get primary key error", zap.Error(err)) return } // write a flag to indicate the current primary has exited diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index e17ec3f74ec..9fb159bd3ad 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -677,10 +677,10 @@ func (gta *GlobalTSOAllocator) campaignLeader() { func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { resp, err := etcdutil.EtcdKVGet(gta.member.GetLeadership().GetClient(), gta.member.GetLeaderPath()) if err != nil || resp == nil || len(resp.Kvs) == 0 { - log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + log.Error("tso primary getting the primary meets error", errs.ZapError(err)) return } - log.Info("[primary] start to watch the primary", + log.Info("tso primary start to watch the primary", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("campaign-tso-primary-name", gta.member.Name())) // Watch will keep looping and never return unless the primary has changed. @@ -691,7 +691,7 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha // only API update primary will set the expected leader curPrimary, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) if err != nil { - log.Error("[primary] getting the leader meets error", errs.ZapError(err)) + log.Error("tso primary getting the leader meets error", errs.ZapError(err)) return } @@ -699,10 +699,10 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) gta.member.UnsetLeader() + defer log.Info("tso primary exit the primary watch loop") for { select { case <-ctx.Done(): - log.Info("[primary] exit the primary watch loop") return case exitPrimary <- struct{}{}: return From dd72b9c19707ccbdb1801d379b3982a7944df23f Mon Sep 17 00:00:00 2001 From: husharp Date: Fri, 10 May 2024 17:08:58 +0800 Subject: [PATCH 06/24] address comment Signed-off-by: husharp --- pkg/election/leadership.go | 4 ++-- pkg/mcs/discovery/discover.go | 17 +++++++++-------- pkg/mcs/scheduling/server/server.go | 12 +++++------- pkg/mcs/utils/util.go | 7 +++---- pkg/tso/global_allocator.go | 16 +++++++--------- server/apiv2/handlers/micro_service.go | 15 +++++++++++++-- 6 files changed, 39 insertions(+), 32 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index b9a5d8ac1c2..a85bcb5c637 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -391,9 +391,9 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } - // only API update the leader key to transfer the leader will meet + // only API update the leader key to transfer the primary will meet if ev.Type == mvccpb.PUT && ls.IsLeader() { - log.Info("[LeaderWatch] current leadership is updated", + log.Info("[PrimaryWatch] current leadership is updated", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 4635ec373ba..e5186901426 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -81,7 +81,8 @@ func GetMSMembers(serviceName string, client *clientv3.Client) ([]ServiceRegistr return nil, errors.Errorf("unknown service name %s", serviceName) } -func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimary string) error { +// TransferPrimary transfers the primary of the specified service. +func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimary string, keyspaceGroupID uint32) error { log.Info("transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimary), zap.String("to", newPrimary)) entries, err := GetMSMembers(serviceName, client) if err != nil { @@ -89,23 +90,23 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } // Do nothing when I am the only member of cluster. - if len(entries) == 1 && newPrimary == "" { - return errors.New("no valid follower to transfer primary") + if len(entries) == 1 { + return errors.New("no valid secondary to transfer primary") } var primaryIDs []string - var memberValues []string + var secondaryValues []string for _, member := range entries { if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.ServiceAddr == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) if string(member.MemberValue) == "" { return errors.New("member value is empty") } - memberValues = append(memberValues, string(member.MemberValue)) + secondaryValues = append(secondaryValues, string(member.MemberValue)) } } if len(primaryIDs) == 0 { - return errors.New("no valid follower to transfer primary") + return errors.New("no valid secondary to transfer primary") } r := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -122,12 +123,12 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar primaryKey = endpoint.SchedulingPrimaryPath(clusterID) case utils.TSOServiceName: tsoRootPath := endpoint.TSOSvcRootPath(clusterID) - primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, utils.DefaultKeyspaceGroupID) + primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) } // update primary key to notify old primary server. putResp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(primaryKey, memberValues[nextPrimaryID])). + Then(clientv3.OpPut(primaryKey, secondaryValues[nextPrimaryID])). Commit() if err != nil || !putResp.Succeeded { return errors.Errorf("failed to write primary flag for %s", serviceName) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index c17c1e5fe17..de7d0376618 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -355,13 +355,11 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { s.participant.UnsetLeader() defer log.Info("scheduling primary exit the primary watch loop") - for { - select { - case <-ctx.Done(): - return - case exitPrimary <- struct{}{}: - return - } + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: + return } } } diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index ccdf926a793..913a5669cd7 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -76,18 +76,17 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err // GetExpectedPrimary indicates API has changed the primary, ONLY SET VALUE BY API. func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { - leader, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimary}, "/")) + primary, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimary}, "/")) if err != nil { log.Error("get expected primary key error", errs.ZapError(err)) return "" } - return string(leader) + return string(primary) } // RemoveExpectedPrimary removes the expected primary key. -// - removed when campaign new primary success -// - removed when old primary server is closed +// - removed when campaign new primary successfully func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Info("remove expected primary key", zap.String("leaderPath", leaderPath)) // remove expected leader key diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 9fb159bd3ad..8e5cf04c923 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -668,7 +668,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0)) return case <-exitPrimary: - log.Info("no longer a primary because primary have been updated, the TSO primary/leader will step down") + log.Info("no longer a primary because primary have been updated, the TSO primary will step down") return } } @@ -694,19 +694,17 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha log.Error("tso primary getting the leader meets error", errs.ZapError(err)) return } - + // only trigger by updating primary if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) gta.member.UnsetLeader() defer log.Info("tso primary exit the primary watch loop") - for { - select { - case <-ctx.Done(): - return - case exitPrimary <- struct{}{}: - return - } + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: + return } } } diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index 954313dbd75..b27594790b3 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -16,9 +16,11 @@ package handlers import ( "net/http" + "strconv" "github.com/gin-gonic/gin" "github.com/tikv/pd/pkg/mcs/discovery" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/server" "github.com/tikv/pd/server/apiv2/middlewares" ) @@ -101,16 +103,25 @@ func TransferPrimary(c *gin.Context) { return } - newPrimary := "" + newPrimary, keyspaceGroupID := "", utils.DefaultKeyspaceGroupID if v, ok := input["new_primary"]; ok { newPrimary = v } + + if v, ok := input["keyspace_group_id"]; ok { + keyspaceGroupIDRaw, err := strconv.ParseUint(v, 10, 32) + if err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + keyspaceGroupID = uint32(keyspaceGroupIDRaw) + } oldPrimary, _ := svr.GetServicePrimaryAddr(c.Request.Context(), service) if oldPrimary == newPrimary { c.AbortWithStatusJSON(http.StatusInternalServerError, "new primary is the same as the old one") return } - if err := discovery.TransferPrimary(svr.GetClient(), service, oldPrimary, newPrimary); err != nil { + if err := discovery.TransferPrimary(svr.GetClient(), service, oldPrimary, newPrimary, keyspaceGroupID); err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) return } From a39300e04db71a6858f29cbcd000fb5e71f5857f Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 13 May 2024 13:23:28 +0800 Subject: [PATCH 07/24] change to name Signed-off-by: husharp --- cmd/pd-server/main.go | 3 +++ pkg/mcs/discovery/discover.go | 2 +- pkg/mcs/resourcemanager/server/config.go | 1 + pkg/mcs/scheduling/server/config/config.go | 1 + pkg/mcs/tso/server/config.go | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cmd/pd-server/main.go b/cmd/pd-server/main.go index bd75309ed8a..1b63a1fc04d 100644 --- a/cmd/pd-server/main.go +++ b/cmd/pd-server/main.go @@ -94,6 +94,7 @@ func NewTSOServiceCommand() *cobra.Command { Short: "Run the TSO service", Run: tso.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this TSO member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") @@ -114,6 +115,7 @@ func NewSchedulingServiceCommand() *cobra.Command { Short: "Run the scheduling service", Run: scheduling.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this Scheduling member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") @@ -134,6 +136,7 @@ func NewResourceManagerServiceCommand() *cobra.Command { Short: "Run the resource manager service", Run: resource_manager.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this resource management member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index e5186901426..525acd8275e 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -97,7 +97,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar var primaryIDs []string var secondaryValues []string for _, member := range entries { - if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.ServiceAddr == newPrimary) { + if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) if string(member.MemberValue) == "" { return errors.New("member value is empty") diff --git a/pkg/mcs/resourcemanager/server/config.go b/pkg/mcs/resourcemanager/server/config.go index 70862ffb89c..a86fc67f019 100644 --- a/pkg/mcs/resourcemanager/server/config.go +++ b/pkg/mcs/resourcemanager/server/config.go @@ -194,6 +194,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") diff --git a/pkg/mcs/scheduling/server/config/config.go b/pkg/mcs/scheduling/server/config/config.go index 091771bc38c..07bb12049c0 100644 --- a/pkg/mcs/scheduling/server/config/config.go +++ b/pkg/mcs/scheduling/server/config/config.go @@ -104,6 +104,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") diff --git a/pkg/mcs/tso/server/config.go b/pkg/mcs/tso/server/config.go index 06e9054e117..29be191ec26 100644 --- a/pkg/mcs/tso/server/config.go +++ b/pkg/mcs/tso/server/config.go @@ -167,6 +167,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") From 51708b50bf7659653f47f93ad8a1fee964774b64 Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 13 May 2024 14:43:41 +0800 Subject: [PATCH 08/24] make test happy Signed-off-by: husharp --- pkg/mcs/resourcemanager/server/testutil.go | 4 +++- pkg/mcs/scheduling/server/testutil.go | 2 ++ pkg/mcs/tso/server/testutil.go | 2 ++ tests/integrations/mcs/members/member_test.go | 2 +- tests/testutil.go | 2 ++ 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/resourcemanager/server/testutil.go b/pkg/mcs/resourcemanager/server/testutil.go index 3de0e32c0ab..a4615ca554b 100644 --- a/pkg/mcs/resourcemanager/server/testutil.go +++ b/pkg/mcs/resourcemanager/server/testutil.go @@ -49,16 +49,18 @@ func NewTestServer(ctx context.Context, re *require.Assertions, cfg *Config) (*S // GenerateConfig generates a new config with the given options. func GenerateConfig(c *Config) (*Config, error) { arguments := []string{ + "--name=" + c.Name, "--listen-addr=" + c.ListenAddr, "--advertise-listen-addr=" + c.AdvertiseListenAddr, "--backend-endpoints=" + c.BackendEndpoints, } flagSet := pflag.NewFlagSet("test", pflag.ContinueOnError) + flagSet.StringP("name", "", "", "human-readable name for this resource manager member") flagSet.BoolP("version", "V", false, "print version information and exit") flagSet.StringP("config", "", "", "config file") flagSet.StringP("backend-endpoints", "", "", "url for etcd client") - flagSet.StringP("listen-addr", "", "", "listen address for tso service") + flagSet.StringP("listen-addr", "", "", "listen address for resource manager service") flagSet.StringP("advertise-listen-addr", "", "", "advertise urls for listen address (default '${listen-addr}')") flagSet.StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") flagSet.StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") diff --git a/pkg/mcs/scheduling/server/testutil.go b/pkg/mcs/scheduling/server/testutil.go index 74baac44808..4ebc9266125 100644 --- a/pkg/mcs/scheduling/server/testutil.go +++ b/pkg/mcs/scheduling/server/testutil.go @@ -50,12 +50,14 @@ func NewTestServer(ctx context.Context, re *require.Assertions, cfg *config.Conf // GenerateConfig generates a new config with the given options. func GenerateConfig(c *config.Config) (*config.Config, error) { arguments := []string{ + "--name=" + c.Name, "--listen-addr=" + c.ListenAddr, "--advertise-listen-addr=" + c.AdvertiseListenAddr, "--backend-endpoints=" + c.BackendEndpoints, } flagSet := pflag.NewFlagSet("test", pflag.ContinueOnError) + flagSet.StringP("name", "", "", "human-readable name for this scheduling member") flagSet.BoolP("version", "V", false, "print version information and exit") flagSet.StringP("config", "", "", "config file") flagSet.StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/pkg/mcs/tso/server/testutil.go b/pkg/mcs/tso/server/testutil.go index cf5d45e7754..5dcfd4759b9 100644 --- a/pkg/mcs/tso/server/testutil.go +++ b/pkg/mcs/tso/server/testutil.go @@ -34,12 +34,14 @@ func MustNewGrpcClient(re *require.Assertions, addr string) (*grpc.ClientConn, t // GenerateConfig generates a new config with the given options. func GenerateConfig(c *Config) (*Config, error) { arguments := []string{ + "--name=" + c.Name, "--listen-addr=" + c.ListenAddr, "--advertise-listen-addr=" + c.AdvertiseListenAddr, "--backend-endpoints=" + c.BackendEndpoints, } flagSet := pflag.NewFlagSet("test", pflag.ContinueOnError) + flagSet.StringP("name", "", "", "human-readable name for this tso member") flagSet.BoolP("version", "V", false, "print version information and exit") flagSet.StringP("config", "", "", "config file") flagSet.StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index 4864fb3f099..9cc493475f3 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -158,7 +158,7 @@ func (suite *memberTestSuite) TestTransferPrimary() { var newPrimary string for _, member := range nodes { if member.GetAddr() != primary { - newPrimary = member.GetAddr() + newPrimary = member.Name() break } } diff --git a/tests/testutil.go b/tests/testutil.go index 5d9905af64c..db07f2eb5ed 100644 --- a/tests/testutil.go +++ b/tests/testutil.go @@ -84,6 +84,7 @@ func StartSingleTSOTestServerWithoutCheck(ctx context.Context, re *require.Asser cfg := tso.NewConfig() cfg.BackendEndpoints = backendEndpoints cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr cfg, err := tso.GenerateConfig(cfg) re.NoError(err) // Setup the logger. @@ -121,6 +122,7 @@ func StartSingleSchedulingTestServer(ctx context.Context, re *require.Assertions cfg := sc.NewConfig() cfg.BackendEndpoints = backendEndpoints cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr cfg, err := scheduling.GenerateConfig(cfg) re.NoError(err) From c6d2bc34fb58cefe5ad7d798dbceea2f4a69b4c1 Mon Sep 17 00:00:00 2001 From: husharp Date: Tue, 14 May 2024 10:35:18 +0800 Subject: [PATCH 09/24] address comment and change some comments Signed-off-by: husharp --- pkg/election/leadership.go | 22 +++++++++++----------- pkg/mcs/scheduling/server/server.go | 12 ++++++------ pkg/mcs/utils/util.go | 4 ++-- pkg/tso/global_allocator.go | 10 +++++----- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index a85bcb5c637..cafe60505ad 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -64,14 +64,14 @@ type Leadership struct { leaderKey string leaderValue string - leaderWatch atomic.Bool - keepAliveCtx context.Context keepAliveCancelFunc context.CancelFunc keepAliveCancelFuncLock syncutil.Mutex // campaignTimes is used to record the campaign times of the leader within `campaignTimesRecordTimeout`. // It is ordered by time to prevent the leader from campaigning too frequently. campaignTimes []time.Time + // primaryWatch is ONLY set to true when the use `/ms/primary/transfer` API. + primaryWatch atomic.Bool } func (ls *Leadership) GetLeaderValue() string { @@ -119,14 +119,14 @@ func (ls *Leadership) GetLeaderKey() string { return ls.leaderKey } -// SetLeaderWatch sets the leader watch flag. -func (ls *Leadership) SetLeaderWatch(val bool) { - ls.leaderWatch.Store(val) +// SetPrimaryWatch sets the primary watch flag. +func (ls *Leadership) SetPrimaryWatch(val bool) { + ls.primaryWatch.Store(val) } -// IsLeader gets the leader watch flag. -func (ls *Leadership) IsLeader() bool { - return ls.leaderWatch.Load() +// IsPrimary gets the primary watch flag. +func (ls *Leadership) IsPrimary() bool { + return ls.primaryWatch.Load() } // GetCampaignTimesNum is used to get the campaign times of the leader within `campaignTimesRecordTimeout`. @@ -392,8 +392,8 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { return } // only API update the leader key to transfer the primary will meet - if ev.Type == mvccpb.PUT && ls.IsLeader() { - log.Info("[PrimaryWatch] current leadership is updated", + if ev.Type == mvccpb.PUT && ls.IsPrimary() { + log.Info("current leadership is updated", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } @@ -415,5 +415,5 @@ func (ls *Leadership) Reset() { } ls.keepAliveCancelFuncLock.Unlock() ls.getLease().Close() - ls.SetLeaderWatch(false) + ls.SetPrimaryWatch(false) } diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index de7d0376618..4e2a4a095d5 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -255,7 +255,7 @@ func (s *Server) primaryElectionLoop() { zap.String("server-name", s.Name()), zap.String("target-primary-id", expectedPrimary), zap.Uint64("member-id", s.participant.ID()), - zap.String("cur-memberValue", s.participant.MemberValue())) + zap.String("cur-member-value", s.participant.MemberValue())) time.Sleep(200 * time.Millisecond) continue } @@ -339,17 +339,17 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { } log.Info("scheduling primary start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) // Watch will keep looping and never return unless the primary has changed. - s.participant.GetLeadership().SetLeaderWatch(true) - s.participant.GetLeadership().Watch(s.serverLoopCtx, resp.Kvs[0].ModRevision+1) - s.participant.GetLeadership().SetLeaderWatch(false) + s.participant.GetLeadership().SetPrimaryWatch(true) + s.participant.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) + s.participant.GetLeadership().SetPrimaryWatch(false) - // only API update primary will set the expected leader + // only `/ms/primary/transfer` API update primary will set the expected primary curPrimary, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) if err != nil { log.Error("scheduling primary getting the leader meets error", errs.ZapError(err)) return } - // only trigger by updating primary + // `exitPrimary` only triggered by updating primary if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 913a5669cd7..17ed86ae896 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -88,7 +88,7 @@ func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { // RemoveExpectedPrimary removes the expected primary key. // - removed when campaign new primary successfully func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { - log.Info("remove expected primary key", zap.String("leaderPath", leaderPath)) + log.Info("remove expected primary key", zap.String("leader-path", leaderPath)) // remove expected leader key resp, err := kv.NewSlowLogTxn(client). Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). @@ -101,7 +101,7 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { // SetExpectedPrimary sets the expected primary key when the current primary has exited. func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { - log.Info("set expected primary key", zap.String("leaderPath", leaderPath)) + log.Info("set expected primary key", zap.String("leader-path", leaderPath)) leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { log.Error("get primary key error", zap.Error(err)) diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 8e5cf04c923..a1a742c0059 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -684,17 +684,17 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("campaign-tso-primary-name", gta.member.Name())) // Watch will keep looping and never return unless the primary has changed. - gta.member.GetLeadership().SetLeaderWatch(true) - gta.member.GetLeadership().Watch(gta.ctx, resp.Kvs[0].ModRevision+1) - gta.member.GetLeadership().SetLeaderWatch(false) + gta.member.GetLeadership().SetPrimaryWatch(true) + gta.member.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) + gta.member.GetLeadership().SetPrimaryWatch(false) - // only API update primary will set the expected leader + // only `/ms/primary/transfer` API update primary will set the expected primary curPrimary, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) if err != nil { log.Error("tso primary getting the leader meets error", errs.ZapError(err)) return } - // only trigger by updating primary + // `exitPrimary` only triggered by updating primary if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) From a4c5c29eedb23eedc7595b3b62eb3f7e0b3ae8ca Mon Sep 17 00:00:00 2001 From: husharp Date: Tue, 14 May 2024 14:28:29 +0800 Subject: [PATCH 10/24] add more test Signed-off-by: husharp --- pkg/mcs/discovery/discover.go | 12 +- pkg/mcs/scheduling/server/server.go | 2 +- pkg/mcs/utils/util.go | 12 +- pkg/member/member.go | 2 + pkg/tso/global_allocator.go | 2 +- tests/integrations/mcs/members/member_test.go | 142 +++++++++++++++++- tests/testutil.go | 2 +- 7 files changed, 163 insertions(+), 11 deletions(-) diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 525acd8275e..ae9027ed040 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -126,12 +126,20 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) } + // remove possible residual value + utils.RemoveExpectedPrimary(client, primaryKey) + + // only give the primary lease to the new primary + grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) + if err != nil { + return errors.Errorf("failed to grant lease for %s, err: %v", serviceName, err) + } // update primary key to notify old primary server. putResp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(primaryKey, secondaryValues[nextPrimaryID])). + Then(clientv3.OpPut(primaryKey, secondaryValues[nextPrimaryID], clientv3.WithLease(grantResp.ID))). Commit() if err != nil || !putResp.Succeeded { - return errors.Errorf("failed to write primary flag for %s", serviceName) + return errors.Errorf("failed to write primary flag for %s, err: %v", serviceName, err) } return nil } diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 4e2a4a095d5..4dd41c8d4eb 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -325,7 +325,7 @@ func (s *Server) campaignLeader() { log.Info("server is closed") return case <-exitPrimary: - log.Info("no longer a primary because primary have been updated, the scheduling primary will step down") + log.Info("no longer be primary because primary have been updated, the scheduling primary will step down") return } } diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 17ed86ae896..7a38d642f7f 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -87,6 +87,7 @@ func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { // RemoveExpectedPrimary removes the expected primary key. // - removed when campaign new primary successfully +// - removed when appoint new primary by API. func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Info("remove expected primary key", zap.String("leader-path", leaderPath)) // remove expected leader key @@ -94,7 +95,7 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). Commit() if err != nil || !resp.Succeeded { - log.Error("change primary error", errs.ZapError(err)) + log.Error("change expected primary error", errs.ZapError(err)) return } } @@ -107,15 +108,20 @@ func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Error("get primary key error", zap.Error(err)) return } + grantResp, err := client.Grant(client.Ctx(), DefaultLeaderLease) + if err != nil { + log.Error("grant lease for expected primary error", errs.ZapError(err)) + return + } // write a flag to indicate the current primary has exited resp, err := kv.NewSlowLogTxn(client). Then( - clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"), string(leaderRaw)), + clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"), string(leaderRaw), clientv3.WithLease(grantResp.ID)), // indicate the current primary has exited clientv3.OpDelete(leaderPath)). Commit() if err != nil || !resp.Succeeded { - log.Error("change primary error", errs.ZapError(err)) + log.Error("change expected primary error", errs.ZapError(err)) return } } diff --git a/pkg/member/member.go b/pkg/member/member.go index e1c8994cb9f..b9027703403 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -32,6 +32,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/etcdutil" "go.etcd.io/etcd/clientv3" @@ -157,6 +158,7 @@ func (m *EmbeddedEtcdMember) UnsetLeader() { // EnableLeader sets the member itself to a PD leader. func (m *EmbeddedEtcdMember) EnableLeader() { m.setLeader(m.member) + utils.RemoveExpectedPrimary(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the PD leader. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index a1a742c0059..99af41bacaa 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -668,7 +668,7 @@ func (gta *GlobalTSOAllocator) campaignLeader() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0)) return case <-exitPrimary: - log.Info("no longer a primary because primary have been updated, the TSO primary will step down") + log.Info("no longer be primary because primary have been updated, the TSO primary will step down") return } } diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index 9cc493475f3..69c848937b5 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -16,13 +16,14 @@ package members_test import ( "context" + "fmt" "testing" "time" + "github.com/pingcap/failpoint" "github.com/stretchr/testify/suite" pdClient "github.com/tikv/pd/client/http" bs "github.com/tikv/pd/pkg/basicserver" - "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/utils/tempurl" "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/tests" @@ -61,7 +62,7 @@ func (suite *memberTestSuite) SetupTest() { // TSO nodes := make(map[string]bs.Server) - for i := 0; i < utils.DefaultKeyspaceGroupReplicaCount; i++ { + for i := 0; i < 3; i++ { s, cleanup := tests.StartSingleTSOTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) nodes[s.GetAddr()] = s suite.cleanupFunc = append(suite.cleanupFunc, func() { @@ -102,7 +103,7 @@ func (suite *memberTestSuite) TestMembers() { re := suite.Require() members, err := suite.dialClient.GetMicroServiceMembers(suite.ctx, "tso") re.NoError(err) - re.Len(members, utils.DefaultKeyspaceGroupReplicaCount) + re.Len(members, 3) members, err = suite.dialClient.GetMicroServiceMembers(suite.ctx, "scheduling") re.NoError(err) @@ -120,6 +121,42 @@ func (suite *memberTestSuite) TestPrimary() { re.NotEmpty(primary) } +func (suite *memberTestSuite) TestCampaignPrimaryWhileServerClose() { + re := suite.Require() + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + + supportedServices := []string{"tso", "scheduling"} + for _, service := range supportedServices { + var nodes map[string]bs.Server + switch service { + case "tso": + nodes = suite.tsoNodes + case "scheduling": + nodes = suite.schedulingNodes + } + + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + + // Close old and new primary to mock campaign primary + for _, member := range nodes { + if member.GetAddr() != primary { + nodes[member.Name()].Close() + break + } + } + nodes[primary].Close() + tests.WaitForPrimaryServing(re, nodes) + + // primary should be different with before + onlyPrimary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + re.NotEqual(primary, onlyPrimary) + } +} + func (suite *memberTestSuite) TestTransferPrimary() { re := suite.Require() primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, "tso") @@ -179,3 +216,102 @@ func (suite *memberTestSuite) TestTransferPrimary() { re.Error(err) } } + +func (suite *memberTestSuite) TestCampaignPrimaryAfterTransfer() { + re := suite.Require() + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + + supportedServices := []string{"tso", "scheduling"} + for _, service := range supportedServices { + var nodes map[string]bs.Server + switch service { + case "tso": + nodes = suite.tsoNodes + case "scheduling": + nodes = suite.schedulingNodes + } + + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + + // Test transfer primary to a specific node + var newPrimary string + for _, member := range nodes { + if member.GetAddr() != primary { + newPrimary = member.Name() + break + } + } + err = suite.dialClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + re.NoError(err) + + tests.WaitForPrimaryServing(re, nodes) + newPrimary, err = suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + re.NotEqual(primary, newPrimary) + + // Close old and new primary to mock campaign primary + nodes[primary].Close() + nodes[newPrimary].Close() + tests.WaitForPrimaryServing(re, nodes) + // Primary should be different with before + onlyPrimary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + re.NotEqual(primary, onlyPrimary) + re.NotEqual(newPrimary, onlyPrimary) + } +} + +func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpired() { + re := suite.Require() + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + + supportedServices := []string{"tso", "scheduling"} + for _, service := range supportedServices { + var nodes map[string]bs.Server + switch service { + case "tso": + nodes = suite.tsoNodes + case "scheduling": + nodes = suite.schedulingNodes + } + + primary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + + // Test transfer primary to a specific node + var newPrimary string + for _, member := range nodes { + if member.GetAddr() != primary { + newPrimary = member.Name() + break + } + } + // Mock the new primary can not grant leader which means the lease will expire + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", fmt.Sprintf("return(\"%s\")", newPrimary))) + err = suite.dialClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + re.NoError(err) + + // Wait for the old primary exit and new primary campaign + testutil.Eventually(re, func() bool { + return !nodes[primary].IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + // Wait for the new primary lease to expire which is `DefaultLeaderLease` + time.Sleep(4 * time.Second) + // TODO: Add campaign times check in mcs to avoid frequent campaign + // for now, close the current primary to mock the server down + nodes[newPrimary].Close() + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/election/skipGrantLeader")) + + tests.WaitForPrimaryServing(re, nodes) + // Primary should be different with before + onlyPrimary, err := suite.dialClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + re.NotEqual(newPrimary, onlyPrimary) + } +} diff --git a/tests/testutil.go b/tests/testutil.go index db07f2eb5ed..495dd547c4f 100644 --- a/tests/testutil.go +++ b/tests/testutil.go @@ -159,7 +159,7 @@ func WaitForPrimaryServing(re *require.Assertions, serverMap map[string]bs.Serve } } return false - }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + }, testutil.WithWaitFor(10*time.Second), testutil.WithTickInterval(50*time.Millisecond)) return primary } From b235bc1152a7942d71d42222b0e078d91ccd0106 Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 1 Jul 2024 11:47:29 +0800 Subject: [PATCH 11/24] address comment and add more comment Signed-off-by: husharp --- pkg/election/leadership.go | 14 +++--- pkg/mcs/discovery/discover.go | 9 ++-- pkg/mcs/scheduling/server/server.go | 18 +++++-- pkg/mcs/utils/util.go | 12 +++-- pkg/tso/global_allocator.go | 24 ++++++---- tests/integrations/mcs/members/member_test.go | 47 +++++++++++++++++++ 6 files changed, 95 insertions(+), 29 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 3071b5f7bea..ef5a4b7ac10 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -71,14 +71,11 @@ type Leadership struct { // campaignTimes is used to record the campaign times of the leader within `campaignTimesRecordTimeout`. // It is ordered by time to prevent the leader from campaigning too frequently. campaignTimes []time.Time - // primaryWatch is ONLY set to true when the use `/ms/primary/transfer` API. + // primaryWatch is for the primary watch only, + // which is used to reuse `Watch` interface in `Leadership`. primaryWatch atomic.Bool } -func (ls *Leadership) GetLeaderValue() string { - return ls.leaderValue -} - // NewLeadership creates a new Leadership. func NewLeadership(client *clientv3.Client, leaderKey, purpose string) *Leadership { leadership := &Leadership{ @@ -392,10 +389,11 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } - // only API update the leader key to transfer the primary will meet + // ONLY `/ms/primary/transfer` API update primary will meet this condition. if ev.Type == mvccpb.PUT && ls.IsPrimary() { - log.Info("current leadership is updated", - zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) + log.Info("current leadership is updated", zap.Int64("revision", wresp.Header.Revision), + zap.String("leader-key", ls.leaderKey), zap.Any("value", ls.leaderValue), + zap.String("purpose", ls.purpose)) return } } diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index ae9027ed040..6813cccadf7 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -15,6 +15,7 @@ package discovery import ( + "fmt" "math/rand" "strconv" "time" @@ -91,7 +92,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar // Do nothing when I am the only member of cluster. if len(entries) == 1 { - return errors.New("no valid secondary to transfer primary") + return errors.New(fmt.Sprintf("no valid secondary to transfer primary, the only member is %s", entries[0].Name)) } var primaryIDs []string @@ -100,7 +101,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) if string(member.MemberValue) == "" { - return errors.New("member value is empty") + return errors.New(fmt.Sprintf("member %s value is empty", member.Name)) } secondaryValues = append(secondaryValues, string(member.MemberValue)) } @@ -126,10 +127,10 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) } - // remove possible residual value + // remove possible residual value. utils.RemoveExpectedPrimary(client, primaryKey) - // only give the primary lease to the new primary + // grant the primary lease to the new primary. grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) if err != nil { return errors.Errorf("failed to grant lease for %s, err: %v", serviceName, err) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 4dd41c8d4eb..3fe6187f8f8 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -248,12 +248,14 @@ func (s *Server) primaryElectionLoop() { log.Info("the scheduling primary has changed, try to re-campaign a primary") } - // To make sure the expected leader(if exist) and primary are on the same server. + // To make sure the expected primary(if existed) and new primary are on the same server. expectedPrimary := utils.GetExpectedPrimary(s.GetClient(), s.participant.GetLeaderPath()) + // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. + // expected primary ONLY SET BY `/ms/primary/transfer` API. if expectedPrimary != "" && expectedPrimary != s.participant.MemberValue() { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), - zap.String("target-primary-id", expectedPrimary), + zap.String("expected-primary-id", expectedPrimary), zap.Uint64("member-id", s.participant.ID()), zap.String("cur-member-value", s.participant.MemberValue())) time.Sleep(200 * time.Millisecond) @@ -331,6 +333,11 @@ func (s *Server) campaignLeader() { } } +// primaryWatch watches `/ms/primary/transfer` API whether changed the primary. +// 1. modify the expected primary flag to the new primary +// 2. modify memory status +// 3. exit the primary watch loop +// 4. delete the leader key func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { resp, err := etcdutil.EtcdKVGet(s.participant.GetLeadership().GetClient(), s.participant.GetLeaderPath()) if err != nil || resp == nil || len(resp.Kvs) == 0 { @@ -343,21 +350,22 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { s.participant.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) s.participant.GetLeadership().SetPrimaryWatch(false) - // only `/ms/primary/transfer` API update primary will set the expected primary + // only `/ms/primary/transfer` API update primary will set `leaderPath` to the expected primary. curPrimary, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) if err != nil { log.Error("scheduling primary getting the leader meets error", errs.ZapError(err)) return } - // `exitPrimary` only triggered by updating primary if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + // 1. modify the expected primary flag to the new primary. utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) - + // 2. modify memory status. s.participant.UnsetLeader() defer log.Info("scheduling primary exit the primary watch loop") select { case <-ctx.Done(): return + // 3. exit the primary watch loop, 4.`exitPrimary` will help delete the leader key. case exitPrimary <- struct{}{}: return } diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 7a38d642f7f..93b3cdfa104 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -52,8 +52,12 @@ const ( ClusterIDPath = "/pd/cluster_id" // retryInterval is the interval to retry. retryInterval = time.Second - // ExpectedPrimary is the path to store the expected primary - // ONLY SET VALUE BY API + // ExpectedPrimary is the path to store the expected primary , ONLY Triggered BY `/ms/primary/transfer` API. + // This flag likes a fence to avoid exited 2 primaries in the cluster simultaneously. + // 1. Since follower will campaign a new primary when it found the `leader_key` is deleted. + // **We can ensure `expected_primary` is set before deleting the `leader_key`.** + // 2. Old primary will set `expected_primary` firstly, + // then delete the `leader_key` which will trigger the follower to campaign a new primary. ExpectedPrimary = "expected_primary" ) @@ -74,7 +78,7 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes) } -// GetExpectedPrimary indicates API has changed the primary, ONLY SET VALUE BY API. +// GetExpectedPrimary indicates API has changed the primary. func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { primary, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimary}, "/")) if err != nil { @@ -86,7 +90,7 @@ func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { } // RemoveExpectedPrimary removes the expected primary key. -// - removed when campaign new primary successfully +// - removed when campaign new primary successfully. // - removed when appoint new primary by API. func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { log.Info("remove expected primary key", zap.String("leader-path", leaderPath)) diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 99af41bacaa..3b6a3c4aa69 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -561,12 +561,14 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0)) } - // To make sure the expected leader(if exist) and primary are on the same server. - targetPrimary := mcsutils.GetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) - if targetPrimary != "" && targetPrimary != gta.member.MemberValue() { - log.Info("skip campaigning of scheduling primary and check later", + // To make sure the expected primary(if existed) and new primary are on the same server. + expectedPrimary := mcsutils.GetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) + // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. + // expected primary ONLY SET BY `/ms/primary/transfer` API. + if expectedPrimary != "" && expectedPrimary != gta.member.MemberValue() { + log.Info("skip campaigning of tso primary and check later", zap.String("server-name", gta.member.Name()), - zap.String("target-primary-id", targetPrimary), + zap.String("expected-primary-id", expectedPrimary), zap.Uint64("member-id", gta.member.ID()), zap.String("cur-memberValue", gta.member.MemberValue())) time.Sleep(200 * time.Millisecond) @@ -674,6 +676,11 @@ func (gta *GlobalTSOAllocator) campaignLeader() { } } +// primaryWatch watches `/ms/primary/transfer` API whether changed the primary. +// 1. modify the expected primary flag to the new primary +// 2. modify memory status +// 3. exit the primary watch loop +// 4. delete the leader key func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { resp, err := etcdutil.EtcdKVGet(gta.member.GetLeadership().GetClient(), gta.member.GetLeaderPath()) if err != nil || resp == nil || len(resp.Kvs) == 0 { @@ -688,21 +695,22 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha gta.member.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) gta.member.GetLeadership().SetPrimaryWatch(false) - // only `/ms/primary/transfer` API update primary will set the expected primary + // only `/ms/primary/transfer` API update primary will set `leaderPath` to the expected primary. curPrimary, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) if err != nil { log.Error("tso primary getting the leader meets error", errs.ZapError(err)) return } - // `exitPrimary` only triggered by updating primary if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + // 1. modify the expected primary flag to the new primary. mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) - + // 2. modify memory status. gta.member.UnsetLeader() defer log.Info("tso primary exit the primary watch loop") select { case <-ctx.Done(): return + // 3. exit the primary watch loop, 4.`exitPrimary` will help delete the leader key. case exitPrimary <- struct{}{}: return } diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index ba8f6efd7fd..27a65b75f2d 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -270,6 +270,53 @@ func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpired() { re.NoError(err) re.NotEmpty(primary) + supportedServices := []string{"tso", "scheduling"} + for _, service := range supportedServices { + var nodes map[string]bs.Server + switch service { + case "tso": + nodes = suite.tsoNodes + case "scheduling": + nodes = suite.schedulingNodes + } + + primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, service) + re.NoError(err) + + // Test transfer primary to a specific node + var newPrimary string + for _, member := range nodes { + if member.GetAddr() != primary { + newPrimary = member.Name() + break + } + } + // Mock the new primary can not grant leader which means the lease will expire + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", fmt.Sprintf("return(\"%s\")", newPrimary))) + err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + re.NoError(err) + + // Wait for the old primary exit and new primary campaign + testutil.Eventually(re, func() bool { + return !nodes[primary].IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + // Wait for the new primary lease to expire which is `DefaultLeaderLease` + time.Sleep(4 * time.Second) + // TODO: Add campaign times check in mcs to avoid frequent campaign + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/election/skipGrantLeader")) + // Can still work after lease expired + tests.WaitForPrimaryServing(re, nodes) + } +} + +// TestTransferPrimaryWhileLeaseExpiredAndServerDown tests transfer primary while lease expired and server down +func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpiredAndServerDown() { + re := suite.Require() + primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, "tso") + re.NoError(err) + re.NotEmpty(primary) + supportedServices := []string{"tso", "scheduling"} for _, service := range supportedServices { var nodes map[string]bs.Server From f659782d3e2ca62b334d3c3e6960dd8b5fc32feb Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 1 Jul 2024 14:51:38 +0800 Subject: [PATCH 12/24] add more comment Signed-off-by: husharp --- pkg/mcs/discovery/discover.go | 2 +- pkg/mcs/discovery/registry_entry.go | 2 ++ server/apiv2/handlers/micro_service.go | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 6813cccadf7..8c4bc0556cb 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -107,7 +107,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } } if len(primaryIDs) == 0 { - return errors.New("no valid secondary to transfer primary") + return errors.New(fmt.Sprintf("no valid secondary to transfer primary, from %s to %s", oldPrimary, newPrimary)) } r := rand.New(rand.NewSource(time.Now().UnixNano())) diff --git a/pkg/mcs/discovery/registry_entry.go b/pkg/mcs/discovery/registry_entry.go index ede9f12172e..ad442d9a61a 100644 --- a/pkg/mcs/discovery/registry_entry.go +++ b/pkg/mcs/discovery/registry_entry.go @@ -23,6 +23,8 @@ import ( // ServiceRegistryEntry is the registry entry of a service type ServiceRegistryEntry struct { + // The specific value will be assigned only if the startup parameter is added. + // If not assigned, the default value(service-hostname) will be used. Name string `json:"name"` ServiceAddr string `json:"service-addr"` Version string `json:"version"` diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index b27594790b3..b1c290a1fb1 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -86,7 +86,7 @@ func GetPrimary(c *gin.Context) { // @Summary Transfer the primary member of the specified service. // @Produce json // @Param service path string true "service name" -// @Param new_primary body string false "new primary address" +// @Param new_primary body string false "new primary name" // @Success 200 string string // @Router /ms/primary/transfer/{service} [post] func TransferPrimary(c *gin.Context) { From 204ffd51807040c99b7a62a2a552694445b990a4 Mon Sep 17 00:00:00 2001 From: husharp Date: Wed, 3 Jul 2024 13:17:37 +0800 Subject: [PATCH 13/24] address comment Signed-off-by: husharp --- pkg/mcs/discovery/discover.go | 8 ++++---- pkg/mcs/scheduling/server/server.go | 4 ++-- pkg/mcs/utils/util.go | 30 ++++++++++++++--------------- pkg/member/member.go | 2 +- pkg/member/participant.go | 2 +- pkg/tso/global_allocator.go | 4 ++-- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 8c4bc0556cb..d06c886b70b 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -96,14 +96,14 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } var primaryIDs []string - var secondaryValues []string + var memberValues []string for _, member := range entries { if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) if string(member.MemberValue) == "" { return errors.New(fmt.Sprintf("member %s value is empty", member.Name)) } - secondaryValues = append(secondaryValues, string(member.MemberValue)) + memberValues = append(memberValues, string(member.MemberValue)) } } if len(primaryIDs) == 0 { @@ -128,7 +128,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } // remove possible residual value. - utils.RemoveExpectedPrimary(client, primaryKey) + utils.ClearPrimaryExpectationFlag(client, primaryKey) // grant the primary lease to the new primary. grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) @@ -137,7 +137,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } // update primary key to notify old primary server. putResp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(primaryKey, secondaryValues[nextPrimaryID], clientv3.WithLease(grantResp.ID))). + Then(clientv3.OpPut(primaryKey, memberValues[nextPrimaryID], clientv3.WithLease(grantResp.ID))). Commit() if err != nil || !putResp.Succeeded { return errors.Errorf("failed to write primary flag for %s, err: %v", serviceName, err) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 9822f5cb8e8..991d806b0b0 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -249,7 +249,7 @@ func (s *Server) primaryElectionLoop() { } // To make sure the expected primary(if existed) and new primary are on the same server. - expectedPrimary := utils.GetExpectedPrimary(s.GetClient(), s.participant.GetLeaderPath()) + expectedPrimary := utils.AttachExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. if expectedPrimary != "" && expectedPrimary != s.participant.MemberValue() { @@ -358,7 +358,7 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { } if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { // 1. modify the expected primary flag to the new primary. - utils.SetExpectedPrimary(s.participant.Client(), s.participant.GetLeaderPath()) + utils.MarkExpectedPrimaryFlag(s.participant.Client(), s.participant.GetLeaderPath()) // 2. modify memory status. s.participant.UnsetLeader() defer log.Info("scheduling primary exit the primary watch loop") diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 37e5c425d9d..f4e9f6d8465 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -52,13 +52,13 @@ const ( ClusterIDPath = "/pd/cluster_id" // retryInterval is the interval to retry. retryInterval = time.Second - // ExpectedPrimary is the path to store the expected primary , ONLY Triggered BY `/ms/primary/transfer` API. + // ExpectedPrimaryFlag is the flag to indicate the expected primary, ONLY marked BY `/ms/primary/transfer` API. // This flag likes a fence to avoid exited 2 primaries in the cluster simultaneously. // 1. Since follower will campaign a new primary when it found the `leader_key` is deleted. // **We can ensure `expected_primary` is set before deleting the `leader_key`.** - // 2. Old primary will set `expected_primary` firstly, + // 2. Old primary will mark `expected_primary` firstly, // then delete the `leader_key` which will trigger the follower to campaign a new primary. - ExpectedPrimary = "expected_primary" + ExpectedPrimaryFlag = "expected_primary" ) // InitClusterID initializes the cluster ID. @@ -78,25 +78,25 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes) } -// GetExpectedPrimary indicates API has changed the primary. -func GetExpectedPrimary(client *clientv3.Client, leaderPath string) string { - primary, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimary}, "/")) +// AttachExpectedPrimaryFlag attaches the expected primary flag. +func AttachExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) string { + primary, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/")) if err != nil { - log.Error("get expected primary key error", errs.ZapError(err)) + log.Error("get expected primary flag error", errs.ZapError(err)) return "" } return string(primary) } -// RemoveExpectedPrimary removes the expected primary key. +// ClearPrimaryExpectationFlag clears the expected primary flag. // - removed when campaign new primary successfully. // - removed when appoint new primary by API. -func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { - log.Info("remove expected primary key", zap.String("leader-path", leaderPath)) +func ClearPrimaryExpectationFlag(client *clientv3.Client, leaderPath string) { + log.Info("remove expected primary flag", zap.String("primary-path", leaderPath)) // remove expected leader key resp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"))). + Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/"))). Commit() if err != nil || !resp.Succeeded { log.Error("change expected primary error", errs.ZapError(err)) @@ -104,9 +104,9 @@ func RemoveExpectedPrimary(client *clientv3.Client, leaderPath string) { } } -// SetExpectedPrimary sets the expected primary key when the current primary has exited. -func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { - log.Info("set expected primary key", zap.String("leader-path", leaderPath)) +// MarkExpectedPrimaryFlag marks the expected primary flag when the current primary has exited. +func MarkExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) { + log.Info("set expected primary flag", zap.String("leader-path", leaderPath)) leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { log.Error("get primary key error", zap.Error(err)) @@ -120,7 +120,7 @@ func SetExpectedPrimary(client *clientv3.Client, leaderPath string) { // write a flag to indicate the current primary has exited resp, err := kv.NewSlowLogTxn(client). Then( - clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimary}, "/"), string(leaderRaw), clientv3.WithLease(grantResp.ID)), + clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/"), string(leaderRaw), clientv3.WithLease(grantResp.ID)), // indicate the current primary has exited clientv3.OpDelete(leaderPath)). Commit() diff --git a/pkg/member/member.go b/pkg/member/member.go index 5512b1aa47c..d66507069ec 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -158,7 +158,7 @@ func (m *EmbeddedEtcdMember) UnsetLeader() { // EnableLeader sets the member itself to a PD leader. func (m *EmbeddedEtcdMember) EnableLeader() { m.setLeader(m.member) - utils.RemoveExpectedPrimary(m.client, m.GetLeaderPath()) + utils.ClearPrimaryExpectationFlag(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the PD leader. diff --git a/pkg/member/participant.go b/pkg/member/participant.go index bd2ec4d0cf6..821f885471c 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -164,7 +164,7 @@ func (m *Participant) UnsetLeader() { // EnableLeader declares the member itself to be the leader. func (m *Participant) EnableLeader() { m.setLeader(m.member) - utils.RemoveExpectedPrimary(m.client, m.GetLeaderPath()) + utils.ClearPrimaryExpectationFlag(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the leader. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 3b6a3c4aa69..7bebbe65884 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -562,7 +562,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { } // To make sure the expected primary(if existed) and new primary are on the same server. - expectedPrimary := mcsutils.GetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) + expectedPrimary := mcsutils.AttachExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. if expectedPrimary != "" && expectedPrimary != gta.member.MemberValue() { @@ -703,7 +703,7 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha } if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { // 1. modify the expected primary flag to the new primary. - mcsutils.SetExpectedPrimary(gta.member.Client(), gta.member.GetLeaderPath()) + mcsutils.MarkExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // 2. modify memory status. gta.member.UnsetLeader() defer log.Info("tso primary exit the primary watch loop") From ec8e737816cbc9d7727af88ccd632c052d701634 Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 4 Jul 2024 17:50:26 +0800 Subject: [PATCH 14/24] remove redundant wait Signed-off-by: husharp --- pkg/election/leadership.go | 7 ++++++- tests/integrations/mcs/members/member_test.go | 11 ++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index a0240a3f04c..622f35accb8 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -173,10 +173,15 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl ls.setLease(newLease) failpoint.Inject("skipGrantLeader", func(val failpoint.Value) { + name, ok := val.(string) + if name == "" { + // return directly when not set the name + failpoint.Return(errors.Errorf("failed to grant lease")) + } var member pdpb.Member _ = member.Unmarshal([]byte(leaderData)) - name, ok := val.(string) if ok && member.Name == name { + // only return when the name is set and the name is equal to the leader name failpoint.Return(errors.Errorf("failed to grant lease")) } }) diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index 27a65b75f2d..e5fadf73b59 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -16,7 +16,6 @@ package members_test import ( "context" - "fmt" "testing" "time" @@ -292,17 +291,16 @@ func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpired() { } } // Mock the new primary can not grant leader which means the lease will expire - re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", fmt.Sprintf("return(\"%s\")", newPrimary))) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", `return()`)) err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) re.NoError(err) // Wait for the old primary exit and new primary campaign + // cannot check newPrimary isServing when skipGrantLeader is enabled testutil.Eventually(re, func() bool { return !nodes[primary].IsServing() }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - // Wait for the new primary lease to expire which is `DefaultLeaderLease` - time.Sleep(4 * time.Second) // TODO: Add campaign times check in mcs to avoid frequent campaign re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/election/skipGrantLeader")) // Can still work after lease expired @@ -339,17 +337,16 @@ func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpiredAndServerDown( } } // Mock the new primary can not grant leader which means the lease will expire - re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", fmt.Sprintf("return(\"%s\")", newPrimary))) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", `return()`)) err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) re.NoError(err) // Wait for the old primary exit and new primary campaign + // cannot check newPrimary isServing when skipGrantLeader is enabled testutil.Eventually(re, func() bool { return !nodes[primary].IsServing() }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - // Wait for the new primary lease to expire which is `DefaultLeaderLease` - time.Sleep(4 * time.Second) // TODO: Add campaign times check in mcs to avoid frequent campaign // for now, close the current primary to mock the server down nodes[newPrimary].Close() From 4c7f8acc22613e3f62a9e76ba2c01e17c5b5b11a Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 25 Jul 2024 10:53:35 +0800 Subject: [PATCH 15/24] changed by name Signed-off-by: husharp --- pkg/election/leadership.go | 2 +- pkg/mcs/discovery/discover.go | 7 +------ pkg/mcs/discovery/registry_entry.go | 1 - pkg/mcs/scheduling/server/server.go | 6 +++--- pkg/mcs/utils/util.go | 1 + pkg/tso/global_allocator.go | 5 +++-- pkg/tso/keyspace_group_manager.go | 1 - 7 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 622f35accb8..433d9a679a6 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -397,7 +397,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { // ONLY `/ms/primary/transfer` API update primary will meet this condition. if ev.Type == mvccpb.PUT && ls.IsPrimary() { log.Info("current leadership is updated", zap.Int64("revision", wresp.Header.Revision), - zap.String("leader-key", ls.leaderKey), zap.Any("value", ls.leaderValue), + zap.String("leader-key", ls.leaderKey), zap.ByteString("cur-value", ev.Kv.Value), zap.String("purpose", ls.purpose)) return } diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index d06c886b70b..15af0a135ab 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -96,14 +96,9 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } var primaryIDs []string - var memberValues []string for _, member := range entries { if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) - if string(member.MemberValue) == "" { - return errors.New(fmt.Sprintf("member %s value is empty", member.Name)) - } - memberValues = append(memberValues, string(member.MemberValue)) } } if len(primaryIDs) == 0 { @@ -137,7 +132,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } // update primary key to notify old primary server. putResp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(primaryKey, memberValues[nextPrimaryID], clientv3.WithLease(grantResp.ID))). + Then(clientv3.OpPut(primaryKey, primaryIDs[nextPrimaryID], clientv3.WithLease(grantResp.ID))). Commit() if err != nil || !putResp.Succeeded { return errors.Errorf("failed to write primary flag for %s, err: %v", serviceName, err) diff --git a/pkg/mcs/discovery/registry_entry.go b/pkg/mcs/discovery/registry_entry.go index ad442d9a61a..db4ac44a2cc 100644 --- a/pkg/mcs/discovery/registry_entry.go +++ b/pkg/mcs/discovery/registry_entry.go @@ -31,7 +31,6 @@ type ServiceRegistryEntry struct { GitHash string `json:"git-hash"` DeployPath string `json:"deploy-path"` StartTimestamp int64 `json:"start-timestamp"` - MemberValue []byte `json:"member-value"` } // Serialize this service registry entry diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 991d806b0b0..45915c55d97 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -23,6 +23,7 @@ import ( "path/filepath" "runtime" "strconv" + "strings" "sync" "sync/atomic" "syscall" @@ -252,7 +253,7 @@ func (s *Server) primaryElectionLoop() { expectedPrimary := utils.AttachExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. - if expectedPrimary != "" && expectedPrimary != s.participant.MemberValue() { + if expectedPrimary != "" && !strings.Contains(s.participant.MemberValue(), expectedPrimary) { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), zap.String("expected-primary-id", expectedPrimary), @@ -356,7 +357,7 @@ func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { log.Error("scheduling primary getting the leader meets error", errs.ZapError(err)) return } - if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + if curPrimary != nil && resp.Kvs[0].Value != nil && !strings.Contains(string(resp.Kvs[0].Value), string(curPrimary)) { // 1. modify the expected primary flag to the new primary. utils.MarkExpectedPrimaryFlag(s.participant.Client(), s.participant.GetLeaderPath()) // 2. modify memory status. @@ -503,7 +504,6 @@ func (s *Server) startServer() (err error) { ListenUrls: []string{s.cfg.GetAdvertiseListenAddr()}, } s.participant.InitInfo(p, endpoint.SchedulingSvcRootPath(s.clusterID), utils.PrimaryKey, "primary election") - s.serviceID.MemberValue = []byte(s.participant.MemberValue()) s.service = &Service{Server: s} s.AddServiceReadyCallback(s.startCluster) diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index f4e9f6d8465..d81ae71c793 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -107,6 +107,7 @@ func ClearPrimaryExpectationFlag(client *clientv3.Client, leaderPath string) { // MarkExpectedPrimaryFlag marks the expected primary flag when the current primary has exited. func MarkExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) { log.Info("set expected primary flag", zap.String("leader-path", leaderPath)) + // We have updated new primary(server's addr) in `leaderPath` by `/ms/primary/transfer` API. leaderRaw, err := etcdutil.GetValue(client, leaderPath) if err != nil { log.Error("get primary key error", zap.Error(err)) diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 7bebbe65884..8fd1434a357 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "runtime/trace" + "strings" "sync" "sync/atomic" "time" @@ -565,7 +566,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { expectedPrimary := mcsutils.AttachExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. - if expectedPrimary != "" && expectedPrimary != gta.member.MemberValue() { + if expectedPrimary != "" && !strings.Contains(gta.member.MemberValue(), expectedPrimary) { log.Info("skip campaigning of tso primary and check later", zap.String("server-name", gta.member.Name()), zap.String("expected-primary-id", expectedPrimary), @@ -701,7 +702,7 @@ func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary cha log.Error("tso primary getting the leader meets error", errs.ZapError(err)) return } - if curPrimary != nil && resp.Kvs[0].Value != nil && string(curPrimary) != string(resp.Kvs[0].Value) { + if curPrimary != nil && resp.Kvs[0].Value != nil && !strings.Contains(string(resp.Kvs[0].Value), string(curPrimary)) { // 1. modify the expected primary flag to the new primary. mcsutils.MarkExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // 2. modify memory status. diff --git a/pkg/tso/keyspace_group_manager.go b/pkg/tso/keyspace_group_manager.go index 559e71a1472..83a1369d2f2 100644 --- a/pkg/tso/keyspace_group_manager.go +++ b/pkg/tso/keyspace_group_manager.go @@ -737,7 +737,6 @@ func (kgm *KeyspaceGroupManager) updateKeyspaceGroup(group *endpoint.KeyspaceGro ListenUrls: []string{kgm.cfg.GetAdvertiseListenAddr()}, } participant.InitInfo(p, endpoint.KeyspaceGroupsElectionPath(kgm.tsoSvcRootPath, group.ID), mcsutils.PrimaryKey, "keyspace group primary election") - kgm.tsoServiceID.MemberValue = []byte(participant.MemberValue()) // If the keyspace group is in split, we should ensure that the primary elected by the new keyspace group // is always on the same TSO Server node as the primary of the old keyspace group, and this constraint cannot // be broken until the entire split process is completed. From 36b5a82741a3f93ca3c2f2bb31afbca202ab656c Mon Sep 17 00:00:00 2001 From: husharp Date: Thu, 25 Jul 2024 22:55:22 +0800 Subject: [PATCH 16/24] refine code Signed-off-by: husharp --- pkg/election/leadership.go | 21 +++---- pkg/election/leadership_test.go | 4 +- pkg/election/lease.go | 22 +++++--- pkg/election/lease_test.go | 6 +- pkg/mcs/discovery/discover.go | 21 +++---- pkg/mcs/scheduling/server/server.go | 83 +++++++++++++++------------- pkg/mcs/utils/util.go | 52 +++++------------ pkg/member/member.go | 2 - pkg/member/participant.go | 1 - pkg/tso/global_allocator.go | 86 ++++++++++++++++------------- 10 files changed, 147 insertions(+), 151 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 433d9a679a6..52e14eb9880 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -87,17 +87,18 @@ func NewLeadership(client *clientv3.Client, leaderKey, purpose string) *Leadersh return leadership } -// getLease gets the lease of leadership, only if leadership is valid, +// GetLease gets the lease of leadership, only if leadership is valid, // i.e. the owner is a true leader, the lease is not nil. -func (ls *Leadership) getLease() *lease { +func (ls *Leadership) GetLease() *Lease { l := ls.lease.Load() if l == nil { return nil } - return l.(*lease) + return l.(*Lease) } -func (ls *Leadership) setLease(lease *lease) { +// SetLease sets the lease of leadership. +func (ls *Leadership) SetLease(lease *Lease) { ls.lease.Store(lease) } @@ -165,12 +166,12 @@ func (ls *Leadership) AddCampaignTimes() { func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...clientv3.Cmp) error { ls.leaderValue = leaderData // Create a new lease to campaign - newLease := &lease{ + newLease := &Lease{ Purpose: ls.purpose, client: ls.client, lease: clientv3.NewLease(ls.client), } - ls.setLease(newLease) + ls.SetLease(newLease) failpoint.Inject("skipGrantLeader", func(val failpoint.Value) { name, ok := val.(string) @@ -218,12 +219,12 @@ func (ls *Leadership) Keep(ctx context.Context) { ls.keepAliveCancelFuncLock.Lock() ls.keepAliveCtx, ls.keepAliveCancelFunc = context.WithCancel(ctx) ls.keepAliveCancelFuncLock.Unlock() - go ls.getLease().KeepAlive(ls.keepAliveCtx) + go ls.GetLease().KeepAlive(ls.keepAliveCtx) } // Check returns whether the leadership is still available. func (ls *Leadership) Check() bool { - return ls != nil && ls.getLease() != nil && !ls.getLease().IsExpired() + return ls != nil && ls.GetLease() != nil && !ls.GetLease().IsExpired() } // LeaderTxn returns txn() with a leader comparison to guarantee that @@ -410,7 +411,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { // Reset does some defer jobs such as closing lease, resetting lease etc. func (ls *Leadership) Reset() { - if ls == nil || ls.getLease() == nil { + if ls == nil || ls.GetLease() == nil { return } ls.keepAliveCancelFuncLock.Lock() @@ -418,6 +419,6 @@ func (ls *Leadership) Reset() { ls.keepAliveCancelFunc() } ls.keepAliveCancelFuncLock.Unlock() - ls.getLease().Close() + ls.GetLease().Close() ls.SetPrimaryWatch(false) } diff --git a/pkg/election/leadership_test.go b/pkg/election/leadership_test.go index 40f0bcbee23..e16c5842542 100644 --- a/pkg/election/leadership_test.go +++ b/pkg/election/leadership_test.go @@ -100,9 +100,9 @@ func TestLeadership(t *testing.T) { leadership2.Keep(ctx) // Check the lease. - lease1 := leadership1.getLease() + lease1 := leadership1.GetLease() re.NotNil(lease1) - lease2 := leadership2.getLease() + lease2 := leadership2.GetLease() re.NotNil(lease2) re.True(lease1.IsExpired()) diff --git a/pkg/election/lease.go b/pkg/election/lease.go index 45d702def5e..055e7da41de 100644 --- a/pkg/election/lease.go +++ b/pkg/election/lease.go @@ -34,9 +34,9 @@ const ( slowRequestTime = etcdutil.DefaultSlowRequestTime ) -// lease is used as the low-level mechanism for campaigning and renewing elected leadership. +// Lease is used as the low-level mechanism for campaigning and renewing elected leadership. // The way to gain and maintain leadership is to update and keep the lease alive continuously. -type lease struct { +type Lease struct { // purpose is used to show what this election for Purpose string // etcd client and lease @@ -48,8 +48,16 @@ type lease struct { expireTime atomic.Value } +func NewLease(client *clientv3.Client, purpose string) *Lease { + return &Lease{ + Purpose: purpose, + client: client, + lease: clientv3.NewLease(client), + } +} + // Grant uses `lease.Grant` to initialize the lease and expireTime. -func (l *lease) Grant(leaseTimeout int64) error { +func (l *Lease) Grant(leaseTimeout int64) error { if l == nil { return errs.ErrEtcdGrantLease.GenWithStackByCause("lease is nil") } @@ -71,7 +79,7 @@ func (l *lease) Grant(leaseTimeout int64) error { } // Close releases the lease. -func (l *lease) Close() error { +func (l *Lease) Close() error { if l == nil { return nil } @@ -92,7 +100,7 @@ func (l *lease) Close() error { // IsExpired checks if the lease is expired. If it returns true, // current leader should step down and try to re-elect again. -func (l *lease) IsExpired() bool { +func (l *Lease) IsExpired() bool { if l == nil || l.expireTime.Load() == nil { return true } @@ -100,7 +108,7 @@ func (l *lease) IsExpired() bool { } // KeepAlive auto renews the lease and update expireTime. -func (l *lease) KeepAlive(ctx context.Context) { +func (l *Lease) KeepAlive(ctx context.Context) { defer logutil.LogPanic() if l == nil { @@ -146,7 +154,7 @@ func (l *lease) KeepAlive(ctx context.Context) { } // Periodically call `lease.KeepAliveOnce` and post back latest received expire time into the channel. -func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-chan time.Time { +func (l *Lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-chan time.Time { ch := make(chan time.Time) go func() { diff --git a/pkg/election/lease_test.go b/pkg/election/lease_test.go index 3d8515eadb2..8c600da5a3a 100644 --- a/pkg/election/lease_test.go +++ b/pkg/election/lease_test.go @@ -30,12 +30,12 @@ func TestLease(t *testing.T) { defer clean() // Create the lease. - lease1 := &lease{ + lease1 := &Lease{ Purpose: "test_lease_1", client: client, lease: clientv3.NewLease(client), } - lease2 := &lease{ + lease2 := &Lease{ Purpose: "test_lease_2", client: client, lease: clientv3.NewLease(client), @@ -95,7 +95,7 @@ func TestLeaseKeepAlive(t *testing.T) { defer clean() // Create the lease. - lease := &lease{ + lease := &Lease{ Purpose: "test_lease", client: client, lease: clientv3.NewLease(client), diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 15af0a135ab..f1251d79d0e 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -113,29 +113,22 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar return errors.Errorf("failed to get cluster ID: %v", err) } - var primaryKey string + var primaryPath string switch serviceName { case utils.SchedulingServiceName: - primaryKey = endpoint.SchedulingPrimaryPath(clusterID) + primaryPath = endpoint.SchedulingPrimaryPath(clusterID) case utils.TSOServiceName: tsoRootPath := endpoint.TSOSvcRootPath(clusterID) - primaryKey = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) + primaryPath = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) } - // remove possible residual value. - utils.ClearPrimaryExpectationFlag(client, primaryKey) - - // grant the primary lease to the new primary. grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) if err != nil { - return errors.Errorf("failed to grant lease for %s, err: %v", serviceName, err) + return errors.Errorf("failed to grant lease for expected primary, err: %v", err) } - // update primary key to notify old primary server. - putResp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(primaryKey, primaryIDs[nextPrimaryID], clientv3.WithLease(grantResp.ID))). - Commit() - if err != nil || !putResp.Succeeded { - return errors.Errorf("failed to write primary flag for %s, err: %v", serviceName, err) + _, err = utils.MarkExpectedPrimaryFlag(client, primaryPath, primaryIDs[nextPrimaryID], grantResp.ID) + if err != nil { + return errors.Errorf("failed to mark expected primary flag for %s, err: %v", serviceName, err) } return nil } diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 45915c55d97..b0c0c6a421a 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -41,6 +41,7 @@ import ( bs "github.com/tikv/pd/pkg/basicserver" "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/discovery" "github.com/tikv/pd/pkg/mcs/scheduling/server/config" @@ -56,12 +57,12 @@ import ( "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/apiutil" - "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/grpcutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/memberutil" "github.com/tikv/pd/pkg/utils/metricutil" "github.com/tikv/pd/pkg/versioninfo" + "go.etcd.io/etcd/clientv3" "go.uber.org/zap" "google.golang.org/grpc" ) @@ -306,13 +307,19 @@ func (s *Server) campaignLeader() { cb() } }() + // check expected primary and watch the primary. + exitPrimary := make(chan struct{}) + expectedLease, revision, err := s.keepExpectedPrimaryAlive(ctx) + if err != nil { + log.Error("prepare primary watch error", errs.ZapError(err)) + return + } + go s.expectedPrimaryWatch(ctx, expectedLease, revision+1, exitPrimary) s.participant.EnableLeader() + member.ServiceMemberGauge.WithLabelValues(serviceName).Set(1) log.Info("scheduling primary is ready to serve", zap.String("scheduling-primary-name", s.participant.Name())) - exitPrimary := make(chan struct{}) - go s.primaryWatch(ctx, exitPrimary) - leaderTicker := time.NewTicker(utils.LeaderTickInterval) defer leaderTicker.Stop() @@ -334,42 +341,44 @@ func (s *Server) campaignLeader() { } } -// primaryWatch watches `/ms/primary/transfer` API whether changed the primary. -// 1. modify the expected primary flag to the new primary -// 2. modify memory status -// 3. exit the primary watch loop -// 4. delete the leader key -func (s *Server) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { - resp, err := etcdutil.EtcdKVGet(s.participant.GetLeadership().GetClient(), s.participant.GetLeaderPath()) - if err != nil || resp == nil || len(resp.Kvs) == 0 { - log.Error("scheduling primary getting the primary meets error", errs.ZapError(err)) - return +// keepExpectedPrimaryAlive keeps the expected primary alive. +// We use lease to keep `expected primary` healthy. +// ONLY reset by the following conditions: +// - changed by`/ms/primary/transfer` API. +// - server closed. +func (s *Server) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { + const propose = "scheduling-primary-watch" + lease := election.NewLease(s.GetClient(), propose) + if err := lease.Grant(s.cfg.LeaderLease); err != nil { + log.Error("grant lease for expected primary error", errs.ZapError(err)) + return nil, 0, err } - log.Info("scheduling primary start to watch the primary", zap.Stringer("scheduling-primary", s.participant.GetLeader())) - // Watch will keep looping and never return unless the primary has changed. - s.participant.GetLeadership().SetPrimaryWatch(true) - s.participant.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) - s.participant.GetLeadership().SetPrimaryWatch(false) - - // only `/ms/primary/transfer` API update primary will set `leaderPath` to the expected primary. - curPrimary, err := etcdutil.GetValue(s.participant.Client(), s.participant.GetLeaderPath()) + revision, err := utils.MarkExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath(), s.participant.MemberValue(), + lease.ID.Load().(clientv3.LeaseID)) if err != nil { - log.Error("scheduling primary getting the leader meets error", errs.ZapError(err)) + log.Error("mark expected primary error", errs.ZapError(err)) + return nil, 0, err + } + // Keep alive the current primary leadership to indicate that the server is still alive. + // Watch the expected primary path to check whether the expected primary has changed. + expectedPrimary := election.NewLeadership(s.GetClient(), utils.ExpectedPrimaryPath(s.participant.GetLeaderPath()), propose) + expectedPrimary.SetLease(lease) + expectedPrimary.Keep(ctx) + return expectedPrimary, revision, nil +} + +// expectedPrimaryWatch watches `/ms/primary/transfer` API whether changed the expected primary. +func (s *Server) expectedPrimaryWatch(ctx context.Context, expectedPrimary *election.Leadership, revision int64, exitPrimary chan struct{}) { + log.Info("scheduling primary start to watch the expected primary", zap.String("scheduling-primary", s.participant.MemberValue())) + expectedPrimary.SetPrimaryWatch(true) + expectedPrimary.Watch(ctx, revision) + expectedPrimary.Reset() + defer log.Info("scheduling primary exit the expected primary watch loop") + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: return - } - if curPrimary != nil && resp.Kvs[0].Value != nil && !strings.Contains(string(resp.Kvs[0].Value), string(curPrimary)) { - // 1. modify the expected primary flag to the new primary. - utils.MarkExpectedPrimaryFlag(s.participant.Client(), s.participant.GetLeaderPath()) - // 2. modify memory status. - s.participant.UnsetLeader() - defer log.Info("scheduling primary exit the primary watch loop") - select { - case <-ctx.Done(): - return - // 3. exit the primary watch loop, 4.`exitPrimary` will help delete the leader key. - case exitPrimary <- struct{}{}: - return - } } } diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index d81ae71c793..4a8b35ff611 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -16,6 +16,7 @@ package utils import ( "context" + "fmt" "net" "net/http" "os" @@ -78,9 +79,14 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes) } +// ExpectedPrimaryPath formats the primary path with the expected primary flag. +func ExpectedPrimaryPath(primaryPath string) string { + return fmt.Sprintf("%s/%s", primaryPath, ExpectedPrimaryFlag) +} + // AttachExpectedPrimaryFlag attaches the expected primary flag. -func AttachExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) string { - primary, err := etcdutil.GetValue(client, strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/")) +func AttachExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { + primary, err := etcdutil.GetValue(client, ExpectedPrimaryPath(primaryPath)) if err != nil { log.Error("get expected primary flag error", errs.ZapError(err)) return "" @@ -89,46 +95,18 @@ func AttachExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) strin return string(primary) } -// ClearPrimaryExpectationFlag clears the expected primary flag. -// - removed when campaign new primary successfully. -// - removed when appoint new primary by API. -func ClearPrimaryExpectationFlag(client *clientv3.Client, leaderPath string) { - log.Info("remove expected primary flag", zap.String("primary-path", leaderPath)) - // remove expected leader key - resp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpDelete(strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/"))). - Commit() - if err != nil || !resp.Succeeded { - log.Error("change expected primary error", errs.ZapError(err)) - return - } -} - -// MarkExpectedPrimaryFlag marks the expected primary flag when the current primary has exited. -func MarkExpectedPrimaryFlag(client *clientv3.Client, leaderPath string) { - log.Info("set expected primary flag", zap.String("leader-path", leaderPath)) - // We have updated new primary(server's addr) in `leaderPath` by `/ms/primary/transfer` API. - leaderRaw, err := etcdutil.GetValue(client, leaderPath) - if err != nil { - log.Error("get primary key error", zap.Error(err)) - return - } - grantResp, err := client.Grant(client.Ctx(), DefaultLeaderLease) - if err != nil { - log.Error("grant lease for expected primary error", errs.ZapError(err)) - return - } +// MarkExpectedPrimaryFlag marks the expected primary flag when the primary is specified. +func MarkExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leaderRaw string, leaseID clientv3.LeaseID) (int64, error) { + log.Info("set expected primary flag", zap.String("leader-path", ExpectedPrimaryPath(primaryPath))) // write a flag to indicate the current primary has exited resp, err := kv.NewSlowLogTxn(client). - Then( - clientv3.OpPut(strings.Join([]string{leaderPath, ExpectedPrimaryFlag}, "/"), string(leaderRaw), clientv3.WithLease(grantResp.ID)), - // indicate the current primary has exited - clientv3.OpDelete(leaderPath)). + Then(clientv3.OpPut(ExpectedPrimaryPath(primaryPath), leaderRaw, clientv3.WithLease(leaseID))). Commit() if err != nil || !resp.Succeeded { - log.Error("change expected primary error", errs.ZapError(err)) - return + log.Error("mark expected primary error", errs.ZapError(err)) + return 0, err } + return resp.Header.Revision, nil } // PromHandler is a handler to get prometheus metrics. diff --git a/pkg/member/member.go b/pkg/member/member.go index d66507069ec..7a58a976f28 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -32,7 +32,6 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" - "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/etcdutil" "go.etcd.io/etcd/clientv3" @@ -158,7 +157,6 @@ func (m *EmbeddedEtcdMember) UnsetLeader() { // EnableLeader sets the member itself to a PD leader. func (m *EmbeddedEtcdMember) EnableLeader() { m.setLeader(m.member) - utils.ClearPrimaryExpectationFlag(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the PD leader. diff --git a/pkg/member/participant.go b/pkg/member/participant.go index 821f885471c..d70dd43aa9c 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -164,7 +164,6 @@ func (m *Participant) UnsetLeader() { // EnableLeader declares the member itself to be the leader. func (m *Participant) EnableLeader() { m.setLeader(m.member) - utils.ClearPrimaryExpectationFlag(m.client, m.GetLeaderPath()) } // GetLeaderPath returns the path of the leader. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 8fd1434a357..bc0d47896fb 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -28,15 +28,16 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/prometheus/client_golang/prometheus" + "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" - "github.com/tikv/pd/pkg/utils/etcdutil" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/tsoutil" "github.com/tikv/pd/pkg/utils/typeutil" + "go.etcd.io/etcd/clientv3" "go.uber.org/zap" "google.golang.org/grpc" ) @@ -636,8 +637,17 @@ func (gta *GlobalTSOAllocator) campaignLeader() { gta.am.ResetAllocatorGroup(GlobalDCLocation) }() - tsoLabel := fmt.Sprintf("TSO Service Group %d", gta.getGroupID()) + // check expected primary and watch the primary. + exitPrimary := make(chan struct{}) + expectedLease, revision, err := gta.keepExpectedPrimaryAlive(ctx) + if err != nil { + log.Error("prepare primary watch error", errs.ZapError(err)) + return + } + go gta.expectedPrimaryWatch(ctx, expectedLease, revision+1, exitPrimary) gta.member.EnableLeader() + + tsoLabel := fmt.Sprintf("TSO Service Group %d", gta.getGroupID()) member.ServiceMemberGauge.WithLabelValues(tsoLabel).Set(1) defer resetLeaderOnce.Do(func() { cancel() @@ -651,9 +661,6 @@ func (gta *GlobalTSOAllocator) campaignLeader() { logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("tso-primary-name", gta.member.Name())) - exitPrimary := make(chan struct{}) - go gta.primaryWatch(ctx, exitPrimary) - leaderTicker := time.NewTicker(mcsutils.LeaderTickInterval) defer leaderTicker.Stop() @@ -677,44 +684,47 @@ func (gta *GlobalTSOAllocator) campaignLeader() { } } -// primaryWatch watches `/ms/primary/transfer` API whether changed the primary. -// 1. modify the expected primary flag to the new primary -// 2. modify memory status -// 3. exit the primary watch loop -// 4. delete the leader key -func (gta *GlobalTSOAllocator) primaryWatch(ctx context.Context, exitPrimary chan struct{}) { - resp, err := etcdutil.EtcdKVGet(gta.member.GetLeadership().GetClient(), gta.member.GetLeaderPath()) - if err != nil || resp == nil || len(resp.Kvs) == 0 { - log.Error("tso primary getting the primary meets error", errs.ZapError(err)) - return +// keepExpectedPrimaryAlive keeps the expected primary alive. +// We use lease to keep `expected primary` healthy. +// ONLY reset by the following conditions: +// - changed by`/ms/primary/transfer` API. +// - server closed. +func (gta *GlobalTSOAllocator) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { + const propose = "tso-primary-watch" + cli := gta.member.Client() + newLease := election.NewLease(cli, propose) + if err := newLease.Grant(gta.am.leaderLease); err != nil { + return nil, 0, err + } + + revision, err := mcsutils.MarkExpectedPrimaryFlag(cli, gta.member.GetLeaderPath(), gta.member.MemberValue(), + newLease.ID.Load().(clientv3.LeaseID)) + if err != nil { + log.Error("mark expected primary error", errs.ZapError(err)) + return nil, 0, err } + // Keep alive the current primary leadership to indicate that the server is still alive. + // Watch the expected primary path to check whether the expected primary has changed. + expectedLease := election.NewLeadership(cli, mcsutils.ExpectedPrimaryPath(gta.member.GetLeaderPath()), propose) + expectedLease.SetLease(newLease) + expectedLease.Keep(ctx) + return expectedLease, revision, nil +} + +// primaryWatch watches `/ms/primary/transfer` API whether changed the expected primary. +func (gta *GlobalTSOAllocator) expectedPrimaryWatch(ctx context.Context, expectedLease *election.Leadership, revision int64, exitPrimary chan struct{}) { log.Info("tso primary start to watch the primary", logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), zap.String("campaign-tso-primary-name", gta.member.Name())) - // Watch will keep looping and never return unless the primary has changed. - gta.member.GetLeadership().SetPrimaryWatch(true) - gta.member.GetLeadership().Watch(ctx, resp.Kvs[0].ModRevision+1) - gta.member.GetLeadership().SetPrimaryWatch(false) - - // only `/ms/primary/transfer` API update primary will set `leaderPath` to the expected primary. - curPrimary, err := etcdutil.GetValue(gta.member.Client(), gta.member.GetLeaderPath()) - if err != nil { - log.Error("tso primary getting the leader meets error", errs.ZapError(err)) + expectedLease.SetPrimaryWatch(true) + expectedLease.Watch(ctx, revision) + expectedLease.Reset() + defer log.Info("tso primary exit the primary watch loop") + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: return - } - if curPrimary != nil && resp.Kvs[0].Value != nil && !strings.Contains(string(resp.Kvs[0].Value), string(curPrimary)) { - // 1. modify the expected primary flag to the new primary. - mcsutils.MarkExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) - // 2. modify memory status. - gta.member.UnsetLeader() - defer log.Info("tso primary exit the primary watch loop") - select { - case <-ctx.Done(): - return - // 3. exit the primary watch loop, 4.`exitPrimary` will help delete the leader key. - case exitPrimary <- struct{}{}: - return - } } } From ea8d9e3161dd9c0e2000c7901446e8e8162b7e74 Mon Sep 17 00:00:00 2001 From: husharp Date: Tue, 30 Jul 2024 14:43:08 +0800 Subject: [PATCH 17/24] address comment Signed-off-by: husharp --- cmd/pd-server/main.go | 4 ++-- pkg/mcs/discovery/discover.go | 5 ++--- pkg/mcs/scheduling/server/server.go | 8 ++++---- pkg/mcs/utils/util.go | 4 ++-- pkg/tso/global_allocator.go | 8 ++++---- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cmd/pd-server/main.go b/cmd/pd-server/main.go index 25157b8b0c3..b879e0c4b44 100644 --- a/cmd/pd-server/main.go +++ b/cmd/pd-server/main.go @@ -94,7 +94,7 @@ func NewTSOServiceCommand() *cobra.Command { Short: "Run the TSO service", Run: tso.CreateServerWrapper, } - cmd.Flags().StringP("name", "", "", "human-readable name for this TSO member") + cmd.Flags().StringP("name", "", "", "human-readable name for this tso member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") @@ -115,7 +115,7 @@ func NewSchedulingServiceCommand() *cobra.Command { Short: "Run the scheduling service", Run: scheduling.CreateServerWrapper, } - cmd.Flags().StringP("name", "", "", "human-readable name for this Scheduling member") + cmd.Flags().StringP("name", "", "", "human-readable name for this scheduling member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index f1251d79d0e..a6cd44eb367 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -15,7 +15,6 @@ package discovery import ( - "fmt" "math/rand" "strconv" "time" @@ -92,7 +91,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar // Do nothing when I am the only member of cluster. if len(entries) == 1 { - return errors.New(fmt.Sprintf("no valid secondary to transfer primary, the only member is %s", entries[0].Name)) + return errors.Errorf("no valid secondary to transfer primary, the only member is %s", entries[0].Name) } var primaryIDs []string @@ -102,7 +101,7 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar } } if len(primaryIDs) == 0 { - return errors.New(fmt.Sprintf("no valid secondary to transfer primary, from %s to %s", oldPrimary, newPrimary)) + return errors.Errorf("no valid secondary to transfer primary, from %s to %s", oldPrimary, newPrimary) } r := rand.New(rand.NewSource(time.Now().UnixNano())) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index b0c0c6a421a..573d4153262 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -251,7 +251,7 @@ func (s *Server) primaryElectionLoop() { } // To make sure the expected primary(if existed) and new primary are on the same server. - expectedPrimary := utils.AttachExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) + expectedPrimary := utils.GetExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. if expectedPrimary != "" && !strings.Contains(s.participant.MemberValue(), expectedPrimary) { @@ -347,8 +347,8 @@ func (s *Server) campaignLeader() { // - changed by`/ms/primary/transfer` API. // - server closed. func (s *Server) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { - const propose = "scheduling-primary-watch" - lease := election.NewLease(s.GetClient(), propose) + const purpose = "scheduling-primary-watch" + lease := election.NewLease(s.GetClient(), purpose) if err := lease.Grant(s.cfg.LeaderLease); err != nil { log.Error("grant lease for expected primary error", errs.ZapError(err)) return nil, 0, err @@ -361,7 +361,7 @@ func (s *Server) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leader } // Keep alive the current primary leadership to indicate that the server is still alive. // Watch the expected primary path to check whether the expected primary has changed. - expectedPrimary := election.NewLeadership(s.GetClient(), utils.ExpectedPrimaryPath(s.participant.GetLeaderPath()), propose) + expectedPrimary := election.NewLeadership(s.GetClient(), utils.ExpectedPrimaryPath(s.participant.GetLeaderPath()), purpose) expectedPrimary.SetLease(lease) expectedPrimary.Keep(ctx) return expectedPrimary, revision, nil diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 4a8b35ff611..d724fb28010 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -84,8 +84,8 @@ func ExpectedPrimaryPath(primaryPath string) string { return fmt.Sprintf("%s/%s", primaryPath, ExpectedPrimaryFlag) } -// AttachExpectedPrimaryFlag attaches the expected primary flag. -func AttachExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { +// GetExpectedPrimaryFlag gets the expected primary flag. +func GetExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { primary, err := etcdutil.GetValue(client, ExpectedPrimaryPath(primaryPath)) if err != nil { log.Error("get expected primary flag error", errs.ZapError(err)) diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index bc0d47896fb..4988d7e4ae0 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -564,7 +564,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { } // To make sure the expected primary(if existed) and new primary are on the same server. - expectedPrimary := mcsutils.AttachExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) + expectedPrimary := mcsutils.GetExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `/ms/primary/transfer` API. if expectedPrimary != "" && !strings.Contains(gta.member.MemberValue(), expectedPrimary) { @@ -690,9 +690,9 @@ func (gta *GlobalTSOAllocator) campaignLeader() { // - changed by`/ms/primary/transfer` API. // - server closed. func (gta *GlobalTSOAllocator) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { - const propose = "tso-primary-watch" + const purpose = "tso-primary-watch" cli := gta.member.Client() - newLease := election.NewLease(cli, propose) + newLease := election.NewLease(cli, purpose) if err := newLease.Grant(gta.am.leaderLease); err != nil { return nil, 0, err } @@ -705,7 +705,7 @@ func (gta *GlobalTSOAllocator) keepExpectedPrimaryAlive(ctx context.Context) (*e } // Keep alive the current primary leadership to indicate that the server is still alive. // Watch the expected primary path to check whether the expected primary has changed. - expectedLease := election.NewLeadership(cli, mcsutils.ExpectedPrimaryPath(gta.member.GetLeaderPath()), propose) + expectedLease := election.NewLeadership(cli, mcsutils.ExpectedPrimaryPath(gta.member.GetLeaderPath()), purpose) expectedLease.SetLease(newLease) expectedLease.Keep(ctx) return expectedLease, revision, nil From ffb7b1b6d50d970dfb0e8785c428dc025ed709e0 Mon Sep 17 00:00:00 2001 From: husharp Date: Wed, 31 Jul 2024 10:36:55 +0800 Subject: [PATCH 18/24] refine code Signed-off-by: husharp --- client/http/api.go | 5 - client/http/interface.go | 17 --- client/http/request_info.go | 1 - pkg/election/leadership.go | 5 +- pkg/election/lease.go | 2 + pkg/mcs/discovery/discover.go | 31 +++-- pkg/mcs/scheduling/server/apis/v1/api.go | 37 ++++++ pkg/mcs/scheduling/server/server.go | 52 +------- pkg/mcs/tso/server/apis/v1/api.go | 51 ++++++++ pkg/mcs/utils/expected_primary.go | 114 ++++++++++++++++++ pkg/mcs/utils/util.go | 39 ------ pkg/member/member.go | 8 +- pkg/member/participant.go | 24 +++- pkg/tso/allocator_manager.go | 2 - pkg/tso/global_allocator.go | 63 +++------- server/apiv2/handlers/micro_service.go | 54 --------- tests/integrations/mcs/members/member_test.go | 66 +++++++--- 17 files changed, 322 insertions(+), 249 deletions(-) create mode 100644 pkg/mcs/utils/expected_primary.go diff --git a/client/http/api.go b/client/http/api.go index f787327a97b..3376a48770d 100644 --- a/client/http/api.go +++ b/client/http/api.go @@ -206,11 +206,6 @@ func MicroServicePrimary(service string) string { return fmt.Sprintf("%s/primary/%s", microServicePrefix, service) } -// MicroServicePrimaryTransfer returns the path of PD HTTP API to transfer the primary of microservice. -func MicroServicePrimaryTransfer(service string) string { - return fmt.Sprintf("%s/primary/transfer/%s", microServicePrefix, service) -} - // GetUpdateKeyspaceConfigURL returns the path of PD HTTP API to update keyspace config. func GetUpdateKeyspaceConfigURL(keyspaceName string) string { return fmt.Sprintf(KeyspaceConfig, keyspaceName) diff --git a/client/http/interface.go b/client/http/interface.go index 108e1a25e40..f90ab19624f 100644 --- a/client/http/interface.go +++ b/client/http/interface.go @@ -103,7 +103,6 @@ type Client interface { /* Micro Service interfaces */ GetMicroServiceMembers(context.Context, string) ([]MicroServiceMember, error) GetMicroServicePrimary(context.Context, string) (string, error) - TransferMicroServicePrimary(context.Context, string, string) error DeleteOperators(context.Context) error /* Keyspace interface */ @@ -960,22 +959,6 @@ func (c *client) GetMicroServicePrimary(ctx context.Context, service string) (st return primary, err } -func (c *client) TransferMicroServicePrimary(ctx context.Context, service, newPrimary string) error { - reqData, err := json.Marshal(struct { - NewPrimary string `json:"new_primary"` - }{ - NewPrimary: newPrimary, - }) - if err != nil { - return errors.Trace(err) - } - return c.request(ctx, newRequestInfo(). - WithName(transferMicroServicePrimaryName). - WithURI(MicroServicePrimaryTransfer(service)). - WithMethod(http.MethodPost). - WithBody(reqData)) -} - // GetPDVersion gets the release version of the PD binary. func (c *client) GetPDVersion(ctx context.Context) (string, error) { var ver struct { diff --git a/client/http/request_info.go b/client/http/request_info.go index 9aa2b50f1b4..783220bcc60 100644 --- a/client/http/request_info.go +++ b/client/http/request_info.go @@ -77,7 +77,6 @@ const ( getMinResolvedTSByStoresIDsName = "GetMinResolvedTSByStoresIDs" getMicroServiceMembersName = "GetMicroServiceMembers" getMicroServicePrimaryName = "GetMicroServicePrimary" - transferMicroServicePrimaryName = "TransferMicroServicePrimary" getPDVersionName = "GetPDVersion" resetTSName = "ResetTS" resetBaseAllocIDName = "ResetBaseAllocID" diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 52e14eb9880..b706407a9c2 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -390,12 +390,13 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { } for _, ev := range wresp.Events { - if ev.Type == mvccpb.DELETE { + // ensure `{service}/primary/transfer` API will not meet this condition. + if ev.Type == mvccpb.DELETE && !ls.IsPrimary() { log.Info("current leadership is deleted", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return } - // ONLY `/ms/primary/transfer` API update primary will meet this condition. + // ONLY `{service}/primary/transfer` API update primary will meet this condition. if ev.Type == mvccpb.PUT && ls.IsPrimary() { log.Info("current leadership is updated", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.ByteString("cur-value", ev.Kv.Value), diff --git a/pkg/election/lease.go b/pkg/election/lease.go index 055e7da41de..c2e9eb97117 100644 --- a/pkg/election/lease.go +++ b/pkg/election/lease.go @@ -48,6 +48,7 @@ type Lease struct { expireTime atomic.Value } +// NewLease creates a new Lease instance. func NewLease(client *clientv3.Client, purpose string) *Lease { return &Lease{ Purpose: purpose, @@ -117,6 +118,7 @@ func (l *Lease) KeepAlive(ctx context.Context) { ctx, cancel := context.WithCancel(ctx) defer cancel() timeCh := l.keepAliveWorker(ctx, l.leaseTimeout/3) + defer log.Info("lease keep alive stopped", zap.String("purpose", l.Purpose)) var maxExpire time.Time timer := time.NewTimer(l.leaseTimeout) diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index a6cd44eb367..c95f8944835 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/storage/endpoint" @@ -82,8 +83,13 @@ func GetMSMembers(serviceName string, client *clientv3.Client) ([]ServiceRegistr } // TransferPrimary transfers the primary of the specified service. -func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimary string, keyspaceGroupID uint32) error { - log.Info("transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimary), zap.String("to", newPrimary)) +// keyspaceGroupID is optional, only used for TSO service. +func TransferPrimary(client *clientv3.Client, lease *election.Lease, serviceName, + oldPrimaryAddr, newPrimary string, keyspaceGroupID uint32) error { + if lease == nil { + return errors.New("current lease is nil, please check leadership") + } + log.Info("try to transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimaryAddr), zap.String("to", newPrimary)) entries, err := GetMSMembers(serviceName, client) if err != nil { return err @@ -96,12 +102,13 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar var primaryIDs []string for _, member := range entries { - if (newPrimary == "" && member.ServiceAddr != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { + // TODO: judged by `addr` and `name` now, should unify them to `name` in the future. + if (newPrimary == "" && member.ServiceAddr != oldPrimaryAddr) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) } } if len(primaryIDs) == 0 { - return errors.Errorf("no valid secondary to transfer primary, from %s to %s", oldPrimary, newPrimary) + return errors.Errorf("no valid secondary to transfer primary, from %s to %s", oldPrimaryAddr, newPrimary) } r := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -112,6 +119,17 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar return errors.Errorf("failed to get cluster ID: %v", err) } + // update expected primary flag + grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) + if err != nil { + return errors.Errorf("failed to grant lease for expected primary, err: %v", err) + } + + // revoke current primary's lease to ensure keepalive goroutine of primary exits. + if err := lease.Close(); err != nil { + return errors.Errorf("failed to revoke current primary's lease: %v", err) + } + var primaryPath string switch serviceName { case utils.SchedulingServiceName: @@ -120,11 +138,6 @@ func TransferPrimary(client *clientv3.Client, serviceName, oldPrimary, newPrimar tsoRootPath := endpoint.TSOSvcRootPath(clusterID) primaryPath = endpoint.KeyspaceGroupPrimaryPath(tsoRootPath, keyspaceGroupID) } - - grantResp, err := client.Grant(client.Ctx(), utils.DefaultLeaderLease) - if err != nil { - return errors.Errorf("failed to grant lease for expected primary, err: %v", err) - } _, err = utils.MarkExpectedPrimaryFlag(client, primaryPath, primaryIDs[nextPrimaryID], grantResp.ID) if err != nil { return errors.Errorf("failed to mark expected primary flag for %s, err: %v", serviceName, err) diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 39aa11927ca..9e0280e7781 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -30,6 +30,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/discovery" scheserver "github.com/tikv/pd/pkg/mcs/scheduling/server" mcsutils "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/response" @@ -119,6 +120,7 @@ func NewService(srv *scheserver.Service) *Service { s.RegisterHotspotRouter() s.RegisterRegionsRouter() s.RegisterStoresRouter() + s.RegisterPrimaryRouter() return s } @@ -225,6 +227,12 @@ func (s *Service) RegisterConfigRouter() { regions.GET("/:id/labels", getRegionLabels) } +// RegisterPrimaryRouter registers the router of the config handler. +func (s *Service) RegisterPrimaryRouter() { + router := s.root.Group("primary") + router.POST("transfer", transferPrimary) +} + // @Tags admin // @Summary Change the log level. // @Produce json @@ -1477,3 +1485,32 @@ func getRegionByID(c *gin.Context) { } c.Data(http.StatusOK, "application/json", b) } + +// TransferPrimary transfers the primary member. +// @Tags primary +// @Summary Transfer the primary member of the specified service. +// @Produce json +// @Param service path string true "service name" +// @Param new_primary body string false "new primary name" +// @Success 200 string string +// @Router /primary/transfer [post] +func transferPrimary(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + var input map[string]string + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + newPrimary := "" + if v, ok := input["new_primary"]; ok { + newPrimary = v + } + + if err := discovery.TransferPrimary(svr.GetClient(), svr.GetParticipant().GetExpectedPrimaryLease(), + mcsutils.SchedulingServiceName, svr.GetAddr(), newPrimary, 0); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, "success") +} diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 573d4153262..603ca696233 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -41,7 +41,6 @@ import ( bs "github.com/tikv/pd/pkg/basicserver" "github.com/tikv/pd/pkg/cache" "github.com/tikv/pd/pkg/core" - "github.com/tikv/pd/pkg/election" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mcs/discovery" "github.com/tikv/pd/pkg/mcs/scheduling/server/config" @@ -62,7 +61,6 @@ import ( "github.com/tikv/pd/pkg/utils/memberutil" "github.com/tikv/pd/pkg/utils/metricutil" "github.com/tikv/pd/pkg/versioninfo" - "go.etcd.io/etcd/clientv3" "go.uber.org/zap" "google.golang.org/grpc" ) @@ -253,7 +251,7 @@ func (s *Server) primaryElectionLoop() { // To make sure the expected primary(if existed) and new primary are on the same server. expectedPrimary := utils.GetExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. - // expected primary ONLY SET BY `/ms/primary/transfer` API. + // expected primary ONLY SET BY `{service}/primary/transfer` API. if expectedPrimary != "" && !strings.Contains(s.participant.MemberValue(), expectedPrimary) { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), @@ -309,12 +307,13 @@ func (s *Server) campaignLeader() { }() // check expected primary and watch the primary. exitPrimary := make(chan struct{}) - expectedLease, revision, err := s.keepExpectedPrimaryAlive(ctx) + lease, err := utils.KeepExpectedPrimaryAlive(ctx, s.GetClient(), exitPrimary, + s.cfg.LeaderLease, s.participant.GetLeaderPath(), s.participant.MemberValue(), utils.SchedulingServiceName) if err != nil { - log.Error("prepare primary watch error", errs.ZapError(err)) + log.Error("prepare scheduling primary watch error", errs.ZapError(err)) return } - go s.expectedPrimaryWatch(ctx, expectedLease, revision+1, exitPrimary) + s.participant.SetExpectedPrimaryLease(lease) s.participant.EnableLeader() member.ServiceMemberGauge.WithLabelValues(serviceName).Set(1) @@ -341,47 +340,6 @@ func (s *Server) campaignLeader() { } } -// keepExpectedPrimaryAlive keeps the expected primary alive. -// We use lease to keep `expected primary` healthy. -// ONLY reset by the following conditions: -// - changed by`/ms/primary/transfer` API. -// - server closed. -func (s *Server) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { - const purpose = "scheduling-primary-watch" - lease := election.NewLease(s.GetClient(), purpose) - if err := lease.Grant(s.cfg.LeaderLease); err != nil { - log.Error("grant lease for expected primary error", errs.ZapError(err)) - return nil, 0, err - } - revision, err := utils.MarkExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath(), s.participant.MemberValue(), - lease.ID.Load().(clientv3.LeaseID)) - if err != nil { - log.Error("mark expected primary error", errs.ZapError(err)) - return nil, 0, err - } - // Keep alive the current primary leadership to indicate that the server is still alive. - // Watch the expected primary path to check whether the expected primary has changed. - expectedPrimary := election.NewLeadership(s.GetClient(), utils.ExpectedPrimaryPath(s.participant.GetLeaderPath()), purpose) - expectedPrimary.SetLease(lease) - expectedPrimary.Keep(ctx) - return expectedPrimary, revision, nil -} - -// expectedPrimaryWatch watches `/ms/primary/transfer` API whether changed the expected primary. -func (s *Server) expectedPrimaryWatch(ctx context.Context, expectedPrimary *election.Leadership, revision int64, exitPrimary chan struct{}) { - log.Info("scheduling primary start to watch the expected primary", zap.String("scheduling-primary", s.participant.MemberValue())) - expectedPrimary.SetPrimaryWatch(true) - expectedPrimary.Watch(ctx, revision) - expectedPrimary.Reset() - defer log.Info("scheduling primary exit the expected primary watch loop") - select { - case <-ctx.Done(): - return - case exitPrimary <- struct{}{}: - return - } -} - // Close closes the server. func (s *Server) Close() { if !atomic.CompareAndSwapInt64(&s.isRunning, 1, 0) { diff --git a/pkg/mcs/tso/server/apis/v1/api.go b/pkg/mcs/tso/server/apis/v1/api.go index 44f4b353d58..8d0b656ba41 100644 --- a/pkg/mcs/tso/server/apis/v1/api.go +++ b/pkg/mcs/tso/server/apis/v1/api.go @@ -26,9 +26,11 @@ import ( "github.com/pingcap/kvproto/pkg/tsopb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/discovery" tsoserver "github.com/tikv/pd/pkg/mcs/tso/server" "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/storage/endpoint" + "github.com/tikv/pd/pkg/tso" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/apiutil/multiservicesapi" "github.com/tikv/pd/pkg/utils/logutil" @@ -97,6 +99,7 @@ func NewService(srv *tsoserver.Service) *Service { s.RegisterKeyspaceGroupRouter() s.RegisterHealthRouter() s.RegisterConfigRouter() + s.RegisterPrimaryRouter() return s } @@ -125,6 +128,12 @@ func (s *Service) RegisterConfigRouter() { router.GET("", getConfig) } +// RegisterPrimaryRouter registers the router of the config handler. +func (s *Service) RegisterPrimaryRouter() { + router := s.root.Group("primary") + router.POST("transfer", transferPrimary) +} + func changeLogLevel(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*tsoserver.Service) var level string @@ -265,3 +274,45 @@ func getConfig(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*tsoserver.Service) c.IndentedJSON(http.StatusOK, svr.GetConfig()) } + +// TransferPrimary transfers the primary member of the specified service. +// @Tags primary +// @Summary Transfer the primary member of the specified service. +// @Produce json +// @Param service path string true "service name" +// @Param new_primary body string false "new primary name" +// @Param keyspace_group_id body string false "keyspace group id" +// @Success 200 string string +// @Router /primary/transfer [post] +func transferPrimary(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*tsoserver.Service) + var input map[string]string + if err := c.BindJSON(&input); err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + + newPrimary, keyspaceGroupID := "", utils.DefaultKeyspaceGroupID + if v, ok := input["new_primary"]; ok { + newPrimary = v + } + + allocator, err := svr.GetTSOAllocatorManager(keyspaceGroupID) + if err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + + globalAllocator, err := allocator.GetAllocator(tso.GlobalDCLocation) + if err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + + if err := discovery.TransferPrimary(svr.GetClient(), globalAllocator.(*tso.GlobalTSOAllocator).GetExpectedPrimaryLease(), + utils.TSOServiceName, svr.GetAddr(), newPrimary, keyspaceGroupID); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, "success") +} diff --git a/pkg/mcs/utils/expected_primary.go b/pkg/mcs/utils/expected_primary.go new file mode 100644 index 00000000000..f91f3a1a175 --- /dev/null +++ b/pkg/mcs/utils/expected_primary.go @@ -0,0 +1,114 @@ +// Copyright 2024 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "fmt" + + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/election" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/storage/kv" + "github.com/tikv/pd/pkg/utils/etcdutil" + "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" +) + +// ExpectedPrimaryFlag is the flag to indicate the expected primary. +// 1. When the primary was campaigned successfully, it will set the `expected_primary` flag. +// 2. Using `{service}/primary/transfer` API will revoke the previous lease and set a new `expected_primary` flag. +// This flag used to help new primary to campaign successfully while other secondaries can skip the campaign. +const ExpectedPrimaryFlag = "expected_primary" + +// ExpectedPrimaryPath formats the primary path with the expected primary flag. +func ExpectedPrimaryPath(primaryPath string) string { + return fmt.Sprintf("%s/%s", primaryPath, ExpectedPrimaryFlag) +} + +// GetExpectedPrimaryFlag gets the expected primary flag. +func GetExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { + path := ExpectedPrimaryPath(primaryPath) + primary, err := etcdutil.GetValue(client, path) + if err != nil { + log.Error("get expected primary flag error", errs.ZapError(err), zap.String("primary-path", path)) + return "" + } + + return string(primary) +} + +// MarkExpectedPrimaryFlag marks the expected primary flag when the primary is specified. +func MarkExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leaderRaw string, leaseID clientv3.LeaseID) (int64, error) { + path := ExpectedPrimaryPath(primaryPath) + log.Info("set expected primary flag", zap.String("primary-path", path), zap.String("leader-raw", leaderRaw)) + // write a flag to indicate the expected primary. + resp, err := kv.NewSlowLogTxn(client). + Then(clientv3.OpPut(ExpectedPrimaryPath(primaryPath), leaderRaw, clientv3.WithLease(leaseID))). + Commit() + if err != nil || !resp.Succeeded { + log.Error("mark expected primary error", errs.ZapError(err), zap.String("primary-path", path)) + return 0, err + } + return resp.Header.Revision, nil +} + +// KeepExpectedPrimaryAlive keeps the expected primary alive. +// We use lease to keep `expected primary` healthy. +// ONLY reset by the following conditions: +// - changed by `{service}/primary/transfer` API. +// - leader lease expired. +// ONLY primary called this function. +func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPrimary chan struct{}, + leaseTimeout int64, leaderPath, memberValue, service string) (*election.Lease, error) { + log.Info("primary start to watch the expected primary", zap.String("service", service), zap.String("primary-value", memberValue)) + service = fmt.Sprintf("%s-expected-primary", service) + lease := election.NewLease(cli, service) + if err := lease.Grant(leaseTimeout); err != nil { + return nil, err + } + + revision, err := MarkExpectedPrimaryFlag(cli, leaderPath, memberValue, lease.ID.Load().(clientv3.LeaseID)) + if err != nil { + log.Error("mark expected primary error", errs.ZapError(err)) + return nil, err + } + // Keep alive the current expected primary leadership to indicate that the server is still alive. + // Watch the expected primary path to check whether the expected primary has changed by `{service}/primary/transfer` API. + expectedPrimary := election.NewLeadership(cli, ExpectedPrimaryPath(leaderPath), service) + expectedPrimary.SetLease(lease) + expectedPrimary.Keep(ctx) + + go watchExpectedPrimary(ctx, expectedPrimary, revision+1, exitPrimary) + return lease, nil +} + +// watchExpectedPrimary watches `{service}/primary/transfer` API whether changed the expected primary. +func watchExpectedPrimary(ctx context.Context, + expectedPrimary *election.Leadership, revision int64, exitPrimary chan struct{}) { + expectedPrimary.SetPrimaryWatch(true) + // ONLY exited watch by the following conditions: + // - changed by `{service}/primary/transfer` API. + // - leader lease expired. + expectedPrimary.Watch(ctx, revision) + expectedPrimary.Reset() + defer log.Info("primary exit the primary watch loop") + select { + case <-ctx.Done(): + return + case exitPrimary <- struct{}{}: + return + } +} diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index d724fb28010..fb78f0b4be3 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -16,7 +16,6 @@ package utils import ( "context" - "fmt" "net" "net/http" "os" @@ -33,7 +32,6 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/soheilhy/cmux" "github.com/tikv/pd/pkg/errs" - "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/apiutil/multiservicesapi" "github.com/tikv/pd/pkg/utils/etcdutil" @@ -53,13 +51,6 @@ const ( ClusterIDPath = "/pd/cluster_id" // retryInterval is the interval to retry. retryInterval = time.Second - // ExpectedPrimaryFlag is the flag to indicate the expected primary, ONLY marked BY `/ms/primary/transfer` API. - // This flag likes a fence to avoid exited 2 primaries in the cluster simultaneously. - // 1. Since follower will campaign a new primary when it found the `leader_key` is deleted. - // **We can ensure `expected_primary` is set before deleting the `leader_key`.** - // 2. Old primary will mark `expected_primary` firstly, - // then delete the `leader_key` which will trigger the follower to campaign a new primary. - ExpectedPrimaryFlag = "expected_primary" ) // InitClusterID initializes the cluster ID. @@ -79,36 +70,6 @@ func InitClusterID(ctx context.Context, client *clientv3.Client) (id uint64, err return 0, errors.Errorf("failed to init cluster ID after retrying %d times", maxRetryTimes) } -// ExpectedPrimaryPath formats the primary path with the expected primary flag. -func ExpectedPrimaryPath(primaryPath string) string { - return fmt.Sprintf("%s/%s", primaryPath, ExpectedPrimaryFlag) -} - -// GetExpectedPrimaryFlag gets the expected primary flag. -func GetExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { - primary, err := etcdutil.GetValue(client, ExpectedPrimaryPath(primaryPath)) - if err != nil { - log.Error("get expected primary flag error", errs.ZapError(err)) - return "" - } - - return string(primary) -} - -// MarkExpectedPrimaryFlag marks the expected primary flag when the primary is specified. -func MarkExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leaderRaw string, leaseID clientv3.LeaseID) (int64, error) { - log.Info("set expected primary flag", zap.String("leader-path", ExpectedPrimaryPath(primaryPath))) - // write a flag to indicate the current primary has exited - resp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(ExpectedPrimaryPath(primaryPath), leaderRaw, clientv3.WithLease(leaseID))). - Commit() - if err != nil || !resp.Succeeded { - log.Error("mark expected primary error", errs.ZapError(err)) - return 0, err - } - return resp.Header.Revision, nil -} - // PromHandler is a handler to get prometheus metrics. func PromHandler() gin.HandlerFunc { return func(c *gin.Context) { diff --git a/pkg/member/member.go b/pkg/member/member.go index 7a58a976f28..32dab54b4b2 100644 --- a/pkg/member/member.go +++ b/pkg/member/member.go @@ -148,8 +148,8 @@ func (m *EmbeddedEtcdMember) setLeader(member *pdpb.Member) { m.lastLeaderUpdatedTime.Store(time.Now()) } -// UnsetLeader unsets the member's PD leader. -func (m *EmbeddedEtcdMember) UnsetLeader() { +// unsetLeader unsets the member's PD leader. +func (m *EmbeddedEtcdMember) unsetLeader() { m.leader.Store(&pdpb.Member{}) m.lastLeaderUpdatedTime.Store(time.Now()) } @@ -270,14 +270,14 @@ func (m *EmbeddedEtcdMember) CheckLeader() (ElectionLeader, bool) { func (m *EmbeddedEtcdMember) WatchLeader(ctx context.Context, leader *pdpb.Member, revision int64) { m.setLeader(leader) m.leadership.Watch(ctx, revision) - m.UnsetLeader() + m.unsetLeader() } // ResetLeader is used to reset the PD member's current leadership. // Basically it will reset the leader lease and unset leader info. func (m *EmbeddedEtcdMember) ResetLeader() { m.leadership.Reset() - m.UnsetLeader() + m.unsetLeader() } // CheckPriority checks whether the etcd leader should be moved according to the priority. diff --git a/pkg/member/participant.go b/pkg/member/participant.go index d70dd43aa9c..6f9c44d8c27 100644 --- a/pkg/member/participant.go +++ b/pkg/member/participant.go @@ -67,6 +67,8 @@ type Participant struct { campaignChecker atomic.Value // Store as leadershipCheckFunc // lastLeaderUpdatedTime is the last time when the leader is updated. lastLeaderUpdatedTime atomic.Value + // expectedPrimaryLease is the expected lease for the primary. + expectedPrimaryLease atomic.Value // stored as *election.Lease } // NewParticipant create a new Participant. @@ -154,8 +156,8 @@ func (m *Participant) setLeader(member participant) { m.lastLeaderUpdatedTime.Store(time.Now()) } -// UnsetLeader unsets the member's leader. -func (m *Participant) UnsetLeader() { +// unsetLeader unsets the member's leader. +func (m *Participant) unsetLeader() { leader := NewParticipantByService(m.serviceName) m.leader.Store(leader) m.lastLeaderUpdatedTime.Store(time.Now()) @@ -264,14 +266,14 @@ func (m *Participant) CheckLeader() (ElectionLeader, bool) { func (m *Participant) WatchLeader(ctx context.Context, leader participant, revision int64) { m.setLeader(leader) m.leadership.Watch(ctx, revision) - m.UnsetLeader() + m.unsetLeader() } // ResetLeader is used to reset the member's current leadership. // Basically it will reset the leader lease and unset leader info. func (m *Participant) ResetLeader() { m.leadership.Reset() - m.UnsetLeader() + m.unsetLeader() } // IsSameLeader checks whether a server is the leader itself. @@ -374,6 +376,20 @@ func (m *Participant) SetCampaignChecker(checker leadershipCheckFunc) { m.campaignChecker.Store(checker) } +// SetExpectedPrimaryLease sets the expected lease for the primary. +func (m *Participant) SetExpectedPrimaryLease(lease *election.Lease) { + m.expectedPrimaryLease.Store(lease) +} + +// GetExpectedPrimaryLease gets the expected lease for the primary. +func (m *Participant) GetExpectedPrimaryLease() *election.Lease { + l := m.expectedPrimaryLease.Load() + if l == nil { + return nil + } + return l.(*election.Lease) +} + // NewParticipantByService creates a new participant by service name. func NewParticipantByService(serviceName string) (p participant) { switch serviceName { diff --git a/pkg/tso/allocator_manager.go b/pkg/tso/allocator_manager.go index bc369a9e297..62a4fb97a57 100644 --- a/pkg/tso/allocator_manager.go +++ b/pkg/tso/allocator_manager.go @@ -145,8 +145,6 @@ type ElectionMember interface { GetDCLocationPath(id uint64) string // PreCheckLeader does some pre-check before checking whether it's the leader. PreCheckLeader() error - // UnsetLeader unsets the member's leader. - UnsetLeader() } // AllocatorManager is used to manage the TSO Allocators a PD server holds. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 4988d7e4ae0..b7bff866b27 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -37,7 +37,6 @@ import ( "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/tsoutil" "github.com/tikv/pd/pkg/utils/typeutil" - "go.etcd.io/etcd/clientv3" "go.uber.org/zap" "google.golang.org/grpc" ) @@ -81,8 +80,10 @@ type GlobalTSOAllocator struct { // for global TSO synchronization am *AllocatorManager // for election use - member ElectionMember - timestampOracle *timestampOracle + member ElectionMember + // expectedPrimaryLease is used to store the expected primary lease. + expectedPrimaryLease atomic.Value // store as *election.LeaderLease + timestampOracle *timestampOracle // syncRTT is the RTT duration a SyncMaxTS RPC call will cost, // which is used to estimate the MaxTS in a Global TSO generation // to reduce the gRPC network IO latency. @@ -566,7 +567,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { // To make sure the expected primary(if existed) and new primary are on the same server. expectedPrimary := mcsutils.GetExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. - // expected primary ONLY SET BY `/ms/primary/transfer` API. + // expected primary ONLY SET BY `{service}/primary/transfer` API. if expectedPrimary != "" && !strings.Contains(gta.member.MemberValue(), expectedPrimary) { log.Info("skip campaigning of tso primary and check later", zap.String("server-name", gta.member.Name()), @@ -639,12 +640,13 @@ func (gta *GlobalTSOAllocator) campaignLeader() { // check expected primary and watch the primary. exitPrimary := make(chan struct{}) - expectedLease, revision, err := gta.keepExpectedPrimaryAlive(ctx) + lease, err := mcsutils.KeepExpectedPrimaryAlive(ctx, gta.member.Client(), exitPrimary, + gta.am.leaderLease, gta.member.GetLeaderPath(), gta.member.MemberValue(), mcsutils.TSOServiceName) if err != nil { - log.Error("prepare primary watch error", errs.ZapError(err)) + log.Error("prepare tso primary watch error", errs.ZapError(err)) return } - go gta.expectedPrimaryWatch(ctx, expectedLease, revision+1, exitPrimary) + gta.expectedPrimaryLease.Store(lease) gta.member.EnableLeader() tsoLabel := fmt.Sprintf("TSO Service Group %d", gta.getGroupID()) @@ -684,48 +686,13 @@ func (gta *GlobalTSOAllocator) campaignLeader() { } } -// keepExpectedPrimaryAlive keeps the expected primary alive. -// We use lease to keep `expected primary` healthy. -// ONLY reset by the following conditions: -// - changed by`/ms/primary/transfer` API. -// - server closed. -func (gta *GlobalTSOAllocator) keepExpectedPrimaryAlive(ctx context.Context) (*election.Leadership, int64, error) { - const purpose = "tso-primary-watch" - cli := gta.member.Client() - newLease := election.NewLease(cli, purpose) - if err := newLease.Grant(gta.am.leaderLease); err != nil { - return nil, 0, err - } - - revision, err := mcsutils.MarkExpectedPrimaryFlag(cli, gta.member.GetLeaderPath(), gta.member.MemberValue(), - newLease.ID.Load().(clientv3.LeaseID)) - if err != nil { - log.Error("mark expected primary error", errs.ZapError(err)) - return nil, 0, err - } - // Keep alive the current primary leadership to indicate that the server is still alive. - // Watch the expected primary path to check whether the expected primary has changed. - expectedLease := election.NewLeadership(cli, mcsutils.ExpectedPrimaryPath(gta.member.GetLeaderPath()), purpose) - expectedLease.SetLease(newLease) - expectedLease.Keep(ctx) - return expectedLease, revision, nil -} - -// primaryWatch watches `/ms/primary/transfer` API whether changed the expected primary. -func (gta *GlobalTSOAllocator) expectedPrimaryWatch(ctx context.Context, expectedLease *election.Leadership, revision int64, exitPrimary chan struct{}) { - log.Info("tso primary start to watch the primary", - logutil.CondUint32("keyspace-group-id", gta.getGroupID(), gta.getGroupID() > 0), - zap.String("campaign-tso-primary-name", gta.member.Name())) - expectedLease.SetPrimaryWatch(true) - expectedLease.Watch(ctx, revision) - expectedLease.Reset() - defer log.Info("tso primary exit the primary watch loop") - select { - case <-ctx.Done(): - return - case exitPrimary <- struct{}{}: - return +// GetExpectedPrimaryLease returns the expected primary lease. +func (gta *GlobalTSOAllocator) GetExpectedPrimaryLease() *election.Lease { + l := gta.expectedPrimaryLease.Load() + if l == nil { + return nil } + return l.(*election.Lease) } func (gta *GlobalTSOAllocator) getMetrics() *tsoMetrics { diff --git a/server/apiv2/handlers/micro_service.go b/server/apiv2/handlers/micro_service.go index b1c290a1fb1..fd44665530f 100644 --- a/server/apiv2/handlers/micro_service.go +++ b/server/apiv2/handlers/micro_service.go @@ -16,11 +16,9 @@ package handlers import ( "net/http" - "strconv" "github.com/gin-gonic/gin" "github.com/tikv/pd/pkg/mcs/discovery" - "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/server" "github.com/tikv/pd/server/apiv2/middlewares" ) @@ -30,7 +28,6 @@ func RegisterMicroService(r *gin.RouterGroup) { router := r.Group("ms") router.GET("members/:service", GetMembers) router.GET("primary/:service", GetPrimary) - router.POST("primary/transfer/:service", TransferPrimary) } // GetMembers gets all members of the cluster for the specified service. @@ -80,54 +77,3 @@ func GetPrimary(c *gin.Context) { c.AbortWithStatusJSON(http.StatusInternalServerError, "please specify service") } - -// TransferPrimary transfers the primary member of the specified service. -// @Tags primary -// @Summary Transfer the primary member of the specified service. -// @Produce json -// @Param service path string true "service name" -// @Param new_primary body string false "new primary name" -// @Success 200 string string -// @Router /ms/primary/transfer/{service} [post] -func TransferPrimary(c *gin.Context) { - svr := c.MustGet(middlewares.ServerContextKey).(*server.Server) - if !svr.IsAPIServiceMode() { - c.AbortWithStatusJSON(http.StatusNotFound, "not support micro service") - return - } - - if service := c.Param("service"); len(service) > 0 { - var input map[string]string - if err := c.BindJSON(&input); err != nil { - c.String(http.StatusBadRequest, err.Error()) - return - } - - newPrimary, keyspaceGroupID := "", utils.DefaultKeyspaceGroupID - if v, ok := input["new_primary"]; ok { - newPrimary = v - } - - if v, ok := input["keyspace_group_id"]; ok { - keyspaceGroupIDRaw, err := strconv.ParseUint(v, 10, 32) - if err != nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) - return - } - keyspaceGroupID = uint32(keyspaceGroupIDRaw) - } - oldPrimary, _ := svr.GetServicePrimaryAddr(c.Request.Context(), service) - if oldPrimary == newPrimary { - c.AbortWithStatusJSON(http.StatusInternalServerError, "new primary is the same as the old one") - return - } - if err := discovery.TransferPrimary(svr.GetClient(), service, oldPrimary, newPrimary, keyspaceGroupID); err != nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) - return - } - c.IndentedJSON(http.StatusOK, "success") - return - } - - c.AbortWithStatusJSON(http.StatusInternalServerError, "please specify service") -} diff --git a/tests/integrations/mcs/members/member_test.go b/tests/integrations/mcs/members/member_test.go index e5fadf73b59..30f18eaf719 100644 --- a/tests/integrations/mcs/members/member_test.go +++ b/tests/integrations/mcs/members/member_test.go @@ -15,7 +15,11 @@ package members_test import ( + "bytes" "context" + "encoding/json" + "fmt" + "net/http" "testing" "time" @@ -158,10 +162,6 @@ func (suite *memberTestSuite) TestCampaignPrimaryWhileServerClose() { func (suite *memberTestSuite) TestTransferPrimary() { re := suite.Require() - primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, "tso") - re.NoError(err) - re.NotEmpty(primary) - supportedServices := []string{"tso", "scheduling"} for _, service := range supportedServices { var nodes map[string]bs.Server @@ -173,10 +173,17 @@ func (suite *memberTestSuite) TestTransferPrimary() { } // Test resign primary by random - primary, err = suite.pdClient.GetMicroServicePrimary(suite.ctx, service) + primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, service) re.NoError(err) - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, "") + + newPrimaryData := make(map[string]any) + newPrimaryData["new_primary"] = "" + data, _ := json.Marshal(newPrimaryData) + resp, err := tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) re.NoError(err) + re.Equal(http.StatusOK, resp.StatusCode) + resp.Body.Close() testutil.Eventually(re, func() bool { for _, member := range nodes { @@ -187,7 +194,7 @@ func (suite *memberTestSuite) TestTransferPrimary() { return false }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, service) + primary, err = suite.pdClient.GetMicroServicePrimary(suite.ctx, service) re.NoError(err) // Test transfer primary to a specific node @@ -198,10 +205,16 @@ func (suite *memberTestSuite) TestTransferPrimary() { break } } - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + newPrimaryData["new_primary"] = newPrimary + data, _ = json.Marshal(newPrimaryData) + resp, err = tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) re.NoError(err) + re.Equal(http.StatusOK, resp.StatusCode) + resp.Body.Close() testutil.Eventually(re, func() bool { + println("newPrimary", newPrimary, nodes[newPrimary].IsServing()) return nodes[newPrimary].IsServing() }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) @@ -211,17 +224,18 @@ func (suite *memberTestSuite) TestTransferPrimary() { // Test transfer primary to a non-exist node newPrimary = "http://" - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) - re.Error(err) + newPrimaryData["new_primary"] = newPrimary + data, _ = json.Marshal(newPrimaryData) + resp, err = tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) + re.NoError(err) + re.Equal(http.StatusInternalServerError, resp.StatusCode) + resp.Body.Close() } } func (suite *memberTestSuite) TestCampaignPrimaryAfterTransfer() { re := suite.Require() - primary, err := suite.pdClient.GetMicroServicePrimary(suite.ctx, "tso") - re.NoError(err) - re.NotEmpty(primary) - supportedServices := []string{"tso", "scheduling"} for _, service := range supportedServices { var nodes map[string]bs.Server @@ -243,8 +257,14 @@ func (suite *memberTestSuite) TestCampaignPrimaryAfterTransfer() { break } } - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + newPrimaryData := make(map[string]any) + newPrimaryData["new_primary"] = newPrimary + data, _ := json.Marshal(newPrimaryData) + resp, err := tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) re.NoError(err) + re.Equal(http.StatusOK, resp.StatusCode) + resp.Body.Close() tests.WaitForPrimaryServing(re, nodes) newPrimary, err = suite.pdClient.GetMicroServicePrimary(suite.ctx, service) @@ -292,8 +312,14 @@ func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpired() { } // Mock the new primary can not grant leader which means the lease will expire re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", `return()`)) - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + newPrimaryData := make(map[string]any) + newPrimaryData["new_primary"] = newPrimary + data, _ := json.Marshal(newPrimaryData) + resp, err := tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) re.NoError(err) + re.Equal(http.StatusOK, resp.StatusCode) + resp.Body.Close() // Wait for the old primary exit and new primary campaign // cannot check newPrimary isServing when skipGrantLeader is enabled @@ -338,8 +364,14 @@ func (suite *memberTestSuite) TestTransferPrimaryWhileLeaseExpiredAndServerDown( } // Mock the new primary can not grant leader which means the lease will expire re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/election/skipGrantLeader", `return()`)) - err = suite.pdClient.TransferMicroServicePrimary(suite.ctx, service, newPrimary) + newPrimaryData := make(map[string]any) + newPrimaryData["new_primary"] = "" + data, _ := json.Marshal(newPrimaryData) + resp, err := tests.TestDialClient.Post(fmt.Sprintf("%s/%s/api/v1/primary/transfer", primary, service), + "application/json", bytes.NewBuffer(data)) re.NoError(err) + re.Equal(http.StatusOK, resp.StatusCode) + resp.Body.Close() // Wait for the old primary exit and new primary campaign // cannot check newPrimary isServing when skipGrantLeader is enabled From d037a6aec9a6723fcbdbeb5ae1b2edec4c7db329 Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 12 Aug 2024 14:35:00 +0800 Subject: [PATCH 19/24] remove delete Signed-off-by: husharp --- pkg/election/leadership.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index b706407a9c2..8d629362757 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -390,8 +390,7 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) { } for _, ev := range wresp.Events { - // ensure `{service}/primary/transfer` API will not meet this condition. - if ev.Type == mvccpb.DELETE && !ls.IsPrimary() { + if ev.Type == mvccpb.DELETE { log.Info("current leadership is deleted", zap.Int64("revision", wresp.Header.Revision), zap.String("leader-key", ls.leaderKey), zap.String("purpose", ls.purpose)) return From e711fd9e156108dae3447cc88af7371dfe73c39b Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 12 Aug 2024 14:52:12 +0800 Subject: [PATCH 20/24] refine purpose and lease Signed-off-by: husharp --- pkg/election/leadership.go | 6 +----- pkg/mcs/utils/expected_primary.go | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 8d629362757..856fa77fb1c 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -166,11 +166,7 @@ func (ls *Leadership) AddCampaignTimes() { func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...clientv3.Cmp) error { ls.leaderValue = leaderData // Create a new lease to campaign - newLease := &Lease{ - Purpose: ls.purpose, - client: ls.client, - lease: clientv3.NewLease(ls.client), - } + newLease := NewLease(ls.client, ls.purpose) ls.SetLease(newLease) failpoint.Inject("skipGrantLeader", func(val failpoint.Value) { diff --git a/pkg/mcs/utils/expected_primary.go b/pkg/mcs/utils/expected_primary.go index f91f3a1a175..e1a2e0b9e90 100644 --- a/pkg/mcs/utils/expected_primary.go +++ b/pkg/mcs/utils/expected_primary.go @@ -74,7 +74,7 @@ func MarkExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leader func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPrimary chan struct{}, leaseTimeout int64, leaderPath, memberValue, service string) (*election.Lease, error) { log.Info("primary start to watch the expected primary", zap.String("service", service), zap.String("primary-value", memberValue)) - service = fmt.Sprintf("%s-expected-primary", service) + service = fmt.Sprintf("%s expected primary", service) lease := election.NewLease(cli, service) if err := lease.Grant(leaseTimeout); err != nil { return nil, err From 7f0a426693d4a0bcc57ec31c3b82387d3a5aa6d5 Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 12 Aug 2024 16:14:42 +0800 Subject: [PATCH 21/24] refine code Signed-off-by: husharp --- pkg/mcs/discovery/discover.go | 3 ++- pkg/mcs/scheduling/server/apis/v1/api.go | 7 +++---- pkg/mcs/tso/server/apis/v1/api.go | 9 ++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 66d7f96fd50..c219cbc047f 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -15,6 +15,8 @@ package discovery import ( + "strconv" + "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" @@ -23,7 +25,6 @@ import ( "github.com/tikv/pd/pkg/utils/etcdutil" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" - "strconv" ) // Discover is used to get all the service instances of the specified service name. diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 6a7c69ac787..e9dc4c4669b 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -228,7 +228,7 @@ func (s *Service) RegisterConfigRouter() { regions.GET("/:id/labels", getRegionLabels) } -// RegisterPrimaryRouter registers the router of the config handler. +// RegisterPrimaryRouter registers the router of the primary handler. func (s *Service) RegisterPrimaryRouter() { router := s.root.Group("primary") router.POST("transfer", transferPrimary) @@ -1487,11 +1487,10 @@ func getRegionByID(c *gin.Context) { c.Data(http.StatusOK, "application/json", b) } -// TransferPrimary transfers the primary member. +// TransferPrimary transfers the primary member to `new_primary`. // @Tags primary -// @Summary Transfer the primary member of the specified service. +// @Summary Transfer the primary member to `new_primary`. // @Produce json -// @Param service path string true "service name" // @Param new_primary body string false "new primary name" // @Success 200 string string // @Router /primary/transfer [post] diff --git a/pkg/mcs/tso/server/apis/v1/api.go b/pkg/mcs/tso/server/apis/v1/api.go index c22779c3928..1b2227d47ce 100644 --- a/pkg/mcs/tso/server/apis/v1/api.go +++ b/pkg/mcs/tso/server/apis/v1/api.go @@ -128,7 +128,7 @@ func (s *Service) RegisterConfigRouter() { router.GET("", getConfig) } -// RegisterPrimaryRouter registers the router of the config handler. +// RegisterPrimaryRouter registers the router of the primary handler. func (s *Service) RegisterPrimaryRouter() { router := s.root.Group("primary") router.POST("transfer", transferPrimary) @@ -275,13 +275,11 @@ func getConfig(c *gin.Context) { c.IndentedJSON(http.StatusOK, svr.GetConfig()) } -// TransferPrimary transfers the primary member of the specified service. +// TransferPrimary transfers the primary member to `new_primary`. // @Tags primary -// @Summary Transfer the primary member of the specified service. +// @Summary Transfer the primary member to `new_primary`. // @Produce json -// @Param service path string true "service name" // @Param new_primary body string false "new primary name" -// @Param keyspace_group_id body string false "keyspace group id" // @Success 200 string string // @Router /primary/transfer [post] func transferPrimary(c *gin.Context) { @@ -292,6 +290,7 @@ func transferPrimary(c *gin.Context) { return } + // We only support default keyspace group now. newPrimary, keyspaceGroupID := "", constant.DefaultKeyspaceGroupID if v, ok := input["new_primary"]; ok { newPrimary = v From d810ed131dc0664af323a99bdaf3678e566669d6 Mon Sep 17 00:00:00 2001 From: husharp Date: Mon, 12 Aug 2024 16:30:34 +0800 Subject: [PATCH 22/24] address comment Signed-off-by: husharp --- pkg/election/leadership.go | 2 +- pkg/election/lease_test.go | 19 +++---------------- pkg/mcs/scheduling/server/server.go | 3 ++- pkg/mcs/utils/expected_primary.go | 10 +++++----- pkg/tso/global_allocator.go | 2 +- 5 files changed, 12 insertions(+), 24 deletions(-) diff --git a/pkg/election/leadership.go b/pkg/election/leadership.go index 856fa77fb1c..1361d685b57 100644 --- a/pkg/election/leadership.go +++ b/pkg/election/leadership.go @@ -171,7 +171,7 @@ func (ls *Leadership) Campaign(leaseTimeout int64, leaderData string, cmps ...cl failpoint.Inject("skipGrantLeader", func(val failpoint.Value) { name, ok := val.(string) - if name == "" { + if len(name) == 0 { // return directly when not set the name failpoint.Return(errors.Errorf("failed to grant lease")) } diff --git a/pkg/election/lease_test.go b/pkg/election/lease_test.go index 8c600da5a3a..3a02de97239 100644 --- a/pkg/election/lease_test.go +++ b/pkg/election/lease_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/utils/etcdutil" - "go.etcd.io/etcd/clientv3" ) func TestLease(t *testing.T) { @@ -30,16 +29,8 @@ func TestLease(t *testing.T) { defer clean() // Create the lease. - lease1 := &Lease{ - Purpose: "test_lease_1", - client: client, - lease: clientv3.NewLease(client), - } - lease2 := &Lease{ - Purpose: "test_lease_2", - client: client, - lease: clientv3.NewLease(client), - } + lease1 := NewLease(client, "test_lease_1") + lease2 := NewLease(client, "test_lease_2") re.True(lease1.IsExpired()) re.True(lease2.IsExpired()) re.NoError(lease1.Close()) @@ -95,11 +86,7 @@ func TestLeaseKeepAlive(t *testing.T) { defer clean() // Create the lease. - lease := &Lease{ - Purpose: "test_lease", - client: client, - lease: clientv3.NewLease(client), - } + lease := NewLease(client, "test_lease") re.NoError(lease.Grant(defaultLeaseTimeout)) ch := lease.keepAliveWorker(context.Background(), 2*time.Second) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 27e931aa59c..e1753cf2972 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -133,6 +133,7 @@ func (s *Server) GetBackendEndpoints() string { return s.cfg.BackendEndpoints } +// GetParticipant returns the participant. func (s *Server) GetParticipant() *member.Participant { return s.participant } @@ -251,7 +252,7 @@ func (s *Server) primaryElectionLoop() { expectedPrimary := utils.GetExpectedPrimaryFlag(s.GetClient(), s.participant.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `{service}/primary/transfer` API. - if expectedPrimary != "" && !strings.Contains(s.participant.MemberValue(), expectedPrimary) { + if len(expectedPrimary) > 0 && !strings.Contains(s.participant.MemberValue(), expectedPrimary) { log.Info("skip campaigning of scheduling primary and check later", zap.String("server-name", s.Name()), zap.String("expected-primary-id", expectedPrimary), diff --git a/pkg/mcs/utils/expected_primary.go b/pkg/mcs/utils/expected_primary.go index befbc0642a3..3de155f1b27 100644 --- a/pkg/mcs/utils/expected_primary.go +++ b/pkg/mcs/utils/expected_primary.go @@ -33,15 +33,15 @@ import ( "go.uber.org/zap" ) -// ExpectedPrimaryFlag is the flag to indicate the expected primary. +// expectedPrimaryFlag is the flag to indicate the expected primary. // 1. When the primary was campaigned successfully, it will set the `expected_primary` flag. // 2. Using `{service}/primary/transfer` API will revoke the previous lease and set a new `expected_primary` flag. // This flag used to help new primary to campaign successfully while other secondaries can skip the campaign. -const ExpectedPrimaryFlag = "expected_primary" +const expectedPrimaryFlag = "expected_primary" // ExpectedPrimaryPath formats the primary path with the expected primary flag. func ExpectedPrimaryPath(primaryPath string) string { - return fmt.Sprintf("%s/%s", primaryPath, ExpectedPrimaryFlag) + return fmt.Sprintf("%s/%s", primaryPath, expectedPrimaryFlag) } // GetExpectedPrimaryFlag gets the expected primary flag. @@ -77,7 +77,7 @@ func markExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leader // - changed by `{service}/primary/transfer` API. // - leader lease expired. // ONLY primary called this function. -func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPrimary chan struct{}, +func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPrimary chan<- struct{}, leaseTimeout int64, leaderPath, memberValue, service string) (*election.Lease, error) { log.Info("primary start to watch the expected primary", zap.String("service", service), zap.String("primary-value", memberValue)) service = fmt.Sprintf("%s expected primary", service) @@ -103,7 +103,7 @@ func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPri // watchExpectedPrimary watches `{service}/primary/transfer` API whether changed the expected primary. func watchExpectedPrimary(ctx context.Context, - expectedPrimary *election.Leadership, revision int64, exitPrimary chan struct{}) { + expectedPrimary *election.Leadership, revision int64, exitPrimary chan<- struct{}) { expectedPrimary.SetPrimaryWatch(true) // ONLY exited watch by the following conditions: // - changed by `{service}/primary/transfer` API. diff --git a/pkg/tso/global_allocator.go b/pkg/tso/global_allocator.go index 1e62b0c76bc..38511ee2913 100644 --- a/pkg/tso/global_allocator.go +++ b/pkg/tso/global_allocator.go @@ -569,7 +569,7 @@ func (gta *GlobalTSOAllocator) primaryElectionLoop() { expectedPrimary := mcsutils.GetExpectedPrimaryFlag(gta.member.Client(), gta.member.GetLeaderPath()) // skip campaign the primary if the expected primary is not empty and not equal to the current memberValue. // expected primary ONLY SET BY `{service}/primary/transfer` API. - if expectedPrimary != "" && !strings.Contains(gta.member.MemberValue(), expectedPrimary) { + if len(expectedPrimary) > 0 && !strings.Contains(gta.member.MemberValue(), expectedPrimary) { log.Info("skip campaigning of tso primary and check later", zap.String("server-name", gta.member.Name()), zap.String("expected-primary-id", expectedPrimary), From 43830ec9ea80d92008be663ec5c26b3d137373a9 Mon Sep 17 00:00:00 2001 From: husharp Date: Tue, 13 Aug 2024 09:04:27 +0800 Subject: [PATCH 23/24] non-essential exported Signed-off-by: husharp --- pkg/mcs/utils/expected_primary.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/mcs/utils/expected_primary.go b/pkg/mcs/utils/expected_primary.go index 3de155f1b27..1ea34d7ddf3 100644 --- a/pkg/mcs/utils/expected_primary.go +++ b/pkg/mcs/utils/expected_primary.go @@ -39,14 +39,14 @@ import ( // This flag used to help new primary to campaign successfully while other secondaries can skip the campaign. const expectedPrimaryFlag = "expected_primary" -// ExpectedPrimaryPath formats the primary path with the expected primary flag. -func ExpectedPrimaryPath(primaryPath string) string { +// expectedPrimaryPath formats the primary path with the expected primary flag. +func expectedPrimaryPath(primaryPath string) string { return fmt.Sprintf("%s/%s", primaryPath, expectedPrimaryFlag) } // GetExpectedPrimaryFlag gets the expected primary flag. func GetExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string { - path := ExpectedPrimaryPath(primaryPath) + path := expectedPrimaryPath(primaryPath) primary, err := etcdutil.GetValue(client, path) if err != nil { log.Error("get expected primary flag error", errs.ZapError(err), zap.String("primary-path", path)) @@ -58,11 +58,11 @@ func GetExpectedPrimaryFlag(client *clientv3.Client, primaryPath string) string // markExpectedPrimaryFlag marks the expected primary flag when the primary is specified. func markExpectedPrimaryFlag(client *clientv3.Client, primaryPath string, leaderRaw string, leaseID clientv3.LeaseID) (int64, error) { - path := ExpectedPrimaryPath(primaryPath) + path := expectedPrimaryPath(primaryPath) log.Info("set expected primary flag", zap.String("primary-path", path), zap.String("leader-raw", leaderRaw)) // write a flag to indicate the expected primary. resp, err := kv.NewSlowLogTxn(client). - Then(clientv3.OpPut(ExpectedPrimaryPath(primaryPath), leaderRaw, clientv3.WithLease(leaseID))). + Then(clientv3.OpPut(expectedPrimaryPath(primaryPath), leaderRaw, clientv3.WithLease(leaseID))). Commit() if err != nil || !resp.Succeeded { log.Error("mark expected primary error", errs.ZapError(err), zap.String("primary-path", path)) @@ -93,7 +93,7 @@ func KeepExpectedPrimaryAlive(ctx context.Context, cli *clientv3.Client, exitPri } // Keep alive the current expected primary leadership to indicate that the server is still alive. // Watch the expected primary path to check whether the expected primary has changed by `{service}/primary/transfer` API. - expectedPrimary := election.NewLeadership(cli, ExpectedPrimaryPath(leaderPath), service) + expectedPrimary := election.NewLeadership(cli, expectedPrimaryPath(leaderPath), service) expectedPrimary.SetLease(lease) expectedPrimary.Keep(ctx) From 2d9a3b0e5da1a8e50251c4510368e5b3085394c7 Mon Sep 17 00:00:00 2001 From: husharp Date: Tue, 13 Aug 2024 12:24:04 +0800 Subject: [PATCH 24/24] refine check name Signed-off-by: husharp --- pkg/mcs/scheduling/server/apis/v1/api.go | 2 +- pkg/mcs/tso/server/apis/v1/api.go | 2 +- pkg/mcs/utils/expected_primary.go | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index e9dc4c4669b..8b9427a8896 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -1508,7 +1508,7 @@ func transferPrimary(c *gin.Context) { } if err := mcsutils.TransferPrimary(svr.GetClient(), svr.GetParticipant().GetExpectedPrimaryLease(), - constant.SchedulingServiceName, svr.GetAddr(), newPrimary, 0); err != nil { + constant.SchedulingServiceName, svr.Name(), newPrimary, 0); err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) return } diff --git a/pkg/mcs/tso/server/apis/v1/api.go b/pkg/mcs/tso/server/apis/v1/api.go index 1b2227d47ce..19b3a1be612 100644 --- a/pkg/mcs/tso/server/apis/v1/api.go +++ b/pkg/mcs/tso/server/apis/v1/api.go @@ -309,7 +309,7 @@ func transferPrimary(c *gin.Context) { } if err := utils.TransferPrimary(svr.GetClient(), globalAllocator.(*tso.GlobalTSOAllocator).GetExpectedPrimaryLease(), - constant.TSOServiceName, svr.GetAddr(), newPrimary, keyspaceGroupID); err != nil { + constant.TSOServiceName, svr.Name(), newPrimary, keyspaceGroupID); err != nil { c.AbortWithStatusJSON(http.StatusInternalServerError, err.Error()) return } diff --git a/pkg/mcs/utils/expected_primary.go b/pkg/mcs/utils/expected_primary.go index 1ea34d7ddf3..d44b2eae436 100644 --- a/pkg/mcs/utils/expected_primary.go +++ b/pkg/mcs/utils/expected_primary.go @@ -122,11 +122,11 @@ func watchExpectedPrimary(ctx context.Context, // TransferPrimary transfers the primary of the specified service. // keyspaceGroupID is optional, only used for TSO service. func TransferPrimary(client *clientv3.Client, lease *election.Lease, serviceName, - oldPrimaryAddr, newPrimary string, keyspaceGroupID uint32) error { + oldPrimary, newPrimary string, keyspaceGroupID uint32) error { if lease == nil { return errors.New("current lease is nil, please check leadership") } - log.Info("try to transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimaryAddr), zap.String("to", newPrimary)) + log.Info("try to transfer primary", zap.String("service", serviceName), zap.String("from", oldPrimary), zap.String("to", newPrimary)) entries, err := discovery.GetMSMembers(serviceName, client) if err != nil { return err @@ -139,13 +139,12 @@ func TransferPrimary(client *clientv3.Client, lease *election.Lease, serviceName var primaryIDs []string for _, member := range entries { - // TODO: judged by `addr` and `name` now, should unify them to `name` in the future. - if (newPrimary == "" && member.ServiceAddr != oldPrimaryAddr) || (newPrimary != "" && member.Name == newPrimary) { + if (newPrimary == "" && member.Name != oldPrimary) || (newPrimary != "" && member.Name == newPrimary) { primaryIDs = append(primaryIDs, member.ServiceAddr) } } if len(primaryIDs) == 0 { - return errors.Errorf("no valid secondary to transfer primary, from %s to %s", oldPrimaryAddr, newPrimary) + return errors.Errorf("no valid secondary to transfer primary, from %s to %s", oldPrimary, newPrimary) } r := rand.New(rand.NewSource(time.Now().UnixNano()))