Skip to content

Commit

Permalink
Fix "unavailible" when hub and agent clock out of sync.
Browse files Browse the repository at this point in the history
Signed-off-by: xuezhaojun <[email protected]>
  • Loading branch information
xuezhaojun committed Nov 14, 2023
1 parent 29ffff0 commit cee6ba2
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 7 deletions.
65 changes: 58 additions & 7 deletions pkg/registration/hub/lease/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package lease

import (
"context"
"sync"
"time"

"github.com/openshift/library-go/pkg/controller/factory"
Expand Down Expand Up @@ -34,11 +35,44 @@ var (

// leaseController checks the lease of managed clusters on hub cluster to determine whether a managed cluster is available.
type leaseController struct {
kubeClient kubernetes.Interface
patcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]
clusterLister clusterv1listers.ManagedClusterLister
leaseLister coordlisters.LeaseLister
eventRecorder events.Recorder
kubeClient kubernetes.Interface
patcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]
clusterLister clusterv1listers.ManagedClusterLister
leaseLister coordlisters.LeaseLister
eventRecorder events.Recorder
clusterHealthMap *clusterHealthMap
}

type clusterHealth struct {
savedLease *coordv1.Lease
probeTimestamp time.Time
}

func (h *clusterHealth) deepCopy() *clusterHealth {
if h == nil {
return nil
}
return &clusterHealth{
savedLease: h.savedLease.DeepCopy(),
probeTimestamp: h.probeTimestamp,
}
}

type clusterHealthMap struct {
sync.RWMutex
clusterHealths map[string]*clusterHealth
}

func (m *clusterHealthMap) getDeepCopy(clusterName string) *clusterHealth {
m.RLock()
defer m.RUnlock()
return m.clusterHealths[clusterName].deepCopy()
}

func (m *clusterHealthMap) set(clusterName string, clusterHealth *clusterHealth) {
m.Lock()
defer m.Unlock()
m.clusterHealths[clusterName] = clusterHealth
}

// NewClusterLeaseController creates a cluster lease controller on hub cluster.
Expand All @@ -56,6 +90,9 @@ func NewClusterLeaseController(
clusterLister: clusterInformer.Lister(),
leaseLister: leaseInformer.Lister(),
eventRecorder: recorder.WithComponentSuffix("managed-cluster-lease-controller"),
clusterHealthMap: &clusterHealthMap{
clusterHealths: make(map[string]*clusterHealth),
},
}
return factory.New().
WithFilteredEventsInformersQueueKeysFunc(
Expand Down Expand Up @@ -118,8 +155,22 @@ func (c *leaseController) sync(ctx context.Context, syncCtx factory.SyncContext)
gracePeriod = time.Duration(leaseDurationTimes*LeaseDurationSeconds) * time.Second
}

now := time.Now()
if !now.Before(observedLease.Spec.RenewTime.Add(gracePeriod)) {
ch := c.clusterHealthMap.getDeepCopy(clusterName)
defer c.clusterHealthMap.set(clusterName, ch)

if ch == nil {
ch = &clusterHealth{
probeTimestamp: time.Now(),
}
}

// Update the probe time if agent lease is renewed.
if ch.savedLease == nil || ch.savedLease.Spec.RenewTime.Before(observedLease.Spec.RenewTime) {
ch.savedLease = observedLease
ch.probeTimestamp = time.Now()
}

if time.Now().After(ch.probeTimestamp.Add(gracePeriod)) {
// the lease is not updated constantly, change the cluster available condition to unknown
if err := c.updateClusterStatus(ctx, cluster); err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pkg/registration/hub/lease/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ func TestSync(t *testing.T) {
clusterLister: clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(),
leaseLister: leaseInformerFactory.Coordination().V1().Leases().Lister(),
eventRecorder: syncCtx.Recorder(),
clusterHealthMap: &clusterHealthMap{
clusterHealths: make(map[string]*clusterHealth),
},
}
syncErr := ctrl.sync(context.TODO(), syncCtx)
if syncErr != nil {
Expand Down

0 comments on commit cee6ba2

Please sign in to comment.