Skip to content

Commit

Permalink
node labels: clean up log messages
Browse files Browse the repository at this point in the history
The node label controller is triggered for every node change
event and is currently more verbose than it should be.

We'll update it so that it logs a message only if there are
actual configuration changes.
  • Loading branch information
petrutlucian94 committed Jan 29, 2025
1 parent 1c6739a commit ad3ff49
Showing 1 changed file with 7 additions and 16 deletions.
23 changes: 7 additions & 16 deletions src/k8s/pkg/k8sd/controllers/node_label.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,58 +56,52 @@ func (c *NodeLabelController) Run(ctx context.Context) {
}

func (c *NodeLabelController) reconcileFailureDomain(ctx context.Context, node *v1.Node) error {
log := log.FromContext(ctx)

azLabel, azFound := node.Labels["topology.kubernetes.io/zone"]
var failureDomain uint64
if azFound && azLabel != "" {
log.Info("Node availability zone found", "label", azLabel)
// k8s-dqlite expects the failure domain (availability zone) to be an uint64
// value defined in $dbStateDir/failure-domain. Both k8s-snap Dqlite databases
// need to be updated (k8sd and k8s-dqlite).
failureDomain = snaputil.NodeLabelToDqliteFailureDomain(azLabel)
} else {
log.Info("The node availability zone label is unset, clearing failure domain")
failureDomain = 0
}

log.Info("Setting failure domain", "failure domain", failureDomain, "availability zone", azLabel)
if err := c.updateDqliteFailureDomain(ctx, c.snap, failureDomain); err != nil {
if err := c.updateDqliteFailureDomain(ctx, failureDomain, azLabel); err != nil {
return fmt.Errorf("failed to update failure-domain, error: %w", err)
}

return nil
}

func (c *NodeLabelController) updateDqliteFailureDomain(ctx context.Context, snap snap.Snap, failureDomain uint64) error {
func (c *NodeLabelController) updateDqliteFailureDomain(ctx context.Context, failureDomain uint64, availabilityZone string) error {
log := log.FromContext(ctx)

// We need to update both k8s-snap Dqlite databases (k8sd and k8s-dqlite).
k8sDqliteStateDir := snap.K8sDqliteStateDir()
k8sdDbStateDir := filepath.Join(snap.K8sdStateDir(), "database")
k8sDqliteStateDir := c.snap.K8sDqliteStateDir()
k8sdDbStateDir := filepath.Join(c.snap.K8sdStateDir(), "database")

log.Info("Updating k8s-dqlite failure domain", "failure domain", failureDomain)
modified, err := snaputil.UpdateDqliteFailureDomain(failureDomain, k8sDqliteStateDir)
if err != nil {
return err
}
log.Info("Updated k8s-dqlite failure domain", "restart needed", modified)

if modified {
log.Info("Updated k8s-dqlite failure domain", "failure domain", failureDomain, "availability zone", availabilityZone)
if err = c.snap.RestartService(ctx, "k8s-dqlite"); err != nil {
return fmt.Errorf("failed to restart k8s-dqlite to apply failure domain: %w", err)
}
}

log.Info("Updating k8sd failure domain", "failure domain", failureDomain)
modified, err = snaputil.UpdateDqliteFailureDomain(failureDomain, k8sdDbStateDir)
if err != nil {
return err
}
log.Info("Updated k8sd failure domain", "restart needed", modified)

// TODO: use Microcluster API once it becomes available. This should
// prevent a service restart, at the moment k8sd needs to restart itself.
if modified {
log.Info("Updated k8sd failure domain", "failure domain", failureDomain, "availability zone", availabilityZone)
if err := c.snap.RestartService(ctx, "k8sd"); err != nil {
return fmt.Errorf("failed to restart k8sd to apply failure domain: %w", err)
}
Expand All @@ -118,9 +112,6 @@ func (c *NodeLabelController) updateDqliteFailureDomain(ctx context.Context, sna
}

func (c *NodeLabelController) reconcile(ctx context.Context, node *v1.Node) error {
log := log.FromContext(ctx)
log.Info("reconciling node labels", "name", node.Name)

if err := c.reconcileFailureDomain(ctx, node); err != nil {
return fmt.Errorf("failed to reconcile failure domain: %w", err)
}
Expand Down

0 comments on commit ad3ff49

Please sign in to comment.