fix: node identity flip

The issue shows up in our tests as:

```
=== RUN TestIntegration/api.DiscoverySuite/TestRegistries
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
```

It should be a minor issue for non-KubeSpan'ed clusters (as members get correctly de-duplicated), but it might cause connectivity issues for KubeSpan'ed clusters.

The issue comes from the short mount in the sequencer around the `loadConfig` step: because the mount is short-lived, it triggers a race in the node identity controller when it tries to read the existing identity from `/system/state`. As the partition is already unmounted by the time the controller tries to read it, the controller assumes there's no identity and establishes a new one. Eventually, it writes the new identity back to disk, but that new identity is different from the previous one, so the node creates another entry for itself in the discovery service.

A proper solution is a volume mount controller, but a temporary band-aid is to avoid broadcasting the mount notification for this short `STATE` mount via resources, so that the controller isn't triggered.

Signed-off-by: Andrey Smirnov <[email protected]>
siderolabs · Dec 13, 2024 · b32a06b · b32a06b
1 parent 5823606
commit b32a06b
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 21 deletions.
diff --git a/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go b/internal/app/machined/pkg/runtime/v1alpha1/v1alpha1_sequencer_tasks.go
@@ -1646,7 +1646,7 @@ func MountStatePartition(required bool) func(seq runtime.Sequence, _ any) (runti
 				}
 			}
 
-			return mount.SystemPartitionMount(ctx, r, logger, constants.StatePartitionLabel)
+			return mount.SystemPartitionMount(ctx, r, logger, constants.StatePartitionLabel, !required)
 		}, "mountStatePartition"
 	}
 }
@@ -1665,7 +1665,7 @@ func MountEphemeralPartition(runtime.Sequence, any) (runtime.TaskExecutionFunc,
 			return err
 		}
 
-		return mount.SystemPartitionMount(ctx, r, logger, constants.EphemeralPartitionLabel,
+		return mount.SystemPartitionMount(ctx, r, logger, constants.EphemeralPartitionLabel, false,
 			mountv2.WithProjectQuota(r.Config().Machine().Features().DiskQuotaSupportEnabled()))
 	}, "mountEphemeralPartition"
 }

diff --git a/internal/pkg/mount/system.go b/internal/pkg/mount/system.go
@@ -33,7 +33,7 @@ func IdempotentSystemPartitionMounter(r runtime.Runtime) func(label string, opts
 			return nil
 		}
 
-		return SystemPartitionMount(context.Background(), r, log.Default(), label, opts...)
+		return SystemPartitionMount(context.Background(), r, log.Default(), label, false, opts...)
 	}
 }
 
@@ -48,7 +48,7 @@ func IsSystemPartitionMounted(label string) bool {
 }
 
 // SystemPartitionMount mounts a system partition by the label.
-func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Logger, label string, opts ...mountv2.NewPointOption) (err error) {
+func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Logger, label string, silent bool, opts ...mountv2.NewPointOption) (err error) {
 	volumeStatus, err := safe.StateGetByID[*block.VolumeStatus](ctx, r.State().V1Alpha2().Resources(), label)
 	if err != nil {
 		return fmt.Errorf("error getting volume status %q: %w", label, err)
@@ -77,26 +77,29 @@ func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Lo
 		return err
 	}
 
-	// record mount as the resource
-	mountStatus := runtimeres.NewMountStatus(v1alpha1.NamespaceName, label)
-	mountStatus.TypedSpec().Source = volumeStatus.TypedSpec().MountLocation
-	mountStatus.TypedSpec().Target = volumeConfig.TypedSpec().Mount.TargetPath
-	mountStatus.TypedSpec().FilesystemType = volumeStatus.TypedSpec().Filesystem.String()
-	mountStatus.TypedSpec().Encrypted = volumeStatus.TypedSpec().EncryptionProvider != block.EncryptionProviderNone
+	// silent mounts skip resource notification to other components
+	if !silent {
+		// record mount as the resource
+		mountStatus := runtimeres.NewMountStatus(v1alpha1.NamespaceName, label)
+		mountStatus.TypedSpec().Source = volumeStatus.TypedSpec().MountLocation
+		mountStatus.TypedSpec().Target = volumeConfig.TypedSpec().Mount.TargetPath
+		mountStatus.TypedSpec().FilesystemType = volumeStatus.TypedSpec().Filesystem.String()
+		mountStatus.TypedSpec().Encrypted = volumeStatus.TypedSpec().EncryptionProvider != block.EncryptionProviderNone
 
-	if mountStatus.TypedSpec().Encrypted {
-		encryptionProviders := make(map[string]struct{})
+		if mountStatus.TypedSpec().Encrypted {
+			encryptionProviders := make(map[string]struct{})
 
-		for _, cfg := range volumeConfig.TypedSpec().Encryption.Keys {
-			encryptionProviders[cfg.Type.String()] = struct{}{}
-		}
+			for _, cfg := range volumeConfig.TypedSpec().Encryption.Keys {
+				encryptionProviders[cfg.Type.String()] = struct{}{}
+			}
 
-		mountStatus.TypedSpec().EncryptionProviders = maps.Keys(encryptionProviders)
-	}
+			mountStatus.TypedSpec().EncryptionProviders = maps.Keys(encryptionProviders)
+		}
 
-	// ignore the error if the MountStatus already exists, as many mounts are silently skipped with the flag SkipIfMounted
-	if err = r.State().V1Alpha2().Resources().Create(context.Background(), mountStatus); err != nil && !state.IsConflictError(err) {
-		return fmt.Errorf("error creating mount status resource: %w", err)
+		// ignore the error if the MountStatus already exists, as many mounts are silently skipped with the flag SkipIfMounted
+		if err = r.State().V1Alpha2().Resources().Create(context.Background(), mountStatus); err != nil && !state.IsConflictError(err) {
+			return fmt.Errorf("error creating mount status resource: %w", err)
+		}
 	}
 
 	mountpointsMutex.Lock()
@@ -127,7 +130,9 @@ func SystemPartitionUnmount(r runtime.Runtime, logger *log.Logger, label string)
 	}
 
 	if err = r.State().V1Alpha2().Resources().Destroy(context.Background(), runtimeres.NewMountStatus(v1alpha1.NamespaceName, label).Metadata()); err != nil {
-		return fmt.Errorf("error destroying mount status resource: %w", err)
+		if !state.IsNotFoundError(err) {
+			return fmt.Errorf("error destroying mount status resource: %w", err)
+		}
 	}
 
 	mountpointsMutex.Lock()