From 00e63929fdadaf1b9e690a01c96b8bc10f332223 Mon Sep 17 00:00:00 2001 From: Julien Duchesne Date: Tue, 8 Oct 2024 10:13:30 -0400 Subject: [PATCH] Fix flaky memberlist `TestRejoin` test (#598) It randomly failed here: #525 and in some of the test runs here: #596. This test fails because the probe interval is 5s, which is the same duration the test polls for; polling for 7s (the 5s probe interval plus the 2s probe timeout) gives the probe time to complete. --- .github/workflows/find-flaky-tests.yml | 1 + kv/memberlist/memberlist_client_test.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/find-flaky-tests.yml b/.github/workflows/find-flaky-tests.yml index c3a41d668..6089ce7f7 100644 --- a/.github/workflows/find-flaky-tests.yml +++ b/.github/workflows/find-flaky-tests.yml @@ -8,6 +8,7 @@ jobs: find-flaky-tests: # Only run this workflow when the comment contains '/find-flaky-tests' or it's manually triggered if: contains(github.event.comment.body, '/find-flaky-tests') || github.event_name == 'workflow_dispatch' + fail-fast: false strategy: matrix: runs: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] # Run the build workflow 20 times diff --git a/kv/memberlist/memberlist_client_test.go b/kv/memberlist/memberlist_client_test.go index 54409babd..4ba71ce89 100644 --- a/kv/memberlist/memberlist_client_test.go +++ b/kv/memberlist/memberlist_client_test.go @@ -1240,20 +1240,20 @@ func TestRejoin(t *testing.T) { return mkv2.memberlist.NumMembers() } - poll(t, 5*time.Second, 2, membersFunc) + poll(t, 7*time.Second, 2, membersFunc) // Probe interval is 5s, with 2s timeout, so probe for 7s. // Shutdown first KV require.NoError(t, services.StopAndAwaitTerminated(context.Background(), mkv1)) // Second KV should see single member now. - poll(t, 5*time.Second, 1, membersFunc) + poll(t, 7*time.Second, 1, membersFunc) // Let's start first KV again. It is not configured to join the cluster, but KV2 is rejoining. 
mkv1 = NewKV(cfg1, log.NewNopLogger(), &dnsProviderMock{}, prometheus.NewPedanticRegistry()) require.NoError(t, services.StartAndAwaitRunning(context.Background(), mkv1)) defer services.StopAndAwaitTerminated(context.Background(), mkv1) //nolint:errcheck - poll(t, 5*time.Second, 2, membersFunc) + poll(t, 7*time.Second, 2, membersFunc) } func TestMessageBuffer(t *testing.T) {