Skip to content

Commit

Permalink
Address test flakiness with injectAsymmetricDrops (#35)
Browse files Browse the repository at this point in the history
* Bump up log level to ERROR in SharedResources
* Log decide view change in MembershipService
* Reduce logging in Retries implementation
* Reduce logging in Retries implementation
* Increase test maxTries
* Remove fast failure-detection (FD) timeouts

Signed-off-by: Lalith Suresh <[email protected]>
  • Loading branch information
lalithsuresh authored Mar 31, 2021
1 parent d6e228e commit a5a9bf2
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion rapid/src/main/java/com/vrg/rapid/FastPaxos.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ private void handleFastRoundProposal(final FastRoundPhase2bMessage proposalMessa
final int F = (int) Math.floor((membershipSize - 1) / 4.0); // Fast Paxos resiliency.
if (votesReceived.size() >= membershipSize - F) {
if (count >= membershipSize - F) {
LOG.trace("Decided on a view change: {}", proposalMessage.getEndpointsList());
LOG.trace("Decided on a view change: {}", Utils.loggable(proposalMessage.getEndpointsList()));
// We have a successful proposal. Consume it.
onDecidedWrapped.accept(proposalMessage.getEndpointsList());
} else {
Expand Down
3 changes: 3 additions & 0 deletions rapid/src/main/java/com/vrg/rapid/MembershipService.java
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,9 @@ private ListenableFuture<RapidResponse> handleLeaveMessage(final RapidRequest re
* and any node that is currently in the membership list will be removed from it.
*/
private void decideViewChange(final List<Endpoint> proposal) {
LOG.info("Decide view change called in current configuration {} ({} nodes), for proposal {}",
membershipView.getCurrentConfigurationId(), membershipView.getMembershipSize(),
Utils.loggable(proposal));
// The first step is to disable our failure detectors in anticipation of new ones to be created.
cancelFailureDetectorJobs();

Expand Down
3 changes: 1 addition & 2 deletions rapid/src/test/java/com/vrg/rapid/ClusterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ public void failTenRandomNodes() throws IOException, InterruptedException {
*/
@Test
public void injectAsymmetricDrops() throws IOException, InterruptedException {
useFastFailureDetectionTimeouts();
final int numNodes = 50;
final int numFailingNodes = 10;
final Endpoint seedEndpoint = Utils.hostFromParts("127.0.0.1", basePort);
Expand All @@ -353,7 +352,7 @@ public void injectAsymmetricDrops() throws IOException, InterruptedException {
// we may have less than numFailedNodes entries in the set
failedNodes.forEach(host -> dropFirstNAtServer(host, 100, RapidRequest.ContentCase.PROBEMESSAGE));
createCluster(numNodes, seedEndpoint);
waitAndVerifyAgreement(numNodes - failedNodes.size(), 10, 1000);
waitAndVerifyAgreement(numNodes - failedNodes.size(), 20, 1500);
verifyNumClusterInstances(numNodes);
}

Expand Down
8 changes: 5 additions & 3 deletions rapid/src/test/resources/log4j.properties
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
# permissions and limitations under the License.
#

log4j.rootLogger=ERROR, STDOUT
log4j.rootLogger=OFF, STDOUT
log4j.logger.com.vrg.rapid.Cluster=ERROR
log4j.logger.com.vrg.rapid.MembershipService=ERROR
log4j.logger.com.vrg.rapid.Paxos=ERROR
log4j.logger.com.vrg.rapid.MembershipService=INFO
log4j.logger.com.vrg.rapid.Paxos=INFO
log4j.logger.com.vrg.rapid.FastPaxos=ERROR
log4j.logger.com.vrg.rapid.SharedResources=ERROR
log4j.logger.com.vrg.rapid.messaging.impl.Retries=OFF
log4j.logger.io.grpc.internal.ManagedChannelOrphanWrapper=OFF
log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender
log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout
Expand Down

0 comments on commit a5a9bf2

Please sign in to comment.