Skip to content

Commit

Permalink
Improved effectivity of waiting for stop/start instance actions
Browse files Browse the repository at this point in the history
- Original code caused local port exhaustion
- Original code used busy spinning instead of signals

Signed-off-by: David Matějček <[email protected]>
  • Loading branch information
dmatej committed Dec 28, 2024
1 parent 77b4cf4 commit 21fe61c
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 149 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Contributors to the Eclipse Foundation
* Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation
* Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
Expand Down Expand Up @@ -81,7 +81,7 @@ protected int executeCommand() throws CommandException, CommandValidationExcepti

private void checkRunning() throws CommandException {
programOpts.setInteractive(false); // don't prompt for password
if (ProcessUtils.isListening(adminAddress) && isThisDAS(getDomainRootDir())) {
if (isThisDAS(getDomainRootDir()) && ProcessUtils.isListening(adminAddress)) {
String msg = strings.get("domain.is.running", getDomainName(), getDomainRootDir());
throw new IllegalStateException(msg);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Contributors to the Eclipse Foundation
* Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation
* Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
Expand Down Expand Up @@ -43,6 +43,7 @@
import org.glassfish.api.ActionReport.ExitCode;
import org.glassfish.api.admin.CommandException;

import static com.sun.enterprise.admin.cli.CLIConstants.DEATH_TIMEOUT_MS;
import static com.sun.enterprise.admin.cli.CLIConstants.DEFAULT_ADMIN_PORT;
import static com.sun.enterprise.admin.cli.CLIConstants.DEFAULT_HOSTNAME;
import static com.sun.enterprise.admin.cli.ProgramOptions.PasswordLocation.LOCAL_PASSWORD;
Expand Down Expand Up @@ -331,22 +332,15 @@ protected final void waitForRestart(final int oldPid, final HostAndPort oldAdmin
logger.log(Level.FINEST, "waitForRestart(oldPid={0}, oldAdminAddress={1}, newAdminAddress={2})",
new Object[] {oldPid, oldAdminAddress, newAdminAddress});

final Supplier<Boolean> signStop = () -> {
if (!ProcessUtils.isListening(oldAdminAddress)) {
return true;
}
int newPid = getServerPid();
if (newPid < 0) {
// is listening, but not responding.
// Could be also another application, but then
// remote _restart-domain call should already fail.
return true;
}
// stopped and started again
return oldPid != newPid;
};
final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS);
final boolean printDots = !programOpts.isTerse();
if (!ProcessUtils.waitFor(signStop, Duration.ofMillis(CLIConstants.DEATH_TIMEOUT_MS), printDots)) {
final boolean stopped;
if (isLocal()) {
stopped = ProcessUtils.waitWhileIsAlive(oldPid, timeout, printDots);
} else {
stopped = ProcessUtils.waitWhileListening(oldAdminAddress, timeout, printDots);
}
if (!stopped) {
throw new CommandException(I18N.get("restartDomain.noGFStart"));
}
logger.log(CONFIG, "Server instance is stopped, now we wait for the start on {0}", newAdminAddress);
Expand Down Expand Up @@ -383,7 +377,7 @@ protected final void waitForRestart(final int oldPid, final HostAndPort oldAdmin


/**
* Get uptime from the server.
* @return uptime from the server.
*/
protected final long getUptime() throws CommandException {
RemoteCLICommand cmd = new RemoteCLICommand("uptime", programOpts, env);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,21 @@ public StartServerHelper(boolean terse, ServerDirs serverDirs, GFLauncher launch
boolean debug) {
this.terse = terse;
this.launcher = launcher;
info = launcher.getInfo();
this.info = launcher.getInfo();

if (info.isDomain()) {
serverOrDomainName = info.getDomainName();
this.serverOrDomainName = info.getDomainName();
} else {
serverOrDomainName = info.getInstanceName();
this.serverOrDomainName = info.getInstanceName();
}

addresses = info.getAdminAddresses();
this.addresses = info.getAdminAddresses();
this.serverDirs = serverDirs;
pidFile = serverDirs.getPidFile();
this.pidFile = serverDirs.getPidFile();
this.masterPassword = masterPassword;

// it will be < 0 if both --debug is false and debug-enabled=false in jvm-config
debugPort = launcher.getDebugPort();
this.debugPort = launcher.getDebugPort();
}


Expand Down Expand Up @@ -205,13 +205,7 @@ private void waitForParentToDie() throws CommandException {
return;
}
LOG.log(DEBUG, "Waiting for death of the parent process with pid={0}", pid);
final Supplier<Boolean> parentDeathSign = () -> {
if (pid != null && ProcessUtils.isAlive(pid)) {
return false;
}
return !isListeningOnAnyEndpoint();
};
if (!ProcessUtils.waitFor(parentDeathSign, Duration.ofMillis(DEATH_TIMEOUT_MS), false)) {
if (!ProcessUtils.waitWhileIsAlive(pid, Duration.ofMillis(DEATH_TIMEOUT_MS), false)) {
throw new CommandException(I18N.get("deathwait_timeout", DEATH_TIMEOUT_MS));
}
LOG.log(DEBUG, "Parent process with PID={0} is dead and all admin endpoints are free.", pid);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import java.lang.System.Logger;
import java.lang.System.Logger.Level;
import java.time.Duration;
import java.util.function.Supplier;

import org.glassfish.api.Param;
import org.glassfish.api.admin.CommandException;
Expand Down Expand Up @@ -143,8 +142,7 @@ protected int dasNotRunning() throws CommandException {
if (isLocal()) {
try {
File prevPid = getServerDirs().getLastPidFile();
File watchedPid = getServerDirs().getPidFile();
ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
ProcessUtils.kill(prevPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
} catch (KillNotPossibleException e) {
throw new CommandException(e.getMessage(), e);
}
Expand All @@ -169,19 +167,34 @@ protected int dasNotRunning() throws CommandException {
*/
protected void doCommand() throws CommandException {
// run the remote stop-domain command and throw away the output
RemoteCLICommand cmd = new RemoteCLICommand(getName(), programOpts, env);
File watchedPid = isLocal() ? getServerDirs().getPidFile() : null;
final RemoteCLICommand cmd = new RemoteCLICommand(getName(), programOpts, env);
final File watchedPid = isLocal() ? getServerDirs().getPidFile() : null;
final Long oldPid = ProcessUtils.loadPid(watchedPid);
final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS);
final boolean printDots = !programOpts.isTerse();
try {
cmd.executeAndReturnOutput("stop-domain", "--force", force.toString());
waitForDeath(watchedPid);
if (printDots) {
// use stdout because logger always appends a newline
System.out.print(Strings.get("StopDomain.WaitDASDeath") + " ");
}
final boolean dead;
if (isLocal()) {
dead = ProcessUtils.waitWhileIsAlive(oldPid, timeout, printDots);
} else {
dead = ProcessUtils.waitWhileListening(addr, timeout, printDots);
}
if (!dead) {
throw new CommandException(Strings.get("StopDomain.DASNotDead", timeout.toSeconds()));
}
} catch (Exception e) {
// The domain server may have died so fast we didn't have time to
// get the (always successful!!) return data. This is NOT AN ERROR!
LOG.log(Level.DEBUG, "Remote stop-domain call failed.", e);
if (kill && isLocal()) {
try {
File prevPid = getServerDirs().getLastPidFile();
ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
ProcessUtils.kill(prevPid, timeout, printDots);
return;
} catch (Exception ex) {
e.addSuppressed(ex);
Expand All @@ -190,27 +203,4 @@ protected void doCommand() throws CommandException {
throw e;
}
}

/**
* Wait for the server to die.
*
* @param watchedPid
*/
private void waitForDeath(File watchedPid) throws CommandException {
if (!programOpts.isTerse()) {
// use stdout because logger always appends a newline
System.out.print(Strings.get("StopDomain.WaitDASDeath") + " ");
}
final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS);
final Supplier<Boolean> deathSign;
if (isLocal()) {
deathSign = () -> !ProcessUtils.isListening(addr) && !ProcessUtils.isAlive(watchedPid);
} else {
deathSign = () -> !ProcessUtils.isListening(addr);
}
final boolean dead = ProcessUtils.waitFor(deathSign, timeout, !programOpts.isTerse());
if (!dead) {
throw new CommandException(Strings.get("StopDomain.DASNotDead", timeout.toSeconds()));
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Contributors to the Eclipse Foundation
* Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation
* Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
Expand All @@ -24,7 +24,6 @@

import java.io.File;
import java.time.Duration;
import java.util.function.Supplier;
import java.util.logging.Level;

import org.glassfish.api.Param;
Expand Down Expand Up @@ -111,8 +110,7 @@ protected int instanceNotRunning() throws CommandException {
if (kill) {
try {
File lastPid = getServerDirs().getLastPidFile();
File watchedPid = getServerDirs().getPidFile();
ProcessUtils.kill(lastPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
ProcessUtils.kill(lastPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
} catch (KillNotPossibleException e) {
logger.log(Level.SEVERE, e.getMessage(), e);
return -1;
Expand Down Expand Up @@ -150,19 +148,28 @@ protected void doCommand() throws CommandException {
* most likely means we're talking to the wrong server.
*/
programOpts.setInteractive(false);
RemoteCLICommand cmd = new RemoteCLICommand("_stop-instance", programOpts, env);

final Long pid = ProcessUtils.loadPid(getServerDirs().getPidFile());
final boolean printDots = !programOpts.isTerse();
final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS);
final RemoteCLICommand cmd = new RemoteCLICommand("_stop-instance", programOpts, env);
try {
cmd.executeAndReturnOutput("_stop-instance", "--force", force.toString());
waitForDeath();
if (printDots) {
System.out.print(Strings.get("StopInstance.waitForDeath") + " ");
}
final boolean dead = ProcessUtils.waitWhileIsAlive(pid, timeout, printDots);
if (!dead) {
throw new CommandException(Strings.get("StopInstance.instanceNotDead", DEATH_TIMEOUT_MS / 1000));
}
} catch (Exception e) {
// The server may have died so fast we didn't have time to
// get the (always successful!!) return data. This is NOT AN ERROR!
logger.log(Level.CONFIG, "Remote stop-instance call failed.", e);
if (kill) {
try {
File prevPid = getServerDirs().getLastPidFile();
File watchedPid = getServerDirs().getPidFile();
ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse());
ProcessUtils.kill(prevPid, timeout, printDots);
return;
} catch (Exception ex) {
e.addSuppressed(ex);
Expand All @@ -171,20 +178,4 @@ protected void doCommand() throws CommandException {
throw e;
}
}

/**
* Wait for the server to die.
*/
private void waitForDeath() throws CommandException {
if (!programOpts.isTerse()) {
// use stdout because logger always appends a newline
System.out.print(Strings.get("StopInstance.waitForDeath") + " ");
}
final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS);
final Supplier<Boolean> deathSign = () -> !ProcessUtils.isAlive(getServerDirs().getPidFile());
final boolean dead = ProcessUtils.waitFor(deathSign, timeout, !programOpts.isTerse());
if (!dead) {
throw new CommandException(Strings.get("StopInstance.instanceNotDead", DEATH_TIMEOUT_MS / 1000));
}
}
}
Loading

0 comments on commit 21fe61c

Please sign in to comment.