Skip to content

Commit

Permalink
More robust waiting for the process death
Browse files Browse the repository at this point in the history
- Reverted usage of ProcessHandle.onExit.get as it doesn't work in containers
  which don't have strict reaper. The zombie project is considered as alive
  and get then hangs forever.
- Added waitpid, however it is not installed everywhere - if it is missing,
  we sleep for 1 second instead. That should be enough so the operating system
  could do the cleanup.

Signed-off-by: David Matějček <[email protected]>
  • Loading branch information
dmatej committed Dec 29, 2024
1 parent 21fe61c commit 190fd0b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@
import java.time.Duration;
import java.time.Instant;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;

import static com.sun.enterprise.util.StringUtils.ok;
Expand Down Expand Up @@ -76,23 +73,7 @@ private ProcessUtils() {
* @return true if the handle was not found or exited before timeout. False otherwise.
*/
public static boolean waitWhileIsAlive(final long pid, Duration timeout, boolean printDots) {
Optional<ProcessHandle> handle = ProcessHandle.of(pid);
if (handle.isEmpty()) {
return true;
}
final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots);
try {
handle.get().onExit().get(timeout.toSeconds(), TimeUnit.SECONDS);
return true;
} catch (TimeoutException e) {
LOG.log(TRACE, "Timeout while waiting for exit of process with id " + pid + ". Returning false.", e);
return false;
} catch (InterruptedException | ExecutionException e) {
LOG.log(TRACE, "Exception while waiting for exit of process with id " + pid + ". Returning true.", e);
return true;
} finally {
DotPrinter.stopWaiting(dotPrinter);
}
return waitFor(() -> !isAlive(pid), timeout, printDots);
}


Expand All @@ -115,13 +96,28 @@ public static boolean isAlive(final File pidFile) {
*/
public static boolean isAlive(final long pid) {
Optional<ProcessHandle> handle = ProcessHandle.of(pid);
if (handle.isEmpty()) {
return false;
}
if (!handle.get().isAlive()) {
return handle.isPresent() ? isAlive(handle.get()) : false;
}


/**
* The {@link Process#isAlive()} returns true even for zombies so we implemented
* this method which considers zombies as dead.
*
* @param process
* @return true if the process with is alive.
*/
public static boolean isAlive(final ProcessHandle process) {
if (!process.isAlive()) {
return false;
}
Info info = handle.get().info();
// This is a trick to avoid zombies on some systems (ie containers on Jenkins)
// Operating system there does the cleanup much later, so we can still access
// zombies to process their output despite for us would be better we would
// not see them any more.
// The ProcessHandle.onExit blocks forever for zombies in docker containers
// without proper process reaper.
final Info info = process.info();
if (info.commandLine().isEmpty() && !(OS.isWindowsForSure() && info.command().isPresent())) {
LOG.log(TRACE, "Could not retrieve command line for the pid {0},"
+ " therefore we assume that the process stopped.");
Expand All @@ -130,7 +126,6 @@ public static boolean isAlive(final long pid) {
return true;
}


/**
* Blocks until the endpoint closes the connection or timeout comes first.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ protected final void doExecute(AdminCommandContext context) {
}
scheduleReincarnation();
}
// else we just return a special int from System.exit()
gfKernel.stop();
} catch (Exception e) {
throw new Error(strings.get("restart.server.failure"), e);
Expand All @@ -116,6 +115,7 @@ protected final void doExecute(AdminCommandContext context) {
} else {
restartType = debug ? RESTART_DEBUG_ON : RESTART_DEBUG_OFF;
}
// return a special int from System.exit()
System.exit(restartType);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ private static PrintStream getLogFileOld(Instant startTime) {
}


private static ProcessBuilder prepareStartup(Instant now, String classpath, String[] sysprops, String classname,
private static ProcessBuilder prepareStartup(Instant startTime, String classpath, String[] sysprops, String classname,
String[] args) {
final Path javaExecutable = detectJavaExecutable();
final List<String> cmdline = new ArrayList<>();
Expand Down Expand Up @@ -217,21 +217,19 @@ private static ProcessBuilder prepareStartup(Instant now, String classpath, Stri
// To avoid conflict of the debug port used both by old and new JVM,
// we will force waiting for the end of the old JVM.
outerCommand = new ArrayList<>();
outerCommand.add("nohup");
outerCommand.add("bash");
outerCommand.add("-c");
outerCommand.add("tail --pid=" + ProcessHandle.current().pid() + " -f /dev/null && "
+ cmdline.stream().collect(Collectors.joining(" ")));
// waitpid is not everywhere.
outerCommand.add("(waitpid -e " + ProcessHandle.current().pid() + " || sleep 1) && '"
+ cmdline.stream().collect(Collectors.joining("' '"))
+ (LOG_RESTART ? "' > '" + LOGDIR.resolve("restart-" + startTime + "-new.log'") : "'"));
}

final ProcessBuilder builder = new ProcessBuilder(outerCommand);
builder.directory(new File(System.getProperty("user.dir")));
if (LOG_RESTART) {
builder.redirectErrorStream(true);
builder.redirectOutput(LOGDIR.resolve("restart-" + now + "-new.log").toFile());
} else {
builder.redirectError(Redirect.DISCARD);
builder.redirectOutput(Redirect.DISCARD);
}
builder.redirectError(Redirect.DISCARD);
builder.redirectOutput(Redirect.DISCARD);
return builder;
}

Expand Down

0 comments on commit 190fd0b

Please sign in to comment.