From 24b4717cebb083a5ecafefaba647e539129311d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 19:34:26 +0100 Subject: [PATCH] More robust waiting for the process death MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Matějček --- Jenkinsfile | 16 +++---- .../main/admin/test/AsadminLoggingITest.java | 8 +++- .../universal/process/ProcessUtils.java | 47 +++++++++---------- .../enterprise/v3/admin/RestartServer.java | 2 +- .../enterprise/v3/admin/StartServerHook.java | 14 ++---- 5 files changed, 41 insertions(+), 46 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c2a0b3307b8..01cfb7c01ab 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -158,6 +158,7 @@ spec: PORT_ADMIN=4848 PORT_HTTP=8080 PORT_HTTPS=8181 + AS_RESTART_LOGFILES="true" } options { @@ -187,22 +188,19 @@ spec: container('maven') { dumpSysInfo() sh ''' - # Validate the structure in all submodules (especially version ids) - mvn -B -e -fae clean validate -Ptck,set-version-id,staging - # Until we fix ANTLR in cmp-support-sqlstore, broken in parallel builds. Just -Pfast after the fix. mvn -B -e install -Pfastest,staging -T4C - ./gfbuild.sh archive_bundles - ./gfbuild.sh archive_embedded - mvn -B -e clean -Pstaging - tar -c -C ${WORKSPACE}/appserver/tests common_test.sh gftest.sh appserv-tests quicklook | gzip --fast > ${WORKSPACE}/bundles/appserv_tests.tar.gz - ls -la ${WORKSPACE}/bundles - ls -la ${WORKSPACE}/embedded + mvn -B -e clean install -pl :admin-tests-parent -amd ''' } archiveArtifacts artifacts: 'bundles/*.zip', onlyIfSuccessful: true archiveArtifacts artifacts: 'embedded/*', onlyIfSuccessful: true stash includes: 'bundles/*', name: 'build-bundles' } + post { + always { + archiveArtifacts artifacts: "**/*.log*", onlyIfSuccessful: false + } + } } stage('mvn-tests') { steps { diff --git a/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/AsadminLoggingITest.java b/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/AsadminLoggingITest.java index 12682968db3..72deaf55b9f 100644 --- a/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/AsadminLoggingITest.java +++ b/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/AsadminLoggingITest.java @@ -16,6 +16,8 @@ package org.glassfish.main.admin.test; +import com.sun.enterprise.util.OS; + import java.io.File; import java.io.FileReader; import java.io.LineNumberReader; @@ -68,9 +70,13 @@ public class AsadminLoggingITest { private static final Asadmin ASADMIN = GlassFishTestEnvironment.getAsadmin(); @BeforeAll - public static void fillUpServerLog() { + public static void fillUpServerLog() throws Exception { // Fill up the server log. AsadminResult result = ASADMIN.exec(60_000, "restart-domain"); + if (OS.isLinux()) { + new ProcessBuilder("ps", "-lAf").inheritIO().start().waitFor(); + } + assertThat(result, asadminOK()); } diff --git a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java index 26b40498e8e..f938e934110 100644 --- a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java +++ b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java @@ -34,9 +34,6 @@ import java.time.Duration; import java.time.Instant; import java.util.Optional; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import static com.sun.enterprise.util.StringUtils.ok; @@ -76,23 +73,7 @@ private ProcessUtils() { * @return true if the handle was not found or exited before timeout. False otherwise. */ public static boolean waitWhileIsAlive(final long pid, Duration timeout, boolean printDots) { - Optional handle = ProcessHandle.of(pid); - if (handle.isEmpty()) { - return true; - } - final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots); - try { - handle.get().onExit().get(timeout.toSeconds(), TimeUnit.SECONDS); - return true; - } catch (TimeoutException e) { - LOG.log(TRACE, "Timeout while waiting for exit of process with id " + pid + ". Returning false.", e); - return false; - } catch (InterruptedException | ExecutionException e) { - LOG.log(TRACE, "Exception while waiting for exit of process with id " + pid + ". Returning true.", e); - return true; - } finally { - DotPrinter.stopWaiting(dotPrinter); - } + return waitFor(() -> !isAlive(pid), timeout, printDots); } @@ -115,13 +96,28 @@ public static boolean isAlive(final File pidFile) { */ public static boolean isAlive(final long pid) { Optional handle = ProcessHandle.of(pid); - if (handle.isEmpty()) { - return false; - } - if (!handle.get().isAlive()) { + return handle.isPresent() ? isAlive(handle.get()) : false; + } + + + /** + * The {@link Process#isAlive()} returns true even for zombies so we implemented + * this method which considers zombies as dead. + * + * @param process + * @return true if the process with is alive. + */ + public static boolean isAlive(final ProcessHandle process) { + if (!process.isAlive()) { return false; } - Info info = handle.get().info(); + // This is a trick to avoid zombies on some systems (ie containers on Jenkins) + // Operating system there does the cleanup much later, so we can still access + // zombies to process their output despite for us would be better we would + // not see them any more. + // The ProcessHandle.onExit blocks forever for zombies in docker containers + // without proper process reaper. + final Info info = process.info(); if (info.commandLine().isEmpty() && !(OS.isWindowsForSure() && info.command().isPresent())) { LOG.log(TRACE, "Could not retrieve command line for the pid {0}," + " therefore we assume that the process stopped."); @@ -130,7 +126,6 @@ public static boolean isAlive(final long pid) { return true; } - /** * Blocks until the endpoint closes the connection or timeout comes first. * diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java index 6214fe54d4f..a0c663715ce 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java @@ -104,7 +104,6 @@ protected final void doExecute(AdminCommandContext context) { } scheduleReincarnation(); } - // else we just return a special int from System.exit() gfKernel.stop(); } catch (Exception e) { throw new Error(strings.get("restart.server.failure"), e); @@ -116,6 +115,7 @@ protected final void doExecute(AdminCommandContext context) { } else { restartType = debug ? RESTART_DEBUG_ON : RESTART_DEBUG_OFF; } + // return a special int from System.exit() System.exit(restartType); } diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java index 43ae119d848..52866ba8d49 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java @@ -217,21 +217,17 @@ private static ProcessBuilder prepareStartup(Instant now, String classpath, Stri // To avoid conflict of the debug port used both by old and new JVM, // we will force waiting for the end of the old JVM. outerCommand = new ArrayList<>(); + outerCommand.add("nohup"); outerCommand.add("bash"); outerCommand.add("-c"); - outerCommand.add("tail --pid=" + ProcessHandle.current().pid() + " -f /dev/null && " - + cmdline.stream().collect(Collectors.joining(" "))); + outerCommand.add("(waitpid -e " + ProcessHandle.current().pid() + " || sleep 1) && '" + + cmdline.stream().collect(Collectors.joining("' '")) + + (LOG_RESTART ? "' > " + LOGDIR.resolve("restart-" + now + "-new.log") : "")); } final ProcessBuilder builder = new ProcessBuilder(outerCommand); builder.directory(new File(System.getProperty("user.dir"))); - if (LOG_RESTART) { - builder.redirectErrorStream(true); - builder.redirectOutput(LOGDIR.resolve("restart-" + now + "-new.log").toFile()); - } else { - builder.redirectError(Redirect.DISCARD); - builder.redirectOutput(Redirect.DISCARD); - } + builder.inheritIO(); return builder; }