Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🍒 #8191 - TempLocationManager Fixes and Improvements #8199

Merged
merged 3 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.bootstrap.config.provider.ConfigProvider;
import datadog.trace.util.AgentTaskScheduler;
import datadog.trace.util.PidHelper;
import java.io.IOException;
import java.nio.file.FileVisitResult;
Expand All @@ -15,10 +16,10 @@
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -30,6 +31,9 @@
*/
public final class TempLocationManager {
private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class);
private static final Pattern JFR_DIR_PATTERN =
Pattern.compile("\\d{4}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{6}");
private static final String TEMPDIR_PREFIX = "pid_";

private static final class SingletonHolder {
private static final TempLocationManager INSTANCE = new TempLocationManager();
Expand Down Expand Up @@ -62,7 +66,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE
default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {}
}

private class CleanupVisitor implements FileVisitor<Path> {
private final class CleanupVisitor implements FileVisitor<Path> {
private boolean shouldClean;

private final Set<String> pidSet = PidHelper.getJavaPids();
Expand Down Expand Up @@ -98,14 +102,19 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
terminated = true;
return FileVisitResult.TERMINATE;
}
if (cleanSelf && JFR_DIR_PATTERN.matcher(dir.getFileName().toString()).matches()) {
// do not delete JFR repository on 'self-cleanup' - it conflicts with the JFR's own cleanup
return FileVisitResult.SKIP_SUBTREE;
}

cleanupTestHook.preVisitDirectory(dir, attrs);

if (dir.equals(baseTempDir)) {
return FileVisitResult.CONTINUE;
}
String fileName = dir.getFileName().toString();
// the JFR repository directories are under <basedir>/pid_<pid>
String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null;
String pid = fileName.startsWith(TEMPDIR_PREFIX) ? fileName.substring(4) : null;
boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid());
shouldClean |= cleanSelf ? isSelfPid : !isSelfPid && !pidSet.contains(pid);
if (shouldClean) {
Expand Down Expand Up @@ -167,18 +176,43 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
}
String fileName = dir.getFileName().toString();
// reset the flag only if we are done cleaning the top-level directory
shouldClean = !fileName.startsWith("pid_");
shouldClean = !fileName.startsWith(TEMPDIR_PREFIX);
}
return FileVisitResult.CONTINUE;
}
}

private final class CleanupTask implements Runnable {
private final CountDownLatch latch = new CountDownLatch(1);
private volatile Throwable throwable = null;

@Override
public void run() {
try {
cleanup(false);
} catch (OutOfMemoryError oom) {
throw oom;
} catch (Throwable t) {
throwable = t;
} finally {
latch.countDown();
}
}

boolean await(long timeout, TimeUnit unit) throws Throwable {
boolean ret = latch.await(timeout, unit);
if (throwable != null) {
throw throwable;
}
return ret;
}
}

private final Path baseTempDir;
private final Path tempDir;
private final long cutoffSeconds;

private final CompletableFuture<Void> cleanupTask;

private final CleanupTask cleanupTask = new CleanupTask();
private final CleanupHook cleanupTestHook;

/**
Expand All @@ -200,11 +234,7 @@ public static TempLocationManager getInstance() {
static TempLocationManager getInstance(boolean waitForCleanup) {
TempLocationManager instance = SingletonHolder.INSTANCE;
if (waitForCleanup) {
try {
instance.waitForCleanup(5, TimeUnit.SECONDS);
} catch (TimeoutException ignored) {

}
instance.waitForCleanup(5, TimeUnit.SECONDS);
}
return instance;
}
Expand All @@ -214,10 +244,11 @@ private TempLocationManager() {
}

TempLocationManager(ConfigProvider configProvider) {
this(configProvider, CleanupHook.EMPTY);
this(configProvider, true, CleanupHook.EMPTY);
}

TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) {
TempLocationManager(
ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) {
cleanupTestHook = testHook;

// In order to avoid racy attempts to clean up files which are currently being processed in a
Expand Down Expand Up @@ -255,21 +286,21 @@ private TempLocationManager() {
baseTempDir = configuredTempDir.resolve("ddprof");
baseTempDir.toFile().deleteOnExit();

tempDir = baseTempDir.resolve("pid_" + pid);
cleanupTask = CompletableFuture.runAsync(() -> cleanup(false));
tempDir = baseTempDir.resolve(TEMPDIR_PREFIX + pid);
if (runStartupCleanup) {
// do not execute the background cleanup task when running in tests
AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false));
}

Thread selfCleanup =
new Thread(
() -> {
try {
waitForCleanup(1, TimeUnit.SECONDS);
} catch (TimeoutException e) {
if (!waitForCleanup(1, TimeUnit.SECONDS)) {
log.info(
"Cleanup task timed out. {} temp directory might not have been cleaned up properly",
tempDir);
} finally {
cleanup(true);
}
cleanup(true);
},
"Temp Location Manager Cleanup");
Runtime.getRuntime().addShutdownHook(selfCleanup);
Expand Down Expand Up @@ -344,6 +375,19 @@ boolean cleanup(boolean cleanSelf) {
*/
boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
try {
if (!Files.exists(baseTempDir)) {
// not event the main temp location exists; nothing to clean up
return true;
}
try (Stream<Path> paths = Files.walk(baseTempDir)) {
if (paths.noneMatch(
path ->
Files.isDirectory(path)
&& path.getFileName().toString().startsWith(TEMPDIR_PREFIX))) {
// nothing to clean up; bail out early
return true;
}
}
cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit);
CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit);
Files.walkFileTree(baseTempDir, visitor);
Expand All @@ -359,21 +403,24 @@ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
}

// accessible for tests
void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException {
boolean waitForCleanup(long timeout, TimeUnit unit) {
try {
cleanupTask.get(timeout, unit);
return cleanupTask.await(timeout, unit);
} catch (InterruptedException e) {
cleanupTask.cancel(true);
log.debug("Temp directory cleanup was interrupted");
Thread.currentThread().interrupt();
} catch (TimeoutException e) {
cleanupTask.cancel(true);
throw e;
} catch (ExecutionException e) {
} catch (Throwable t) {
if (log.isDebugEnabled()) {
log.debug("Failed to cleanup temp directory: {}", tempDir, e);
log.debug("Failed to cleanup temp directory: {}", tempDir, t);
} else {
log.debug("Failed to cleanup temp directory: {}", tempDir);
}
}
return false;
}

// accessible for tests
void createDirStructure() throws IOException {
Files.createDirectories(baseTempDir);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.UUID;
import java.util.concurrent.Phaser;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.LockSupport;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -173,7 +174,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
}
};

TempLocationManager mgr = instance(baseDir, blocker);
TempLocationManager mgr = instance(baseDir, true, blocker);

// wait for the cleanup start
phaser.arriveAndAwaitAdvance();
Expand All @@ -187,8 +188,9 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx

@ParameterizedTest
@MethodSource("timeoutTestArguments")
void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception {
long timeoutMs = 500;
void testCleanupWithTimeout(boolean selfCleanup, boolean shouldSucceed, String section)
throws Exception {
long timeoutMs = 10;
TempLocationManager.CleanupHook delayer =
new TempLocationManager.CleanupHook() {
@Override
Expand Down Expand Up @@ -229,30 +231,63 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
Files.createTempDirectory(
"ddprof-test-",
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
TempLocationManager instance = instance(baseDir, delayer);
boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS);
assertTrue(rslt);
TempLocationManager instance = instance(baseDir, false, delayer);
Path mytempdir = instance.getTempDir();
Path otherTempdir = mytempdir.getParent().resolve("pid_0000");
Files.createDirectories(otherTempdir);
Files.createFile(mytempdir.resolve("dummy"));
Files.createFile(otherTempdir.resolve("dummy"));
boolean rslt =
instance.cleanup(
selfCleanup, (long) (timeoutMs * (shouldSucceed ? 10 : 0.5d)), TimeUnit.MILLISECONDS);
assertEquals(shouldSucceed, rslt);
}

@Test
void testShortCircuit() throws Exception {
Path baseDir =
Files.createTempDirectory(
"ddprof-test-",
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
AtomicBoolean executed = new AtomicBoolean();
TempLocationManager.CleanupHook hook =
new TempLocationManager.CleanupHook() {
@Override
public void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {
executed.set(true);
}
};
TempLocationManager instance = instance(baseDir, false, hook);

instance.createDirStructure();

boolean ret = instance.cleanup(false);
assertTrue(ret);
assertFalse(executed.get());
}

private static Stream<Arguments> timeoutTestArguments() {
List<Arguments> argumentsList = new ArrayList<>();
for (boolean selfCleanup : new boolean[] {true, false}) {
for (String intercepted :
new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) {
argumentsList.add(Arguments.of(selfCleanup, intercepted));
argumentsList.add(Arguments.of(selfCleanup, true, intercepted));
argumentsList.add(Arguments.of(selfCleanup, false, intercepted));
}
}
return argumentsList.stream();
}

private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook)
private TempLocationManager instance(
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook)
throws IOException {
Properties props = new Properties();
props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString());
props.put(
ProfilingConfig.PROFILING_UPLOAD_PERIOD,
"0"); // to force immediate cleanup; must be a string value!

return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook);
return new TempLocationManager(
ConfigProvider.withPropertiesOverride(props), withStartupCleanup, cleanupHook);
}
}
Loading