Skip to content

Commit

Permalink
Avoid deadlock issues caused by logging in a loop.
Browse files Browse the repository at this point in the history
Signed-off-by: wangfakang <[email protected]>
  • Loading branch information
wangfakang committed Apr 25, 2024
1 parent ab2b89c commit 7ed5d25
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/misc/param.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,21 @@ void initEnv() {

void ncclLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache) {
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
if (__builtin_expect(__atomic_load_n(cache, __ATOMIC_RELAXED) != uninitialized, true)) {
return;
}

pthread_mutex_lock(&mutex);
if (__atomic_load_n(cache, __ATOMIC_RELAXED) == uninitialized) {
const char* str = ncclGetEnv(env);
int64_t value = deftVal;
if (str && strlen(str) > 0) {
errno = 0;
value = strtoll(str, nullptr, 0);
// Cache the value before the log operation to prevent deadlock
// issues caused by logging in a loop.
__atomic_store_n(cache, errno ? deftVal : value, __ATOMIC_RELAXED);
if (errno) {
value = deftVal;
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
} else {
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
Expand All @@ -84,4 +90,4 @@ const char *ncclGetEnv(const char *name) {
static pthread_once_t once = PTHREAD_ONCE_INIT;
pthread_once(&once, initEnv);
return getenv(name);
}
}

0 comments on commit 7ed5d25

Please sign in to comment.