Skip to content

Commit

Permalink
Avoid deadlock issues caused by logging in a loop.
Browse files Browse the repository at this point in the history
Signed-off-by: wangfakang <[email protected]>
  • Loading branch information
wangfakang committed Apr 25, 2024
1 parent ab2b89c commit bfdb006
Showing 1 changed file with 19 additions and 11 deletions.
30 changes: 19 additions & 11 deletions src/misc/param.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,29 @@ void initEnv() {

// Resolve the integer parameter named by the environment variable `env` and
// publish it through `*cache`.
//   env           - name of the environment variable to read
//   deftVal       - value stored when the variable is unset, empty, or when
//                   strtoll reports an error through errno
//   uninitialized - sentinel meaning "*cache has not been filled in yet"
//   cache         - location of the cached value; accessed with relaxed atomics
// NOTE(review): this listing comes from a commit diff view whose +/- markers
// were stripped, so removed and added lines appear together and the braces do
// not balance as shown. Confirm the final code against src/misc/param.cc.
void ncclLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache) {
// Function-static mutex: shared by every call, whatever parameter is loaded.
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
// Fast path: once the cache holds anything other than the sentinel, return
// without touching the mutex.
if (__builtin_expect(__atomic_load_n(cache, __ATOMIC_RELAXED) != uninitialized, true)) {
return;
}

pthread_mutex_lock(&mutex);
// Re-check under the lock: another thread may have filled the cache meanwhile.
if (__atomic_load_n(cache, __ATOMIC_RELAXED) == uninitialized) {
const char* str = ncclGetEnv(env);
int64_t value = deftVal;
// NOTE(review): this nested block looks like the pre-change parsing logic
// (the removed side of the diff) - confirm.
if (str && strlen(str) > 0) {
errno = 0;
value = strtoll(str, nullptr, 0);
if (errno) {
value = deftVal;
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
} else {
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
}
// Unset or empty variable: store the default, release the mutex and return.
// (strlen returns an unsigned size_t, so `<= 0` is equivalent to `== 0`.)
if (!str || strlen(str) <= 0) {
__atomic_store_n(cache, value, __ATOMIC_RELAXED);
pthread_mutex_unlock(&mutex);
return;
}
// Clear errno so that a failure reported by strtoll can be detected below.
errno = 0;
// Base 0: strtoll accepts decimal, 0x-prefixed hex and 0-prefixed octal input.
value = strtoll(str, nullptr, 0);
// Cache the value before logging so the cache is already initialized when
// INFO runs. Per the commit description, logging first could deadlock
// (presumably INFO can lead back into this function while the mutex is
// held - confirm).
__atomic_store_n(cache, errno ? deftVal : value, __ATOMIC_RELAXED);
if (errno) {
INFO(NCCL_ALL,"Invalid value %s for %s, using default %lld.", str, env, (long long)deftVal);
} else {
INFO(NCCL_ENV,"%s set by environment to %lld.", env, (long long)value);
}
// NOTE(review): this unconditional store of `value` would overwrite the
// default chosen above when strtoll failed; it is probably the removed side
// of the diff - confirm.
__atomic_store_n(cache, value, __ATOMIC_RELAXED);
}
pthread_mutex_unlock(&mutex);
}
Expand All @@ -84,4 +92,4 @@ const char *ncclGetEnv(const char *name) {
// Run initEnv through pthread_once (so it is invoked at most once) before the
// lookup; what initEnv does is not visible in this excerpt.
static pthread_once_t once = PTHREAD_ONCE_INIT;
pthread_once(&once, initEnv);
// The getenv result is returned as-is, so callers receive NULL when `name`
// is not set in the environment.
return getenv(name);
}
}

0 comments on commit bfdb006

Please sign in to comment.