Skip to content

Commit

Permalink
#2382: ccm-lb: add backoff to avoid performance issues with locking
Browse files: browse the repository at this point in the history
  • Loading branch information
lifflander committed Dec 18, 2024
1 parent e485ac7 commit 2ea8a29
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
21 changes: 14 additions & 7 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,7 @@ void TemperedLB::doLBStages(LoadType start_imb) {
double const memory_usage = computeMemoryUsage();

vt_debug_print(
terse, temperedlb,
normal, temperedlb,
"Current memory info: total memory usage={}, shared blocks here={}, "
"memory_threshold={}\n", memory_usage,
getSharedBlocksHere().size(), mem_thresh_
Expand Down Expand Up @@ -1472,7 +1472,7 @@ void TemperedLB::informAsync() {

if (is_overloaded_) {
vt_debug_print(
terse, temperedlb,
normal, temperedlb,
"TemperedLB::informAsync: trial={}, iter={}, known underloaded={}\n",
trial_, iter_, underloaded_.size()
);
Expand Down Expand Up @@ -2510,8 +2510,8 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) {

vt_debug_print(
normal, temperedlb,
"lockObtained: is_locked_={}, is_swapping_={}\n",
is_locked_, is_swapping_
"lockObtained: is_locked_={}, is_swapping_={}, locking_rank_={}, msg->locked_node={}, is_swapping={}\n",
is_locked_, is_swapping_, locking_rank_, msg->locked_node, is_swapping_
);

auto cur_epoch = theMsg()->getEpoch();
Expand All @@ -2527,7 +2527,7 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) {
if (is_locked_ && locking_rank_ <= msg->locked_node) {
proxy_[msg->locked_node].template send<&TemperedLB::releaseLock>();
theTerm()->consume(cur_epoch);
try_locks_.emplace(msg->locked_node, msg->locked_c_try);
try_locks_.emplace(msg->locked_node, msg->locked_c_try, 1);
//pending_actions_.push_back(action);
} else if (is_locked_) {
pending_actions_.push_back(action);
Expand All @@ -2539,7 +2539,6 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) {
"lockObtained: running action immediately\n"
);


action();
}
}
Expand All @@ -2551,14 +2550,22 @@ void TemperedLB::satisfyLockRequest() {
for (auto&& tl : try_locks_) {
vt_debug_print(
verbose, temperedlb,
"satisfyLockRequest: node={}, c_try={}\n", tl.requesting_node, tl.c_try
"satisfyLockRequest: node={}, c_try={}, forced_release={}\n",
tl.requesting_node, tl.c_try, tl.forced_release
);
}

auto iter = try_locks_.begin();
auto lock = *iter;
try_locks_.erase(iter);

if (lock.forced_release) {
std::this_thread::sleep_for(std::chrono::milliseconds(5));
lock.forced_release = false;
try_locks_.insert(lock);
return;
}

auto const this_node = theContext()->getNode();

vt_debug_print(
Expand Down
6 changes: 4 additions & 2 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,13 +435,15 @@ struct TemperedLB : BaseLB {
//////////////////////////////////////////////////////////////////////////////

struct TryLock {
TryLock(NodeType in_requesting, double in_c_try)
TryLock(NodeType in_requesting, double in_c_try, int in_forced_release = 0)
: requesting_node(in_requesting),
c_try(in_c_try)
c_try(in_c_try),
forced_release(in_forced_release)
{ }

NodeType requesting_node = uninitialized_destination;
double c_try = 0;
int forced_release = 0;

double operator<(TryLock const& other) const {
// sort in reverse order so the best is first!
Expand Down

0 comments on commit 2ea8a29

Please sign in to comment.