Skip to content

Commit

Permalink
Merge branch 'hotfix/SDK-4753_Gfxproc_causes_a_hang_on_desktop_app' i…
Browse files Browse the repository at this point in the history
…nto 'release/v8.0.2'

SDK-4753 Gfxproc causes a hang on desktop app [hotfix 8.0.2]

See merge request sdk/sdk!6162
  • Loading branch information
sergiohs84 committed Jan 7, 2025
2 parents 0a90f22 + 7236016 commit d6a5d20
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 99 deletions.
14 changes: 7 additions & 7 deletions include/mega/gfx/isolatedprocess.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,20 @@ class CancellableSleeper
bool mCancelled = false;
};

class AutoStartLauncher
class AutoStartLauncher: public std::enable_shared_from_this<AutoStartLauncher>
{
public:
AutoStartLauncher(const std::vector<std::string>& argv, std::function<void()> shutdowner);

~AutoStartLauncher();
bool start();

void shutDownOnce();
void stop();

private:

bool startUntilSuccess(Process& process);

bool startLaunchLoopThread();

void exitLaunchLoopThread();
bool exitLaunchLoopThread();

std::vector<std::string> mArgv;

Expand Down Expand Up @@ -137,12 +135,14 @@ class GfxIsolatedProcess

GfxIsolatedProcess(const Params& params);

~GfxIsolatedProcess();

const std::string& endpointName() const { return mEndpointName; }
private:

std::string mEndpointName;

AutoStartLauncher mLauncher;
std::shared_ptr<AutoStartLauncher> mLauncher;

HelloBeater mBeater;
};
Expand Down
111 changes: 70 additions & 41 deletions src/gfx/isolatedprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,6 @@ AutoStartLauncher::AutoStartLauncher(const std::vector<std::string>& argv, std::
mShutdowner(std::move(shutdowner))
{
assert(!mArgv.empty());

// preventive check: at least one element (executable)
if (!mArgv.empty())
{
// launch loop thread
startLaunchLoopThread();
}
else
{
LOG_fatal << "AutoStartLauncher argv is empty";
}
}

bool AutoStartLauncher::startUntilSuccess(Process& process)
Expand Down Expand Up @@ -201,11 +190,14 @@ bool AutoStartLauncher::startUntilSuccess(Process& process)
return false;
}

bool AutoStartLauncher::startLaunchLoopThread()
bool AutoStartLauncher::start()
{
static const milliseconds maxBackoff(400);
static const milliseconds maxBackoff(3000);
static const milliseconds fastFailureThreshold(1000);

if (mArgv.empty())
return false;

// There are permanent startup failures, such as a missing DLL. This is not likely to happen
// at customer's side as it will be installed properly. It is more likely during development
// and testing phases. We want to implement some backOff to reduce CPU usage if it does happen
Expand All @@ -222,7 +214,8 @@ bool AutoStartLauncher::startLaunchLoopThread()
// if less than the threshold, it failed right after startup.
if ((used < fastFailureThreshold) && !mShuttingDown)
{
LOG_err << "process existed too fast: " << used.count() << " backoff" << backOff.count() << "ms";
// LOG_verbose << "process existed too fast: " << used.count() << " backoff "
// << backOff.count() << "ms";
mSleeper.sleep(backOff);
backOff = std::min(backOff * 2, maxBackoff); // double it and maxBackoff at most
}
Expand All @@ -233,19 +226,25 @@ bool AutoStartLauncher::startLaunchLoopThread()
}
};

auto launcher = [this, backoffForFastFailure]() {
auto launcher = [this, backoffForFastFailure]()
{
// Keep a copy, so the object is always live while the code is running
auto keepRef = shared_from_this();

mThreadIsRunning = true;

backoffForFastFailure([this](){
Process process;
if (startUntilSuccess(process))
{
bool ret = process.wait();
LOG_debug << "wait: " << ret
<< " hasSignal: " << process.hasTerminateBySignal()
<< " " << (process.hasTerminateBySignal() ? std::to_string(process.getTerminatingSignal()) : "")
<< " hasExited: " << process.hasExited()
<< " " << (process.hasExited() ? std::to_string(process.getExitCode()) : "");
LOG_verbose << "wait: " << ret << " hasSignal: " << process.hasTerminateBySignal()
<< " "
<< (process.hasTerminateBySignal() ?
std::to_string(process.getTerminatingSignal()) :
"")
<< " hasExited: " << process.hasExited() << " "
<< (process.hasExited() ? std::to_string(process.getExitCode()) : "");
}
});

Expand All @@ -262,19 +261,33 @@ bool AutoStartLauncher::startLaunchLoopThread()
// is just starting, so we'll retry in the loop, but there is no reason it
// couldn't be shut down in 15 seconds
//
void AutoStartLauncher::exitLaunchLoopThread()
// @return true if thread exits, otherwise false
bool AutoStartLauncher::exitLaunchLoopThread()
{
milliseconds backOff(10);
while (mThreadIsRunning && backOff < 15s)
milliseconds interval{10};
milliseconds totalWaitTime{0};
while (mThreadIsRunning && totalWaitTime < 15s)
{
LOG_verbose << "interval " << interval.count() << " totalWaitTime "
<< totalWaitTime.count();

// shutdown the started process
if (mShutdowner) mShutdowner();
std::this_thread::sleep_for(backOff);
backOff += 10ms;

// wait
std::this_thread::sleep_for(interval);

// Update total wait time
totalWaitTime += interval;

// backoff
interval += 10ms;
}

return !mThreadIsRunning;
}

void AutoStartLauncher::shutDownOnce()
void AutoStartLauncher::stop()
{
bool wasShuttingdown = mShuttingDown.exchange(true);
if (wasShuttingdown)
Expand All @@ -287,17 +300,23 @@ void AutoStartLauncher::shutDownOnce()
// cancel the sleeper; if the thread is currently sleeping, it is woken up
mSleeper.cancel();

exitLaunchLoopThread();
if (mThread.joinable()) mThread.join();
if (exitLaunchLoopThread())
{
if (mThread.joinable())
mThread.join();
}
else
{
// Defensive: the thread did not exit, so detach it instead of joining.
// We had such a bug before; reaching this branch usually indicates a bug.
assert(false && "AutoStartLauncher detaching loop thread");
LOG_warn << "AutoStartLauncher detaching loop thread";
mThread.detach();
}

LOG_info << "AutoStartLauncher is down";
}

AutoStartLauncher::~AutoStartLauncher()
{
shutDownOnce();
}

bool CancellableSleeper::sleep(const milliseconds& period)
{
std::unique_lock<std::mutex> l(mMutex);
Expand Down Expand Up @@ -345,12 +364,22 @@ std::vector<std::string> GfxIsolatedProcess::Params::toArgs() const
// We divide keepAliveInSeconds by three to set up mBeater so that it allows at least two
// beats within the keep-alive period.
GfxIsolatedProcess::GfxIsolatedProcess(const Params& params):
mEndpointName(params.endpointName),
mLauncher(params.toArgs(),
[endpointName = params.endpointName]()
{
shutdown(endpointName);
}),
mBeater(seconds(params.keepAliveInSeconds / 3), params.endpointName)
{}
mEndpointName{
params.endpointName
},
mLauncher{new AutoStartLauncher{params.toArgs(),
[endpointName = params.endpointName]()
{
shutdown(endpointName);
}}},
mBeater{seconds(params.keepAliveInSeconds / 3), params.endpointName}
{
mLauncher->start();
}

// Destructor: explicitly calls stop() on the launcher held in mLauncher.
// NOTE(review): presumably required because the launch loop keeps its own
// shared reference to the launcher while it runs, so releasing mLauncher
// alone would not stop it -- confirm against AutoStartLauncher::start().
GfxIsolatedProcess::~GfxIsolatedProcess()
{
mLauncher->stop();
}

} // Namespace
8 changes: 4 additions & 4 deletions src/process.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,21 +205,21 @@ bool Process::run(const vector<string>& args, const unordered_map<string, string
if (dup2(childReadFd, STDOUT_FILENO) == -1)
{
reportError("Could not redirect stdout");
exit(1);
_exit(EXIT_FAILURE);
}
// stderr
::close(STDERR_FILENO);
if (dup2(childReadErrorFd, STDERR_FILENO) == -1)
{
reportError("Could not redirect stderr");
exit(1);
_exit(EXIT_FAILURE);
}

// Prepare command-line arguments for child process
if (args.empty())
{
cerr << "Process: Can not execute, no arguments given" << endl;
exit(1);
_exit(EXIT_FAILURE);
}
vector<char*> argv;
for (vector<string>::const_iterator i = args.begin(); i != args.end(); ++i)
Expand All @@ -237,7 +237,7 @@ bool Process::run(const vector<string>& args, const unordered_map<string, string
// cerr so parent process sees this
cerr << "Could not execute '" + string(argv[0]) + "'" << ": " << savedErrno << ": " << strerror(savedErrno) << endl;
reportError("Could not execute '" + string(argv[0]) + "'", savedErrno);
exit(1);
_exit(EXIT_FAILURE);
}
// else --> parent process

Expand Down
Loading

0 comments on commit d6a5d20

Please sign in to comment.