From c5bf70336611b9e83f2fb4a679eca366313f3f4c Mon Sep 17 00:00:00 2001 From: Nicholas Sharp Date: Sun, 29 Dec 2024 20:08:38 -0500 Subject: [PATCH] Fixes for loading and configuration of headless EGL rendering (#307) * use extensions to fetch platform devices for EGL * fix typo * small fixes, formatter * device preference sorting * few more fixes * use vector of device * use EGLDeviceEXT * use list of ints * two more small fixes * string safety and logging * nest logging correctly * check for extension * missing semicolon * more logging * cast * test * test * try fake initialize * compile fixes * revert debugging * clean up logging * cleanup * back to front * cleanup * add user-facing options for headless setup * fix framecount check * try testing EGL on ci * fix ci script * fix backend string * improve logging string formatting * clean up context stack on shutdown * don't check asan leaks in egl test * use asan settings for both configurations * comment clarity --- .github/workflows/linux.yml | 18 +- examples/demo-app/demo_app.cpp | 12 +- include/polyscope/options.h | 13 +- include/polyscope/polyscope.h | 6 + include/polyscope/render/engine.h | 3 + .../polyscope/render/opengl/gl_engine_egl.h | 7 + src/options.cpp | 3 + src/polyscope.cpp | 19 +++ src/render/initialize_backend.cpp | 48 +++--- src/render/opengl/gl_engine_egl.cpp | 161 +++++++++++++++++- test/include/polyscope_test.h | 7 +- test/src/basics_test.cpp | 5 + 12 files changed, 260 insertions(+), 42 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8f7b2b00..656953a0 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -27,8 +27,14 @@ jobs: - name: build run: cd test/build && make - - name: run test + - name: run test mock backend run: cd test/build && ./bin/polyscope-test --gtest_catch_exceptions=0 backend=openGL_mock + + - name: run test egl backend + # We get memory leaks inside of EGL that I can't track down. 
With ASAN, this means the exit code is always nonzero, + # which is indistinguishable from tests failing. The ASAN_OPTIONS=detect_leaks=0 skips checking leaks for this test + # as a workaround. + run: cd test/build && ASAN_OPTIONS=detect_leaks=0 ./bin/polyscope-test --gtest_catch_exceptions=0 backend=openGL3_egl build_shared: strategy: @@ -45,10 +51,16 @@ jobs: run: sudo apt-get update && sudo apt-get install -y xorg-dev libglu1-mesa-dev xpra xserver-xorg-video-dummy freeglut3-dev - name: configure - run: cd test && mkdir build && cd build && cmake -DBUILD_SHARED_LIBS=TRUE -DCMAKE_BUILD_TYPE=Debug -DPOLYSCOPE_BACKEND_OPENGL3_GLFW=ON -DPOLYSCOPE_BACKEND_OPENGL_MOCK=ON .. + run: cd test && mkdir build && cd build && cmake -DBUILD_SHARED_LIBS=TRUE -DCMAKE_BUILD_TYPE=Debug -DPOLYSCOPE_BACKEND_OPENGL3_GLFW=ON -DPOLYSCOPE_BACKEND_OPENGL_MOCK=ON -DPOLYSCOPE_BACKEND_OPENGL3_EGL=ON .. - name: build run: cd test/build && make - - name: run test + - name: run test mock backend run: cd test/build && ./bin/polyscope-test --gtest_catch_exceptions=0 backend=openGL_mock + + - name: run test egl backend + # We get memory leaks inside of EGL that I can't track down. With ASAN, this means the exit code is always nonzero, + # which is indistinguishable from tests failing. The ASAN_OPTIONS=detect_leaks=0 skips checking leaks for this test + # as a workaround. 
+ run: cd test/build && ASAN_OPTIONS=detect_leaks=0 ./bin/polyscope-test --gtest_catch_exceptions=0 backend=openGL3_egl diff --git a/examples/demo-app/demo_app.cpp b/examples/demo-app/demo_app.cpp index f7a0d6cb..27b826a2 100644 --- a/examples/demo-app/demo_app.cpp +++ b/examples/demo-app/demo_app.cpp @@ -846,6 +846,7 @@ int main(int argc, char** argv) { // polyscope::options::maxFPS = -1; polyscope::options::verbosity = 100; polyscope::options::enableRenderErrorChecks = true; + polyscope::options::allowHeadlessBackends = true; // Initialize polyscope polyscope::init(); @@ -871,9 +872,14 @@ int main(int argc, char** argv) { // Add a few gui elements polyscope::state::userCallback = callback; - // Show the gui - polyscope::show(); - + if (polyscope::isHeadless()) { + // save a screenshot to prove we initialized + std::cout << "Headless mode detected, saving screenshot" << std::endl; + polyscope::screenshot("headless_screenshot.png"); + } else { + // Show the gui + polyscope::show(); + } // main loop using manual frameTick() instead // while (true) { // polyscope::frameTick(); diff --git a/include/polyscope/options.h b/include/polyscope/options.h index a439ddda..356412ea 100644 --- a/include/polyscope/options.h +++ b/include/polyscope/options.h @@ -13,7 +13,9 @@ namespace polyscope { -namespace options { // A general name to use when referring to the program in window headings. +namespace options { + +// A general name to use when referring to the program in window headings. extern std::string programName; // How much should polyscope print to std::out? @@ -28,6 +30,10 @@ extern std::string printPrefix; // Should errors throw exceptions, or just display? (default false) extern bool errorsThrowExceptions; +// Allow initialization to create headless backends when selecting a backend automatically +// (they can still be created explicitly by name) (default: false) +extern bool allowHeadlessBackends; + // Don't let the main loop run at more than this speed. 
(-1 disables) (default: 60) extern int maxFPS; @@ -119,6 +125,11 @@ extern std::function configureImGuiStyleCallback; // assign your own function to create custom styles. If this callback is null, default fonts will be used. extern std::function()> prepareImGuiFontsCallback; +// === Backend and low-level options + +// When using the EGL backend, which device to try to initialize with +// (default is -1 which means try all of them) +extern int eglDeviceIndex; // === Debug options diff --git a/include/polyscope/polyscope.h b/include/polyscope/polyscope.h index c7205399..25f1f29d 100644 --- a/include/polyscope/polyscope.h +++ b/include/polyscope/polyscope.h @@ -66,6 +66,12 @@ void shutdown(bool allowMidFrameShutdown=false); // deciding when to exit your control loop when using frameTick() bool windowRequestsClose(); +// Is Polyscope running in 'headless' mode? Headless means there is no physical display to open windows on, +// e.g. when running on a remote server. It is still possible to run Polyscope in such settings with a supported +// backend (currently, the EGL backend only), and render to save screenshots or for other purposes. +// Can only be called after initialization. +bool isHeadless(); + // === Global variables === namespace state { diff --git a/include/polyscope/render/engine.h b/include/polyscope/render/engine.h index 3d71e8c0..7a89a93b 100644 --- a/include/polyscope/render/engine.h +++ b/include/polyscope/render/engine.h @@ -445,6 +445,9 @@ class Engine { virtual void shutdown() {}; virtual void checkError(bool fatal = false) = 0; void buildEngineGui(); + + // 'headless' means there is no physical display to actually render to, e.g. 
when running on a remote server + virtual bool isHeadless() { return false; } virtual void clearDisplay(); virtual void bindDisplay(); diff --git a/include/polyscope/render/opengl/gl_engine_egl.h b/include/polyscope/render/opengl/gl_engine_egl.h index a57f8863..45f50d41 100644 --- a/include/polyscope/render/opengl/gl_engine_egl.h +++ b/include/polyscope/render/opengl/gl_engine_egl.h @@ -12,6 +12,7 @@ #include "glad/glad.h" // glad must come first #include +#include #endif @@ -41,6 +42,9 @@ class GLEngineEGL : public GLEngine { virtual void shutdown() override; void swapDisplayBuffers() override; void checkError(bool fatal = false) override; + + // EGL backend is always headless + virtual bool isHeadless() override { return true; } // === Windowing and framework things @@ -74,6 +78,9 @@ class GLEngineEGL : public GLEngine { // Internal windowing and engine details EGLDisplay eglDisplay; EGLContext eglContext; + + // helpers + void sortAvailableDevicesByPreference(std::vector& deviceInds, EGLDeviceEXT rawDevices[]); }; } // namespace backend_openGL3 diff --git a/src/options.cpp b/src/options.cpp index 5bcaca32..3b87de4d 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -9,6 +9,7 @@ namespace options { std::string programName = "Polyscope"; int verbosity = 2; std::string printPrefix = "[polyscope] "; +bool allowHeadlessBackends = false; bool errorsThrowExceptions = false; bool debugDrawPickBuffer = false; int maxFPS = 60; @@ -55,6 +56,8 @@ bool openImGuiWindowForUserCallback = true; std::function configureImGuiStyleCallback = configureImGuiStyle; std::function()> prepareImGuiFontsCallback = prepareImGuiFonts; +// Backend and low-level options +int eglDeviceIndex = -1; // means "try all of them" // enabled by default in debug mode #ifndef NDEBUG diff --git a/src/polyscope.cpp b/src/polyscope.cpp index 575f8e2b..a21fd16f 100644 --- a/src/polyscope.cpp +++ b/src/polyscope.cpp @@ -893,6 +893,14 @@ void show(size_t forFrames) { if (!state::initialized) { 
exception("must initialize Polyscope with polyscope::init() before calling polyscope::show()."); } + + if (isHeadless() && forFrames == 0) { + info("You called show() while in headless mode. In headless mode there is no display to create windows on. By " + "default, the show() call will block indefinitely. If you did not mean to run in headless mode, check the " + "initialization settings. Otherwise, be sure to set a callback to make something happen while polyscope is " + "showing the UI, or use functions like screenshot() to render directly without calling show()."); + } + unshowRequested = false; // the popContext() doesn't quit until _after_ the last frame, so we need to decrement by 1 to get the count right @@ -934,6 +942,16 @@ bool windowRequestsClose() { return false; } +bool isHeadless() { + if (!isInitialized()) { + exception("must initialize Polyscope with init() before calling isHeadless()."); + } + if (render::engine) { + return render::engine->isHeadless(); + } + return false; +} + void shutdown(bool allowMidFrameShutdown) { if (!allowMidFrameShutdown && contextStack.size() > 1) { @@ -955,6 +973,7 @@ void shutdown(bool allowMidFrameShutdown) { // Shut down the render engine render::engine->shutdown(); delete render::engine; + contextStack.clear(); render::engine = nullptr; state::backend = ""; state::initialized = false; diff --git a/src/render/initialize_backend.cpp b/src/render/initialize_backend.cpp index 95e20772..7c19283d 100644 --- a/src/render/initialize_backend.cpp +++ b/src/render/initialize_backend.cpp @@ -46,6 +46,7 @@ void initializeRenderEngine(std::string backend) { // Attempt to automatically initialize by trynig bool initSucces = false; + std::string extraMessage = ""; #ifdef POLYSCOPE_BACKEND_OPENGL3_GLFW_ENABLED // First try GLFW, if available @@ -55,40 +56,47 @@ void initializeRenderEngine(std::string backend) { initSucces = true; } catch (const std::exception& e) { if (options::verbosity > 0) { - info("Attempting automatic 
initialization. Could not initialize backend [openGL3_glfw]. Message: " + - std::string(e.what())); + info("Automatic initialization status: could not initialize backend [openGL3_glfw]."); } } if (initSucces) return; #endif #ifdef POLYSCOPE_BACKEND_OPENGL3_EGL_ENABLED - // Then, try EGL if available - engineBackendName = "openGL3_egl"; - try { - backend_openGL3::initializeRenderEngine_egl(); - initSucces = true; - } catch (const std::exception& e) { - if (options::verbosity > 0) { - info("Attempting automatic initialization. Could not initialize backend [openGL3_egl]. Message: " + - std::string(e.what())); + + if (options::allowHeadlessBackends) { + + // Then, try EGL if available + engineBackendName = "openGL3_egl"; + try { + backend_openGL3::initializeRenderEngine_egl(); + initSucces = true; + } catch (const std::exception& e) { + if (options::verbosity > 0) { + info("Automatic initialization status: could not initialize backend [openGL3_egl]."); + } } - } - if (initSucces) { - if (options::verbosity > 0) { - info("Automatic initialization could not create an interactive backend, and created a headless backend " - "instead. This likely means no displays are available. With the headless backend, you can still run " - "Polyscope and even render, for instance to record screenshots. However no interactive windows can be " - "created."); + if (initSucces) { + if (options::verbosity > 0) { + info("Automatic initialization could not create an interactive backend, and created a headless backend " + "instead. This likely means no displays are available. With the headless backend, you can still run " + "Polyscope and even render, for instance to save images of visualizations. However no interactive " + "windows can be created."); + } + return; } - return; + + } else { + extraMessage = " The headless EGL backend was available, but allowHeadlessBackends=false. 
Set it to true for " + "headless initialization."; } + #endif // Don't bother trying the 'mock' backend, it is unlikely to be what the user wants from the 'auto' option // Failure - exception("Automatic initialization: no Polyscope backends could be initialized successfully."); + exception("Automatic initialization: no Polyscope backends could be initialized successfully." + extraMessage); } else { exception("unrecognized Polyscope backend " + backend); diff --git a/src/render/opengl/gl_engine_egl.cpp b/src/render/opengl/gl_engine_egl.cpp index 81a0c3cf..c6c0c01d 100644 --- a/src/render/opengl/gl_engine_egl.cpp +++ b/src/render/opengl/gl_engine_egl.cpp @@ -12,7 +12,12 @@ #include "stb_image.h" #include +#include #include +#include + +#include +#include namespace polyscope { namespace render { @@ -20,6 +25,16 @@ namespace backend_openGL3 { namespace { // anonymous helpers +// Helper function to get an EGL (extension?) function and error-check that +// we got it successfully +void* getEGLProcAddressAndCheck(std::string name) { + void* procAddr = (void*)(eglGetProcAddress(name.c_str())); + if (!procAddr) { + error("EGL failed to get function pointer for " + name); + } + return procAddr; +} + void checkEGLError(bool fatal = true) { if (!options::enableRenderErrorChecks) { @@ -129,21 +144,69 @@ void GLEngineEGL::initialize() { // === Initialize EGL - // Get the default display - eglDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY); - if (eglDisplay == EGL_NO_DISPLAY) { - exception("ERROR: Failed to initialize EGL, could not get default display"); + // Pre-load required extension functions + PFNEGLQUERYDEVICESEXTPROC eglQueryDevicesEXT = + (PFNEGLQUERYDEVICESEXTPROC)getEGLProcAddressAndCheck("eglQueryDevicesEXT"); + + // Query the available EGL devices + const int N_MAX_DEVICE = 256; + EGLDeviceEXT rawDevices[N_MAX_DEVICE]; + EGLint nDevices; + if (!eglQueryDevicesEXT(N_MAX_DEVICE, rawDevices, &nDevices)) { + error("EGL: Failed to query devices."); + } + if (nDevices == 0) { 
+ error("EGL: No devices found."); } + info("EGL: Found " + std::to_string(nDevices) + " EGL devices."); + + // Build an ordered list of which devices to try initializing with + std::vector deviceIndsToTry; + if (options::eglDeviceIndex == -1) { + info("EGL: No device index specified, attempting to intialize with each device in heuristic-guess order until " + "success."); - // Configure + deviceIndsToTry.resize(nDevices); + std::iota(deviceIndsToTry.begin(), deviceIndsToTry.end(), 0); + sortAvailableDevicesByPreference(deviceIndsToTry, rawDevices); + + } else { + info("EGL: Device index " + std::to_string(options::eglDeviceIndex) + " manually selected, using that device."); + + if (options::eglDeviceIndex >= nDevices) { + error("EGL: Device index " + std::to_string(options::eglDeviceIndex) + " manually selected, but only " + + std::to_string(nDevices) + " devices available."); + } + + deviceIndsToTry.push_back(options::eglDeviceIndex); + } + + bool successfulInit = false; EGLint majorVer, minorVer; - bool success = eglInitialize(eglDisplay, &majorVer, &minorVer); - if (!success) { - checkEGLError(false); + for (int32_t iDevice : deviceIndsToTry) { + + info("EGL: Attempting initialization with device " + std::to_string(iDevice)); + EGLDeviceEXT device = rawDevices[iDevice]; + + // Get an EGLDisplay for the device + // (use the -platform / EXT version because it is the only one that seems to work in headless environments) + eglDisplay = eglGetPlatformDisplay(EGL_PLATFORM_DEVICE_EXT, device, NULL); + if (eglDisplay == EGL_NO_DISPLAY) { + continue; + } + + // Configure + successfulInit = eglInitialize(eglDisplay, &majorVer, &minorVer); + if (successfulInit) { + break; + } + } + + if (!successfulInit) { exception("ERROR: Failed to initialize EGL"); } checkEGLError(); - + info("EGL: Initialization successful"); // this has something to do with the EGL configuration, I don't understand exactly what // clang-format off @@ -222,6 +285,86 @@ void GLEngineEGL::initialize() { 
checkError(); } +void GLEngineEGL::sortAvailableDevicesByPreference(std::vector& deviceInds, EGLDeviceEXT rawDevices[]) { + + // check that we actually have the query extension + const char* extensions = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + if (extensions && std::string(extensions).find("EGL_EXT_device_query") != std::string::npos) { + // good case, supported + } else { + info("EGL: cannot sort devices by preference, EGL_EXT_device_query is not supported"); + return; + } + + // Pre-load required extension functions + PFNEGLQUERYDEVICESTRINGEXTPROC eglQueryDeviceStringEXT = + (PFNEGLQUERYDEVICESTRINGEXTPROC)getEGLProcAddressAndCheck("eglQueryDeviceStringEXT"); + + // Build a list of devices and assign a score to each + std::vector> scoreDevices; + for (int32_t iDevice : deviceInds) { + EGLDeviceEXT device = rawDevices[iDevice]; + int score = 0; + + // Heuristic, non-software renderers seem to come last, so add a term to the score that prefers later-listed entries + // TODO find a way to test for software rasterization for real + score += iDevice; + + const char* vendorStrRaw = eglQueryDeviceStringEXT(device, EGL_VENDOR); + + // NOTE: on many machines (cloud VMs?) the query string above is nullptr, and this whole function does nothing + // useful + if (vendorStrRaw == nullptr) { + if (polyscope::options::verbosity > 5) { + std::cout << polyscope::options::printPrefix << " EGLDevice " << iDevice << " -- vendor: " << "NULL" + << " priority score: " << score << std::endl; + } + scoreDevices.emplace_back(score, iDevice); + continue; + } + + std::string vendorStr = vendorStrRaw; + + // lower-case it for the checks below + std::transform(vendorStr.begin(), vendorStr.end(), vendorStr.begin(), + [](unsigned char c) { return std::tolower(c); }); + + // Problem: we want to detect and prefer discrete graphics cards over integrated GPUs and + // software / VM renderers. 
However, I can't figure out how to get an "is integrated" + // property from the query device strings above. Even worse, "AMD" and "Intel" are both + // ambiguous and could refer to the integrated GPU or a discrete GPU. + // + // As a workaround, we assign scores based on the vendor: nvidia is always discrete, amd could be either, intel is + // usually integrated, but still preferred over software renderers + // + // ONEDAY: figure out a better policy to detect discrete devices.... + + // assign scores based on vendors to prefer discrete gpus + const int32_t VENDOR_MULT = 100; // give this score entry a very high preference + if (vendorStr.find("intel") != std::string::npos) score += 1 * VENDOR_MULT; + if (vendorStr.find("amd") != std::string::npos) score += 2 * VENDOR_MULT; + if (vendorStr.find("nvidia") != std::string::npos) score += 3 * VENDOR_MULT; + + // at high verbosity levels, log the priority + if (polyscope::options::verbosity > 5) { + std::cout << polyscope::options::printPrefix << " EGLDevice " << iDevice << " -- vendor: " << vendorStr + << " priority score: " << score << std::endl; + } + + scoreDevices.emplace_back(score, iDevice); + } + + // sort them by highest score + std::sort(scoreDevices.begin(), scoreDevices.end()); + std::reverse(scoreDevices.begin(), scoreDevices.end()); + + + // store them back in the given array + for (size_t i = 0; i < deviceInds.size(); i++) { + deviceInds[i] = std::get<1>(scoreDevices[i]); + } +} + void GLEngineEGL::initializeImGui() { diff --git a/test/include/polyscope_test.h b/test/include/polyscope_test.h index c5b4a6e9..c3d8a572 100644 --- a/test/include/polyscope_test.h +++ b/test/include/polyscope_test.h @@ -38,12 +38,7 @@ class PolyscopeTest : public ::testing::Test { // Per-test-suite tear-down. // Called after the last test in this test suite. // Can be omitted if not needed. 
- /* - static void TearDownTestSuite() { - delete shared_resource_; - shared_resource_ = NULL; - } - */ + static void TearDownTestSuite() { polyscope::shutdown(); } // You can define per-test set-up logic as usual. // virtual void SetUp() { ... } diff --git a/test/src/basics_test.cpp b/test/src/basics_test.cpp index afb2aafc..7f2cdc9d 100644 --- a/test/src/basics_test.cpp +++ b/test/src/basics_test.cpp @@ -88,6 +88,11 @@ TEST_F(PolyscopeTest, ShutdownAndReinitialize) { polyscope::shutdown(); SetUpTestSuite(); polyscope::show(3); + + // do it twice -- we've had some bugs where the first shutdown doesn't clean up properly + polyscope::shutdown(); + SetUpTestSuite(); + polyscope::show(3); } // Make sure that creating an empty buffer does not throw errors