From 9f2005113b7576ae37162d9a6c0522bb904f94c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 29 May 2023 10:53:50 +0200 Subject: [PATCH 1/4] Add a function to the dev menu to intentionally lose the VK device. To be used for debugging recovery. --- Common/GPU/Vulkan/VulkanContext.cpp | 65 +++++++++++++++++++---------- Common/GPU/Vulkan/VulkanContext.h | 8 +++- Common/GPU/Vulkan/VulkanLoader.cpp | 1 + Common/GPU/Vulkan/thin3d_vulkan.cpp | 4 ++ Common/GPU/thin3d.h | 2 + UI/DevScreens.cpp | 8 ++++ 6 files changed, 65 insertions(+), 23 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index e57345cfab6d..3f4e6a85a7ec 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -1455,6 +1455,50 @@ bool VulkanContext::CreateShaderModule(const std::vector<uint32_t> &spirv, VkSha } } +// Only to be used for debugging lost device handling. +// This works on NVIDIA to cause a lost device, need to try others. 
+void VulkanContext::IntentionallyLoseDevice() { + _assert_(device_); + VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + b.size = 1024; + b.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + b.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + VmaAllocationCreateInfo allocCreateInfo{}; + allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + VmaAllocationInfo allocInfo{}; + + VkBuffer buffer; + VmaAllocation alloc; + + VkResult result = vmaCreateBuffer(Allocator(), &b, &allocCreateInfo, &buffer, &alloc, &allocInfo); + _assert_(result == VK_SUCCESS); + + VkCommandPoolCreateInfo ci{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; + VkCommandPool cmdPool; + vkCreateCommandPool(device_, &ci, nullptr, &cmdPool); + VkCommandBufferAllocateInfo cmdAllocInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO }; + cmdAllocInfo.commandPool = cmdPool; + cmdAllocInfo.commandBufferCount = 1; + cmdAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + VkCommandBuffer cmdBuf; + vkAllocateCommandBuffers(device_, &cmdAllocInfo, &cmdBuf); + VkCommandBufferBeginInfo beginInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; + vkBeginCommandBuffer(cmdBuf, &beginInfo); + // Nonsense! This copy region lies far outside the 1024-byte buffer, which is what should trigger the device loss. + VkBufferCopy info{ 0, 1000000000, 100000 }; + vkCmdCopyBuffer(cmdBuf, buffer, buffer, 1, &info); + vkEndCommandBuffer(cmdBuf); + VkSubmitInfo submitInfo{ VK_STRUCTURE_TYPE_SUBMIT_INFO }; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmdBuf; + // NOTE: Depending on which thread this is called from, this can itself be a violation (queueing stuff from a different thread + // on a queue used by another thread). + VkResult retval = vkQueueSubmit(gfx_queue_, 1, &submitInfo, VK_NULL_HANDLE); + // We might not actually lose the device immediately, but good to confirm. + NOTICE_LOG(G3D, "Tried to lose the device, vkQueueSubmit retval = %s", VulkanResultToString(retval)); + // At this point, the device should be lost. 
+} + void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -1712,27 +1756,6 @@ void VulkanDeleteList::PerformDeletes(VulkanContext *vulkan, VmaAllocator alloca queryPools_.clear(); } -void VulkanContext::GetImageMemoryRequirements(VkImage image, VkMemoryRequirements *mem_reqs, bool *dedicatedAllocation) { - if (Extensions().KHR_dedicated_allocation) { - VkImageMemoryRequirementsInfo2KHR memReqInfo2{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR}; - memReqInfo2.image = image; - - VkMemoryRequirements2KHR memReq2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR}; - VkMemoryDedicatedRequirementsKHR memDedicatedReq{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR}; - memReq2.pNext = &memDedicatedReq; - - vkGetImageMemoryRequirements2KHR(GetDevice(), &memReqInfo2, &memReq2); - - *mem_reqs = memReq2.memoryRequirements; - *dedicatedAllocation = - (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE) || - (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); - } else { - vkGetImageMemoryRequirements(GetDevice(), image, mem_reqs); - *dedicatedAllocation = false; - } -} - bool IsHashMaliDriverVersion(const VkPhysicalDeviceProperties &props) { // ARM used to put a hash in place of the driver version. // Now they only use major versions. We'll just make a bad heuristic. 
diff --git a/Common/GPU/Vulkan/VulkanContext.h b/Common/GPU/Vulkan/VulkanContext.h index de760b934b6a..edc1fca22120 100644 --- a/Common/GPU/Vulkan/VulkanContext.h +++ b/Common/GPU/Vulkan/VulkanContext.h @@ -365,8 +365,6 @@ class VulkanContext { return devicePerfClass_; } - void GetImageMemoryRequirements(VkImage image, VkMemoryRequirements *mem_reqs, bool *dedicatedAllocation); - VmaAllocator Allocator() const { return allocator_; } @@ -383,6 +381,10 @@ class VulkanContext { return availablePresentModes_; } + // Forces a device loss, to help debug device recovery. + // It'll create its own command buffer for this. + void IntentionallyLoseDevice(); + private: bool ChooseQueue(); @@ -476,6 +478,8 @@ class VulkanContext { std::vector cmdQueue_; VmaAllocator allocator_ = VK_NULL_HANDLE; + + bool deviceLost_ = false; }; // Detailed control. diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp index 23f664ba977b..538eeaf2588a 100644 --- a/Common/GPU/Vulkan/VulkanLoader.cpp +++ b/Common/GPU/Vulkan/VulkanLoader.cpp @@ -742,6 +742,7 @@ void VulkanFree() { const char *VulkanResultToString(VkResult res) { static char temp[128]{}; switch (res) { + case VK_SUCCESS: return "VK_SUCCESS"; case VK_NOT_READY: return "VK_NOT_READY"; case VK_TIMEOUT: return "VK_TIMEOUT"; case VK_EVENT_SET: return "VK_EVENT_SET"; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index bc7d9ba700cd..a5edda73ba94 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -408,6 +408,10 @@ class VKContext : public DrawContext { } } + void IntentionallyLoseDevice() override { + vulkan_->IntentionallyLoseDevice(); + } + DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override; BlendState *CreateBlendState(const BlendStateDesc &desc) override; InputLayout *CreateInputLayout(const InputLayoutDesc &desc) override; diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 
b32fbb3cbaba..c492d1c98b7a 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -834,6 +834,8 @@ class DrawContext { // Not very elegant, but more elegant than the old passId hack. virtual void SetInvalidationCallback(InvalidationCallback callback) = 0; + virtual void IntentionallyLoseDevice() {} + protected: ShaderModule *vsPresets_[VS_MAX_PRESET]; ShaderModule *fsPresets_[FS_MAX_PRESET]; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 36aaf564f7ee..0b178cc62b0c 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -123,6 +123,14 @@ void DevMenuScreen::CreatePopupContents(UI::ViewGroup *parent) { items->Add(new CheckBox(&g_Config.bDrawFrameGraph, dev->T("Draw Frametimes Graph"))); items->Add(new Choice(dev->T("Reset limited logging")))->OnClick.Handle(this, &DevMenuScreen::OnResetLimitedLogging); + if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) { + items->Add(new Choice(dev->T("Crash GPU")))->OnClick.Add([&](UI::EventParams &) { + Draw::DrawContext *draw = screenManager()->getDrawContext(); + draw->IntentionallyLoseDevice(); + return UI::EVENT_DONE; + }); + } + scroll->Add(items); parent->Add(scroll); From f33776bc4a4d1868399f4f24c7b9b40b5acc1d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 29 May 2023 16:02:53 +0200 Subject: [PATCH 2/4] Fix a very tiny memory leak --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 287eaf236575..c1bde6d52570 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -507,6 +507,8 @@ void VulkanRenderManager::ThreadFunc() { // push more work when it feels like it, and just start working. if (task->runType == VKRRunType::EXIT) { // Oh, host wanted out. Let's leave. + delete task; + // In this case, there should be no more tasks. 
break; } From 485131a29cc85df7c7cb90b82fcda18cc04de6ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 29 May 2023 18:17:03 +0200 Subject: [PATCH 3/4] Win32: Plumb through an attempt at handling Vulkan device lost. Unfortunately, recreating the device fails on NV. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 5 +- Common/GPU/OpenGL/thin3d_gl.cpp | 5 +- Common/GPU/Vulkan/VulkanContext.cpp | 3 +- Common/GPU/Vulkan/VulkanDebug.cpp | 8 +++ Common/GPU/Vulkan/VulkanFrameData.cpp | 7 ++- Common/GPU/Vulkan/VulkanFrameData.h | 3 ++ Common/GPU/Vulkan/VulkanQueueRunner.cpp | 4 ++ Common/GPU/Vulkan/VulkanRenderManager.cpp | 63 +++++++++++++++++++++-- Common/GPU/Vulkan/VulkanRenderManager.h | 10 +++- Common/GPU/Vulkan/thin3d_vulkan.cpp | 11 ++-- Common/GPU/thin3d.h | 2 +- Common/GraphicsContext.h | 4 ++ Common/UI/Screen.cpp | 24 +++++---- Common/UI/Screen.h | 2 +- Common/UI/UIScreen.cpp | 9 ++-- Common/UI/UIScreen.h | 2 +- Core/Core.cpp | 15 +++++- UI/EmuScreen.cpp | 7 ++- UI/EmuScreen.h | 2 +- UI/NativeApp.cpp | 1 + Windows/EmuThread.cpp | 32 ++++++++++-- Windows/GPU/WindowsVulkanContext.cpp | 5 ++ Windows/GPU/WindowsVulkanContext.h | 1 + 23 files changed, 183 insertions(+), 42 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 4a90c318c1f7..7c1dbbbc4bb7 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -137,7 +137,7 @@ class D3D11DrawContext : public DrawContext { void DrawUP(const void *vdata, int vertexCount) override; void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override; - void BeginFrame() override; + bool BeginFrame() override; std::string GetInfoString(InfoField info) const override { switch (info) { @@ -1448,7 +1448,7 @@ void D3D11DrawContext::Clear(int mask, uint32_t colorval, float depthVal, int st } } -void D3D11DrawContext::BeginFrame() { +bool D3D11DrawContext::BeginFrame() { context_->OMSetRenderTargets(1, 
&curRenderTargetView_, curDepthStencilView_); if (curBlend_ != nullptr) { @@ -1475,6 +1475,7 @@ void D3D11DrawContext::BeginFrame() { context_->PSSetConstantBuffers(0, 1, &curPipeline_->dynamicUniforms); } } + return true; } void D3D11DrawContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y, int z, Framebuffer *dstfb, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBit, const char *tag) { diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index d4054987f006..4d8c01fa8604 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -367,7 +367,7 @@ class OpenGLContext : public DrawContext { Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override; Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override; - void BeginFrame() override; + bool BeginFrame() override; void EndFrame() override; void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override; @@ -782,10 +782,11 @@ OpenGLContext::~OpenGLContext() { } } -void OpenGLContext::BeginFrame() { +bool OpenGLContext::BeginFrame() { renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS); FrameData &frameData = frameData_[renderManager_.GetCurFrame()]; renderManager_.BeginPushBuffer(frameData.push); + return true; } void OpenGLContext::EndFrame() { diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index 3f4e6a85a7ec..47b6152a4215 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -702,7 +702,8 @@ VkResult VulkanContext::CreateDevice() { VkResult res = vkCreateDevice(physical_devices_[physical_device_], &device_info, nullptr, &device_); if (res != VK_SUCCESS) { init_error_ = "Unable to create Vulkan device"; - ERROR_LOG(G3D, "Unable to create Vulkan device"); + ERROR_LOG(G3D, "Unable to create Vulkan device: '%s'", 
VulkanResultToString(res)); + return res; } else { VulkanLoadDeviceFunctions(device_, extensionsLookup_); } diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp index 00a226897413..e3972d0288c9 100644 --- a/Common/GPU/Vulkan/VulkanDebug.cpp +++ b/Common/GPU/Vulkan/VulkanDebug.cpp @@ -76,6 +76,14 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( // Extended validation (ARM best practices) // Non-fifo validation not recommended return false; + + // These get triggered during the device lost simulation. Ignore. + case -556648736: + case 1812873262: + case 337425955: + WARN_LOG(G3D, "Validation message %d typical of device lost simulation, ignoring.", messageCode); + return false; + default: break; } diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp index 90d2c434839c..1a1e221ccc37 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.cpp +++ b/Common/GPU/Vulkan/VulkanFrameData.cpp @@ -93,6 +93,7 @@ void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) { _dbg_assert_(hasAcquired); + _dbg_assert_(!deviceLost); hasAcquired = false; _dbg_assert_(!skipSwap); @@ -132,6 +133,8 @@ VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) { } void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) { + _dbg_assert_(!deviceLost); + VkCommandBuffer cmdBufs[3]; int numCmdBufs = 0; @@ -206,7 +209,9 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame } if (res == VK_ERROR_DEVICE_LOST) { - _assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan"); + ERROR_LOG(G3D, "Lost the Vulkan device in vkQueueSubmit! 
If this happens again, switch Graphics Backend away from Vulkan"); + deviceLost = true; + return; } else { _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res)); } diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 0e1344f24e85..93fe7ad4f214 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -88,6 +88,9 @@ struct FrameData { bool syncDone = false; + // Set if the device was just lost. + bool deviceLost = false; + // Swapchain. uint32_t curSwapchainImage = -1; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index e7670e99445f..5abd03693890 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -395,6 +395,9 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, FrameData &frame vkCmdEndDebugUtilsLabelEXT(cmd); } frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared); + if (frameData.deviceLost) { + goto bail; + } // When stepping in the GE debugger, we can end up here multiple times in a "frame". // So only acquire once. @@ -447,6 +450,7 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, FrameData &frame } } +bail: // Deleting all in one go should be easier on the instruction cache than deleting // them as we go - and easier to debug because we can look backwards in the frame. if (!keepSteps) { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index c1bde6d52570..af5e625894df 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -492,6 +492,9 @@ void VulkanRenderManager::DrainCompileQueue() { void VulkanRenderManager::ThreadFunc() { SetCurrentThreadName("RenderMan"); while (true) { + if (deviceLost_) { + break; + } // Pop a task of the queue and execute it. 
VKRRenderThreadTask *task = nullptr; { @@ -503,6 +506,12 @@ void VulkanRenderManager::ThreadFunc() { renderThreadQueue_.pop(); } + if (deviceLost_) { + delete task; + // We'll clear out the rest after the break. + break; + } + // Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to // push more work when it feels like it, and just start working. if (task->runType == VKRRunType::EXIT) { @@ -516,6 +525,16 @@ void VulkanRenderManager::ThreadFunc() { delete task; } + { + // Make sure nothing is left. + std::unique_lock lock(pushMutex_); + while (!renderThreadQueue_.empty()) { + VKRRenderThreadTask *task = renderThreadQueue_.front(); + renderThreadQueue_.pop(); + delete task; + } + } + // Wait for the device to be done with everything, before tearing stuff down. // TODO: Do we need this? vkDeviceWaitIdle(vulkan_->GetDevice()); @@ -523,8 +542,11 @@ void VulkanRenderManager::ThreadFunc() { VLOG("PULL: Quitting"); } -void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) { +bool VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) { VLOG("BeginFrame"); + if (deviceLost_) { + return false; + } VkDevice device = vulkan_->GetDevice(); int curFrame = vulkan_->GetCurFrame(); @@ -545,8 +567,15 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile // This must be the very first Vulkan call we do in a new frame. // Makes sure the very last command buffer from the frame before the previous has been fully executed. if (vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX) == VK_ERROR_DEVICE_LOST) { - _assert_msg_(false, "Device lost in vkWaitForFences"); + ERROR_LOG(G3D, "Device lost in vkWaitForFences"); + frameData.deviceLost = true; + deviceLost_ = true; + // If the render thread is waiting for an event that won't come, kick it loose. 
+ pushCondVar_.notify_one(); + frameData.readyForFence = true; + return false; } + vkResetFences(device, 1, &frameData.fence); int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits; @@ -616,6 +645,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile frameData.profile.timestampDescriptions.push_back("initCmd"); VkCommandBuffer initCmd = GetInitCmd(); } + return true; } VkCommandBuffer VulkanRenderManager::GetInitCmd() { @@ -1280,6 +1310,10 @@ void VulkanRenderManager::Finish() { steps_.clear(); vulkan_->EndFrame(); insideFrame_ = false; + + if (deviceLost_) { + WARN_LOG(G3D, "VulkanRenderManager::Finish: Device lost"); + } } void VulkanRenderManager::Wipe() { @@ -1292,11 +1326,16 @@ void VulkanRenderManager::Wipe() { // Called on the render thread. // // Can be called again after a VKRRunType::SYNC on the same frame. -void VulkanRenderManager::Run(VKRRenderThreadTask &task) { +bool VulkanRenderManager::Run(VKRRenderThreadTask &task) { + _dbg_assert_(!deviceLost_); + FrameData &frameData = frameData_[task.frame]; _dbg_assert_(!frameData.hasPresentCommands); frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_); + if (frameData.deviceLost) { + return false; + } if (!frameData.hasMainCommands) { // Effectively resets both main and present command buffers, since they both live in this pool. @@ -1312,8 +1351,9 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) { queueRunner_.PreprocessSteps(task.steps); // Likely during shutdown, happens in headless. 
- if (task.steps.empty() && !frameData.hasAcquired) + if (task.steps.empty() && !frameData.hasAcquired) { frameData.skipSwap = true; + } //queueRunner_.LogSteps(stepsOnThread, false); if (IsVREnabled()) { int passes = GetVRPassesCount(); @@ -1326,10 +1366,18 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) { queueRunner_.RunSteps(task.steps, frameData, frameDataShared_); } + if (frameData.deviceLost) { + deviceLost_ = true; + return false; + } + switch (task.runType) { case VKRRunType::PRESENT: frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_); - + if (frameData.deviceLost) { + deviceLost_ = true; + return false; + } if (!frameData.skipSwap) { VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_); if (res == VK_ERROR_OUT_OF_DATE_KHR) { @@ -1369,7 +1417,12 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) { _dbg_assert_(false); } + if (frameData.deviceLost) { + deviceLost_ = true; + } VLOG("PULL: Finished running frame %d", task.frame); + + return !deviceLost_; } // Called from main thread. diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 58ef8b5d0b84..232a6c9fe9f2 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -185,7 +185,8 @@ class VulkanRenderManager { ~VulkanRenderManager(); // Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again. - void BeginFrame(bool enableProfiling, bool enableLogProfiler); + // A false return value means that the device is lost, and we should just try to end the frame ASAP without doing anything. + bool BeginFrame(bool enableProfiling, bool enableLogProfiler); // Can run on a different thread! void Finish(); // Zaps queued up commands. Use if you know there's a risk you've queued up stuff that has already been deleted. Can happen during in-game shutdown. 
@@ -461,13 +462,16 @@ class VulkanRenderManager { void ResetStats(); void DrainCompileQueue(); + bool DeviceIsLost() const { return deviceLost_; } + private: void EndCurRenderStep(); void ThreadFunc(); void CompileThreadFunc(); - void Run(VKRRenderThreadTask &task); + // Fails if the device was lost. + bool Run(VKRRenderThreadTask &task); // Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot). void FlushSync(); @@ -481,6 +485,8 @@ class VulkanRenderManager { int outOfDateFrames_ = 0; + bool deviceLost_ = false; + // Submission time state // Note: These are raw backbuffer-sized. Rotated. diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index a5edda73ba94..42d0d4a24797 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -476,7 +476,7 @@ class VKContext : public DrawContext { void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override; - void BeginFrame() override; + bool BeginFrame() override; void EndFrame() override; void WipeQueue() override; @@ -1072,15 +1072,18 @@ VKContext::~VKContext() { vulkan_->Delete().QueueDeletePipelineCache(pipelineCache_); } -void VKContext::BeginFrame() { - // TODO: Bad dependency on g_Config here! - renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS, debugFlags_ & DebugFlags::PROFILE_SCOPES); +bool VKContext::BeginFrame() { + if (!renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS, debugFlags_ & DebugFlags::PROFILE_SCOPES)) { + // Something failed badly, let's bail. 
+ return false; + } FrameData &frame = frame_[vulkan_->GetCurFrame()]; push_->BeginFrame(); frame.descriptorPool.Reset(); + return true; } void VKContext::EndFrame() { diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index c492d1c98b7a..29864010629c 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -805,7 +805,7 @@ class DrawContext { virtual void DrawUP(const void *vdata, int vertexCount) = 0; // Frame management (for the purposes of sync and resource management, necessary with modern APIs). Default implementations here. - virtual void BeginFrame() {} + virtual bool BeginFrame() { return true; } virtual void EndFrame() = 0; virtual void WipeQueue() {} diff --git a/Common/GraphicsContext.h b/Common/GraphicsContext.h index 873722764c24..945067b5f091 100644 --- a/Common/GraphicsContext.h +++ b/Common/GraphicsContext.h @@ -38,5 +38,9 @@ class GraphicsContext { // Should strive to get rid of these. virtual void Poll() {} + virtual bool DeviceIsLost() const { + return false; + } + virtual Draw::DrawContext *GetDrawContext() = 0; }; diff --git a/Common/UI/Screen.cpp b/Common/UI/Screen.cpp index 0eeaa66214e9..05602533f642 100644 --- a/Common/UI/Screen.cpp +++ b/Common/UI/Screen.cpp @@ -165,21 +165,23 @@ void ScreenManager::render() { // TODO: Make really sure that this "mismatched" pre/post only happens // when screens are "compatible" (both are UIScreens, for example). 
- backback.screen->preRender(); - backback.screen->render(); + if (backback.screen->preRender()) { + backback.screen->render(); + stack_.back().screen->render(); + if (postRenderCb_) + postRenderCb_(getUIContext(), postRenderUserdata_); + backback.screen->postRender(); + break; + } + } + default: + _assert_(stack_.back().screen); + if (stack_.back().screen->preRender()) { stack_.back().screen->render(); if (postRenderCb_) postRenderCb_(getUIContext(), postRenderUserdata_); - backback.screen->postRender(); - break; + stack_.back().screen->postRender(); } - default: - _assert_(stack_.back().screen); - stack_.back().screen->preRender(); - stack_.back().screen->render(); - if (postRenderCb_) - postRenderCb_(getUIContext(), postRenderUserdata_); - stack_.back().screen->postRender(); break; } } else { diff --git a/Common/UI/Screen.h b/Common/UI/Screen.h index fca213fdb814..9cd730f30041 100644 --- a/Common/UI/Screen.h +++ b/Common/UI/Screen.h @@ -50,7 +50,7 @@ class Screen { virtual void onFinish(DialogResult reason) {} virtual void update() {} - virtual void preRender() {} + virtual bool preRender() { return true; } // If this returns false, something is really bad and we should try to skip the rest of the frame, the error will be handled at the end. 
virtual void render() {} virtual void postRender() {} virtual void resized() {} diff --git a/Common/UI/UIScreen.cpp b/Common/UI/UIScreen.cpp index 3da8425421a9..719a9b593097 100644 --- a/Common/UI/UIScreen.cpp +++ b/Common/UI/UIScreen.cpp @@ -189,13 +189,15 @@ void UIScreen::deviceRestored() { root_->DeviceRestored(screenManager()->getDrawContext()); } -void UIScreen::preRender() { +bool UIScreen::preRender() { using namespace Draw; Draw::DrawContext *draw = screenManager()->getDrawContext(); if (!draw) { - return; + return true; + } + if (!draw->BeginFrame()) { + return false; } - draw->BeginFrame(); // Bind and clear the back buffer draw->BindFramebufferAsRenderTarget(nullptr, { RPAction::CLEAR, RPAction::CLEAR, RPAction::CLEAR, 0xFF000000 }, "UI"); screenManager()->getUIContext()->BeginFrame(); @@ -209,6 +211,7 @@ void UIScreen::preRender() { viewport.MinDepth = 0.0; draw->SetViewport(viewport); draw->SetTargetSize(g_display.pixel_xres, g_display.pixel_yres); + return true; } void UIScreen::postRender() { diff --git a/Common/UI/UIScreen.h b/Common/UI/UIScreen.h index 60eb1749b044..a1f1e86ae407 100644 --- a/Common/UI/UIScreen.h +++ b/Common/UI/UIScreen.h @@ -36,7 +36,7 @@ class UIScreen : public Screen { ~UIScreen(); void update() override; - void preRender() override; + bool preRender() override; void render() override; void postRender() override; void deviceLost() override; diff --git a/Core/Core.cpp b/Core/Core.cpp index daf858a30bfa..b77148441d2c 100644 --- a/Core/Core.cpp +++ b/Core/Core.cpp @@ -230,6 +230,10 @@ void Core_RunLoop(GraphicsContext *ctx) { Core_StateProcessed(); double startTime = time_now_d(); UpdateRunLoop(); + if (graphicsContext->DeviceIsLost()) { + // Let the outer loop take care of this. + return; + } // Simple throttling to not burn the GPU in the menu. 
double diffTime = time_now_d() - startTime; @@ -243,6 +247,10 @@ void Core_RunLoop(GraphicsContext *ctx) { while ((coreState == CORE_RUNNING || coreState == CORE_STEPPING) && GetUIState() == UISTATE_INGAME) { UpdateRunLoop(); + if (graphicsContext->DeviceIsLost()) { + // Let the outer loop take care of this. + break; + } if (!windowHidden && !Core_IsStepping()) { ctx->SwapBuffers(); @@ -337,6 +345,9 @@ bool Core_Run(GraphicsContext *ctx) { return false; } Core_RunLoop(ctx); + if (ctx->DeviceIsLost()) { + return true; + } continue; } @@ -345,6 +356,9 @@ bool Core_Run(GraphicsContext *ctx) { case CORE_STEPPING: // enter a fast runloop Core_RunLoop(ctx); + if (ctx->DeviceIsLost()) { + return true; + } if (coreState == CORE_POWERDOWN) { Core_StateProcessed(); return true; @@ -357,7 +371,6 @@ bool Core_Run(GraphicsContext *ctx) { case CORE_RUNTIME_ERROR: // Exit loop!! Core_StateProcessed(); - return true; case CORE_NEXTFRAME: diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 0d926f9531da..02f3facea09b 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -1414,10 +1414,12 @@ static void DrawFrameTimes(UIContext *ctx, const Bounds &bounds) { ctx->RebindTexture(); } -void EmuScreen::preRender() { +bool EmuScreen::preRender() { using namespace Draw; DrawContext *draw = screenManager()->getDrawContext(); - draw->BeginFrame(); + if (!draw->BeginFrame()) { + return false; + } // Here we do NOT bind the backbuffer or clear the screen, unless non-buffered. // The emuscreen is different than the others - we really want to allow the game to render to framebuffers // before we ever bind the backbuffer for rendering. 
On mobile GPUs, switching back and forth between render @@ -1443,6 +1445,7 @@ void EmuScreen::preRender() { draw->SetViewport(viewport); } draw->SetTargetSize(g_display.pixel_xres, g_display.pixel_yres); + return true; } void EmuScreen::postRender() { diff --git a/UI/EmuScreen.h b/UI/EmuScreen.h index 7eda3988d413..af0a4fb411b9 100644 --- a/UI/EmuScreen.h +++ b/UI/EmuScreen.h @@ -44,7 +44,7 @@ class EmuScreen : public UIScreen { void update() override; void render() override; - void preRender() override; + bool preRender() override; void postRender() override; void dialogFinished(const Screen *dialog, DialogResult result) override; void sendMessage(const char *msg, const char *value) override; diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index f5f6b0e508f0..c90c4c4eeffb 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -1045,6 +1045,7 @@ void RenderOverlays(UIContext *dc, void *userdata) { void NativeRender(GraphicsContext *graphicsContext) { _dbg_assert_(graphicsContext != nullptr); _dbg_assert_(g_screenManager != nullptr); + _dbg_assert_(!graphicsContext->DeviceIsLost()); g_GameManager.Update(); diff --git a/Windows/EmuThread.cpp b/Windows/EmuThread.cpp index 71af97e683f7..52f4c227f841 100644 --- a/Windows/EmuThread.cpp +++ b/Windows/EmuThread.cpp @@ -259,6 +259,7 @@ void MainThreadFunc() { // No safe way out without graphics. ExitProcess(1); + return; // This return never executes, but helps the compiler. } GraphicsContext *graphicsContext = g_graphicsContext; @@ -301,6 +302,28 @@ void MainThreadFunc() { if (!Core_IsActive()) UpdateUIState(UISTATE_MENU); Core_Run(g_graphicsContext); + if (g_graphicsContext->DeviceIsLost()) { + // Try to recreate the device here. 
+ NativeShutdownGraphics(); + graphicsContext->StopThread(); + graphicsContext->ShutdownFromRenderThread(); + delete graphicsContext; + graphicsContext = nullptr; + + bool success = CreateGraphicsBackend(&error_string, &g_graphicsContext); + if (success) { + graphicsContext = g_graphicsContext; + // Main thread is the render thread. + success = g_graphicsContext->InitFromRenderThread(&error_string); + } + if (!success) { + ERROR_LOG(G3D, "Failed to recreate Vulkan device after device loss"); + coreState = CORE_POWERDOWN; + break; + } + NativeInitGraphics(graphicsContext); + } + if (coreState == CORE_BOOT_ERROR) { break; } @@ -329,11 +352,12 @@ void MainThreadFunc() { if (!useEmuThread) { NativeShutdownGraphics(); } + if (g_graphicsContext) { + g_graphicsContext->ThreadEnd(); + g_graphicsContext->ShutdownFromRenderThread(); - g_graphicsContext->ThreadEnd(); - g_graphicsContext->ShutdownFromRenderThread(); - - g_graphicsContext->Shutdown(); + g_graphicsContext->Shutdown(); + } UpdateConsolePosition(); NativeShutdown(); diff --git a/Windows/GPU/WindowsVulkanContext.cpp b/Windows/GPU/WindowsVulkanContext.cpp index 818f398420c8..d51762c49516 100644 --- a/Windows/GPU/WindowsVulkanContext.cpp +++ b/Windows/GPU/WindowsVulkanContext.cpp @@ -119,6 +119,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m vulkan_->ChooseDevice(deviceNum); if (vulkan_->CreateDevice() != VK_SUCCESS) { *error_message = vulkan_->InitError(); + vulkan_->DestroyInstance(); delete vulkan_; vulkan_ = nullptr; return false; @@ -184,3 +185,7 @@ void WindowsVulkanContext::Poll() { void *WindowsVulkanContext::GetAPIContext() { return vulkan_; } + +bool WindowsVulkanContext::DeviceIsLost() const { + return renderManager_->DeviceIsLost(); +} diff --git a/Windows/GPU/WindowsVulkanContext.h b/Windows/GPU/WindowsVulkanContext.h index 49e6613e3eb5..dfc4f5625970 100644 --- a/Windows/GPU/WindowsVulkanContext.h +++ b/Windows/GPU/WindowsVulkanContext.h @@ -35,6 +35,7 @@ class 
WindowsVulkanContext : public WindowsGraphicsContext { void Poll() override; void *GetAPIContext() override; + bool DeviceIsLost() const override; Draw::DrawContext *GetDrawContext() override { return draw_; } private: From 6df78568c854afdd557e777d4c9c0375c91d57de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 29 May 2023 18:32:37 +0200 Subject: [PATCH 4/4] Clear the shader cache --- GPU/Vulkan/ShaderManagerVulkan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index fcae89a8ea41..9a781c2217a4 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -237,6 +237,7 @@ ShaderManagerVulkan::~ShaderManagerVulkan() { } void ShaderManagerVulkan::DeviceLost() { + Clear(); // We only really need to do this if the device is actually lost, so DeviceLost might need an argument. draw_ = nullptr; }