Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment with handling VK_LOST_DEVICE #17532

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Common/GPU/D3D11/thin3d_d3d11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class D3D11DrawContext : public DrawContext {
void DrawUP(const void *vdata, int vertexCount) override;
void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override;

void BeginFrame() override;
bool BeginFrame() override;

std::string GetInfoString(InfoField info) const override {
switch (info) {
Expand Down Expand Up @@ -1448,7 +1448,7 @@ void D3D11DrawContext::Clear(int mask, uint32_t colorval, float depthVal, int st
}
}

void D3D11DrawContext::BeginFrame() {
bool D3D11DrawContext::BeginFrame() {
context_->OMSetRenderTargets(1, &curRenderTargetView_, curDepthStencilView_);

if (curBlend_ != nullptr) {
Expand All @@ -1475,6 +1475,7 @@ void D3D11DrawContext::BeginFrame() {
context_->PSSetConstantBuffers(0, 1, &curPipeline_->dynamicUniforms);
}
}
return true;
}

void D3D11DrawContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x, int y, int z, Framebuffer *dstfb, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBit, const char *tag) {
Expand Down
5 changes: 3 additions & 2 deletions Common/GPU/OpenGL/thin3d_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ class OpenGLContext : public DrawContext {
Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override;
Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;

void BeginFrame() override;
bool BeginFrame() override;
void EndFrame() override;

void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;
Expand Down Expand Up @@ -782,10 +782,11 @@ OpenGLContext::~OpenGLContext() {
}
}

void OpenGLContext::BeginFrame() {
// Begins a new frame on the OpenGL backend.
// Always returns true in this implementation.
bool OpenGLContext::BeginFrame() {
// Start the render manager's frame, passing along the PROFILE_TIMESTAMPS bit of debugFlags_.
renderManager_.BeginFrame(debugFlags_ & DebugFlags::PROFILE_TIMESTAMPS);
// Look up the per-frame data slot for the frame the render manager is currently on...
FrameData &frameData = frameData_[renderManager_.GetCurFrame()];
// ...and begin its push buffer.
renderManager_.BeginPushBuffer(frameData.push);
return true;
}

void OpenGLContext::EndFrame() {
Expand Down
68 changes: 46 additions & 22 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,8 @@ VkResult VulkanContext::CreateDevice() {
VkResult res = vkCreateDevice(physical_devices_[physical_device_], &device_info, nullptr, &device_);
if (res != VK_SUCCESS) {
init_error_ = "Unable to create Vulkan device";
ERROR_LOG(G3D, "Unable to create Vulkan device");
ERROR_LOG(G3D, "Unable to create Vulkan device: '%s'", VulkanResultToString(res));
return res;
} else {
VulkanLoadDeviceFunctions(device_, extensionsLookup_);
}
Expand Down Expand Up @@ -1455,6 +1456,50 @@ bool VulkanContext::CreateShaderModule(const std::vector<uint32_t> &spirv, VkSha
}
}

// Only to be used for debugging lost device handling.
// This works on NVIDIA to cause a lost device, need to try others.
void VulkanContext::IntentionallyLoseDevice() {
_assert_(device_);
VkBufferCreateInfo b{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
b.size = 1024;
b.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VmaAllocationCreateInfo allocCreateInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
VmaAllocationInfo allocInfo{};

VkBuffer buffer;
VmaAllocation alloc;

VkResult result = vmaCreateBuffer(Allocator(), &b, &allocCreateInfo, &buffer, &alloc, &allocInfo);
_assert_(result == VK_SUCCESS);

VkCommandPoolCreateInfo ci{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
VkCommandPool cmdPool;
vkCreateCommandPool(device_, &ci, nullptr, &cmdPool);
VkCommandBufferAllocateInfo cmdAllocInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
cmdAllocInfo.commandPool = cmdPool;
cmdAllocInfo.commandBufferCount = 1;
cmdAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
VkCommandBuffer cmdBuf;
vkAllocateCommandBuffers(device_, &cmdAllocInfo, &cmdBuf);
VkCommandBufferBeginInfo beginInfo{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cmdBuf, &beginInfo);
// Nonsense!
VkBufferCopy info{ 0, 1000000000, 100000 };
vkCmdCopyBuffer(cmdBuf, buffer, buffer, 1, &info);
vkEndCommandBuffer(cmdBuf);
VkSubmitInfo submitInfo{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmdBuf;
// NOTE: Depending on which thread this is called from, this can itself be a violation (queueing stuff from a different thread
// on a queue used by another thread).
VkResult retval = vkQueueSubmit(gfx_queue_, 1, &submitInfo, VK_NULL_HANDLE);
// We might not actually lose the device immediately, but good to confirm.
NOTICE_LOG(G3D, "Tried to lose the device, vkQueueSubmit retval = %s", VulkanResultToString(retval));
// At this point, the device should be lost.
}

void TransitionImageLayout2(VkCommandBuffer cmd, VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
VkImageLayout oldImageLayout, VkImageLayout newImageLayout,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
Expand Down Expand Up @@ -1712,27 +1757,6 @@ void VulkanDeleteList::PerformDeletes(VulkanContext *vulkan, VmaAllocator alloca
queryPools_.clear();
}

// Fetches the memory requirements of `image` into *mem_reqs.
// *dedicatedAllocation is set to true when KHR_dedicated_allocation is available and the
// driver either requires or prefers a dedicated allocation for the image; otherwise false.
void VulkanContext::GetImageMemoryRequirements(VkImage image, VkMemoryRequirements *mem_reqs, bool *dedicatedAllocation) {
	// Without the extension, only the basic query is available.
	if (!Extensions().KHR_dedicated_allocation) {
		vkGetImageMemoryRequirements(GetDevice(), image, mem_reqs);
		*dedicatedAllocation = false;
		return;
	}

	// Chain the dedicated-requirements struct onto the extended query output.
	VkMemoryDedicatedRequirementsKHR dedicatedReqs{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR};
	VkMemoryRequirements2KHR reqs2 = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR};
	reqs2.pNext = &dedicatedReqs;

	VkImageMemoryRequirementsInfo2KHR queryInfo{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR};
	queryInfo.image = image;

	vkGetImageMemoryRequirements2KHR(GetDevice(), &queryInfo, &reqs2);

	*mem_reqs = reqs2.memoryRequirements;

	const bool required = dedicatedReqs.requiresDedicatedAllocation != VK_FALSE;
	const bool preferred = dedicatedReqs.prefersDedicatedAllocation != VK_FALSE;
	*dedicatedAllocation = required || preferred;
}

bool IsHashMaliDriverVersion(const VkPhysicalDeviceProperties &props) {
// ARM used to put a hash in place of the driver version.
// Now they only use major versions. We'll just make a bad heuristic.
Expand Down
8 changes: 6 additions & 2 deletions Common/GPU/Vulkan/VulkanContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,6 @@ class VulkanContext {
return devicePerfClass_;
}

void GetImageMemoryRequirements(VkImage image, VkMemoryRequirements *mem_reqs, bool *dedicatedAllocation);

// Returns the VMA allocator handle stored in allocator_.
VmaAllocator Allocator() const {
return allocator_;
}
Expand All @@ -383,6 +381,10 @@ class VulkanContext {
return availablePresentModes_;
}

// Forces a device loss, to help debug device recovery.
// It'll create its own command buffer for this.
void IntentionallyLoseDevice();

private:
bool ChooseQueue();

Expand Down Expand Up @@ -476,6 +478,8 @@ class VulkanContext {
std::vector<VkCommandBuffer> cmdQueue_;

VmaAllocator allocator_ = VK_NULL_HANDLE;

bool deviceLost_ = false;
};

// Detailed control.
Expand Down
8 changes: 8 additions & 0 deletions Common/GPU/Vulkan/VulkanDebug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// Extended validation (ARM best practices)
// Non-fifo validation not recommended
return false;

// These get triggered during the device lost simulation. Ignore.
case -556648736:
case 1812873262:
case 337425955:
WARN_LOG(G3D, "Validation message %d typical of device lost simulation, ignoring.", messageCode);
return false;

default:
break;
}
Expand Down
7 changes: 6 additions & 1 deletion Common/GPU/Vulkan/VulkanFrameData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared)

VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
_dbg_assert_(hasAcquired);
_dbg_assert_(!deviceLost);
hasAcquired = false;
_dbg_assert_(!skipSwap);

Expand Down Expand Up @@ -132,6 +133,8 @@ VkCommandBuffer FrameData::GetInitCmd(VulkanContext *vulkan) {
}

void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &sharedData) {
_dbg_assert_(!deviceLost);

VkCommandBuffer cmdBufs[3];
int numCmdBufs = 0;

Expand Down Expand Up @@ -206,7 +209,9 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
}

if (res == VK_ERROR_DEVICE_LOST) {
_assert_msg_(false, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
ERROR_LOG(G3D, "Lost the Vulkan device in vkQueueSubmit! If this happens again, switch Graphics Backend away from Vulkan");
deviceLost = true;
return;
} else {
_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
}
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/VulkanFrameData.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ struct FrameData {

bool syncDone = false;

// Set if the device was just lost.
bool deviceLost = false;

// Swapchain.
uint32_t curSwapchainImage = -1;

Expand Down
1 change: 1 addition & 0 deletions Common/GPU/Vulkan/VulkanLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ void VulkanFree() {
const char *VulkanResultToString(VkResult res) {
static char temp[128]{};
switch (res) {
case VK_SUCCESS: return "VK_SUCCESS";
case VK_NOT_READY: return "VK_NOT_READY";
case VK_TIMEOUT: return "VK_TIMEOUT";
case VK_EVENT_SET: return "VK_EVENT_SET";
Expand Down
4 changes: 4 additions & 0 deletions Common/GPU/Vulkan/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, FrameData &frame
vkCmdEndDebugUtilsLabelEXT(cmd);
}
frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared);
if (frameData.deviceLost) {
goto bail;
}

// When stepping in the GE debugger, we can end up here multiple times in a "frame".
// So only acquire once.
Expand Down Expand Up @@ -447,6 +450,7 @@ void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, FrameData &frame
}
}

bail:
// Deleting all in one go should be easier on the instruction cache than deleting
// them as we go - and easier to debug because we can look backwards in the frame.
if (!keepSteps) {
Expand Down
Loading