Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bound-check dynamically indexed shader input registers #4603

Merged
merged 2 commits into from
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 49 additions & 14 deletions src/dxbc/dxbc_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ namespace dxvk {
return this->emitDclGlobalFlags(ins);

case DxbcOpcode::DclIndexRange:
return; // not needed for anything
return this->emitDclIndexRange(ins);

case DxbcOpcode::DclTemps:
return this->emitDclTemps(ins);
Expand Down Expand Up @@ -375,6 +375,21 @@ namespace dxvk {
}


// Records the register range declared by a dcl_index_range
// instruction in m_indexRanges. The recorded ranges are looked
// up again when a relatively-addressed register is loaded, in
// order to bounds-check the dynamic index.
void DxbcCompiler::emitDclIndexRange(const DxbcShaderInstruction& ins) {
  // dcl_index_range has two operands:
  //    (dst0) First register of the range, either an input or output register
  //    (imm0) Number of registers in the range
  const auto& reg = ins.dst[0];

  // The register number is stored in the last index of the operand.
  // An operand without any index has no register number, and computing
  // idxDim - 1 would wrap around and read past the index array, so
  // there is nothing meaningful to record in that case.
  if (!reg.idxDim)
    return;

  DxbcIndexRange range = { };
  range.type   = reg.type;
  range.start  = reg.idx[reg.idxDim - 1u].offset;
  range.length = ins.imm[0].u32;

  m_indexRanges.push_back(range);
}


void DxbcCompiler::emitDclTemps(const DxbcShaderInstruction& ins) {
// dcl_temps has one operand:
// (imm0) Number of temp registers
Expand Down Expand Up @@ -5737,14 +5752,37 @@ namespace dxvk {

DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw(
const DxbcRegister& reg) {
if (reg.type == DxbcOperandType::IndexableTemp) {
bool doBoundsCheck = reg.idx[1].relReg != nullptr;
DxbcRegisterValue vectorId = emitIndexLoad(reg.idx[1]);
// Try to find index range for the given register
const DxbcIndexRange* indexRange = nullptr;

if (reg.idxDim && reg.idx[reg.idxDim - 1u].relReg) {
uint32_t offset = reg.idx[reg.idxDim - 1u].offset;

for (const auto& range : m_indexRanges) {
if (reg.type == range.type && offset >= range.start && offset < range.start + range.length)
indexRange = &range;
}
}

if (reg.type == DxbcOperandType::IndexableTemp || indexRange) {
bool doBoundsCheck = reg.idx[reg.idxDim - 1u].relReg != nullptr;

if (doBoundsCheck) {
uint32_t boundsCheck = m_module.opULessThan(
m_module.defBoolType(), vectorId.id,
m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength));
DxbcRegisterValue indexId = emitIndexLoad(reg.idx[reg.idxDim - 1u]);
uint32_t boundsCheck = 0u;

if (reg.type == DxbcOperandType::IndexableTemp) {
boundsCheck = m_module.opULessThan(
m_module.defBoolType(), indexId.id,
m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength));
} else {
uint32_t adjustedId = m_module.opISub(getVectorTypeId(indexId.type),
indexId.id, m_module.consti32(indexRange->start));

boundsCheck = m_module.opULessThan(
m_module.defBoolType(), adjustedId,
m_module.constu32(indexRange->length));
}

// Kind of ugly to have an empty else block here but there's no
// way for us to know the current block ID for the phi below
Expand Down Expand Up @@ -6155,13 +6193,11 @@ namespace dxvk {

uint32_t threadId = m_module.opLoad(
intTypeId, m_cs.builtinLocalInvocationIndex);

uint32_t strideId = m_module.constu32(numThreads);
uint32_t zeroId = m_module.constu32(0);
uint32_t zeroId = m_module.constu32(0);

for (uint32_t e = 0; e < numElementsPerThread; e++) {
uint32_t ofsId = m_module.opIAdd(intTypeId, threadId,
m_module.opIMul(intTypeId, strideId, m_module.constu32(e)));
m_module.constu32(numThreads * e));

uint32_t ptrId = m_module.opAccessChain(
ptrTypeId, m_gRegs[i].varId, 1, &ofsId);
Expand All @@ -6183,9 +6219,8 @@ namespace dxvk {

m_module.opLabel(cond.labelIf);

uint32_t ofsId = m_module.opIAdd(intTypeId,
m_module.constu32(numThreads * numElementsPerThread),
threadId);
uint32_t ofsId = m_module.opIAdd(intTypeId, threadId,
m_module.constu32(numThreads * numElementsPerThread));

uint32_t ptrId = m_module.opAccessChain(
ptrTypeId, m_gRegs[i].varId, 1, &ofsId);
Expand Down
17 changes: 16 additions & 1 deletion src/dxbc/dxbc_compiler.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <array>
#include <utility>
#include <vector>

#include "../spirv/spirv_module.h"
Expand Down Expand Up @@ -135,6 +136,13 @@ namespace dxvk {
uint32_t component = 0;
};


/**
 * \brief Index range
 *
 * Describes a contiguous range of registers of a single
 * operand type. A register with the given type belongs to
 * the range if its index is in [start, start + length).
 */
struct DxbcIndexRange {
  /// Operand type of the registers in the range
  DxbcOperandType type    = DxbcOperandType { };
  /// Index of the first register in the range
  uint32_t        start   = 0u;
  /// Number of registers in the range
  uint32_t        length  = 0u;
};


/**
* \brief Vertex shader-specific structure
Expand Down Expand Up @@ -445,6 +453,10 @@ namespace dxvk {
// xfb output registers for geometry shaders
std::vector<DxbcXfbVar> m_xfbVars;

/////////////////////////////////////////////
// Dynamically indexed input and output regs
std::vector<DxbcIndexRange> m_indexRanges = { };

//////////////////////////////////////////////////////
// Shader resource variables. These provide access to
// constant buffers, samplers, textures, and UAVs.
Expand Down Expand Up @@ -473,7 +485,7 @@ namespace dxvk {
uint32_t m_vArrayLengthId = 0;

uint32_t m_vArray = 0;

////////////////////////////////////////////////////
// Per-vertex input and output blocks. Depending on
// the shader stage, these may be declared as arrays.
Expand Down Expand Up @@ -546,6 +558,9 @@ namespace dxvk {
void emitDclGlobalFlags(
const DxbcShaderInstruction& ins);

void emitDclIndexRange(
const DxbcShaderInstruction& ins);

void emitDclTemps(
const DxbcShaderInstruction& ins);

Expand Down
Loading