diff --git a/src.cmake b/src.cmake index 9bac6a5b15..0f1f993ff0 100644 --- a/src.cmake +++ b/src.cmake @@ -147,6 +147,9 @@ set(RENDERERLIST set(GLSLSOURCELIST ${ENGINE_DIR}/renderer/glsl_source/material_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/material_fp.glsl + ${ENGINE_DIR}/renderer/glsl_source/cull_cp.glsl + ${ENGINE_DIR}/renderer/glsl_source/clearSurfaces_cp.glsl + ${ENGINE_DIR}/renderer/glsl_source/processSurfaces_cp.glsl ${ENGINE_DIR}/renderer/glsl_source/skybox_vp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_fp.glsl ${ENGINE_DIR}/renderer/glsl_source/ssao_vp.glsl diff --git a/src/engine/renderer/Material.cpp b/src/engine/renderer/Material.cpp index 39efcbe8c8..c2b6edba75 100644 --- a/src/engine/renderer/Material.cpp +++ b/src/engine/renderer/Material.cpp @@ -34,11 +34,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Material.cpp #include "Material.h" - #include "tr_local.h" -GLSSBO materialsSSBO( "materials", 0 ); -GLIndirectBuffer commandBuffer( "drawCommands" ); +GLSSBO materialsSSBO( "materials", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceDescriptorsSSBO( "surfaceDescriptors", 1, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLSSBO surfaceCommandsSSBO( "surfaceCommands", 2, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLBuffer culledCommandsBuffer( "culledCommands", 3, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); +GLUBO surfaceBatchesUBO( "surfaceBatches", 0, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT ); +GLBuffer atomicCommandCountersBuffer( "atomicCommandCounters", 4, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT, GL_MAP_FLUSH_EXPLICIT_BIT ); MaterialSystem materialSystem; static void ComputeDynamics( shaderStage_t* pStage ) { @@ -956,6 +959,8 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { pStage->colorRenderer( pStage ); + drawSurf->drawCommandIDs[stage] = lastCommandID; + if ( pStage->dynamic ) { drawSurf->materialsSSBOOffset[stage] = ( 
SSBOOffset - dynamicDrawSurfsOffset + drawSurfCount * material.shader->GetPaddedSize() ) / material.shader->GetPaddedSize(); @@ -978,37 +983,153 @@ void MaterialSystem::GenerateWorldMaterialsBuffer() { void MaterialSystem::GenerateWorldCommandBuffer() { Log::Debug( "Generating world command buffer" ); - uint count = 0; - for ( const MaterialPack& pack : materialPacks ) { - for ( const Material& material : pack.materials ) { - count += material.drawCommands.size(); + totalBatchCount = 0; + + uint batchOffset = 0; + uint globalID = 0; + for ( MaterialPack& pack : materialPacks ) { + for ( Material& material : pack.materials ) { + material.surfaceCommandBatchOffset = batchOffset; + + const uint cmdCount = material.drawCommands.size(); + const uint batchCount = cmdCount % SURFACE_COMMANDS_PER_BATCH == 0 ? cmdCount / SURFACE_COMMANDS_PER_BATCH + : cmdCount / SURFACE_COMMANDS_PER_BATCH + 1; + + material.surfaceCommandBatchOffset = batchOffset; + material.surfaceCommandBatchCount = batchCount; + + batchOffset += batchCount; + material.globalID = globalID; + + totalBatchCount += batchCount; + globalID++; } } - if ( count == 0 ) { - return; - } + Log::Debug( "Total batch count: %u", totalBatchCount ); - Log::Debug( "CmdBuffer size: %u", count ); + skipDrawCommands = true; + drawSurf_t* drawSurf; - commandBuffer.BindBuffer(); - glBufferData( GL_DRAW_INDIRECT_BUFFER, count * sizeof( GLIndirectBuffer::GLIndirectCommand ), nullptr, GL_STATIC_DRAW ); + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsCount = totalDrawSurfs; + descriptorSize = BOUNDING_SPHERE_SIZE + maxStages; + glBufferData( GL_SHADER_STORAGE_BUFFER, surfaceDescriptorsCount * descriptorSize * sizeof( uint32_t ), + nullptr, GL_STATIC_DRAW ); + uint32_t* surfaceDescriptors = surfaceDescriptorsSSBO.MapBufferRange( surfaceDescriptorsCount * descriptorSize ); + + culledCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH; + surfaceCommandsCount = totalBatchCount * SURFACE_COMMANDS_PER_BATCH + 1; + + 
surfaceCommandsSSBO.BindBuffer(); + surfaceCommandsSSBO.BufferStorage( surfaceCommandsCount * SURFACE_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + surfaceCommandsSSBO.MapAll(); + SurfaceCommand* surfaceCommands = ( SurfaceCommand* ) surfaceCommandsSSBO.GetData(); + memset( surfaceCommands, 0, surfaceCommandsCount * sizeof( SurfaceCommand ) * MAX_VIEWFRAMES ); + + culledCommandsBuffer.BindBuffer( GL_SHADER_STORAGE_BUFFER ); + culledCommandsBuffer.BufferStorage( GL_SHADER_STORAGE_BUFFER, + culledCommandsCount * INDIRECT_COMMAND_SIZE * MAX_VIEWFRAMES, 1, nullptr ); + culledCommandsBuffer.MapAll( GL_SHADER_STORAGE_BUFFER ); + GLIndirectBuffer::GLIndirectCommand* culledCommands = ( GLIndirectBuffer::GLIndirectCommand* ) culledCommandsBuffer.GetData(); + memset( culledCommands, 0, culledCommandsCount * sizeof( GLIndirectBuffer::GLIndirectCommand ) * MAX_VIEWFRAMES ); + culledCommandsBuffer.FlushAll( GL_SHADER_STORAGE_BUFFER ); + + surfaceBatchesUBO.BindBuffer(); + // Multiply by 2 because we write a uvec2, which is aligned as vec4 + glBufferData( GL_UNIFORM_BUFFER, MAX_SURFACE_COMMAND_BATCHES * 2 * sizeof( SurfaceCommandBatch ), nullptr, GL_STATIC_DRAW ); + SurfaceCommandBatch* surfaceCommandBatches = + ( SurfaceCommandBatch* ) surfaceBatchesUBO.MapBufferRange( MAX_SURFACE_COMMAND_BATCHES * 2 * SURFACE_COMMAND_BATCH_SIZE ); + + // memset( (void*) surfaceCommandBatches, 0, MAX_SURFACE_COMMAND_BATCHES * 2 * sizeof( SurfaceCommandBatch ) ); + // Fuck off gcc + for ( int i = 0; i < MAX_SURFACE_COMMAND_BATCHES * 2; i++ ) { + surfaceCommandBatches[i] = {}; + } - GLIndirectBuffer::GLIndirectCommand* commands = commandBuffer.MapBufferRange( count ); - uint offset = 0; + uint id = 0; + uint matID = 0; + uint subID = 0; for ( MaterialPack& pack : materialPacks ) { - for ( Material& material : pack.materials ) { - material.staticCommandOffset = offset; - - for ( const DrawCommand& drawCmd : material.drawCommands ) { - memcpy( commands, &drawCmd.cmd, sizeof( 
GLIndirectBuffer::GLIndirectCommand ) ); - commands++; - offset++; + for ( Material& mat : pack.materials ) { + for ( uint i = 0; i < mat.surfaceCommandBatchCount; i++ ) { + surfaceCommandBatches[id * 4 + subID].materialIDs[0] = matID; + surfaceCommandBatches[id * 4 + subID].materialIDs[1] = mat.surfaceCommandBatchOffset; + subID++; + if ( subID == 4 ) { + id++; + subID = 0; + } } + matID++; } } - commandBuffer.UnmapBuffer(); + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.BufferStorage( GL_ATOMIC_COUNTER_BUFFER, + MAX_COMMAND_COUNTERS * MAX_VIEWS, MAX_FRAMES, nullptr ); + atomicCommandCountersBuffer.MapAll( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = (uint32_t*) atomicCommandCountersBuffer.GetData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * MAX_VIEWFRAMES * sizeof(uint32_t) ); + + for ( int i = 0; i < tr.refdef.numDrawSurfs; i++ ) { + drawSurf = &tr.refdef.drawSurfs[i]; + if ( drawSurf->entity != &tr.worldEntity ) { + continue; + } + + shader_t* shader = drawSurf->shader; + if ( !shader ) { + continue; + } + + shader = shader->remappedShader ? 
shader->remappedShader : shader; + if ( shader->isSky || shader->isPortal ) { + continue; + } + + tess.multiDrawPrimitives = 0; + tess.numIndexes = 0; + tess.numVertexes = 0; + tess.attribsSet = 0; + + skipSurface = false; + rb_surfaceTable[Util::ordinal( *( drawSurf->surface ) )]( drawSurf->surface ); + + // Don't add SF_SKIP surfaces + if ( skipSurface ) { + continue; + } + + SurfaceDescriptor surface; + VectorCopy( ( ( srfGeneric_t* ) drawSurf->surface )->origin, surface.boundingSphere.origin ); + surface.boundingSphere.radius = ( ( srfGeneric_t* ) drawSurf->surface )->radius; + + for ( int stage = 0; stage < drawSurf->shader->numStages; stage++ ) { + const Material* material = &materialPacks[drawSurf->materialPackIDs[stage]].materials[drawSurf->materialIDs[stage]]; + uint cmdID = material->surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH + drawSurf->drawCommandIDs[stage]; + cmdID++; // Add 1 because the first surface command is always reserved as a fake command + surface.surfaceCommandIDs[stage] = cmdID; + + SurfaceCommand surfaceCommand; + surfaceCommand.enabled = 0; + surfaceCommand.drawCommand = material->drawCommands[drawSurf->drawCommandIDs[stage]].cmd; + surfaceCommands[cmdID] = surfaceCommand; + } + memcpy( surfaceDescriptors, &surface, descriptorSize * sizeof( uint32_t ) ); + surfaceDescriptors += descriptorSize; + } + + for ( int i = 0; i < MAX_VIEWFRAMES; i++ ) { + memcpy( surfaceCommands + surfaceCommandsCount * i, surfaceCommands, surfaceCommandsCount * sizeof( SurfaceCommand ) ); + } + + surfaceDescriptorsSSBO.BindBuffer(); + surfaceDescriptorsSSBO.UnmapBuffer(); + + surfaceBatchesUBO.BindBuffer(); + surfaceBatchesUBO.UnmapBuffer(); + GL_CheckErrors(); } @@ -1268,6 +1389,7 @@ void MaterialSystem::GenerateWorldMaterials() { backEnd.currentEntity = &tr.worldEntity; drawSurf_t* drawSurf; + totalDrawSurfs = 0; uint id = 0; uint previousMaterialID = 0; @@ -1298,6 +1420,8 @@ void MaterialSystem::GenerateWorldMaterials() { continue; } + 
totalDrawSurfs++; + for ( int stage = 0; stage < shader->numStages; stage++ ) { shaderStage_t* pStage = shader->stages[stage]; @@ -1662,6 +1786,114 @@ void MaterialSystem::UpdateDynamicSurfaces() { materialsSSBO.UnmapBuffer(); } +void MaterialSystem::UpdateFrameData() { + /* atomicCommandCountersBuffer.AreaIncr(); + + atomicCommandCountersBuffer.BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + uint32_t* atomicCommandCounters = atomicCommandCountersBuffer.GetCurrentAreaData(); + memset( atomicCommandCounters, 0, MAX_COMMAND_COUNTERS * sizeof(uint32_t)); + atomicCommandCountersBuffer.FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + atomicCommandCountersBuffer.UnBindBuffer( GL_ATOMIC_COUNTER_BUFFER ); */ + + atomicCommandCountersBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + gl_clearSurfacesShader->BindProgram( 0 ); + gl_clearSurfacesShader->SetUniform_Frame( nextFrame ); + gl_clearSurfacesShader->DispatchCompute( MAX_VIEWS, 1, 1 ); + atomicCommandCountersBuffer.UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::QueueSurfaceCull( const uint viewID, const frustum_t* frustum ) { + memcpy( frames[nextFrame].viewFrames[viewID].frustum, frustum, sizeof( frustum_t ) ); + frames[nextFrame].viewCount++; +} + +void MaterialSystem::CullSurfaces() { + surfaceDescriptorsSSBO.BindBufferBase(); + surfaceCommandsSSBO.BindBufferBase(); + culledCommandsBuffer.BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.BindBufferBase(); + atomicCommandCountersBuffer.BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + for ( uint view = 0; view < frames[nextFrame].viewCount; view++ ) { + frustum_t* frustum = &frames[nextFrame].viewFrames[view].frustum; + + vec4_t frustumPlanes[6]; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( PVSLocked ? lockedFrustum[j].normal : frustum[0][j].normal, frustumPlanes[j] ); + frustumPlanes[j][3] = PVSLocked ? 
lockedFrustum[j].dist : frustum[0][j].dist; + } + + gl_cullShader->BindProgram( 0 ); + uint globalWorkGroupX = totalDrawSurfs % MAX_COMMAND_COUNTERS == 0 ? + totalDrawSurfs / MAX_COMMAND_COUNTERS : totalDrawSurfs / MAX_COMMAND_COUNTERS + 1; + gl_cullShader->SetUniform_TotalDrawSurfs( totalDrawSurfs ); + gl_cullShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + + if ( PVSLocked ) { + if ( r_lockpvs->integer == 0 ) { + PVSLocked = false; + } + } + if ( r_lockpvs->integer == 1 && !PVSLocked ) { + PVSLocked = true; + for ( int j = 0; j < 6; j++ ) { + VectorCopy( frustum[0][j].normal, lockedFrustum[j].normal ); + lockedFrustum[j].dist = frustum[0][j].dist; + } + } + + // FIXME: Make far plane work properly + gl_cullShader->SetUniform_Frustum( frustumPlanes ); + + gl_cullShader->DispatchCompute( globalWorkGroupX, 1, 1 ); + + gl_processSurfacesShader->BindProgram( 0 ); + gl_processSurfacesShader->SetUniform_Frame( nextFrame ); + gl_processSurfacesShader->SetUniform_ViewID( view ); + gl_processSurfacesShader->SetUniform_SurfaceCommandsOffset( surfaceCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + gl_processSurfacesShader->SetUniform_CulledCommandsOffset( culledCommandsCount * ( MAX_VIEWS * nextFrame + view ) ); + + glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT ); + gl_processSurfacesShader->DispatchCompute( totalBatchCount, 1, 1 ); + } + + surfaceDescriptorsSSBO.UnBindBufferBase(); + surfaceCommandsSSBO.UnBindBufferBase(); + culledCommandsBuffer.UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + surfaceBatchesUBO.UnBindBufferBase(); + atomicCommandCountersBuffer.UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + + GL_CheckErrors(); +} + +void MaterialSystem::StartFrame() { + if ( !generatedWorldCommandBuffer ) { + return; + } + frames[nextFrame].viewCount = 0; + + // renderedMaterials.clear(); + // UpdateDynamicSurfaces(); + // UpdateFrameData(); +} + +void MaterialSystem::EndFrame() { + 
if ( !generatedWorldCommandBuffer ) { + return; + } + + currentFrame = nextFrame; + nextFrame++; + if ( nextFrame >= MAX_FRAMES ) { + nextFrame = 0; + } + + currentView = 0; + return; +} + void MaterialSystem::GeneratePortalBoundingSpheres() { Log::Debug( "Generating portal bounding spheres" ); @@ -1703,6 +1935,14 @@ void MaterialSystem::Free() { skyShaders.clear(); renderedMaterials.clear(); + surfaceCommandsSSBO.UnmapBuffer(); + culledCommandsBuffer.UnmapBuffer(); + atomicCommandCountersBuffer.UnmapBuffer(); + + currentFrame = 0; + nextFrame = 1; + maxStages = 0; + for ( MaterialPack& pack : materialPacks ) { for ( Material& material : pack.materials ) { material.drawCommands.clear(); @@ -1727,6 +1967,7 @@ void MaterialSystem::AddDrawCommand( const uint materialID, const uint materialP cmd.materialsSSBOOffset = materialsSSBOOffset; materialPacks[materialPackID].materials[materialID].drawCommands.emplace_back(cmd); + lastCommandID = materialPacks[materialPackID].materials[materialID].drawCommands.size() - 1; cmd.textureCount = 0; } @@ -1746,12 +1987,19 @@ void MaterialSystem::AddPortalSurfaces() { return Distance( backEnd.viewParms.orientation.origin, lhs.origin ) - lhs.radius < Distance( backEnd.viewParms.orientation.origin, rhs.origin ) - rhs.radius; } ); + + uint count = 0; for ( const drawSurfBoundingSphere& sphere : portalBounds ) { R_MirrorViewBySurface( &portalSurfaces[sphere.drawSurfID] ); + count++; + // Limit this a bit until portal visibility readback is done + if ( count > 2 ) { + return; + } } } -void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort ) { +void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort, const uint viewID ) { if ( !r_drawworld->integer ) { return; } @@ -1759,6 +2007,11 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS if ( frameStart ) { renderedMaterials.clear(); UpdateDynamicSurfaces(); + UpdateFrameData(); + // 
StartFrame(); + + // Make sure compute dispatches from the last frame finished writing to memory + glMemoryBarrier( GL_COMMAND_BARRIER_BIT ); frameStart = false; } @@ -1767,7 +2020,7 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS for ( MaterialPack& materialPack : materialPacks ) { if ( materialPack.fromSort >= fromSort && materialPack.toSort <= toSort ) { for ( Material& material : materialPack.materials ) { - RenderMaterial( material ); + RenderMaterial( material, viewID ); renderedMaterials.emplace_back( &material ); } } @@ -1790,7 +2043,7 @@ void MaterialSystem::RenderMaterials( const shaderSort_t fromSort, const shaderS } } -void MaterialSystem::RenderMaterial( Material& material ) { +void MaterialSystem::RenderMaterial( Material& material, const uint viewID ) { backEnd.currentEntity = &tr.worldEntity; GL_State( material.stateBits ); @@ -1920,10 +2173,24 @@ void MaterialSystem::RenderMaterial( Material& material ) { } material.texturesResident = true; - glMultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_INT, - BUFFER_OFFSET( material.staticCommandOffset * sizeof( GLIndirectBuffer::GLIndirectCommand ) ), + culledCommandsBuffer.BindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + atomicCommandCountersBuffer.BindBuffer( GL_PARAMETER_BUFFER_ARB ); + + glMultiDrawElementsIndirectCountARB( GL_TRIANGLES, GL_UNSIGNED_INT, + BUFFER_OFFSET( material.surfaceCommandBatchOffset * SURFACE_COMMANDS_PER_BATCH * sizeof( GLIndirectBuffer::GLIndirectCommand ) + + ( culledCommandsCount * ( MAX_VIEWS * currentFrame + viewID ) + * sizeof( GLIndirectBuffer::GLIndirectCommand ) ) ), + //+ ( culledCommandsCount * ( MAX_VIEWS * currentFrame + currentView ) + //* sizeof( GLIndirectBuffer::GLIndirectCommand ) ), + material.globalID * sizeof( uint32_t ) + + ( MAX_COMMAND_COUNTERS * ( MAX_VIEWS * currentFrame + viewID ) ) * sizeof( uint32_t ), material.drawCommands.size(), 0 ); + culledCommandsBuffer.UnBindBuffer( GL_DRAW_INDIRECT_BUFFER ); + + 
atomicCommandCountersBuffer.UnBindBuffer( GL_PARAMETER_BUFFER_ARB ); + if ( material.usePolygonOffset ) { glDisable( GL_POLYGON_OFFSET_FILL ); } diff --git a/src/engine/renderer/Material.h b/src/engine/renderer/Material.h index cdade452ee..03a4002306 100644 --- a/src/engine/renderer/Material.h +++ b/src/engine/renderer/Material.h @@ -71,7 +71,10 @@ struct Material { uint currentStaticDrawSurfCount = 0; uint currentDynamicDrawSurfCount = 0; - uint staticCommandOffset = 0; + uint globalID = 0; + uint surfaceCommandBatchOffset = 0; + uint surfaceCommandBatchCount = 0; + uint surfaceCommandBatchPadding = 0; uint id = 0; bool useSync = false; @@ -134,6 +137,51 @@ struct drawSurfBoundingSphere { uint drawSurfID; }; +#define MAX_SURFACE_COMMANDS 16 +#define MAX_COMMAND_COUNTERS 64 +#define SURFACE_COMMANDS_PER_BATCH 64 + +#define MAX_SURFACE_COMMAND_BATCHES 2048 + +#define BOUNDING_SPHERE_SIZE 4 + +#define INDIRECT_COMMAND_SIZE 5 +#define SURFACE_COMMAND_SIZE 6 +#define SURFACE_COMMAND_BATCH_SIZE 4 // Aligned to 4 components + +#define MAX_FRAMES 2 +#define MAX_VIEWFRAMES MAX_VIEWS * MAX_FRAMES // Buffer 2 frames for each view + +struct ViewFrame { + uint viewID = 0; + uint portalViews[MAX_VIEWS]; + frustum_t frustum; +}; + +struct Frame { + uint viewCount = 0; + ViewFrame viewFrames[MAX_VIEWS]; +}; + +struct BoundingSphere { + vec3_t origin; + float radius; +}; + +struct SurfaceDescriptor { + BoundingSphere boundingSphere; + uint surfaceCommandIDs[MAX_SURFACE_COMMANDS] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +}; + +struct SurfaceCommand { + uint enabled; // uint because bool in GLSL is always 4 bytes + GLIndirectBuffer::GLIndirectCommand drawCommand; +}; + +struct SurfaceCommandBatch { + uint materialIDs[4] { 0, 0, 0, 0 }; +}; + class MaterialSystem { public: bool generatedWorldCommandBuffer = false; @@ -142,6 +190,13 @@ class MaterialSystem { bool generatingWorldCommandBuffer = false; vec3_t worldViewBounds[2] = {}; + uint currentView = 0; + + uint8_t 
maxStages = 0; + uint descriptorSize; + + std::vector drawCommands; + std::vector portalSurfacesTmp; std::vector portalSurfaces; std::vector portalBounds; @@ -166,16 +221,22 @@ class MaterialSystem { { shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS } }; - bool frameStart = true; + bool frameStart = false; void AddTexture( Texture* texture ); void AddDrawCommand( const uint materialID, const uint materialPackID, const uint materialsSSBOOffset, const GLuint count, const GLuint firstIndex ); void AddPortalSurfaces(); - void RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort ); + void RenderMaterials( const shaderSort_t fromSort, const shaderSort_t toSort, const uint viewID ); void UpdateDynamicSurfaces(); + void QueueSurfaceCull( const uint viewID, const frustum_t* frustum ); + void CullSurfaces(); + + void StartFrame(); + void EndFrame(); + void AddStageTextures( drawSurf_t* drawSurf, shaderStage_t* pStage, Material* material ); void GenerateWorldMaterials(); void GenerateWorldMaterialsBuffer(); @@ -187,16 +248,36 @@ class MaterialSystem { void Free(); private: + bool PVSLocked = false; + frustum_t lockedFrustum; + DrawCommand cmd; + uint lastCommandID; + uint totalDrawSurfs; + uint totalBatchCount = 0; + + uint surfaceCommandsCount = 0; + uint culledCommandsCount = 0; + uint surfaceDescriptorsCount = 0; + std::vector dynamicDrawSurfs; uint dynamicDrawSurfsOffset = 0; uint dynamicDrawSurfsSize = 0; - void RenderMaterial( Material& material ); + Frame frames[MAX_FRAMES]; + uint currentFrame = 0; + uint nextFrame = 1; + + void RenderMaterial( Material& material, const uint viewID ); + void UpdateFrameData(); }; extern GLSSBO materialsSSBO; -extern GLIndirectBuffer commandBuffer; +extern GLSSBO surfaceDescriptorsSSBO; // Global +extern GLSSBO surfaceCommandsSSBO; // Per viewframe, GPU updated +extern GLBuffer culledCommandsBuffer; // Per viewframe +extern GLUBO surfaceBatchesUBO; // Global +extern GLBuffer 
atomicCommandCountersBuffer; // Per viewframe extern MaterialSystem materialSystem; #endif // MATERIAL_H diff --git a/src/engine/renderer/gl_shader.cpp b/src/engine/renderer/gl_shader.cpp index a3fedc0cb4..87add5dea5 100644 --- a/src/engine/renderer/gl_shader.cpp +++ b/src/engine/renderer/gl_shader.cpp @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include "gl_shader.h" +#include "Material.h" // We currently write GLBinaryHeader to a file and memcpy all over it. // Make sure it's a pod, so we don't put a std::string in it or something @@ -42,6 +43,9 @@ ShaderKind shaderKind = ShaderKind::Unknown; GLShader_generic2D *gl_generic2DShader = nullptr; GLShader_generic *gl_genericShader = nullptr; GLShader_genericMaterial *gl_genericShaderMaterial = nullptr; +GLShader_cull *gl_cullShader = nullptr; +GLShader_clearSurfaces *gl_clearSurfacesShader = nullptr; +GLShader_processSurfaces *gl_processSurfacesShader = nullptr; GLShader_lightMapping *gl_lightMappingShader = nullptr; GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial = nullptr; GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ = nullptr; @@ -464,6 +468,8 @@ static std::string GenComputeVersionDeclaration() { GLEW_ARB_explicit_uniform_location, "ARB_explicit_uniform_location" ); addExtension( str, glConfig2.shaderImageLoadStoreAvailable, 420, GLEW_ARB_shader_image_load_store, "ARB_shader_image_load_store" ); + addExtension( str, glConfig2.shaderAtomicCountersAvailable, 420, + GLEW_ARB_shader_atomic_counters, "ARB_shader_atomic_counters" ); return str; } @@ -539,6 +545,28 @@ static std::string GenFragmentHeader() { return str; } +static std::string GenComputeHeader() { + std::string str; + + // Compute shader compatibility defines + AddDefine( str, "MAX_VIEWS", MAX_VIEWS ); + AddDefine( str, "MAX_FRAMES", MAX_FRAMES ); + AddDefine( str, "MAX_VIEWFRAMES", MAX_VIEWFRAMES ); + AddDefine( str, "MAX_SURFACE_COMMAND_BATCHES", 
MAX_SURFACE_COMMAND_BATCHES ); + AddDefine( str, "MAX_COMMAND_COUNTERS", MAX_COMMAND_COUNTERS ); + + return str; +} + +static std::string GenWorldHeader() { + std::string str; + + // Shader compatibility defines that use map data for compile-time values + AddDefine( str, "MAX_SURFACE_COMMANDS", materialSystem.maxStages ); + + return str; +} + static std::string GenEngineConstants() { // Engine constants std::string str; @@ -725,9 +753,15 @@ void GLShaderManager::GenerateBuiltinHeaders() { GLCompatHeader = GLHeader("GLCompatHeader", GenCompatHeader(), this); GLVertexHeader = GLHeader("GLVertexHeader", GenVertexHeader(), this); GLFragmentHeader = GLHeader("GLFragmentHeader", GenFragmentHeader(), this); + GLComputeHeader = GLHeader( "GLComputeHeader", GenComputeHeader(), this ); + GLWorldHeader = GLHeader( "GLWorldHeader", GenWorldHeader(), this ); GLEngineConstants = GLHeader("GLEngineConstants", GenEngineConstants(), this); } +void GLShaderManager::GenerateWorldHeaders() { + GLWorldHeader = GLHeader( "GLWorldHeader", GenWorldHeader(), this ); +} + std::string GLShaderManager::BuildDeformShaderText( const std::string& steps ) { std::string shaderText; @@ -858,22 +892,31 @@ std::string GLShaderManager::BuildGPUShaderText( Str::StringRef mainShaderNa std::string line; while ( std::getline( shaderTextStream, line, '\n' ) ) { - std::string::size_type position = line.find( "#insert" ); + const std::string::size_type position = line.find( "#insert" ); if ( position == std::string::npos ) { shaderMain += line + "\n"; continue; } + const std::string::iterator beginIt = std::find_if( line.begin(), line.end(), + []( unsigned char character ) { + return !std::isspace( character ); + } ); + if ( beginIt - line.begin() != int( position ) ) { // Signed/unsigned CI bullshit + shaderMain += line + "\n"; + continue; + } + std::string shaderInsertPath = line.substr( position + 8, std::string::npos ); switch ( shaderType ) { case GL_VERTEX_SHADER: - shaderMain += GetShaderText( "glsl/" 
+ shaderInsertPath + "_vp.glsl" ); + shaderMain += GetShaderText( "glsl/" + shaderInsertPath + ".glsl" ); break; case GL_FRAGMENT_SHADER: - shaderMain += GetShaderText( "glsl/" + shaderInsertPath + "_fp.glsl" ); + shaderMain += GetShaderText( "glsl/" + shaderInsertPath + ".glsl" ); break; case GL_COMPUTE_SHADER: - shaderMain += GetShaderText( "glsl/" + shaderInsertPath + "_cp.glsl" ); + shaderMain += GetShaderText( "glsl/" + shaderInsertPath + ".glsl" ); break; default: break; @@ -1064,6 +1107,8 @@ void GLShaderManager::InitShader( GLShader *shader ) combinedShaderText = GLComputeVersionDeclaration.getText() + GLCompatHeader.getText() + + GLComputeHeader.getText() + + GLWorldHeader.getText() + GLEngineConstants.getText(); } @@ -1276,7 +1321,8 @@ void GLShaderManager::CompileGPUShaders( GLShader *shader, shaderProgram_t *prog program->CS = CompileShader( shader->GetName(), computeShaderTextWithMacros, { &GLComputeVersionDeclaration, - // &GLComputeHeader, + &GLComputeHeader, + &GLWorldHeader, &GLCompatHeader, &GLEngineConstants }, GL_COMPUTE_SHADER ); @@ -3055,3 +3101,23 @@ void GLShader_fxaa::BuildShaderFragmentLibNames( std::string& fragmentInlines ) { fragmentInlines += "fxaa3_11"; } + +GLShader_cull::GLShader_cull( GLShaderManager* manager ) : + GLShader( "cull", ATTR_POSITION, manager, false, false, true ), + u_TotalDrawSurfs( this ), + u_SurfaceCommandsOffset( this ), + u_Frustum( this ) { +} + +GLShader_clearSurfaces::GLShader_clearSurfaces( GLShaderManager* manager ) : + GLShader( "clearSurfaces", ATTR_POSITION, manager, false, false, true ), + u_Frame( this ) { +} + +GLShader_processSurfaces::GLShader_processSurfaces( GLShaderManager* manager ) : + GLShader( "processSurfaces", ATTR_POSITION, manager, false, false, true ), + u_Frame( this ), + u_ViewID( this ), + u_SurfaceCommandsOffset( this ), + u_CulledCommandsOffset( this ) { +} diff --git a/src/engine/renderer/gl_shader.h b/src/engine/renderer/gl_shader.h index 093c7dc7cf..a3eed03fa0 100644 --- 
a/src/engine/renderer/gl_shader.h +++ b/src/engine/renderer/gl_shader.h @@ -111,7 +111,6 @@ class GLShader const uint32_t _vertexAttribsRequired; uint32_t _vertexAttribs; // can be set by uniforms GLShaderManager *_shaderManager; - size_t _uniformStorageSize; bool _hasVertexShader; std::string _vertexShaderText; @@ -122,6 +121,7 @@ class GLShader std::vector< shaderProgram_t > _shaderPrograms; + size_t _uniformStorageSize; std::vector< GLUniform * > _uniforms; std::vector< GLUniformBlock * > _uniformBlocks; std::vector< GLCompileMacro * > _compileMacros; @@ -351,6 +351,8 @@ class GLShaderManager GLHeader GLCompatHeader; GLHeader GLVertexHeader; GLHeader GLFragmentHeader; + GLHeader GLComputeHeader; + GLHeader GLWorldHeader; GLHeader GLEngineConstants; GLShaderManager() : _totalBuildTime( 0 ) @@ -361,6 +363,7 @@ class GLShaderManager void InitDriverInfo(); void GenerateBuiltinHeaders(); + void GenerateWorldHeaders(); template< class T > void load( T *& shader ) @@ -708,6 +711,56 @@ class GLUniform1i : protected GLUniform int currentValue = 0; }; +class GLUniform1ui : protected GLUniform { + protected: + GLUniform1ui( GLShader* shader, const char* name, const bool global = false ) : + GLUniform( shader, name, "uint", 1, 1, global ) { + } + + inline void SetValue( uint value ) { + shaderProgram_t* p = _shader->GetProgram(); + + if ( _global || !_shader->UseMaterialSystem() ) { + ASSERT_EQ( p, glState.currentProgram ); + } + +#if defined( LOG_GLSL_UNIFORMS ) + if ( r_logFile->integer ) { + GLimp_LogComment( va( "GLSL_SetUniform1i( %s, shader: %s, value: %d ) ---\n", + this->GetName(), _shader->GetName().c_str(), value ) ); + } +#endif + + if ( _shader->UseMaterialSystem() && !_global ) { + currentValue = value; + return; + } + +#if defined( USE_UNIFORM_FIREWALL ) + uint* firewall = ( uint* ) &p->uniformFirewall[_firewallIndex]; + + if ( *firewall == value ) { + return; + } + + *firewall = value; +#endif + glUniform1ui( p->uniformLocations[_locationIndex], value ); + } 
+ public: + size_t GetSize() override { + return sizeof( uint ); + } + + uint32_t* WriteToBuffer( uint32_t* buffer ) override { + memcpy( buffer, ¤tValue, sizeof( uint ) ); + return buffer + 1; + } + + private: + uint currentValue = 0; +}; + class GLUniform1Bool : protected GLUniform { protected: // GLSL std430 bool is always 4 bytes, which might not correspond to C++ bool @@ -1266,54 +1319,116 @@ class GLUniformBlock } }; -class GLSSBO { +class GLBuffer { public: std::string _name; const GLuint _bindingPoint; + const GLbitfield _flags; + const GLbitfield _mapFlags; + const GLuint64 SYNC_TIMEOUT = 10000000000; // 10 seconds - GLSSBO( const char* name, const GLuint bindingPoint ) : + GLBuffer( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : _name( name ), - _bindingPoint( bindingPoint ) { + _bindingPoint( bindingPoint ), + _flags( flags ), + _mapFlags( mapFlags ) { } - public: const char* GetName() { return _name.c_str(); } - void BindBufferBase() { - glBindBufferBase( GL_SHADER_STORAGE_BUFFER, _bindingPoint, handle ); + void BindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, handle ); } - void BindBuffer() { - glBindBuffer( GL_SHADER_STORAGE_BUFFER, handle ); + void UnBindBufferBase( const GLenum target ) { + glBindBufferBase( target, _bindingPoint, 0 ); + } + + void BindBuffer( const GLenum target ) { + glBindBuffer( target, handle ); + } + + void UnBindBuffer( const GLenum target ) { + glBindBuffer( target, 0 ); + } + + void BufferStorage( const GLenum target, const GLsizeiptr newAreaSize, const GLsizeiptr areaCount, const void* data ) { + areaSize = newAreaSize; + maxAreas = areaCount; + glBufferStorage( target, areaSize * areaCount * sizeof(uint32_t), data, _flags ); + syncs.resize( areaCount ); + } + + void AreaIncr() { + syncs[area] = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + area++; + if ( area >= maxAreas ) { + area = 0; + } + } + + void MapAll( const GLenum target ) 
{ + if ( !mapped ) { + mapped = true; + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, areaSize * maxAreas * sizeof( uint32_t ), _flags | _mapFlags ); + } + } + + uint32_t* GetCurrentAreaData() { + if ( syncs[area] != nullptr ) { + if ( glClientWaitSync( syncs[area], GL_SYNC_FLUSH_COMMANDS_BIT, SYNC_TIMEOUT ) == GL_TIMEOUT_EXPIRED ) { + Sys::Drop( "Failed buffer %s area %u sync", _name, area ); + } + glDeleteSync( syncs[area] ); + } + + return data + area * areaSize; + } + + uint32_t* GetData() { + return data; } - uint32_t* MapBufferRange( const GLuint count ) { + void FlushCurrentArea( GLenum target ) { + glFlushMappedBufferRange( target, area * areaSize * sizeof( uint32_t ), areaSize * sizeof( uint32_t ) ); + } + + void FlushAll( GLenum target ) { + glFlushMappedBufferRange( target, 0, maxAreas * areaSize * sizeof( uint32_t ) ); + } + + uint32_t* MapBufferRange( const GLenum target, const GLuint count ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, 0, count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT ); + _flags | _mapFlags ); } return data; } - uint32_t* MapBufferRange( const GLuint offset, const GLuint count ) { + uint32_t* MapBufferRange( const GLenum target, const GLuint offset, const GLuint count ) { if ( !mapped ) { mapped = true; - data = ( uint32_t* ) glMapBufferRange( GL_SHADER_STORAGE_BUFFER, + mappedTarget = target; + data = ( uint32_t* ) glMapBufferRange( target, offset * sizeof( uint32_t ), count * sizeof( uint32_t ), - GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT ); + _flags | _mapFlags ); } return data; } void UnmapBuffer() { - mapped = false; - glUnmapBuffer( GL_SHADER_STORAGE_BUFFER ); + if ( mapped ) { + mapped = false; + glUnmapBuffer( mappedTarget ); + } } void GenBuffer() { @@ -1325,11 +1440,148 @@ class GLSSBO { } private: + GLenum mappedTarget; GLuint 
handle; bool mapped = false; + std::vector<GLsync> syncs; + GLsizeiptr area = 0; + GLsizeiptr areaSize = 0; + GLsizeiptr maxAreas = 0; uint32_t* data; }; +class GLSSBO : public GLBuffer { + public: + GLSSBO( const char* name, const GLuint bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_SHADER_STORAGE_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_SHADER_STORAGE_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_SHADER_STORAGE_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_SHADER_STORAGE_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_SHADER_STORAGE_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_SHADER_STORAGE_BUFFER, offset, count ); + } +}; + +class GLUBO : public GLBuffer { + public: + GLUBO( const char* name, const GLsizeiptr bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_UNIFORM_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_UNIFORM_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_UNIFORM_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_UNIFORM_BUFFER, 
areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_UNIFORM_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_UNIFORM_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_UNIFORM_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_UNIFORM_BUFFER, offset, count ); + } +}; + +class GLAtomicCounterBuffer : public GLBuffer { + public: + GLAtomicCounterBuffer( const char* name, const GLsizeiptr bindingPoint, const GLbitfield flags, const GLbitfield mapFlags ) : + GLBuffer( name, bindingPoint, flags, mapFlags ) { + } + + public: + const char* GetName() { + return _name.c_str(); + } + + void BindBufferBase() { + GLBuffer::BindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void UnBindBufferBase() { + GLBuffer::UnBindBufferBase( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BindBuffer() { + GLBuffer::BindBuffer( GL_ATOMIC_COUNTER_BUFFER ); + } + + void BufferStorage( const GLsizeiptr areaSize, const GLsizeiptr areaCount, const void* data ) { + GLBuffer::BufferStorage( GL_ATOMIC_COUNTER_BUFFER, areaSize, areaCount, data ); + } + + void MapAll() { + GLBuffer::MapAll( GL_ATOMIC_COUNTER_BUFFER ); + } + + void FlushCurrentArea() { + GLBuffer::FlushCurrentArea( GL_ATOMIC_COUNTER_BUFFER ); + } + + uint32_t* MapBufferRange( const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, count ); + } + + uint32_t* MapBufferRange( const GLsizeiptr offset, const GLsizeiptr count ) { + return GLBuffer::MapBufferRange( GL_ATOMIC_COUNTER_BUFFER, offset, count ); + } +}; + class GLIndirectBuffer { public: @@ -1357,7 +1609,7 @@ class GLIndirectBuffer { glBindBuffer( GL_DRAW_INDIRECT_BUFFER, handle ); } - GLIndirectCommand* MapBufferRange( const GLuint count ) { + GLIndirectCommand* MapBufferRange( const GLsizeiptr count ) { return (GLIndirectCommand*) glMapBufferRange( 
GL_DRAW_INDIRECT_BUFFER, 0, count * sizeof( GLIndirectCommand ), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT ); @@ -2928,6 +3180,78 @@ class u_Color : } }; +class u_Frame : + GLUniform1ui { + public: + u_Frame( GLShader* shader ) : + GLUniform1ui( shader, "u_Frame" ) { + } + + void SetUniform_Frame( const uint frame ) { + this->SetValue( frame ); + } +}; + +class u_ViewID : + GLUniform1ui { + public: + u_ViewID( GLShader* shader ) : + GLUniform1ui( shader, "u_ViewID" ) { + } + + void SetUniform_ViewID( const uint viewID ) { + this->SetValue( viewID ); + } +}; + +class u_TotalDrawSurfs : + GLUniform1ui { + public: + u_TotalDrawSurfs( GLShader* shader ) : + GLUniform1ui( shader, "u_TotalDrawSurfs" ) { + } + + void SetUniform_TotalDrawSurfs( const uint totalDrawSurfs ) { + this->SetValue( totalDrawSurfs ); + } +}; + +class u_Frustum : + GLUniform4fv { + public: + u_Frustum( GLShader* shader ) : + GLUniform4fv( shader, "u_Frustum", 6 ) { + } + + void SetUniform_Frustum( vec4_t frustum[6] ) { + this->SetValue( 6, &frustum[0] ); + } +}; + +class u_SurfaceCommandsOffset : + GLUniform1ui { + public: + u_SurfaceCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_SurfaceCommandsOffset" ) { + } + + void SetUniform_SurfaceCommandsOffset( const uint surfaceCommandsOffset ) { + this->SetValue( surfaceCommandsOffset ); + } +}; + +class u_CulledCommandsOffset : + GLUniform1ui { + public: + u_CulledCommandsOffset( GLShader* shader ) : + GLUniform1ui( shader, "u_CulledCommandsOffset" ) { + } + + void SetUniform_CulledCommandsOffset( const uint culledCommandsOffset ) { + this->SetValue( culledCommandsOffset ); + } +}; + class u_ModelMatrix : GLUniformMatrix4f { @@ -3050,7 +3374,7 @@ class u_Bones : { public: u_Bones( GLShader *shader ) : - GLUniform4fv( shader, "u_Bones", MAX_BONES * 0 + 1 ) + GLUniform4fv( shader, "u_Bones", MAX_BONES ) { } @@ -4270,6 +4594,33 @@ class GLShader_fxaa : void BuildShaderFragmentLibNames( std::string& fragmentInlines ) override; }; 
+class GLShader_cull : + public GLShader, + public u_TotalDrawSurfs, + public u_SurfaceCommandsOffset, + public u_Frustum { + public: + GLShader_cull( GLShaderManager* manager ); +}; + +class GLShader_clearSurfaces : + public GLShader, + public u_Frame { + public: + GLShader_clearSurfaces( GLShaderManager* manager ); +}; + +class GLShader_processSurfaces : + public GLShader, + public u_Frame, + public u_ViewID, + public u_SurfaceCommandsOffset, + public u_CulledCommandsOffset { + public: + GLShader_processSurfaces( GLShaderManager* manager ); +}; + + std::string GetShaderPath(); extern ShaderKind shaderKind; @@ -4277,6 +4628,9 @@ extern ShaderKind shaderKind; extern GLShader_generic2D *gl_generic2DShader; extern GLShader_generic *gl_genericShader; extern GLShader_genericMaterial *gl_genericShaderMaterial; +extern GLShader_cull *gl_cullShader; +extern GLShader_clearSurfaces *gl_clearSurfacesShader; +extern GLShader_processSurfaces *gl_processSurfacesShader; extern GLShader_lightMapping *gl_lightMappingShader; extern GLShader_lightMappingMaterial *gl_lightMappingShaderMaterial; extern GLShader_forwardLighting_omniXYZ *gl_forwardLightingShader_omniXYZ; diff --git a/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl new file mode 100644 index 0000000000..7c42dc1881 --- /dev/null +++ b/src/engine/renderer/glsl_source/clearSurfaces_cp.glsl @@ -0,0 +1,53 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=========================================================================== +*/ + +/* clearSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(std430, binding = 4) writeonly buffer atomicCommandCountersBuffer { + uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; +}; + +uniform uint u_Frame; + +void main() { + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; + if( globalInvocationID >= MAX_COMMAND_COUNTERS * MAX_VIEWS ) { + return; + } + atomicCommandCounters[globalInvocationID + MAX_COMMAND_COUNTERS * MAX_VIEWS * u_Frame] = 0; +} diff --git a/src/engine/renderer/glsl_source/cull_cp.glsl b/src/engine/renderer/glsl_source/cull_cp.glsl new file mode 100644 index 0000000000..74c762c4b8 --- /dev/null +++ b/src/engine/renderer/glsl_source/cull_cp.glsl @@ -0,0 +1,110 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* cull_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +// layout(rg16f, binding = 0) uniform image2D depthImage; + +struct BoundingSphere { + vec3 center; + float radius; +}; + +struct SurfaceDescriptor { + BoundingSphere boundingSphere; + uint surfaceCommandIDs[MAX_SURFACE_COMMANDS]; +}; + +struct GLIndirectCommand { + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { + bool enabled; + GLIndirectCommand drawCommand; +}; + +layout(std430, binding = 1) readonly restrict buffer surfaceDescriptorsSSBO { + SurfaceDescriptor surfaces[]; +}; + +layout(std430, binding = 2) writeonly restrict buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +struct Plane { + vec3 normal; + float distance; +}; + +uniform uint u_TotalDrawSurfs; +uniform uint u_SurfaceCommandsOffset; +uniform vec4 u_Frustum[6]; // xyz - normal, w - distance + +bool CullSurface( in BoundingSphere boundingSphere ) { + for( int i = 0; i < 5; i++ ) { // Skip far plane for now because we always have it set to { 0, 0, 0, 0 } for some reason + const float distance = 
dot( u_Frustum[i].xyz, boundingSphere.center ) - u_Frustum[i].w; + + if( distance < -boundingSphere.radius ) { + return true; + } + } + return false; +} + +void ProcessSurfaceCommands( const in SurfaceDescriptor surface, const in bool enabled ) { + for( uint i = 0; i < MAX_SURFACE_COMMANDS; i++ ) { + const uint commandID = surface.surfaceCommandIDs[i]; + surfaceCommands[commandID + u_SurfaceCommandsOffset].enabled = enabled; + } +} + +void main() { + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x; + if( globalInvocationID >= u_TotalDrawSurfs ) { + return; + } + SurfaceDescriptor surface = surfaces[globalInvocationID]; + bool culled = CullSurface( surface.boundingSphere ); + + ProcessSurfaceCommands( surface, !culled ); +} diff --git a/src/engine/renderer/glsl_source/fogQuake3_fp.glsl b/src/engine/renderer/glsl_source/fogQuake3_fp.glsl index 9604201252..e14a642870 100644 --- a/src/engine/renderer/glsl_source/fogQuake3_fp.glsl +++ b/src/engine/renderer/glsl_source/fogQuake3_fp.glsl @@ -35,7 +35,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp vec4 color = texture2D(u_ColorMap, var_TexCoords); diff --git a/src/engine/renderer/glsl_source/fogQuake3_vp.glsl b/src/engine/renderer/glsl_source/fogQuake3_vp.glsl index cf360992cd..8b1099d401 100644 --- a/src/engine/renderer/glsl_source/fogQuake3_vp.glsl +++ b/src/engine/renderer/glsl_source/fogQuake3_vp.glsl @@ -47,7 +47,7 @@ void DeformVertex( inout vec4 pos, void main() { - #insert material + #insert material_vp vec4 position; localBasis LB; diff --git a/src/engine/renderer/glsl_source/generic_fp.glsl b/src/engine/renderer/glsl_source/generic_fp.glsl index 2ab30b8dff..aab118f00a 100644 --- a/src/engine/renderer/glsl_source/generic_fp.glsl +++ b/src/engine/renderer/glsl_source/generic_fp.glsl @@ -43,7 
+43,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp vec4 color = texture2D(u_ColorMap, var_TexCoords); diff --git a/src/engine/renderer/glsl_source/generic_vp.glsl b/src/engine/renderer/glsl_source/generic_vp.glsl index d48f6a6b58..5d82b3a074 100644 --- a/src/engine/renderer/glsl_source/generic_vp.glsl +++ b/src/engine/renderer/glsl_source/generic_vp.glsl @@ -55,7 +55,7 @@ void DeformVertex( inout vec4 pos, void main() { - #insert material + #insert material_vp vec4 position; localBasis LB; diff --git a/src/engine/renderer/glsl_source/heatHaze_fp.glsl b/src/engine/renderer/glsl_source/heatHaze_fp.glsl index 2f0a3dfd66..d1d2bf9a4b 100644 --- a/src/engine/renderer/glsl_source/heatHaze_fp.glsl +++ b/src/engine/renderer/glsl_source/heatHaze_fp.glsl @@ -34,7 +34,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp vec4 color; diff --git a/src/engine/renderer/glsl_source/heatHaze_vp.glsl b/src/engine/renderer/glsl_source/heatHaze_vp.glsl index 126ec116fe..4c7ceb06bd 100644 --- a/src/engine/renderer/glsl_source/heatHaze_vp.glsl +++ b/src/engine/renderer/glsl_source/heatHaze_vp.glsl @@ -42,7 +42,7 @@ void DeformVertex( inout vec4 pos, void main() { - #insert material + #insert material_vp vec4 deformVec; float d1, d2; diff --git a/src/engine/renderer/glsl_source/lightMapping_fp.glsl b/src/engine/renderer/glsl_source/lightMapping_fp.glsl index 011046c304..2f6a9534f4 100644 --- a/src/engine/renderer/glsl_source/lightMapping_fp.glsl +++ b/src/engine/renderer/glsl_source/lightMapping_fp.glsl @@ -58,7 +58,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp // Compute view direction in world space. 
vec3 viewDir = normalize(u_ViewOrigin - var_Position); diff --git a/src/engine/renderer/glsl_source/lightMapping_vp.glsl b/src/engine/renderer/glsl_source/lightMapping_vp.glsl index f56bfd0ff7..40a88d4f24 100644 --- a/src/engine/renderer/glsl_source/lightMapping_vp.glsl +++ b/src/engine/renderer/glsl_source/lightMapping_vp.glsl @@ -60,7 +60,7 @@ void DeformVertex(inout vec4 pos, inout vec3 normal, inout vec2 st, inout vec4 c void main() { - #insert material + #insert material_vp localBasis LB; vec4 position, color; diff --git a/src/engine/renderer/glsl_source/processSurfaces_cp.glsl b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl new file mode 100644 index 0000000000..5a3a5676ac --- /dev/null +++ b/src/engine/renderer/glsl_source/processSurfaces_cp.glsl @@ -0,0 +1,96 @@ +/* +=========================================================================== + +Daemon BSD Source Code +Copyright (c) 2024 Daemon Developers +All rights reserved. + +This file is part of the Daemon BSD Source Code (Daemon Source Code). + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Daemon developers nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL DAEMON DEVELOPERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=========================================================================== +*/ + +/* processSurfaces_cp.glsl */ + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +struct GLIndirectCommand { + uint count; + uint instanceCount; + uint firstIndex; + int baseVertex; + uint baseInstance; +}; + +struct SurfaceCommand { + bool enabled; + GLIndirectCommand drawCommand; +}; + +struct SurfaceCommandBatch { + uvec2 materialIDs[2]; +}; + +layout(std430, binding = 2) readonly buffer surfaceCommandsSSBO { + SurfaceCommand surfaceCommands[]; +}; + +layout(std430, binding = 3) writeonly buffer culledCommandsSSBO { + GLIndirectCommand culledCommands[]; +}; + +layout(std140, binding = 0) uniform ub_SurfaceBatches { + SurfaceCommandBatch surfaceBatches[MAX_SURFACE_COMMAND_BATCHES]; +}; + +layout (binding = 4) uniform atomic_uint atomicCommandCounters[MAX_COMMAND_COUNTERS * MAX_VIEWS * MAX_FRAMES]; + +uniform uint u_Frame; +uniform uint u_ViewID; +uniform uint u_SurfaceCommandsOffset; +uniform uint u_CulledCommandsOffset; + +void AddDrawCommand( in uint commandID, in uvec2 materialID ) { + SurfaceCommand command = surfaceCommands[commandID + u_SurfaceCommandsOffset]; + if( command.enabled ) { + const uint atomicCmdID = atomicCounterIncrement( atomicCommandCounters[materialID.x + + MAX_COMMAND_COUNTERS * ( MAX_VIEWS * u_Frame + u_ViewID )] ); + culledCommands[atomicCmdID + materialID.y * MAX_COMMAND_COUNTERS + u_CulledCommandsOffset] = command.drawCommand; + } +} + 
+void main() { + const uint globalGroupID = gl_WorkGroupID.z * gl_NumWorkGroups.x * gl_NumWorkGroups.y + + gl_WorkGroupID.y * gl_NumWorkGroups.x + + gl_WorkGroupID.x; + const uint globalInvocationID = gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x + * gl_NumWorkGroups.y * gl_WorkGroupSize.y + + gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + + gl_GlobalInvocationID.x + + 1; // Add 1 because the first surface command is always reserved as a fake command + const uvec2 materialID = surfaceBatches[globalGroupID / 2].materialIDs[globalGroupID % 2]; + + AddDrawCommand( globalInvocationID, materialID ); +} diff --git a/src/engine/renderer/glsl_source/reflection_CB_fp.glsl b/src/engine/renderer/glsl_source/reflection_CB_fp.glsl index ffbb0f0227..d5f49b1161 100644 --- a/src/engine/renderer/glsl_source/reflection_CB_fp.glsl +++ b/src/engine/renderer/glsl_source/reflection_CB_fp.glsl @@ -38,7 +38,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp // compute view direction in world space vec3 viewDir = normalize(var_Position - u_ViewOrigin); diff --git a/src/engine/renderer/glsl_source/reflection_CB_vp.glsl b/src/engine/renderer/glsl_source/reflection_CB_vp.glsl index ae035ec5dc..45df2a0733 100644 --- a/src/engine/renderer/glsl_source/reflection_CB_vp.glsl +++ b/src/engine/renderer/glsl_source/reflection_CB_vp.glsl @@ -42,7 +42,7 @@ void DeformVertex( inout vec4 pos, void main() { - #insert material + #insert material_vp vec4 position; localBasis LB; diff --git a/src/engine/renderer/glsl_source/screen_fp.glsl b/src/engine/renderer/glsl_source/screen_fp.glsl index 21ad3740f6..8af646bcbd 100644 --- a/src/engine/renderer/glsl_source/screen_fp.glsl +++ b/src/engine/renderer/glsl_source/screen_fp.glsl @@ -32,7 +32,7 @@ DECLARE_OUTPUT(vec4) void main() { - #insert material + #insert material_fp // calculate the screen texcoord in the 0.0 to 1.0 range vec2 st = gl_FragCoord.st / r_FBufSize; diff --git 
a/src/engine/renderer/glsl_source/screen_vp.glsl b/src/engine/renderer/glsl_source/screen_vp.glsl index 26d6bd8fbc..fd35a2885e 100644 --- a/src/engine/renderer/glsl_source/screen_vp.glsl +++ b/src/engine/renderer/glsl_source/screen_vp.glsl @@ -31,7 +31,7 @@ OUT(smooth) vec4 var_Color; void main() { - #insert material + #insert material_vp // transform vertex position into homogenous clip-space gl_Position = u_ModelViewProjectionMatrix * vec4(attr_Position, 1.0); diff --git a/src/engine/renderer/glsl_source/skybox_fp.glsl b/src/engine/renderer/glsl_source/skybox_fp.glsl index 59514bdfa2..a8d109c92a 100644 --- a/src/engine/renderer/glsl_source/skybox_fp.glsl +++ b/src/engine/renderer/glsl_source/skybox_fp.glsl @@ -54,7 +54,7 @@ float ComputeCloudParametric( vec3 skyVec, float radiusWorld, float cloudHeight void main() { - #insert material + #insert material_fp // compute incident ray vec3 incidentRay = normalize(var_Position); diff --git a/src/engine/renderer/glsl_source/skybox_vp.glsl b/src/engine/renderer/glsl_source/skybox_vp.glsl index aea62fb2ce..e3e236c420 100644 --- a/src/engine/renderer/glsl_source/skybox_vp.glsl +++ b/src/engine/renderer/glsl_source/skybox_vp.glsl @@ -30,7 +30,7 @@ OUT(smooth) vec3 var_Position; void main() { - #insert material + #insert material_vp // transform vertex position into homogenous clip-space gl_Position = u_ModelViewProjectionMatrix * vec4(attr_Position, 1.0); diff --git a/src/engine/renderer/shaders.cpp b/src/engine/renderer/shaders.cpp index b9aee09c9e..1a7dc740bb 100644 --- a/src/engine/renderer/shaders.cpp +++ b/src/engine/renderer/shaders.cpp @@ -60,6 +60,9 @@ #include "skybox_fp.glsl.h" #include "material_vp.glsl.h" #include "material_fp.glsl.h" +#include "cull_cp.glsl.h" +#include "processSurfaces_cp.glsl.h" +#include "clearSurfaces_cp.glsl.h" std::unordered_map shadermap({ { "glsl/blurX_fp.glsl", std::string(reinterpret_cast(blurX_fp_glsl), sizeof(blurX_fp_glsl)) }, @@ -71,6 +74,8 @@ std::unordered_map shadermap({ { 
"glsl/computeLight_fp.glsl", std::string(reinterpret_cast(computeLight_fp_glsl), sizeof(computeLight_fp_glsl)) }, { "glsl/contrast_fp.glsl", std::string(reinterpret_cast(contrast_fp_glsl), sizeof(contrast_fp_glsl)) }, { "glsl/contrast_vp.glsl", std::string(reinterpret_cast(contrast_vp_glsl), sizeof(contrast_vp_glsl)) }, + { "glsl/clearSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( clearSurfaces_cp_glsl ), sizeof( clearSurfaces_cp_glsl ) ) }, + { "glsl/cull_cp.glsl", std::string( reinterpret_cast< const char* >( cull_cp_glsl ), sizeof( cull_cp_glsl ) ) }, { "glsl/debugShadowMap_fp.glsl", std::string(reinterpret_cast(debugShadowMap_fp_glsl), sizeof(debugShadowMap_fp_glsl)) }, { "glsl/debugShadowMap_vp.glsl", std::string(reinterpret_cast(debugShadowMap_vp_glsl), sizeof(debugShadowMap_vp_glsl)) }, { "glsl/deformVertexes_vp.glsl", std::string(reinterpret_cast(deformVertexes_vp_glsl), sizeof(deformVertexes_vp_glsl)) }, @@ -105,6 +110,7 @@ std::unordered_map shadermap({ { "glsl/motionblur_vp.glsl", std::string(reinterpret_cast(motionblur_vp_glsl), sizeof(motionblur_vp_glsl)) }, { "glsl/portal_fp.glsl", std::string(reinterpret_cast(portal_fp_glsl), sizeof(portal_fp_glsl)) }, { "glsl/portal_vp.glsl", std::string(reinterpret_cast(portal_vp_glsl), sizeof(portal_vp_glsl)) }, + { "glsl/processSurfaces_cp.glsl", std::string( reinterpret_cast< const char* >( processSurfaces_cp_glsl ), sizeof( processSurfaces_cp_glsl ) ) }, { "glsl/reflection_CB_fp.glsl", std::string(reinterpret_cast(reflection_CB_fp_glsl), sizeof(reflection_CB_fp_glsl)) }, { "glsl/reflection_CB_vp.glsl", std::string(reinterpret_cast(reflection_CB_vp_glsl), sizeof(reflection_CB_vp_glsl)) }, { "glsl/refraction_C_fp.glsl", std::string(reinterpret_cast(refraction_C_fp_glsl), sizeof(refraction_C_fp_glsl)) }, diff --git a/src/engine/renderer/tr_backend.cpp b/src/engine/renderer/tr_backend.cpp index 52a2132a77..4844f9f62b 100644 --- a/src/engine/renderer/tr_backend.cpp +++ 
b/src/engine/renderer/tr_backend.cpp @@ -4871,6 +4871,8 @@ static void RB_RenderView( bool depthPass ) if ( ( backEnd.refdef.rdflags & RDF_HYPERSPACE ) ) { RB_Hyperspace(); + + materialSystem.currentView++; return; } else @@ -4889,11 +4891,9 @@ static void RB_RenderView( bool depthPass ) startTime = ri.Milliseconds(); } - materialSystem.frameStart = true; - if( depthPass ) { if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH ); + materialSystem.RenderMaterials( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_DEPTH, shaderSort_t::SS_DEPTH, DRAWSURFACES_ALL ); RB_RunVisTests(); @@ -4907,7 +4907,7 @@ static void RB_RenderView( bool depthPass ) { // draw everything that is not the gun if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, DRAWSURFACES_ALL_FAR ); @@ -4920,7 +4920,7 @@ static void RB_RenderView( bool depthPass ) { // draw everything that is opaque if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_FOG, shaderSort_t::SS_OPAQUE, DRAWSURFACES_ALL ); } @@ -4952,7 +4952,7 @@ static void RB_RenderView( bool depthPass ) // draw everything that is translucent if ( glConfig2.materialSystemAvailable ) { - materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS ); + materialSystem.RenderMaterials( shaderSort_t::SS_ENVIRONMENT_NOFOG, 
shaderSort_t::SS_POST_PROCESS, backEnd.viewParms.viewID ); } RB_RenderDrawSurfaces( shaderSort_t::SS_ENVIRONMENT_NOFOG, shaderSort_t::SS_POST_PROCESS, DRAWSURFACES_ALL ); @@ -4980,6 +4980,8 @@ static void RB_RenderView( bool depthPass ) backEnd.pc.c_portals++; } + materialSystem.currentView++; + backEnd.pc.c_views++; } @@ -5014,6 +5016,13 @@ static void RB_RenderPostProcess() tr.refdef.pixelTarget[(i * 4) + 3] = 255; //set the alpha pure white } } + + if( glConfig2.materialSystemAvailable ) { + // Dispatch the cull compute shaders for queued once we're done with post-processing + // We'll only use the results from those shaders in the next frame so we don't block the pipeline + materialSystem.CullSurfaces(); + materialSystem.EndFrame(); + } GL_CheckErrors(); } @@ -6070,6 +6079,9 @@ void RB_ExecuteRenderCommands( const void *data ) backEnd.smpFrame = 1; } + + materialSystem.frameStart = true; + // Log::Warn( "======" ); while ( cmd != nullptr ) { cmd = cmd->ExecuteSelf(); diff --git a/src/engine/renderer/tr_bsp.cpp b/src/engine/renderer/tr_bsp.cpp index ee955c7953..54674d456a 100644 --- a/src/engine/renderer/tr_bsp.cpp +++ b/src/engine/renderer/tr_bsp.cpp @@ -7078,4 +7078,7 @@ void RE_LoadWorldMap( const char *name ) tr.mapLightFactor = pow( 2, tr.mapOverBrightBits ); tr.mapInverseLightFactor = 1.0f / tr.mapLightFactor; } + + tr.worldLoaded = true; + GLSL_InitWorldShadersOrError(); } diff --git a/src/engine/renderer/tr_init.cpp b/src/engine/renderer/tr_init.cpp index 66cfb87cec..35ab9a96ec 100644 --- a/src/engine/renderer/tr_init.cpp +++ b/src/engine/renderer/tr_init.cpp @@ -112,6 +112,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA cvar_t *r_arb_shading_language_420pack; cvar_t *r_arb_explicit_uniform_location; cvar_t *r_arb_shader_image_load_store; + cvar_t *r_arb_shader_atomic_counters; cvar_t *r_checkGLErrors; cvar_t *r_logFile; @@ -1107,6 +1108,7 @@ ScreenshotCmd screenshotPNGRegistration("screenshotPNG", ssFormat_t::SSF_PNG, "p 
r_arb_shading_language_420pack = Cvar_Get( "r_arb_shading_language_420pack", "1", CVAR_CHEAT | CVAR_LATCH ); r_arb_explicit_uniform_location = Cvar_Get( "r_arb_explicit_uniform_location", "1", CVAR_CHEAT | CVAR_LATCH ); r_arb_shader_image_load_store = Cvar_Get( "r_arb_shader_image_load_store", "1", CVAR_CHEAT | CVAR_LATCH ); + r_arb_shader_atomic_counters = Cvar_Get( "r_arb_shader_atomic_counters", "1", CVAR_CHEAT | CVAR_LATCH ); r_picMip = Cvar_Get( "r_picMip", "0", CVAR_LATCH | CVAR_ARCHIVE ); r_imageMaxDimension = Cvar_Get( "r_imageMaxDimension", "0", CVAR_LATCH | CVAR_ARCHIVE ); diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index f88222881a..54bef148f7 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -1531,6 +1531,8 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; orientationr_t orientation; orientationr_t world; + uint viewID = 0; + vec3_t pvsOrigin; // may be different than or.origin for portals int portalLevel; // number of portals this view is through @@ -1668,6 +1670,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; uint materialIDs[ MAX_SHADER_STAGES ]; uint materialPackIDs[ MAX_SHADER_STAGES ]; bool texturesDynamic[ MAX_SHADER_STAGES ]; + uint drawCommandIDs[ MAX_SHADER_STAGES ]; inline int index() const { return int( ( sort & SORT_INDEX_MASK ) ); @@ -2655,6 +2658,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; deluxeMode_t worldDeluxe; deluxeMode_t modelDeluxe; + bool worldLoaded; world_t *world; TextureManager textureManager; @@ -2927,6 +2931,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; extern cvar_t *r_arb_shading_language_420pack; extern cvar_t *r_arb_explicit_uniform_location; extern cvar_t *r_arb_shader_image_load_store; + extern cvar_t *r_arb_shader_atomic_counters; extern cvar_t *r_nobind; // turns off binding to appropriate textures extern cvar_t *r_singleShader; // make most world faces use default shader @@ -3434,6 +3439,7 @@ inline bool 
checkGLErrors() extern shaderCommands_t tess; void GLSL_InitGPUShaders(); + void GLSL_InitWorldShadersOrError(); void GLSL_ShutdownGPUShaders(); void GLSL_FinishGPUShaders(); diff --git a/src/engine/renderer/tr_main.cpp b/src/engine/renderer/tr_main.cpp index 9ebda2a103..6289d51718 100644 --- a/src/engine/renderer/tr_main.cpp +++ b/src/engine/renderer/tr_main.cpp @@ -2881,11 +2881,10 @@ void R_RenderView( viewParms_t *parms ) // because it requires the decalBits R_CullDecalProjectors(); - if ( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { - materialSystem.GenerateWorldMaterials(); - } - - if ( !glConfig2.materialSystemAvailable ) { + if ( glConfig2.materialSystemAvailable ) { + tr.viewParms.viewID = tr.viewCount; + materialSystem.QueueSurfaceCull( tr.viewCount, (frustum_t*) tr.viewParms.frustums[0] ); + } else { R_AddWorldSurfaces(); } diff --git a/src/engine/renderer/tr_public.h b/src/engine/renderer/tr_public.h index 160566ff87..70fac421ee 100644 --- a/src/engine/renderer/tr_public.h +++ b/src/engine/renderer/tr_public.h @@ -92,6 +92,7 @@ struct glconfig2_t bool shadingLanguage420PackAvailable; bool explicitUniformLocationAvailable; bool shaderImageLoadStoreAvailable; + bool shaderAtomicCountersAvailable; bool materialSystemAvailable; bool gpuShader4Available; bool textureGatherAvailable; diff --git a/src/engine/renderer/tr_scene.cpp b/src/engine/renderer/tr_scene.cpp index 11c8a9fbb2..dba6230408 100644 --- a/src/engine/renderer/tr_scene.cpp +++ b/src/engine/renderer/tr_scene.cpp @@ -625,11 +625,19 @@ void RE_RenderScene( const refdef_t *fd ) Vector4Copy( fd->gradingWeights, parms.gradingWeights ); // TODO: Add cmds for updating dynamic surfaces and for culling here - materialSystem.frameStart = true; + // materialSystem.frameStart = true; R_AddClearBufferCmd(); R_AddSetupLightsCmd(); + if ( glConfig2.materialSystemAvailable && !materialSystem.generatedWorldCommandBuffer ) { + materialSystem.GenerateWorldMaterials(); + } + + 
if ( glConfig2.materialSystemAvailable ) { + materialSystem.StartFrame(); + } + R_RenderView( &parms ); R_RenderPostProcess(); diff --git a/src/engine/renderer/tr_shade.cpp b/src/engine/renderer/tr_shade.cpp index 08da17a914..b2ec843772 100644 --- a/src/engine/renderer/tr_shade.cpp +++ b/src/engine/renderer/tr_shade.cpp @@ -60,6 +60,23 @@ static void EnableAvailableFeatures() } } +// For shaders that require map data for compile-time values +void GLSL_InitWorldShadersOrError() { + // make sure the render thread is stopped + R_SyncRenderThread(); + + GL_CheckErrors(); + + gl_shaderManager.GenerateWorldHeaders(); + + // Material system shaders that are always loaded if material system is available + if ( glConfig2.materialSystemAvailable ) { + gl_shaderManager.load( gl_cullShader ); + } + + gl_shaderManager.buildAll(); +} + static void GLSL_InitGPUShadersOrError() { // make sure the render thread is stopped @@ -86,6 +103,9 @@ static void GLSL_InitGPUShadersOrError() gl_shaderManager.load( gl_skyboxShaderMaterial ); gl_shaderManager.load( gl_fogQuake3ShaderMaterial ); gl_shaderManager.load( gl_heatHazeShaderMaterial ); + gl_shaderManager.load( gl_cullShader ); + gl_shaderManager.load( gl_clearSurfacesShader ); + gl_shaderManager.load( gl_processSurfacesShader ); } // standard light mapping @@ -291,6 +311,9 @@ void GLSL_ShutdownGPUShaders() gl_genericShader = nullptr; gl_genericShaderMaterial = nullptr; + gl_cullShader = nullptr; + gl_clearSurfacesShader = nullptr; + gl_processSurfacesShader = nullptr; gl_lightMappingShader = nullptr; gl_lightMappingShaderMaterial = nullptr; gl_forwardLightingShader_omniXYZ = nullptr; diff --git a/src/engine/renderer/tr_shader.cpp b/src/engine/renderer/tr_shader.cpp index 1bb29c2451..800d3abf2f 100644 --- a/src/engine/renderer/tr_shader.cpp +++ b/src/engine/renderer/tr_shader.cpp @@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "tr_local.h" #include "gl_shader.h" #include 
"framework/CvarSystem.h" +#include "Material.h" #include static const int MAX_SHADERTABLE_HASH = 1024; @@ -5902,6 +5903,14 @@ static shader_t *FinishShader() // Copy the current global shader to a newly allocated shader. shader_t *ret = MakeShaderPermanent(); + if ( glConfig2.materialSystemAvailable && !tr.worldLoaded ) { + uint8_t maxStages = ret->numStages; + if ( maxStages % 4 != 0 ) { // Aligned to 4 components + maxStages = ( maxStages / 4 + 1 ) * 4; + } + materialSystem.maxStages = maxStages > materialSystem.maxStages ? maxStages : materialSystem.maxStages; + } + // generate depth-only shader if necessary if( !shader.isSky && shader.numStages > 0 && diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index b9c40029f5..1292368881 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -1458,7 +1458,6 @@ static void Tess_SurfaceVBOMesh( srfVBOMesh_t *srf ) { GLimp_LogComment( "--- Tess_SurfaceVBOMesh ---\n" ); - Tess_SurfaceVBO( srf->vbo, srf->ibo, srf->numIndexes, srf->firstIndex ); } diff --git a/src/engine/renderer/tr_vbo.cpp b/src/engine/renderer/tr_vbo.cpp index 7005f786ca..822a663e71 100644 --- a/src/engine/renderer/tr_vbo.cpp +++ b/src/engine/renderer/tr_vbo.cpp @@ -1076,7 +1076,11 @@ static void R_InitLightUBO() static void R_InitMaterialBuffers() { if( glConfig2.materialSystemAvailable ) { materialsSSBO.GenBuffer(); - commandBuffer.GenBuffer(); + surfaceDescriptorsSSBO.GenBuffer(); + surfaceCommandsSSBO.GenBuffer(); + culledCommandsBuffer.GenBuffer(); + surfaceBatchesUBO.GenBuffer(); + atomicCommandCountersBuffer.GenBuffer(); } } @@ -1197,7 +1201,11 @@ void R_ShutdownVBOs() if ( glConfig2.materialSystemAvailable ) { materialsSSBO.DelBuffer(); - commandBuffer.DelBuffer(); + surfaceDescriptorsSSBO.DelBuffer(); + surfaceCommandsSSBO.DelBuffer(); + culledCommandsBuffer.DelBuffer(); + surfaceBatchesUBO.DelBuffer(); + atomicCommandCountersBuffer.DelBuffer(); } tess.verts = tess.vertsBuffer 
= nullptr; diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 5e5653afaf..44fd381c90 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -1963,10 +1963,14 @@ static void GLimp_InitExtensions() // made required in OpenGL 4.2 glConfig2.shaderImageLoadStoreAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_image_load_store, r_arb_shader_image_load_store->value ); + // made required in OpenGL 4.2 + glConfig2.shaderAtomicCountersAvailable = LOAD_EXTENSION_WITH_TEST( ExtFlag_NONE, ARB_shader_atomic_counters, r_arb_shader_atomic_counters->value ); + glConfig2.materialSystemAvailable = glConfig2.shaderDrawParametersAvailable && glConfig2.SSBOAvailable && glConfig2.multiDrawIndirectAvailable && glConfig2.bindlessTexturesAvailable && glConfig2.computeShaderAvailable && glConfig2.shadingLanguage420PackAvailable && glConfig2.explicitUniformLocationAvailable && glConfig2.shaderImageLoadStoreAvailable + && glConfig2.shaderAtomicCountersAvailable && r_smp->integer == 0 // Currently doesn't work with r_smp 1 && r_useMaterialSystem->integer == 1; // Allow disabling it without disabling any extensions