diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp index ae807bd6c2..baa444bf47 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp @@ -4,7 +4,6 @@ #include -#include "Common/Align.h" #include "Common/FileUtil.h" #include "Common/StringUtil.h" @@ -18,8 +17,6 @@ #include "VideoCommon/Debugger.h" #include "VideoCommon/GeometryShaderGen.h" -#include "VideoCommon/GeometryShaderManager.h" -#include "VideoCommon/Statistics.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -36,25 +33,6 @@ ID3D11GeometryShader* GeometryShaderCache::GetCopyGeometryShader() return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? CopyGeometryShader : nullptr; } -ID3D11Buffer* gscbuf = nullptr; - -ID3D11Buffer*& GeometryShaderCache::GetConstantBuffer() -{ - // TODO: divide the global variables of the generated shaders into about 5 constant buffers to - // speed this up - if (GeometryShaderManager::dirty) - { - D3D11_MAPPED_SUBRESOURCE map; - D3D::context->Map(gscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - memcpy(map.pData, &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); - D3D::context->Unmap(gscbuf, 0); - GeometryShaderManager::dirty = false; - - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); - } - return gscbuf; -} - const char clear_shader_code[] = { "struct VSOUTPUT\n" "{\n" @@ -116,15 +94,6 @@ const char copy_shader_code[] = { void GeometryShaderCache::Init() { - unsigned int gbsize = Common::AlignUp(static_cast(sizeof(GeometryShaderConstants)), - 16); // must be a multiple of 16 - D3D11_BUFFER_DESC gbdesc = CD3D11_BUFFER_DESC(gbsize, D3D11_BIND_CONSTANT_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - HRESULT hr = D3D::device->CreateBuffer(&gbdesc, nullptr, &gscbuf); - CHECK(hr == S_OK, "Create geometry shader constant buffer (size=%u)", gbsize); - 
D3D::SetDebugObjectName(gscbuf, - "geometry shader constant buffer used to emulate the GX pipeline"); - // used when drawing clear quads ClearGeometryShader = D3D::CompileAndCreateGeometryShader(clear_shader_code); CHECK(ClearGeometryShader != nullptr, "Create clear geometry shader"); @@ -138,8 +107,6 @@ void GeometryShaderCache::Init() void GeometryShaderCache::Shutdown() { - SAFE_RELEASE(gscbuf); - SAFE_RELEASE(ClearGeometryShader); SAFE_RELEASE(CopyGeometryShader); } diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h index 28c1ee3680..38ffde1b51 100644 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h +++ b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h @@ -20,7 +20,8 @@ public: static ID3D11GeometryShader* GetClearGeometryShader(); static ID3D11GeometryShader* GetCopyGeometryShader(); - static ID3D11Buffer*& GetConstantBuffer(); + static ID3D11Buffer* GetConstantBuffer(); + static void UpdateConstantBuffer(const void* data, u32 data_size); }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp index e09e32d8a2..c86086614f 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp @@ -4,7 +4,6 @@ #include -#include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/FileUtil.h" #include "Common/MsgHandler.h" @@ -20,8 +19,6 @@ #include "VideoCommon/Debugger.h" #include "VideoCommon/PixelShaderGen.h" -#include "VideoCommon/PixelShaderManager.h" -#include "VideoCommon/Statistics.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -32,7 +29,6 @@ ID3D11PixelShader* s_AnaglyphProgram = nullptr; ID3D11PixelShader* s_DepthResolveProgram = nullptr; ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr}; ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr}; -ID3D11Buffer* pscbuf = nullptr; const char clear_program_code[] = {"void 
main(\n" "out float4 ocol0 : SV_Target,\n" @@ -277,36 +273,8 @@ ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram() return s_DepthResolveProgram; } -static void UpdateConstantBuffers() -{ - if (PixelShaderManager::dirty) - { - D3D11_MAPPED_SUBRESOURCE map; - D3D::context->Map(pscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - memcpy(map.pData, &PixelShaderManager::constants, sizeof(PixelShaderConstants)); - D3D::context->Unmap(pscbuf, 0); - PixelShaderManager::dirty = false; - - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); - } -} - -ID3D11Buffer* PixelShaderCache::GetConstantBuffer() -{ - UpdateConstantBuffers(); - return pscbuf; -} - void PixelShaderCache::Init() { - unsigned int cbsize = Common::AlignUp(static_cast(sizeof(PixelShaderConstants)), - 16); // must be a multiple of 16 - D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - D3D::device->CreateBuffer(&cbdesc, nullptr, &pscbuf); - CHECK(pscbuf != nullptr, "Create pixel shader constant buffer"); - D3D::SetDebugObjectName(pscbuf, "pixel shader constant buffer used to emulate the GX pipeline"); - // used when drawing clear quads s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code); CHECK(s_ClearProgram != nullptr, "Create clear pixel shader"); @@ -334,8 +302,6 @@ void PixelShaderCache::InvalidateMSAAShaders() void PixelShaderCache::Shutdown() { - SAFE_RELEASE(pscbuf); - SAFE_RELEASE(s_ClearProgram); SAFE_RELEASE(s_AnaglyphProgram); SAFE_RELEASE(s_DepthResolveProgram); diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h index 22b6b0cc0c..da1b3b720d 100644 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ b/Source/Core/VideoBackends/D3D/PixelShaderCache.h @@ -21,8 +21,6 @@ public: static void Init(); static void Shutdown(); - static ID3D11Buffer* GetConstantBuffer(); - static ID3D11PixelShader* 
GetColorCopyProgram(bool multisampled); static ID3D11PixelShader* GetClearProgram(); static ID3D11PixelShader* GetAnaglyphProgram(); diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index bfaa5763ba..93098f53fe 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -63,7 +63,7 @@ typedef struct _Nv_Stereo_Image_Header #define NVSTEREO_IMAGE_SIGNATURE 0x4433564e Renderer::Renderer(int backbuffer_width, int backbuffer_height) - : ::Renderer(backbuffer_width, backbuffer_height) + : ::Renderer(backbuffer_width, backbuffer_height, AbstractTextureFormat::RGBA8) { m_last_multisamples = g_ActiveConfig.iMultisamples; m_last_stereo_mode = g_ActiveConfig.stereo_mode != StereoMode::Off; @@ -167,16 +167,6 @@ void Renderer::SetupDeviceObjects() D3D::SetDebugObjectName(m_reset_rast_state, "rasterizer state for Renderer::ResetAPIState"); m_screenshot_texture = nullptr; - - CD3D11_BUFFER_DESC vbo_desc(UTILITY_VBO_SIZE, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DYNAMIC, - D3D11_CPU_ACCESS_WRITE); - hr = D3D::device->CreateBuffer(&vbo_desc, nullptr, &m_utility_vertex_buffer); - CHECK(SUCCEEDED(hr), "Create utility VBO"); - - CD3D11_BUFFER_DESC ubo_desc(UTILITY_UBO_SIZE, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, - D3D11_CPU_ACCESS_WRITE); - hr = D3D::device->CreateBuffer(&ubo_desc, nullptr, &m_utility_uniform_buffer); - CHECK(SUCCEEDED(hr), "Create utility UBO"); } // Kill off all device objects @@ -196,8 +186,6 @@ void Renderer::TeardownDeviceObjects() SAFE_RELEASE(m_reset_rast_state); SAFE_RELEASE(m_screenshot_texture); SAFE_RELEASE(m_3d_vision_texture); - SAFE_RELEASE(m_utility_vertex_buffer); - SAFE_RELEASE(m_utility_uniform_buffer); } void Renderer::Create3DVisionTexture(int width, int height) @@ -273,25 +261,6 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin return DXPipeline::Create(config); } -void Renderer::UpdateUtilityUniformBuffer(const void* uniforms, u32 
uniforms_size) -{ - DEBUG_ASSERT(uniforms_size > 0 && uniforms_size < UTILITY_UBO_SIZE); - D3D11_MAPPED_SUBRESOURCE mapped; - HRESULT hr = D3D::context->Map(m_utility_uniform_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - CHECK(SUCCEEDED(hr), "Map utility UBO"); - std::memcpy(mapped.pData, uniforms, uniforms_size); - D3D::context->Unmap(m_utility_uniform_buffer, 0); -} - -void Renderer::UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices) -{ - D3D11_MAPPED_SUBRESOURCE mapped; - HRESULT hr = D3D::context->Map(m_utility_vertex_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - CHECK(SUCCEEDED(hr), "Map utility VBO"); - std::memcpy(mapped.pData, vertices, num_vertices * vertex_stride); - D3D::context->Unmap(m_utility_vertex_buffer, 0); -} - void Renderer::SetPipeline(const AbstractPipeline* pipeline) { const DXPipeline* dx_pipeline = static_cast(pipeline); @@ -308,54 +277,6 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline) D3D::stateman->SetPixelShader(dx_pipeline->GetPixelShader()); } -void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) -{ - // Copy in uniforms. - if (uniforms_size > 0) - { - UpdateUtilityUniformBuffer(uniforms, uniforms_size); - D3D::stateman->SetVertexConstants(m_utility_uniform_buffer); - D3D::stateman->SetPixelConstants(m_utility_uniform_buffer); - D3D::stateman->SetGeometryConstants(m_utility_uniform_buffer); - } - - // If the vertices are larger than our buffer, we need to break it up into multiple draws. 
- const char* vertices_ptr = static_cast(vertices); - while (num_vertices > 0) - { - u32 vertices_this_draw = num_vertices; - if (vertices_ptr) - { - vertices_this_draw = std::min(vertices_this_draw, UTILITY_VBO_SIZE / vertex_stride); - DEBUG_ASSERT(vertices_this_draw > 0); - UpdateUtilityVertexBuffer(vertices_ptr, vertex_stride, vertices_this_draw); - D3D::stateman->SetVertexBuffer(m_utility_vertex_buffer, vertex_stride, 0); - } - - // Apply pending state and draw. - D3D::stateman->Apply(); - D3D::context->Draw(vertices_this_draw, 0); - vertices_ptr += vertex_stride * vertices_this_draw; - num_vertices -= vertices_this_draw; - } -} - -void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* uniforms, - u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z) -{ - D3D::stateman->SetComputeShader(static_cast(shader)->GetD3DComputeShader()); - - if (uniforms_size > 0) - { - UpdateUtilityUniformBuffer(uniforms, uniforms_size); - D3D::stateman->SetComputeConstants(m_utility_uniform_buffer); - } - - D3D::stateman->Apply(); - D3D::context->Dispatch(groups_x, groups_y, groups_z); -} - TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) { TargetRectangle result; @@ -549,15 +470,29 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne { // In D3D, the viewport rectangle must fit within the render target. 
D3D11_VIEWPORT vp; - vp.TopLeftX = MathUtil::Clamp(x, 0.0f, static_cast(m_target_width - 1)); - vp.TopLeftY = MathUtil::Clamp(y, 0.0f, static_cast(m_target_height - 1)); - vp.Width = MathUtil::Clamp(width, 1.0f, static_cast(m_target_width) - vp.TopLeftX); - vp.Height = MathUtil::Clamp(height, 1.0f, static_cast(m_target_height) - vp.TopLeftY); + vp.TopLeftX = MathUtil::Clamp(x, 0.0f, static_cast(m_current_framebuffer_width - 1)); + vp.TopLeftY = MathUtil::Clamp(y, 0.0f, static_cast(m_current_framebuffer_height - 1)); + vp.Width = + MathUtil::Clamp(width, 1.0f, static_cast(m_current_framebuffer_width) - vp.TopLeftX); + vp.Height = + MathUtil::Clamp(height, 1.0f, static_cast(m_current_framebuffer_height) - vp.TopLeftY); vp.MinDepth = near_depth; vp.MaxDepth = far_depth; D3D::context->RSSetViewports(1, &vp); } +void Renderer::Draw(u32 base_vertex, u32 num_vertices) +{ + D3D::stateman->Apply(); + D3D::context->Draw(num_vertices, base_vertex); +} + +void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + D3D::stateman->Apply(); + D3D::context->DrawIndexed(num_indices, base_index, base_vertex); +} + void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index 24b02af672..4750a0e69a 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -50,6 +50,8 @@ public: void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; + void Draw(u32 base_vertex, u32 num_vertices) override; + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; void SetFullscreen(bool enable_fullscreen) override; bool IsFullscreen() const override; @@ -73,11 +75,6 @@ public: void ReinterpretPixelData(unsigned int convtype) override; - void DrawUtilityPipeline(const void* 
uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) override; - void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size, - u32 groups_x, u32 groups_y, u32 groups_z) override; - private: void SetupDeviceObjects(); void TeardownDeviceObjects(); @@ -89,9 +86,6 @@ private: void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, u32 src_width, u32 src_height); - void UpdateUtilityUniformBuffer(const void* uniforms, u32 uniforms_size); - void UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices); - StateCache m_state_cache; std::array m_clear_blend_states{}; @@ -103,9 +97,6 @@ private: ID3D11Texture2D* m_screenshot_texture = nullptr; D3DTexture2D* m_3d_vision_texture = nullptr; - ID3D11Buffer* m_utility_vertex_buffer = nullptr; - ID3D11Buffer* m_utility_uniform_buffer = nullptr; - u32 m_last_multisamples = 1; bool m_last_stereo_mode = false; bool m_last_fullscreen_state = false; diff --git a/Source/Core/VideoBackends/D3D/VertexManager.cpp b/Source/Core/VideoBackends/D3D/VertexManager.cpp index 9d37aa8bf5..8a84ebadc3 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D/VertexManager.cpp @@ -6,6 +6,7 @@ #include +#include "Common/Align.h" #include "Common/CommonTypes.h" #include "VideoBackends/D3D/BoundingBox.h" @@ -19,11 +20,14 @@ #include "VideoCommon/BoundingBox.h" #include "VideoCommon/Debugger.h" +#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -33,15 +37,34 @@ const u32 MAX_IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 8; const u32 
MAX_VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE; const u32 MAX_BUFFER_SIZE = MAX_IBUFFER_SIZE + MAX_VBUFFER_SIZE; +static ID3D11Buffer* AllocateConstantBuffer(u32 size) +{ + const u32 cbsize = Common::AlignUp(size, 16u); // must be a multiple of 16 + const CD3D11_BUFFER_DESC cbdesc(cbsize, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, + D3D11_CPU_ACCESS_WRITE); + ID3D11Buffer* cbuf; + const HRESULT hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &cbuf); + CHECK(hr == S_OK, "shader constant buffer (size=%u)", cbsize); + D3D::SetDebugObjectName(cbuf, "constant buffer used to emulate the GX pipeline"); + return cbuf; +} + +static void UpdateConstantBuffer(ID3D11Buffer* const buffer, const void* data, u32 data_size) +{ + D3D11_MAPPED_SUBRESOURCE map; + D3D::context->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); + memcpy(map.pData, data, data_size); + D3D::context->Unmap(buffer, 0); + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); +} + void VertexManager::CreateDeviceObjects() { D3D11_BUFFER_DESC bufdesc = CD3D11_BUFFER_DESC(MAX_BUFFER_SIZE, D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - m_vertexDrawOffset = 0; - m_indexDrawOffset = 0; - for (int i = 0; i < MAX_BUFFER_COUNT; i++) { m_buffers[i] = nullptr; @@ -50,12 +73,18 @@ void VertexManager::CreateDeviceObjects() D3D::SetDebugObjectName(m_buffers[i], "Buffer of VertexManager"); } - m_currentBuffer = 0; - m_bufferCursor = MAX_BUFFER_SIZE; + m_buffer_cursor = MAX_BUFFER_SIZE; + + m_vertex_constant_buffer = AllocateConstantBuffer(sizeof(VertexShaderConstants)); + m_geometry_constant_buffer = AllocateConstantBuffer(sizeof(GeometryShaderConstants)); + m_pixel_constant_buffer = AllocateConstantBuffer(sizeof(PixelShaderConstants)); } void VertexManager::DestroyDeviceObjects() { + SAFE_RELEASE(m_pixel_constant_buffer); + SAFE_RELEASE(m_geometry_constant_buffer); + SAFE_RELEASE(m_vertex_constant_buffer); for (int i = 0; i < MAX_BUFFER_COUNT; i++) { 
SAFE_RELEASE(m_buffers[i]); @@ -64,12 +93,12 @@ void VertexManager::DestroyDeviceObjects() VertexManager::VertexManager() { - LocalVBuffer.resize(MAXVBUFFERSIZE); + m_staging_vertex_buffer.resize(MAXVBUFFERSIZE); - m_cur_buffer_pointer = m_base_buffer_pointer = &LocalVBuffer[0]; - m_end_buffer_pointer = m_base_buffer_pointer + LocalVBuffer.size(); + m_cur_buffer_pointer = m_base_buffer_pointer = &m_staging_vertex_buffer[0]; + m_end_buffer_pointer = m_base_buffer_pointer + m_staging_vertex_buffer.size(); - LocalIBuffer.resize(MAXIBUFFERSIZE); + m_staging_index_buffer.resize(MAXIBUFFERSIZE); CreateDeviceObjects(); } @@ -79,71 +108,103 @@ VertexManager::~VertexManager() DestroyDeviceObjects(); } -void VertexManager::PrepareDrawBuffers(u32 stride) +void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ + // Just use the one buffer for all three. + UpdateConstantBuffer(m_vertex_constant_buffer, uniforms, uniforms_size); + D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); + D3D::stateman->SetGeometryConstants(m_vertex_constant_buffer); + D3D::stateman->SetPixelConstants(m_vertex_constant_buffer); + VertexShaderManager::dirty = true; + GeometryShaderManager::dirty = true; + PixelShaderManager::dirty = true; +} + +void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) +{ + m_cur_buffer_pointer = m_base_buffer_pointer; + IndexGenerator::Start(m_staging_index_buffer.data()); +} + +void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) { D3D11_MAPPED_SUBRESOURCE map; - u32 vertexBufferSize = u32(m_cur_buffer_pointer - m_base_buffer_pointer); - u32 indexBufferSize = IndexGenerator::GetIndexLen() * sizeof(u16); + u32 vertexBufferSize = Common::AlignUp(num_vertices * vertex_stride, sizeof(u16)); + u32 indexBufferSize = num_indices * sizeof(u16); u32 totalBufferSize = vertexBufferSize + indexBufferSize; - u32 cursor = m_bufferCursor; - u32 padding = 
m_bufferCursor % stride; + u32 cursor = m_buffer_cursor; + u32 padding = vertex_stride > 0 ? (m_buffer_cursor % vertex_stride) : 0; if (padding) { - cursor += stride - padding; + cursor += vertex_stride - padding; } D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; if (cursor + totalBufferSize >= MAX_BUFFER_SIZE) { // Wrap around - m_currentBuffer = (m_currentBuffer + 1) % MAX_BUFFER_COUNT; + m_current_buffer = (m_current_buffer + 1) % MAX_BUFFER_COUNT; cursor = 0; MapType = D3D11_MAP_WRITE_DISCARD; } - m_vertexDrawOffset = cursor; - m_indexDrawOffset = cursor + vertexBufferSize; + *out_base_vertex = vertex_stride > 0 ? (cursor / vertex_stride) : 0; + *out_base_index = (cursor + vertexBufferSize) / sizeof(u16); - D3D::context->Map(m_buffers[m_currentBuffer], 0, MapType, 0, &map); + D3D::context->Map(m_buffers[m_current_buffer], 0, MapType, 0, &map); u8* mappedData = reinterpret_cast(map.pData); - memcpy(mappedData + m_vertexDrawOffset, m_base_buffer_pointer, vertexBufferSize); - memcpy(mappedData + m_indexDrawOffset, GetIndexBuffer(), indexBufferSize); - D3D::context->Unmap(m_buffers[m_currentBuffer], 0); + if (vertexBufferSize > 0) + std::memcpy(mappedData + cursor, m_base_buffer_pointer, vertexBufferSize); + if (indexBufferSize > 0) + std::memcpy(mappedData + cursor + vertexBufferSize, m_staging_index_buffer.data(), + indexBufferSize); + D3D::context->Unmap(m_buffers[m_current_buffer], 0); - m_bufferCursor = cursor + totalBufferSize; + m_buffer_cursor = cursor + totalBufferSize; ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertexBufferSize); ADDSTAT(stats.thisFrame.bytesIndexStreamed, indexBufferSize); + + D3D::stateman->SetVertexBuffer(m_buffers[m_current_buffer], vertex_stride, 0); + D3D::stateman->SetIndexBuffer(m_buffers[m_current_buffer]); } -void VertexManager::Draw(u32 stride) +void VertexManager::UploadConstants() { - u32 indices = IndexGenerator::GetIndexLen(); + if (VertexShaderManager::dirty) + { + UpdateConstantBuffer(m_vertex_constant_buffer, 
&VertexShaderManager::constants, + sizeof(VertexShaderConstants)); + VertexShaderManager::dirty = false; + } + if (GeometryShaderManager::dirty) + { + UpdateConstantBuffer(m_geometry_constant_buffer, &GeometryShaderManager::constants, + sizeof(GeometryShaderConstants)); + GeometryShaderManager::dirty = false; + } + if (PixelShaderManager::dirty) + { + UpdateConstantBuffer(m_pixel_constant_buffer, &PixelShaderManager::constants, + sizeof(PixelShaderConstants)); + PixelShaderManager::dirty = false; + } - D3D::stateman->SetVertexBuffer(m_buffers[m_currentBuffer], stride, 0); - D3D::stateman->SetIndexBuffer(m_buffers[m_currentBuffer]); - - u32 baseVertex = m_vertexDrawOffset / stride; - u32 startIndex = m_indexDrawOffset / sizeof(u16); - - D3D::stateman->Apply(); - D3D::context->DrawIndexed(indices, startIndex, baseVertex); - - INCSTAT(stats.thisFrame.numDrawCalls); + D3D::stateman->SetPixelConstants(m_pixel_constant_buffer, g_ActiveConfig.bEnablePixelLighting ? + m_vertex_constant_buffer : + nullptr); + D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); + D3D::stateman->SetGeometryConstants(m_geometry_constant_buffer); } -void VertexManager::vFlush() +void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { - u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(); - PrepareDrawBuffers(stride); - - if (!m_current_pipeline_object) - return; - FramebufferManager::SetIntegerEFBRenderTarget( m_current_pipeline_config.blending_state.logicopenable); + if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active) { D3D::context->OMSetRenderTargetsAndUnorderedAccessViews( @@ -151,21 +212,7 @@ void VertexManager::vFlush() nullptr); } - g_renderer->SetPipeline(m_current_pipeline_object); - - ID3D11Buffer* vertexConstants = VertexShaderCache::GetConstantBuffer(); - D3D::stateman->SetPixelConstants(PixelShaderCache::GetConstantBuffer(), - g_ActiveConfig.bEnablePixelLighting ? 
vertexConstants : nullptr); - D3D::stateman->SetVertexConstants(vertexConstants); - D3D::stateman->SetGeometryConstants(GeometryShaderCache::GetConstantBuffer()); - - Draw(stride); + D3D::stateman->Apply(); + D3D::context->DrawIndexed(num_indices, base_index, base_vertex); } - -void VertexManager::ResetBuffer(u32 stride) -{ - m_cur_buffer_pointer = m_base_buffer_pointer; - IndexGenerator::Start(GetIndexBuffer()); -} - -} // namespace +} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexManager.h b/Source/Core/VideoBackends/D3D/VertexManager.h index 922f854c6b..a7eab78ef5 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.h +++ b/Source/Core/VideoBackends/D3D/VertexManager.h @@ -42,32 +42,32 @@ public: std::unique_ptr CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; protected: - void ResetBuffer(u32 stride) override; - u16* GetIndexBuffer() { return &LocalIBuffer[0]; } + void CreateDeviceObjects() override; + void DestroyDeviceObjects() override; + void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadConstants() override; + void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; private: - void PrepareDrawBuffers(u32 stride); - void Draw(u32 stride); - // temp - void vFlush() override; - - u32 m_vertexDrawOffset; - u32 m_indexDrawOffset; - u32 m_currentBuffer; - u32 m_bufferCursor; - enum { MAX_BUFFER_COUNT = 2 }; - ID3D11Buffer* m_buffers[MAX_BUFFER_COUNT]; + ID3D11Buffer* m_buffers[MAX_BUFFER_COUNT] = {}; + u32 m_current_buffer = 0; + u32 m_buffer_cursor = 0; - std::vector LocalVBuffer; - std::vector LocalIBuffer; + std::vector m_staging_vertex_buffer; + std::vector 
m_staging_index_buffer; + + ID3D11Buffer* m_vertex_constant_buffer = nullptr; + ID3D11Buffer* m_geometry_constant_buffer = nullptr; + ID3D11Buffer* m_pixel_constant_buffer = nullptr; }; -} // namespace +} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp index 2033a38701..b7fc3a2582 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp @@ -4,7 +4,6 @@ #include -#include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/FileUtil.h" #include "Common/MsgHandler.h" @@ -23,7 +22,6 @@ #include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" -#include "VideoCommon/VertexShaderManager.h" namespace DX11 { @@ -49,25 +47,6 @@ ID3D11InputLayout* VertexShaderCache::GetClearInputLayout() return ClearLayout; } -ID3D11Buffer* vscbuf = nullptr; - -ID3D11Buffer*& VertexShaderCache::GetConstantBuffer() -{ - // TODO: divide the global variables of the generated shaders into about 5 constant buffers to - // speed this up - if (VertexShaderManager::dirty) - { - D3D11_MAPPED_SUBRESOURCE map; - D3D::context->Map(vscbuf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - memcpy(map.pData, &VertexShaderManager::constants, sizeof(VertexShaderConstants)); - D3D::context->Unmap(vscbuf, 0); - VertexShaderManager::dirty = false; - - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); - } - return vscbuf; -} - // this class will load the precompiled shaders into our cache template class VertexShaderCacheInserter : public LinearDiskCacheReader @@ -121,14 +100,6 @@ void VertexShaderCache::Init() {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, }; - unsigned int cbsize = Common::AlignUp(static_cast(sizeof(VertexShaderConstants)), - 16); // must be a multiple of 16 - D3D11_BUFFER_DESC cbdesc = 
CD3D11_BUFFER_DESC(cbsize, D3D11_BIND_CONSTANT_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - HRESULT hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &vscbuf); - CHECK(hr == S_OK, "Create vertex shader constant buffer (size=%u)", cbsize); - D3D::SetDebugObjectName(vscbuf, "vertex shader constant buffer used to emulate the GX pipeline"); - D3DBlob* blob; D3D::CompileVertexShader(simple_shader_code, &blob); D3D::device->CreateInputLayout(simpleelems, 2, blob->Data(), blob->Size(), &SimpleLayout); @@ -156,8 +127,6 @@ void VertexShaderCache::Init() void VertexShaderCache::Shutdown() { - SAFE_RELEASE(vscbuf); - SAFE_RELEASE(SimpleVertexShader); SAFE_RELEASE(ClearVertexShader); diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h index b59d75be47..d7f1958806 100644 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ b/Source/Core/VideoBackends/D3D/VertexShaderCache.h @@ -23,8 +23,6 @@ public: static void Init(); static void Shutdown(); - static ID3D11Buffer*& GetConstantBuffer(); - static ID3D11VertexShader* GetSimpleVertexShader(); static ID3D11VertexShader* GetClearVertexShader(); static ID3D11InputLayout* GetSimpleInputLayout(); diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 7df5358b16..684bc1350b 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -161,7 +161,8 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) D3D::InitUtils(); BBox::Init(); - return true; + + return g_renderer->Initialize(); } void VideoBackend::Shutdown() diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 147d5916ff..d790331e36 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -64,7 +64,7 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) g_framebuffer_manager = 
std::make_unique(); g_texture_cache = std::make_unique(); g_shader_cache = std::make_unique(); - return g_shader_cache->Initialize(); + return g_renderer->Initialize() && g_shader_cache->Initialize(); } void VideoBackend::Shutdown() diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index c80506c15b..0fe6f1186e 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -14,7 +14,7 @@ namespace Null { // Init functions -Renderer::Renderer() : ::Renderer(1, 1) +Renderer::Renderer() : ::Renderer(1, 1, AbstractTextureFormat::RGBA8) { UpdateActiveConfig(); } diff --git a/Source/Core/VideoBackends/Null/VertexManager.cpp b/Source/Core/VideoBackends/Null/VertexManager.cpp index 0e121ffa0d..872e9c3657 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.cpp +++ b/Source/Core/VideoBackends/Null/VertexManager.cpp @@ -22,6 +22,10 @@ VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_dec return std::make_unique(vtx_decl); } +void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ +} + VertexManager::VertexManager() : m_local_v_buffer(MAXVBUFFERSIZE), m_local_i_buffer(MAXIBUFFERSIZE) { } @@ -30,15 +34,24 @@ VertexManager::~VertexManager() { } -void VertexManager::ResetBuffer(u32 stride) +void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) { m_cur_buffer_pointer = m_base_buffer_pointer = m_local_v_buffer.data(); m_end_buffer_pointer = m_cur_buffer_pointer + m_local_v_buffer.size(); IndexGenerator::Start(&m_local_i_buffer[0]); } -void VertexManager::vFlush() +void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) { } -} // namespace +void VertexManager::UploadConstants() +{ +} + +void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +{ +} + +} // namespace Null diff --git 
a/Source/Core/VideoBackends/Null/VertexManager.h b/Source/Core/VideoBackends/Null/VertexManager.h index 863090cab6..1d6a706ec3 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.h +++ b/Source/Core/VideoBackends/Null/VertexManager.h @@ -20,11 +20,16 @@ public: std::unique_ptr CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + protected: - void ResetBuffer(u32 stride) override; + void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadConstants() override; + void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; private: - void vFlush() override; std::vector m_local_v_buffer; std::vector m_local_i_buffer; }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 78458aa3fe..c8d54acd5a 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -259,6 +259,21 @@ void ProgramShaderCache::UploadConstants() } } +void ProgramShaderCache::UploadConstants(const void* data, u32 data_size) +{ + // allocate and copy + const u32 alloc_size = Common::AlignUp(data_size, s_ubo_align); + auto buffer = s_buffer->Map(alloc_size, s_ubo_align); + std::memcpy(buffer.first, data, data_size); + s_buffer->Unmap(alloc_size); + + // bind the same sub-buffer to all stages + for (u32 index = 1; index <= 3; index++) + glBindBufferRange(GL_UNIFORM_BUFFER, index, s_buffer->m_buffer, buffer.second, data_size); + + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); +} + bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, const std::string& gcode) { @@ -539,6 +554,11 @@ void 
ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format) s_last_VAO = new_VAO; } +bool ProgramShaderCache::IsValidVertexFormatBound() +{ + return s_last_VAO != 0 && s_last_VAO != s_attributeless_VAO; +} + void ProgramShaderCache::InvalidateVertexFormat() { s_last_VAO = 0; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index 0096c6e5b9..b94e733167 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -69,6 +69,7 @@ class ProgramShaderCache { public: static void BindVertexFormat(const GLVertexFormat* vertex_format); + static bool IsValidVertexFormatBound(); static void InvalidateVertexFormat(); static void InvalidateLastProgram(); @@ -83,6 +84,7 @@ public: static u32 GetUniformBufferAlignment(); static void InvalidateConstants(); static void UploadConstants(); + static void UploadConstants(const void* data, u32 data_size); static void Init(); static void Shutdown(); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 120774b4f7..55de5ec0dc 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -355,7 +355,8 @@ static void InitDriverInfo() // Init functions Renderer::Renderer(std::unique_ptr main_gl_context) : ::Renderer(static_cast(std::max(main_gl_context->GetBackBufferWidth(), 1u)), - static_cast(std::max(main_gl_context->GetBackBufferHeight(), 1u))), + static_cast(std::max(main_gl_context->GetBackBufferHeight(), 1u)), + AbstractTextureFormat::RGBA8), m_main_gl_context(std::move(main_gl_context)) { bool bSuccess = true; @@ -811,6 +812,23 @@ bool Renderer::IsHeadless() const return m_main_gl_context->IsHeadless(); } +bool Renderer::Initialize() +{ + if (!::Renderer::Initialize()) + return false; + + // Initialize the FramebufferManager + g_framebuffer_manager = std::make_unique( + m_target_width, m_target_height, 
s_MSAASamples, BoundingBox::NeedsStencilBuffer()); + m_current_framebuffer_width = m_target_width; + m_current_framebuffer_height = m_target_height; + + m_post_processor = std::make_unique(); + s_raster_font = std::make_unique(); + + return true; +} + void Renderer::Shutdown() { ::Renderer::Shutdown(); @@ -822,18 +840,6 @@ void Renderer::Shutdown() m_post_processor.reset(); } -void Renderer::Init() -{ - // Initialize the FramebufferManager - g_framebuffer_manager = std::make_unique( - m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - - m_post_processor = std::make_unique(); - s_raster_font = std::make_unique(); -} - std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) { return std::make_unique(config); @@ -1178,6 +1184,27 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne glDepthRangef(near_depth, far_depth); } +void Renderer::Draw(u32 base_vertex, u32 num_vertices) +{ + glDrawArrays(static_cast(m_graphics_pipeline)->GetGLPrimitive(), base_vertex, + num_vertices); +} + +void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + if (g_ogl_config.bSupportsGLBaseVertex) + { + glDrawElementsBaseVertex(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + num_indices, GL_UNSIGNED_SHORT, + static_cast(nullptr) + base_index, base_vertex); + } + else + { + glDrawElements(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + num_indices, GL_UNSIGNED_SHORT, static_cast(nullptr) + base_index); + } +} + void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { @@ -1669,54 +1696,6 @@ void Renderer::SetInterlacingMode() // TODO } -void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) -{ - // Copy in uniforms. 
- if (uniforms_size > 0) - UploadUtilityUniforms(uniforms, uniforms_size); - - // Draw from base index if there is vertex data. - if (vertices) - { - StreamBuffer* vbuf = static_cast(g_vertex_manager.get())->GetVertexBuffer(); - auto buf = vbuf->Map(vertex_stride * num_vertices, vertex_stride); - std::memcpy(buf.first, vertices, vertex_stride * num_vertices); - vbuf->Unmap(vertex_stride * num_vertices); - glDrawArrays(m_graphics_pipeline->GetGLPrimitive(), buf.second / vertex_stride, num_vertices); - } - else - { - glDrawArrays(m_graphics_pipeline->GetGLPrimitive(), 0, num_vertices); - } -} - -void Renderer::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ - DEBUG_ASSERT(uniforms_size > 0); - - auto buf = ProgramShaderCache::GetUniformBuffer()->Map( - uniforms_size, ProgramShaderCache::GetUniformBufferAlignment()); - std::memcpy(buf.first, uniforms, uniforms_size); - ProgramShaderCache::GetUniformBuffer()->Unmap(uniforms_size); - glBindBufferRange(GL_UNIFORM_BUFFER, 1, ProgramShaderCache::GetUniformBuffer()->m_buffer, - buf.second, uniforms_size); - - // This is rather horrible, but because of how the UBOs are bound, this forces it to rebind. 
- ProgramShaderCache::InvalidateConstants(); -} - -void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* uniforms, - u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z) -{ - glUseProgram(static_cast(shader)->GetGLComputeProgramID()); - if (uniforms_size > 0) - UploadUtilityUniforms(uniforms, uniforms_size); - - glDispatchCompute(groups_x, groups_y, groups_z); - ProgramShaderCache::InvalidateLastProgram(); -} - std::unique_ptr Renderer::CreateAsyncShaderCompiler() { return std::make_unique(); diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index f0cc668563..2de8ff01a5 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -88,7 +88,7 @@ public: bool IsHeadless() const override; - void Init(); + bool Initialize() override; void Shutdown() override; std::unique_ptr CreateTexture(const TextureConfig& config) override; @@ -116,6 +116,8 @@ public: void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; + void Draw(u32 base_vertex, u32 num_vertices) override; + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; void RenderText(const std::string& text, int left, int top, u32 color) override; @@ -137,18 +139,14 @@ public: void ReinterpretPixelData(unsigned int convtype) override; - void DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) override; - - void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size, - u32 groups_x, u32 groups_y, u32 groups_z) override; - std::unique_ptr CreateAsyncShaderCompiler() override; // Only call methods from this on the GPU thread. 
GLContext* GetMainGLContext() const { return m_main_gl_context.get(); } bool IsGLES() const { return m_main_gl_context->IsGLES(); } + const OGLPipeline* GetCurrentGraphicsPipeline() const { return m_graphics_pipeline; } + private: void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const void* data); @@ -165,7 +163,6 @@ private: void ApplyBlendingState(const BlendingState state, bool force = false); void ApplyRasterizationState(const RasterizationState state, bool force = false); void ApplyDepthState(const DepthState state, bool force = false); - void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size); std::unique_ptr m_main_gl_context; std::array m_bound_textures{}; diff --git a/Source/Core/VideoBackends/OGL/StreamBuffer.h b/Source/Core/VideoBackends/OGL/StreamBuffer.h index 83b79bd906..2a8efba1af 100644 --- a/Source/Core/VideoBackends/OGL/StreamBuffer.h +++ b/Source/Core/VideoBackends/OGL/StreamBuffer.h @@ -19,6 +19,8 @@ public: static std::unique_ptr Create(u32 type, u32 size); virtual ~StreamBuffer(); + u32 GetCurrentOffset() const { return m_iterator; } + /* This mapping function will return a pair of: * - the pointer to the mapped buffer * - the offset into the real GPU buffer (always multiple of stride) diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index 00cf6b3cfa..3883a13595 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -15,6 +15,7 @@ #include "Common/StringUtil.h" #include "VideoBackends/OGL/BoundingBox.h" +#include "VideoBackends/OGL/OGLPipeline.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" @@ -31,11 +32,6 @@ namespace OGL const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024; const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024; -static std::unique_ptr 
s_vertexBuffer; -static std::unique_ptr s_indexBuffer; -static size_t s_baseVertex; -static size_t s_index_offset; - VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE) { CreateDeviceObjects(); @@ -48,58 +44,45 @@ VertexManager::~VertexManager() void VertexManager::CreateDeviceObjects() { - s_vertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE); - m_vertex_buffers = s_vertexBuffer->m_buffer; - - s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE); - m_index_buffers = s_indexBuffer->m_buffer; + m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE); + m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE); } void VertexManager::DestroyDeviceObjects() { - s_vertexBuffer.reset(); - s_indexBuffer.reset(); + m_vertex_buffer.reset(); + m_index_buffer.reset(); } -StreamBuffer* VertexManager::GetVertexBuffer() const +void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { - return s_vertexBuffer.get(); -} - -OGL::StreamBuffer* VertexManager::GetIndexBuffer() const -{ - return s_indexBuffer.get(); + ProgramShaderCache::InvalidateConstants(); + ProgramShaderCache::UploadConstants(uniforms, uniforms_size); } GLuint VertexManager::GetVertexBufferHandle() const { - return m_vertex_buffers; + return m_vertex_buffer->m_buffer; } GLuint VertexManager::GetIndexBufferHandle() const { - return m_index_buffers; + return m_index_buffer->m_buffer; } -void VertexManager::PrepareDrawBuffers(u32 stride) +static void CheckBufferBinding() { - u32 vertex_data_size = IndexGenerator::GetNumVerts() * stride; - u32 index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16); - // The index buffer is part of the VAO state, therefore we need to bind it first. 
- const GLVertexFormat* vertex_format = - static_cast(VertexLoaderManager::GetCurrentVertexFormat()); - ProgramShaderCache::BindVertexFormat(vertex_format); - s_vertexBuffer->Unmap(vertex_data_size); - s_indexBuffer->Unmap(index_data_size); - - ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size); - ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size); + if (!ProgramShaderCache::IsValidVertexFormatBound()) + { + ProgramShaderCache::BindVertexFormat( + static_cast(VertexLoaderManager::GetCurrentVertexFormat())); + } } -void VertexManager::ResetBuffer(u32 stride) +void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) { - if (m_cull_all) + if (cull_all) { // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data(); @@ -109,68 +92,41 @@ void VertexManager::ResetBuffer(u32 stride) } else { - // The index buffer is part of the VAO state, therefore we need to bind it first. - const GLVertexFormat* vertex_format = - static_cast(VertexLoaderManager::GetCurrentVertexFormat()); - ProgramShaderCache::BindVertexFormat(vertex_format); + CheckBufferBinding(); - auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride); + auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride); m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; - s_baseVertex = buffer.second / stride; - buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16)); + buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16)); IndexGenerator::Start((u16*)buffer.first); - s_index_offset = buffer.second; } } -void VertexManager::Draw(u32 stride) +void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) { - u32 index_size = IndexGenerator::GetIndexLen(); - u32 max_index = IndexGenerator::GetNumVerts(); - GLenum primitive_mode = 0; + u32 vertex_data_size = 
num_vertices * vertex_stride; + u32 index_data_size = num_indices * sizeof(u16); - switch (m_current_primitive_type) - { - case PrimitiveType::Points: - primitive_mode = GL_POINTS; - break; - case PrimitiveType::Lines: - primitive_mode = GL_LINES; - break; - case PrimitiveType::Triangles: - primitive_mode = GL_TRIANGLES; - break; - case PrimitiveType::TriangleStrip: - primitive_mode = GL_TRIANGLE_STRIP; - break; - } + *out_base_vertex = vertex_stride > 0 ? (m_vertex_buffer->GetCurrentOffset() / vertex_stride) : 0; + *out_base_index = m_index_buffer->GetCurrentOffset() / sizeof(u16); - if (g_ogl_config.bSupportsGLBaseVertex) - { - glDrawRangeElementsBaseVertex(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT, - (u8*)nullptr + s_index_offset, (GLint)s_baseVertex); - } - else - { - glDrawRangeElements(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT, - (u8*)nullptr + s_index_offset); - } + CheckBufferBinding(); + m_vertex_buffer->Unmap(vertex_data_size); + m_index_buffer->Unmap(index_data_size); - INCSTAT(stats.thisFrame.numDrawCalls); + ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size); + ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size); } -void VertexManager::vFlush() +void VertexManager::UploadConstants() { - GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat(); - u32 stride = nativeVertexFmt->GetVertexStride(); - - PrepareDrawBuffers(stride); - - // upload global constants ProgramShaderCache::UploadConstants(); +} +void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +{ if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) { glEnable(GL_STENCIL_TEST); @@ -178,8 +134,8 @@ void VertexManager::vFlush() if (m_current_pipeline_object) { - g_renderer->SetPipeline(m_current_pipeline_object); - Draw(stride); + static_cast(g_renderer.get())->SetPipeline(m_current_pipeline_object); + 
static_cast(g_renderer.get())->DrawIndexed(base_index, num_indices, base_vertex); } if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) @@ -191,5 +147,4 @@ void VertexManager::vFlush() g_Config.iSaveTargetId++; ClearEFBCache(); } - -} // namespace +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index 39203c5f4c..388c559716 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -35,27 +35,26 @@ public: std::unique_ptr CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; - StreamBuffer* GetVertexBuffer() const; - StreamBuffer* GetIndexBuffer() const; GLuint GetVertexBufferHandle() const; GLuint GetIndexBufferHandle() const; protected: - void ResetBuffer(u32 stride) override; + void CreateDeviceObjects() override; + void DestroyDeviceObjects() override; + void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadConstants() override; + void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; private: - void Draw(u32 stride); - void vFlush() override; - void PrepareDrawBuffers(u32 stride); - - GLuint m_vertex_buffers; - GLuint m_index_buffers; + std::unique_ptr m_vertex_buffer; + std::unique_ptr m_index_buffer; // Alternative buffers in CPU memory for primatives we are going to discard. 
std::vector m_cpu_v_buffer; std::vector m_cpu_i_buffer; }; -} +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 6e364113cc..c4ccbdcfd5 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -179,7 +179,8 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) g_texture_cache = std::make_unique(); g_sampler_cache = std::make_unique(); g_shader_cache = std::make_unique(); - static_cast(g_renderer.get())->Init(); + if (!g_renderer->Initialize()) + return false; TextureConverter::Init(); BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); return g_shader_cache->Initialize(); diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp index 2c6a97027b..0b64022725 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -25,7 +25,8 @@ #include "VideoCommon/VideoConfig.h" SWRenderer::SWRenderer(std::unique_ptr window) - : ::Renderer(static_cast(MAX_XFB_WIDTH), static_cast(MAX_XFB_HEIGHT)), + : ::Renderer(static_cast(MAX_XFB_WIDTH), static_cast(MAX_XFB_HEIGHT), + AbstractTextureFormat::RGBA8), m_window(std::move(window)) { } diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index e62a0a2348..4a53b4041c 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -48,14 +48,29 @@ SWVertexLoader::~SWVertexLoader() { } -void SWVertexLoader::ResetBuffer(u32 stride) +void SWVertexLoader::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ +} + +void SWVertexLoader::ResetBuffer(u32 vertex_stride, bool cull_all) { m_cur_buffer_pointer = m_base_buffer_pointer = m_local_vertex_buffer.data(); m_end_buffer_pointer = m_cur_buffer_pointer + m_local_vertex_buffer.size(); 
IndexGenerator::Start(m_local_index_buffer.data()); } -void SWVertexLoader::vFlush() +void SWVertexLoader::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + *out_base_vertex = 0; + *out_base_index = 0; +} + +void SWVertexLoader::UploadConstants() +{ +} + +void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { DebugUtil::OnObjectBegin(); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index abb4c86b7c..dabbdef168 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -23,9 +23,14 @@ public: std::unique_ptr CreateNativeVertexFormat(const PortableVertexDeclaration& vdec) override; -private: - void ResetBuffer(u32 stride) override; - void vFlush() override; + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + +protected: + void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadConstants() override; + void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; void SetFormat(u8 attributeIndex, u8 primitiveType); void ParseVertex(const PortableVertexDeclaration& vdec, int index); diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index a0ad349849..0ab16bdfa8 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -95,7 +95,7 @@ bool VideoSoftware::Initialize(const WindowSystemInfo& wsi) g_perf_query = std::make_unique(); g_texture_cache = std::make_unique(); g_shader_cache = std::make_unique(); - return g_shader_cache->Initialize(); + return g_renderer->Initialize() && g_shader_cache->Initialize(); } void 
VideoSoftware::Shutdown() diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index ce52b0718d..6bec599882 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -49,7 +49,8 @@ namespace Vulkan { Renderer::Renderer(std::unique_ptr swap_chain) : ::Renderer(swap_chain ? static_cast(swap_chain->GetWidth()) : 1, - swap_chain ? static_cast(swap_chain->GetHeight()) : 0), + swap_chain ? static_cast(swap_chain->GetHeight()) : 0, + swap_chain ? swap_chain->GetTextureFormat() : AbstractTextureFormat::Undefined), m_swap_chain(std::move(swap_chain)) { UpdateActiveConfig(); @@ -57,13 +58,7 @@ Renderer::Renderer(std::unique_ptr swap_chain) m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex; } -Renderer::~Renderer() -{ - UpdateActiveConfig(); - - DestroyShaders(); - DestroySemaphores(); -} +Renderer::~Renderer() = default; Renderer* Renderer::GetInstance() { @@ -77,6 +72,9 @@ bool Renderer::IsHeadless() const bool Renderer::Initialize() { + if (!::Renderer::Initialize()) + return false; + BindEFBToStateTracker(); if (!CreateSemaphores()) @@ -98,6 +96,23 @@ bool Renderer::Initialize() return false; } + // Swap chain render pass. 
+ if (m_swap_chain) + { + m_swap_chain_render_pass = + g_object_cache->GetRenderPass(m_swap_chain->GetSurfaceFormat().format, VK_FORMAT_UNDEFINED, + 1, VK_ATTACHMENT_LOAD_OP_LOAD); + m_swap_chain_clear_render_pass = + g_object_cache->GetRenderPass(m_swap_chain->GetSurfaceFormat().format, VK_FORMAT_UNDEFINED, + 1, VK_ATTACHMENT_LOAD_OP_CLEAR); + if (m_swap_chain_render_pass == VK_NULL_HANDLE || + m_swap_chain_clear_render_pass == VK_NULL_HANDLE) + { + PanicAlert("Failed to create swap chain render passes."); + return false; + } + } + m_bounding_box = std::make_unique(); if (!m_bounding_box->Initialize()) { @@ -131,6 +146,18 @@ bool Renderer::Initialize() return true; } +void Renderer::Shutdown() +{ + ::Renderer::Shutdown(); + + // Submit the current command buffer, in case there's a partial frame. + StateTracker::GetInstance()->EndRenderPass(); + g_command_buffer_mgr->ExecuteCommandBuffer(false, true); + + DestroyShaders(); + DestroySemaphores(); +} + bool Renderer::CreateSemaphores() { // Create two semaphores, one that is triggered when the swapchain buffer is ready, another after @@ -232,164 +259,12 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline) StateTracker::GetInstance()->SetPipeline(static_cast(pipeline)); } -void Renderer::DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) -{ - // Binding the utility pipeline layout breaks the standard layout. - StateTracker::GetInstance()->SetPendingRebind(); - - // Upload uniforms. - VkBuffer uniform_buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - u32 uniform_buffer_offset = 0; - if (uniforms_size > 0) - std::tie(uniform_buffer, uniform_buffer_offset) = - UpdateUtilityUniformBuffer(uniforms, uniforms_size); - - // Upload vertices. 
- VkBuffer vertex_buffer = VK_NULL_HANDLE; - VkDeviceSize vertex_buffer_offset = 0; - if (vertices) - { - u32 vertices_size = vertex_stride * num_vertices; - StreamBuffer* vbo_buf = g_object_cache->GetUtilityShaderVertexBuffer(); - if (!vbo_buf->ReserveMemory(vertices_size, vertex_stride)) - { - Util::ExecuteCurrentCommandsAndRestoreState(true); - if (!vbo_buf->ReserveMemory(vertices_size, vertex_stride)) - { - PanicAlert("Failed to reserve vertex buffer space for utility draw."); - return; - } - } - - vertex_buffer = vbo_buf->GetBuffer(); - vertex_buffer_offset = vbo_buf->GetCurrentOffset(); - std::memcpy(vbo_buf->GetCurrentHostPointer(), vertices, vertices_size); - vbo_buf->CommitMemory(vertices_size); - } - - // Allocate descriptor sets. - std::array dsets; - dsets[0] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER)); - dsets[1] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - - // Flush first if failed. - if (dsets[0] == VK_NULL_HANDLE || dsets[1] == VK_NULL_HANDLE) - { - Util::ExecuteCurrentCommandsAndRestoreState(true); - dsets[0] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER)); - dsets[1] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - - if (dsets[0] == VK_NULL_HANDLE || dsets[1] == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate descriptor sets in utility draw."); - return; - } - } - - // Build UBO descriptor set. 
- std::array dswrites; - VkDescriptorBufferInfo dsbuffer = {uniform_buffer, 0, std::max(uniforms_size, 4u)}; - dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, dsets[0], 0, 0, 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, &dsbuffer, nullptr}; - dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - dsets[1], - 0, - 0, - NUM_PIXEL_SHADER_SAMPLERS, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - StateTracker::GetInstance()->GetPSSamplerBindings().data(), - nullptr, - nullptr}; - - // Build commands. - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - StateTracker::GetInstance()->GetPipeline()->GetVkPipeline()); - if (vertex_buffer != VK_NULL_HANDLE) - vkCmdBindVertexBuffers(command_buffer, 0, 1, &vertex_buffer, &vertex_buffer_offset); - - // Update and bind descriptors. - VkPipelineLayout pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UTILITY); - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), - dswrites.data(), 0, nullptr); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, - static_cast(dsets.size()), dsets.data(), 1, &uniform_buffer_offset); - - // Ensure we're in a render pass before drawing, just in case we had to flush. - StateTracker::GetInstance()->BeginRenderPass(); - vkCmdDraw(command_buffer, num_vertices, 1, 0, 0); -} - -void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* uniforms, - u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z) -{ - // Binding the utility pipeline layout breaks the standard layout. - StateTracker::GetInstance()->SetPendingRebind(); - StateTracker::GetInstance()->EndRenderPass(); - - // Upload uniforms. 
- VkBuffer uniform_buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - u32 uniform_buffer_offset = 0; - if (uniforms_size > 0) - std::tie(uniform_buffer, uniform_buffer_offset) = - UpdateUtilityUniformBuffer(uniforms, uniforms_size); - - // Flush first if failed. - VkDescriptorSet dset = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); - if (dset == VK_NULL_HANDLE) - { - Util::ExecuteCurrentCommandsAndRestoreState(true); - dset = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); - if (dset == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate descriptor sets in utility dispatch."); - return; - } - } - - std::array dswrites; - VkDescriptorBufferInfo dsbuffer = {uniform_buffer, 0, std::max(uniforms_size, 4u)}; - dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, dset, 0, 0, 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, &dsbuffer, nullptr}; - dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - dset, - 1, - 0, - NUM_PIXEL_SHADER_SAMPLERS, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - StateTracker::GetInstance()->GetPSSamplerBindings().data(), - nullptr, - nullptr}; - - // TODO: Texel buffers, storage images. - - // Build commands. 
- VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - VkPipelineLayout pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_UTILITY); - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - static_cast(shader)->GetComputePipeline()); - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), - dswrites.data(), 0, nullptr); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, - &dset, 1, &uniform_buffer_offset); - vkCmdDispatch(command_buffer, groups_x, groups_y, groups_z); -} - void Renderer::RenderText(const std::string& text, int left, int top, u32 color) { u32 backbuffer_width = m_swap_chain->GetWidth(); u32 backbuffer_height = m_swap_chain->GetHeight(); - m_raster_font->PrintMultiLineText(m_swap_chain->GetRenderPass(), text, + m_raster_font->PrintMultiLineText(m_swap_chain_render_pass, text, left * 2.0f / static_cast(backbuffer_width) - 1, 1 - top * 2.0f / static_cast(backbuffer_height), backbuffer_width, backbuffer_height, color); @@ -807,20 +682,18 @@ void Renderer::DrawScreen(VKTexture* xfb_texture, const EFBRectangle& xfb_region m_current_framebuffer_width = backbuffer->GetWidth(); m_current_framebuffer_height = backbuffer->GetHeight(); + // Draw to the backbuffer. + VkRect2D region = {{0, 0}, {backbuffer->GetWidth(), backbuffer->GetHeight()}}; + StateTracker::GetInstance()->SetRenderPass(m_swap_chain_render_pass, + m_swap_chain_clear_render_pass); + StateTracker::GetInstance()->SetFramebuffer(m_swap_chain->GetCurrentFramebuffer(), region); + // Begin render pass for rendering to the swap chain. 
VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; - VkRenderPassBeginInfo info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - m_swap_chain->GetRenderPass(), - m_swap_chain->GetCurrentFramebuffer(), - {{0, 0}, {backbuffer->GetWidth(), backbuffer->GetHeight()}}, - 1, - &clear_value}; - vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &info, - VK_SUBPASS_CONTENTS_INLINE); + StateTracker::GetInstance()->BeginClearRenderPass(region, &clear_value, 1); // Draw - BlitScreen(m_swap_chain->GetRenderPass(), GetTargetRectangle(), xfb_region, + BlitScreen(m_swap_chain_render_pass, GetTargetRectangle(), xfb_region, xfb_texture->GetRawTexIdentifier()); // Draw OSD @@ -831,7 +704,7 @@ void Renderer::DrawScreen(VKTexture* xfb_texture, const EFBRectangle& xfb_region OSD::DrawMessages(); // End drawing to backbuffer - vkCmdEndRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer()); + StateTracker::GetInstance()->EndRenderPass(); // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing // to it have finished before present. 
@@ -1021,6 +894,8 @@ void Renderer::RestoreAPIState() static_cast(m_current_framebuffer)->TransitionForSample(); BindEFBToStateTracker(); + BPFunctions::SetViewport(); + BPFunctions::SetScissor(); // Instruct the state tracker to re-bind everything before the next draw StateTracker::GetInstance()->SetPendingRebind(); @@ -1158,6 +1033,23 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne StateTracker::GetInstance()->SetViewport(viewport); } +void Renderer::Draw(u32 base_vertex, u32 num_vertices) +{ + if (!StateTracker::GetInstance()->Bind()) + return; + + vkCmdDraw(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_vertices, 1, base_vertex, 0); +} + +void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + if (!StateTracker::GetInstance()->Bind()) + return; + + vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, + base_vertex, 0); +} + void Renderer::RecompileShaders() { DestroyShaders(); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index ba8fd889bd..bd7efc88ca 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -37,6 +37,9 @@ public: bool IsHeadless() const override; + bool Initialize() override; + void Shutdown() override; + std::unique_ptr CreateTexture(const TextureConfig& config) override; std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; @@ -52,8 +55,6 @@ public: SwapChain* GetSwapChain() const { return m_swap_chain.get(); } BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); } - bool Initialize(); - void RenderText(const std::string& pstr, int left, int top, u32 color) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; @@ -86,11 +87,8 @@ public: void 
SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; - - void DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) override; - void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size, - u32 groups_x, u32 groups_y, u32 groups_z) override; + void Draw(u32 base_vertex, u32 num_vertices) override; + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; private: bool CreateSemaphores(); @@ -124,6 +122,8 @@ private: VkSemaphore m_image_available_semaphore = VK_NULL_HANDLE; VkSemaphore m_rendering_finished_semaphore = VK_NULL_HANDLE; + VkRenderPass m_swap_chain_render_pass = VK_NULL_HANDLE; + VkRenderPass m_swap_chain_clear_render_pass = VK_NULL_HANDLE; std::unique_ptr m_swap_chain; std::unique_ptr m_bounding_box; diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index f3b2752ddd..471a8b12f5 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -218,6 +218,37 @@ void StateTracker::UpdatePixelShaderConstants() PixelShaderManager::dirty = false; } +void StateTracker::UpdateConstants(const void* data, u32 data_size) +{ + if (!m_uniform_stream_buffer->ReserveMemory( + data_size, g_vulkan_context->GetUniformBufferAlignment(), true, true, false)) + { + WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in uniform buffer"); + Util::ExecuteCurrentCommandsAndRestoreState(false); + } + + for (u32 binding = 0; binding < NUM_UBO_DESCRIPTOR_SET_BINDINGS; binding++) + { + if (m_bindings.uniform_buffer_bindings[binding].buffer != m_uniform_stream_buffer->GetBuffer()) + { + m_bindings.uniform_buffer_bindings[binding].buffer = m_uniform_stream_buffer->GetBuffer(); + m_dirty_flags |= DIRTY_FLAG_VS_UBO << binding; + } + 
m_bindings.uniform_buffer_offsets[binding] = + static_cast(m_uniform_stream_buffer->GetCurrentOffset()); + } + m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; + + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + m_uniform_stream_buffer->CommitMemory(data_size); + + // Cached data is now out-of-sync. + VertexShaderManager::dirty = true; + GeometryShaderManager::dirty = true; + PixelShaderManager::dirty = true; +} + bool StateTracker::ReserveConstantStorage() { // Since we invalidate all constants on command buffer execution, it doesn't matter if this @@ -473,16 +504,16 @@ bool StateTracker::Bind(bool rebind_all /*= false*/) { vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, m_num_active_descriptor_sets, - m_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_descriptor_sets.data(), m_num_dynamic_offsets, m_bindings.uniform_buffer_offsets.data()); } else if (m_dirty_flags & DIRTY_FLAG_DYNAMIC_OFFSETS) { - vkCmdBindDescriptorSets( - command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), - DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, 1, - &m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS], - NUM_UBO_DESCRIPTOR_SET_BINDINGS, m_bindings.uniform_buffer_offsets.data()); + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_pipeline->GetVkPipelineLayout(), + DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, 1, + &m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS], + m_num_dynamic_offsets, m_bindings.uniform_buffer_offsets.data()); } if (m_dirty_flags & DIRTY_FLAG_VIEWPORT || rebind_all) @@ -639,6 +670,14 @@ void StateTracker::EndClearRenderPass() } bool StateTracker::UpdateDescriptorSet() +{ + if (m_pipeline->GetUsage() == AbstractPipelineUsage::GX) + return UpdateGXDescriptorSet(); + else + return UpdateUtilityDescriptorSet(); +} + +bool 
StateTracker::UpdateGXDescriptorSet() { const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO 1 + // Samplers @@ -729,6 +768,50 @@ bool StateTracker::UpdateDescriptorSet() vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_writes, writes.data(), 0, nullptr); m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS; + m_num_dynamic_offsets = NUM_UBO_DESCRIPTOR_SET_BINDINGS; + return true; +} + +bool StateTracker::UpdateUtilityDescriptorSet() +{ + // Allocate descriptor sets. + m_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER)); + m_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); + if (m_descriptor_sets[0] == VK_NULL_HANDLE || m_descriptor_sets[1] == VK_NULL_HANDLE) + { + return false; + } + + // Build UBO descriptor set. + std::array dswrites; + dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_descriptor_sets[0], + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS], + nullptr}; + dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_descriptor_sets[1], + 0, + 0, + NUM_PIXEL_SHADER_SAMPLERS, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.ps_samplers.data(), + nullptr, + nullptr}; + + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), + dswrites.data(), 0, nullptr); + m_num_active_descriptor_sets = NUM_UTILITY_DRAW_DESCRIPTOR_SETS; + m_num_dynamic_offsets = 1; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h index 895e139756..a3d7a365ae 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.h +++ 
b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -30,10 +30,6 @@ public: static bool CreateInstance(); static void DestroyInstance(); - const std::array& GetPSSamplerBindings() const - { - return m_bindings.ps_samplers; - } VkFramebuffer GetFramebuffer() const { return m_framebuffer; } const VKPipeline* GetPipeline() const { return m_pipeline; } void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); @@ -47,6 +43,9 @@ public: void UpdateGeometryShaderConstants(); void UpdatePixelShaderConstants(); + // Updates constants from external data, e.g. utility draws. + void UpdateConstants(const void* data, u32 data_size); + void SetTexture(size_t index, VkImageView view); void SetSampler(size_t index, VkSampler sampler); @@ -104,7 +103,8 @@ private: // Number of descriptor sets for game draws. enum { - NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1 + NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1, + NUM_UTILITY_DRAW_DESCRIPTOR_SETS = 2 }; enum DITRY_FLAG : u32 @@ -133,6 +133,8 @@ private: bool IsViewportWithinRenderArea() const; bool UpdateDescriptorSet(); + bool UpdateGXDescriptorSet(); + bool UpdateUtilityDescriptorSet(); // Allocates storage in the uniform buffer of the specified size. 
If this storage cannot be // allocated immediately, the current command buffer will be submitted and all stage's @@ -167,6 +169,7 @@ private: } m_bindings; size_t m_uniform_buffer_reserve_size = 0; u32 m_num_active_descriptor_sets = 0; + u32 m_num_dynamic_offsets = 0; // rasterization VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp index ffddfe4d07..3ba46b9baa 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp @@ -142,11 +142,8 @@ std::unique_ptr SwapChain::Create(void* display_handle, void* native_ std::unique_ptr swap_chain = std::make_unique(display_handle, native_handle, surface, vsync); - if (!swap_chain->CreateSwapChain() || !swap_chain->CreateRenderPass() || - !swap_chain->SetupSwapChainImages()) - { + if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages()) return nullptr; - } return swap_chain; } @@ -175,13 +172,27 @@ bool SwapChain::SelectSurfaceFormat() return true; } - // Use the first surface format, just use what it prefers. - // Some drivers seem to return a SRGB format here (Intel Mesa). - // This results in gamma correction when presenting to the screen, which we don't want. - // Use a linear format instead, if this is the case. - m_surface_format.format = Util::GetLinearFormat(surface_formats[0].format); - m_surface_format.colorSpace = surface_formats[0].colorSpace; - return true; + // Try to find a suitable format. + for (const VkSurfaceFormatKHR& surface_format : surface_formats) + { + // Some drivers seem to return a SRGB format here (Intel Mesa). + // This results in gamma correction when presenting to the screen, which we don't want. + // Use a linear format instead, if this is the case. 
+ VkFormat format = Util::GetLinearFormat(surface_format.format); + if (format == VK_FORMAT_R8G8B8A8_UNORM) + m_texture_format = AbstractTextureFormat::RGBA8; + else if (format == VK_FORMAT_B8G8R8A8_UNORM) + m_texture_format = AbstractTextureFormat::BGRA8; + else + continue; + + m_surface_format.format = format; + m_surface_format.colorSpace = surface_format.colorSpace; + return true; + } + + PanicAlert("Failed to find a suitable format for swap chain buffers."); + return false; } bool SwapChain::SelectPresentMode() @@ -236,14 +247,6 @@ bool SwapChain::SelectPresentMode() return true; } -bool SwapChain::CreateRenderPass() -{ - // render pass for rendering to the swap chain - m_render_pass = g_object_cache->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, 1, - VK_ATTACHMENT_LOAD_OP_CLEAR); - return m_render_pass != VK_NULL_HANDLE; -} - bool SwapChain::CreateSwapChain() { // Look up surface properties to determine image count and dimensions @@ -367,6 +370,9 @@ bool SwapChain::SetupSwapChainImages() images.data()); ASSERT(res == VK_SUCCESS); + VkRenderPass render_pass = g_object_cache->GetRenderPass( + m_surface_format.format, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_CLEAR); + m_swap_chain_images.reserve(image_count); for (uint32_t i = 0; i < image_count; i++) { @@ -382,7 +388,7 @@ bool SwapChain::SetupSwapChainImages() VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, nullptr, 0, - m_render_pass, + render_pass, 1, &view, m_width, @@ -499,7 +505,7 @@ bool SwapChain::RecreateSurface(void* native_handle) } // Finally re-create the swap chain - if (!CreateSwapChain() || !SetupSwapChainImages() || !CreateRenderPass()) + if (!CreateSwapChain() || !SetupSwapChainImages()) return false; return true; diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.h b/Source/Core/VideoBackends/Vulkan/SwapChain.h index 8cf2776a36..4494d7bc57 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.h +++ 
b/Source/Core/VideoBackends/Vulkan/SwapChain.h @@ -10,6 +10,7 @@ #include "Common/CommonTypes.h" #include "VideoBackends/Vulkan/Constants.h" #include "VideoBackends/Vulkan/Texture2D.h" +#include "VideoCommon/TextureConfig.h" namespace Vulkan { @@ -33,10 +34,10 @@ public: void* GetNativeHandle() const { return m_native_handle; } VkSurfaceKHR GetSurface() const { return m_surface; } VkSurfaceFormatKHR GetSurfaceFormat() const { return m_surface_format; } + AbstractTextureFormat GetTextureFormat() const { return m_texture_format; } bool IsVSyncEnabled() const { return m_vsync_enabled; } bool IsStereoEnabled() const { return m_layers == 2; } VkSwapchainKHR GetSwapChain() const { return m_swap_chain; } - VkRenderPass GetRenderPass() const { return m_render_pass; } u32 GetWidth() const { return m_width; } u32 GetHeight() const { return m_height; } u32 GetCurrentImageIndex() const { return m_current_swap_chain_image_index; } @@ -69,8 +70,6 @@ private: bool CreateSwapChain(); void DestroySwapChain(); - bool CreateRenderPass(); - bool SetupSwapChainImages(); void DestroySwapChainImages(); @@ -88,14 +87,13 @@ private: VkSurfaceKHR m_surface = VK_NULL_HANDLE; VkSurfaceFormatKHR m_surface_format = {}; VkPresentModeKHR m_present_mode = VK_PRESENT_MODE_RANGE_SIZE_KHR; + AbstractTextureFormat m_texture_format = AbstractTextureFormat::Undefined; bool m_vsync_enabled; VkSwapchainKHR m_swap_chain = VK_NULL_HANDLE; std::vector m_swap_chain_images; u32 m_current_swap_chain_image_index = 0; - VkRenderPass m_render_pass = VK_NULL_HANDLE; - u32 m_width = 0; u32 m_height = 0; u32 m_layers = 0; diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp index 4ffa5c0146..35c9bcfbaf 100644 --- a/Source/Core/VideoBackends/Vulkan/Util.cpp +++ b/Source/Core/VideoBackends/Vulkan/Util.cpp @@ -130,6 +130,9 @@ VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format) case AbstractTextureFormat::D32F_S8: return VK_FORMAT_D32_SFLOAT_S8_UINT; + case 
AbstractTextureFormat::Undefined: + return VK_FORMAT_UNDEFINED; + default: PanicAlert("Unhandled texture format."); return VK_FORMAT_R8G8B8A8_UNORM; diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp index 4fa3a92b09..bd0ab0b54d 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp @@ -69,25 +69,14 @@ VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_dec return std::make_unique(vtx_decl); } -void VertexManager::PrepareDrawBuffers(u32 stride) +void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { - size_t vertex_data_size = IndexGenerator::GetNumVerts() * stride; - size_t index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16); - - m_vertex_stream_buffer->CommitMemory(vertex_data_size); - m_index_stream_buffer->CommitMemory(index_data_size); - - ADDSTAT(stats.thisFrame.bytesVertexStreamed, static_cast(vertex_data_size)); - ADDSTAT(stats.thisFrame.bytesIndexStreamed, static_cast(index_data_size)); - - StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0); - StateTracker::GetInstance()->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0, - VK_INDEX_TYPE_UINT16); + StateTracker::GetInstance()->UpdateConstants(uniforms, uniforms_size); } -void VertexManager::ResetBuffer(u32 stride) +void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) { - if (m_cull_all) + if (cull_all) { // Not drawing on the gpu, so store in a heap buffer instead m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); @@ -97,7 +86,8 @@ void VertexManager::ResetBuffer(u32 stride) } // Attempt to allocate from buffers - bool has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, stride); + bool has_vbuffer_allocation = + m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, vertex_stride); bool has_ibuffer_allocation = 
m_index_stream_buffer->ReserveMemory(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16)); if (!has_vbuffer_allocation || !has_ibuffer_allocation) @@ -108,7 +98,7 @@ void VertexManager::ResetBuffer(u32 stride) // Attempt to allocate again, this may cause a fence wait if (!has_vbuffer_allocation) - has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, stride); + has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, vertex_stride); if (!has_ibuffer_allocation) has_ibuffer_allocation = m_index_stream_buffer->ReserveMemory(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16)); @@ -123,34 +113,40 @@ void VertexManager::ResetBuffer(u32 stride) m_end_buffer_pointer = m_vertex_stream_buffer->GetCurrentHostPointer() + MAXVBUFFERSIZE; m_cur_buffer_pointer = m_vertex_stream_buffer->GetCurrentHostPointer(); IndexGenerator::Start(reinterpret_cast(m_index_stream_buffer->GetCurrentHostPointer())); - - // Update base indices - m_current_draw_base_vertex = - static_cast(m_vertex_stream_buffer->GetCurrentOffset() / stride); - m_current_draw_base_index = - static_cast(m_index_stream_buffer->GetCurrentOffset() / sizeof(u16)); } -void VertexManager::vFlush() +void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) { - const VertexFormat* vertex_format = - static_cast(VertexLoaderManager::GetCurrentVertexFormat()); - u32 vertex_stride = vertex_format->GetVertexStride(); + const u32 vertex_data_size = num_vertices * vertex_stride; + const u32 index_data_size = num_indices * sizeof(u16); - // Figure out the number of indices to draw - u32 index_count = IndexGenerator::GetIndexLen(); + *out_base_vertex = + vertex_stride > 0 ? 
+ static_cast(m_vertex_stream_buffer->GetCurrentOffset() / vertex_stride) : + 0; + *out_base_index = static_cast(m_index_stream_buffer->GetCurrentOffset() / sizeof(u16)); - // Update tracked state + m_vertex_stream_buffer->CommitMemory(vertex_data_size); + m_index_stream_buffer->CommitMemory(index_data_size); + + ADDSTAT(stats.thisFrame.bytesVertexStreamed, static_cast(vertex_data_size)); + ADDSTAT(stats.thisFrame.bytesIndexStreamed, static_cast(index_data_size)); + + StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0); + StateTracker::GetInstance()->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0, + VK_INDEX_TYPE_UINT16); +} + +void VertexManager::UploadConstants() +{ StateTracker::GetInstance()->UpdateVertexShaderConstants(); StateTracker::GetInstance()->UpdateGeometryShaderConstants(); StateTracker::GetInstance()->UpdatePixelShaderConstants(); +} - // Commit memory to device. - // NOTE: This must be done after constant upload, as a constant buffer overrun can cause - // the current command buffer to be executed, and we want the buffer space to be associated - // with the command buffer that has the corresponding draw. - PrepareDrawBuffers(vertex_stride); - +void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +{ // Flush all EFB pokes and invalidate the peek cache. 
FramebufferManager::GetInstance()->InvalidatePeekCache(); FramebufferManager::GetInstance()->FlushEFBPokes(); @@ -168,19 +164,14 @@ void VertexManager::vFlush() } // Bind all pending state to the command buffer - if (m_current_pipeline_object) + if (StateTracker::GetInstance()->Bind()) { - g_renderer->SetPipeline(m_current_pipeline_object); - if (!StateTracker::GetInstance()->Bind()) - { - WARN_LOG(VIDEO, "Skipped draw of %u indices", index_count); - return; - } - - // Execute the draw - vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), index_count, 1, - m_current_draw_base_index, m_current_draw_base_vertex, 0); - INCSTAT(stats.thisFrame.numDrawCalls); + vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, + base_vertex, 0); + } + else + { + WARN_LOG(VIDEO, "Skipped draw of %u indices", num_indices); } StateTracker::GetInstance()->OnDraw(); diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.h b/Source/Core/VideoBackends/Vulkan/VertexManager.h index 1f892e4151..65c31e11f4 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.h +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.h @@ -27,20 +27,19 @@ public: std::unique_ptr CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; -protected: - void PrepareDrawBuffers(u32 stride); - void ResetBuffer(u32 stride) override; + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; -private: - void vFlush() override; +protected: + void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadConstants() override; + void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; std::vector m_cpu_vertex_buffer; std::vector m_cpu_index_buffer; std::unique_ptr m_vertex_stream_buffer; std::unique_ptr m_index_stream_buffer; - - u32 
m_current_draw_base_vertex = 0; - u32 m_current_draw_base_index = 0; }; } diff --git a/Source/Core/VideoCommon/IndexGenerator.cpp b/Source/Core/VideoCommon/IndexGenerator.cpp index 6cb49aceba..5e45cc6a5f 100644 --- a/Source/Core/VideoCommon/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/IndexGenerator.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include "Common/CommonTypes.h" #include "Common/Compiler.h" @@ -56,6 +57,13 @@ void IndexGenerator::AddIndices(int primitive, u32 numVerts) base_index += numVerts; } +void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices) +{ + std::memcpy(index_buffer_current, indices, sizeof(u16) * num_indices); + index_buffer_current += num_indices; + base_index += num_vertices; +} + // Triangles template DOLPHIN_FORCE_INLINE u16* IndexGenerator::WriteTriangle(u16* Iptr, u32 index1, u32 index2, diff --git a/Source/Core/VideoCommon/IndexGenerator.h b/Source/Core/VideoCommon/IndexGenerator.h index 8d2a5e8700..67f16b66c1 100644 --- a/Source/Core/VideoCommon/IndexGenerator.h +++ b/Source/Core/VideoCommon/IndexGenerator.h @@ -18,6 +18,8 @@ public: static void AddIndices(int primitive, u32 numVertices); + static void AddExternalIndices(const u16* indices, u32 num_indices, u32 num_vertices); + // returns numprimitives static u32 GetNumVerts() { return base_index; } static u32 GetIndexLen() { return (u32)(index_buffer_current - BASEIptr); } diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 2e42ed984a..ba60e1636c 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -78,8 +78,10 @@ static float AspectToWidescreen(float aspect) return aspect * ((16.0f / 9.0f) / (4.0f / 3.0f)); } -Renderer::Renderer(int backbuffer_width, int backbuffer_height) - : m_backbuffer_width(backbuffer_width), m_backbuffer_height(backbuffer_height) +Renderer::Renderer(int backbuffer_width, int 
backbuffer_height, + AbstractTextureFormat backbuffer_format) + : m_backbuffer_width(backbuffer_width), m_backbuffer_height(backbuffer_height), + m_backbuffer_format(backbuffer_format) { UpdateActiveConfig(); UpdateDrawRectangle(); @@ -93,6 +95,11 @@ Renderer::Renderer(int backbuffer_width, int backbuffer_height) Renderer::~Renderer() = default; +bool Renderer::Initialize() +{ + return true; +} + void Renderer::Shutdown() { // First stop any framedumping, which might need to dump the last xfb frame. This process @@ -697,6 +704,11 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const m_last_xfb_region = xfb_rect; + // Since we use the common pipelines here and draw vertices if a batch is currently being + // built by the vertex loader, we end up trampling over its pointer, as we share the buffer + // with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this. + g_vertex_manager->Flush(); + // TODO: merge more generic parts into VideoCommon { std::lock_guard guard(m_swap_mutex); diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index d5dd761c40..bea1f5ba0a 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -32,6 +32,7 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/FPSCounter.h" #include "VideoCommon/RenderState.h" +#include "VideoCommon/TextureConfig.h" #include "VideoCommon/VideoCommon.h" class AbstractFramebuffer; @@ -72,13 +73,16 @@ enum class OSDMessage : s32 class Renderer { public: - Renderer(int backbuffer_width, int backbuffer_height); + Renderer(int backbuffer_width, int backbuffer_height, AbstractTextureFormat backbuffer_format); virtual ~Renderer(); using ClearColor = std::array; virtual bool IsHeadless() const = 0; + virtual bool Initialize(); + virtual void Shutdown(); + virtual void SetPipeline(const AbstractPipeline* pipeline) {} virtual void SetScissorRect(const MathUtil::Rectangle& rc) {} virtual 
void SetTexture(u32 index, const AbstractTexture* texture) {} @@ -110,6 +114,10 @@ public: { } + // Drawing with currently-bound pipeline state. + virtual void Draw(u32 base_vertex, u32 num_vertices) {} + virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {} + // Shader modules/objects. virtual std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) = 0; @@ -188,18 +196,6 @@ public: virtual std::unique_ptr CreateAsyncShaderCompiler(); - virtual void Shutdown(); - - // Drawing utility shaders. - virtual void DrawUtilityPipeline(const void* uniforms, u32 uniforms_size, const void* vertices, - u32 vertex_stride, u32 num_vertices) - { - } - virtual void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, - u32 uniforms_size, u32 groups_x, u32 groups_y, u32 groups_z) - { - } - void ShowOSDMessage(OSDMessage message); protected: @@ -229,6 +225,7 @@ protected: // Backbuffer (window) size and render area int m_backbuffer_width = 0; int m_backbuffer_height = 0; + AbstractTextureFormat m_backbuffer_format = AbstractTextureFormat::Undefined; TargetRectangle m_target_rectangle = {}; FPSCounter m_fps_counter; diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 8364b039c5..0e178e4d9b 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -27,6 +27,7 @@ #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" @@ -131,7 +132,7 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, // need to alloc new buffer if (m_is_flushed) { - g_vertex_manager->ResetBuffer(stride); + g_vertex_manager->ResetBuffer(stride, cullall); 
m_is_flushed = false; } @@ -209,6 +210,28 @@ std::pair VertexManagerBase::ResetFlushAspectRatioCount() return val; } +void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride, + u32 num_vertices, const u16* indices, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + // The GX vertex list should be flushed before any utility draws occur. + ASSERT(m_is_flushed); + + // Copy into the buffers usually used for GX drawing. + ResetBuffer(std::max(vertex_stride, 1u), false); + if (vertices) + { + const u32 copy_size = vertex_stride * num_vertices; + ASSERT((m_cur_buffer_pointer + copy_size) <= m_end_buffer_pointer); + std::memcpy(m_cur_buffer_pointer, vertices, copy_size); + m_cur_buffer_pointer += copy_size; + } + if (indices) + IndexGenerator::AddExternalIndices(indices, num_indices, num_vertices); + + CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index); +} + static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, bool has_arbitrary_mips) { @@ -384,19 +407,35 @@ void VertexManagerBase::Flush() if (!m_cull_all) { + // Update and upload constants. Note for the Vulkan backend, this must occur before the + // vertex/index buffer is committed, otherwise the data will be associated with the + // previous command buffer, instead of the one with the draw if there is an overflow. + GeometryShaderManager::SetConstants(); + PixelShaderManager::SetConstants(); + UploadConstants(); + + // Now the vertices can be flushed to the GPU. + const u32 num_indices = IndexGenerator::GetIndexLen(); + u32 base_vertex, base_index; + CommitBuffer(IndexGenerator::GetNumVerts(), + VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, + &base_vertex, &base_index); + // Update the pipeline, or compile one if needed. 
UpdatePipelineConfig(); UpdatePipelineObject(); + if (m_current_pipeline_object) + { + g_renderer->SetPipeline(m_current_pipeline_object); + if (PerfQueryBase::ShouldEmulate()) + g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - // set the rest of the global constants - GeometryShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); + DrawCurrentBatch(base_index, num_indices, base_vertex); + INCSTAT(stats.thisFrame.numDrawCalls); - if (PerfQueryBase::ShouldEmulate()) - g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - g_vertex_manager->vFlush(); - if (PerfQueryBase::ShouldEmulate()) - g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + if (PerfQueryBase::ShouldEmulate()) + g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + } } GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); @@ -413,7 +452,6 @@ void VertexManagerBase::Flush() void VertexManagerBase::DoState(PointerWrap& p) { p.Do(m_zslope); - g_vertex_manager->vDoState(p); } void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format) diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index 88d9d9fbe0..cd3e4ed552 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -69,9 +69,29 @@ public: m_pipeline_config_changed = true; } + // Utility pipeline drawing (e.g. EFB copies, post-processing, UI). + virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) = 0; + void UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices, + const u16* indices, u32 num_indices, u32* out_base_vertex, + u32* out_base_index); + protected: - virtual void vDoState(PointerWrap& p) {} - virtual void ResetBuffer(u32 stride) = 0; + // Vertex buffers/index buffer creation. 
+ virtual void CreateDeviceObjects() {} + virtual void DestroyDeviceObjects() {} + + // Prepares the buffer for the next batch of vertices. + virtual void ResetBuffer(u32 vertex_stride, bool cull_all) = 0; + + // Commits/uploads the current batch of vertices. + virtual void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) = 0; + + // Uploads uniform buffers for GX draws. + virtual void UploadConstants() = 0; + + // Issues the draw call for the current batch in the backend. + virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) = 0; u8* m_cur_buffer_pointer = nullptr; u8* m_base_buffer_pointer = nullptr; @@ -98,10 +118,6 @@ private: size_t m_flush_count_4_3 = 0; size_t m_flush_count_anamorphic = 0; - virtual void vFlush() = 0; - - virtual void CreateDeviceObjects() {} - virtual void DestroyDeviceObjects() {} void UpdatePipelineConfig(); void UpdatePipelineObject(); };