From 7b628c99ec4731ab606b8dd50ce3c3223d81b4eb Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 19 Dec 2015 15:45:21 +1000 Subject: [PATCH 1/2] D3D: Implement vectored efb pokes, increase util vertex buffer size to 64KiB --- Source/Core/VideoBackends/D3D/D3DUtil.cpp | 100 +++++++++++++++++++++- Source/Core/VideoBackends/D3D/D3DUtil.h | 3 + Source/Core/VideoBackends/D3D/Render.cpp | 54 ++++++------ Source/Core/VideoBackends/D3D/Render.h | 2 + 4 files changed, 131 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.cpp b/Source/Core/VideoBackends/D3D/D3DUtil.cpp index e26f960fa3..1b56fad9f7 100644 --- a/Source/Core/VideoBackends/D3D/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D/D3DUtil.cpp @@ -34,6 +34,8 @@ public: buf->Release(); } + int GetSize() const { return max_size; } + // returns vertex offset to the new data int AppendData(void* data, int size, int vertex_size) { @@ -59,6 +61,37 @@ public: return (offset - size) / vertex_size; } + int BeginAppendData(void** write_ptr, int size, int vertex_size) + { + _dbg_assert_(VIDEO, size < max_size); + + D3D11_MAPPED_SUBRESOURCE map; + int aligned_offset = ((offset + vertex_size - 1) / vertex_size) * vertex_size; // align offset to vertex_size bytes + if (aligned_offset + size > max_size) + { + // wrap buffer around and notify observers + offset = 0; + aligned_offset = 0; + context->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); + + for (bool* observer : observers) + *observer = true; + } + else + { + context->Map(buf, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &map); + } + + *write_ptr = reinterpret_cast(map.pData) + aligned_offset; + offset = aligned_offset + size; + return aligned_offset / vertex_size; + } + + void EndAppendData() + { + context->Unmap(buf, 0); + } + void AddWrapObserver(bool* observer) { observers.push_back(observer); @@ -460,7 +493,7 @@ bool stq_observer, stsq_observer, cq_observer, clearq_observer; void InitUtils() { - util_vbuf = new UtilVertexBuffer(0x4000); + util_vbuf = new UtilVertexBuffer(65536); // 64KiB float border[4] = { 0.f, 0.f, 0.f, 0.f }; D3D11_SAMPLER_DESC samDesc = CD3D11_SAMPLER_DESC(D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, 0.f, 1, D3D11_COMPARISON_ALWAYS, border, 0.f, 0.f); @@ -647,6 +680,71 @@ void drawClearQuad(u32 Color, float z) stateman->SetGeometryShader(nullptr); } +static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col) +{ + vert->x = x; + vert->y = y; + vert->z = z; + vert->col = col; +} + +void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points) +{ + const size_t COL_QUAD_SIZE = sizeof(ColVertex) * 6; + + // Set common state + stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); + stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); + stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); + stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); + stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + stateman->SetVertexBuffer(util_vbuf->GetBuffer(), sizeof(ColVertex), 0); + stateman->Apply(); + + // if drawing a large number of points at once, this will have to be split into multiple passes. + size_t points_per_draw = util_vbuf->GetSize() / COL_QUAD_SIZE; + size_t current_point_index = 0; + while (current_point_index < num_points) + { + size_t points_to_draw = std::min(num_points - current_point_index, points_per_draw); + size_t required_bytes = COL_QUAD_SIZE * points_to_draw; + + // map and reserve enough buffer space for this draw + void* buffer_ptr; + int base_vertex_index = util_vbuf->BeginAppendData(&buffer_ptr, (int)required_bytes, sizeof(ColVertex)); + + // generate quads for each efb point + ColVertex* base_vertex_ptr = reinterpret_cast(buffer_ptr); + for (size_t i = 0; i < points_to_draw; i++) + { + // generate quad from the single point (clip-space coordinates) + const EfbPokeData* point = &points[current_point_index]; + float x1 = float(point->x) * 2.0f / EFB_WIDTH - 1.0f; + float y1 = -float(point->y) * 2.0f / EFB_HEIGHT + 1.0f; + float x2 = float(point->x + 1) * 2.0f / EFB_WIDTH - 1.0f; + float y2 = -float(point->y + 1) * 2.0f / EFB_HEIGHT + 1.0f; + float z = (type == POKE_Z) ? (1.0f - float(point->data & 0xFFFFFF) / 16777216.0f) : 0.0f; + u32 col = (type == POKE_Z) ? 0 : ((point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) | ((point->data << 16) & 0xFF0000)); + current_point_index++; + + // quad -> triangles + ColVertex* vertex = &base_vertex_ptr[i * 6]; + InitColVertex(&vertex[0], x1, y1, z, col); + InitColVertex(&vertex[1], x2, y1, z, col); + InitColVertex(&vertex[2], x1, y2, z, col); + InitColVertex(&vertex[3], x1, y2, z, col); + InitColVertex(&vertex[4], x2, y1, z, col); + InitColVertex(&vertex[5], x2, y2, z, col); + } + + // unmap the util buffer, and issue the draw + util_vbuf->EndAppendData(); + context->Draw(6 * (UINT)points_to_draw, base_vertex_index); + } + + stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); +} + } // namespace D3D } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.h b/Source/Core/VideoBackends/D3D/D3DUtil.h index 2f2000f004..a21c901c2e 100644 --- a/Source/Core/VideoBackends/D3D/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D/D3DUtil.h @@ -8,6 +8,7 @@ #include #include "Common/MathUtil.h" +#include "VideoCommon/RenderBase.h" namespace DX11 { @@ -67,6 +68,8 @@ namespace D3D u32 slice = 0); void drawClearQuad(u32 Color, float z); void drawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2); + + void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points); } } diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 970a7e5bea..434742c5c1 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -474,55 +474,55 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) else if (alpha_read_mode.ReadMode == 1) return (ret | 0xFF000000); // GX_READ_FF else /*if(alpha_read_mode.ReadMode == 0)*/ return (ret & 0x00FFFFFF); // GX_READ_00 } - else if (type == POKE_COLOR) + else // if (type == POKE_COLOR || type == POKE_Z) { - u32 rgbaColor = (poke_data & 0xFF00FF00) | ((poke_data >> 16) & 0xFF) | ((poke_data << 16) & 0xFF0000); + std::vector vector; + EfbPokeData d; + d.x = x; + d.y = y; + d.data = poke_data; + vector.push_back(d); + PokeEFB(type, vector); + } - // TODO: The first five PE registers may change behavior of EFB pokes, this isn't implemented, yet. - ResetAPIState(); + return 0; +} +void Renderer::PokeEFB(EFBAccessType type, const std::vector& data) +{ + ResetAPIState(); + + if (type == POKE_COLOR) + { D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight()); D3D::context->RSSetViewports(1, &vp); - D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), nullptr); - D3D::drawColorQuad(rgbaColor, 0.f, - (float)RectToLock.left * 2.f / GetTargetWidth() - 1.f, - - (float)RectToLock.top * 2.f / GetTargetHeight() + 1.f, - (float)RectToLock.right * 2.f / GetTargetWidth() - 1.f, - - (float)RectToLock.bottom * 2.f / GetTargetHeight() + 1.f); - - RestoreAPIState(); } else // if (type == POKE_Z) { - // TODO: The first five PE registers may change behavior of EFB pokes, this isn't implemented, yet. - ResetAPIState(); - D3D::stateman->PushBlendState(clearblendstates[3]); D3D::stateman->PushDepthState(cleardepthstates[1]); D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight(), 1.0f - MathUtil::Clamp(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f, 1.0f - MathUtil::Clamp((xfmem.viewport.farZ - MathUtil::Clamp(xfmem.viewport.zRange, 0.0f, 16777215.0f)), 0.0f, 16777215.0f) / 16777216.0f); + D3D::context->RSSetViewports(1, &vp); D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); - D3D::drawColorQuad(0, 1.0f - float(poke_data & 0xFFFFFF) / 16777216.0f, - (float)RectToLock.left * 2.f / GetTargetWidth() - 1.f, - - (float)RectToLock.top * 2.f / GetTargetHeight() + 1.f, - (float)RectToLock.right * 2.f / GetTargetWidth() - 1.f, - - (float)RectToLock.bottom * 2.f / GetTargetHeight() + 1.f); - - D3D::stateman->PopDepthState(); - D3D::stateman->PopBlendState(); - - RestoreAPIState(); } - return 0; -} + D3D::DrawEFBPokeQuads(type, data.data(), data.size()); + if (type == POKE_Z) + { + D3D::stateman->PopDepthState(); + D3D::stateman->PopBlendState(); + } + + RestoreAPIState(); +} void Renderer::SetViewport() { diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index ab1ef5f265..a6da457be8 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -37,6 +37,8 @@ public: void RenderText(const std::string& text, int left, int top, u32 color) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; + void PokeEFB(EFBAccessType type, const std::vector& data) override; + u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; From a61fc372bbb538cba7bf43a6402f3036fb468810 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 20 Dec 2015 00:34:56 +1000 Subject: [PATCH 2/2] VideoCommon: Change PokeEFB to take a pointer rather than a vector This saves allocating a vector for the pass-through path. --- Source/Core/VideoBackends/D3D/Render.cpp | 14 ++------------ Source/Core/VideoBackends/D3D/Render.h | 2 +- .../VideoBackends/OGL/FramebufferManager.cpp | 6 +++--- .../Core/VideoBackends/OGL/FramebufferManager.h | 2 +- Source/Core/VideoBackends/OGL/Render.cpp | 17 ++--------------- Source/Core/VideoBackends/OGL/Render.h | 2 +- Source/Core/VideoCommon/AsyncRequests.cpp | 12 +++++++++--- Source/Core/VideoCommon/RenderBase.cpp | 7 ------- Source/Core/VideoCommon/RenderBase.h | 2 +- 9 files changed, 20 insertions(+), 44 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 434742c5c1..e7a04ea6ba 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -474,21 +474,11 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) else if (alpha_read_mode.ReadMode == 1) return (ret | 0xFF000000); // GX_READ_FF else /*if(alpha_read_mode.ReadMode == 0)*/ return (ret & 0x00FFFFFF); // GX_READ_00 } - else // if (type == POKE_COLOR || type == POKE_Z) - { - std::vector vector; - EfbPokeData d; - d.x = x; - d.y = y; - d.data = poke_data; - vector.push_back(d); - PokeEFB(type, vector); - } return 0; } -void Renderer::PokeEFB(EFBAccessType type, const std::vector& data) +void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) { ResetAPIState(); @@ -513,7 +503,7 @@ void Renderer::PokeEFB(EFBAccessType type, const std::vector& data) FramebufferManager::GetEFBDepthTexture()->GetDSV()); } - D3D::DrawEFBPokeQuads(type, data.data(), data.size()); + D3D::DrawEFBPokeQuads(type, points, num_points); if (type == POKE_Z) { diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index a6da457be8..8e25419a5a 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -37,7 +37,7 @@ public: void RenderText(const std::string& text, int left, int top, u32 color) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const std::vector& data) override; + void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp index 6bde570d66..e826c031b0 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp @@ -643,7 +643,7 @@ void FramebufferManager::GetTargetSize(unsigned int *width, unsigned int *height *height = m_targetHeight; } -void FramebufferManager::PokeEFB(EFBAccessType type, const std::vector& data) +void FramebufferManager::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) { g_renderer->ResetAPIState(); @@ -657,10 +657,10 @@ void FramebufferManager::PokeEFB(EFBAccessType type, const std::vectorRestoreAPIState(); diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h index a1d2895562..24978990eb 100644 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/FramebufferManager.h @@ -92,7 +92,7 @@ public: // convtype=0 -> rgb8->rgba6, convtype=2 -> rgba6->rgb8 static void ReinterpretPixelData(unsigned int convtype); - static void PokeEFB(EFBAccessType type, const std::vector& data); + static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); private: XFBSourceBase* CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 4720925304..11e028c5c6 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -944,19 +944,6 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) } } - case POKE_COLOR: - case POKE_Z: - { - std::vector vector; - EfbPokeData d; - d.x = x; - d.y = y; - d.data = poke_data; - vector.push_back(d); - PokeEFB(type, vector); - break; - } - default: break; } @@ -964,9 +951,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) return 0; } -void Renderer::PokeEFB(EFBAccessType type, const std::vector& data) +void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) { - FramebufferManager::PokeEFB(type, data); + FramebufferManager::PokeEFB(type, points, num_points); } u16 Renderer::BBoxRead(int index) diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index b939911cee..3eb64663ab 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -87,7 +87,7 @@ public: void FlipImageData(u8 *data, int w, int h, int pixel_width = 3); u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const std::vector& data) override; + void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index 993b6bce2c..1ce7c8fdb7 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -44,7 +44,7 @@ void AsyncRequests::PullEventsInternal() } while(!m_queue.empty() && m_queue.front().type == first_event.type); lock.unlock(); - g_renderer->PokeEFB(t, m_merged_efb_pokes); + g_renderer->PokeEFB(t, m_merged_efb_pokes.data(), m_merged_efb_pokes.size()); lock.lock(); continue; } @@ -109,11 +109,17 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) switch (e.type) { case Event::EFB_POKE_COLOR: - g_renderer->AccessEFB(POKE_COLOR, e.efb_poke.x, e.efb_poke.y, e.efb_poke.data); + { + EfbPokeData poke = { e.efb_poke.x, e.efb_poke.y, e.efb_poke.data }; + g_renderer->PokeEFB(POKE_COLOR, &poke, 1); + } break; case Event::EFB_POKE_Z: - g_renderer->AccessEFB(POKE_Z, e.efb_poke.x, e.efb_poke.y, e.efb_poke.data); + { + EfbPokeData poke = { e.efb_poke.x, e.efb_poke.y, e.efb_poke.data }; + g_renderer->PokeEFB(POKE_Z, &poke, 1); + } break; case Event::EFB_PEEK_COLOR: diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 89bfa4dce1..3742145ebc 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -617,10 +617,3 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const XFBWrited = false; } -void Renderer::PokeEFB(EFBAccessType type, const std::vector& data) -{ - for (EfbPokeData poke : data) - { - AccessEFB(type, poke.x, poke.y, poke.data); - } -} diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index ad53f2f20c..773fb45ee5 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -115,7 +115,7 @@ public: static void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, float Gamma = 1.0f); virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0; - virtual void PokeEFB(EFBAccessType type, const std::vector& data); + virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0; virtual u16 BBoxRead(int index) = 0; virtual void BBoxWrite(int index, u16 value) = 0;