From 613781c7650d3cbd494a212eacdff10ea5140894 Mon Sep 17 00:00:00 2001 From: NanoByte011 Date: Fri, 26 Dec 2014 01:25:24 -0700 Subject: [PATCH] Cleanup and refactor of zfreeze port Based on the feedback from pull request #1767, I have incorporated most of degasus's suggestions here. I think we have a real winner, as moving the code into a function in VertexManagerBase has allowed OGL to utilize zfreeze now :) Correct use of the vertex pointer has also corrected most of the issues found in pull request #1767 that JMC47 reported. For me, this also has Mario Tennis working with no polygon spikes on the characters anymore! Shadows are still an issue, and probably also in the other games with shadow problems. Rebel Strike also seems better, but random skybox glitches can show up. --- .../Core/VideoBackends/D3D/VertexManager.cpp | 41 +--------------- Source/Core/VideoBackends/D3D/VertexManager.h | 1 - .../Core/VideoBackends/OGL/VertexManager.cpp | 48 +------------------ Source/Core/VideoBackends/OGL/VertexManager.h | 6 +-- Source/Core/VideoCommon/PixelShaderGen.h | 4 +- .../Core/VideoCommon/PixelShaderManager.cpp | 26 +++------- Source/Core/VideoCommon/PixelShaderManager.h | 3 +- Source/Core/VideoCommon/VertexManagerBase.cpp | 40 ++++++++++++++++ Source/Core/VideoCommon/VertexManagerBase.h | 2 + 9 files changed, 57 insertions(+), 114 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/VertexManager.cpp b/Source/Core/VideoBackends/D3D/VertexManager.cpp index 8f925452c2..5f878cc29b 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D/VertexManager.cpp @@ -178,49 +178,12 @@ void VertexManager::vFlush(bool useDstAlpha) } u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(); - u32 indices = IndexGenerator::GetIndexLen(); PrepareDrawBuffers(stride); - if (!bpmem.genMode.zfreeze && indices >= 3) + if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3) { - float vtx[9]; - float out[12]; - - // 
Lookup vertices of the last rendered triangle and software-transform them - // This allows us to determine the depth slope, which will be used if zfreeze - // is enabled in the following flush. - for (unsigned int i = 0; i < 3; ++i) - { - const int base_index = GetIndexBuffer()[indices - 3 + i]; - u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride]; - vtx[0 + i * 3] = ((float*)vtx_ptr)[0]; - vtx[1 + i * 3] = ((float*)vtx_ptr)[1]; - vtx[2 + i * 3] = ((float*)vtx_ptr)[2]; - - VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]); - - // viewport offset ignored because we only look at coordinate differences. - out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd; - out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht; - out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ; - } - float dx31 = out[8] - out[0]; - float dx12 = out[0] - out[4]; - float dy12 = out[1] - out[5]; - float dy31 = out[9] - out[1]; - - float DF31 = out[10] - out[2]; - float DF21 = out[6] - out[2]; - float a = DF31 * -dy12 - DF21 * dy31; - float b = dx31 * DF21 + dx12 * DF31; - float c = -dx12 * dy31 - dx31 * -dy12; - - float slope_dfdx = -a / c; - float slope_dfdy = -b / c; - float slope_f0 = out[2]; - - PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0); + CalculateZSlope(stride); } VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers(); diff --git a/Source/Core/VideoBackends/D3D/VertexManager.h b/Source/Core/VideoBackends/D3D/VertexManager.h index 38fcd088fd..0b124d7512 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.h +++ b/Source/Core/VideoBackends/D3D/VertexManager.h @@ -22,7 +22,6 @@ public: protected: virtual void ResetBuffer(u32 stride) override; u16* GetIndexBuffer() { return &LocalIBuffer[0]; } - u8* GetVertexBuffer() { return &LocalVBuffer[0]; } private: diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp 
b/Source/Core/VideoBackends/OGL/VertexManager.cpp index d3a8d91bca..427a5ecee3 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -42,13 +42,6 @@ static size_t s_index_offset; VertexManager::VertexManager() { - LocalVBuffer.resize(MAXVBUFFERSIZE); - - s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0]; - s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size(); - - LocalIBuffer.resize(MAXIBUFFERSIZE); - CreateDeviceObjects(); } @@ -138,7 +131,6 @@ void VertexManager::vFlush(bool useDstAlpha) { GLVertexFormat *nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat(); u32 stride = nativeVertexFmt->GetVertexStride(); - u32 indices = IndexGenerator::GetIndexLen(); if (m_last_vao != nativeVertexFmt->VAO) { @@ -148,45 +140,9 @@ void VertexManager::vFlush(bool useDstAlpha) PrepareDrawBuffers(stride); - if (!bpmem.genMode.zfreeze && indices >= 3) + if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3) { - float vtx[9]; - float out[12]; - - // Lookup vertices of the last rendered triangle and software-transform them - // This allows us to determine the depth slope, which will be used if zfreeze - // is enabled in the following flush. - for (unsigned int i = 0; i < 3; ++i) - { - const int base_index = GetIndexBuffer()[indices - 3 + i]; - u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride]; - vtx[0 + i * 3] = ((float*)vtx_ptr)[0]; - vtx[1 + i * 3] = ((float*)vtx_ptr)[1]; - vtx[2 + i * 3] = ((float*)vtx_ptr)[2]; - - VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]); - - // viewport offset ignored because we only look at coordinate differences. 
- out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd; - out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht; - out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ; - } - float dx31 = out[8] - out[0]; - float dx12 = out[0] - out[4]; - float dy12 = out[1] - out[5]; - float dy31 = out[9] - out[1]; - - float DF31 = out[10] - out[2]; - float DF21 = out[6] - out[2]; - float a = DF31 * -dy12 - DF21 * dy31; - float b = dx31 * DF21 + dx12 * DF31; - float c = -dx12 * dy31 - dx31 * -dy12; - - float slope_dfdx = -a / c; - float slope_dfdy = -b / c; - float slope_f0 = out[2]; - - PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0); + CalculateZSlope(stride); } // Makes sure we can actually do Dual source blending diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index 0e9efd9c83..f0c6ae9109 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -42,15 +42,11 @@ public: GLuint m_last_vao; protected: virtual void ResetBuffer(u32 stride) override; - u16* GetIndexBuffer() { return &LocalIBuffer[0]; } - u8* GetVertexBuffer() { return &LocalVBuffer[0]; } + private: void Draw(u32 stride); void vFlush(bool useDstAlpha) override; void PrepareDrawBuffers(u32 stride); - - std::vector LocalVBuffer; - std::vector LocalIBuffer; }; } diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index c889bd62a0..eb787fdc81 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -20,8 +20,8 @@ #define C_INDTEXMTX (C_INDTEXSCALE + 2) //21 #define C_FOGCOLOR (C_INDTEXMTX + 6) //27 #define C_FOGI (C_FOGCOLOR + 1) //28 -#define C_FOGF (C_FOGI + 1) //29 -#define C_ZSLOPE (C_FOGF + 1) //30 +#define C_FOGF (C_FOGI + 2) //29 +#define C_ZSLOPE (C_FOGF + 1) //31 #define C_PENVCONST_END (C_ZSLOPE + 2) diff --git 
a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index b55147eb15..d0004a0921 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -14,8 +14,6 @@ bool PixelShaderManager::s_bFogRangeAdjustChanged; bool PixelShaderManager::s_bViewPortChanged; -bool PixelShaderManager::s_bZSlopeChanged; -static float zslope[3]; std::array PixelShaderManager::s_tev_color; std::array PixelShaderManager::s_tev_konst_color; @@ -50,7 +48,7 @@ void PixelShaderManager::Dirty() SetDestAlpha(); SetZTextureBias(); SetViewportChanged(); - SetZSlopeChanged(0, 0, 1); + SetZSlope(0, 0, 1); SetIndTexScaleChanged(false); SetIndTexScaleChanged(true); SetIndMatrixChanged(0); @@ -115,17 +113,6 @@ void PixelShaderManager::SetConstants() dirty = true; s_bViewPortChanged = false; } - - if (s_bZSlopeChanged) - { - constants.zslope[0] = zslope[0]; - constants.zslope[1] = zslope[1]; - constants.zslope[2] = zslope[2]; - constants.zslope[3] = 0; - - dirty = true; - s_bZSlopeChanged = false; - } } void PixelShaderManager::SetTevColor(int index, int component, s32 value) @@ -182,12 +169,13 @@ void PixelShaderManager::SetViewportChanged() s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation } -void PixelShaderManager::SetZSlopeChanged(float dfdx, float dfdy, float f0) +void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0) { - zslope[0] = dfdx; - zslope[1] = dfdy; - zslope[2] = f0; - s_bZSlopeChanged = true; + constants.zslope[0] = dfdx; + constants.zslope[1] = dfdy; + constants.zslope[2] = f0; + constants.zslope[3] = 0; + dirty = true; } void PixelShaderManager::SetIndTexScaleChanged(bool high) diff --git a/Source/Core/VideoCommon/PixelShaderManager.h b/Source/Core/VideoCommon/PixelShaderManager.h index 16c760f70f..faa15cff7e 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.h +++ 
b/Source/Core/VideoCommon/PixelShaderManager.h @@ -36,7 +36,7 @@ public: static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt); static void SetZTextureBias(); static void SetViewportChanged(); - static void SetZSlopeChanged(float dfdx, float dfdy, float f0); + static void SetZSlope(float dfdx, float dfdy, float f0); static void SetIndMatrixChanged(int matrixidx); static void SetTevKSelChanged(int id); static void SetZTextureTypeChanged(); @@ -51,7 +51,6 @@ public: static bool s_bFogRangeAdjustChanged; static bool s_bViewPortChanged; - static bool s_bZSlopeChanged; // These colors aren't available from global BP state, // hence we keep a copy of them around. diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 38cfd19630..80ea3b5bb9 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -241,3 +241,43 @@ void VertexManager::DoState(PointerWrap& p) { g_vertex_manager->vDoState(p); } + +void VertexManager::CalculateZSlope(u32 stride) +{ + float vtx[9]; + float out[12]; + + // Lookup vertices of the last rendered triangle and software-transform them + // This allows us to determine the depth slope, which will be used if zfreeze + // is enabled in the following flush. + for (unsigned int i = 0; i < 3; ++i) + { + u8* vtx_ptr = s_pCurBufferPointer - stride * (3 - i); + vtx[0 + i * 3] = ((float*)vtx_ptr)[0]; + vtx[1 + i * 3] = ((float*)vtx_ptr)[1]; + vtx[2 + i * 3] = ((float*)vtx_ptr)[2]; + + VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]); + + // viewport offset ignored because we only look at coordinate differences. 
+ out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd; + out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht; + out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ; + } + float dx31 = out[8] - out[0]; + float dx12 = out[0] - out[4]; + float dy12 = out[1] - out[5]; + float dy31 = out[9] - out[1]; + + float DF31 = out[10] - out[2]; + float DF21 = out[6] - out[2]; + float a = DF31 * -dy12 - DF21 * dy31; + float b = dx31 * DF21 + dx12 * DF31; + float c = -dx12 * dy31 - dx31 * -dy12; + + float slope_dfdx = -a / c; + float slope_dfdy = -b / c; + float slope_f0 = out[2]; + + PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0); +} diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index c854cd3586..524f3e5a0c 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -41,6 +41,8 @@ public: static void DoState(PointerWrap& p); + static void CalculateZSlope(u32 stride); + protected: virtual void vDoState(PointerWrap& p) { }