diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 143fdd952a..a0e704c0b8 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -91,8 +91,6 @@ const Info GFX_SHADER_PRECOMPILER_THREADS{ const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE{ {System::GFX, "Settings", "SaveTextureCacheToState"}, true}; -const Info GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true}; -const Info GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true}; const Info GFX_SW_DUMP_OBJECTS{{System::GFX, "Settings", "SWDumpObjects"}, false}; const Info GFX_SW_DUMP_TEV_STAGES{{System::GFX, "Settings", "SWDumpTevStages"}, false}; const Info GFX_SW_DUMP_TEV_TEX_FETCHES{{System::GFX, "Settings", "SWDumpTevTexFetches"}, diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 9418198976..798f27b3ac 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -73,8 +73,6 @@ extern const Info GFX_SHADER_COMPILER_THREADS; extern const Info GFX_SHADER_PRECOMPILER_THREADS; extern const Info GFX_SAVE_TEXTURE_CACHE_TO_STATE; -extern const Info GFX_SW_ZCOMPLOC; -extern const Info GFX_SW_ZFREEZE; extern const Info GFX_SW_DUMP_OBJECTS; extern const Info GFX_SW_DUMP_TEV_STAGES; extern const Info GFX_SW_DUMP_TEV_TEX_FETCHES; diff --git a/Source/Core/VideoBackends/Software/Clipper.cpp b/Source/Core/VideoBackends/Software/Clipper.cpp index dc22a6a201..f13ebb90b7 100644 --- a/Source/Core/VideoBackends/Software/Clipper.cpp +++ b/Source/Core/VideoBackends/Software/Clipper.cpp @@ -289,10 +289,42 @@ void ProcessTriangle(OutputVertexData* v0, OutputVertexData* v1, OutputVertexDat { INCSTAT(g_stats.this_frame.num_triangles_in) - bool backface; - - if (!CullTest(v0, v1, v2, backface)) + if (IsTriviallyRejected(v0, v1, v2)) + { + INCSTAT(g_stats.this_frame.num_triangles_rejected) + // NOTE: The slope used 
by zfreeze shouldn't be updated if the triangle is + // trivially rejected during clipping return; + } + + bool backface = IsBackface(v0, v1, v2); + + if (!backface) + { + if (bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) + { + // cull frontfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v1); + PerspectiveDivide(v2); + Rasterizer::UpdateZSlope(v0, v1, v2); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } + else + { + if (bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) + { + // cull backfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v2); + PerspectiveDivide(v1); + Rasterizer::UpdateZSlope(v0, v2, v1); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } int indices[NUM_INDICES] = {0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, @@ -461,19 +493,18 @@ void ProcessPoint(OutputVertexData* center) Rasterizer::DrawTriangleFrontFace(&ur, &lr, &ul); } -bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface) +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) { int mask = CalcClipMask(v0); mask &= CalcClipMask(v1); mask &= CalcClipMask(v2); - if (mask) - { - INCSTAT(g_stats.this_frame.num_triangles_rejected) - return false; - } + return mask != 0; +} +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) +{ float x0 = v0->projectedPosition.x; float x1 = v1->projectedPosition.x; float x2 = v2->projectedPosition.x; @@ -486,29 +517,14 @@ bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const Outp float normalZDir = (x0 * w2 - x2 * w0) * y1 + (x2 * y0 - x0 * y2) * w1 + (y2 * w0 - y0 * w2) * x1; - backface = normalZDir <= 
0.0f; + bool backface = normalZDir <= 0.0f; // Jimmie Johnson's Anything with an Engine has a positive viewport, while other games have a // negative viewport. The positive viewport does not require vertices to be vertically mirrored, // but the backface test does need to be inverted for things to be drawn. if (xfmem.viewport.ht > 0) backface = !backface; - // TODO: Are these tests / the definition of backface above backwards? - if ((bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) && - !backface) // cull frontfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - if ((bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) && - backface) // cull backfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - return true; + return backface; } void PerspectiveDivide(OutputVertexData* vertex) diff --git a/Source/Core/VideoBackends/Software/Clipper.h b/Source/Core/VideoBackends/Software/Clipper.h index 4b18023696..21be39c4fe 100644 --- a/Source/Core/VideoBackends/Software/Clipper.h +++ b/Source/Core/VideoBackends/Software/Clipper.h @@ -15,8 +15,10 @@ void ProcessLine(OutputVertexData* v0, OutputVertexData* v1); void ProcessPoint(OutputVertexData* v); -bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface); +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); + +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); void PerspectiveDivide(OutputVertexData* vertex); } // namespace Clipper diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index f886858124..726692138c 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -20,16 +20,82 @@ namespace 
Rasterizer { static constexpr int BLOCK_SIZE = 2; +struct SlopeContext +{ + SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, + s32 x0, s32 y0) + : x0(x0), y0(y0) + { + // adjust a little less than 0.5 + const float adjust = 0.495f; + + xOff = ((float)x0 - v0->screenPosition.x) + adjust; + yOff = ((float)y0 - v0->screenPosition.y) + adjust; + + dx10 = v1->screenPosition.x - v0->screenPosition.x; + dx20 = v2->screenPosition.x - v0->screenPosition.x; + dy10 = v1->screenPosition.y - v0->screenPosition.y; + dy20 = v2->screenPosition.y - v0->screenPosition.y; + } + s32 x0; + s32 y0; + float xOff; + float yOff; + float dx10; + float dx20; + float dy10; + float dy20; +}; + +struct Slope +{ + Slope() = default; + Slope(float f0, float f1, float f2, const SlopeContext& ctx) : f0(f0) + { + float delta_20 = f2 - f0; + float delta_10 = f1 - f0; + + // x2 - x0 y1 - y0 x1 - x0 y2 - y0 + float a = delta_20 * ctx.dy10 - delta_10 * ctx.dy20; + float b = ctx.dx20 * delta_10 - ctx.dx10 * delta_20; + float c = ctx.dx20 * ctx.dy10 - ctx.dx10 * ctx.dy20; + + dfdx = a / c; + dfdy = b / c; + + x0 = ctx.x0; + y0 = ctx.y0; + xOff = ctx.xOff; + yOff = ctx.yOff; + } + + // These default values are used in the unlikely case that zfreeze is enabled when drawing the + // first primitive. + // TODO: This is just a guess! + float dfdx = 0.0f; + float dfdy = 0.0f; + float f0 = 1.0f; + + // Both an s32 value and a float value are used to minimize rounding error + // TODO: is this really needed? 
+ s32 x0 = 0; + s32 y0 = 0; + float xOff = 0.0f; + float yOff = 0.0f; + + float GetValue(s32 x, s32 y) const + { + float dx = xOff + (float)(x - x0); + float dy = yOff + (float)(y - y0); + return f0 + (dfdx * dx) + (dfdy * dy); + } +}; + static Slope ZSlope; static Slope WSlope; static Slope ColorSlopes[2][4]; static Slope TexSlopes[8][3]; -static s32 vertex0X; -static s32 vertex0Y; -static float vertexOffsetX; -static float vertexOffsetY; - static Tev tev; static RasterBlock rasterBlock; @@ -37,11 +103,9 @@ void Init() { tev.Init(); - // Set initial z reference plane in the unlikely case that zfreeze is enabled when drawing the - // first primitive. - // TODO: This is just a guess! - ZSlope.dfdx = ZSlope.dfdy = 0.f; - ZSlope.f0 = 1.f; + // The other slopes are set for each primitive drawn, but zfreeze means that the z slope + // needs to be set to an (untested) default value. + ZSlope = Slope(); } // Returns approximation of log2(f) in s28.4 @@ -75,12 +139,9 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { INCSTAT(g_stats.this_frame.rasterized_pixels); - float dx = vertexOffsetX + (float)(x - vertex0X); - float dy = vertexOffsetY + (float)(y - vertex0Y); + s32 z = (s32)std::clamp(ZSlope.GetValue(x, y), 0.0f, 16777215.0f); - s32 z = (s32)std::clamp(ZSlope.GetValue(dx, dy), 0.0f, 16777215.0f); - - if (bpmem.UseEarlyDepthTest() && g_ActiveConfig.bZComploc) + if (bpmem.UseEarlyDepthTest()) { // TODO: Test if perf regs are incremented even if test is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT_ZCOMPLOC); @@ -104,7 +165,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) { for (int comp = 0; comp < 4; comp++) { - u16 color = (u16)ColorSlopes[i][comp].GetValue(dx, dy); + u16 color = (u16)ColorSlopes[i][comp].GetValue(x, y); // clamp color value to 0 u16 mask = ~(color >> 8); @@ -136,31 +197,6 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi) tev.Draw(); } -static void InitTriangle(float X1, float Y1, s32 xi, s32 yi) -{ - vertex0X = xi; - 
vertex0Y = yi; - - // adjust a little less than 0.5 - const float adjust = 0.495f; - - vertexOffsetX = ((float)xi - X1) + adjust; - vertexOffsetY = ((float)yi - Y1) + adjust; -} - -static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, float DX12, - float DY12, float DY31) -{ - float DF31 = f3 - f1; - float DF21 = f2 - f1; - float a = DF31 * -DY12 - DF21 * DY31; - float b = DX31 * DF21 + DX12 * DF31; - float c = -DX12 * DY31 - DX31 * -DY12; - slope->dfdx = -a / c; - slope->dfdy = -b / c; - slope->f0 = f1; -} - static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord) { auto texUnit = bpmem.tex.GetUnit(texmap); @@ -220,22 +256,22 @@ static void BuildBlock(s32 blockX, s32 blockY) { RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; - float dx = vertexOffsetX + (float)(xi + blockX - vertex0X); - float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y); + s32 x = xi + blockX; + s32 y = yi + blockY; - float invW = 1.0f / WSlope.GetValue(dx, dy); + float invW = 1.0f / WSlope.GetValue(x, y); pixel.InvW = invW; // tex coords for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { float projection = invW; - float q = TexSlopes[i][2].GetValue(dx, dy) * invW; + float q = TexSlopes[i][2].GetValue(x, y) * invW; if (q != 0.0f) projection = invW / q; - pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection; - pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection; + pixel.Uv[i][0] = TexSlopes[i][0].GetValue(x, y) * projection; + pixel.Uv[i][1] = TexSlopes[i][1].GetValue(x, y) * projection; } } } @@ -265,11 +301,27 @@ static void BuildBlock(s32 blockX, s32 blockY) } } +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) +{ + if (!bpmem.genMode.zfreeze) + { + const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9; + const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9; + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + ZSlope = 
Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); + } +} + void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) { INCSTAT(g_stats.this_frame.num_triangles_drawn); + // The zslope should be updated now, even if the triangle is rejected by the scissor test, as + // zfreeze depends on it + UpdateZSlope(v0, v1, v2); + // adapted from http://devmaster.net/posts/6145/advanced-rasterization // 28.4 fixed-pou32 coordinates. rounded to nearest and adjusted to match hardware output @@ -334,42 +386,26 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v if (minx >= maxx || miny >= maxy) return; - // Setup slopes - float fltx1 = v0->screenPosition.x; - float flty1 = v0->screenPosition.y; - float fltdx31 = v2->screenPosition.x - fltx1; - float fltdx12 = fltx1 - v1->screenPosition.x; - float fltdy12 = flty1 - v1->screenPosition.y; - float fltdy31 = v2->screenPosition.y - flty1; - - InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + // Set up the remaining slopes + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w}; - InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31); - - // TODO: The zfreeze emulation is not quite correct, yet! - // Many things might prevent us from reaching this line (culling, clipping, scissoring). - // However, the zslope is always guaranteed to be calculated unless all vertices are trivially - // rejected during clipping! - // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring - // tests fail. 
- if (!bpmem.genMode.zfreeze || !g_ActiveConfig.bZFreeze) - InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, - fltdx12, fltdy12, fltdy31); + WSlope = Slope(w[0], w[1], w[2], ctx); for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) { for (int comp = 0; comp < 4; comp++) - InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], - fltdx31, fltdx12, fltdy12, fltdy31); + ColorSlopes[i][comp] = Slope(v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], ctx); } for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++) { for (int comp = 0; comp < 3; comp++) - InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], - v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31); + { + TexSlopes[i][comp] = Slope(v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], + v2->texCoords[i][comp] * w[2], ctx); + } } // Half-edge constants diff --git a/Source/Core/VideoBackends/Software/Rasterizer.h b/Source/Core/VideoBackends/Software/Rasterizer.h index bae35f7cd8..c278809966 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.h +++ b/Source/Core/VideoBackends/Software/Rasterizer.h @@ -11,20 +11,13 @@ namespace Rasterizer { void Init(); +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); void SetTevReg(int reg, int comp, s16 color); -struct Slope -{ - float dfdx; - float dfdy; - float f0; - - float GetValue(float dx, float dy) const { return f0 + (dfdx * dx) + (dfdy * dy); } -}; - struct RasterBlockPixel { float InvW; diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index af99e5ba85..287fcf4a4b 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ 
b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -32,6 +32,13 @@ SWVertexLoader::SWVertexLoader() = default; SWVertexLoader::~SWVertexLoader() = default; +DataReader SWVertexLoader::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall) +{ + // The software renderer needs cullall to be false for zfreeze to work + return VertexManagerBase::PrepareForAdditionalData(primitive, count, stride, false); +} + void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { DebugUtil::OnObjectBegin(); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index bbda8da037..59b6ca65fd 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -19,6 +19,9 @@ public: SWVertexLoader(); ~SWVertexLoader(); + DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, + bool cullall) override; + protected: void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 6905920405..64e0f7774b 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -840,8 +840,7 @@ void Tev::Draw() output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; } - const bool late_ztest = !bpmem.zcontrol.early_ztest || !g_ActiveConfig.bZComploc; - if (late_ztest && bpmem.zmode.testenable) + if (bpmem.UseLateDepthTest()) { // TODO: Check against hw if these values get incremented even if depth testing is disabled EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT); diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index b3dd49aa61..c413889713 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ 
b/Source/Core/VideoCommon/VertexManagerBase.h @@ -99,8 +99,8 @@ public: PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); - DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, - bool cullall); + virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall); void FlushData(u32 count, u32 stride); void Flush(); diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 4221422abf..cd35be1ae7 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -98,8 +98,6 @@ void VideoConfig::Refresh() iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS); iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS); - bZComploc = Config::Get(Config::GFX_SW_ZCOMPLOC); - bZFreeze = Config::Get(Config::GFX_SW_ZFREEZE); bDumpObjects = Config::Get(Config::GFX_SW_DUMP_OBJECTS); bDumpTevStages = Config::Get(Config::GFX_SW_DUMP_TEV_STAGES); bDumpTevTextureFetches = Config::Get(Config::GFX_SW_DUMP_TEV_TEX_FETCHES); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 6cf8122736..def1435b7f 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -153,8 +153,6 @@ struct VideoConfig final // VideoSW Debugging int drawStart = 0; int drawEnd = 0; - bool bZComploc = false; - bool bZFreeze = false; bool bDumpObjects = false; bool bDumpTevStages = false; bool bDumpTevTextureFetches = false;