mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 15:31:17 +01:00
Fixes Mario Tennis Gimmick Courts and adds support for FastDepthCalc
- Calculate ZSlope on every flush, but only set the pixel shader constant in ResetBuffer when zfreeze is enabled.
- Fixed another pixel shader bug in D3D that was giving me grief.
This commit is contained in:
parent
6d5065c58d
commit
add59b3bea
@ -181,12 +181,10 @@ void VertexManager::vFlush(bool useDstAlpha)
|
|||||||
|
|
||||||
PrepareDrawBuffers(stride);
|
PrepareDrawBuffers(stride);
|
||||||
|
|
||||||
if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3)
|
if (!bpmem.genMode.zfreeze)
|
||||||
{
|
|
||||||
CalculateZSlope(stride);
|
CalculateZSlope(stride);
|
||||||
}
|
|
||||||
|
|
||||||
// if cull mode is CULL_ALL, ignore triangles and quads
|
// If cull mode is CULL_ALL, do not render these triangles
|
||||||
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES)
|
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -202,6 +200,9 @@ void VertexManager::ResetBuffer(u32 stride)
|
|||||||
{
|
{
|
||||||
s_pCurBufferPointer = s_pBaseBufferPointer;
|
s_pCurBufferPointer = s_pBaseBufferPointer;
|
||||||
IndexGenerator::Start(GetIndexBuffer());
|
IndexGenerator::Start(GetIndexBuffer());
|
||||||
|
|
||||||
|
if (bpmem.genMode.zfreeze)
|
||||||
|
PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -89,6 +89,9 @@ void VertexManager::ResetBuffer(u32 stride)
|
|||||||
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
|
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
|
||||||
IndexGenerator::Start((u16*)buffer.first);
|
IndexGenerator::Start((u16*)buffer.first);
|
||||||
s_index_offset = buffer.second;
|
s_index_offset = buffer.second;
|
||||||
|
|
||||||
|
if (bpmem.genMode.zfreeze)
|
||||||
|
PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexManager::Draw(u32 stride)
|
void VertexManager::Draw(u32 stride)
|
||||||
@ -140,12 +143,10 @@ void VertexManager::vFlush(bool useDstAlpha)
|
|||||||
|
|
||||||
PrepareDrawBuffers(stride);
|
PrepareDrawBuffers(stride);
|
||||||
|
|
||||||
if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3)
|
if (!bpmem.genMode.zfreeze)
|
||||||
{
|
|
||||||
CalculateZSlope(stride);
|
CalculateZSlope(stride);
|
||||||
}
|
|
||||||
|
|
||||||
// if cull mode is CULL_ALL, ignore triangles and quads
|
// If cull mode is CULL_ALL, do not render these triangles
|
||||||
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES)
|
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -271,7 +271,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||||||
GenerateVSOutputMembers<T>(out, ApiType);
|
GenerateVSOutputMembers<T>(out, ApiType);
|
||||||
out.Write("};\n");
|
out.Write("};\n");
|
||||||
|
|
||||||
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
|
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
|
||||||
|
&& (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
|
||||||
|
// We can't allow early_ztest for zfreeze because a reference poly is used
|
||||||
|
// to control the depth and we need a depth test after the alpha test.
|
||||||
|
&& !bpmem.genMode.zfreeze;
|
||||||
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
|
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
|
||||||
|
|
||||||
if (forced_early_z)
|
if (forced_early_z)
|
||||||
@ -365,7 +369,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||||||
out.Write("void main(\n");
|
out.Write("void main(\n");
|
||||||
out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
|
out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
|
||||||
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
|
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
|
||||||
per_pixel_depth ? "\n out float depth : SV_Depth," : "");
|
(per_pixel_depth && bpmem.zmode.testenable) ? "\n out float depth : SV_Depth," : "");
|
||||||
|
|
||||||
out.Write(" in centroid float4 colors_0 : COLOR0,\n");
|
out.Write(" in centroid float4 colors_0 : COLOR0,\n");
|
||||||
out.Write(" in centroid float4 colors_1 : COLOR1\n");
|
out.Write(" in centroid float4 colors_1 : COLOR1\n");
|
||||||
@ -1023,7 +1027,11 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_T
|
|||||||
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
||||||
// important than a reliable alpha test, so we just force the alpha test to always succeed.
|
// important than a reliable alpha test, so we just force the alpha test to always succeed.
|
||||||
// At least this seems to be less buggy.
|
// At least this seems to be less buggy.
|
||||||
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
|
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest()
|
||||||
|
&& bpmem.zmode.updateenable
|
||||||
|
&& !g_ActiveConfig.backend_info.bSupportsEarlyZ
|
||||||
|
&& !bpmem.genMode.zfreeze; // Might not be necessary
|
||||||
|
|
||||||
if (!uid_data->alpha_test_use_zcomploc_hack)
|
if (!uid_data->alpha_test_use_zcomploc_hack)
|
||||||
{
|
{
|
||||||
out.Write("\t\tdiscard;\n");
|
out.Write("\t\tdiscard;\n");
|
||||||
@ -1117,7 +1125,7 @@ static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, A
|
|||||||
if (ApiType == API_OPENGL)
|
if (ApiType == API_OPENGL)
|
||||||
out.Write("\tscreenpos.y = %i - screenpos.y - 1;\n", EFB_HEIGHT);
|
out.Write("\tscreenpos.y = %i - screenpos.y - 1;\n", EFB_HEIGHT);
|
||||||
|
|
||||||
out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xffffff);\n");
|
out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -50,7 +50,7 @@ void PixelShaderManager::Dirty()
|
|||||||
SetZTextureBias();
|
SetZTextureBias();
|
||||||
SetViewportChanged();
|
SetViewportChanged();
|
||||||
SetEfbScaleChanged();
|
SetEfbScaleChanged();
|
||||||
SetZSlope(0, 0, 1);
|
SetZSlope(0, 0, (float)0xFFFFFF);
|
||||||
SetIndTexScaleChanged(false);
|
SetIndTexScaleChanged(false);
|
||||||
SetIndTexScaleChanged(true);
|
SetIndTexScaleChanged(true);
|
||||||
SetIndMatrixChanged(0);
|
SetIndMatrixChanged(0);
|
||||||
@ -116,7 +116,8 @@ void PixelShaderManager::SetConstants()
|
|||||||
s_bViewPortChanged = false;
|
s_bViewPortChanged = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s_bEFBScaleChanged) {
|
if (s_bEFBScaleChanged)
|
||||||
|
{
|
||||||
constants.efbscale[0] = 1.0f / float(Renderer::EFBToScaledXf(1));
|
constants.efbscale[0] = 1.0f / float(Renderer::EFBToScaledXf(1));
|
||||||
constants.efbscale[1] = 1.0f / float(Renderer::EFBToScaledYf(1));
|
constants.efbscale[1] = 1.0f / float(Renderer::EFBToScaledYf(1));
|
||||||
dirty = true;
|
dirty = true;
|
||||||
|
@ -25,6 +25,8 @@ u8 *VertexManager::s_pEndBufferPointer;
|
|||||||
|
|
||||||
PrimitiveType VertexManager::current_primitive_type;
|
PrimitiveType VertexManager::current_primitive_type;
|
||||||
|
|
||||||
|
Slope VertexManager::ZSlope;
|
||||||
|
|
||||||
bool VertexManager::IsFlushed;
|
bool VertexManager::IsFlushed;
|
||||||
|
|
||||||
static const PrimitiveType primitive_from_gx[8] = {
|
static const PrimitiveType primitive_from_gx[8] = {
|
||||||
@ -246,6 +248,8 @@ void VertexManager::CalculateZSlope(u32 stride)
|
|||||||
{
|
{
|
||||||
float vtx[9];
|
float vtx[9];
|
||||||
float out[12];
|
float out[12];
|
||||||
|
float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
|
||||||
|
xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
|
||||||
|
|
||||||
// Lookup vertices of the last rendered triangle and software-transform them
|
// Lookup vertices of the last rendered triangle and software-transform them
|
||||||
// This allows us to determine the depth slope, which will be used if zfreeze
|
// This allows us to determine the depth slope, which will be used if zfreeze
|
||||||
@ -260,9 +264,11 @@ void VertexManager::CalculateZSlope(u32 stride)
|
|||||||
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
|
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
|
||||||
|
|
||||||
// Transform to Screenspace
|
// Transform to Screenspace
|
||||||
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd + (xfmem.viewport.xOrig - 342);
|
float w = out[3 + i * 4];
|
||||||
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht + (xfmem.viewport.yOrig - 342);
|
|
||||||
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
|
out[0 + i * 4] = out[0 + i * 4] / w * xfmem.viewport.wd + viewOffset[0];
|
||||||
|
out[1 + i * 4] = out[1 + i * 4] / w * xfmem.viewport.ht + viewOffset[1];
|
||||||
|
out[2 + i * 4] = out[2 + i * 4] / w * xfmem.viewport.zRange + xfmem.viewport.farZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
float dx31 = out[8] - out[0];
|
float dx31 = out[8] - out[0];
|
||||||
@ -276,9 +282,11 @@ void VertexManager::CalculateZSlope(u32 stride)
|
|||||||
float b = dx31 * DF21 + dx12 * DF31;
|
float b = dx31 * DF21 + dx12 * DF31;
|
||||||
float c = -dx12 * dy31 - dx31 * -dy12;
|
float c = -dx12 * dy31 - dx31 * -dy12;
|
||||||
|
|
||||||
float slope_dfdx = -a / c;
|
// Stop divide by zero
|
||||||
float slope_dfdy = -b / c;
|
if (c == 0)
|
||||||
float slope_f0 = out[2] - (out[0] * slope_dfdx + out[1] * slope_dfdy);
|
return;
|
||||||
|
|
||||||
PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0);
|
ZSlope.dfdx = -a / c;
|
||||||
|
ZSlope.dfdy = -b / c;
|
||||||
|
ZSlope.f0 = out[2] - (out[0] * ZSlope.dfdx + out[1] * ZSlope.dfdy);
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,13 @@ enum PrimitiveType {
|
|||||||
PRIMITIVE_TRIANGLES,
|
PRIMITIVE_TRIANGLES,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct Slope
|
||||||
|
{
|
||||||
|
float dfdx;
|
||||||
|
float dfdy;
|
||||||
|
float f0;
|
||||||
|
};
|
||||||
|
|
||||||
class VertexManager
|
class VertexManager
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
@ -41,8 +48,6 @@ public:
|
|||||||
|
|
||||||
static void DoState(PointerWrap& p);
|
static void DoState(PointerWrap& p);
|
||||||
|
|
||||||
static void CalculateZSlope(u32 stride);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void vDoState(PointerWrap& p) { }
|
virtual void vDoState(PointerWrap& p) { }
|
||||||
|
|
||||||
@ -57,6 +62,9 @@ protected:
|
|||||||
static u32 GetRemainingSize();
|
static u32 GetRemainingSize();
|
||||||
static u32 GetRemainingIndices(int primitive);
|
static u32 GetRemainingIndices(int primitive);
|
||||||
|
|
||||||
|
static Slope ZSlope;
|
||||||
|
static void CalculateZSlope(u32 stride);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool IsFlushed;
|
static bool IsFlushed;
|
||||||
|
|
||||||
|
@ -692,6 +692,7 @@ void VertexShaderManager::ResetView()
|
|||||||
|
|
||||||
void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
|
void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
|
||||||
{
|
{
|
||||||
|
// Can we use constants.posnormalmatrix here instead?
|
||||||
const float *world_matrix = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
|
const float *world_matrix = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
|
||||||
const float *proj_matrix = &g_fProjectionMatrix[0];
|
const float *proj_matrix = &g_fProjectionMatrix[0];
|
||||||
|
|
||||||
@ -700,8 +701,6 @@ void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
|
|||||||
t[1] = data[0] * world_matrix[4] + data[1] * world_matrix[5] + data[2] * world_matrix[6] + world_matrix[7];
|
t[1] = data[0] * world_matrix[4] + data[1] * world_matrix[5] + data[2] * world_matrix[6] + world_matrix[7];
|
||||||
t[2] = data[0] * world_matrix[8] + data[1] * world_matrix[9] + data[2] * world_matrix[10] + world_matrix[11];
|
t[2] = data[0] * world_matrix[8] + data[1] * world_matrix[9] + data[2] * world_matrix[10] + world_matrix[11];
|
||||||
|
|
||||||
// TODO: this requires g_fProjectionMatrix to be up to date, which is not really a good design decision.
|
|
||||||
|
|
||||||
out[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
|
out[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
|
||||||
out[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
|
out[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
|
||||||
out[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
|
out[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user