From 25b8edd2a6c18b60586bbb39d539b939c1dc8515 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 08:50:59 +0100 Subject: [PATCH 01/10] ogl: fix signed vs unsigned comparison warning --- Source/Core/VideoBackends/OGL/RasterFont.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/RasterFont.cpp b/Source/Core/VideoBackends/OGL/RasterFont.cpp index 8e6ad2d6c8..d281f9a554 100644 --- a/Source/Core/VideoBackends/OGL/RasterFont.cpp +++ b/Source/Core/VideoBackends/OGL/RasterFont.cpp @@ -10,10 +10,10 @@ namespace OGL { -static const u32 char_width = 8; -static const u32 char_height = 13; -static const u32 char_offset = 32; -static const u32 char_count = 95; +static const int char_width = 8; +static const int char_height = 13; +static const int char_offset = 32; +static const int char_count = 95; const u8 rasters[char_count][char_height] = { {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, @@ -141,11 +141,11 @@ RasterFont::RasterFont() glActiveTexture(GL_TEXTURE0+8); glBindTexture(GL_TEXTURE_2D, texture); u32* texture_data = new u32[char_width*char_count*char_height]; - for (u32 y = 0; y < char_height; y++) + for (int y = 0; y < char_height; y++) { - for (u32 c = 0; c < char_count; c++) + for (int c = 0; c < char_count; c++) { - for (u32 x = 0; x < char_width; x++) + for (int x = 0; x < char_width; x++) { bool pixel = (0 != (rasters[c][y] & (1<<(char_width-x-1)))); texture_data[char_width*char_count*y+char_width*c+x] = pixel ? 
-1 : 0; From 3e14bf511f2b367cc85f2b68d44de503341c704b Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 08:57:54 +0100 Subject: [PATCH 02/10] ShaderGen/D3D: inline centroid --- Source/Core/VideoCommon/PixelShaderGen.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 6211066f4d..b08124faa0 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -253,6 +253,14 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (per_pixel_depth) out.Write("#define depth gl_FragDepth\n"); + // We use the flag "centroid" to fix some MSAA rendering bugs. With MSAA, the + // pixel shader will be executed for each pixel which has at least one passed sample. + // So there may be rendered pixels where the center of the pixel isn't in the primitive. + // As the pixel shader usually renders at the center of the pixel, this position may be + // outside the primitive. This will lead to sampling outside the texture, sign changes, ... + // As a workaround, we interpolate at the centroid of the covered part of the pixel, which + // is always inside the primitive. + // Without MSAA, this flag is defined to have no effect. out.Write("centroid in float4 colors_02;\n"); out.Write("centroid in float4 colors_12;\n"); @@ -303,18 +311,15 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", per_pixel_depth ? 
"\n out float depth : SV_Depth," : ""); - // Use centroid sampling to make MSAA work properly - const char* optCentroid = "centroid"; - - out.Write(" in %s float4 colors_0 : COLOR0,\n", optCentroid); - out.Write(" in %s float4 colors_1 : COLOR1", optCentroid); + out.Write(" in centroid float4 colors_0 : COLOR0,\n"); + out.Write(" in centroid float4 colors_1 : COLOR1"); // compute window position if needed because binding semantic WPOS is not widely supported for (unsigned int i = 0; i < numTexgen; ++i) - out.Write(",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i); - out.Write(",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen); + out.Write(",\n in centroid float3 uv%d : TEXCOORD%d", i, i); + out.Write(",\n in centroid float4 clipPos : TEXCOORD%d", numTexgen); if (g_ActiveConfig.bEnablePixelLighting) - out.Write(",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1); + out.Write(",\n in centroid float4 Normal : TEXCOORD%d", numTexgen + 1); out.Write(" ) {\n"); } From fe9fcfdd07873e6767b1d5a625e595127e1c9b06 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 09:02:45 +0100 Subject: [PATCH 03/10] ShaderGen: merge early-z blocks of ogl+d3d --- Source/Core/VideoCommon/PixelShaderGen.cpp | 82 +++++++++++----------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b08124faa0..702a2dc22d 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -171,9 +171,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T unsigned int numStages = bpmem.genMode.numtevstages + 1; unsigned int numTexgen = bpmem.genMode.numtexgens; - const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); - const bool per_pixel_depth = 
(bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z); - out.Write("//Pixel Shader for TEV stages\n"); out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, numTexgen, bpmem.genMode.numindstages); @@ -244,6 +241,39 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (ApiType == API_OPENGL) out.Write("};\n"); + const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); + const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z); + + if (forced_early_z) + { + // Zcomploc (aka early_ztest) is a way to control whether depth test is done before + // or after texturing and alpha test. PC graphics APIs used to provide no way to emulate + // this feature properly until 2012: Depth tests were always done after alpha testing. + // Most importantly, it was not possible to write to the depth buffer without also writing + // a color value (unless color writing was disabled altogether). + + // OpenGL has a flag which allows the driver to still update the depth buffer if alpha + // test fails. The driver isn't required to do this, but I (degasus) assume all of them do + // because it's the much faster code path for the GPU. + + // D3D11 also has a way to force the driver to enable early-z, so we're fine here. 
+ if(ApiType == API_OPENGL) + { + out.Write("layout(early_fragment_tests) in;\n"); + } + else + { + out.Write("[earlydepthstencil]\n"); + } + } + else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode) + { + static bool warn_once = true; + if (warn_once) + WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current configuration. Make sure to enable fast depth calculations. If this message still shows up your hardware isn't able to emulate the feature properly (a GPU with D3D 11.0 / OGL 4.2 support is required)."); + warn_once = false; + } + if (ApiType == API_OPENGL) { out.Write("out vec4 ocol0;\n"); @@ -276,36 +306,10 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("centroid in float4 Normal_2;\n"); } - if (forced_early_z) - { - // HACK: This doesn't force the driver to write to depth buffer if alpha test fails. - // It just allows it, but it seems that all drivers do. - out.Write("layout(early_fragment_tests) in;\n"); - } - else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode) - { - static bool warn_once = true; - if (warn_once) - WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current configuration. Make sure to enable fast depth calculations. 
If this message still shows up your hardware isn't able to emulate the feature properly (a GPU with D3D 11.0 / OGL 4.2 support is required)."); - warn_once = false; - } - out.Write("void main()\n{\n"); } else // D3D { - if (forced_early_z) - { - out.Write("[earlydepthstencil]\n"); - } - else if (bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED) && is_writing_shadercode) - { - static bool warn_once = true; - if (warn_once) - WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current configuration. Make sure to enable fast depth calculations. If this message still shows up your hardware isn't able to emulate the feature properly (a GPU with D3D 11.0 / OGL 4.2 support is required)."); - warn_once = false; - } - out.Write("void main(\n"); out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n", dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "", @@ -950,19 +954,13 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T if (per_pixel_depth) out.Write("\t\tdepth = 1.0;\n"); - // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before - // or after texturing and alpha test. PC graphics APIs have no way to support this - // feature properly as of 2012: Depth buffer and depth test are not - // programmable and the depth test is always done after texturing. - // Most importantly, they do not allow writing to the z-buffer without - // writing a color value (unless color writing is disabled altogether). - // We implement "depth test before texturing" by disabling alpha test when early-z is in use. - // It seems to be less buggy than not to update the depth buffer if alpha test fails, - // but both ways wouldn't be accurate. - - // OpenGL 4.2 has a flag which allows the driver to still update the depth buffer - // if alpha test fails. 
The driver doesn't have to, but I assume they all do because - // it's the much faster code path for the GPU. + // ZCOMPLOC HACK: + // The only way to emulate alpha test + early-z is to force early-z in the shader. + // As this isn't available on all drivers and as we can't emulate this feature otherwise, + // we are only able to choose which one we want to respect more. + // Tests seem to have proven that writing depth even when the alpha test fails is more + // important than a reliable alpha test, so we just force the alpha test to always succeed. + // At least this seems to be less buggy. uid_data.alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ; if (!uid_data.alpha_test_use_zcomploc_hack) { From b0878c54b2a2ee1e0fd45f037562a9a6a61d919c Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 10:19:54 +0100 Subject: [PATCH 04/10] ShaderGen: use cbuffers for D3D --- Source/Core/VideoCommon/PixelShaderGen.cpp | 5 +++-- Source/Core/VideoCommon/ShaderGenCommon.h | 23 +-------------------- Source/Core/VideoCommon/VertexShaderGen.cpp | 5 +++-- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 702a2dc22d..ec58c606f1 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -221,6 +221,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (ApiType == API_OPENGL) out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 1" : ""); + else + out.Write("cbuffer PSBlock {\n"); DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]"); DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]"); @@ -238,8 +240,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[32]"); DeclareUniform(out, ApiType, C_PMATERIALS, "int4", I_PMATERIALS"[4]"); - if (ApiType == API_OPENGL) - out.Write("};\n"); + out.Write("};\n"); const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z); diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index ca3650855c..bf9b382633 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -151,31 +151,10 @@ private: std::vector constant_usage; // TODO: Is vector appropriate here? 
}; -template -static inline void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num) -{ - if (ApiType == API_OPENGL) - return; // Nothing to do here - - object.Write(" : register(%s%d)", prefix, num); -} - -template -static inline void WriteLocation(T& object, API_TYPE ApiType) -{ - if (ApiType == API_OPENGL) - return; - - object.Write("uniform "); -} - template static inline void DeclareUniform(T& object, API_TYPE api_type, const u32 num, const char* type, const char* name) { - WriteLocation(object, api_type); - object.Write("%s %s ", type, name); - WriteRegister(object, api_type, "c", num); - object.Write(";\n"); + object.Write("%s %s;\n", type, name); } /** diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 41549609c7..05d67ed8e5 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -84,6 +84,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ // uniforms if (api_type == API_OPENGL) out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 2" : ""); + else + out.Write("cbuffer VSBlock {\n"); DeclareUniform(out, api_type, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); DeclareUniform(out, api_type, C_PROJECTION, "float4", I_PROJECTION"[4]"); @@ -96,8 +98,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ DeclareUniform(out, api_type, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]"); DeclareUniform(out, api_type, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS); - if (api_type == API_OPENGL) - out.Write("};\n"); + out.Write("};\n"); GenerateVSOutputStruct(out, api_type); From c82991df5ba360986f18cfee32c34405f65df09f Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 10:29:32 +0100 Subject: [PATCH 05/10] ShaderGen: inline constant shaders --- Source/Core/VideoCommon/PixelShaderGen.cpp | 33 ++++++++++----------- Source/Core/VideoCommon/ShaderGenCommon.h | 7 ----- Source/Core/VideoCommon/VertexShaderGen.cpp | 25 ++++++++-------- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ec58c606f1..3ce0403f6c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -223,24 +223,23 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("layout(std140%s) uniform PSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 1" : ""); else out.Write("cbuffer PSBlock {\n"); + out.Write( + "\tint4 " I_COLORS"[4];\n" + "\tint4 " I_KCOLORS"[4];\n" + "\tint4 " I_ALPHA";\n" + "\tfloat4 " I_TEXDIMS"[8];\n" + "\tint4 " I_ZBIAS"[2];\n" + "\tint4 " I_INDTEXSCALE"[2];\n" + "\tint4 " I_INDTEXMTX"[6];\n" + "\tint4 " I_FOGCOLOR";\n" + "\tint4 " I_FOGI";\n" + "\tfloat4 " I_FOGF"[2];\n" - DeclareUniform(out, ApiType, C_COLORS, "int4", I_COLORS"[4]"); - DeclareUniform(out, ApiType, C_KCOLORS, "int4", I_KCOLORS"[4]"); - DeclareUniform(out, ApiType, C_ALPHA, "int4", I_ALPHA); - DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); - DeclareUniform(out, ApiType, C_ZBIAS, "int4", I_ZBIAS"[2]"); - DeclareUniform(out, ApiType, C_INDTEXSCALE, "int4", I_INDTEXSCALE"[2]"); - DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); - DeclareUniform(out, ApiType, C_FOGCOLOR, "int4", I_FOGCOLOR); - DeclareUniform(out, ApiType, C_FOGI, "int4", I_FOGI); - DeclareUniform(out, ApiType, C_FOGF, "float4", I_FOGF"[2]"); - - // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! - DeclareUniform(out, ApiType, C_PLIGHT_COLORS, "int4", I_PLIGHT_COLORS"[8]"); - DeclareUniform(out, ApiType, C_PLIGHTS, "float4", I_PLIGHTS"[32]"); - DeclareUniform(out, ApiType, C_PMATERIALS, "int4", I_PMATERIALS"[4]"); - - out.Write("};\n"); + // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! 
+ "\tint4 " I_PLIGHT_COLORS"[8];\n" + "\tfloat4 " I_PLIGHTS"[32];\n" + "\tint4 " I_PMATERIALS"[4];\n" + "};\n"); const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED); const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z); diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index bf9b382633..5b79f30221 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -150,13 +150,6 @@ public: private: std::vector constant_usage; // TODO: Is vector appropriate here? }; - -template -static inline void DeclareUniform(T& object, API_TYPE api_type, const u32 num, const char* type, const char* name) -{ - object.Write("%s %s;\n", type, name); -} - /** * Checks if there has been */ diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 05d67ed8e5..32bd8ae9fb 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -86,19 +86,18 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("layout(std140%s) uniform VSBlock {\n", g_ActiveConfig.backend_info.bSupportsBindingLayout ? 
", binding = 2" : ""); else out.Write("cbuffer VSBlock {\n"); - - DeclareUniform(out, api_type, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]"); - DeclareUniform(out, api_type, C_PROJECTION, "float4", I_PROJECTION"[4]"); - DeclareUniform(out, api_type, C_MATERIALS, "int4", I_MATERIALS"[4]"); - DeclareUniform(out, api_type, C_LIGHT_COLORS, "int4", I_LIGHT_COLORS"[8]"); - DeclareUniform(out, api_type, C_LIGHTS, "float4", I_LIGHTS"[32]"); - DeclareUniform(out, api_type, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]"); - DeclareUniform(out, api_type, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]"); - DeclareUniform(out, api_type, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]"); - DeclareUniform(out, api_type, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]"); - DeclareUniform(out, api_type, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS); - - out.Write("};\n"); + out.Write( + "\tfloat4 " I_POSNORMALMATRIX"[6];\n" + "\tfloat4 " I_PROJECTION"[4];\n" + "\tint4 " I_MATERIALS"[4];\n" + "\tint4 " I_LIGHT_COLORS"[8];\n" + "\tfloat4 " I_LIGHTS"[32];\n" + "\tfloat4 " I_TEXMATRICES"[24];\n" + "\tfloat4 " I_TRANSFORMMATRICES"[64];\n" + "\tfloat4 " I_NORMALMATRICES"[32];\n" + "\tfloat4 " I_POSTTRANSFORMMATRICES"[64];\n" + "\tfloat4 " I_DEPTHPARAMS";\n" + "};\n"); GenerateVSOutputStruct(out, api_type); From 2bc2b73e03a7a08addd6797f2fc4976fbaac0299 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 17:00:11 +0100 Subject: [PATCH 06/10] PixelShaderGen: drop fmod as it isn't used any more --- Source/Core/VideoCommon/PixelShaderGen.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 3ce0403f6c..b79e802715 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -195,14 +195,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (ApiType == API_OPENGL) { - // Fmod 
implementation gleaned from Nvidia - // At http://http.developer.nvidia.com/Cg/fmod.html - out.Write("float fmod( float x, float y )\n"); - out.Write("{\n"); - out.Write("\tfloat z = fract( abs( x / y) ) * abs( y );\n"); - out.Write("\treturn (x < 0.0) ? -z : z;\n"); - out.Write("}\n"); - // Declare samplers for (int i = 0; i < 8; ++i) out.Write("uniform sampler2D samp%d;\n", i); From c80717ac2e5b4e0cf109d74e2e7edaebc683781c Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 17 Mar 2014 17:11:27 +0100 Subject: [PATCH 07/10] PixelShaderGen: extract iround --- Source/Core/VideoCommon/PixelShaderGen.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b79e802715..7f62056aeb 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -124,8 +124,8 @@ static const char *tevAInputTable[] = static const char *tevRasTable[] = { - "int4(round(colors_0 * 255.0))", - "int4(round(colors_1 * 255.0))", + "iround(colors_0 * 255.0)", + "iround(colors_1 * 255.0)", "ERROR13", //2 "ERROR14", //3 "ERROR15", //4 @@ -193,6 +193,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n" "}\n\n"); + // rounding + casting to integer at once in a single function + out.Write("int iround(float x) { return int (round(x)); }\n" + "int2 iround(float2 x) { return int2(round(x)); }\n" + "int3 iround(float3 x) { return int3(round(x)); }\n" + "int4 iround(float4 x) { return int4(round(x)); }\n\n"); + if (ApiType == API_OPENGL) { // Declare samplers @@ -386,7 +392,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); } - out.Write("\tint2 fixpoint_uv%d = int2(round(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0));\n\n", i, i, i); + out.Write("\tint2 fixpoint_uv%d = 
iround(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0);\n\n", i, i, i); // TODO: S24 overflows here? } } @@ -473,12 +479,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // The performance impact of this additional calculation doesn't matter, but it prevents // the host GPU driver from performing any early depth test optimizations. if (g_ActiveConfig.bFastDepthCalc) - out.Write("\tint zCoord = int(round(rawpos.z * float(0xFFFFFF)));\n"); + out.Write("\tint zCoord = iround(rawpos.z * float(0xFFFFFF));\n"); else { out.SetConstantsUsed(C_ZBIAS+1, C_ZBIAS+1); // the screen space depth value = far z + (clip z / clip w) * z range - out.Write("\tint zCoord = " I_ZBIAS"[1].x + int(round((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y)));\n"); + out.Write("\tint zCoord = " I_ZBIAS"[1].x + iround((clipPos.z / clipPos.w) * float(" I_ZBIAS"[1].y));\n"); } // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either @@ -887,9 +893,9 @@ static inline void SampleTexture(T& out, const char *texcoords, const char *texs out.SetConstantsUsed(C_TEXDIMS+texmap,C_TEXDIMS+texmap); if (ApiType == API_D3D) - out.Write("int4(round(255.0 * Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy))).%s;\n", texmap,texmap, texcoords, texmap, texswap); + out.Write("iround(255.0 * Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"[%d].xy)).%s;\n", texmap,texmap, texcoords, texmap, texswap); else - out.Write("int4(round(255.0 * texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy))).%s;\n", texmap, texcoords, texmap, texswap); + out.Write("iround(255.0 * texture(samp%d,%s.xy * " I_TEXDIMS"[%d].xy)).%s;\n", texmap, texcoords, texmap, texswap); } static const char *tevAlphaFuncsTable[] = @@ -1029,7 +1035,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) WARN_LOG(VIDEO, "Unknown Fog Type! 
%08x", bpmem.fog.c_proj_fsel.fsel); } - out.Write("\tint ifog = int(round(fog * 256.0));\n"); + out.Write("\tint ifog = iround(fog * 256.0);\n"); out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); } From 0e0da8c8fd165b9fbd704ad7846ad4a000e3ae55 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 30 Mar 2014 12:19:15 +0200 Subject: [PATCH 08/10] ShaderGen: make clipPos readonly --- Source/Core/VideoCommon/PixelShaderGen.cpp | 7 ++----- Source/Core/VideoCommon/VertexShaderGen.cpp | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 7f62056aeb..f92054d0cf 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -298,7 +298,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { out.Write("centroid in float3 uv%d_2;\n", i); } - out.Write("centroid in float4 clipPos_2;\n"); + out.Write("centroid in float4 clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("centroid in float4 Normal_2;\n"); @@ -349,7 +349,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tfloat3 uv%d = uv%d_2;\n", i, i); } } - out.Write("\tfloat4 clipPos = clipPos_2;\n"); if (g_ActiveConfig.bEnablePixelLighting) { out.Write("\tfloat4 Normal = Normal_2;\n"); @@ -372,8 +371,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T GenerateLightingShader(out, uid_data.lighting, components, I_PMATERIALS, I_PLIGHT_COLORS, I_PLIGHTS, "colors_", "colors_"); } - out.Write("\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n"); - // HACK to handle cases where the tex gen is not enabled if (numTexgen == 0) { @@ -1018,7 +1015,7 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data) if (bpmem.fogRange.Base.Enabled) { out.SetConstantsUsed(C_FOGF, C_FOGF); - 
out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOGF"[0].y)) - 1.0 - " I_FOGF"[0].x;\n"); + out.Write("\tfloat x_adjust = (2.0 * (rawpos.x / " I_FOGF"[0].y)) - 1.0 - " I_FOGF"[0].x;\n"); out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOGF"[0].z * " I_FOGF"[0].z) / " I_FOGF"[0].z;\n"); out.Write("\tze *= x_adjust;\n"); } diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 32bd8ae9fb..2a98502091 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -137,7 +137,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write("centroid out float3 uv%d_2;\n", i); } } - out.Write("centroid out float4 clipPos_2;\n"); + out.Write("centroid out float4 clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) out.Write("centroid out float4 Normal_2;\n"); @@ -434,7 +434,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i); - out.Write(" clipPos_2 = o.clipPos;\n"); + out.Write(" clipPos = o.clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) out.Write(" Normal_2 = o.Normal;\n"); From 6109958e68112005506c66801ab73e54cac8b4bb Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 30 Mar 2014 12:22:59 +0200 Subject: [PATCH 09/10] ShaderGen: OGL: don't make a copy of "Normal" --- Source/Core/VideoCommon/PixelShaderGen.cpp | 6 +----- Source/Core/VideoCommon/VertexShaderGen.cpp | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index f92054d0cf..28e65c9682 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -301,7 +301,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("centroid in float4 
clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) { - out.Write("centroid in float4 Normal_2;\n"); + out.Write("centroid in float4 Normal;\n"); } out.Write("void main()\n{\n"); @@ -349,10 +349,6 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.Write("\tfloat3 uv%d = uv%d_2;\n", i, i); } } - if (g_ActiveConfig.bEnablePixelLighting) - { - out.Write("\tfloat4 Normal = Normal_2;\n"); - } } if (g_ActiveConfig.bEnablePixelLighting) diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 2a98502091..9bb1ffcf2e 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -139,7 +139,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ } out.Write("centroid out float4 clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) - out.Write("centroid out float4 Normal_2;\n"); + out.Write("centroid out float4 Normal;\n"); out.Write("centroid out float4 colors_02;\n"); out.Write("centroid out float4 colors_12;\n"); @@ -436,7 +436,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i); out.Write(" clipPos = o.clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) - out.Write(" Normal_2 = o.Normal;\n"); + out.Write(" Normal = o.Normal;\n"); out.Write("colors_02 = o.colors_0;\n"); out.Write("colors_12 = o.colors_1;\n"); From 813e30ec2a337e09b5a0339144c9f6b4d356b9a8 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 30 Mar 2014 12:36:57 +0200 Subject: [PATCH 10/10] ShaderGen: make uv readonly --- Source/Core/VideoCommon/PixelShaderGen.cpp | 24 ++++++++------------- Source/Core/VideoCommon/VertexShaderGen.cpp | 4 ++-- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 28e65c9682..f29cb3c692 100644 --- 
a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -296,7 +296,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T // Let's set up attributes for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) { - out.Write("centroid in float3 uv%d_2;\n", i); + out.Write("centroid in float3 uv%d;\n", i); } out.Write("centroid in float4 clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) @@ -305,6 +305,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T } out.Write("void main()\n{\n"); + out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); } else // D3D { @@ -337,18 +338,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T { // On Mali, global variables must be initialized as constants. // This is why we initialize these variables locally instead. - out.Write("\tfloat4 rawpos = gl_FragCoord;\n"); out.Write("\tfloat4 colors_0 = colors_02;\n"); out.Write("\tfloat4 colors_1 = colors_12;\n"); - // compute window position if needed because binding semantic WPOS is not widely supported - // Let's set up attributes - if (numTexgen) - { - for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - { - out.Write("\tfloat3 uv%d = uv%d_2;\n", i, i); - } - } } if (g_ActiveConfig.bEnablePixelLighting) @@ -377,15 +368,18 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T out.SetConstantsUsed(C_TEXDIMS, C_TEXDIMS+numTexgen-1); for (unsigned int i = 0; i < numTexgen; ++i) { + out.Write("\tint2 fixpoint_uv%d = iround(", i); // optional perspective divides uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i; if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ) { - out.Write("\tif (uv%d.z != 0.0)\n", i); - out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i); + out.Write("(uv%d.z == 0.0 ? 
uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i); } - - out.Write("\tint2 fixpoint_uv%d = iround(uv%d.xy * " I_TEXDIMS"[%d].zw * 128.0);\n\n", i, i, i); + else + { + out.Write("uv%d.xy", i); + } + out.Write(" * " I_TEXDIMS"[%d].zw * 128.0);\n\n", i); // TODO: S24 overflows here? } } diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index 9bb1ffcf2e..e372c96827 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -134,7 +134,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ { if (i < xfregs.numTexGen.numTexGens) { - out.Write("centroid out float3 uv%d_2;\n", i); + out.Write("centroid out float3 uv%d;\n", i); } } out.Write("centroid out float4 clipPos;\n"); @@ -433,7 +433,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ // clipPos/w needs to be done in pixel shader, not here for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i) - out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i); + out.Write(" uv%d.xyz = o.tex%d;\n", i, i); out.Write(" clipPos = o.clipPos;\n"); if (g_ActiveConfig.bEnablePixelLighting) out.Write(" Normal = o.Normal;\n");