diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index a11169d27a..953da5bb61 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -51,11 +51,21 @@ inline u32 _rotl(u32 x, int shift) { return (x << shift) | (x >> (32 - shift)); } +inline u64 _rotl64(u64 x, unsigned int shift){ + unsigned int n = shift % 64; + return (x << n) | (x >> (64 - n)); +} + inline u32 _rotr(u32 x, int shift) { shift &= 31; if (!shift) return x; return (x >> shift) | (x << (32 - shift)); } + +inline u64 _rotr64(u64 x, unsigned int shift){ + unsigned int n = shift % 64; + return (x >> n) | (x << (64 - n)); +} #define SLEEP(x) usleep(x*1000) #else // WIN32 // Function Cross-Compatibility diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index c6db5582bb..1cf39c17ea 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -88,57 +88,58 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format) return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2; } -/*u64 TexDecoder_GetTlutHash(const u8* src, int len) +u64 TexDecoder_GetFullHash(const u8 *key, int len, u64 seed) { - //char str[40000], st[20]; str[0]='\0';for (int i=0;i> r; + k *= m; + + h ^= k; + h *= m; + } + + const u8 * data2 = (const u8*)data; + + switch(len & 7) + { + case 7: h ^= u64(data2[6]) << 48; + case 6: h ^= u64(data2[5]) << 40; + case 5: h ^= u64(data2[4]) << 32; + case 4: h ^= u64(data2[3]) << 24; + case 3: h ^= u64(data2[2]) << 16; + case 2: h ^= u64(data2[1]) << 8; + case 1: h ^= u64(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +u64 TexDecoder_GetFastHash(const u8 *src, int len, u64 seed) { - int len = TexDecoder_GetTextureSizeInBytes(width, height, texformat); - u64 hash = seed ? seed : 0xbeefbabe1337c0de; - int step = len / 29 / 8; + u64 hash = seed ? seed : 0x1337c0debeefbabe; + int step = (len / 8) / 37; if (!step) step = 1; for (int i = 0; i < len / 8; i += step) { - hash = _rotl(hash, 17) ^ ((u64 *)src)[i]; - } - return hash; -} -*/ - -u32 TexDecoder_GetTlutHash(const u8* src, int len) -{ - //char str[40000], st[20]; str[0]='\0';for (int i=0;iCheckDeviceMultiSampleType( i, D3DDEVTYPE_HAL, D3DFMT_X8R8G8B8, TRUE, D3DMULTISAMPLE_2_SAMPLES, &qlevels)) @@ -208,7 +211,7 @@ void Enumerate() } } } - + */ // Determine if INTZ is supported. Code from ATI's doc. // http://developer.amd.com/gpu_assets/Advanced%20DX9%20Capabilities%20for%20ATI%20Radeon%20Cards.pdf a.supports_intz = D3D_OK == D3D->CheckDeviceFormat( diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index a3420d4e3e..897ea27f48 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -42,27 +42,26 @@ LinearDiskCache g_ps_disk_cache; static float lastPSconstants[C_COLORMATRIX+16][4]; -static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram = 0; +static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[4]; +static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[4]; +static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[4]; static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_ClearZProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_FSAAProgram = 0; -static LPDIRECT3DPIXELSHADER9 s_FSAAColorMatrixProgram = 0; -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram() + + +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode) { - return s_ColorMatrixProgram; + return s_ColorMatrixProgram[SSAAMode]; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram() +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode) { - return s_DepthMatrixProgram; + return s_DepthMatrixProgram[SSAAMode]; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram() +LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode) { - return s_ColorCopyProgram; + return s_ColorCopyProgram[SSAAMode]; } LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() @@ -70,16 +69,6 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() return s_ClearProgram; } -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAProgram() -{ - return s_FSAAProgram; -} - -LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetFSAAColorMatrixProgram() -{ - return s_FSAAColorMatrixProgram; -} - void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 || @@ -123,7 +112,8 @@ public: void PixelShaderCache::Init() { - char pprog[2048]; + //program used for clear screen + char pprog[3072]; sprintf(pprog, "void main(\n" "out float4 ocol0 : COLOR0,\n" " in float4 incol0 : COLOR0){\n" @@ -131,61 +121,180 @@ void PixelShaderCache::Init() "}\n"); s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //Used for Copy/resolve the color buffer + //1 Sample sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float4 uv0 : TEXCOORD0){\n" - "ocol0 = tex2D(samp0,uv0.xy);\n" + "in float2 uv0 : TEXCOORD0){\n" + "ocol0 = tex2D(samp0,uv0);\n" "}\n"); - s_ColorCopyProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_ColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); - - sprintf(pprog, "uniform sampler samp0 : register(s0);\n" - "uniform float4 cColMatrix[5] : register(c%d);\n" - "void main(\n" - "out float4 ocol0 : COLOR0,\n" - " in float4 uv0 : TEXCOORD0){\n" - "float4 texcol = tex2D(samp0,uv0.xy);\n" - "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" - "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" - "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" - "}\n",C_COLORMATRIX); - s_DepthMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //2 samples sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0,\n" - "in float2 uv1 : TEXCOORD1,\n" - "in float2 uv2 : TEXCOORD2,\n" - "in float2 uv3 : TEXCOORD3){\n" - "ocol0 = (tex2D(samp0,uv0) + tex2D(samp0,uv1) + tex2D(samp0,uv2) + tex2D(samp0,uv3))*0.25f;\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy))*0.5f;\n" "}\n"); - s_FSAAProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + //4 Samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n" + "}\n"); + s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 Samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4){\n" + "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n" + "}\n"); + s_ColorCopyProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //Color conversion Programs + //1 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0){\n" + "float4 texcol = tex2D(samp0,uv0);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //2 samples sprintf(pprog, "uniform sampler samp0 : register(s0);\n" "uniform float4 cColMatrix[5] : register(c%d);\n" "void main(\n" "out float4 ocol0 : COLOR0,\n" - "in float2 uv0 : TEXCOORD0,\n" - "in float2 uv1 : TEXCOORD1,\n" - "in float2 uv2 : TEXCOORD2,\n" - "in float2 uv3 : TEXCOORD3,\n" - "in float4 uv4 : TEXCOORD4){\n" - "float4 texcol = float4(0.0f,0.0f,0.0f,0.0f);\n" - "texcol = (tex2D(samp0,float2(clamp(uv0.x,uv4.x,uv4.z),clamp(uv0.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv1.x,uv4.x,uv4.z),clamp(uv1.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv2.x,uv4.x,uv4.z),clamp(uv2.y,uv4.y,uv4.w))) + tex2D(samp0,float2(clamp(uv3.x,uv4.x,uv4.z),clamp(uv3.y,uv4.y,uv4.w))))*0.25f;\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n",C_COLORMATRIX); - s_FSAAColorMatrixProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //4 samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 samples + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_ColorMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //Depth copy programs + //1 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + " in float2 uv0 : TEXCOORD0){\n" + "float4 texcol = tex2D(samp0,uv0);\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //2 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //4 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); + + //9 sample + sprintf(pprog, "uniform sampler samp0 : register(s0);\n" + "uniform float4 cColMatrix[5] : register(c%d);\n" + "void main(\n" + "out float4 ocol0 : COLOR0,\n" + "in float4 uv0 : TEXCOORD0,\n" + "in float4 uv1 : TEXCOORD1,\n" + "in float4 uv2 : TEXCOORD2,\n" + "in float4 uv3 : TEXCOORD3,\n" + "in float4 uv4 : TEXCOORD4,\n" + "in float4 uv5 : TEXCOORD5){\n" + "float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n" + "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" + "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" + "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" + "}\n",C_COLORMATRIX); + s_DepthMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); Clear(); @@ -213,18 +322,18 @@ void PixelShaderCache::Clear() void PixelShaderCache::Shutdown() { - if (s_ColorMatrixProgram) s_ColorMatrixProgram->Release(); - s_ColorMatrixProgram = NULL; - if (s_ColorCopyProgram) s_ColorCopyProgram->Release(); - s_ColorCopyProgram = NULL; - if (s_DepthMatrixProgram) s_DepthMatrixProgram->Release(); - s_DepthMatrixProgram = NULL; + for(int i = 0;i<4;i++) + { + if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release(); + s_ColorMatrixProgram[i] = NULL; + if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release(); + s_ColorCopyProgram[i] = NULL; + if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release(); + s_DepthMatrixProgram[i] = NULL; + } if (s_ClearProgram) s_ClearProgram->Release(); s_ClearProgram = NULL; - if (s_FSAAProgram) s_FSAAProgram->Release(); - s_FSAAProgram = NULL; - if (s_FSAAColorMatrixProgram) s_FSAAColorMatrixProgram->Release(); - s_FSAAColorMatrixProgram = NULL; + Clear(); g_ps_disk_cache.Sync(); g_ps_disk_cache.Close(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h index 5cf64e1a3a..40ace0b67c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.h @@ -62,12 +62,10 @@ public: static void Shutdown(); static bool SetShader(bool dstAlpha); static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate); - static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(); - static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(); - static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(); - static LPDIRECT3DPIXELSHADER9 GetClearProgram(); - static LPDIRECT3DPIXELSHADER9 GetFSAAProgram(); - static LPDIRECT3DPIXELSHADER9 GetFSAAColorMatrixProgram(); + static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode); + static LPDIRECT3DPIXELSHADER9 GetClearProgram(); #if defined(_DEBUG) || defined(DEBUGFAST) static std::string GetCurrentShaderCode(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index e6f86b95f6..43a7ec7f4b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -259,7 +259,7 @@ bool Renderer::Init() UpdateActiveConfig(); int fullScreenRes, w_temp, h_temp; s_blendMode = 0; - // Anti-aliasing hasn't been implemented yet + // Multisample Anti-aliasing hasn't been implemented yet int backbuffer_ms_mode = 0; // g_ActiveConfig.iMultisampleMode; sscanf(g_Config.cFSResolution, "%dx%d", &w_temp, &h_temp); @@ -283,7 +283,7 @@ bool Renderer::Init() // TODO: Grab target width from configured resolution? s_target_width = s_backbuffer_width; s_target_height = s_backbuffer_height * ((float)EFB_HEIGHT / 480.0f); - s_LastAA = g_ActiveConfig.iMultisampleMode; + s_LastAA = (g_ActiveConfig.iMultisampleMode > 3)?0:g_ActiveConfig.iMultisampleMode; switch (s_LastAA) { @@ -295,6 +295,10 @@ bool Renderer::Init() s_target_width *= 2; s_target_height *= 2; break; + case 3: + s_target_width *= 3; + s_target_height *= 3; + break; default: break; }; @@ -580,14 +584,10 @@ static void EFBTextureToD3DBackBuffer(const EFBRectangle& sourceRc) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); - if(g_ActiveConfig.iMultisampleMode > 0 ) - { - D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetFSAAProgram(),VertexShaderCache::GetFSAAVertexShader()); - } - else - { - D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader()); - } + int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode; + + D3D::drawShadedTexQuad(read_texture,&sourcerect,Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),PixelShaderCache::GetColorCopyProgram(SSAAMode),(SSAAMode != 0)?VertexShaderCache::GetFSAAVertexShader():VertexShaderCache::GetSimpleVertexShader()); + D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); @@ -878,7 +878,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, int x, int y) D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); - D3D::drawShadedTexQuad(read_texture,&RectToLock, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(),(BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram():PixelShaderCache::GetDepthMatrixProgram(),VertexShaderCache::GetSimpleVertexShader()); + D3D::drawShadedTexQuad( + read_texture, + &RectToLock, + Renderer::GetFullTargetWidth() , + Renderer::GetFullTargetHeight(), + (BufferFormat == FOURCC_RAWZ)?PixelShaderCache::GetColorMatrixProgram(0):PixelShaderCache::GetDepthMatrixProgram(0), + VertexShaderCache::GetSimpleVertexShader()); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); @@ -1159,6 +1165,10 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) s_target_width *= 2; s_target_height *= 2; break; + case 3: + s_target_width *= 3; + s_target_height *= 3; + break; default: break; }; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 660edd3b75..1cc4d0832a 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -156,7 +156,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); + if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -166,10 +166,15 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); + texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format)); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) - texID ^= tlutHash; + texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF)); + } + else + { + texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); } if (g_ActiveConfig.bSafeTextureCache) hash_value = texHash; @@ -328,9 +333,28 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo int tex_w = (abs(source_rect.GetWidth()) >> bScaleByHalf); int tex_h = (abs(source_rect.GetHeight()) >> bScaleByHalf); - - int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * tex_w)):tex_w; - int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * tex_h)):tex_h; + //compensate the texture grow if multisample is enabled to conserve memory usage + float MultiSampleCompensation = 1.0f; + if(g_ActiveConfig.iMultisampleMode > 0 && g_ActiveConfig.iMultisampleMode < 4) + { + switch (g_ActiveConfig.iMultisampleMode) + { + case 1: + MultiSampleCompensation = 2.0f/3.0f; + break; + case 2: + MultiSampleCompensation = 0.5f; + break; + case 3: + MultiSampleCompensation = 1.0f/3.0f; + break; + default: + break; + }; + } + int Scaledtex_w = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleX() * MultiSampleCompensation * tex_w)):tex_w; + int Scaledtex_h = (g_ActiveConfig.bCopyEFBScaled)?((int)(Renderer::GetTargetScaleY() * MultiSampleCompensation * tex_h)):tex_h; + TexCache::iterator iter; LPDIRECT3DTEXTURE9 tex; iter = textures.find(address); @@ -525,23 +549,15 @@ have_texture: D3DFORMAT bformat = FBManager::GetEFBDepthRTSurfaceFormat(); - if(!bFromZBuffer && g_ActiveConfig.iMultisampleMode > 0) - { - D3D::drawShadedTexQuad(read_texture, - &sourcerect, - Renderer::GetFullTargetWidth() , - Renderer::GetFullTargetHeight(), - PixelShaderCache::GetFSAAColorMatrixProgram(), - VertexShaderCache::GetFSAAVertexShader()); - } - else - { - D3D::drawShadedTexQuad(read_texture,&sourcerect, + int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode; + D3D::drawShadedTexQuad( + read_texture, + &sourcerect, Renderer::GetFullTargetWidth() , Renderer::GetFullTargetHeight(), - ((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(): PixelShaderCache::GetColorMatrixProgram(), - VertexShaderCache::GetSimpleVertexShader()); - } + ((bformat != FOURCC_RAWZ && bformat != D3DFMT_D24X8) && bFromZBuffer)? PixelShaderCache::GetDepthMatrixProgram(SSAAMode): PixelShaderCache::GetColorMatrixProgram(SSAAMode), + (SSAAMode != 0)? VertexShaderCache::GetFSAAVertexShader() : VertexShaderCache::GetSimpleVertexShader()); + D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp index b888e5c8ff..925ef8d790 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexShaderCache.cpp @@ -162,11 +162,13 @@ void VertexShaderCache::Init() "float4 vColor0 : COLOR0;\n" "float2 vTexCoord : TEXCOORD0;\n" "};\n" - "void main(out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float4 inColor0: COLOR0)\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float4 inColor0: COLOR0)\n" "{\n" + "VSOUTPUT OUT;\n" "OUT.vPosition = inPosition;\n" "OUT.vColor0 = inColor0;\n" - "OUT.vTexCoord = inTEX0;\n" + "OUT.vTexCoord = inTEX0;\n" + "return OUT;\n" "}\n"); SimpleVertexShader = D3D::CompileAndCreateVertexShader(vSimpleProg, (int)strlen(vSimpleProg)); @@ -175,20 +177,24 @@ void VertexShaderCache::Init() sprintf(vFSAAProg, "struct VSOUTPUT\n" "{\n" "float4 vPosition : POSITION;\n" - "float2 vTexCoord : TEXCOORD0;\n" - "float2 vTexCoord1 : TEXCOORD1;\n" - "float2 vTexCoord2 : TEXCOORD2;\n" - "float2 vTexCoord3 : TEXCOORD3;\n" + "float4 vTexCoord : TEXCOORD0;\n" + "float4 vTexCoord1 : TEXCOORD1;\n" + "float4 vTexCoord2 : TEXCOORD2;\n" + "float4 vTexCoord3 : TEXCOORD3;\n" "float4 vTexCoord4 : TEXCOORD4;\n" + "float4 vTexCoord5 : TEXCOORD5;\n" "};\n" - "void main( out VSOUTPUT OUT,in float4 inPosition : POSITION,in float2 inTEX0 : TEXCOORD0,in float2 inTEX1 : TEXCOORD1,in float4 inTEX2 : TEXCOORD2)\n" + "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "{\n" + "VSOUTPUT OUT;" "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0 + (float2(-0.1830127f,-0.6830127f) * inTEX1);\n" - "OUT.vTexCoord1 = inTEX0 + (float2(-0.6830127f, 0.1830127f) * inTEX1);\n" - "OUT.vTexCoord2 = inTEX0 + (float2( 0.6830127f,-0.1830127f) * inTEX1);\n" - "OUT.vTexCoord3 = inTEX0 + (float2( 0.1830127f, 0.6830127f) * inTEX1);\n" - "OUT.vTexCoord4 = inTEX2;\n" + "OUT.vTexCoord = inTEX0.xyyx;\n" + "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-0.1830127f,-0.6830127f,-0.7071068f,-1.2247449f) * inTEX1.xyyx);\n" + "OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.6830127f, 0.1830127f, 0.7071068f, 1.2247449f) * inTEX1.xyyx);\n" + "OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.6830127f,-0.1830127f,-1.2247449f, 0.7071068f) * inTEX1.xyyx);\n" + "OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.1830127f, 0.6830127f, 1.2247449f,-0.7071068f) * inTEX1.xyyx);\n" + "OUT.vTexCoord5 = inTEX2;\n" + "return OUT;\n" "}\n"); FSAAVertexShader = D3D::CompileAndCreateVertexShader(vFSAAProg, (int)strlen(vFSAAProg)); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index 550c0373e1..5a1e28efe3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1081,9 +1081,9 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) GL_REPORT_ERRORD(); g_Config.iSaveTargetId = 0; - bool last_copy_efb_to_ram = !g_ActiveConfig.bCopyEFBToTexture; + bool last_copy_efb_to_Texture = g_ActiveConfig.bCopyEFBToTexture; UpdateActiveConfig(); - if (last_copy_efb_to_ram != (!g_ActiveConfig.bCopyEFBToTexture)) + if (last_copy_efb_to_Texture != g_ActiveConfig.bCopyEFBToTexture) TextureMngr::ClearRenderTargets(); // For testing zbuffer targets. diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 3460d69b0b..4aac7e1d6d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -265,7 +265,6 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width u32 FullFormat = (tex_format | (tlutfmt << 16)); if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetSafeTextureHash(ptr, expandedWidth, expandedHeight, tex_format, 0); // remove last arg if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -275,12 +274,17 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetTlutHash(&texMem[tlutaddr], (tex_format == GX_TF_C4) ? 32 : 128); + texHash = TexDecoder_GetFullHash(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + u64 tlutHash = TexDecoder_GetFullHash(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format)); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) - texID ^= tlutHash; + texID = texID ^ ((u32)(tlutHash & 0xFFFFFFFF)) ^ ((u32)((tlutHash >> 32) & 0xFFFFFFFF)); //DebugLog("addr: %08x | texID: %08x | texHash: %08x", address, texID, hash_value); } + else + { + texHash = TexDecoder_GetFastHash(ptr, TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format)); + } if (g_ActiveConfig.bSafeTextureCache) hash_value = texHash; }